00001 #ifndef ONIGURUMA_REGPARSE_H
00002 #define ONIGURUMA_REGPARSE_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "regint.h"
00034
00035 #if defined __GNUC__ && __GNUC__ >= 4
00036 #pragma GCC visibility push(default)
00037 #endif
00038
00039
00040 #define NT_STR 0
00041 #define NT_CCLASS 1
00042 #define NT_CTYPE 2
00043 #define NT_CANY 3
00044 #define NT_BREF 4
00045 #define NT_QTFR 5
00046 #define NT_ENCLOSE 6
00047 #define NT_ANCHOR 7
00048 #define NT_LIST 8
00049 #define NT_ALT 9
00050 #define NT_CALL 10
00051
00052
00053 #define NTYPE2BIT(type) (1<<(type))
00054
00055 #define BIT_NT_STR NTYPE2BIT(NT_STR)
00056 #define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
00057 #define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
00058 #define BIT_NT_CANY NTYPE2BIT(NT_CANY)
00059 #define BIT_NT_BREF NTYPE2BIT(NT_BREF)
00060 #define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
00061 #define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
00062 #define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
00063 #define BIT_NT_LIST NTYPE2BIT(NT_LIST)
00064 #define BIT_NT_ALT NTYPE2BIT(NT_ALT)
00065 #define BIT_NT_CALL NTYPE2BIT(NT_CALL)
00066
00067 #define IS_NODE_TYPE_SIMPLE(type) \
00068 ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
00069 BIT_NT_CANY | BIT_NT_BREF)) != 0)
00070
00071 #define NTYPE(node) ((node)->u.base.type)
00072 #define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
00073
00074 #define NSTR(node) (&((node)->u.str))
00075 #define NCCLASS(node) (&((node)->u.cclass))
00076 #define NCTYPE(node) (&((node)->u.ctype))
00077 #define NBREF(node) (&((node)->u.bref))
00078 #define NQTFR(node) (&((node)->u.qtfr))
00079 #define NENCLOSE(node) (&((node)->u.enclose))
00080 #define NANCHOR(node) (&((node)->u.anchor))
00081 #define NCONS(node) (&((node)->u.cons))
00082 #define NCALL(node) (&((node)->u.call))
00083
00084 #define NCAR(node) (NCONS(node)->car)
00085 #define NCDR(node) (NCONS(node)->cdr)
00086
00087
00088
00089 #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
00090 #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
00091
00092 #define ENCLOSE_MEMORY (1<<0)
00093 #define ENCLOSE_OPTION (1<<1)
00094 #define ENCLOSE_STOP_BACKTRACK (1<<2)
00095 #define ENCLOSE_CONDITION (1<<3)
00096
00097 #define NODE_STR_MARGIN 16
00098 #define NODE_STR_BUF_SIZE 24
00099 #define NODE_BACKREFS_SIZE 6
00100
00101 #define NSTR_RAW (1<<0)
00102 #define NSTR_AMBIG (1<<1)
00103 #define NSTR_DONT_GET_OPT_INFO (1<<2)
00104
00105 #define NSTRING_LEN(node) (OnigDistance )((node)->u.str.end - (node)->u.str.s)
00106 #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
00107 #define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
00108 #define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
00109 #define NSTRING_SET_DONT_GET_OPT_INFO(node) \
00110 (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
00111 #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
00112 #define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
00113 #define NSTRING_IS_DONT_GET_OPT_INFO(node) \
00114 (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
00115
00116 #define BACKREFS_P(br) \
00117 (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
00118
00119 #define NQ_TARGET_ISNOT_EMPTY 0
00120 #define NQ_TARGET_IS_EMPTY 1
00121 #define NQ_TARGET_IS_EMPTY_MEM 2
00122 #define NQ_TARGET_IS_EMPTY_REC 3
00123
00124
00125 #define NST_MIN_FIXED (1<<0)
00126 #define NST_MAX_FIXED (1<<1)
00127 #define NST_CLEN_FIXED (1<<2)
00128 #define NST_MARK1 (1<<3)
00129 #define NST_MARK2 (1<<4)
00130 #define NST_MEM_BACKREFED (1<<5)
00131 #define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
00132 #define NST_RECURSION (1<<7)
00133 #define NST_CALLED (1<<8)
00134 #define NST_ADDR_FIXED (1<<9)
00135 #define NST_NAMED_GROUP (1<<10)
00136 #define NST_NAME_REF (1<<11)
00137 #define NST_IN_REPEAT (1<<12)
00138 #define NST_NEST_LEVEL (1<<13)
00139 #define NST_BY_NUMBER (1<<14)
00140
00141 #define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
00142 #define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
00143
00144 #define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
00145 #define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
00146 #define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
00147 #define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
00148 #define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
00149 #define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
00150 #define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
00151 #define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
00152 #define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
00153 (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
00154 #define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
00155 #define IS_ENCLOSE_NAME_REF(en) (((en)->state & NST_NAME_REF) != 0)
00156
00157 #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
00158 #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
00159 #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
00160 #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
00161 #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
00162 #define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
00163 #define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
00164
00165 #define CALLNODE_REFNUM_UNDEF -1
00166
00167 typedef struct {
00168 NodeBase base;
00169 UChar* s;
00170 UChar* end;
00171 unsigned int flag;
00172 int capa;
00173 UChar buf[NODE_STR_BUF_SIZE];
00174 } StrNode;
00175
00176 typedef struct {
00177 NodeBase base;
00178 int state;
00179 struct _Node* target;
00180 int lower;
00181 int upper;
00182 int greedy;
00183 int target_empty_info;
00184 struct _Node* head_exact;
00185 struct _Node* next_head_exact;
00186 int is_refered;
00187 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00188 int comb_exp_check_num;
00189 #endif
00190 } QtfrNode;
00191
00192 typedef struct {
00193 NodeBase base;
00194 int state;
00195 int type;
00196 int regnum;
00197 OnigOptionType option;
00198 struct _Node* target;
00199 AbsAddrType call_addr;
00200
00201 OnigDistance min_len;
00202 OnigDistance max_len;
00203 int char_len;
00204 int opt_count;
00205 } EncloseNode;
00206
00207 #ifdef USE_SUBEXP_CALL
00208
00209 typedef struct {
00210 int offset;
00211 struct _Node* target;
00212 } UnsetAddr;
00213
00214 typedef struct {
00215 int num;
00216 int alloc;
00217 UnsetAddr* us;
00218 } UnsetAddrList;
00219
00220 typedef struct {
00221 NodeBase base;
00222 int state;
00223 int group_num;
00224 UChar* name;
00225 UChar* name_end;
00226 struct _Node* target;
00227 UnsetAddrList* unset_addr_list;
00228 } CallNode;
00229
00230 #endif
00231
00232 typedef struct {
00233 NodeBase base;
00234 int state;
00235 int back_num;
00236 int back_static[NODE_BACKREFS_SIZE];
00237 int* back_dynamic;
00238 int nest_level;
00239 } BRefNode;
00240
00241 typedef struct {
00242 NodeBase base;
00243 int type;
00244 struct _Node* target;
00245 int char_len;
00246 int ascii_range;
00247 } AnchorNode;
00248
00249 typedef struct {
00250 NodeBase base;
00251 struct _Node* car;
00252 struct _Node* cdr;
00253 } ConsAltNode;
00254
00255 typedef struct {
00256 NodeBase base;
00257 int ctype;
00258 int not;
00259 int ascii_range;
00260 } CtypeNode;
00261
00262 typedef struct _Node {
00263 union {
00264 NodeBase base;
00265 StrNode str;
00266 CClassNode cclass;
00267 QtfrNode qtfr;
00268 EncloseNode enclose;
00269 BRefNode bref;
00270 AnchorNode anchor;
00271 ConsAltNode cons;
00272 CtypeNode ctype;
00273 #ifdef USE_SUBEXP_CALL
00274 CallNode call;
00275 #endif
00276 } u;
00277 } Node;
00278
00279
00280 #define NULL_NODE ((Node* )0)
00281
00282 #define SCANENV_MEMNODES_SIZE 8
00283 #define SCANENV_MEM_NODES(senv) \
00284 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
00285 (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
00286
00287 typedef struct {
00288 OnigOptionType option;
00289 OnigCaseFoldType case_fold_flag;
00290 OnigEncoding enc;
00291 const OnigSyntaxType* syntax;
00292 BitStatusType capture_history;
00293 BitStatusType bt_mem_start;
00294 BitStatusType bt_mem_end;
00295 BitStatusType backrefed_mem;
00296 UChar* pattern;
00297 UChar* pattern_end;
00298 UChar* error;
00299 UChar* error_end;
00300 regex_t* reg;
00301 int num_call;
00302 #ifdef USE_SUBEXP_CALL
00303 UnsetAddrList* unset_addr_list;
00304 #endif
00305 int num_mem;
00306 #ifdef USE_NAMED_GROUP
00307 int num_named;
00308 #endif
00309 int mem_alloc;
00310 Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
00311 Node** mem_nodes_dynamic;
00312 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00313 int num_comb_exp_check;
00314 int comb_exp_max_regnum;
00315 int curr_max_regnum;
00316 int has_recursion;
00317 #endif
00318 int warnings_flag;
00319 const char* sourcefile;
00320 int sourceline;
00321 } ScanEnv;
00322
00323
00324 #define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
00325 #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
00326 #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
00327
00328 #ifdef USE_NAMED_GROUP
00329 typedef struct {
00330 int new_val;
00331 } GroupNumRemap;
00332
00333 extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
00334 #endif
00335
00336 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
00337 extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
00338 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
00339 extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
00340 extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
00341 extern void onig_node_conv_to_str_node P_((Node* node, int raw));
00342 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
00343 extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
00344 extern void onig_node_free P_((Node* node));
00345 extern Node* onig_node_new_enclose P_((int type));
00346 extern Node* onig_node_new_anchor P_((int type));
00347 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
00348 extern Node* onig_node_new_list P_((Node* left, Node* right));
00349 extern Node* onig_node_list_add P_((Node* list, Node* x));
00350 extern Node* onig_node_new_alt P_((Node* left, Node* right));
00351 extern void onig_node_str_clear P_((Node* node));
00352 extern int onig_free_node_list P_((void));
00353 extern int onig_names_free P_((regex_t* reg));
00354 extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
00355 extern int onig_free_shared_cclass_table P_((void));
00356
00357 #ifdef ONIG_DEBUG
00358 #ifdef USE_NAMED_GROUP
00359 extern int onig_print_names(FILE*, regex_t*);
00360 #endif
00361 #endif
00362
00363 #if defined __GNUC__ && __GNUC__ >= 4
00364 #pragma GCC visibility pop
00365 #endif
00366
00367 #endif
00368