00001 #ifndef ONIGURUMA_REGPARSE_H
00002 #define ONIGURUMA_REGPARSE_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "regint.h"
00034
00035 RUBY_SYMBOL_EXPORT_BEGIN
00036
00037
/* Node type codes. */
#define NT_STR       0
#define NT_CCLASS    1
#define NT_CTYPE     2
#define NT_CANY      3
#define NT_BREF      4
#define NT_QTFR      5
#define NT_ENCLOSE   6
#define NT_ANCHOR    7
#define NT_LIST      8
#define NT_ALT       9
#define NT_CALL     10

/* Turn a node type code into a one-bit mask (bit number == type code). */
#define NTYPE2BIT(type)  (1 << (type))

/* One mask per node type code. */
#define BIT_NT_STR       NTYPE2BIT(NT_STR)
#define BIT_NT_CCLASS    NTYPE2BIT(NT_CCLASS)
#define BIT_NT_CTYPE     NTYPE2BIT(NT_CTYPE)
#define BIT_NT_CANY      NTYPE2BIT(NT_CANY)
#define BIT_NT_BREF      NTYPE2BIT(NT_BREF)
#define BIT_NT_QTFR      NTYPE2BIT(NT_QTFR)
#define BIT_NT_ENCLOSE   NTYPE2BIT(NT_ENCLOSE)
#define BIT_NT_ANCHOR    NTYPE2BIT(NT_ANCHOR)
#define BIT_NT_LIST      NTYPE2BIT(NT_LIST)
#define BIT_NT_ALT       NTYPE2BIT(NT_ALT)
#define BIT_NT_CALL      NTYPE2BIT(NT_CALL)

/* True (1) when `type` is NT_STR, NT_CCLASS, NT_CTYPE, NT_CANY or NT_BREF;
   0 for every other type code. */
#define IS_NODE_TYPE_SIMPLE(type) \
  ((NTYPE2BIT(type) & \
    (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | BIT_NT_CANY | BIT_NT_BREF)) != 0)
00068
/* Read the type tag of a node: the `type` member of its `u.base`. */
#define NTYPE(node)             ((node)->u.base.type)

/* Store a type tag into a node.  The whole assignment is parenthesized so
   the macro behaves as a single operand when it is combined with other
   operators (e.g. `SET_NTYPE(n, t) == t` or inside a `?:`). */
#define SET_NTYPE(node, ntype)  ((node)->u.base.type = (ntype))

/* Address of the named member of the node's union `u`. */
#define NSTR(node)      (&((node)->u.str))
#define NCCLASS(node)   (&((node)->u.cclass))
#define NCTYPE(node)    (&((node)->u.ctype))
#define NBREF(node)     (&((node)->u.bref))
#define NQTFR(node)     (&((node)->u.qtfr))
#define NENCLOSE(node)  (&((node)->u.enclose))
#define NANCHOR(node)   (&((node)->u.anchor))
#define NCONS(node)     (&((node)->u.cons))
#define NCALL(node)     (&((node)->u.call))

/* The `car` / `cdr` links of the node's `u.cons` member (lvalues). */
#define NCAR(node)      (NCONS(node)->car)
#define NCDR(node)      (NCONS(node)->cdr)
00084
00085
00086
/* Combined masks: each is the bitwise OR of two ANCHOR_* bits. */
#define ANCHOR_ANYCHAR_STAR_MASK \
  (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK \
  (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
00089
/* ENCLOSE_* bit flags, one distinct bit each. */
#define ENCLOSE_MEMORY           (1 << 0)
#define ENCLOSE_OPTION           (1 << 1)
#define ENCLOSE_STOP_BACKTRACK   (1 << 2)
#define ENCLOSE_CONDITION        (1 << 3)
00094
/* Fixed sizes used by the node structures in this header:
   NODE_STR_BUF_SIZE dimensions StrNode.buf and NODE_BACKREFS_SIZE
   dimensions BRefNode.back_static. */
#define NODE_STR_MARGIN      16
#define NODE_STR_BUF_SIZE    24
#define NODE_BACKREFS_SIZE    6
00098
/* Bit flags stored in a node's `u.str.flag`. */
#define NSTR_RAW                 (1 << 0)
#define NSTR_AMBIG               (1 << 1)
#define NSTR_DONT_GET_OPT_INFO   (1 << 2)

/* Byte distance between `u.str.s` and `u.str.end`, as an OnigDistance.
   The outer parentheses keep the cast expression a single operand. */
#define NSTRING_LEN(node) \
  ((OnigDistance )((node)->u.str.end - (node)->u.str.s))

/* Flag setters / clearers.  Each expansion is fully parenthesized so that
   a following operator applies to the result of the assignment instead of
   being absorbed into its right-hand side. */
#define NSTRING_SET_RAW(node)      ((node)->u.str.flag |= NSTR_RAW)
#define NSTRING_CLEAR_RAW(node)    ((node)->u.str.flag &= ~NSTR_RAW)
#define NSTRING_SET_AMBIG(node)    ((node)->u.str.flag |= NSTR_AMBIG)
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
  ((node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO)

/* Flag tests: evaluate to 1 when the bit is set, 0 otherwise. */
#define NSTRING_IS_RAW(node)       (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_AMBIG(node)     (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
00113
/* Pick the array to read from an object that has `back_dynamic` and
   `back_static` members: `back_dynamic` when it is non-NULL, otherwise
   `back_static`.  The expansion deliberately has no trailing semicolon, so
   the macro can be used as an operand (indexed, compared, passed as an
   argument) as well as on the right-hand side of an assignment. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
00116
/* NQ_TARGET_* codes (plain integers 0..3, not bit flags). */
#define NQ_TARGET_ISNOT_EMPTY    0
#define NQ_TARGET_IS_EMPTY       1
#define NQ_TARGET_IS_EMPTY_MEM   2
#define NQ_TARGET_IS_EMPTY_REC   3
00121
00122
/* NST_* status bits, one distinct bit each (bits 0 through 14).  They are
   tested against / stored into the `state` member of several node kinds by
   the IS_ENCLOSE_*, IS_CALL_*, IS_BACKREF_* and IS_QUANTIFIER_* macros. */
#define NST_MIN_FIXED               (1 << 0)
#define NST_MAX_FIXED               (1 << 1)
#define NST_CLEN_FIXED              (1 << 2)
#define NST_MARK1                   (1 << 3)
#define NST_MARK2                   (1 << 4)
#define NST_MEM_BACKREFED           (1 << 5)
#define NST_STOP_BT_SIMPLE_REPEAT   (1 << 6)
#define NST_RECURSION               (1 << 7)
#define NST_CALLED                  (1 << 8)
#define NST_ADDR_FIXED              (1 << 9)
#define NST_NAMED_GROUP             (1 << 10)
#define NST_NAME_REF                (1 << 11)
#define NST_IN_REPEAT               (1 << 12)
#define NST_NEST_LEVEL              (1 << 13)
#define NST_BY_NUMBER               (1 << 14)
00138
/* Set / clear NST_* bits in a node's `u.enclose.state`.  `node` is a node
   pointer; `f` is a mask of bits.  The assignment is parenthesized so that
   the macro acts as a single operand in a larger expression. */
#define SET_ENCLOSE_STATUS(node,f)      ((node)->u.enclose.state |= (f))
#define CLEAR_ENCLOSE_STATUS(node,f)    ((node)->u.enclose.state &= ~(f))

/* Bit tests on `(en)->state`; `en` is a pointer to an object with a
   `state` member.  Each yields 1 when the bit is set, 0 otherwise. */
#define IS_ENCLOSE_CALLED(en)           (((en)->state & NST_CALLED) != 0)
#define IS_ENCLOSE_ADDR_FIXED(en)       (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_ENCLOSE_RECURSION(en)        (((en)->state & NST_RECURSION) != 0)
#define IS_ENCLOSE_MARK1(en)            (((en)->state & NST_MARK1) != 0)
#define IS_ENCLOSE_MARK2(en)            (((en)->state & NST_MARK2) != 0)
#define IS_ENCLOSE_MIN_FIXED(en)        (((en)->state & NST_MIN_FIXED) != 0)
#define IS_ENCLOSE_MAX_FIXED(en)        (((en)->state & NST_MAX_FIXED) != 0)
#define IS_ENCLOSE_CLEN_FIXED(en)       (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
  (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_ENCLOSE_NAMED_GROUP(en)      (((en)->state & NST_NAMED_GROUP) != 0)
#define IS_ENCLOSE_NAME_REF(en)         (((en)->state & NST_NAME_REF) != 0)

/* Set NST_RECURSION in a node's `u.call.state` (parenthesized for the same
   reason as SET_ENCLOSE_STATUS). */
#define SET_CALL_RECURSION(node)        ((node)->u.call.state |= NST_RECURSION)

/* Bit tests on the `state` member of the pointed-to object. */
#define IS_CALL_RECURSION(cn)           (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn)            (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn)         (((bn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NEST_LEVEL(bn)       (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUANTIFIER_IN_REPEAT(qn)     (((qn)->state & NST_IN_REPEAT) != 0)
#define IS_QUANTIFIER_BY_NUMBER(qn)     (((qn)->state & NST_BY_NUMBER) != 0)
00162
/* CALLNODE_REFNUM_UNDEF is the constant -1.  The value is parenthesized so
   the unary minus cannot bind differently when the macro is placed next to
   another operator. */
#define CALLNODE_REFNUM_UNDEF  (-1)
00164
00165 typedef struct {
00166 NodeBase base;
00167 UChar* s;
00168 UChar* end;
00169 unsigned int flag;
00170 int capa;
00171 UChar buf[NODE_STR_BUF_SIZE];
00172 } StrNode;
00173
00174 typedef struct {
00175 NodeBase base;
00176 int state;
00177 struct _Node* target;
00178 int lower;
00179 int upper;
00180 int greedy;
00181 int target_empty_info;
00182 struct _Node* head_exact;
00183 struct _Node* next_head_exact;
00184 int is_refered;
00185 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00186 int comb_exp_check_num;
00187 #endif
00188 } QtfrNode;
00189
00190 typedef struct {
00191 NodeBase base;
00192 int state;
00193 int type;
00194 int regnum;
00195 OnigOptionType option;
00196 struct _Node* target;
00197 AbsAddrType call_addr;
00198
00199 OnigDistance min_len;
00200 OnigDistance max_len;
00201 int char_len;
00202 int opt_count;
00203 } EncloseNode;
00204
#ifdef USE_SUBEXP_CALL

/* One (offset, target node) pair. */
typedef struct {
  int           offset;
  struct _Node *target;
} UnsetAddr;

/* Array of UnsetAddr entries together with two counters.
   NOTE(review): `num` / `alloc` are presumably used / allocated entry
   counts for `us` -- confirm. */
typedef struct {
  int        num;
  int        alloc;
  UnsetAddr *us;
} UnsetAddrList;

/* Call node (the `call` member of the Node union). */
typedef struct {
  NodeBase       base;
  int            state;       /* NST_* bits, see SET_CALL_RECURSION() / IS_CALL_*() */
  int            group_num;
  UChar         *name;
  UChar         *name_end;
  struct _Node  *target;
  UnsetAddrList *unset_addr_list;
} CallNode;

#endif /* USE_SUBEXP_CALL */
00229
00230 typedef struct {
00231 NodeBase base;
00232 int state;
00233 int back_num;
00234 int back_static[NODE_BACKREFS_SIZE];
00235 int* back_dynamic;
00236 int nest_level;
00237 } BRefNode;
00238
00239 typedef struct {
00240 NodeBase base;
00241 int type;
00242 struct _Node* target;
00243 int char_len;
00244 int ascii_range;
00245 } AnchorNode;
00246
00247 typedef struct {
00248 NodeBase base;
00249 struct _Node* car;
00250 struct _Node* cdr;
00251 } ConsAltNode;
00252
00253 typedef struct {
00254 NodeBase base;
00255 int ctype;
00256 int not;
00257 int ascii_range;
00258 } CtypeNode;
00259
00260 typedef struct _Node {
00261 union {
00262 NodeBase base;
00263 StrNode str;
00264 CClassNode cclass;
00265 QtfrNode qtfr;
00266 EncloseNode enclose;
00267 BRefNode bref;
00268 AnchorNode anchor;
00269 ConsAltNode cons;
00270 CtypeNode ctype;
00271 #ifdef USE_SUBEXP_CALL
00272 CallNode call;
00273 #endif
00274 } u;
00275 } Node;
00276
00277
/* Null pointer constant typed as Node*. */
#define NULL_NODE  ((Node *)0)
00279
/* Number of entries in the fixed `mem_nodes_static` array of a ScanEnv. */
#define SCANENV_MEMNODES_SIZE  8

/* Select the node table of a scan environment: `mem_nodes_dynamic` when it
   is non-NULL, otherwise `mem_nodes_static`. */
#define SCANENV_MEM_NODES(senv) \
  (IS_NOT_NULL((senv)->mem_nodes_dynamic) \
     ? (senv)->mem_nodes_dynamic          \
     : (senv)->mem_nodes_static)
00284
00285 typedef struct {
00286 OnigOptionType option;
00287 OnigCaseFoldType case_fold_flag;
00288 OnigEncoding enc;
00289 const OnigSyntaxType* syntax;
00290 BitStatusType capture_history;
00291 BitStatusType bt_mem_start;
00292 BitStatusType bt_mem_end;
00293 BitStatusType backrefed_mem;
00294 UChar* pattern;
00295 UChar* pattern_end;
00296 UChar* error;
00297 UChar* error_end;
00298 regex_t* reg;
00299 int num_call;
00300 #ifdef USE_SUBEXP_CALL
00301 UnsetAddrList* unset_addr_list;
00302 #endif
00303 int num_mem;
00304 #ifdef USE_NAMED_GROUP
00305 int num_named;
00306 #endif
00307 int mem_alloc;
00308 Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
00309 Node** mem_nodes_dynamic;
00310 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00311 int num_comb_exp_check;
00312 int comb_exp_max_regnum;
00313 int curr_max_regnum;
00314 int has_recursion;
00315 #endif
00316 int warnings_flag;
00317 const char* sourcefile;
00318 int sourceline;
00319 } ScanEnv;
00320
00321
/* Test whether any bit of the given mask is set in the `op`, `op2` or
   `behavior` member of the object `syn` points to; each yields 1 or 0. */
#define IS_SYNTAX_OP(syn, opm)   (((opm) & (syn)->op) != 0)
#define IS_SYNTAX_OP2(syn, opm)  (((opm) & (syn)->op2) != 0)
#define IS_SYNTAX_BV(syn, bvm)   (((bvm) & (syn)->behavior) != 0)
00325
#ifdef USE_NAMED_GROUP
/* Single-field remap entry taken (as an array/pointer) by
   onig_renumber_name_table().
   NOTE(review): `new_val` is presumably the replacement group number --
   confirm against the definition of onig_renumber_name_table(). */
typedef struct {
  int new_val;
} GroupNumRemap;

extern int onig_renumber_name_table P_((regex_t *reg, GroupNumRemap *map));
#endif /* USE_NAMED_GROUP */
00333
00334 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
00335 extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
00336 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
00337 extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
00338 extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
00339 extern void onig_node_conv_to_str_node P_((Node* node, int raw));
00340 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
00341 extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
00342 extern void onig_node_free P_((Node* node));
00343 extern Node* onig_node_new_enclose P_((int type));
00344 extern Node* onig_node_new_anchor P_((int type));
00345 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
00346 extern Node* onig_node_new_list P_((Node* left, Node* right));
00347 extern Node* onig_node_list_add P_((Node* list, Node* x));
00348 extern Node* onig_node_new_alt P_((Node* left, Node* right));
00349 extern void onig_node_str_clear P_((Node* node));
00350 extern int onig_free_node_list P_((void));
00351 extern int onig_names_free P_((regex_t* reg));
00352 extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
00353 extern int onig_free_shared_cclass_table P_((void));
00354
/* Declared only when both ONIG_DEBUG and USE_NAMED_GROUP are defined. */
#if defined(ONIG_DEBUG) && defined(USE_NAMED_GROUP)
extern int onig_print_names(FILE *fp, regex_t *reg);
#endif /* ONIG_DEBUG && USE_NAMED_GROUP */
00360
00361 RUBY_SYMBOL_EXPORT_END
00362
00363 #endif
00364