00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regparse.h"
00032
00033 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00034
00035 extern OnigCaseFoldType
00036 onig_get_default_case_fold_flag(void)
00037 {
00038 return OnigDefaultCaseFoldFlag;
00039 }
00040
00041 extern int
00042 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00043 {
00044 OnigDefaultCaseFoldFlag = case_fold_flag;
00045 return 0;
00046 }
00047
00048
00049 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00050 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00051 #endif
00052
00053 #if 0
00054 static UChar*
00055 str_dup(UChar* s, UChar* end)
00056 {
00057 ptrdiff_t len = end - s;
00058
00059 if (len > 0) {
00060 UChar* r = (UChar* )xmalloc(len + 1);
00061 CHECK_NULL_RETURN(r);
00062 xmemcpy(r, s, len);
00063 r[len] = (UChar )0;
00064 return r;
00065 }
00066 else return NULL;
00067 }
00068 #endif
00069
00070 static void
00071 swap_node(Node* a, Node* b)
00072 {
00073 Node c;
00074 c = *a; *a = *b; *b = c;
00075
00076 if (NTYPE(a) == NT_STR) {
00077 StrNode* sn = NSTR(a);
00078 if (sn->capa == 0) {
00079 size_t len = sn->end - sn->s;
00080 sn->s = sn->buf;
00081 sn->end = sn->s + len;
00082 }
00083 }
00084
00085 if (NTYPE(b) == NT_STR) {
00086 StrNode* sn = NSTR(b);
00087 if (sn->capa == 0) {
00088 size_t len = sn->end - sn->s;
00089 sn->s = sn->buf;
00090 sn->end = sn->s + len;
00091 }
00092 }
00093 }
00094
00095 static OnigDistance
00096 distance_add(OnigDistance d1, OnigDistance d2)
00097 {
00098 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00099 return ONIG_INFINITE_DISTANCE;
00100 else {
00101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00102 else return ONIG_INFINITE_DISTANCE;
00103 }
00104 }
00105
00106 static OnigDistance
00107 distance_multiply(OnigDistance d, int m)
00108 {
00109 if (m == 0) return 0;
00110
00111 if (d < ONIG_INFINITE_DISTANCE / m)
00112 return d * m;
00113 else
00114 return ONIG_INFINITE_DISTANCE;
00115 }
00116
00117 static int
00118 bitset_is_empty(BitSetRef bs)
00119 {
00120 int i;
00121 for (i = 0; i < BITSET_SIZE; i++) {
00122 if (bs[i] != 0) return 0;
00123 }
00124 return 1;
00125 }
00126
00127 #ifdef ONIG_DEBUG
00128 static int
00129 onig_is_prelude(void)
00130 {
00131 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00132 }
00133
00134 static int
00135 bitset_on_num(BitSetRef bs)
00136 {
00137 int i, n;
00138
00139 n = 0;
00140 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00141 if (BITSET_AT(bs, i)) n++;
00142 }
00143 return n;
00144 }
00145 #endif
00146
00147 extern int
00148 onig_bbuf_init(BBuf* buf, OnigDistance size)
00149 {
00150 if (size <= 0) {
00151 size = 0;
00152 buf->p = NULL;
00153 }
00154 else {
00155 buf->p = (UChar* )xmalloc(size);
00156 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00157 }
00158
00159 buf->alloc = (unsigned int )size;
00160 buf->used = 0;
00161 return 0;
00162 }
00163
00164
00165 #ifdef USE_SUBEXP_CALL
00166
00167 static int
00168 unset_addr_list_init(UnsetAddrList* uslist, int size)
00169 {
00170 UnsetAddr* p;
00171
00172 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00173 CHECK_NULL_RETURN_MEMERR(p);
00174 uslist->num = 0;
00175 uslist->alloc = size;
00176 uslist->us = p;
00177 return 0;
00178 }
00179
00180 static void
00181 unset_addr_list_end(UnsetAddrList* uslist)
00182 {
00183 if (IS_NOT_NULL(uslist->us))
00184 xfree(uslist->us);
00185 }
00186
00187 static int
00188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00189 {
00190 UnsetAddr* p;
00191 int size;
00192
00193 if (uslist->num >= uslist->alloc) {
00194 size = uslist->alloc * 2;
00195 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00196 CHECK_NULL_RETURN_MEMERR(p);
00197 uslist->alloc = size;
00198 uslist->us = p;
00199 }
00200
00201 uslist->us[uslist->num].offset = offset;
00202 uslist->us[uslist->num].target = node;
00203 uslist->num++;
00204 return 0;
00205 }
00206 #endif
00207
00208
00209 static int
00210 add_opcode(regex_t* reg, int opcode)
00211 {
00212 BBUF_ADD1(reg, opcode);
00213 return 0;
00214 }
00215
00216 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00217 static int
00218 add_state_check_num(regex_t* reg, int num)
00219 {
00220 StateCheckNumType n = (StateCheckNumType )num;
00221
00222 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00223 return 0;
00224 }
00225 #endif
00226
00227 static int
00228 add_rel_addr(regex_t* reg, int addr)
00229 {
00230 RelAddrType ra = (RelAddrType )addr;
00231
00232 BBUF_ADD(reg, &ra, SIZE_RELADDR);
00233 return 0;
00234 }
00235
00236 static int
00237 add_abs_addr(regex_t* reg, int addr)
00238 {
00239 AbsAddrType ra = (AbsAddrType )addr;
00240
00241 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00242 return 0;
00243 }
00244
00245 static int
00246 add_length(regex_t* reg, OnigDistance len)
00247 {
00248 LengthType l = (LengthType )len;
00249
00250 BBUF_ADD(reg, &l, SIZE_LENGTH);
00251 return 0;
00252 }
00253
00254 static int
00255 add_mem_num(regex_t* reg, int num)
00256 {
00257 MemNumType n = (MemNumType )num;
00258
00259 BBUF_ADD(reg, &n, SIZE_MEMNUM);
00260 return 0;
00261 }
00262
00263 static int
00264 add_pointer(regex_t* reg, void* addr)
00265 {
00266 PointerType ptr = (PointerType )addr;
00267
00268 BBUF_ADD(reg, &ptr, SIZE_POINTER);
00269 return 0;
00270 }
00271
00272 static int
00273 add_option(regex_t* reg, OnigOptionType option)
00274 {
00275 BBUF_ADD(reg, &option, SIZE_OPTION);
00276 return 0;
00277 }
00278
00279 static int
00280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00281 {
00282 int r;
00283
00284 r = add_opcode(reg, opcode);
00285 if (r) return r;
00286 r = add_rel_addr(reg, addr);
00287 return r;
00288 }
00289
00290 static int
00291 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00292 {
00293 BBUF_ADD(reg, bytes, len);
00294 return 0;
00295 }
00296
00297 static int
00298 add_bitset(regex_t* reg, BitSetRef bs)
00299 {
00300 BBUF_ADD(reg, bs, SIZE_BITSET);
00301 return 0;
00302 }
00303
00304 static int
00305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00306 {
00307 int r;
00308
00309 r = add_opcode(reg, opcode);
00310 if (r) return r;
00311 r = add_option(reg, option);
00312 return r;
00313 }
00314
00315 static int compile_length_tree(Node* node, regex_t* reg);
00316 static int compile_tree(Node* node, regex_t* reg);
00317
00318
00319 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00320 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
00321 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
00322
00323 static int
00324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00325 {
00326 int op;
00327
00328 if (ignore_case) {
00329 switch (str_len) {
00330 case 1: op = OP_EXACT1_IC; break;
00331 default: op = OP_EXACTN_IC; break;
00332 }
00333 }
00334 else {
00335 switch (mb_len) {
00336 case 1:
00337 switch (str_len) {
00338 case 1: op = OP_EXACT1; break;
00339 case 2: op = OP_EXACT2; break;
00340 case 3: op = OP_EXACT3; break;
00341 case 4: op = OP_EXACT4; break;
00342 case 5: op = OP_EXACT5; break;
00343 default: op = OP_EXACTN; break;
00344 }
00345 break;
00346
00347 case 2:
00348 switch (str_len) {
00349 case 1: op = OP_EXACTMB2N1; break;
00350 case 2: op = OP_EXACTMB2N2; break;
00351 case 3: op = OP_EXACTMB2N3; break;
00352 default: op = OP_EXACTMB2N; break;
00353 }
00354 break;
00355
00356 case 3:
00357 op = OP_EXACTMB3N;
00358 break;
00359
00360 default:
00361 op = OP_EXACTMBN;
00362 break;
00363 }
00364 }
00365 return op;
00366 }
00367
00368 static int
00369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00370 {
00371 int r;
00372 int saved_num_null_check = reg->num_null_check;
00373
00374 if (empty_info != 0) {
00375 r = add_opcode(reg, OP_NULL_CHECK_START);
00376 if (r) return r;
00377 r = add_mem_num(reg, reg->num_null_check);
00378 if (r) return r;
00379 reg->num_null_check++;
00380 }
00381
00382 r = compile_tree(node, reg);
00383 if (r) return r;
00384
00385 if (empty_info != 0) {
00386 if (empty_info == NQ_TARGET_IS_EMPTY)
00387 r = add_opcode(reg, OP_NULL_CHECK_END);
00388 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00389 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00390 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00391 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00392
00393 if (r) return r;
00394 r = add_mem_num(reg, saved_num_null_check);
00395 }
00396 return r;
00397 }
00398
00399 #ifdef USE_SUBEXP_CALL
00400 static int
00401 compile_call(CallNode* node, regex_t* reg)
00402 {
00403 int r;
00404
00405 r = add_opcode(reg, OP_CALL);
00406 if (r) return r;
00407 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00408 node->target);
00409 if (r) return r;
00410 r = add_abs_addr(reg, 0 );
00411 return r;
00412 }
00413 #endif
00414
00415 static int
00416 compile_tree_n_times(Node* node, int n, regex_t* reg)
00417 {
00418 int i, r;
00419
00420 for (i = 0; i < n; i++) {
00421 r = compile_tree(node, reg);
00422 if (r) return r;
00423 }
00424 return 0;
00425 }
00426
00427 static int
00428 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00429 regex_t* reg ARG_UNUSED, int ignore_case)
00430 {
00431 int len;
00432 int op = select_str_opcode(mb_len, str_len, ignore_case);
00433
00434 len = SIZE_OPCODE;
00435
00436 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
00437 if (IS_NEED_STR_LEN_OP_EXACT(op))
00438 len += SIZE_LENGTH;
00439
00440 len += mb_len * (int )str_len;
00441 return len;
00442 }
00443
00444 static int
00445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00446 regex_t* reg, int ignore_case)
00447 {
00448 int op = select_str_opcode(mb_len, str_len, ignore_case);
00449 add_opcode(reg, op);
00450
00451 if (op == OP_EXACTMBN)
00452 add_length(reg, mb_len);
00453
00454 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00455 if (op == OP_EXACTN_IC)
00456 add_length(reg, mb_len * str_len);
00457 else
00458 add_length(reg, str_len);
00459 }
00460
00461 add_bytes(reg, s, mb_len * str_len);
00462 return 0;
00463 }
00464
00465
00466 static int
00467 compile_length_string_node(Node* node, regex_t* reg)
00468 {
00469 int rlen, r, len, prev_len, slen, ambig;
00470 OnigEncoding enc = reg->enc;
00471 UChar *p, *prev;
00472 StrNode* sn;
00473
00474 sn = NSTR(node);
00475 if (sn->end <= sn->s)
00476 return 0;
00477
00478 ambig = NSTRING_IS_AMBIG(node);
00479
00480 p = prev = sn->s;
00481 prev_len = enclen(enc, p, sn->end);
00482 p += prev_len;
00483 slen = 1;
00484 rlen = 0;
00485
00486 for (; p < sn->end; ) {
00487 len = enclen(enc, p, sn->end);
00488 if (len == prev_len) {
00489 slen++;
00490 }
00491 else {
00492 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00493 rlen += r;
00494 prev = p;
00495 slen = 1;
00496 prev_len = len;
00497 }
00498 p += len;
00499 }
00500 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00501 rlen += r;
00502 return rlen;
00503 }
00504
00505 static int
00506 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00507 {
00508 if (sn->end <= sn->s)
00509 return 0;
00510
00511 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
00512 }
00513
00514 static int
00515 compile_string_node(Node* node, regex_t* reg)
00516 {
00517 int r, len, prev_len, slen, ambig;
00518 OnigEncoding enc = reg->enc;
00519 UChar *p, *prev, *end;
00520 StrNode* sn;
00521
00522 sn = NSTR(node);
00523 if (sn->end <= sn->s)
00524 return 0;
00525
00526 end = sn->end;
00527 ambig = NSTRING_IS_AMBIG(node);
00528
00529 p = prev = sn->s;
00530 prev_len = enclen(enc, p, end);
00531 p += prev_len;
00532 slen = 1;
00533
00534 for (; p < end; ) {
00535 len = enclen(enc, p, end);
00536 if (len == prev_len) {
00537 slen++;
00538 }
00539 else {
00540 r = add_compile_string(prev, prev_len, slen, reg, ambig);
00541 if (r) return r;
00542
00543 prev = p;
00544 slen = 1;
00545 prev_len = len;
00546 }
00547
00548 p += len;
00549 }
00550 return add_compile_string(prev, prev_len, slen, reg, ambig);
00551 }
00552
00553 static int
00554 compile_string_raw_node(StrNode* sn, regex_t* reg)
00555 {
00556 if (sn->end <= sn->s)
00557 return 0;
00558
00559 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
00560 }
00561
00562 static int
00563 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00564 {
00565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00566 add_length(reg, mbuf->used);
00567 return add_bytes(reg, mbuf->p, mbuf->used);
00568 #else
00569 int r, pad_size;
00570 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00571
00572 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00573 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00574 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00575
00576 r = add_bytes(reg, mbuf->p, mbuf->used);
00577
00578
00579 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00580 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00581 return r;
00582 #endif
00583 }
00584
00585 static int
00586 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00587 {
00588 int len;
00589
00590 if (IS_NCCLASS_SHARE(cc)) {
00591 len = SIZE_OPCODE + SIZE_POINTER;
00592 return len;
00593 }
00594
00595 if (IS_NULL(cc->mbuf)) {
00596 len = SIZE_OPCODE + SIZE_BITSET;
00597 }
00598 else {
00599 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00600 len = SIZE_OPCODE;
00601 }
00602 else {
00603 len = SIZE_OPCODE + SIZE_BITSET;
00604 }
00605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00606 len += SIZE_LENGTH + cc->mbuf->used;
00607 #else
00608 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00609 #endif
00610 }
00611
00612 return len;
00613 }
00614
00615 static int
00616 compile_cclass_node(CClassNode* cc, regex_t* reg)
00617 {
00618 int r;
00619
00620 if (IS_NCCLASS_SHARE(cc)) {
00621 add_opcode(reg, OP_CCLASS_NODE);
00622 r = add_pointer(reg, cc);
00623 return r;
00624 }
00625
00626 if (IS_NULL(cc->mbuf)) {
00627 if (IS_NCCLASS_NOT(cc))
00628 add_opcode(reg, OP_CCLASS_NOT);
00629 else
00630 add_opcode(reg, OP_CCLASS);
00631
00632 r = add_bitset(reg, cc->bs);
00633 }
00634 else {
00635 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00636 if (IS_NCCLASS_NOT(cc))
00637 add_opcode(reg, OP_CCLASS_MB_NOT);
00638 else
00639 add_opcode(reg, OP_CCLASS_MB);
00640
00641 r = add_multi_byte_cclass(cc->mbuf, reg);
00642 }
00643 else {
00644 if (IS_NCCLASS_NOT(cc))
00645 add_opcode(reg, OP_CCLASS_MIX_NOT);
00646 else
00647 add_opcode(reg, OP_CCLASS_MIX);
00648
00649 r = add_bitset(reg, cc->bs);
00650 if (r) return r;
00651 r = add_multi_byte_cclass(cc->mbuf, reg);
00652 }
00653 }
00654
00655 return r;
00656 }
00657
00658 static int
00659 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00660 {
00661 #define REPEAT_RANGE_ALLOC 4
00662
00663 OnigRepeatRange* p;
00664
00665 if (reg->repeat_range_alloc == 0) {
00666 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00667 CHECK_NULL_RETURN_MEMERR(p);
00668 reg->repeat_range = p;
00669 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00670 }
00671 else if (reg->repeat_range_alloc <= id) {
00672 int n;
00673 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00674 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00675 sizeof(OnigRepeatRange) * n);
00676 CHECK_NULL_RETURN_MEMERR(p);
00677 reg->repeat_range = p;
00678 reg->repeat_range_alloc = n;
00679 }
00680 else {
00681 p = reg->repeat_range;
00682 }
00683
00684 p[id].lower = lower;
00685 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00686 return 0;
00687 }
00688
00689 static int
00690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00691 regex_t* reg)
00692 {
00693 int r;
00694 int num_repeat = reg->num_repeat;
00695
00696 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00697 if (r) return r;
00698 r = add_mem_num(reg, num_repeat);
00699 reg->num_repeat++;
00700 if (r) return r;
00701 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00702 if (r) return r;
00703
00704 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00705 if (r) return r;
00706
00707 r = compile_tree_empty_check(qn->target, reg, empty_info);
00708 if (r) return r;
00709
00710 if (
00711 #ifdef USE_SUBEXP_CALL
00712 reg->num_call > 0 ||
00713 #endif
00714 IS_QUANTIFIER_IN_REPEAT(qn)) {
00715 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00716 }
00717 else {
00718 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00719 }
00720 if (r) return r;
00721 r = add_mem_num(reg, num_repeat);
00722 return r;
00723 }
00724
00725 static int
00726 is_anychar_star_quantifier(QtfrNode* qn)
00727 {
00728 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00729 NTYPE(qn->target) == NT_CANY)
00730 return 1;
00731 else
00732 return 0;
00733 }
00734
00735 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
00736 #define CKN_ON (ckn > 0)
00737
00738 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00739
00740 static int
00741 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00742 {
00743 int len, mod_tlen, cklen;
00744 int ckn;
00745 int infinite = IS_REPEAT_INFINITE(qn->upper);
00746 int empty_info = qn->target_empty_info;
00747 int tlen = compile_length_tree(qn->target, reg);
00748
00749 if (tlen < 0) return tlen;
00750
00751 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00752
00753 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00754
00755
00756 if (NTYPE(qn->target) == NT_CANY) {
00757 if (qn->greedy && infinite) {
00758 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00759 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00760 else
00761 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00762 }
00763 }
00764
00765 if (empty_info != 0)
00766 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00767 else
00768 mod_tlen = tlen;
00769
00770 if (infinite && qn->lower <= 1) {
00771 if (qn->greedy) {
00772 if (qn->lower == 1)
00773 len = SIZE_OP_JUMP;
00774 else
00775 len = 0;
00776
00777 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00778 }
00779 else {
00780 if (qn->lower == 0)
00781 len = SIZE_OP_JUMP;
00782 else
00783 len = 0;
00784
00785 len += mod_tlen + SIZE_OP_PUSH + cklen;
00786 }
00787 }
00788 else if (qn->upper == 0) {
00789 if (qn->is_refered != 0)
00790 len = SIZE_OP_JUMP + tlen;
00791 else
00792 len = 0;
00793 }
00794 else if (qn->upper == 1 && qn->greedy) {
00795 if (qn->lower == 0) {
00796 if (CKN_ON) {
00797 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00798 }
00799 else {
00800 len = SIZE_OP_PUSH + tlen;
00801 }
00802 }
00803 else {
00804 len = tlen;
00805 }
00806 }
00807 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00808 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00809 }
00810 else {
00811 len = SIZE_OP_REPEAT_INC
00812 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00813 if (CKN_ON)
00814 len += SIZE_OP_STATE_CHECK;
00815 }
00816
00817 return len;
00818 }
00819
00820 static int
00821 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00822 {
00823 int r, mod_tlen;
00824 int ckn;
00825 int infinite = IS_REPEAT_INFINITE(qn->upper);
00826 int empty_info = qn->target_empty_info;
00827 int tlen = compile_length_tree(qn->target, reg);
00828
00829 if (tlen < 0) return tlen;
00830
00831 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00832
00833 if (is_anychar_star_quantifier(qn)) {
00834 r = compile_tree_n_times(qn->target, qn->lower, reg);
00835 if (r) return r;
00836 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00837 if (IS_MULTILINE(reg->options))
00838 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00839 else
00840 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00841 if (r) return r;
00842 if (CKN_ON) {
00843 r = add_state_check_num(reg, ckn);
00844 if (r) return r;
00845 }
00846
00847 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00848 }
00849 else {
00850 if (IS_MULTILINE(reg->options)) {
00851 r = add_opcode(reg, (CKN_ON ?
00852 OP_STATE_CHECK_ANYCHAR_ML_STAR
00853 : OP_ANYCHAR_ML_STAR));
00854 }
00855 else {
00856 r = add_opcode(reg, (CKN_ON ?
00857 OP_STATE_CHECK_ANYCHAR_STAR
00858 : OP_ANYCHAR_STAR));
00859 }
00860 if (r) return r;
00861 if (CKN_ON)
00862 r = add_state_check_num(reg, ckn);
00863
00864 return r;
00865 }
00866 }
00867
00868 if (empty_info != 0)
00869 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00870 else
00871 mod_tlen = tlen;
00872
00873 if (infinite && qn->lower <= 1) {
00874 if (qn->greedy) {
00875 if (qn->lower == 1) {
00876 r = add_opcode_rel_addr(reg, OP_JUMP,
00877 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00878 if (r) return r;
00879 }
00880
00881 if (CKN_ON) {
00882 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00883 if (r) return r;
00884 r = add_state_check_num(reg, ckn);
00885 if (r) return r;
00886 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00887 }
00888 else {
00889 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00890 }
00891 if (r) return r;
00892 r = compile_tree_empty_check(qn->target, reg, empty_info);
00893 if (r) return r;
00894 r = add_opcode_rel_addr(reg, OP_JUMP,
00895 -(mod_tlen + (int )SIZE_OP_JUMP
00896 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00897 }
00898 else {
00899 if (qn->lower == 0) {
00900 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00901 if (r) return r;
00902 }
00903 r = compile_tree_empty_check(qn->target, reg, empty_info);
00904 if (r) return r;
00905 if (CKN_ON) {
00906 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00907 if (r) return r;
00908 r = add_state_check_num(reg, ckn);
00909 if (r) return r;
00910 r = add_rel_addr(reg,
00911 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00912 }
00913 else
00914 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00915 }
00916 }
00917 else if (qn->upper == 0) {
00918 if (qn->is_refered != 0) {
00919 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00920 if (r) return r;
00921 r = compile_tree(qn->target, reg);
00922 }
00923 else
00924 r = 0;
00925 }
00926 else if (qn->upper == 1 && qn->greedy) {
00927 if (qn->lower == 0) {
00928 if (CKN_ON) {
00929 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00930 if (r) return r;
00931 r = add_state_check_num(reg, ckn);
00932 if (r) return r;
00933 r = add_rel_addr(reg, tlen);
00934 }
00935 else {
00936 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00937 }
00938 if (r) return r;
00939 }
00940
00941 r = compile_tree(qn->target, reg);
00942 }
00943 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00944 if (CKN_ON) {
00945 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00946 if (r) return r;
00947 r = add_state_check_num(reg, ckn);
00948 if (r) return r;
00949 r = add_rel_addr(reg, SIZE_OP_JUMP);
00950 }
00951 else {
00952 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00953 }
00954
00955 if (r) return r;
00956 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00957 if (r) return r;
00958 r = compile_tree(qn->target, reg);
00959 }
00960 else {
00961 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00962 if (CKN_ON) {
00963 if (r) return r;
00964 r = add_opcode(reg, OP_STATE_CHECK);
00965 if (r) return r;
00966 r = add_state_check_num(reg, ckn);
00967 }
00968 }
00969 return r;
00970 }
00971
00972 #else
00973
00974 static int
00975 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00976 {
00977 int len, mod_tlen;
00978 int infinite = IS_REPEAT_INFINITE(qn->upper);
00979 int empty_info = qn->target_empty_info;
00980 int tlen = compile_length_tree(qn->target, reg);
00981
00982 if (tlen < 0) return tlen;
00983
00984
00985 if (NTYPE(qn->target) == NT_CANY) {
00986 if (qn->greedy && infinite) {
00987 if (IS_NOT_NULL(qn->next_head_exact))
00988 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00989 else
00990 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00991 }
00992 }
00993
00994 if (empty_info != 0)
00995 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00996 else
00997 mod_tlen = tlen;
00998
00999 if (infinite &&
01000 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01001 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01002 len = SIZE_OP_JUMP;
01003 }
01004 else {
01005 len = tlen * qn->lower;
01006 }
01007
01008 if (qn->greedy) {
01009 if (IS_NOT_NULL(qn->head_exact))
01010 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01011 else if (IS_NOT_NULL(qn->next_head_exact))
01012 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01013 else
01014 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01015 }
01016 else
01017 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01018 }
01019 else if (qn->upper == 0 && qn->is_refered != 0) {
01020 len = SIZE_OP_JUMP + tlen;
01021 }
01022 else if (!infinite && qn->greedy &&
01023 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01024 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01025 len = tlen * qn->lower;
01026 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01027 }
01028 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01029 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01030 }
01031 else {
01032 len = SIZE_OP_REPEAT_INC
01033 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01034 }
01035
01036 return len;
01037 }
01038
01039 static int
01040 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01041 {
01042 int i, r, mod_tlen;
01043 int infinite = IS_REPEAT_INFINITE(qn->upper);
01044 int empty_info = qn->target_empty_info;
01045 int tlen = compile_length_tree(qn->target, reg);
01046
01047 if (tlen < 0) return tlen;
01048
01049 if (is_anychar_star_quantifier(qn)) {
01050 r = compile_tree_n_times(qn->target, qn->lower, reg);
01051 if (r) return r;
01052 if (IS_NOT_NULL(qn->next_head_exact)) {
01053 if (IS_MULTILINE(reg->options))
01054 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01055 else
01056 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01057 if (r) return r;
01058 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01059 }
01060 else {
01061 if (IS_MULTILINE(reg->options))
01062 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01063 else
01064 return add_opcode(reg, OP_ANYCHAR_STAR);
01065 }
01066 }
01067
01068 if (empty_info != 0)
01069 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01070 else
01071 mod_tlen = tlen;
01072
01073 if (infinite &&
01074 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01075 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01076 if (qn->greedy) {
01077 if (IS_NOT_NULL(qn->head_exact))
01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01079 else if (IS_NOT_NULL(qn->next_head_exact))
01080 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01081 else
01082 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01083 }
01084 else {
01085 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01086 }
01087 if (r) return r;
01088 }
01089 else {
01090 r = compile_tree_n_times(qn->target, qn->lower, reg);
01091 if (r) return r;
01092 }
01093
01094 if (qn->greedy) {
01095 if (IS_NOT_NULL(qn->head_exact)) {
01096 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01097 mod_tlen + SIZE_OP_JUMP);
01098 if (r) return r;
01099 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01100 r = compile_tree_empty_check(qn->target, reg, empty_info);
01101 if (r) return r;
01102 r = add_opcode_rel_addr(reg, OP_JUMP,
01103 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01104 }
01105 else if (IS_NOT_NULL(qn->next_head_exact)) {
01106 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01107 mod_tlen + SIZE_OP_JUMP);
01108 if (r) return r;
01109 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01110 r = compile_tree_empty_check(qn->target, reg, empty_info);
01111 if (r) return r;
01112 r = add_opcode_rel_addr(reg, OP_JUMP,
01113 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01114 }
01115 else {
01116 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01117 if (r) return r;
01118 r = compile_tree_empty_check(qn->target, reg, empty_info);
01119 if (r) return r;
01120 r = add_opcode_rel_addr(reg, OP_JUMP,
01121 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01122 }
01123 }
01124 else {
01125 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01126 if (r) return r;
01127 r = compile_tree_empty_check(qn->target, reg, empty_info);
01128 if (r) return r;
01129 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01130 }
01131 }
01132 else if (qn->upper == 0 && qn->is_refered != 0) {
01133 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01134 if (r) return r;
01135 r = compile_tree(qn->target, reg);
01136 }
01137 else if (!infinite && qn->greedy &&
01138 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01139 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01140 int n = qn->upper - qn->lower;
01141
01142 r = compile_tree_n_times(qn->target, qn->lower, reg);
01143 if (r) return r;
01144
01145 for (i = 0; i < n; i++) {
01146 r = add_opcode_rel_addr(reg, OP_PUSH,
01147 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01148 if (r) return r;
01149 r = compile_tree(qn->target, reg);
01150 if (r) return r;
01151 }
01152 }
01153 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01154 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01155 if (r) return r;
01156 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01157 if (r) return r;
01158 r = compile_tree(qn->target, reg);
01159 }
01160 else {
01161 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01162 }
01163 return r;
01164 }
01165 #endif
01166
01167 static int
01168 compile_length_option_node(EncloseNode* node, regex_t* reg)
01169 {
01170 int tlen;
01171 OnigOptionType prev = reg->options;
01172
01173 reg->options = node->option;
01174 tlen = compile_length_tree(node->target, reg);
01175 reg->options = prev;
01176
01177 if (tlen < 0) return tlen;
01178
01179 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01180 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01181 + tlen + SIZE_OP_SET_OPTION;
01182 }
01183 else
01184 return tlen;
01185 }
01186
01187 static int
01188 compile_option_node(EncloseNode* node, regex_t* reg)
01189 {
01190 int r;
01191 OnigOptionType prev = reg->options;
01192
01193 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01194 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01195 if (r) return r;
01196 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01197 if (r) return r;
01198 r = add_opcode(reg, OP_FAIL);
01199 if (r) return r;
01200 }
01201
01202 reg->options = node->option;
01203 r = compile_tree(node->target, reg);
01204 reg->options = prev;
01205
01206 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01207 if (r) return r;
01208 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01209 }
01210 return r;
01211 }
01212
01213 static int
01214 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01215 {
01216 int len;
01217 int tlen;
01218
01219 if (node->type == ENCLOSE_OPTION)
01220 return compile_length_option_node(node, reg);
01221
01222 if (node->target) {
01223 tlen = compile_length_tree(node->target, reg);
01224 if (tlen < 0) return tlen;
01225 }
01226 else
01227 tlen = 0;
01228
01229 switch (node->type) {
01230 case ENCLOSE_MEMORY:
01231 #ifdef USE_SUBEXP_CALL
01232 if (IS_ENCLOSE_CALLED(node)) {
01233 len = SIZE_OP_MEMORY_START_PUSH + tlen
01234 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01235 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01236 len += (IS_ENCLOSE_RECURSION(node)
01237 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01238 else
01239 len += (IS_ENCLOSE_RECURSION(node)
01240 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01241 }
01242 else
01243 #endif
01244 {
01245 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01246 len = SIZE_OP_MEMORY_START_PUSH;
01247 else
01248 len = SIZE_OP_MEMORY_START;
01249
01250 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01251 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01252 }
01253 break;
01254
01255 case ENCLOSE_STOP_BACKTRACK:
01256 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01257 QtfrNode* qn = NQTFR(node->target);
01258 tlen = compile_length_tree(qn->target, reg);
01259 if (tlen < 0) return tlen;
01260
01261 len = tlen * qn->lower
01262 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01263 }
01264 else {
01265 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01266 }
01267 break;
01268
01269 case ENCLOSE_CONDITION:
01270 len = SIZE_OP_CONDITION;
01271 if (NTYPE(node->target) == NT_ALT) {
01272 Node* x = node->target;
01273
01274 tlen = compile_length_tree(NCAR(x), reg);
01275 if (tlen < 0) return tlen;
01276 len += tlen + SIZE_OP_JUMP;
01277 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
01278 x = NCDR(x);
01279 tlen = compile_length_tree(NCAR(x), reg);
01280 if (tlen < 0) return tlen;
01281 len += tlen;
01282 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
01283 }
01284 else {
01285 return ONIGERR_PARSER_BUG;
01286 }
01287 break;
01288
01289 default:
01290 return ONIGERR_TYPE_BUG;
01291 break;
01292 }
01293
01294 return len;
01295 }
01296
01297 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01298
01299 static int
01300 compile_enclose_node(EncloseNode* node, regex_t* reg)
01301 {
01302 int r, len;
01303
01304 if (node->type == ENCLOSE_OPTION)
01305 return compile_option_node(node, reg);
01306
01307 switch (node->type) {
01308 case ENCLOSE_MEMORY:
01309 #ifdef USE_SUBEXP_CALL
01310 if (IS_ENCLOSE_CALLED(node)) {
01311 r = add_opcode(reg, OP_CALL);
01312 if (r) return r;
01313 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01314 node->state |= NST_ADDR_FIXED;
01315 r = add_abs_addr(reg, (int )node->call_addr);
01316 if (r) return r;
01317 len = compile_length_tree(node->target, reg);
01318 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01319 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01320 len += (IS_ENCLOSE_RECURSION(node)
01321 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01322 else
01323 len += (IS_ENCLOSE_RECURSION(node)
01324 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01325
01326 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01327 if (r) return r;
01328 }
01329 #endif
01330 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01331 r = add_opcode(reg, OP_MEMORY_START_PUSH);
01332 else
01333 r = add_opcode(reg, OP_MEMORY_START);
01334 if (r) return r;
01335 r = add_mem_num(reg, node->regnum);
01336 if (r) return r;
01337 r = compile_tree(node->target, reg);
01338 if (r) return r;
01339 #ifdef USE_SUBEXP_CALL
01340 if (IS_ENCLOSE_CALLED(node)) {
01341 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01342 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01343 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01344 else
01345 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01346 ? OP_MEMORY_END_REC : OP_MEMORY_END));
01347
01348 if (r) return r;
01349 r = add_mem_num(reg, node->regnum);
01350 if (r) return r;
01351 r = add_opcode(reg, OP_RETURN);
01352 }
01353 else
01354 #endif
01355 {
01356 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01357 r = add_opcode(reg, OP_MEMORY_END_PUSH);
01358 else
01359 r = add_opcode(reg, OP_MEMORY_END);
01360 if (r) return r;
01361 r = add_mem_num(reg, node->regnum);
01362 }
01363 break;
01364
01365 case ENCLOSE_STOP_BACKTRACK:
01366 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01367 QtfrNode* qn = NQTFR(node->target);
01368 r = compile_tree_n_times(qn->target, qn->lower, reg);
01369 if (r) return r;
01370
01371 len = compile_length_tree(qn->target, reg);
01372 if (len < 0) return len;
01373
01374 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01375 if (r) return r;
01376 r = compile_tree(qn->target, reg);
01377 if (r) return r;
01378 r = add_opcode(reg, OP_POP);
01379 if (r) return r;
01380 r = add_opcode_rel_addr(reg, OP_JUMP,
01381 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01382 }
01383 else {
01384 r = add_opcode(reg, OP_PUSH_STOP_BT);
01385 if (r) return r;
01386 r = compile_tree(node->target, reg);
01387 if (r) return r;
01388 r = add_opcode(reg, OP_POP_STOP_BT);
01389 }
01390 break;
01391
01392 case ENCLOSE_CONDITION:
01393 r = add_opcode(reg, OP_CONDITION);
01394 if (r) return r;
01395 r = add_mem_num(reg, node->regnum);
01396 if (r) return r;
01397
01398 if (NTYPE(node->target) == NT_ALT) {
01399 Node* x = node->target;
01400 int len2;
01401
01402 len = compile_length_tree(NCAR(x), reg);
01403 if (len < 0) return len;
01404 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
01405 x = NCDR(x);
01406 len2 = compile_length_tree(NCAR(x), reg);
01407 if (len2 < 0) return len2;
01408 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
01409
01410 x = node->target;
01411 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
01412 if (r) return r;
01413 r = compile_tree(NCAR(x), reg);
01414 if (r) return r;
01415 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
01416 if (r) return r;
01417 x = NCDR(x);
01418 r = compile_tree(NCAR(x), reg);
01419 }
01420 else {
01421 return ONIGERR_PARSER_BUG;
01422 }
01423 break;
01424
01425 default:
01426 return ONIGERR_TYPE_BUG;
01427 break;
01428 }
01429
01430 return r;
01431 }
01432
01433 static int
01434 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01435 {
01436 int len;
01437 int tlen = 0;
01438
01439 if (node->target) {
01440 tlen = compile_length_tree(node->target, reg);
01441 if (tlen < 0) return tlen;
01442 }
01443
01444 switch (node->type) {
01445 case ANCHOR_PREC_READ:
01446 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01447 break;
01448 case ANCHOR_PREC_READ_NOT:
01449 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01450 break;
01451 case ANCHOR_LOOK_BEHIND:
01452 len = SIZE_OP_LOOK_BEHIND + tlen;
01453 break;
01454 case ANCHOR_LOOK_BEHIND_NOT:
01455 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01456 break;
01457
01458 default:
01459 len = SIZE_OPCODE;
01460 break;
01461 }
01462
01463 return len;
01464 }
01465
01466 static int
01467 compile_anchor_node(AnchorNode* node, regex_t* reg)
01468 {
01469 int r, len;
01470
01471 switch (node->type) {
01472 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
01473 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
01474 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
01475 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
01476 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
01477 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01478
01479
01480 case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
01481
01482 case ANCHOR_WORD_BOUND:
01483 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
01484 else r = add_opcode(reg, OP_WORD_BOUND);
01485 break;
01486 case ANCHOR_NOT_WORD_BOUND:
01487 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
01488 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
01489 break;
01490 #ifdef USE_WORD_BEGIN_END
01491 case ANCHOR_WORD_BEGIN:
01492 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
01493 else r = add_opcode(reg, OP_WORD_BEGIN);
01494 break;
01495 case ANCHOR_WORD_END:
01496 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
01497 else r = add_opcode(reg, OP_WORD_END);
01498 break;
01499 #endif
01500 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
01501
01502 case ANCHOR_PREC_READ:
01503 r = add_opcode(reg, OP_PUSH_POS);
01504 if (r) return r;
01505 r = compile_tree(node->target, reg);
01506 if (r) return r;
01507 r = add_opcode(reg, OP_POP_POS);
01508 break;
01509
01510 case ANCHOR_PREC_READ_NOT:
01511 len = compile_length_tree(node->target, reg);
01512 if (len < 0) return len;
01513 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01514 if (r) return r;
01515 r = compile_tree(node->target, reg);
01516 if (r) return r;
01517 r = add_opcode(reg, OP_FAIL_POS);
01518 break;
01519
01520 case ANCHOR_LOOK_BEHIND:
01521 {
01522 int n;
01523 r = add_opcode(reg, OP_LOOK_BEHIND);
01524 if (r) return r;
01525 if (node->char_len < 0) {
01526 r = get_char_length_tree(node->target, reg, &n);
01527 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01528 }
01529 else
01530 n = node->char_len;
01531 r = add_length(reg, n);
01532 if (r) return r;
01533 r = compile_tree(node->target, reg);
01534 }
01535 break;
01536
01537 case ANCHOR_LOOK_BEHIND_NOT:
01538 {
01539 int n;
01540 len = compile_length_tree(node->target, reg);
01541 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01542 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01543 if (r) return r;
01544 if (node->char_len < 0) {
01545 r = get_char_length_tree(node->target, reg, &n);
01546 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01547 }
01548 else
01549 n = node->char_len;
01550 r = add_length(reg, n);
01551 if (r) return r;
01552 r = compile_tree(node->target, reg);
01553 if (r) return r;
01554 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01555 }
01556 break;
01557
01558 default:
01559 return ONIGERR_TYPE_BUG;
01560 break;
01561 }
01562
01563 return r;
01564 }
01565
01566 static int
01567 compile_length_tree(Node* node, regex_t* reg)
01568 {
01569 int len, type, r;
01570
01571 type = NTYPE(node);
01572 switch (type) {
01573 case NT_LIST:
01574 len = 0;
01575 do {
01576 r = compile_length_tree(NCAR(node), reg);
01577 if (r < 0) return r;
01578 len += r;
01579 } while (IS_NOT_NULL(node = NCDR(node)));
01580 r = len;
01581 break;
01582
01583 case NT_ALT:
01584 {
01585 int n;
01586
01587 n = r = 0;
01588 do {
01589 r += compile_length_tree(NCAR(node), reg);
01590 n++;
01591 } while (IS_NOT_NULL(node = NCDR(node)));
01592 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01593 }
01594 break;
01595
01596 case NT_STR:
01597 if (NSTRING_IS_RAW(node))
01598 r = compile_length_string_raw_node(NSTR(node), reg);
01599 else
01600 r = compile_length_string_node(node, reg);
01601 break;
01602
01603 case NT_CCLASS:
01604 r = compile_length_cclass_node(NCCLASS(node), reg);
01605 break;
01606
01607 case NT_CTYPE:
01608 case NT_CANY:
01609 r = SIZE_OPCODE;
01610 break;
01611
01612 case NT_BREF:
01613 {
01614 BRefNode* br = NBREF(node);
01615
01616 #ifdef USE_BACKREF_WITH_LEVEL
01617 if (IS_BACKREF_NEST_LEVEL(br)) {
01618 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01619 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01620 }
01621 else
01622 #endif
01623 if (br->back_num == 1) {
01624 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01625 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01626 }
01627 else {
01628 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01629 }
01630 }
01631 break;
01632
01633 #ifdef USE_SUBEXP_CALL
01634 case NT_CALL:
01635 r = SIZE_OP_CALL;
01636 break;
01637 #endif
01638
01639 case NT_QTFR:
01640 r = compile_length_quantifier_node(NQTFR(node), reg);
01641 break;
01642
01643 case NT_ENCLOSE:
01644 r = compile_length_enclose_node(NENCLOSE(node), reg);
01645 break;
01646
01647 case NT_ANCHOR:
01648 r = compile_length_anchor_node(NANCHOR(node), reg);
01649 break;
01650
01651 default:
01652 return ONIGERR_TYPE_BUG;
01653 break;
01654 }
01655
01656 return r;
01657 }
01658
01659 static int
01660 compile_tree(Node* node, regex_t* reg)
01661 {
01662 int n, type, len, pos, r = 0;
01663
01664 type = NTYPE(node);
01665 switch (type) {
01666 case NT_LIST:
01667 do {
01668 r = compile_tree(NCAR(node), reg);
01669 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01670 break;
01671
01672 case NT_ALT:
01673 {
01674 Node* x = node;
01675 len = 0;
01676 do {
01677 len += compile_length_tree(NCAR(x), reg);
01678 if (NCDR(x) != NULL) {
01679 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01680 }
01681 } while (IS_NOT_NULL(x = NCDR(x)));
01682 pos = reg->used + len;
01683
01684 do {
01685 len = compile_length_tree(NCAR(node), reg);
01686 if (IS_NOT_NULL(NCDR(node))) {
01687 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01688 if (r) break;
01689 }
01690 r = compile_tree(NCAR(node), reg);
01691 if (r) break;
01692 if (IS_NOT_NULL(NCDR(node))) {
01693 len = pos - (reg->used + SIZE_OP_JUMP);
01694 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01695 if (r) break;
01696 }
01697 } while (IS_NOT_NULL(node = NCDR(node)));
01698 }
01699 break;
01700
01701 case NT_STR:
01702 if (NSTRING_IS_RAW(node))
01703 r = compile_string_raw_node(NSTR(node), reg);
01704 else
01705 r = compile_string_node(node, reg);
01706 break;
01707
01708 case NT_CCLASS:
01709 r = compile_cclass_node(NCCLASS(node), reg);
01710 break;
01711
01712 case NT_CTYPE:
01713 {
01714 int op;
01715
01716 switch (NCTYPE(node)->ctype) {
01717 case ONIGENC_CTYPE_WORD:
01718 if (NCTYPE(node)->ascii_range != 0) {
01719 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
01720 else op = OP_ASCII_WORD;
01721 }
01722 else {
01723 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
01724 else op = OP_WORD;
01725 }
01726 break;
01727 default:
01728 return ONIGERR_TYPE_BUG;
01729 break;
01730 }
01731 r = add_opcode(reg, op);
01732 }
01733 break;
01734
01735 case NT_CANY:
01736 if (IS_MULTILINE(reg->options))
01737 r = add_opcode(reg, OP_ANYCHAR_ML);
01738 else
01739 r = add_opcode(reg, OP_ANYCHAR);
01740 break;
01741
01742 case NT_BREF:
01743 {
01744 BRefNode* br = NBREF(node);
01745
01746 #ifdef USE_BACKREF_WITH_LEVEL
01747 if (IS_BACKREF_NEST_LEVEL(br)) {
01748 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01749 if (r) return r;
01750 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01751 if (r) return r;
01752 r = add_length(reg, br->nest_level);
01753 if (r) return r;
01754
01755 goto add_bacref_mems;
01756 }
01757 else
01758 #endif
01759 if (br->back_num == 1) {
01760 n = br->back_static[0];
01761 if (IS_IGNORECASE(reg->options)) {
01762 r = add_opcode(reg, OP_BACKREFN_IC);
01763 if (r) return r;
01764 r = add_mem_num(reg, n);
01765 }
01766 else {
01767 switch (n) {
01768 case 1: r = add_opcode(reg, OP_BACKREF1); break;
01769 case 2: r = add_opcode(reg, OP_BACKREF2); break;
01770 default:
01771 r = add_opcode(reg, OP_BACKREFN);
01772 if (r) return r;
01773 r = add_mem_num(reg, n);
01774 break;
01775 }
01776 }
01777 }
01778 else {
01779 int i;
01780 int* p;
01781
01782 if (IS_IGNORECASE(reg->options)) {
01783 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01784 }
01785 else {
01786 r = add_opcode(reg, OP_BACKREF_MULTI);
01787 }
01788 if (r) return r;
01789
01790 #ifdef USE_BACKREF_WITH_LEVEL
01791 add_bacref_mems:
01792 #endif
01793 r = add_length(reg, br->back_num);
01794 if (r) return r;
01795 p = BACKREFS_P(br);
01796 for (i = br->back_num - 1; i >= 0; i--) {
01797 r = add_mem_num(reg, p[i]);
01798 if (r) return r;
01799 }
01800 }
01801 }
01802 break;
01803
01804 #ifdef USE_SUBEXP_CALL
01805 case NT_CALL:
01806 r = compile_call(NCALL(node), reg);
01807 break;
01808 #endif
01809
01810 case NT_QTFR:
01811 r = compile_quantifier_node(NQTFR(node), reg);
01812 break;
01813
01814 case NT_ENCLOSE:
01815 r = compile_enclose_node(NENCLOSE(node), reg);
01816 break;
01817
01818 case NT_ANCHOR:
01819 r = compile_anchor_node(NANCHOR(node), reg);
01820 break;
01821
01822 default:
01823 #ifdef ONIG_DEBUG
01824 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01825 #endif
01826 break;
01827 }
01828
01829 return r;
01830 }
01831
01832 #ifdef USE_NAMED_GROUP
01833
01834 static int
01835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01836 {
01837 int r = 0;
01838 Node* node = *plink;
01839
01840 switch (NTYPE(node)) {
01841 case NT_LIST:
01842 case NT_ALT:
01843 do {
01844 r = noname_disable_map(&(NCAR(node)), map, counter);
01845 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01846 break;
01847
01848 case NT_QTFR:
01849 {
01850 Node** ptarget = &(NQTFR(node)->target);
01851 Node* old = *ptarget;
01852 r = noname_disable_map(ptarget, map, counter);
01853 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01854 onig_reduce_nested_quantifier(node, *ptarget);
01855 }
01856 }
01857 break;
01858
01859 case NT_ENCLOSE:
01860 {
01861 EncloseNode* en = NENCLOSE(node);
01862 if (en->type == ENCLOSE_MEMORY) {
01863 if (IS_ENCLOSE_NAMED_GROUP(en)) {
01864 (*counter)++;
01865 map[en->regnum].new_val = *counter;
01866 en->regnum = *counter;
01867 r = noname_disable_map(&(en->target), map, counter);
01868 }
01869 else {
01870 *plink = en->target;
01871 en->target = NULL_NODE;
01872 onig_node_free(node);
01873 r = noname_disable_map(plink, map, counter);
01874 }
01875 }
01876 else
01877 r = noname_disable_map(&(en->target), map, counter);
01878 }
01879 break;
01880
01881 case NT_ANCHOR:
01882 {
01883 AnchorNode* an = NANCHOR(node);
01884 switch (an->type) {
01885 case ANCHOR_PREC_READ:
01886 case ANCHOR_PREC_READ_NOT:
01887 case ANCHOR_LOOK_BEHIND:
01888 case ANCHOR_LOOK_BEHIND_NOT:
01889 r = noname_disable_map(&(an->target), map, counter);
01890 break;
01891 }
01892 }
01893 break;
01894
01895 default:
01896 break;
01897 }
01898
01899 return r;
01900 }
01901
01902 static int
01903 renumber_node_backref(Node* node, GroupNumRemap* map)
01904 {
01905 int i, pos, n, old_num;
01906 int *backs;
01907 BRefNode* bn = NBREF(node);
01908
01909 if (! IS_BACKREF_NAME_REF(bn))
01910 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01911
01912 old_num = bn->back_num;
01913 if (IS_NULL(bn->back_dynamic))
01914 backs = bn->back_static;
01915 else
01916 backs = bn->back_dynamic;
01917
01918 for (i = 0, pos = 0; i < old_num; i++) {
01919 n = map[backs[i]].new_val;
01920 if (n > 0) {
01921 backs[pos] = n;
01922 pos++;
01923 }
01924 }
01925
01926 bn->back_num = pos;
01927 return 0;
01928 }
01929
01930 static int
01931 renumber_by_map(Node* node, GroupNumRemap* map)
01932 {
01933 int r = 0;
01934
01935 switch (NTYPE(node)) {
01936 case NT_LIST:
01937 case NT_ALT:
01938 do {
01939 r = renumber_by_map(NCAR(node), map);
01940 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01941 break;
01942 case NT_QTFR:
01943 r = renumber_by_map(NQTFR(node)->target, map);
01944 break;
01945 case NT_ENCLOSE:
01946 {
01947 EncloseNode* en = NENCLOSE(node);
01948 if (en->type == ENCLOSE_CONDITION)
01949 en->regnum = map[en->regnum].new_val;
01950 r = renumber_by_map(en->target, map);
01951 }
01952 break;
01953
01954 case NT_BREF:
01955 r = renumber_node_backref(node, map);
01956 break;
01957
01958 case NT_ANCHOR:
01959 {
01960 AnchorNode* an = NANCHOR(node);
01961 switch (an->type) {
01962 case ANCHOR_PREC_READ:
01963 case ANCHOR_PREC_READ_NOT:
01964 case ANCHOR_LOOK_BEHIND:
01965 case ANCHOR_LOOK_BEHIND_NOT:
01966 r = renumber_by_map(an->target, map);
01967 break;
01968 }
01969 }
01970 break;
01971
01972 default:
01973 break;
01974 }
01975
01976 return r;
01977 }
01978
01979 static int
01980 numbered_ref_check(Node* node)
01981 {
01982 int r = 0;
01983
01984 switch (NTYPE(node)) {
01985 case NT_LIST:
01986 case NT_ALT:
01987 do {
01988 r = numbered_ref_check(NCAR(node));
01989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01990 break;
01991 case NT_QTFR:
01992 r = numbered_ref_check(NQTFR(node)->target);
01993 break;
01994 case NT_ENCLOSE:
01995 r = numbered_ref_check(NENCLOSE(node)->target);
01996 break;
01997
01998 case NT_BREF:
01999 if (! IS_BACKREF_NAME_REF(NBREF(node)))
02000 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
02001 break;
02002
02003 default:
02004 break;
02005 }
02006
02007 return r;
02008 }
02009
02010 static int
02011 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
02012 {
02013 int r, i, pos, counter;
02014 BitStatusType loc;
02015 GroupNumRemap* map;
02016
02017 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
02018 CHECK_NULL_RETURN_MEMERR(map);
02019 for (i = 1; i <= env->num_mem; i++) {
02020 map[i].new_val = 0;
02021 }
02022 counter = 0;
02023 r = noname_disable_map(root, map, &counter);
02024 if (r != 0) return r;
02025
02026 r = renumber_by_map(*root, map);
02027 if (r != 0) return r;
02028
02029 for (i = 1, pos = 1; i <= env->num_mem; i++) {
02030 if (map[i].new_val > 0) {
02031 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
02032 pos++;
02033 }
02034 }
02035
02036 loc = env->capture_history;
02037 BIT_STATUS_CLEAR(env->capture_history);
02038 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
02039 if (BIT_STATUS_AT(loc, i)) {
02040 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
02041 }
02042 }
02043
02044 env->num_mem = env->num_named;
02045 reg->num_mem = env->num_named;
02046
02047 return onig_renumber_name_table(reg, map);
02048 }
02049 #endif
02050
02051 #ifdef USE_SUBEXP_CALL
02052 static int
02053 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
02054 {
02055 int i, offset;
02056 EncloseNode* en;
02057 AbsAddrType addr;
02058
02059 for (i = 0; i < uslist->num; i++) {
02060 en = NENCLOSE(uslist->us[i].target);
02061 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
02062 addr = en->call_addr;
02063 offset = uslist->us[i].offset;
02064
02065 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
02066 }
02067 return 0;
02068 }
02069 #endif
02070
02071 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02072 static int
02073 quantifiers_memory_node_info(Node* node)
02074 {
02075 int r = 0;
02076
02077 switch (NTYPE(node)) {
02078 case NT_LIST:
02079 case NT_ALT:
02080 {
02081 int v;
02082 do {
02083 v = quantifiers_memory_node_info(NCAR(node));
02084 if (v > r) r = v;
02085 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02086 }
02087 break;
02088
02089 #ifdef USE_SUBEXP_CALL
02090 case NT_CALL:
02091 if (IS_CALL_RECURSION(NCALL(node))) {
02092 return NQ_TARGET_IS_EMPTY_REC;
02093 }
02094 else
02095 r = quantifiers_memory_node_info(NCALL(node)->target);
02096 break;
02097 #endif
02098
02099 case NT_QTFR:
02100 {
02101 QtfrNode* qn = NQTFR(node);
02102 if (qn->upper != 0) {
02103 r = quantifiers_memory_node_info(qn->target);
02104 }
02105 }
02106 break;
02107
02108 case NT_ENCLOSE:
02109 {
02110 EncloseNode* en = NENCLOSE(node);
02111 switch (en->type) {
02112 case ENCLOSE_MEMORY:
02113 return NQ_TARGET_IS_EMPTY_MEM;
02114 break;
02115
02116 case ENCLOSE_OPTION:
02117 case ENCLOSE_STOP_BACKTRACK:
02118 case ENCLOSE_CONDITION:
02119 r = quantifiers_memory_node_info(en->target);
02120 break;
02121 default:
02122 break;
02123 }
02124 }
02125 break;
02126
02127 case NT_BREF:
02128 case NT_STR:
02129 case NT_CTYPE:
02130 case NT_CCLASS:
02131 case NT_CANY:
02132 case NT_ANCHOR:
02133 default:
02134 break;
02135 }
02136
02137 return r;
02138 }
02139 #endif
02140
02141 static int
02142 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02143 {
02144 OnigDistance tmin;
02145 int r = 0;
02146
02147 *min = 0;
02148 switch (NTYPE(node)) {
02149 case NT_BREF:
02150 {
02151 int i;
02152 int* backs;
02153 Node** nodes = SCANENV_MEM_NODES(env);
02154 BRefNode* br = NBREF(node);
02155 if (br->state & NST_RECURSION) break;
02156
02157 backs = BACKREFS_P(br);
02158 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02159 r = get_min_match_length(nodes[backs[0]], min, env);
02160 if (r != 0) break;
02161 for (i = 1; i < br->back_num; i++) {
02162 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02163 r = get_min_match_length(nodes[backs[i]], &tmin, env);
02164 if (r != 0) break;
02165 if (*min > tmin) *min = tmin;
02166 }
02167 }
02168 break;
02169
02170 #ifdef USE_SUBEXP_CALL
02171 case NT_CALL:
02172 if (IS_CALL_RECURSION(NCALL(node))) {
02173 EncloseNode* en = NENCLOSE(NCALL(node)->target);
02174 if (IS_ENCLOSE_MIN_FIXED(en))
02175 *min = en->min_len;
02176 }
02177 else
02178 r = get_min_match_length(NCALL(node)->target, min, env);
02179 break;
02180 #endif
02181
02182 case NT_LIST:
02183 do {
02184 r = get_min_match_length(NCAR(node), &tmin, env);
02185 if (r == 0) *min += tmin;
02186 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02187 break;
02188
02189 case NT_ALT:
02190 {
02191 Node *x, *y;
02192 y = node;
02193 do {
02194 x = NCAR(y);
02195 r = get_min_match_length(x, &tmin, env);
02196 if (r != 0) break;
02197 if (y == node) *min = tmin;
02198 else if (*min > tmin) *min = tmin;
02199 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02200 }
02201 break;
02202
02203 case NT_STR:
02204 {
02205 StrNode* sn = NSTR(node);
02206 *min = sn->end - sn->s;
02207 }
02208 break;
02209
02210 case NT_CTYPE:
02211 *min = 1;
02212 break;
02213
02214 case NT_CCLASS:
02215 case NT_CANY:
02216 *min = 1;
02217 break;
02218
02219 case NT_QTFR:
02220 {
02221 QtfrNode* qn = NQTFR(node);
02222
02223 if (qn->lower > 0) {
02224 r = get_min_match_length(qn->target, min, env);
02225 if (r == 0)
02226 *min = distance_multiply(*min, qn->lower);
02227 }
02228 }
02229 break;
02230
02231 case NT_ENCLOSE:
02232 {
02233 EncloseNode* en = NENCLOSE(node);
02234 switch (en->type) {
02235 case ENCLOSE_MEMORY:
02236 #ifdef USE_SUBEXP_CALL
02237 if (IS_ENCLOSE_MIN_FIXED(en))
02238 *min = en->min_len;
02239 else {
02240 r = get_min_match_length(en->target, min, env);
02241 if (r == 0) {
02242 en->min_len = *min;
02243 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02244 }
02245 }
02246 break;
02247 #endif
02248 case ENCLOSE_OPTION:
02249 case ENCLOSE_STOP_BACKTRACK:
02250 case ENCLOSE_CONDITION:
02251 r = get_min_match_length(en->target, min, env);
02252 break;
02253 }
02254 }
02255 break;
02256
02257 case NT_ANCHOR:
02258 default:
02259 break;
02260 }
02261
02262 return r;
02263 }
02264
02265 static int
02266 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02267 {
02268 OnigDistance tmax;
02269 int r = 0;
02270
02271 *max = 0;
02272 switch (NTYPE(node)) {
02273 case NT_LIST:
02274 do {
02275 r = get_max_match_length(NCAR(node), &tmax, env);
02276 if (r == 0)
02277 *max = distance_add(*max, tmax);
02278 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02279 break;
02280
02281 case NT_ALT:
02282 do {
02283 r = get_max_match_length(NCAR(node), &tmax, env);
02284 if (r == 0 && *max < tmax) *max = tmax;
02285 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02286 break;
02287
02288 case NT_STR:
02289 {
02290 StrNode* sn = NSTR(node);
02291 *max = sn->end - sn->s;
02292 }
02293 break;
02294
02295 case NT_CTYPE:
02296 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02297 break;
02298
02299 case NT_CCLASS:
02300 case NT_CANY:
02301 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02302 break;
02303
02304 case NT_BREF:
02305 {
02306 int i;
02307 int* backs;
02308 Node** nodes = SCANENV_MEM_NODES(env);
02309 BRefNode* br = NBREF(node);
02310 if (br->state & NST_RECURSION) {
02311 *max = ONIG_INFINITE_DISTANCE;
02312 break;
02313 }
02314 backs = BACKREFS_P(br);
02315 for (i = 0; i < br->back_num; i++) {
02316 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02317 r = get_max_match_length(nodes[backs[i]], &tmax, env);
02318 if (r != 0) break;
02319 if (*max < tmax) *max = tmax;
02320 }
02321 }
02322 break;
02323
02324 #ifdef USE_SUBEXP_CALL
02325 case NT_CALL:
02326 if (! IS_CALL_RECURSION(NCALL(node)))
02327 r = get_max_match_length(NCALL(node)->target, max, env);
02328 else
02329 *max = ONIG_INFINITE_DISTANCE;
02330 break;
02331 #endif
02332
02333 case NT_QTFR:
02334 {
02335 QtfrNode* qn = NQTFR(node);
02336
02337 if (qn->upper != 0) {
02338 r = get_max_match_length(qn->target, max, env);
02339 if (r == 0 && *max != 0) {
02340 if (! IS_REPEAT_INFINITE(qn->upper))
02341 *max = distance_multiply(*max, qn->upper);
02342 else
02343 *max = ONIG_INFINITE_DISTANCE;
02344 }
02345 }
02346 }
02347 break;
02348
02349 case NT_ENCLOSE:
02350 {
02351 EncloseNode* en = NENCLOSE(node);
02352 switch (en->type) {
02353 case ENCLOSE_MEMORY:
02354 #ifdef USE_SUBEXP_CALL
02355 if (IS_ENCLOSE_MAX_FIXED(en))
02356 *max = en->max_len;
02357 else {
02358 r = get_max_match_length(en->target, max, env);
02359 if (r == 0) {
02360 en->max_len = *max;
02361 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02362 }
02363 }
02364 break;
02365 #endif
02366 case ENCLOSE_OPTION:
02367 case ENCLOSE_STOP_BACKTRACK:
02368 case ENCLOSE_CONDITION:
02369 r = get_max_match_length(en->target, max, env);
02370 break;
02371 }
02372 }
02373 break;
02374
02375 case NT_ANCHOR:
02376 default:
02377 break;
02378 }
02379
02380 return r;
02381 }
02382
02383 #define GET_CHAR_LEN_VARLEN -1
02384 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
02385
02386
02387 static int
02388 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02389 {
02390 int tlen;
02391 int r = 0;
02392
02393 level++;
02394 *len = 0;
02395 switch (NTYPE(node)) {
02396 case NT_LIST:
02397 do {
02398 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02399 if (r == 0)
02400 *len = (int )distance_add(*len, tlen);
02401 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02402 break;
02403
02404 case NT_ALT:
02405 {
02406 int tlen2;
02407 int varlen = 0;
02408
02409 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02410 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02411 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02412 if (r == 0) {
02413 if (tlen != tlen2)
02414 varlen = 1;
02415 }
02416 }
02417 if (r == 0) {
02418 if (varlen != 0) {
02419 if (level == 1)
02420 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02421 else
02422 r = GET_CHAR_LEN_VARLEN;
02423 }
02424 else
02425 *len = tlen;
02426 }
02427 }
02428 break;
02429
02430 case NT_STR:
02431 {
02432 StrNode* sn = NSTR(node);
02433 UChar *s = sn->s;
02434 while (s < sn->end) {
02435 s += enclen(reg->enc, s, sn->end);
02436 (*len)++;
02437 }
02438 }
02439 break;
02440
02441 case NT_QTFR:
02442 {
02443 QtfrNode* qn = NQTFR(node);
02444 if (qn->lower == qn->upper) {
02445 r = get_char_length_tree1(qn->target, reg, &tlen, level);
02446 if (r == 0)
02447 *len = (int )distance_multiply(tlen, qn->lower);
02448 }
02449 else
02450 r = GET_CHAR_LEN_VARLEN;
02451 }
02452 break;
02453
02454 #ifdef USE_SUBEXP_CALL
02455 case NT_CALL:
02456 if (! IS_CALL_RECURSION(NCALL(node)))
02457 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02458 else
02459 r = GET_CHAR_LEN_VARLEN;
02460 break;
02461 #endif
02462
02463 case NT_CTYPE:
02464 *len = 1;
02465 break;
02466
02467 case NT_CCLASS:
02468 case NT_CANY:
02469 *len = 1;
02470 break;
02471
02472 case NT_ENCLOSE:
02473 {
02474 EncloseNode* en = NENCLOSE(node);
02475 switch (en->type) {
02476 case ENCLOSE_MEMORY:
02477 #ifdef USE_SUBEXP_CALL
02478 if (IS_ENCLOSE_CLEN_FIXED(en))
02479 *len = en->char_len;
02480 else {
02481 r = get_char_length_tree1(en->target, reg, len, level);
02482 if (r == 0) {
02483 en->char_len = *len;
02484 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02485 }
02486 }
02487 break;
02488 #endif
02489 case ENCLOSE_OPTION:
02490 case ENCLOSE_STOP_BACKTRACK:
02491 case ENCLOSE_CONDITION:
02492 r = get_char_length_tree1(en->target, reg, len, level);
02493 break;
02494 default:
02495 break;
02496 }
02497 }
02498 break;
02499
02500 case NT_ANCHOR:
02501 break;
02502
02503 default:
02504 r = GET_CHAR_LEN_VARLEN;
02505 break;
02506 }
02507
02508 return r;
02509 }
02510
02511 static int
02512 get_char_length_tree(Node* node, regex_t* reg, int* len)
02513 {
02514 return get_char_length_tree1(node, reg, len, 0);
02515 }
02516
02517
02518 static int
02519 is_not_included(Node* x, Node* y, regex_t* reg)
02520 {
02521 int i;
02522 OnigDistance len;
02523 OnigCodePoint code;
02524 UChar *p;
02525 int ytype;
02526
02527 retry:
02528 ytype = NTYPE(y);
02529 switch (NTYPE(x)) {
02530 case NT_CTYPE:
02531 {
02532 switch (ytype) {
02533 case NT_CTYPE:
02534 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02535 NCTYPE(y)->not != NCTYPE(x)->not &&
02536 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
02537 return 1;
02538 else
02539 return 0;
02540 break;
02541
02542 case NT_CCLASS:
02543 swap:
02544 {
02545 Node* tmp;
02546 tmp = x; x = y; y = tmp;
02547 goto retry;
02548 }
02549 break;
02550
02551 case NT_STR:
02552 goto swap;
02553 break;
02554
02555 default:
02556 break;
02557 }
02558 }
02559 break;
02560
02561 case NT_CCLASS:
02562 {
02563 CClassNode* xc = NCCLASS(x);
02564 switch (ytype) {
02565 case NT_CTYPE:
02566 switch (NCTYPE(y)->ctype) {
02567 case ONIGENC_CTYPE_WORD:
02568 if (NCTYPE(y)->not == 0) {
02569 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02570 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02571 if (BITSET_AT(xc->bs, i)) {
02572 if (NCTYPE(y)->ascii_range) {
02573 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02574 }
02575 else {
02576 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
02577 }
02578 }
02579 }
02580 return 1;
02581 }
02582 return 0;
02583 }
02584 else {
02585 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02586 int is_word;
02587 if (NCTYPE(y)->ascii_range)
02588 is_word = IS_CODE_SB_WORD(reg->enc, i);
02589 else
02590 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
02591 if (! is_word) {
02592 if (!IS_NCCLASS_NOT(xc)) {
02593 if (BITSET_AT(xc->bs, i))
02594 return 0;
02595 }
02596 else {
02597 if (! BITSET_AT(xc->bs, i))
02598 return 0;
02599 }
02600 }
02601 }
02602 return 1;
02603 }
02604 break;
02605
02606 default:
02607 break;
02608 }
02609 break;
02610
02611 case NT_CCLASS:
02612 {
02613 int v;
02614 CClassNode* yc = NCCLASS(y);
02615
02616 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02617 v = BITSET_AT(xc->bs, i);
02618 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02619 (v == 0 && IS_NCCLASS_NOT(xc))) {
02620 v = BITSET_AT(yc->bs, i);
02621 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02622 (v == 0 && IS_NCCLASS_NOT(yc)))
02623 return 0;
02624 }
02625 }
02626 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02627 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02628 return 1;
02629 return 0;
02630 }
02631 break;
02632
02633 case NT_STR:
02634 goto swap;
02635 break;
02636
02637 default:
02638 break;
02639 }
02640 }
02641 break;
02642
02643 case NT_STR:
02644 {
02645 StrNode* xs = NSTR(x);
02646 if (NSTRING_LEN(x) == 0)
02647 break;
02648
02649 switch (ytype) {
02650 case NT_CTYPE:
02651 switch (NCTYPE(y)->ctype) {
02652 case ONIGENC_CTYPE_WORD:
02653 if (NCTYPE(y)->ascii_range) {
02654 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
02655 return NCTYPE(y)->not;
02656 else
02657 return !(NCTYPE(y)->not);
02658 }
02659 else {
02660 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02661 return NCTYPE(y)->not;
02662 else
02663 return !(NCTYPE(y)->not);
02664 }
02665 break;
02666 default:
02667 break;
02668 }
02669 break;
02670
02671 case NT_CCLASS:
02672 {
02673 CClassNode* cc = NCCLASS(y);
02674
02675 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02676 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02677 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02678 }
02679 break;
02680
02681 case NT_STR:
02682 {
02683 UChar *q;
02684 StrNode* ys = NSTR(y);
02685 len = NSTRING_LEN(x);
02686 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02687 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02688
02689 return 0;
02690 }
02691 else {
02692 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
02693 if (*p != *q) return 1;
02694 }
02695 }
02696 }
02697 break;
02698
02699 default:
02700 break;
02701 }
02702 }
02703 break;
02704
02705 default:
02706 break;
02707 }
02708
02709 return 0;
02710 }
02711
02712 static Node*
02713 get_head_value_node(Node* node, int exact, regex_t* reg)
02714 {
02715 Node* n = NULL_NODE;
02716
02717 switch (NTYPE(node)) {
02718 case NT_BREF:
02719 case NT_ALT:
02720 case NT_CANY:
02721 #ifdef USE_SUBEXP_CALL
02722 case NT_CALL:
02723 #endif
02724 break;
02725
02726 case NT_CTYPE:
02727 case NT_CCLASS:
02728 if (exact == 0) {
02729 n = node;
02730 }
02731 break;
02732
02733 case NT_LIST:
02734 n = get_head_value_node(NCAR(node), exact, reg);
02735 break;
02736
02737 case NT_STR:
02738 {
02739 StrNode* sn = NSTR(node);
02740
02741 if (sn->end <= sn->s)
02742 break;
02743
02744 if (exact != 0 &&
02745 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02746 }
02747 else {
02748 n = node;
02749 }
02750 }
02751 break;
02752
02753 case NT_QTFR:
02754 {
02755 QtfrNode* qn = NQTFR(node);
02756 if (qn->lower > 0) {
02757 if (IS_NOT_NULL(qn->head_exact))
02758 n = qn->head_exact;
02759 else
02760 n = get_head_value_node(qn->target, exact, reg);
02761 }
02762 }
02763 break;
02764
02765 case NT_ENCLOSE:
02766 {
02767 EncloseNode* en = NENCLOSE(node);
02768 switch (en->type) {
02769 case ENCLOSE_OPTION:
02770 {
02771 OnigOptionType options = reg->options;
02772
02773 reg->options = NENCLOSE(node)->option;
02774 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02775 reg->options = options;
02776 }
02777 break;
02778
02779 case ENCLOSE_MEMORY:
02780 case ENCLOSE_STOP_BACKTRACK:
02781 case ENCLOSE_CONDITION:
02782 n = get_head_value_node(en->target, exact, reg);
02783 break;
02784 }
02785 }
02786 break;
02787
02788 case NT_ANCHOR:
02789 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02790 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02791 break;
02792
02793 default:
02794 break;
02795 }
02796
02797 return n;
02798 }
02799
02800 static int
02801 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02802 {
02803 int type, r = 0;
02804
02805 type = NTYPE(node);
02806 if ((NTYPE2BIT(type) & type_mask) == 0)
02807 return 1;
02808
02809 switch (type) {
02810 case NT_LIST:
02811 case NT_ALT:
02812 do {
02813 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02814 anchor_mask);
02815 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02816 break;
02817
02818 case NT_QTFR:
02819 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02820 anchor_mask);
02821 break;
02822
02823 case NT_ENCLOSE:
02824 {
02825 EncloseNode* en = NENCLOSE(node);
02826 if ((en->type & enclose_mask) == 0)
02827 return 1;
02828
02829 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02830 }
02831 break;
02832
02833 case NT_ANCHOR:
02834 type = NANCHOR(node)->type;
02835 if ((type & anchor_mask) == 0)
02836 return 1;
02837
02838 if (NANCHOR(node)->target)
02839 r = check_type_tree(NANCHOR(node)->target,
02840 type_mask, enclose_mask, anchor_mask);
02841 break;
02842
02843 default:
02844 break;
02845 }
02846 return r;
02847 }
02848
02849 #ifdef USE_SUBEXP_CALL
02850
02851 #define RECURSION_EXIST 1
02852 #define RECURSION_INFINITE 2
02853
02854 static int
02855 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02856 {
02857 int type;
02858 int r = 0;
02859
02860 type = NTYPE(node);
02861 switch (type) {
02862 case NT_LIST:
02863 {
02864 Node *x;
02865 OnigDistance min;
02866 int ret;
02867
02868 x = node;
02869 do {
02870 ret = subexp_inf_recursive_check(NCAR(x), env, head);
02871 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02872 r |= ret;
02873 if (head) {
02874 ret = get_min_match_length(NCAR(x), &min, env);
02875 if (ret != 0) return ret;
02876 if (min != 0) head = 0;
02877 }
02878 } while (IS_NOT_NULL(x = NCDR(x)));
02879 }
02880 break;
02881
02882 case NT_ALT:
02883 {
02884 int ret;
02885 r = RECURSION_EXIST;
02886 do {
02887 ret = subexp_inf_recursive_check(NCAR(node), env, head);
02888 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02889 r &= ret;
02890 } while (IS_NOT_NULL(node = NCDR(node)));
02891 }
02892 break;
02893
02894 case NT_QTFR:
02895 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02896 if (r == RECURSION_EXIST) {
02897 if (NQTFR(node)->lower == 0) r = 0;
02898 }
02899 break;
02900
02901 case NT_ANCHOR:
02902 {
02903 AnchorNode* an = NANCHOR(node);
02904 switch (an->type) {
02905 case ANCHOR_PREC_READ:
02906 case ANCHOR_PREC_READ_NOT:
02907 case ANCHOR_LOOK_BEHIND:
02908 case ANCHOR_LOOK_BEHIND_NOT:
02909 r = subexp_inf_recursive_check(an->target, env, head);
02910 break;
02911 }
02912 }
02913 break;
02914
02915 case NT_CALL:
02916 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02917 break;
02918
02919 case NT_ENCLOSE:
02920 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02921 return 0;
02922 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02923 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02924 else {
02925 SET_ENCLOSE_STATUS(node, NST_MARK2);
02926 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02927 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02928 }
02929 break;
02930
02931 default:
02932 break;
02933 }
02934
02935 return r;
02936 }
02937
02938 static int
02939 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02940 {
02941 int type;
02942 int r = 0;
02943
02944 type = NTYPE(node);
02945 switch (type) {
02946 case NT_LIST:
02947 case NT_ALT:
02948 do {
02949 r = subexp_inf_recursive_check_trav(NCAR(node), env);
02950 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02951 break;
02952
02953 case NT_QTFR:
02954 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02955 break;
02956
02957 case NT_ANCHOR:
02958 {
02959 AnchorNode* an = NANCHOR(node);
02960 switch (an->type) {
02961 case ANCHOR_PREC_READ:
02962 case ANCHOR_PREC_READ_NOT:
02963 case ANCHOR_LOOK_BEHIND:
02964 case ANCHOR_LOOK_BEHIND_NOT:
02965 r = subexp_inf_recursive_check_trav(an->target, env);
02966 break;
02967 }
02968 }
02969 break;
02970
02971 case NT_ENCLOSE:
02972 {
02973 EncloseNode* en = NENCLOSE(node);
02974
02975 if (IS_ENCLOSE_RECURSION(en)) {
02976 SET_ENCLOSE_STATUS(node, NST_MARK1);
02977 r = subexp_inf_recursive_check(en->target, env, 1);
02978 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02979 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02980 }
02981 r = subexp_inf_recursive_check_trav(en->target, env);
02982 }
02983
02984 break;
02985
02986 default:
02987 break;
02988 }
02989
02990 return r;
02991 }
02992
02993 static int
02994 subexp_recursive_check(Node* node)
02995 {
02996 int r = 0;
02997
02998 switch (NTYPE(node)) {
02999 case NT_LIST:
03000 case NT_ALT:
03001 do {
03002 r |= subexp_recursive_check(NCAR(node));
03003 } while (IS_NOT_NULL(node = NCDR(node)));
03004 break;
03005
03006 case NT_QTFR:
03007 r = subexp_recursive_check(NQTFR(node)->target);
03008 break;
03009
03010 case NT_ANCHOR:
03011 {
03012 AnchorNode* an = NANCHOR(node);
03013 switch (an->type) {
03014 case ANCHOR_PREC_READ:
03015 case ANCHOR_PREC_READ_NOT:
03016 case ANCHOR_LOOK_BEHIND:
03017 case ANCHOR_LOOK_BEHIND_NOT:
03018 r = subexp_recursive_check(an->target);
03019 break;
03020 }
03021 }
03022 break;
03023
03024 case NT_CALL:
03025 r = subexp_recursive_check(NCALL(node)->target);
03026 if (r != 0) SET_CALL_RECURSION(node);
03027 break;
03028
03029 case NT_ENCLOSE:
03030 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
03031 return 0;
03032 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
03033 return 1;
03034 else {
03035 SET_ENCLOSE_STATUS(node, NST_MARK2);
03036 r = subexp_recursive_check(NENCLOSE(node)->target);
03037 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
03038 }
03039 break;
03040
03041 default:
03042 break;
03043 }
03044
03045 return r;
03046 }
03047
03048
03049 static int
03050 subexp_recursive_check_trav(Node* node, ScanEnv* env)
03051 {
03052 #define FOUND_CALLED_NODE 1
03053
03054 int type;
03055 int r = 0;
03056
03057 type = NTYPE(node);
03058 switch (type) {
03059 case NT_LIST:
03060 case NT_ALT:
03061 {
03062 int ret;
03063 do {
03064 ret = subexp_recursive_check_trav(NCAR(node), env);
03065 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
03066 else if (ret < 0) return ret;
03067 } while (IS_NOT_NULL(node = NCDR(node)));
03068 }
03069 break;
03070
03071 case NT_QTFR:
03072 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
03073 if (NQTFR(node)->upper == 0) {
03074 if (r == FOUND_CALLED_NODE)
03075 NQTFR(node)->is_refered = 1;
03076 }
03077 break;
03078
03079 case NT_ANCHOR:
03080 {
03081 AnchorNode* an = NANCHOR(node);
03082 switch (an->type) {
03083 case ANCHOR_PREC_READ:
03084 case ANCHOR_PREC_READ_NOT:
03085 case ANCHOR_LOOK_BEHIND:
03086 case ANCHOR_LOOK_BEHIND_NOT:
03087 r = subexp_recursive_check_trav(an->target, env);
03088 break;
03089 }
03090 }
03091 break;
03092
03093 case NT_ENCLOSE:
03094 {
03095 EncloseNode* en = NENCLOSE(node);
03096
03097 if (! IS_ENCLOSE_RECURSION(en)) {
03098 if (IS_ENCLOSE_CALLED(en)) {
03099 SET_ENCLOSE_STATUS(node, NST_MARK1);
03100 r = subexp_recursive_check(en->target);
03101 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
03102 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
03103 }
03104 }
03105 r = subexp_recursive_check_trav(en->target, env);
03106 if (IS_ENCLOSE_CALLED(en))
03107 r |= FOUND_CALLED_NODE;
03108 }
03109 break;
03110
03111 default:
03112 break;
03113 }
03114
03115 return r;
03116 }
03117
03118 static int
03119 setup_subexp_call(Node* node, ScanEnv* env)
03120 {
03121 int type;
03122 int r = 0;
03123
03124 type = NTYPE(node);
03125 switch (type) {
03126 case NT_LIST:
03127 do {
03128 r = setup_subexp_call(NCAR(node), env);
03129 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03130 break;
03131
03132 case NT_ALT:
03133 do {
03134 r = setup_subexp_call(NCAR(node), env);
03135 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03136 break;
03137
03138 case NT_QTFR:
03139 r = setup_subexp_call(NQTFR(node)->target, env);
03140 break;
03141 case NT_ENCLOSE:
03142 r = setup_subexp_call(NENCLOSE(node)->target, env);
03143 break;
03144
03145 case NT_CALL:
03146 {
03147 CallNode* cn = NCALL(node);
03148 Node** nodes = SCANENV_MEM_NODES(env);
03149
03150 if (cn->group_num != 0) {
03151 int gnum = cn->group_num;
03152
03153 #ifdef USE_NAMED_GROUP
03154 if (env->num_named > 0 &&
03155 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03156 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03157 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03158 }
03159 #endif
03160 if (gnum > env->num_mem) {
03161 onig_scan_env_set_error_string(env,
03162 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03163 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03164 }
03165
03166 #ifdef USE_NAMED_GROUP
03167 set_call_attr:
03168 #endif
03169 cn->target = nodes[cn->group_num];
03170 if (IS_NULL(cn->target)) {
03171 onig_scan_env_set_error_string(env,
03172 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03173 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03174 }
03175 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03176 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03177 cn->unset_addr_list = env->unset_addr_list;
03178 }
03179 #ifdef USE_NAMED_GROUP
03180 #ifdef USE_PERL_SUBEXP_CALL
03181 else if (cn->name == cn->name_end) {
03182 goto set_call_attr;
03183 }
03184 #endif
03185 else {
03186 int *refs;
03187
03188 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03189 &refs);
03190 if (n <= 0) {
03191 onig_scan_env_set_error_string(env,
03192 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03193 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03194 }
03195 else if (n > 1 &&
03196 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
03197 onig_scan_env_set_error_string(env,
03198 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03199 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03200 }
03201 else {
03202 cn->group_num = refs[0];
03203 goto set_call_attr;
03204 }
03205 }
03206 #endif
03207 }
03208 break;
03209
03210 case NT_ANCHOR:
03211 {
03212 AnchorNode* an = NANCHOR(node);
03213
03214 switch (an->type) {
03215 case ANCHOR_PREC_READ:
03216 case ANCHOR_PREC_READ_NOT:
03217 case ANCHOR_LOOK_BEHIND:
03218 case ANCHOR_LOOK_BEHIND_NOT:
03219 r = setup_subexp_call(an->target, env);
03220 break;
03221 }
03222 }
03223 break;
03224
03225 default:
03226 break;
03227 }
03228
03229 return r;
03230 }
03231 #endif
03232
03233
03234
03235
03236
03237 static int
03238 divide_look_behind_alternatives(Node* node)
03239 {
03240 Node *head, *np, *insert_node;
03241 AnchorNode* an = NANCHOR(node);
03242 int anc_type = an->type;
03243
03244 head = an->target;
03245 np = NCAR(head);
03246 swap_node(node, head);
03247 NCAR(node) = head;
03248 NANCHOR(head)->target = np;
03249
03250 np = node;
03251 while ((np = NCDR(np)) != NULL_NODE) {
03252 insert_node = onig_node_new_anchor(anc_type);
03253 CHECK_NULL_RETURN_MEMERR(insert_node);
03254 NANCHOR(insert_node)->target = NCAR(np);
03255 NCAR(np) = insert_node;
03256 }
03257
03258 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03259 np = node;
03260 do {
03261 SET_NTYPE(np, NT_LIST);
03262 } while ((np = NCDR(np)) != NULL_NODE);
03263 }
03264 return 0;
03265 }
03266
03267 static int
03268 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03269 {
03270 int r, len;
03271 AnchorNode* an = NANCHOR(node);
03272
03273 r = get_char_length_tree(an->target, reg, &len);
03274 if (r == 0)
03275 an->char_len = len;
03276 else if (r == GET_CHAR_LEN_VARLEN)
03277 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03278 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03279 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03280 r = divide_look_behind_alternatives(node);
03281 else
03282 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03283 }
03284
03285 return r;
03286 }
03287
03288 static int
03289 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
03290 {
03291 int type;
03292
03293 retry:
03294 type = NTYPE(node);
03295 if (type == NT_QTFR) {
03296 QtfrNode* qn = NQTFR(node);
03297 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03298 #ifdef USE_QTFR_PEEK_NEXT
03299 Node* n = get_head_value_node(next_node, 1, reg);
03300
03301 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03302 qn->next_head_exact = n;
03303 }
03304 #endif
03305
03306 if (qn->lower <= 1) {
03307 int ttype = NTYPE(qn->target);
03308 if (IS_NODE_TYPE_SIMPLE(ttype)) {
03309 Node *x, *y;
03310 x = get_head_value_node(qn->target, 0, reg);
03311 if (IS_NOT_NULL(x)) {
03312 y = get_head_value_node(next_node, 0, reg);
03313 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03314 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03315 CHECK_NULL_RETURN_MEMERR(en);
03316 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03317 swap_node(node, en);
03318 NENCLOSE(node)->target = en;
03319 }
03320 }
03321 }
03322 }
03323
03324 #ifndef ONIG_DONT_OPTIMIZE
03325 if (NTYPE(node) == NT_QTFR &&
03326 in_root &&
03327 NTYPE(qn->target) == NT_CANY &&
03328 ! IS_MULTILINE(reg->options)) {
03329
03330 Node *np;
03331 np = onig_node_new_list(NULL_NODE, NULL_NODE);
03332 CHECK_NULL_RETURN_MEMERR(np);
03333 swap_node(node, np);
03334 NCDR(node) = onig_node_new_list(np, NULL_NODE);
03335 if (IS_NULL(NCDR(node))) {
03336 onig_node_free(np);
03337 return ONIGERR_MEMORY;
03338 }
03339 np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR);
03340 CHECK_NULL_RETURN_MEMERR(np);
03341 NCAR(node) = np;
03342 }
03343 #endif
03344 }
03345 }
03346 else if (type == NT_ENCLOSE) {
03347 EncloseNode* en = NENCLOSE(node);
03348 in_root = 0;
03349 if (en->type == ENCLOSE_MEMORY) {
03350 node = en->target;
03351 goto retry;
03352 }
03353 }
03354 return 0;
03355 }
03356
03357
03358 static int
03359 update_string_node_case_fold(regex_t* reg, Node *node)
03360 {
03361 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03362 UChar *sbuf, *ebuf, *sp;
03363 int r, i, len;
03364 OnigDistance sbuf_size;
03365 StrNode* sn = NSTR(node);
03366
03367 end = sn->end;
03368 sbuf_size = (end - sn->s) * 2;
03369 sbuf = (UChar* )xmalloc(sbuf_size);
03370 CHECK_NULL_RETURN_MEMERR(sbuf);
03371 ebuf = sbuf + sbuf_size;
03372
03373 sp = sbuf;
03374 p = sn->s;
03375 while (p < end) {
03376 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03377 for (i = 0; i < len; i++) {
03378 if (sp >= ebuf) {
03379 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03380 if (IS_NULL(p)) {
03381 xfree(sbuf);
03382 return ONIGERR_MEMORY;
03383 }
03384 sbuf = p;
03385 sp = sbuf + sbuf_size;
03386 sbuf_size *= 2;
03387 ebuf = sbuf + sbuf_size;
03388 }
03389
03390 *sp++ = buf[i];
03391 }
03392 }
03393
03394 r = onig_node_str_set(node, sbuf, sp);
03395 if (r != 0) {
03396 xfree(sbuf);
03397 return r;
03398 }
03399
03400 xfree(sbuf);
03401 return 0;
03402 }
03403
03404 static int
03405 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03406 regex_t* reg)
03407 {
03408 int r;
03409 Node *node;
03410
03411 node = onig_node_new_str(s, end);
03412 if (IS_NULL(node)) return ONIGERR_MEMORY;
03413
03414 r = update_string_node_case_fold(reg, node);
03415 if (r != 0) {
03416 onig_node_free(node);
03417 return r;
03418 }
03419
03420 NSTRING_SET_AMBIG(node);
03421 NSTRING_SET_DONT_GET_OPT_INFO(node);
03422 *rnode = node;
03423 return 0;
03424 }
03425
03426 static int
03427 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03428 UChar *p, int slen, UChar *end,
03429 regex_t* reg, Node **rnode)
03430 {
03431 int r, i, j, len, varlen, varclen;
03432 Node *anode, *var_anode, *snode, *xnode, *an;
03433 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03434
03435 *rnode = var_anode = NULL_NODE;
03436
03437 varlen = 0;
03438 varclen = 0;
03439 for (i = 0; i < item_num; i++) {
03440 if (items[i].byte_len != slen) {
03441 varlen = 1;
03442 break;
03443 }
03444 if (items[i].code_len != 1) {
03445 varclen = 1;
03446 }
03447 }
03448
03449 if (varlen != 0) {
03450 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03451 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03452
03453 xnode = onig_node_new_list(NULL, NULL);
03454 if (IS_NULL(xnode)) goto mem_err;
03455 NCAR(var_anode) = xnode;
03456
03457 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03458 if (IS_NULL(anode)) goto mem_err;
03459 NCAR(xnode) = anode;
03460 }
03461 else {
03462 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03463 if (IS_NULL(anode)) return ONIGERR_MEMORY;
03464 }
03465
03466 snode = onig_node_new_str(p, p + slen);
03467 if (IS_NULL(snode)) goto mem_err;
03468
03469 NCAR(anode) = snode;
03470
03471 for (i = 0; i < item_num; i++) {
03472 snode = onig_node_new_str(NULL, NULL);
03473 if (IS_NULL(snode)) goto mem_err;
03474
03475 for (j = 0; j < items[i].code_len; j++) {
03476 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03477 if (len < 0) {
03478 r = len;
03479 goto mem_err2;
03480 }
03481
03482 r = onig_node_str_cat(snode, buf, buf + len);
03483 if (r != 0) goto mem_err2;
03484 }
03485
03486 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03487 if (IS_NULL(an)) {
03488 goto mem_err2;
03489 }
03490
03491 if (items[i].byte_len != slen) {
03492 Node *rem;
03493 UChar *q = p + items[i].byte_len;
03494
03495 if (q < end) {
03496 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03497 if (r != 0) {
03498 onig_node_free(an);
03499 goto mem_err2;
03500 }
03501
03502 xnode = onig_node_list_add(NULL_NODE, snode);
03503 if (IS_NULL(xnode)) {
03504 onig_node_free(an);
03505 onig_node_free(rem);
03506 goto mem_err2;
03507 }
03508 if (IS_NULL(onig_node_list_add(xnode, rem))) {
03509 onig_node_free(an);
03510 onig_node_free(xnode);
03511 onig_node_free(rem);
03512 goto mem_err;
03513 }
03514
03515 NCAR(an) = xnode;
03516 }
03517 else {
03518 NCAR(an) = snode;
03519 }
03520
03521 NCDR(var_anode) = an;
03522 var_anode = an;
03523 }
03524 else {
03525 NCAR(an) = snode;
03526 NCDR(anode) = an;
03527 anode = an;
03528 }
03529 }
03530
03531 if (varclen && !varlen)
03532 return 2;
03533 return varlen;
03534
03535 mem_err2:
03536 onig_node_free(snode);
03537
03538 mem_err:
03539 onig_node_free(*rnode);
03540
03541 return ONIGERR_MEMORY;
03542 }
03543
03544 static int
03545 expand_case_fold_string(Node* node, regex_t* reg)
03546 {
03547 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
03548
03549 int r, n, len, alt_num;
03550 int varlen = 0;
03551 UChar *start, *end, *p;
03552 Node *top_root, *root, *snode, *prev_node;
03553 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03554 StrNode* sn = NSTR(node);
03555
03556 if (NSTRING_IS_AMBIG(node)) return 0;
03557
03558 start = sn->s;
03559 end = sn->end;
03560 if (start >= end) return 0;
03561
03562 r = 0;
03563 top_root = root = prev_node = snode = NULL_NODE;
03564 alt_num = 1;
03565 p = start;
03566 while (p < end) {
03567 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03568 p, end, items);
03569 if (n < 0) {
03570 r = n;
03571 goto err;
03572 }
03573
03574 len = enclen(reg->enc, p, end);
03575
03576 if (n == 0) {
03577 if (IS_NULL(snode)) {
03578 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03579 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03580 if (IS_NULL(root)) {
03581 onig_node_free(prev_node);
03582 goto mem_err;
03583 }
03584 }
03585
03586 prev_node = snode = onig_node_new_str(NULL, NULL);
03587 if (IS_NULL(snode)) goto mem_err;
03588 if (IS_NOT_NULL(root)) {
03589 if (IS_NULL(onig_node_list_add(root, snode))) {
03590 onig_node_free(snode);
03591 goto mem_err;
03592 }
03593 }
03594 }
03595
03596 r = onig_node_str_cat(snode, p, p + len);
03597 if (r != 0) goto err;
03598 }
03599 else {
03600 alt_num *= (n + 1);
03601 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03602
03603 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03604 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03605 if (IS_NULL(root)) {
03606 onig_node_free(prev_node);
03607 goto mem_err;
03608 }
03609 }
03610
03611 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03612 if (r < 0) goto mem_err;
03613 if (r > 0) varlen = 1;
03614 if (r == 1) {
03615 if (IS_NULL(root)) {
03616 top_root = prev_node;
03617 }
03618 else {
03619 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03620 onig_node_free(prev_node);
03621 goto mem_err;
03622 }
03623 }
03624
03625 root = NCAR(prev_node);
03626 }
03627 else {
03628 if (IS_NOT_NULL(root)) {
03629 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03630 onig_node_free(prev_node);
03631 goto mem_err;
03632 }
03633 }
03634 }
03635
03636 snode = NULL_NODE;
03637 }
03638
03639 p += len;
03640 }
03641
03642 if (p < end) {
03643 Node *srem;
03644
03645 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03646 if (r != 0) goto mem_err;
03647
03648 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03649 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03650 if (IS_NULL(root)) {
03651 onig_node_free(srem);
03652 onig_node_free(prev_node);
03653 goto mem_err;
03654 }
03655 }
03656
03657 if (IS_NULL(root)) {
03658 prev_node = srem;
03659 }
03660 else {
03661 if (IS_NULL(onig_node_list_add(root, srem))) {
03662 onig_node_free(srem);
03663 goto mem_err;
03664 }
03665 }
03666 }
03667
03668
03669 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03670 if (!varlen) {
03671
03672
03673 r = update_string_node_case_fold(reg, node);
03674 if (r == 0) {
03675 NSTRING_SET_AMBIG(node);
03676 }
03677 }
03678 else {
03679 swap_node(node, top_root);
03680 r = 0;
03681 }
03682 onig_node_free(top_root);
03683 return r;
03684
03685 mem_err:
03686 r = ONIGERR_MEMORY;
03687
03688 err:
03689 onig_node_free(top_root);
03690 return r;
03691 }
03692
03693
03694 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03695
03696 #define CEC_THRES_NUM_BIG_REPEAT 512
03697 #define CEC_INFINITE_NUM 0x7fffffff
03698
03699 #define CEC_IN_INFINITE_REPEAT (1<<0)
03700 #define CEC_IN_FINITE_REPEAT (1<<1)
03701 #define CEC_CONT_BIG_REPEAT (1<<2)
03702
03703 static int
03704 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03705 {
03706 int type;
03707 int r = state;
03708
03709 type = NTYPE(node);
03710 switch (type) {
03711 case NT_LIST:
03712 {
03713 Node* prev = NULL_NODE;
03714 do {
03715 r = setup_comb_exp_check(NCAR(node), r, env);
03716 prev = NCAR(node);
03717 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03718 }
03719 break;
03720
03721 case NT_ALT:
03722 {
03723 int ret;
03724 do {
03725 ret = setup_comb_exp_check(NCAR(node), state, env);
03726 r |= ret;
03727 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03728 }
03729 break;
03730
03731 case NT_QTFR:
03732 {
03733 int child_state = state;
03734 int add_state = 0;
03735 QtfrNode* qn = NQTFR(node);
03736 Node* target = qn->target;
03737 int var_num;
03738
03739 if (! IS_REPEAT_INFINITE(qn->upper)) {
03740 if (qn->upper > 1) {
03741
03742 child_state |= CEC_IN_FINITE_REPEAT;
03743
03744
03745 if (env->backrefed_mem == 0) {
03746 if (NTYPE(qn->target) == NT_ENCLOSE) {
03747 EncloseNode* en = NENCLOSE(qn->target);
03748 if (en->type == ENCLOSE_MEMORY) {
03749 if (NTYPE(en->target) == NT_QTFR) {
03750 QtfrNode* q = NQTFR(en->target);
03751 if (IS_REPEAT_INFINITE(q->upper)
03752 && q->greedy == qn->greedy) {
03753 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03754 if (qn->upper == 1)
03755 child_state = state;
03756 }
03757 }
03758 }
03759 }
03760 }
03761 }
03762 }
03763
03764 if (state & CEC_IN_FINITE_REPEAT) {
03765 qn->comb_exp_check_num = -1;
03766 }
03767 else {
03768 if (IS_REPEAT_INFINITE(qn->upper)) {
03769 var_num = CEC_INFINITE_NUM;
03770 child_state |= CEC_IN_INFINITE_REPEAT;
03771 }
03772 else {
03773 var_num = qn->upper - qn->lower;
03774 }
03775
03776 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03777 add_state |= CEC_CONT_BIG_REPEAT;
03778
03779 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03780 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03781 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03782 if (qn->comb_exp_check_num == 0) {
03783 env->num_comb_exp_check++;
03784 qn->comb_exp_check_num = env->num_comb_exp_check;
03785 if (env->curr_max_regnum > env->comb_exp_max_regnum)
03786 env->comb_exp_max_regnum = env->curr_max_regnum;
03787 }
03788 }
03789 }
03790
03791 r = setup_comb_exp_check(target, child_state, env);
03792 r |= add_state;
03793 }
03794 break;
03795
03796 case NT_ENCLOSE:
03797 {
03798 EncloseNode* en = NENCLOSE(node);
03799
03800 switch (en->type) {
03801 case ENCLOSE_MEMORY:
03802 {
03803 if (env->curr_max_regnum < en->regnum)
03804 env->curr_max_regnum = en->regnum;
03805
03806 r = setup_comb_exp_check(en->target, state, env);
03807 }
03808 break;
03809
03810 default:
03811 r = setup_comb_exp_check(en->target, state, env);
03812 break;
03813 }
03814 }
03815 break;
03816
03817 #ifdef USE_SUBEXP_CALL
03818 case NT_CALL:
03819 if (IS_CALL_RECURSION(NCALL(node)))
03820 env->has_recursion = 1;
03821 else
03822 r = setup_comb_exp_check(NCALL(node)->target, state, env);
03823 break;
03824 #endif
03825
03826 default:
03827 break;
03828 }
03829
03830 return r;
03831 }
03832 #endif
03833
03834 #define IN_ALT (1<<0)
03835 #define IN_NOT (1<<1)
03836 #define IN_REPEAT (1<<2)
03837 #define IN_VAR_REPEAT (1<<3)
03838 #define IN_ROOT (1<<4)
03839
03840
03841
03842
03843
03844
03845
03846
03847
03848 static int
03849 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03850 {
03851 int type;
03852 int r = 0;
03853 int in_root = state & IN_ROOT;
03854
03855 state &= ~IN_ROOT;
03856 restart:
03857 type = NTYPE(node);
03858 switch (type) {
03859 case NT_LIST:
03860 {
03861 Node* prev = NULL_NODE;
03862 int prev_in_root = 0;
03863 state |= in_root;
03864 do {
03865 r = setup_tree(NCAR(node), reg, state, env);
03866 if (IS_NOT_NULL(prev) && r == 0) {
03867 r = next_setup(prev, NCAR(node), prev_in_root, reg);
03868 }
03869 prev = NCAR(node);
03870 prev_in_root = state & IN_ROOT;
03871 state &= ~IN_ROOT;
03872 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03873 }
03874 break;
03875
03876 case NT_ALT:
03877 do {
03878 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03879 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03880 break;
03881
03882 case NT_CCLASS:
03883 break;
03884
03885 case NT_STR:
03886 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03887 r = expand_case_fold_string(node, reg);
03888 }
03889 break;
03890
03891 case NT_CTYPE:
03892 case NT_CANY:
03893 break;
03894
03895 #ifdef USE_SUBEXP_CALL
03896 case NT_CALL:
03897 break;
03898 #endif
03899
03900 case NT_BREF:
03901 {
03902 int i;
03903 int* p;
03904 Node** nodes = SCANENV_MEM_NODES(env);
03905 BRefNode* br = NBREF(node);
03906 p = BACKREFS_P(br);
03907 for (i = 0; i < br->back_num; i++) {
03908 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
03909 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03910 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03911 #ifdef USE_BACKREF_WITH_LEVEL
03912 if (IS_BACKREF_NEST_LEVEL(br)) {
03913 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03914 }
03915 #endif
03916 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03917 }
03918 }
03919 break;
03920
03921 case NT_QTFR:
03922 {
03923 OnigDistance d;
03924 QtfrNode* qn = NQTFR(node);
03925 Node* target = qn->target;
03926
03927 if ((state & IN_REPEAT) != 0) {
03928 qn->state |= NST_IN_REPEAT;
03929 }
03930
03931 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03932 r = get_min_match_length(target, &d, env);
03933 if (r) break;
03934 if (d == 0) {
03935 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03936 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03937 r = quantifiers_memory_node_info(target);
03938 if (r < 0) break;
03939 if (r > 0) {
03940 qn->target_empty_info = r;
03941 }
03942 #endif
03943 #if 0
03944 r = get_max_match_length(target, &d, env);
03945 if (r == 0 && d == 0) {
03946
03947 qn->upper = 1;
03948 if (qn->lower > 1) qn->lower = 1;
03949 if (NTYPE(target) == NT_STR) {
03950 qn->upper = qn->lower = 0;
03951 }
03952 }
03953 #endif
03954 }
03955 }
03956
03957 state |= IN_REPEAT;
03958 if (qn->lower != qn->upper)
03959 state |= IN_VAR_REPEAT;
03960 r = setup_tree(target, reg, state, env);
03961 if (r) break;
03962
03963
03964 #define EXPAND_STRING_MAX_LENGTH 100
03965 if (NTYPE(target) == NT_STR) {
03966 if (qn->lower > 1) {
03967 int i, n = qn->lower;
03968 OnigDistance len = NSTRING_LEN(target);
03969 StrNode* sn = NSTR(target);
03970 Node* np;
03971
03972 np = onig_node_new_str(sn->s, sn->end);
03973 if (IS_NULL(np)) return ONIGERR_MEMORY;
03974 NSTR(np)->flag = sn->flag;
03975
03976 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
03977 r = onig_node_str_cat(np, sn->s, sn->end);
03978 if (r) {
03979 onig_node_free(np);
03980 return r;
03981 }
03982 }
03983 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
03984 Node *np1, *np2;
03985
03986 qn->lower -= i;
03987 if (! IS_REPEAT_INFINITE(qn->upper))
03988 qn->upper -= i;
03989
03990 np1 = onig_node_new_list(np, NULL);
03991 if (IS_NULL(np1)) {
03992 onig_node_free(np);
03993 return ONIGERR_MEMORY;
03994 }
03995 swap_node(np1, node);
03996 np2 = onig_node_list_add(node, np1);
03997 if (IS_NULL(np2)) {
03998 onig_node_free(np1);
03999 return ONIGERR_MEMORY;
04000 }
04001 }
04002 else {
04003 swap_node(np, node);
04004 onig_node_free(np);
04005 }
04006 break;
04007 }
04008 }
04009
04010 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
04011 if (qn->greedy && (qn->target_empty_info != 0)) {
04012 if (NTYPE(target) == NT_QTFR) {
04013 QtfrNode* tqn = NQTFR(target);
04014 if (IS_NOT_NULL(tqn->head_exact)) {
04015 qn->head_exact = tqn->head_exact;
04016 tqn->head_exact = NULL;
04017 }
04018 }
04019 else {
04020 qn->head_exact = get_head_value_node(qn->target, 1, reg);
04021 }
04022 }
04023 #endif
04024 }
04025 break;
04026
04027 case NT_ENCLOSE:
04028 {
04029 EncloseNode* en = NENCLOSE(node);
04030
04031 switch (en->type) {
04032 case ENCLOSE_OPTION:
04033 {
04034 OnigOptionType options = reg->options;
04035 state |= in_root;
04036 reg->options = NENCLOSE(node)->option;
04037 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
04038 reg->options = options;
04039 }
04040 break;
04041
04042 case ENCLOSE_MEMORY:
04043 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
04044 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
04045
04046 }
04047 r = setup_tree(en->target, reg, state, env);
04048 break;
04049
04050 case ENCLOSE_STOP_BACKTRACK:
04051 {
04052 Node* target = en->target;
04053 r = setup_tree(target, reg, state, env);
04054 if (NTYPE(target) == NT_QTFR) {
04055 QtfrNode* tqn = NQTFR(target);
04056 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
04057 tqn->greedy != 0) {
04058 int qtype = NTYPE(tqn->target);
04059 if (IS_NODE_TYPE_SIMPLE(qtype))
04060 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
04061 }
04062 }
04063 }
04064 break;
04065
04066 case ENCLOSE_CONDITION:
04067 #ifdef USE_NAMED_GROUP
04068 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
04069 env->num_named > 0 &&
04070 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
04071 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
04072 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
04073 }
04074 #endif
04075 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
04076 break;
04077 }
04078 }
04079 break;
04080
04081 case NT_ANCHOR:
04082 {
04083 AnchorNode* an = NANCHOR(node);
04084
04085 switch (an->type) {
04086 case ANCHOR_PREC_READ:
04087 r = setup_tree(an->target, reg, state, env);
04088 break;
04089 case ANCHOR_PREC_READ_NOT:
04090 r = setup_tree(an->target, reg, (state | IN_NOT), env);
04091 break;
04092
04093
04094 #define ALLOWED_TYPE_IN_LB \
04095 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
04096 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
04097
04098 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
04099 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
04100
04101 #define ALLOWED_ANCHOR_IN_LB \
04102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
04103 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
04104 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
04105 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
04106 #define ALLOWED_ANCHOR_IN_LB_NOT \
04107 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
04108 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
04109 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
04110 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
04111
04112 case ANCHOR_LOOK_BEHIND:
04113 {
04114 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
04115 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
04116 if (r < 0) return r;
04117 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
04118 r = setup_look_behind(node, reg, env);
04119 if (r != 0) return r;
04120 if (NTYPE(node) != NT_ANCHOR) goto restart;
04121 r = setup_tree(an->target, reg, state, env);
04122 }
04123 break;
04124
04125 case ANCHOR_LOOK_BEHIND_NOT:
04126 {
04127 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
04128 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
04129 if (r < 0) return r;
04130 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
04131 r = setup_look_behind(node, reg, env);
04132 if (r != 0) return r;
04133 if (NTYPE(node) != NT_ANCHOR) goto restart;
04134 r = setup_tree(an->target, reg, (state | IN_NOT), env);
04135 }
04136 break;
04137 }
04138 }
04139 break;
04140
04141 default:
04142 break;
04143 }
04144
04145 return r;
04146 }
04147
04148 #ifndef USE_SUNDAY_QUICK_SEARCH
04149
04150 static int
04151 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
04152 UChar skip[], int** int_skip, int ignore_case)
04153 {
04154 OnigDistance i, len;
04155 int clen, flen, n, j, k;
04156 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
04157 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04158 OnigEncoding enc = reg->enc;
04159
04160 len = end - s;
04161 if (len < ONIG_CHAR_TABLE_SIZE) {
04162 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
04163
04164 n = 0;
04165 for (i = 0; i < len - 1; i += clen) {
04166 p = s + i;
04167 if (ignore_case)
04168 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04169 p, end, items);
04170 clen = enclen(enc, p, end);
04171
04172 for (j = 0; j < n; j++) {
04173 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04174 return 1;
04175 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04176 if (flen != clen)
04177 return 1;
04178 }
04179 for (j = 0; j < clen; j++) {
04180 skip[s[i + j]] = (UChar )(len - 1 - i - j);
04181 for (k = 0; k < n; k++) {
04182 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
04183 }
04184 }
04185 }
04186 }
04187 else {
04188 if (IS_NULL(*int_skip)) {
04189 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
04190 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
04191 }
04192 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
04193
04194 n = 0;
04195 for (i = 0; i < len - 1; i += clen) {
04196 p = s + i;
04197 if (ignore_case)
04198 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04199 p, end, items);
04200 clen = enclen(enc, p, end);
04201
04202 for (j = 0; j < n; j++) {
04203 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04204 return 1;
04205 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04206 if (flen != clen)
04207 return 1;
04208 }
04209 for (j = 0; j < clen; j++) {
04210 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
04211 for (k = 0; k < n; k++) {
04212 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
04213 }
04214 }
04215 }
04216 }
04217 return 0;
04218 }
04219
04220 #else
04221
04222
04223 static int
04224 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
04225 UChar skip[], int** int_skip, int ignore_case)
04226 {
04227 OnigDistance i, len;
04228 int clen, flen, n, j, k;
04229 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
04230 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04231 OnigEncoding enc = reg->enc;
04232
04233 len = end - s;
04234 if (len < ONIG_CHAR_TABLE_SIZE) {
04235 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
04236
04237 n = 0;
04238 for (i = 0; i < len; i += clen) {
04239 p = s + i;
04240 if (ignore_case)
04241 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04242 p, end, items);
04243 clen = enclen(enc, p, end);
04244
04245 for (j = 0; j < n; j++) {
04246 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04247 return 1;
04248 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04249 if (flen != clen)
04250 return 1;
04251 }
04252 for (j = 0; j < clen; j++) {
04253 skip[s[i + j]] = (UChar )(len - i - j);
04254 for (k = 0; k < n; k++) {
04255 skip[buf[k][j]] = (UChar )(len - i - j);
04256 }
04257 }
04258 }
04259 }
04260 else {
04261 if (IS_NULL(*int_skip)) {
04262 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
04263 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
04264 }
04265 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
04266
04267 n = 0;
04268 for (i = 0; i < len; i += clen) {
04269 p = s + i;
04270 if (ignore_case)
04271 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04272 p, end, items);
04273 clen = enclen(enc, p, end);
04274
04275 for (j = 0; j < n; j++) {
04276 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04277 return 1;
04278 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04279 if (flen != clen)
04280 return 1;
04281 }
04282 for (j = 0; j < clen; j++) {
04283 (*int_skip)[s[i + j]] = (int )(len - i - j);
04284 for (k = 0; k < n; k++) {
04285 (*int_skip)[buf[k][j]] = (int )(len - i - j);
04286 }
04287 }
04288 }
04289 }
04290 return 0;
04291 }
04292 #endif
04293
04294 #define OPT_EXACT_MAXLEN 24
04295
04296 typedef struct {
04297 OnigDistance min;
04298 OnigDistance max;
04299 } MinMaxLen;
04300
04301 typedef struct {
04302 MinMaxLen mmd;
04303 OnigEncoding enc;
04304 OnigOptionType options;
04305 OnigCaseFoldType case_fold_flag;
04306 ScanEnv* scan_env;
04307 } OptEnv;
04308
04309 typedef struct {
04310 int left_anchor;
04311 int right_anchor;
04312 } OptAncInfo;
04313
04314 typedef struct {
04315 MinMaxLen mmd;
04316 OptAncInfo anc;
04317
04318 int reach_end;
04319 int ignore_case;
04320 int len;
04321 UChar s[OPT_EXACT_MAXLEN];
04322 } OptExactInfo;
04323
04324 typedef struct {
04325 MinMaxLen mmd;
04326 OptAncInfo anc;
04327
04328 int value;
04329 UChar map[ONIG_CHAR_TABLE_SIZE];
04330 } OptMapInfo;
04331
04332 typedef struct {
04333 MinMaxLen len;
04334
04335 OptAncInfo anc;
04336 OptExactInfo exb;
04337 OptExactInfo exm;
04338 OptExactInfo expr;
04339
04340 OptMapInfo map;
04341 } NodeOptInfo;
04342
04343
04344 static int
04345 map_position_value(OnigEncoding enc, int i)
04346 {
04347 static const short int ByteValTable[] = {
04348 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
04349 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
04350 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
04351 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
04352 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04353 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
04354 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04355 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
04356 };
04357
04358 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04359 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04360 return 20;
04361 else
04362 return (int )ByteValTable[i];
04363 }
04364 else
04365 return 4;
04366 }
04367
04368 static int
04369 distance_value(MinMaxLen* mm)
04370 {
04371
04372 static const short int dist_vals[] = {
04373 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
04374 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
04375 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
04376 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
04377 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
04378 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
04379 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
04380 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
04381 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
04382 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
04383 };
04384
04385 OnigDistance d;
04386
04387 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04388
04389 d = mm->max - mm->min;
04390 if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04391
04392 return (int )dist_vals[d];
04393 else
04394 return 1;
04395 }
04396
04397 static int
04398 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04399 {
04400 if (v2 <= 0) return -1;
04401 if (v1 <= 0) return 1;
04402
04403 v1 *= distance_value(d1);
04404 v2 *= distance_value(d2);
04405
04406 if (v2 > v1) return 1;
04407 if (v2 < v1) return -1;
04408
04409 if (d2->min < d1->min) return 1;
04410 if (d2->min > d1->min) return -1;
04411 return 0;
04412 }
04413
04414 static int
04415 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04416 {
04417 return (a->min == b->min && a->max == b->max) ? 1 : 0;
04418 }
04419
04420
04421 static void
04422 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04423 {
04424 mml->min = min;
04425 mml->max = max;
04426 }
04427
04428 static void
04429 clear_mml(MinMaxLen* mml)
04430 {
04431 mml->min = mml->max = 0;
04432 }
04433
04434 static void
04435 copy_mml(MinMaxLen* to, MinMaxLen* from)
04436 {
04437 to->min = from->min;
04438 to->max = from->max;
04439 }
04440
04441 static void
04442 add_mml(MinMaxLen* to, MinMaxLen* from)
04443 {
04444 to->min = distance_add(to->min, from->min);
04445 to->max = distance_add(to->max, from->max);
04446 }
04447
04448 #if 0
04449 static void
04450 add_len_mml(MinMaxLen* to, OnigDistance len)
04451 {
04452 to->min = distance_add(to->min, len);
04453 to->max = distance_add(to->max, len);
04454 }
04455 #endif
04456
04457 static void
04458 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04459 {
04460 if (to->min > from->min) to->min = from->min;
04461 if (to->max < from->max) to->max = from->max;
04462 }
04463
04464 static void
04465 copy_opt_env(OptEnv* to, OptEnv* from)
04466 {
04467 *to = *from;
04468 }
04469
04470 static void
04471 clear_opt_anc_info(OptAncInfo* anc)
04472 {
04473 anc->left_anchor = 0;
04474 anc->right_anchor = 0;
04475 }
04476
04477 static void
04478 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04479 {
04480 *to = *from;
04481 }
04482
04483 static void
04484 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04485 OnigDistance left_len, OnigDistance right_len)
04486 {
04487 clear_opt_anc_info(to);
04488
04489 to->left_anchor = left->left_anchor;
04490 if (left_len == 0) {
04491 to->left_anchor |= right->left_anchor;
04492 }
04493
04494 to->right_anchor = right->right_anchor;
04495 if (right_len == 0) {
04496 to->right_anchor |= left->right_anchor;
04497 }
04498 else {
04499 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
04500 }
04501 }
04502
04503 static int
04504 is_left_anchor(int anc)
04505 {
04506 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04507 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04508 anc == ANCHOR_PREC_READ_NOT)
04509 return 0;
04510
04511 return 1;
04512 }
04513
04514 static int
04515 is_set_opt_anc_info(OptAncInfo* to, int anc)
04516 {
04517 if ((to->left_anchor & anc) != 0) return 1;
04518
04519 return ((to->right_anchor & anc) != 0 ? 1 : 0);
04520 }
04521
04522 static void
04523 add_opt_anc_info(OptAncInfo* to, int anc)
04524 {
04525 if (is_left_anchor(anc))
04526 to->left_anchor |= anc;
04527 else
04528 to->right_anchor |= anc;
04529 }
04530
04531 static void
04532 remove_opt_anc_info(OptAncInfo* to, int anc)
04533 {
04534 if (is_left_anchor(anc))
04535 to->left_anchor &= ~anc;
04536 else
04537 to->right_anchor &= ~anc;
04538 }
04539
04540 static void
04541 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04542 {
04543 to->left_anchor &= add->left_anchor;
04544 to->right_anchor &= add->right_anchor;
04545 }
04546
04547 static int
04548 is_full_opt_exact_info(OptExactInfo* ex)
04549 {
04550 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04551 }
04552
04553 static void
04554 clear_opt_exact_info(OptExactInfo* ex)
04555 {
04556 clear_mml(&ex->mmd);
04557 clear_opt_anc_info(&ex->anc);
04558 ex->reach_end = 0;
04559 ex->ignore_case = -1;
04560 ex->len = 0;
04561 ex->s[0] = '\0';
04562 }
04563
04564 static void
04565 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04566 {
04567 *to = *from;
04568 }
04569
04570 static void
04571 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04572 {
04573 int i, j, len;
04574 UChar *p, *end;
04575 OptAncInfo tanc;
04576
04577 if (to->ignore_case < 0)
04578 to->ignore_case = add->ignore_case;
04579 else if (to->ignore_case != add->ignore_case)
04580 return ;
04581
04582 p = add->s;
04583 end = p + add->len;
04584 for (i = to->len; p < end; ) {
04585 len = enclen(enc, p, end);
04586 if (i + len > OPT_EXACT_MAXLEN) break;
04587 for (j = 0; j < len && p < end; j++)
04588 to->s[i++] = *p++;
04589 }
04590
04591 to->len = i;
04592 to->reach_end = (p == end ? add->reach_end : 0);
04593
04594 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04595 if (! to->reach_end) tanc.right_anchor = 0;
04596 copy_opt_anc_info(&to->anc, &tanc);
04597 }
04598
04599 static void
04600 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04601 int raw ARG_UNUSED, OnigEncoding enc)
04602 {
04603 int i, j, len;
04604 UChar *p;
04605
04606 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04607 len = enclen(enc, p, end);
04608 if (i + len > OPT_EXACT_MAXLEN) break;
04609 for (j = 0; j < len && p < end; j++)
04610 to->s[i++] = *p++;
04611 }
04612
04613 to->len = i;
04614 }
04615
04616 static void
04617 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04618 {
04619 int i, j, len;
04620
04621 if (add->len == 0 || to->len == 0) {
04622 clear_opt_exact_info(to);
04623 return ;
04624 }
04625
04626 if (! is_equal_mml(&to->mmd, &add->mmd)) {
04627 clear_opt_exact_info(to);
04628 return ;
04629 }
04630
04631 for (i = 0; i < to->len && i < add->len; ) {
04632 if (to->s[i] != add->s[i]) break;
04633 len = enclen(env->enc, to->s + i, to->s + to->len);
04634
04635 for (j = 1; j < len; j++) {
04636 if (to->s[i+j] != add->s[i+j]) break;
04637 }
04638 if (j < len) break;
04639 i += len;
04640 }
04641
04642 if (! add->reach_end || i < add->len || i < to->len) {
04643 to->reach_end = 0;
04644 }
04645 to->len = i;
04646 if (to->ignore_case < 0)
04647 to->ignore_case = add->ignore_case;
04648 else if (add->ignore_case >= 0)
04649 to->ignore_case |= add->ignore_case;
04650
04651 alt_merge_opt_anc_info(&to->anc, &add->anc);
04652 if (! to->reach_end) to->anc.right_anchor = 0;
04653 }
04654
04655 static void
04656 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04657 {
04658 int v1, v2;
04659
04660 v1 = now->len;
04661 v2 = alt->len;
04662
04663 if (v2 == 0) {
04664 return ;
04665 }
04666 else if (v1 == 0) {
04667 copy_opt_exact_info(now, alt);
04668 return ;
04669 }
04670 else if (v1 <= 2 && v2 <= 2) {
04671
04672 v2 = map_position_value(enc, now->s[0]);
04673 v1 = map_position_value(enc, alt->s[0]);
04674
04675 if (now->len > 1) v1 += 5;
04676 if (alt->len > 1) v2 += 5;
04677 }
04678
04679 if (now->ignore_case <= 0) v1 *= 2;
04680 if (alt->ignore_case <= 0) v2 *= 2;
04681
04682 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04683 copy_opt_exact_info(now, alt);
04684 }
04685
04686 static void
04687 clear_opt_map_info(OptMapInfo* map)
04688 {
04689 static const OptMapInfo clean_info = {
04690 {0, 0}, {0, 0}, 0,
04691 {
04692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04698 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04699 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04700 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04701 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04702 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04703 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04704 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04705 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04706 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04707 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04708 }
04709 };
04710
04711 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04712 }
04713
04714 static void
04715 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04716 {
04717 *to = *from;
04718 }
04719
04720 static void
04721 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04722 {
04723 if (map->map[c] == 0) {
04724 map->map[c] = 1;
04725 map->value += map_position_value(enc, c);
04726 }
04727 }
04728
04729 static int
04730 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04731 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04732 {
04733 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04734 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04735 int i, n;
04736
04737 add_char_opt_map_info(map, p[0], enc);
04738
04739 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04740 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04741 if (n < 0) return n;
04742
04743 for (i = 0; i < n; i++) {
04744 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04745 add_char_opt_map_info(map, buf[0], enc);
04746 }
04747
04748 return 0;
04749 }
04750
04751 static void
04752 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04753 {
04754 const int z = 1<<15;
04755
04756 int v1, v2;
04757
04758 if (alt->value == 0) return ;
04759 if (now->value == 0) {
04760 copy_opt_map_info(now, alt);
04761 return ;
04762 }
04763
04764 v1 = z / now->value;
04765 v2 = z / alt->value;
04766 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04767 copy_opt_map_info(now, alt);
04768 }
04769
04770 static int
04771 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04772 {
04773 #define COMP_EM_BASE 20
04774 int ve, vm;
04775
04776 if (m->value <= 0) return -1;
04777
04778 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
04779 vm = COMP_EM_BASE * 5 * 2 / m->value;
04780 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04781 }
04782
04783 static void
04784 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04785 {
04786 int i, val;
04787
04788
04789 if (to->value == 0) return ;
04790 if (add->value == 0 || to->mmd.max < add->mmd.min) {
04791 clear_opt_map_info(to);
04792 return ;
04793 }
04794
04795 alt_merge_mml(&to->mmd, &add->mmd);
04796
04797 val = 0;
04798 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04799 if (add->map[i])
04800 to->map[i] = 1;
04801
04802 if (to->map[i])
04803 val += map_position_value(enc, i);
04804 }
04805 to->value = val;
04806
04807 alt_merge_opt_anc_info(&to->anc, &add->anc);
04808 }
04809
04810 static void
04811 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04812 {
04813 copy_mml(&(opt->exb.mmd), mmd);
04814 copy_mml(&(opt->expr.mmd), mmd);
04815 copy_mml(&(opt->map.mmd), mmd);
04816 }
04817
04818 static void
04819 clear_node_opt_info(NodeOptInfo* opt)
04820 {
04821 clear_mml(&opt->len);
04822 clear_opt_anc_info(&opt->anc);
04823 clear_opt_exact_info(&opt->exb);
04824 clear_opt_exact_info(&opt->exm);
04825 clear_opt_exact_info(&opt->expr);
04826 clear_opt_map_info(&opt->map);
04827 }
04828
04829 static void
04830 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04831 {
04832 *to = *from;
04833 }
04834
04835 static void
04836 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04837 {
04838 int exb_reach, exm_reach;
04839 OptAncInfo tanc;
04840
04841 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04842 copy_opt_anc_info(&to->anc, &tanc);
04843
04844 if (add->exb.len > 0 && to->len.max == 0) {
04845 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04846 to->len.max, add->len.max);
04847 copy_opt_anc_info(&add->exb.anc, &tanc);
04848 }
04849
04850 if (add->map.value > 0 && to->len.max == 0) {
04851 if (add->map.mmd.max == 0)
04852 add->map.anc.left_anchor |= to->anc.left_anchor;
04853 }
04854
04855 exb_reach = to->exb.reach_end;
04856 exm_reach = to->exm.reach_end;
04857
04858 if (add->len.max != 0)
04859 to->exb.reach_end = to->exm.reach_end = 0;
04860
04861 if (add->exb.len > 0) {
04862 if (exb_reach) {
04863 concat_opt_exact_info(&to->exb, &add->exb, enc);
04864 clear_opt_exact_info(&add->exb);
04865 }
04866 else if (exm_reach) {
04867 concat_opt_exact_info(&to->exm, &add->exb, enc);
04868 clear_opt_exact_info(&add->exb);
04869 }
04870 }
04871 select_opt_exact_info(enc, &to->exm, &add->exb);
04872 select_opt_exact_info(enc, &to->exm, &add->exm);
04873
04874 if (to->expr.len > 0) {
04875 if (add->len.max > 0) {
04876 if (to->expr.len > (int )add->len.max)
04877 to->expr.len = (int )add->len.max;
04878
04879 if (to->expr.mmd.max == 0)
04880 select_opt_exact_info(enc, &to->exb, &to->expr);
04881 else
04882 select_opt_exact_info(enc, &to->exm, &to->expr);
04883 }
04884 }
04885 else if (add->expr.len > 0) {
04886 copy_opt_exact_info(&to->expr, &add->expr);
04887 }
04888
04889 select_opt_map_info(&to->map, &add->map);
04890
04891 add_mml(&to->len, &add->len);
04892 }
04893
04894 static void
04895 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04896 {
04897 alt_merge_opt_anc_info (&to->anc, &add->anc);
04898 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
04899 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
04900 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04901 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
04902
04903 alt_merge_mml(&to->len, &add->len);
04904 }
04905
04906
04907 #define MAX_NODE_OPT_INFO_REF_COUNT 5
04908
04909 static int
04910 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04911 {
04912 int type;
04913 int r = 0;
04914
04915 clear_node_opt_info(opt);
04916 set_bound_node_opt_info(opt, &env->mmd);
04917
04918 type = NTYPE(node);
04919 switch (type) {
04920 case NT_LIST:
04921 {
04922 OptEnv nenv;
04923 NodeOptInfo nopt;
04924 Node* nd = node;
04925
04926 copy_opt_env(&nenv, env);
04927 do {
04928 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04929 if (r == 0) {
04930 add_mml(&nenv.mmd, &nopt.len);
04931 concat_left_node_opt_info(env->enc, opt, &nopt);
04932 }
04933 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04934 }
04935 break;
04936
04937 case NT_ALT:
04938 {
04939 NodeOptInfo nopt;
04940 Node* nd = node;
04941
04942 do {
04943 r = optimize_node_left(NCAR(nd), &nopt, env);
04944 if (r == 0) {
04945 if (nd == node) copy_node_opt_info(opt, &nopt);
04946 else alt_merge_node_opt_info(opt, &nopt, env);
04947 }
04948 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04949 }
04950 break;
04951
04952 case NT_STR:
04953 {
04954 StrNode* sn = NSTR(node);
04955 OnigDistance slen = sn->end - sn->s;
04956 int is_raw = NSTRING_IS_RAW(node);
04957
04958 if (! NSTRING_IS_AMBIG(node)) {
04959 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04960 is_raw, env->enc);
04961 opt->exb.ignore_case = 0;
04962 if (slen > 0) {
04963 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04964 }
04965 set_mml(&opt->len, slen, slen);
04966 }
04967 else {
04968 OnigDistance max;
04969
04970 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04971 int n = onigenc_strlen(env->enc, sn->s, sn->end);
04972 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04973 }
04974 else {
04975 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04976 is_raw, env->enc);
04977 opt->exb.ignore_case = 1;
04978
04979 if (slen > 0) {
04980 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04981 env->enc, env->case_fold_flag);
04982 if (r != 0) break;
04983 }
04984
04985 max = slen;
04986 }
04987
04988 set_mml(&opt->len, slen, max);
04989 }
04990
04991 if ((OnigDistance )opt->exb.len == slen)
04992 opt->exb.reach_end = 1;
04993 }
04994 break;
04995
04996 case NT_CCLASS:
04997 {
04998 int i, z;
04999 CClassNode* cc = NCCLASS(node);
05000
05001
05002
05003 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
05004 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
05005 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05006
05007 set_mml(&opt->len, min, max);
05008 }
05009 else {
05010 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
05011 z = BITSET_AT(cc->bs, i);
05012 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
05013 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05014 }
05015 }
05016 set_mml(&opt->len, 1, 1);
05017 }
05018 }
05019 break;
05020
05021 case NT_CTYPE:
05022 {
05023 int i, min, max;
05024 int maxcode;
05025
05026 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05027
05028 if (max == 1) {
05029 min = 1;
05030
05031 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
05032 switch (NCTYPE(node)->ctype) {
05033 case ONIGENC_CTYPE_WORD:
05034 if (NCTYPE(node)->not != 0) {
05035 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
05036 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
05037 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05038 }
05039 }
05040 }
05041 else {
05042 for (i = 0; i < maxcode; i++) {
05043 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
05044 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05045 }
05046 }
05047 }
05048 break;
05049 }
05050 }
05051 else {
05052 min = ONIGENC_MBC_MINLEN(env->enc);
05053 }
05054 set_mml(&opt->len, min, max);
05055 }
05056 break;
05057
05058 case NT_CANY:
05059 {
05060 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
05061 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05062 set_mml(&opt->len, min, max);
05063 }
05064 break;
05065
05066 case NT_ANCHOR:
05067 switch (NANCHOR(node)->type) {
05068 case ANCHOR_BEGIN_BUF:
05069 case ANCHOR_BEGIN_POSITION:
05070 case ANCHOR_BEGIN_LINE:
05071 case ANCHOR_END_BUF:
05072 case ANCHOR_SEMI_END_BUF:
05073 case ANCHOR_END_LINE:
05074 case ANCHOR_LOOK_BEHIND:
05075 case ANCHOR_PREC_READ_NOT:
05076 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
05077 break;
05078
05079 case ANCHOR_PREC_READ:
05080 {
05081 NodeOptInfo nopt;
05082
05083 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
05084 if (r == 0) {
05085 if (nopt.exb.len > 0)
05086 copy_opt_exact_info(&opt->expr, &nopt.exb);
05087 else if (nopt.exm.len > 0)
05088 copy_opt_exact_info(&opt->expr, &nopt.exm);
05089
05090 opt->expr.reach_end = 0;
05091
05092 if (nopt.map.value > 0)
05093 copy_opt_map_info(&opt->map, &nopt.map);
05094 }
05095 }
05096 break;
05097
05098 case ANCHOR_LOOK_BEHIND_NOT:
05099 break;
05100 }
05101 break;
05102
05103 case NT_BREF:
05104 {
05105 int i;
05106 int* backs;
05107 OnigDistance min, max, tmin, tmax;
05108 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
05109 BRefNode* br = NBREF(node);
05110
05111 if (br->state & NST_RECURSION) {
05112 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
05113 break;
05114 }
05115 backs = BACKREFS_P(br);
05116 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
05117 if (r != 0) break;
05118 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
05119 if (r != 0) break;
05120 for (i = 1; i < br->back_num; i++) {
05121 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
05122 if (r != 0) break;
05123 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
05124 if (r != 0) break;
05125 if (min > tmin) min = tmin;
05126 if (max < tmax) max = tmax;
05127 }
05128 if (r == 0) set_mml(&opt->len, min, max);
05129 }
05130 break;
05131
05132 #ifdef USE_SUBEXP_CALL
05133 case NT_CALL:
05134 if (IS_CALL_RECURSION(NCALL(node)))
05135 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
05136 else {
05137 OnigOptionType save = env->options;
05138 env->options = NENCLOSE(NCALL(node)->target)->option;
05139 r = optimize_node_left(NCALL(node)->target, opt, env);
05140 env->options = save;
05141 }
05142 break;
05143 #endif
05144
05145 case NT_QTFR:
05146 {
05147 int i;
05148 OnigDistance min, max;
05149 NodeOptInfo nopt;
05150 QtfrNode* qn = NQTFR(node);
05151
05152 r = optimize_node_left(qn->target, &nopt, env);
05153 if (r) break;
05154
05155 if ( IS_REPEAT_INFINITE(qn->upper)) {
05156 if (env->mmd.max == 0 &&
05157 NTYPE(qn->target) == NT_CANY && qn->greedy) {
05158 if (IS_MULTILINE(env->options))
05159
05160 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
05161 else
05162 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
05163 }
05164 }
05165 else {
05166 if (qn->lower > 0) {
05167 copy_node_opt_info(opt, &nopt);
05168 if (nopt.exb.len > 0) {
05169 if (nopt.exb.reach_end) {
05170 for (i = 2; i <= qn->lower &&
05171 ! is_full_opt_exact_info(&opt->exb); i++) {
05172 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
05173 }
05174 if (i < qn->lower) {
05175 opt->exb.reach_end = 0;
05176 }
05177 }
05178 }
05179
05180 if (qn->lower != qn->upper) {
05181 opt->exb.reach_end = 0;
05182 opt->exm.reach_end = 0;
05183 }
05184 if (qn->lower > 1)
05185 opt->exm.reach_end = 0;
05186 }
05187 }
05188
05189 min = distance_multiply(nopt.len.min, qn->lower);
05190 if (IS_REPEAT_INFINITE(qn->upper))
05191 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
05192 else
05193 max = distance_multiply(nopt.len.max, qn->upper);
05194
05195 set_mml(&opt->len, min, max);
05196 }
05197 break;
05198
05199 case NT_ENCLOSE:
05200 {
05201 EncloseNode* en = NENCLOSE(node);
05202
05203 switch (en->type) {
05204 case ENCLOSE_OPTION:
05205 {
05206 OnigOptionType save = env->options;
05207
05208 env->options = en->option;
05209 r = optimize_node_left(en->target, opt, env);
05210 env->options = save;
05211 }
05212 break;
05213
05214 case ENCLOSE_MEMORY:
05215 #ifdef USE_SUBEXP_CALL
05216 en->opt_count++;
05217 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
05218 OnigDistance min, max;
05219
05220 min = 0;
05221 max = ONIG_INFINITE_DISTANCE;
05222 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
05223 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
05224 set_mml(&opt->len, min, max);
05225 }
05226 else
05227 #endif
05228 {
05229 r = optimize_node_left(en->target, opt, env);
05230
05231 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
05232 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
05233 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
05234 }
05235 }
05236 break;
05237
05238 case ENCLOSE_STOP_BACKTRACK:
05239 case ENCLOSE_CONDITION:
05240 r = optimize_node_left(en->target, opt, env);
05241 break;
05242 }
05243 }
05244 break;
05245
05246 default:
05247 #ifdef ONIG_DEBUG
05248 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
05249 NTYPE(node));
05250 #endif
05251 r = ONIGERR_TYPE_BUG;
05252 break;
05253 }
05254
05255 return r;
05256 }
05257
05258 static int
05259 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
05260 {
05261 int r;
05262 int allow_reverse;
05263
05264 if (e->len == 0) return 0;
05265
05266 reg->exact = (UChar* )xmalloc(e->len);
05267 CHECK_NULL_RETURN_MEMERR(reg->exact);
05268 xmemcpy(reg->exact, e->s, e->len);
05269 reg->exact_end = reg->exact + e->len;
05270
05271 allow_reverse =
05272 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
05273
05274 if (e->ignore_case > 0) {
05275 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
05276 r = set_bm_skip(reg->exact, reg->exact_end, reg,
05277 reg->map, &(reg->int_map), 1);
05278 if (r == 0) {
05279 reg->optimize = (allow_reverse != 0
05280 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
05281 }
05282 else {
05283 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
05284 }
05285 }
05286 else {
05287 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
05288 }
05289 }
05290 else {
05291 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
05292 r = set_bm_skip(reg->exact, reg->exact_end, reg,
05293 reg->map, &(reg->int_map), 0);
05294 if (r) return r;
05295
05296 reg->optimize = (allow_reverse != 0
05297 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
05298 }
05299 else {
05300 reg->optimize = ONIG_OPTIMIZE_EXACT;
05301 }
05302 }
05303
05304 reg->dmin = e->mmd.min;
05305 reg->dmax = e->mmd.max;
05306
05307 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
05308 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
05309 }
05310
05311 return 0;
05312 }
05313
05314 static void
05315 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
05316 {
05317 int i;
05318
05319 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05320 reg->map[i] = m->map[i];
05321
05322 reg->optimize = ONIG_OPTIMIZE_MAP;
05323 reg->dmin = m->mmd.min;
05324 reg->dmax = m->mmd.max;
05325
05326 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
05327 reg->threshold_len = (int )(reg->dmin + 1);
05328 }
05329 }
05330
05331 static void
05332 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
05333 {
05334 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
05335 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
05336 }
05337
05338 #ifdef ONIG_DEBUG
05339 static void print_optimize_info(FILE* f, regex_t* reg);
05340 #endif
05341
05342 static int
05343 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
05344 {
05345
05346 int r;
05347 NodeOptInfo opt;
05348 OptEnv env;
05349
05350 env.enc = reg->enc;
05351 env.options = reg->options;
05352 env.case_fold_flag = reg->case_fold_flag;
05353 env.scan_env = scan_env;
05354 clear_mml(&env.mmd);
05355
05356 r = optimize_node_left(node, &opt, &env);
05357 if (r) return r;
05358
05359 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05360 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
05361 ANCHOR_LOOK_BEHIND);
05362
05363 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
05364 ANCHOR_PREC_READ_NOT);
05365
05366 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05367 reg->anchor_dmin = opt.len.min;
05368 reg->anchor_dmax = opt.len.max;
05369 }
05370
05371 if (opt.exb.len > 0 || opt.exm.len > 0) {
05372 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05373 if (opt.map.value > 0 &&
05374 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05375 goto set_map;
05376 }
05377 else {
05378 r = set_optimize_exact_info(reg, &opt.exb);
05379 set_sub_anchor(reg, &opt.exb.anc);
05380 }
05381 }
05382 else if (opt.map.value > 0) {
05383 set_map:
05384 set_optimize_map_info(reg, &opt.map);
05385 set_sub_anchor(reg, &opt.map.anc);
05386 }
05387 else {
05388 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05389 if (opt.len.max == 0)
05390 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05391 }
05392
05393 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05394 if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05395 #endif
05396 return r;
05397 }
05398
05399 static void
05400 clear_optimize_info(regex_t* reg)
05401 {
05402 reg->optimize = ONIG_OPTIMIZE_NONE;
05403 reg->anchor = 0;
05404 reg->anchor_dmin = 0;
05405 reg->anchor_dmax = 0;
05406 reg->sub_anchor = 0;
05407 reg->exact_end = (UChar* )NULL;
05408 reg->threshold_len = 0;
05409 if (IS_NOT_NULL(reg->exact)) {
05410 xfree(reg->exact);
05411 reg->exact = (UChar* )NULL;
05412 }
05413 }
05414
05415 #ifdef ONIG_DEBUG
05416
05417 static void print_enc_string(FILE* fp, OnigEncoding enc,
05418 const UChar *s, const UChar *end)
05419 {
05420 fprintf(fp, "\nPATTERN: /");
05421
05422 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05423 const UChar *p;
05424 OnigCodePoint code;
05425
05426 p = s;
05427 while (p < end) {
05428 code = ONIGENC_MBC_TO_CODE(enc, p, end);
05429 if (code >= 0x80) {
05430 fprintf(fp, " 0x%04x ", (int )code);
05431 }
05432 else {
05433 fputc((int )code, fp);
05434 }
05435
05436 p += enclen(enc, p, end);
05437 }
05438 }
05439 else {
05440 while (s < end) {
05441 fputc((int )*s, fp);
05442 s++;
05443 }
05444 }
05445
05446 fprintf(fp, "/ (%s)\n", enc->name);
05447 }
05448
05449 static void
05450 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05451 {
05452 if (a == ONIG_INFINITE_DISTANCE)
05453 fputs("inf", f);
05454 else
05455 fprintf(f, "(%"PRIuSIZE")", a);
05456
05457 fputs("-", f);
05458
05459 if (b == ONIG_INFINITE_DISTANCE)
05460 fputs("inf", f);
05461 else
05462 fprintf(f, "(%"PRIuSIZE")", b);
05463 }
05464
05465 static void
05466 print_anchor(FILE* f, int anchor)
05467 {
05468 int q = 0;
05469
05470 fprintf(f, "[");
05471
05472 if (anchor & ANCHOR_BEGIN_BUF) {
05473 fprintf(f, "begin-buf");
05474 q = 1;
05475 }
05476 if (anchor & ANCHOR_BEGIN_LINE) {
05477 if (q) fprintf(f, ", ");
05478 q = 1;
05479 fprintf(f, "begin-line");
05480 }
05481 if (anchor & ANCHOR_BEGIN_POSITION) {
05482 if (q) fprintf(f, ", ");
05483 q = 1;
05484 fprintf(f, "begin-pos");
05485 }
05486 if (anchor & ANCHOR_END_BUF) {
05487 if (q) fprintf(f, ", ");
05488 q = 1;
05489 fprintf(f, "end-buf");
05490 }
05491 if (anchor & ANCHOR_SEMI_END_BUF) {
05492 if (q) fprintf(f, ", ");
05493 q = 1;
05494 fprintf(f, "semi-end-buf");
05495 }
05496 if (anchor & ANCHOR_END_LINE) {
05497 if (q) fprintf(f, ", ");
05498 q = 1;
05499 fprintf(f, "end-line");
05500 }
05501 if (anchor & ANCHOR_ANYCHAR_STAR) {
05502 if (q) fprintf(f, ", ");
05503 q = 1;
05504 fprintf(f, "anychar-star");
05505 }
05506 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05507 if (q) fprintf(f, ", ");
05508 fprintf(f, "anychar-star-ml");
05509 }
05510
05511 fprintf(f, "]");
05512 }
05513
05514 static void
05515 print_optimize_info(FILE* f, regex_t* reg)
05516 {
05517 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05518 "EXACT_IC", "MAP",
05519 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
05520
05521 fprintf(f, "optimize: %s\n", on[reg->optimize]);
05522 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
05523 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05524 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05525 fprintf(f, "\n");
05526
05527 if (reg->optimize) {
05528 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
05529 fprintf(f, "\n");
05530 }
05531 fprintf(f, "\n");
05532
05533 if (reg->exact) {
05534 UChar *p;
05535 fprintf(f, "exact: [");
05536 for (p = reg->exact; p < reg->exact_end; p++) {
05537 fputc(*p, f);
05538 }
05539 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05540 }
05541 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05542 int c, i, n = 0;
05543
05544 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05545 if (reg->map[i]) n++;
05546
05547 fprintf(f, "map: n=%d\n", n);
05548 if (n > 0) {
05549 c = 0;
05550 fputc('[', f);
05551 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05552 if (reg->map[i] != 0) {
05553 if (c > 0) fputs(", ", f);
05554 c++;
05555 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05556 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05557 fputc(i, f);
05558 else
05559 fprintf(f, "%d", i);
05560 }
05561 }
05562 fprintf(f, "]\n");
05563 }
05564 }
05565 }
05566 #endif
05567
05568
05569 extern void
05570 onig_free_body(regex_t* reg)
05571 {
05572 if (IS_NOT_NULL(reg)) {
05573 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
05574 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
05575 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
05576 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05577 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
05578 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
05579
05580 #ifdef USE_NAMED_GROUP
05581 onig_names_free(reg);
05582 #endif
05583 }
05584 }
05585
05586 extern void
05587 onig_free(regex_t* reg)
05588 {
05589 if (IS_NOT_NULL(reg)) {
05590 onig_free_body(reg);
05591 xfree(reg);
05592 }
05593 }
05594
05595 size_t
05596 onig_memsize(const regex_t *reg)
05597 {
05598 size_t size = sizeof(regex_t);
05599 if (IS_NULL(reg)) return 0;
05600 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
05601 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
05602 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05603 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05604 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05605 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
05606
05607 return size;
05608 }
05609
05610 size_t
05611 onig_region_memsize(const OnigRegion *regs)
05612 {
05613 size_t size = sizeof(*regs);
05614 if (IS_NULL(regs)) return 0;
05615 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
05616 return size;
05617 }
05618
05619 #define REGEX_TRANSFER(to,from) do {\
05620 (to)->state = ONIG_STATE_MODIFY;\
05621 onig_free_body(to);\
05622 xmemcpy(to, from, sizeof(regex_t));\
05623 xfree(from);\
05624 } while (0)
05625
05626 extern void
05627 onig_transfer(regex_t* to, regex_t* from)
05628 {
05629 THREAD_ATOMIC_START;
05630 REGEX_TRANSFER(to, from);
05631 THREAD_ATOMIC_END;
05632 }
05633
05634 #define REGEX_CHAIN_HEAD(reg) do {\
05635 while (IS_NOT_NULL((reg)->chain)) {\
05636 (reg) = (reg)->chain;\
05637 }\
05638 } while (0)
05639
05640 extern void
05641 onig_chain_link_add(regex_t* to, regex_t* add)
05642 {
05643 THREAD_ATOMIC_START;
05644 REGEX_CHAIN_HEAD(to);
05645 to->chain = add;
05646 THREAD_ATOMIC_END;
05647 }
05648
05649 extern void
05650 onig_chain_reduce(regex_t* reg)
05651 {
05652 regex_t *head, *prev;
05653
05654 prev = reg;
05655 head = prev->chain;
05656 if (IS_NOT_NULL(head)) {
05657 reg->state = ONIG_STATE_MODIFY;
05658 while (IS_NOT_NULL(head->chain)) {
05659 prev = head;
05660 head = head->chain;
05661 }
05662 prev->chain = (regex_t* )NULL;
05663 REGEX_TRANSFER(reg, head);
05664 }
05665 }
05666
05667 #ifdef ONIG_DEBUG
05668 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05669 #endif
05670 #ifdef ONIG_DEBUG_PARSE_TREE
05671 static void print_tree P_((FILE* f, Node* node));
05672 #endif
05673
05674 extern int
05675 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05676 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05677 {
05678 #define COMPILE_INIT_SIZE 20
05679
05680 int r;
05681 OnigDistance init_size;
05682 Node* root;
05683 ScanEnv scan_env = {0};
05684 #ifdef USE_SUBEXP_CALL
05685 UnsetAddrList uslist;
05686 #endif
05687
05688 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05689
05690 scan_env.sourcefile = sourcefile;
05691 scan_env.sourceline = sourceline;
05692 reg->state = ONIG_STATE_COMPILING;
05693
05694 #ifdef ONIG_DEBUG
05695 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05696 #endif
05697
05698 if (reg->alloc == 0) {
05699 init_size = (pattern_end - pattern) * 2;
05700 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05701 r = BBUF_INIT(reg, init_size);
05702 if (r != 0) goto end;
05703 }
05704 else
05705 reg->used = 0;
05706
05707 reg->num_mem = 0;
05708 reg->num_repeat = 0;
05709 reg->num_null_check = 0;
05710 reg->repeat_range_alloc = 0;
05711 reg->repeat_range = (OnigRepeatRange* )NULL;
05712 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05713 reg->num_comb_exp_check = 0;
05714 #endif
05715
05716 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05717 if (r != 0) goto err;
05718
05719 #ifdef ONIG_DEBUG_PARSE_TREE
05720 # if 0
05721 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05722 if (!onig_is_prelude()) {
05723 print_tree(stderr, root);
05724 }
05725 # endif
05726 #endif
05727
05728 #ifdef USE_NAMED_GROUP
05729
05730 if (scan_env.num_named > 0 &&
05731 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05732 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05733 if (scan_env.num_named != scan_env.num_mem)
05734 r = disable_noname_group_capture(&root, reg, &scan_env);
05735 else
05736 r = numbered_ref_check(root);
05737
05738 if (r != 0) goto err;
05739 }
05740 #endif
05741
05742 #ifdef USE_SUBEXP_CALL
05743 if (scan_env.num_call > 0) {
05744 r = unset_addr_list_init(&uslist, scan_env.num_call);
05745 if (r != 0) goto err;
05746 scan_env.unset_addr_list = &uslist;
05747 r = setup_subexp_call(root, &scan_env);
05748 if (r != 0) goto err_unset;
05749 r = subexp_recursive_check_trav(root, &scan_env);
05750 if (r < 0) goto err_unset;
05751 r = subexp_inf_recursive_check_trav(root, &scan_env);
05752 if (r != 0) goto err_unset;
05753
05754 reg->num_call = scan_env.num_call;
05755 }
05756 else
05757 reg->num_call = 0;
05758 #endif
05759
05760 r = setup_tree(root, reg, IN_ROOT, &scan_env);
05761 if (r != 0) goto err_unset;
05762
05763 #ifdef ONIG_DEBUG_PARSE_TREE
05764 if (!onig_is_prelude()) print_tree(stderr, root);
05765 #endif
05766
05767 reg->capture_history = scan_env.capture_history;
05768 reg->bt_mem_start = scan_env.bt_mem_start;
05769 reg->bt_mem_start |= reg->capture_history;
05770 if (IS_FIND_CONDITION(reg->options))
05771 BIT_STATUS_ON_ALL(reg->bt_mem_end);
05772 else {
05773 reg->bt_mem_end = scan_env.bt_mem_end;
05774 reg->bt_mem_end |= reg->capture_history;
05775 }
05776
05777 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05778 if (scan_env.backrefed_mem == 0
05779 #ifdef USE_SUBEXP_CALL
05780 || scan_env.num_call == 0
05781 #endif
05782 ) {
05783 setup_comb_exp_check(root, 0, &scan_env);
05784 #ifdef USE_SUBEXP_CALL
05785 if (scan_env.has_recursion != 0) {
05786 scan_env.num_comb_exp_check = 0;
05787 }
05788 else
05789 #endif
05790 if (scan_env.comb_exp_max_regnum > 0) {
05791 int i;
05792 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05793 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05794 scan_env.num_comb_exp_check = 0;
05795 break;
05796 }
05797 }
05798 }
05799 }
05800
05801 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05802 #endif
05803
05804 clear_optimize_info(reg);
05805 #ifndef ONIG_DONT_OPTIMIZE
05806 r = set_optimize_info_from_tree(root, reg, &scan_env);
05807 if (r != 0) goto err_unset;
05808 #endif
05809
05810 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05811 xfree(scan_env.mem_nodes_dynamic);
05812 scan_env.mem_nodes_dynamic = (Node** )NULL;
05813 }
05814
05815 r = compile_tree(root, reg);
05816 if (r == 0) {
05817 r = add_opcode(reg, OP_END);
05818 #ifdef USE_SUBEXP_CALL
05819 if (scan_env.num_call > 0) {
05820 r = unset_addr_list_fix(&uslist, reg);
05821 unset_addr_list_end(&uslist);
05822 if (r) goto err;
05823 }
05824 #endif
05825
05826 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05827 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05828 else {
05829 if (reg->bt_mem_start != 0)
05830 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05831 else
05832 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05833 }
05834 }
05835 #ifdef USE_SUBEXP_CALL
05836 else if (scan_env.num_call > 0) {
05837 unset_addr_list_end(&uslist);
05838 }
05839 #endif
05840 onig_node_free(root);
05841
05842 #ifdef ONIG_DEBUG_COMPILE
05843 #ifdef USE_NAMED_GROUP
05844 if (!onig_is_prelude()) onig_print_names(stderr, reg);
05845 #endif
05846 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05847 #endif
05848
05849 end:
05850 reg->state = ONIG_STATE_NORMAL;
05851 return r;
05852
05853 err_unset:
05854 #ifdef USE_SUBEXP_CALL
05855 if (scan_env.num_call > 0) {
05856 unset_addr_list_end(&uslist);
05857 }
05858 #endif
05859 err:
05860 if (IS_NOT_NULL(scan_env.error)) {
05861 if (IS_NOT_NULL(einfo)) {
05862 einfo->enc = scan_env.enc;
05863 einfo->par = scan_env.error;
05864 einfo->par_end = scan_env.error_end;
05865 }
05866 }
05867
05868 onig_node_free(root);
05869 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05870 xfree(scan_env.mem_nodes_dynamic);
05871 return r;
05872 }
05873
05874 #ifdef USE_RECOMPILE_API
05875 extern int
05876 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05877 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05878 OnigErrorInfo* einfo)
05879 {
05880 int r;
05881 regex_t *new_reg;
05882
05883 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05884 if (r) return r;
05885 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05886 onig_transfer(reg, new_reg);
05887 }
05888 else {
05889 onig_chain_link_add(reg, new_reg);
05890 }
05891 return 0;
05892 }
05893 #endif
05894
05895 static int onig_inited = 0;
05896
05897 extern int
05898 onig_reg_init(regex_t* reg, OnigOptionType option,
05899 OnigCaseFoldType case_fold_flag,
05900 OnigEncoding enc, const OnigSyntaxType* syntax)
05901 {
05902 if (! onig_inited)
05903 onig_init();
05904
05905 if (IS_NULL(reg))
05906 return ONIGERR_INVALID_ARGUMENT;
05907
05908 if (ONIGENC_IS_UNDEF(enc))
05909 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
05910
05911 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05912 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05913 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05914 }
05915
05916 (reg)->state = ONIG_STATE_MODIFY;
05917
05918 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05919 option |= syntax->options;
05920 option &= ~ONIG_OPTION_SINGLELINE;
05921 }
05922 else
05923 option |= syntax->options;
05924
05925 (reg)->enc = enc;
05926 (reg)->options = option;
05927 (reg)->syntax = syntax;
05928 (reg)->optimize = 0;
05929 (reg)->exact = (UChar* )NULL;
05930 (reg)->int_map = (int* )NULL;
05931 (reg)->int_map_backward = (int* )NULL;
05932 (reg)->chain = (regex_t* )NULL;
05933
05934 (reg)->p = (UChar* )NULL;
05935 (reg)->alloc = 0;
05936 (reg)->used = 0;
05937 (reg)->name_table = (void* )NULL;
05938
05939 (reg)->case_fold_flag = case_fold_flag;
05940 return 0;
05941 }
05942
05943 extern int
05944 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05945 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05946 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05947 {
05948 int r;
05949
05950 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05951 if (r) return r;
05952
05953 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05954 return r;
05955 }
05956
05957 extern int
05958 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05959 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05960 OnigErrorInfo* einfo)
05961 {
05962 int r;
05963
05964 *reg = (regex_t* )xmalloc(sizeof(regex_t));
05965 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05966
05967 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05968 if (r) goto err;
05969
05970 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05971 if (r) {
05972 err:
05973 onig_free(*reg);
05974 *reg = NULL;
05975 }
05976 return r;
05977 }
05978
05979
05980 extern int
05981 onig_init(void)
05982 {
05983 if (onig_inited != 0)
05984 return 0;
05985
05986 THREAD_SYSTEM_INIT;
05987 THREAD_ATOMIC_START;
05988
05989 onig_inited = 1;
05990
05991 onigenc_init();
05992
05993
05994 #ifdef ONIG_DEBUG_STATISTICS
05995 onig_statistics_init();
05996 #endif
05997
05998 THREAD_ATOMIC_END;
05999 return 0;
06000 }
06001
06002
06003 extern int
06004 onig_end(void)
06005 {
06006 THREAD_ATOMIC_START;
06007
06008 #ifdef ONIG_DEBUG_STATISTICS
06009 if (!onig_is_prelude()) onig_print_statistics(stderr);
06010 #endif
06011
06012 #ifdef USE_SHARED_CCLASS_TABLE
06013 onig_free_shared_cclass_table();
06014 #endif
06015
06016 #ifdef USE_PARSE_TREE_NODE_RECYCLE
06017 onig_free_node_list();
06018 #endif
06019
06020 onig_inited = 0;
06021
06022 THREAD_ATOMIC_END;
06023 THREAD_SYSTEM_END;
06024 return 0;
06025 }
06026
06027 extern int
06028 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
06029 {
06030 OnigCodePoint n, *data;
06031 OnigCodePoint low, high, x;
06032
06033 GET_CODE_POINT(n, p);
06034 data = (OnigCodePoint* )p;
06035 data++;
06036
06037 for (low = 0, high = n; low < high; ) {
06038 x = (low + high) >> 1;
06039 if (code > data[x * 2 + 1])
06040 low = x + 1;
06041 else
06042 high = x;
06043 }
06044
06045 return ((low < n && code >= data[low * 2]) ? 1 : 0);
06046 }
06047
06048 extern int
06049 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
06050 {
06051 int found;
06052
06053 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
06054 if (IS_NULL(cc->mbuf)) {
06055 found = 0;
06056 }
06057 else {
06058 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
06059 }
06060 }
06061 else {
06062 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
06063 }
06064
06065 if (IS_NCCLASS_NOT(cc))
06066 return !found;
06067 else
06068 return found;
06069 }
06070
06071 extern int
06072 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
06073 {
06074 int len;
06075
06076 if (ONIGENC_MBC_MINLEN(enc) > 1) {
06077 len = 2;
06078 }
06079 else {
06080 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
06081 }
06082 return onig_is_code_in_cc_len(len, code, cc);
06083 }
06084
06085
06086 #ifdef ONIG_DEBUG
06087
06088
06089 #define ARG_SPECIAL -1
06090 #define ARG_NON 0
06091 #define ARG_RELADDR 1
06092 #define ARG_ABSADDR 2
06093 #define ARG_LENGTH 3
06094 #define ARG_MEMNUM 4
06095 #define ARG_OPTION 5
06096 #define ARG_STATE_CHECK 6
06097
06098 OnigOpInfoType OnigOpInfo[] = {
06099 { OP_FINISH, "finish", ARG_NON },
06100 { OP_END, "end", ARG_NON },
06101 { OP_EXACT1, "exact1", ARG_SPECIAL },
06102 { OP_EXACT2, "exact2", ARG_SPECIAL },
06103 { OP_EXACT3, "exact3", ARG_SPECIAL },
06104 { OP_EXACT4, "exact4", ARG_SPECIAL },
06105 { OP_EXACT5, "exact5", ARG_SPECIAL },
06106 { OP_EXACTN, "exactn", ARG_SPECIAL },
06107 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
06108 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
06109 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
06110 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
06111 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
06112 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
06113 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
06114 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
06115 { OP_CCLASS, "cclass", ARG_SPECIAL },
06116 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
06117 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
06118 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
06119 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
06120 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
06121 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
06122 { OP_ANYCHAR, "anychar", ARG_NON },
06123 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
06124 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
06125 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
06126 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
06127 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
06128 { OP_WORD, "word", ARG_NON },
06129 { OP_NOT_WORD, "not-word", ARG_NON },
06130 { OP_WORD_BOUND, "word-bound", ARG_NON },
06131 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
06132 { OP_WORD_BEGIN, "word-begin", ARG_NON },
06133 { OP_WORD_END, "word-end", ARG_NON },
06134 { OP_ASCII_WORD, "ascii-word", ARG_NON },
06135 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
06136 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
06137 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
06138 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
06139 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
06140 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
06141 { OP_END_BUF, "end-buf", ARG_NON },
06142 { OP_BEGIN_LINE, "begin-line", ARG_NON },
06143 { OP_END_LINE, "end-line", ARG_NON },
06144 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
06145 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
06146 { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
06147 { OP_BACKREF1, "backref1", ARG_NON },
06148 { OP_BACKREF2, "backref2", ARG_NON },
06149 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
06150 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
06151 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
06152 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
06153 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
06154 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
06155 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
06156 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
06157 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
06158 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
06159 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
06160 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
06161 { OP_SET_OPTION, "set-option", ARG_OPTION },
06162 { OP_KEEP, "keep", ARG_NON },
06163 { OP_FAIL, "fail", ARG_NON },
06164 { OP_JUMP, "jump", ARG_RELADDR },
06165 { OP_PUSH, "push", ARG_RELADDR },
06166 { OP_POP, "pop", ARG_NON },
06167 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
06168 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
06169 { OP_REPEAT, "repeat", ARG_SPECIAL },
06170 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
06171 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
06172 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
06173 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
06174 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
06175 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
06176 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
06177 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
06178 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
06179 { OP_PUSH_POS, "push-pos", ARG_NON },
06180 { OP_POP_POS, "pop-pos", ARG_NON },
06181 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
06182 { OP_FAIL_POS, "fail-pos", ARG_NON },
06183 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
06184 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
06185 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
06186 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
06187 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
06188 { OP_CALL, "call", ARG_ABSADDR },
06189 { OP_RETURN, "return", ARG_NON },
06190 { OP_CONDITION, "condition", ARG_SPECIAL },
06191 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
06192 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
06193 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
06194 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
06195 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
06196 "state-check-anychar-ml*", ARG_STATE_CHECK },
06197 { -1, "", ARG_NON }
06198 };
06199
06200 static const char*
06201 op2name(int opcode)
06202 {
06203 int i;
06204
06205 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
06206 if (opcode == OnigOpInfo[i].opcode)
06207 return OnigOpInfo[i].name;
06208 }
06209 return "";
06210 }
06211
06212 static int
06213 op2arg_type(int opcode)
06214 {
06215 int i;
06216
06217 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
06218 if (opcode == OnigOpInfo[i].opcode)
06219 return OnigOpInfo[i].arg_type;
06220 }
06221 return ARG_SPECIAL;
06222 }
06223
06224 static void
06225 Indent(FILE* f, int indent)
06226 {
06227 int i;
06228 for (i = 0; i < indent; i++) putc(' ', f);
06229 }
06230
06231 static void
06232 p_string(FILE* f, int len, UChar* s)
06233 {
06234 fputs(":", f);
06235 while (len-- > 0) { fputc(*s++, f); }
06236 }
06237
06238 static void
06239 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
06240 {
06241 int x = len * mb_len;
06242
06243 fprintf(f, ":%d:", len);
06244 while (x-- > 0) { fputc(*s++, f); }
06245 }
06246
06247 extern void
06248 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
06249 OnigEncoding enc)
06250 {
06251 int i, n, arg_type;
06252 RelAddrType addr;
06253 LengthType len;
06254 MemNumType mem;
06255 StateCheckNumType scn;
06256 OnigCodePoint code;
06257 UChar *q;
06258
06259 fprintf(f, "[%s", op2name(*bp));
06260 arg_type = op2arg_type(*bp);
06261 if (arg_type != ARG_SPECIAL) {
06262 bp++;
06263 switch (arg_type) {
06264 case ARG_NON:
06265 break;
06266 case ARG_RELADDR:
06267 GET_RELADDR_INC(addr, bp);
06268 fprintf(f, ":(%d)", addr);
06269 break;
06270 case ARG_ABSADDR:
06271 GET_ABSADDR_INC(addr, bp);
06272 fprintf(f, ":(%d)", addr);
06273 break;
06274 case ARG_LENGTH:
06275 GET_LENGTH_INC(len, bp);
06276 fprintf(f, ":%d", len);
06277 break;
06278 case ARG_MEMNUM:
06279 mem = *((MemNumType* )bp);
06280 bp += SIZE_MEMNUM;
06281 fprintf(f, ":%d", mem);
06282 break;
06283 case ARG_OPTION:
06284 {
06285 OnigOptionType option = *((OnigOptionType* )bp);
06286 bp += SIZE_OPTION;
06287 fprintf(f, ":%d", option);
06288 }
06289 break;
06290
06291 case ARG_STATE_CHECK:
06292 scn = *((StateCheckNumType* )bp);
06293 bp += SIZE_STATE_CHECK_NUM;
06294 fprintf(f, ":%d", scn);
06295 break;
06296 }
06297 }
06298 else {
06299 switch (*bp++) {
06300 case OP_EXACT1:
06301 case OP_ANYCHAR_STAR_PEEK_NEXT:
06302 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
06303 p_string(f, 1, bp++); break;
06304 case OP_EXACT2:
06305 p_string(f, 2, bp); bp += 2; break;
06306 case OP_EXACT3:
06307 p_string(f, 3, bp); bp += 3; break;
06308 case OP_EXACT4:
06309 p_string(f, 4, bp); bp += 4; break;
06310 case OP_EXACT5:
06311 p_string(f, 5, bp); bp += 5; break;
06312 case OP_EXACTN:
06313 GET_LENGTH_INC(len, bp);
06314 p_len_string(f, len, 1, bp);
06315 bp += len;
06316 break;
06317
06318 case OP_EXACTMB2N1:
06319 p_string(f, 2, bp); bp += 2; break;
06320 case OP_EXACTMB2N2:
06321 p_string(f, 4, bp); bp += 4; break;
06322 case OP_EXACTMB2N3:
06323 p_string(f, 6, bp); bp += 6; break;
06324 case OP_EXACTMB2N:
06325 GET_LENGTH_INC(len, bp);
06326 p_len_string(f, len, 2, bp);
06327 bp += len * 2;
06328 break;
06329 case OP_EXACTMB3N:
06330 GET_LENGTH_INC(len, bp);
06331 p_len_string(f, len, 3, bp);
06332 bp += len * 3;
06333 break;
06334 case OP_EXACTMBN:
06335 {
06336 int mb_len;
06337
06338 GET_LENGTH_INC(mb_len, bp);
06339 GET_LENGTH_INC(len, bp);
06340 fprintf(f, ":%d:%d:", mb_len, len);
06341 n = len * mb_len;
06342 while (n-- > 0) { fputc(*bp++, f); }
06343 }
06344 break;
06345
06346 case OP_EXACT1_IC:
06347 len = enclen(enc, bp, bpend);
06348 p_string(f, len, bp);
06349 bp += len;
06350 break;
06351 case OP_EXACTN_IC:
06352 GET_LENGTH_INC(len, bp);
06353 p_len_string(f, len, 1, bp);
06354 bp += len;
06355 break;
06356
06357 case OP_CCLASS:
06358 n = bitset_on_num((BitSetRef )bp);
06359 bp += SIZE_BITSET;
06360 fprintf(f, ":%d", n);
06361 break;
06362
06363 case OP_CCLASS_NOT:
06364 n = bitset_on_num((BitSetRef )bp);
06365 bp += SIZE_BITSET;
06366 fprintf(f, ":%d", n);
06367 break;
06368
06369 case OP_CCLASS_MB:
06370 case OP_CCLASS_MB_NOT:
06371 GET_LENGTH_INC(len, bp);
06372 q = bp;
06373 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06374 ALIGNMENT_RIGHT(q);
06375 #endif
06376 GET_CODE_POINT(code, q);
06377 bp += len;
06378 fprintf(f, ":%d:%d", (int )code, len);
06379 break;
06380
06381 case OP_CCLASS_MIX:
06382 case OP_CCLASS_MIX_NOT:
06383 n = bitset_on_num((BitSetRef )bp);
06384 bp += SIZE_BITSET;
06385 GET_LENGTH_INC(len, bp);
06386 q = bp;
06387 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06388 ALIGNMENT_RIGHT(q);
06389 #endif
06390 GET_CODE_POINT(code, q);
06391 bp += len;
06392 fprintf(f, ":%d:%d:%d", n, (int )code, len);
06393 break;
06394
06395 case OP_CCLASS_NODE:
06396 {
06397 CClassNode *cc;
06398
06399 GET_POINTER_INC(cc, bp);
06400 n = bitset_on_num(cc->bs);
06401 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06402 }
06403 break;
06404
06405 case OP_BACKREFN_IC:
06406 mem = *((MemNumType* )bp);
06407 bp += SIZE_MEMNUM;
06408 fprintf(f, ":%d", mem);
06409 break;
06410
06411 case OP_BACKREF_MULTI_IC:
06412 case OP_BACKREF_MULTI:
06413 fputs(" ", f);
06414 GET_LENGTH_INC(len, bp);
06415 for (i = 0; i < len; i++) {
06416 GET_MEMNUM_INC(mem, bp);
06417 if (i > 0) fputs(", ", f);
06418 fprintf(f, "%d", mem);
06419 }
06420 break;
06421
06422 case OP_BACKREF_WITH_LEVEL:
06423 {
06424 OnigOptionType option;
06425 LengthType level;
06426
06427 GET_OPTION_INC(option, bp);
06428 fprintf(f, ":%d", option);
06429 GET_LENGTH_INC(level, bp);
06430 fprintf(f, ":%d", level);
06431
06432 fputs(" ", f);
06433 GET_LENGTH_INC(len, bp);
06434 for (i = 0; i < len; i++) {
06435 GET_MEMNUM_INC(mem, bp);
06436 if (i > 0) fputs(", ", f);
06437 fprintf(f, "%d", mem);
06438 }
06439 }
06440 break;
06441
06442 case OP_REPEAT:
06443 case OP_REPEAT_NG:
06444 {
06445 mem = *((MemNumType* )bp);
06446 bp += SIZE_MEMNUM;
06447 addr = *((RelAddrType* )bp);
06448 bp += SIZE_RELADDR;
06449 fprintf(f, ":%d:%d", mem, addr);
06450 }
06451 break;
06452
06453 case OP_PUSH_OR_JUMP_EXACT1:
06454 case OP_PUSH_IF_PEEK_NEXT:
06455 addr = *((RelAddrType* )bp);
06456 bp += SIZE_RELADDR;
06457 fprintf(f, ":(%d)", addr);
06458 p_string(f, 1, bp);
06459 bp += 1;
06460 break;
06461
06462 case OP_LOOK_BEHIND:
06463 GET_LENGTH_INC(len, bp);
06464 fprintf(f, ":%d", len);
06465 break;
06466
06467 case OP_PUSH_LOOK_BEHIND_NOT:
06468 GET_RELADDR_INC(addr, bp);
06469 GET_LENGTH_INC(len, bp);
06470 fprintf(f, ":%d:(%d)", len, addr);
06471 break;
06472
06473 case OP_STATE_CHECK_PUSH:
06474 case OP_STATE_CHECK_PUSH_OR_JUMP:
06475 scn = *((StateCheckNumType* )bp);
06476 bp += SIZE_STATE_CHECK_NUM;
06477 addr = *((RelAddrType* )bp);
06478 bp += SIZE_RELADDR;
06479 fprintf(f, ":%d:(%d)", scn, addr);
06480 break;
06481
06482 case OP_CONDITION:
06483 GET_MEMNUM_INC(mem, bp);
06484 GET_RELADDR_INC(addr, bp);
06485 fprintf(f, ":%d:(%d)", mem, addr);
06486 break;
06487
06488 default:
06489 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06490 *--bp);
06491 }
06492 }
06493 fputs("]", f);
06494 if (nextp) *nextp = bp;
06495 }
06496
06497 static void
06498 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06499 {
06500 int ncode;
06501 UChar* bp = reg->p;
06502 UChar* end = reg->p + reg->used;
06503
06504 fprintf(f, "code length: %d", reg->used);
06505
06506 ncode = -1;
06507 while (bp < end) {
06508 ncode++;
06509 if (ncode % 5 == 0)
06510 fprintf(f, "\n%ld:", bp - reg->p);
06511 else
06512 fprintf(f, " %ld:", bp - reg->p);
06513 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06514 }
06515
06516 fprintf(f, "\n");
06517 }
06518
06519 static void
06520 print_indent_tree(FILE* f, Node* node, int indent)
06521 {
06522 int i, type, container_p = 0;
06523 int add = 3;
06524 UChar* p;
06525
06526 Indent(f, indent);
06527 if (IS_NULL(node)) {
06528 fprintf(f, "ERROR: null node!!!\n");
06529 exit (0);
06530 }
06531
06532 type = NTYPE(node);
06533 switch (type) {
06534 case NT_LIST:
06535 case NT_ALT:
06536 if (NTYPE(node) == NT_LIST)
06537 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06538 else
06539 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06540
06541 print_indent_tree(f, NCAR(node), indent + add);
06542 while (IS_NOT_NULL(node = NCDR(node))) {
06543 if (NTYPE(node) != type) {
06544 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06545 exit(0);
06546 }
06547 print_indent_tree(f, NCAR(node), indent + add);
06548 }
06549 break;
06550
06551 case NT_STR:
06552 fprintf(f, "<string%s:%"PRIxPTR">",
06553 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06554 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06555 if (*p >= 0x20 && *p < 0x7f)
06556 fputc(*p, f);
06557 else {
06558 fprintf(f, " 0x%02x", *p);
06559 }
06560 }
06561 break;
06562
06563 case NT_CCLASS:
06564 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06565 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06566 if (NCCLASS(node)->mbuf) {
06567 BBuf* bbuf = NCCLASS(node)->mbuf;
06568 for (i = 0; i < (int )bbuf->used; i++) {
06569 if (i > 0) fprintf(f, ",");
06570 fprintf(f, "%0x", bbuf->p[i]);
06571 }
06572 }
06573 break;
06574
06575 case NT_CTYPE:
06576 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06577 switch (NCTYPE(node)->ctype) {
06578 case ONIGENC_CTYPE_WORD:
06579 if (NCTYPE(node)->not != 0)
06580 fputs("not word", f);
06581 else
06582 fputs("word", f);
06583 break;
06584
06585 default:
06586 fprintf(f, "ERROR: undefined ctype.\n");
06587 exit(0);
06588 }
06589 break;
06590
06591 case NT_CANY:
06592 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06593 break;
06594
06595 case NT_ANCHOR:
06596 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06597 switch (NANCHOR(node)->type) {
06598 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
06599 case ANCHOR_END_BUF: fputs("end buf", f); break;
06600 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
06601 case ANCHOR_END_LINE: fputs("end line", f); break;
06602 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
06603 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06604 case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
06605
06606 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
06607 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
06608 #ifdef USE_WORD_BEGIN_END
06609 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
06610 case ANCHOR_WORD_END: fputs("word end", f); break;
06611 #endif
06612 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
06613 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
06614 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
06615 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06616 case ANCHOR_KEEP: fputs("keep",f); break;
06617
06618 default:
06619 fprintf(f, "ERROR: undefined anchor type.\n");
06620 break;
06621 }
06622 break;
06623
06624 case NT_BREF:
06625 {
06626 int* p;
06627 BRefNode* br = NBREF(node);
06628 p = BACKREFS_P(br);
06629 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06630 for (i = 0; i < br->back_num; i++) {
06631 if (i > 0) fputs(", ", f);
06632 fprintf(f, "%d", p[i]);
06633 }
06634 }
06635 break;
06636
06637 #ifdef USE_SUBEXP_CALL
06638 case NT_CALL:
06639 {
06640 CallNode* cn = NCALL(node);
06641 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06642 p_string(f, cn->name_end - cn->name, cn->name);
06643 }
06644 break;
06645 #endif
06646
06647 case NT_QTFR:
06648 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06649 NQTFR(node)->lower, NQTFR(node)->upper,
06650 (NQTFR(node)->greedy ? "" : "?"));
06651 print_indent_tree(f, NQTFR(node)->target, indent + add);
06652 break;
06653
06654 case NT_ENCLOSE:
06655 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06656 switch (NENCLOSE(node)->type) {
06657 case ENCLOSE_OPTION:
06658 fprintf(f, "option:%d", NENCLOSE(node)->option);
06659 break;
06660 case ENCLOSE_MEMORY:
06661 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06662 break;
06663 case ENCLOSE_STOP_BACKTRACK:
06664 fprintf(f, "stop-bt");
06665 break;
06666 case ENCLOSE_CONDITION:
06667 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
06668 break;
06669
06670 default:
06671 break;
06672 }
06673 fprintf(f, "\n");
06674 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06675 break;
06676
06677 default:
06678 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06679 break;
06680 }
06681
06682 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06683 type != NT_ENCLOSE)
06684 fprintf(f, "\n");
06685
06686 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06687
06688 fflush(f);
06689 }
06690 #endif
06691
06692 #ifdef ONIG_DEBUG_PARSE_TREE
06693 static void
06694 print_tree(FILE* f, Node* node)
06695 {
06696 print_indent_tree(f, node, 0);
06697 }
06698 #endif
06699