00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regint.h"
00032
00033
00034
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p has code 13 (CR) and the character that
   follows it has code 10 (LF). */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))

/*
 * Newline test that is aware of the CRLF newline option.
 *
 * Without IS_NEWLINE_CRLF(option) this is just ONIGENC_IS_MBC_NEWLINE().
 * With it:
 *   - an LF (0x0a) at p is a newline, except when check_prev is non-zero
 *     and the preceding character is CR (0x0d), in which case 0 is returned;
 *   - a CR at p immediately followed by an LF located before end is a newline;
 *   - anything else falls back to ONIGENC_IS_MBC_NEWLINE().
 */
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
                  const UChar *end, OnigOptionType option, int check_prev)
{
  const UChar *next;

  if (! IS_NEWLINE_CRLF(option))
    return ONIGENC_IS_MBC_NEWLINE(enc, p, end);

  if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
    const UChar *before;

    if (! check_prev)
      return 1;

    before = onigenc_get_prev_char_head(enc, start, p, end);
    if (before != NULL && ONIGENC_MBC_TO_CODE(enc, before, end) == 0x0d)
      return 0;
    return 1;
  }

  next = p + enclen(enc, p, end);
  if (next < end &&
      ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
      ONIGENC_MBC_TO_CODE(enc, next, end) == 0x0a)
    return 1;

  return ONIGENC_IS_MBC_NEWLINE(enc, p, end) ? 1 : 0;
}
#else
/* No CRLF handling compiled in: ignore start/option/check_prev. */
#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
#endif
00076
00077 #ifdef USE_CAPTURE_HISTORY
00078 static void history_tree_free(OnigCaptureTreeNode* node);
00079
00080 static void
00081 history_tree_clear(OnigCaptureTreeNode* node)
00082 {
00083 int i;
00084
00085 if (IS_NOT_NULL(node)) {
00086 for (i = 0; i < node->num_childs; i++) {
00087 if (IS_NOT_NULL(node->childs[i])) {
00088 history_tree_free(node->childs[i]);
00089 }
00090 }
00091 for (i = 0; i < node->allocated; i++) {
00092 node->childs[i] = (OnigCaptureTreeNode* )0;
00093 }
00094 node->num_childs = 0;
00095 node->beg = ONIG_REGION_NOTPOS;
00096 node->end = ONIG_REGION_NOTPOS;
00097 node->group = -1;
00098 xfree(node->childs);
00099 node->childs = (OnigCaptureTreeNode** )0;
00100 }
00101 }
00102
00103 static void
00104 history_tree_free(OnigCaptureTreeNode* node)
00105 {
00106 history_tree_clear(node);
00107 xfree(node);
00108 }
00109
00110 static void
00111 history_root_free(OnigRegion* r)
00112 {
00113 if (IS_NOT_NULL(r->history_root)) {
00114 history_tree_free(r->history_root);
00115 r->history_root = (OnigCaptureTreeNode* )0;
00116 }
00117 }
00118
00119 static OnigCaptureTreeNode*
00120 history_node_new(void)
00121 {
00122 OnigCaptureTreeNode* node;
00123
00124 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
00125 CHECK_NULL_RETURN(node);
00126 node->childs = (OnigCaptureTreeNode** )0;
00127 node->allocated = 0;
00128 node->num_childs = 0;
00129 node->group = -1;
00130 node->beg = ONIG_REGION_NOTPOS;
00131 node->end = ONIG_REGION_NOTPOS;
00132
00133 return node;
00134 }
00135
00136 static int
00137 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
00138 {
00139 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
00140
00141 if (parent->num_childs >= parent->allocated) {
00142 int n, i;
00143
00144 if (IS_NULL(parent->childs)) {
00145 n = HISTORY_TREE_INIT_ALLOC_SIZE;
00146 parent->childs =
00147 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
00148 CHECK_NULL_RETURN_MEMERR(parent->childs);
00149 }
00150 else {
00151 OnigCaptureTreeNode** tmp;
00152 n = parent->allocated * 2;
00153 tmp =
00154 (OnigCaptureTreeNode** )xrealloc(parent->childs,
00155 sizeof(OnigCaptureTreeNode*) * n);
00156 if (tmp == 0) {
00157 history_tree_clear(parent);
00158 return ONIGERR_MEMORY;
00159 }
00160 parent->childs = tmp;
00161 }
00162 for (i = parent->allocated; i < n; i++) {
00163 parent->childs[i] = (OnigCaptureTreeNode* )0;
00164 }
00165 parent->allocated = n;
00166 }
00167
00168 parent->childs[parent->num_childs] = child;
00169 parent->num_childs++;
00170 return 0;
00171 }
00172
00173 static OnigCaptureTreeNode*
00174 history_tree_clone(OnigCaptureTreeNode* node)
00175 {
00176 int i, r;
00177 OnigCaptureTreeNode *clone, *child;
00178
00179 clone = history_node_new();
00180 CHECK_NULL_RETURN(clone);
00181
00182 clone->beg = node->beg;
00183 clone->end = node->end;
00184 for (i = 0; i < node->num_childs; i++) {
00185 child = history_tree_clone(node->childs[i]);
00186 if (IS_NULL(child)) {
00187 history_tree_free(clone);
00188 return (OnigCaptureTreeNode* )0;
00189 }
00190 r = history_tree_add_child(clone, child);
00191 if (r != 0) {
00192 history_tree_free(child);
00193 history_tree_free(clone);
00194 return (OnigCaptureTreeNode* )0;
00195 }
00196 }
00197
00198 return clone;
00199 }
00200
00201 extern OnigCaptureTreeNode*
00202 onig_get_capture_tree(OnigRegion* region)
00203 {
00204 return region->history_root;
00205 }
00206 #endif
00207
00208 extern void
00209 onig_region_clear(OnigRegion* region)
00210 {
00211 int i;
00212
00213 for (i = 0; i < region->num_regs; i++) {
00214 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
00215 }
00216 #ifdef USE_CAPTURE_HISTORY
00217 history_root_free(region);
00218 #endif
00219 }
00220
00221 extern int
00222 onig_region_resize(OnigRegion* region, int n)
00223 {
00224 region->num_regs = n;
00225
00226 if (n < ONIG_NREGION)
00227 n = ONIG_NREGION;
00228
00229 if (region->allocated == 0) {
00230 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
00231 if (region->beg == 0)
00232 return ONIGERR_MEMORY;
00233
00234 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
00235 if (region->end == 0) {
00236 xfree(region->beg);
00237 return ONIGERR_MEMORY;
00238 }
00239
00240 region->allocated = n;
00241 }
00242 else if (region->allocated < n) {
00243 OnigPosition *tmp;
00244
00245 region->allocated = 0;
00246 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
00247 if (tmp == 0) {
00248 xfree(region->beg);
00249 xfree(region->end);
00250 return ONIGERR_MEMORY;
00251 }
00252 region->beg = tmp;
00253 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
00254 if (tmp == 0) {
00255 xfree(region->beg);
00256 xfree(region->end);
00257 return ONIGERR_MEMORY;
00258 }
00259 region->end = tmp;
00260
00261 region->allocated = n;
00262 }
00263
00264 return 0;
00265 }
00266
00267 static int
00268 onig_region_resize_clear(OnigRegion* region, int n)
00269 {
00270 int r;
00271
00272 r = onig_region_resize(region, n);
00273 if (r != 0) return r;
00274 onig_region_clear(region);
00275 return 0;
00276 }
00277
00278 extern int
00279 onig_region_set(OnigRegion* region, int at, int beg, int end)
00280 {
00281 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
00282
00283 if (at >= region->allocated) {
00284 int r = onig_region_resize(region, at + 1);
00285 if (r < 0) return r;
00286 }
00287
00288 region->beg[at] = beg;
00289 region->end[at] = end;
00290 return 0;
00291 }
00292
00293 extern void
00294 onig_region_init(OnigRegion* region)
00295 {
00296 region->num_regs = 0;
00297 region->allocated = 0;
00298 region->beg = (OnigPosition* )0;
00299 region->end = (OnigPosition* )0;
00300 region->history_root = (OnigCaptureTreeNode* )0;
00301 }
00302
00303 extern OnigRegion*
00304 onig_region_new(void)
00305 {
00306 OnigRegion* r;
00307
00308 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
00309 if (r)
00310 onig_region_init(r);
00311 return r;
00312 }
00313
00314 extern void
00315 onig_region_free(OnigRegion* r, int free_self)
00316 {
00317 if (r) {
00318 if (r->allocated > 0) {
00319 if (r->beg) xfree(r->beg);
00320 if (r->end) xfree(r->end);
00321 r->allocated = 0;
00322 }
00323 #ifdef USE_CAPTURE_HISTORY
00324 history_root_free(r);
00325 #endif
00326 if (free_self) xfree(r);
00327 }
00328 }
00329
00330 extern void
00331 onig_region_copy(OnigRegion* to, OnigRegion* from)
00332 {
00333 #define RREGC_SIZE (sizeof(int) * from->num_regs)
00334 int i, r;
00335
00336 if (to == from) return;
00337
00338 r = onig_region_resize(to, from->num_regs);
00339 if (r) return;
00340
00341 for (i = 0; i < from->num_regs; i++) {
00342 to->beg[i] = from->beg[i];
00343 to->end[i] = from->end[i];
00344 }
00345 to->num_regs = from->num_regs;
00346
00347 #ifdef USE_CAPTURE_HISTORY
00348 history_root_free(to);
00349
00350 if (IS_NOT_NULL(from->history_root)) {
00351 to->history_root = history_tree_clone(from->history_root);
00352 }
00353 #endif
00354 }
00355
00356
/* Value stored in mem_start_stk[] / mem_end_stk[] slots (and in
   u.mem.start / u.mem.end) when no stack entry is referenced. */
#define INVALID_STACK_INDEX   (-1)

/*
 * Match-stack entry types.
 *
 * Types with a non-zero low byte are the ones matched by
 * STK_MASK_POP_USED; the STACK_POP loops stop at such an entry.
 */
#define STK_ALT                    0x0001
#define STK_LOOK_BEHIND_NOT        0x0002
#define STK_POS_NOT                0x0003

/* Types with a zero low byte: the STACK_POP loops step over these,
   undoing the effect of some of them on the way. */
#define STK_MEM_START              0x0100
#define STK_MEM_END                0x8200
#define STK_REPEAT_INC             0x0300
#define STK_STATE_CHECK_MARK       0x1000

#define STK_NULL_CHECK_START       0x3000
#define STK_NULL_CHECK_END         0x5000
#define STK_MEM_END_MARK           0x8400
#define STK_POS                    0x0500
#define STK_STOP_BT                0x0600
#define STK_REPEAT                 0x0700
#define STK_CALL_FRAME             0x0800
#define STK_RETURN                 0x0900
#define STK_VOID                   0x0a00

/* Bit masks applied to an entry's type. */
#define STK_MASK_POP_USED          0x00ff  /* entries STACK_POP stops at        */
#define STK_MASK_TO_VOID_TARGET    0x10ff  /* POP_USED types + STATE_CHECK_MARK */
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* STK_MEM_END and STK_MEM_END_MARK  */
00385
/*
 * Initialize a match-argument object: no saved stack, plus the option,
 * region, start and gpos values supplied by the caller.  When
 * USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is defined, best_len is also reset
 * to ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = NULL;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).best_len = ONIG_MISMATCH;\
} while(0)
#else
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = NULL;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
} while(0)
#endif
00404
#ifdef USE_COMBINATION_EXPLOSION_CHECK

/* State-check buffers of at least this many bytes come from xmalloc;
   smaller ones come from xalloca.  MATCH_ARG_FREE relies on the same
   threshold to decide whether the buffer has to be freed. */
#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

/*
 * Prepare (msa).state_check_buff for a subject of str_len bytes and
 * state_num check states (one bit per position/state pair, rounded up to
 * whole bytes).
 *
 * The buffer is only created when state_num > 0, the subject is at least
 * STATE_CHECK_STRING_THRESHOLD_LEN long and the size is below
 * STATE_CHECK_BUFF_MAX_SIZE; otherwise buff/size are set to NULL/0.
 * Only the part from byte `offset` onwards is zeroed.
 *
 * Note: `offset` is assigned to (it must be an lvalue), and an xmalloc
 * failure is handled by CHECK_NULL_RETURN_MEMERR inside the macro.
 */
#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
  if ((state_num) > 0 && (str_len) >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    offset = ((offset) * (state_num)) >> 3;\
    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
        (msa).state_check_buff = (void* )xmalloc(size);\
        CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
      }\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
              (size_t )(size - (offset))); \
      (msa).state_check_buff_size = size;\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
} while(0)

/* Free the saved match stack and, if it was heap-allocated (size at or
   above the malloc threshold), the state-check buffer. */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
  }\
} while(0)
#else
/* Free the saved match stack.  Wrapped in do/while(0) like the variant
   above so the macro is a single statement and cannot capture an `else`
   that follows it. */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
#endif
00444
00445
00446
/* Above this many OnigStackIndex slots STACK_INIT takes the index area
   from the heap instead of the stack. */
#define MAX_PTR_NUM 100

/*
 * Set up the index area (alloc_addr) and the match stack
 * (stk_alloc / stk_base / stk / stk_end) of the enclosing function.
 *
 *   - ptr_num > MAX_PTR_NUM: the index area comes from xmalloc and
 *     heap_addr records it; otherwise it comes from xalloca and heap_addr
 *     is NULL.
 *   - msa->stack_p set: that saved stack of msa->stack_n entries is used;
 *     otherwise a stack of stack_num entries is taken from xalloca (in
 *     the all-alloca case it shares one block with the index area).
 *
 * NOTE(review): the xmalloc result is not checked for NULL here.
 */
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)  do {\
  if (ptr_num > MAX_PTR_NUM) {\
    alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = alloc_addr;\
    if (msa->stack_p) {\
      stk_alloc = (OnigStackType* )(msa->stack_p);\
      stk_end   = stk_alloc + msa->stack_n;\
    } else {\
      stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
      stk_end   = stk_alloc + (stack_num);\
    }\
  } else if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_end    = stk_alloc + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
                                 + sizeof(OnigStackType) * (stack_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
    stk_end    = stk_alloc + (stack_num);\
  }\
  stk_base = stk_alloc;\
  stk      = stk_base;\
} while(0)

/* Remember the current stack in msa (pointer and entry count), but only
   when stk_base no longer equals stk_alloc. */
#define STACK_SAVE do{\
  if (stk_base != stk_alloc) {\
    msa->stack_p = stk_base;\
    msa->stack_n = stk_end - stk_base; \
  }\
} while(0)
00489
00490 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
00491
00492 extern unsigned int
00493 onig_get_match_stack_limit_size(void)
00494 {
00495 return MatchStackLimitSize;
00496 }
00497
00498 extern int
00499 onig_set_match_stack_limit_size(unsigned int size)
00500 {
00501 MatchStackLimitSize = size;
00502 return 0;
00503 }
00504
00505 static int
00506 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
00507 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
00508 {
00509 size_t n;
00510 OnigStackType *x, *stk_base, *stk_end, *stk;
00511
00512 stk_base = *arg_stk_base;
00513 stk_end = *arg_stk_end;
00514 stk = *arg_stk;
00515
00516 n = stk_end - stk_base;
00517 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
00518 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
00519 if (IS_NULL(x)) {
00520 STACK_SAVE;
00521 return ONIGERR_MEMORY;
00522 }
00523 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
00524 n *= 2;
00525 }
00526 else {
00527 unsigned int limit_size = MatchStackLimitSize;
00528 n *= 2;
00529 if (limit_size != 0 && n > limit_size) {
00530 if ((unsigned int )(stk_end - stk_base) == limit_size)
00531 return ONIGERR_MATCH_STACK_LIMIT_OVER;
00532 else
00533 n = limit_size;
00534 }
00535 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
00536 if (IS_NULL(x)) {
00537 STACK_SAVE;
00538 return ONIGERR_MEMORY;
00539 }
00540 }
00541 *arg_stk = x + (stk - stk_base);
00542 *arg_stk_base = x;
00543 *arg_stk_end = x + n;
00544 return 0;
00545 }
00546
/*
 * Make sure at least n free entries remain above stk, growing the stack
 * through stack_double() when they do not.  On failure the stack is
 * saved, xmalloc_base (if set) is freed and the enclosing function
 * returns the error code.
 */
#define STACK_ENSURE(n) do {\
  if (stk_end - stk < (n)) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) {\
      STACK_SAVE;\
      if (xmalloc_base) xfree(xmalloc_base);\
      return r;\
    }\
  }\
} while(0)

/* Conversions between a stack entry pointer and its index from stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)

/* Push an entry that carries nothing but its type. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* Non-zero when the entry's type has any bit of STK_MASK_TO_VOID_TARGET. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
00568
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index inside state_check_buff for subject position s and check
   state snum (state numbers start at 1). */
#define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))

/* Read the state-check bit for the current position `s` and state snum
   into v; v is 0 when no buffer is in use. */
#define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff == NULL) (v) = 0;\
  else {\
    int x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
} while(0)


/* Extra branch for the pop loops: a popped STK_STATE_CHECK_MARK entry
   sets its bit in state_check_buff. */
#define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8)); \
  }

/* Push a state entry (pattern position, subject position, previous
   subject position, keep position) with state_check set to 0. */
#define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without STACK_ENSURE and storing only pcode; the
   caller must already have made room. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Push an STK_ALT entry that remembers check state snum (0 when no
   state-check buffer is in use). */
#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push an STK_STATE_CHECK_MARK entry for (s, snum); nothing is pushed
   when no state-check buffer is in use. */
#define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

/* No state checking compiled in: the pop loops get no extra branch. */
#define ELSE_IF_STATE_CHECK_MARK(stk)

/* Push a state entry (pattern position, subject position, previous
   subject position, keep position). */
#define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without STACK_ENSURE and storing only pcode; the
   caller must already have made room. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
00646
/* Typed shorthands built on STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(pat,s,sprev,keep)     STACK_PUSH(STK_ALT,pat,s,sprev,keep)
#define STACK_PUSH_POS(s,sprev,keep)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
#define STACK_PUSH_STOP_BT                   STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
        STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)

/* Push an STK_REPEAT entry for repeat `id` at pattern position `pat`
   with its counter at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->u.repeat.num    = (id);\
  stk->u.repeat.pcode  = (pat);\
  stk->u.repeat.count  = 0;\
  STACK_INC;\
} while(0)

/* Push an STK_REPEAT_INC entry holding the stack index of its STK_REPEAT
   entry; popping it decrements that entry's counter. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->u.repeat_inc.si  = (sindex);\
  STACK_INC;\
} while(0)

/* Push the start of capture group mnum at subject position s.  The
   previous mem_start_stk/mem_end_stk values are saved in the entry, then
   the start slot points at this entry and the end slot is invalidated. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->u.mem.num      = (mnum);\
  stk->u.mem.pstr     = (s);\
  stk->u.mem.start    = mem_start_stk[mnum];\
  stk->u.mem.end      = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Push the end of capture group mnum at subject position s, saving the
   previous slot values and pointing the end slot at this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->u.mem.num    = (mnum);\
  stk->u.mem.pstr   = (s);\
  stk->u.mem.start  = mem_start_stk[mnum];\
  stk->u.mem.end    = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push an STK_MEM_END_MARK entry carrying only the group number. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)

/* Walk down from the top of the stack to the STK_MEM_START entry of
   group mnum that is not paired with a later end/end-mark entry of the
   same group; k is left at that entry (or at stk_base if none is found). */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      ++level;\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      --level;\
    }\
  }\
} while(0)

/* Walk up from k to the top of the stack and report the outermost
   start/end pair of group mnum through `start` and `end`. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      ++level;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      --level;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)

/* Push the start marker of null check cnum with subject position s. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_START;\
  stk->u.null_check.num  = (cnum);\
  stk->u.null_check.pstr = (s);\
  STACK_INC;\
} while(0)

/* Push the end marker of null check cnum. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_END;\
  stk->u.null_check.num  = (cnum);\
  STACK_INC;\
} while(0)

/* Push a call frame whose return address is pattern position pat. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push an STK_RETURN marker. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)
00761
00762
/* Debug builds only: report and jump to stack_error when a stack pointer
   has moved below stk_base.  Expands to nothing otherwise. */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    goto stack_error;\
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif

/* Drop exactly one entry. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)

/*
 * Pop entries until one whose type matches STK_MASK_POP_USED is reached;
 * stk is left pointing at that entry.  How much bookkeeping is undone for
 * the entries stepped over depends on pop_level:
 *   STACK_POP_LEVEL_FREE       nothing is restored
 *   STACK_POP_LEVEL_MEM_START  STK_MEM_START restores the group slots
 *   anything else              additionally STK_MEM_END restores the
 *                              slots and STK_REPEAT_INC decrements the
 *                              counter of its STK_REPEAT entry
 * State-check marks are handled by ELSE_IF_STATE_CHECK_MARK at every level.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  default:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  }\
} while(0)
00821
/* Pop down to the nearest STK_POS_NOT entry (stk is left on it),
   restoring group slots and repeat counters of the entries passed. */
#define STACK_POP_TIL_POS_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/* Same as STACK_POP_TIL_POS_NOT, but stops at the nearest
   STK_LOOK_BEHIND_NOT entry. */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
00861
/* Scan down from the top of the stack, turning every entry that matches
   STK_MASK_TO_VOID_TARGET into STK_VOID, until the nearest STK_POS entry;
   that entry is voided too and k is left pointing at it.  stk itself is
   not moved. */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)

/* Same scan as STACK_POS_END, terminated by the nearest STK_STOP_BT
   entry; uses its own local cursor. */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
00891
/* Find the nearest STK_NULL_CHECK_START entry with number id and set
   isnull to whether the subject position recorded there equals s. */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      (isnull) = (k->u.null_check.pstr == (s));\
      break;\
    }\
  }\
} while(0)

/* Nesting-aware variant: every STK_NULL_CHECK_END met on the way down
   raises the level, and a matching START only counts at level 0 (a
   matching START at a higher level lowers the level instead). */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else --level;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      ++level;\
    }\
  }\
} while(0)
00926
/*
 * Null check that also looks at capture state.
 *
 * Finds the nearest STK_NULL_CHECK_START entry numbered id.  If its
 * recorded position differs from s, isnull = 0.  Otherwise isnull starts
 * at 1 and every STK_MEM_START entry between that marker and the top of
 * the stack is inspected:
 *   - saved end is INVALID_STACK_INDEX, or the saved start and end
 *     positions differ                      -> isnull = 0, stop
 *   - they are equal but differ from s      -> isnull = -1, keep going
 * The saved end is read through the stack when the group's bit is set in
 * reg->bt_mem_end, and taken as a pointer value otherwise.
 */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      if (k->u.null_check.pstr != (s)) {\
        (isnull) = 0;\
        break;\
      }\
      else {\
        UChar* endp;\
        (isnull) = 1;\
        while (k < stk) {\
          if (k->type == STK_MEM_START) {\
            if (k->u.mem.end == INVALID_STACK_INDEX) {\
              (isnull) = 0; break;\
            }\
            if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
              endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
            else\
              endp = (UChar* )k->u.mem.end;\
            if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
              (isnull) = 0; break;\
            }\
            else if (endp != s) {\
              (isnull) = -1; \
            }\
          }\
          k++;\
        }\
        break;\
      }\
    }\
  }\
} while(0)

/* Nesting-aware variant of STACK_NULL_CHECK_MEMST: STK_NULL_CHECK_END
   entries numbered id raise the level, and a matching START is only
   evaluated at level 0 (otherwise it lowers the level). */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level != 0) {\
          --level;\
        }\
        else if (k->u.null_check.pstr != (s)) {\
          (isnull) = 0;\
          break;\
        }\
        else {\
          UChar* endp;\
          (isnull) = 1;\
          while (k < stk) {\
            if (k->type == STK_MEM_START) {\
              if (k->u.mem.end == INVALID_STACK_INDEX) {\
                (isnull) = 0; break;\
              }\
              if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
              else\
                endp = (UChar* )k->u.mem.end;\
              if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                (isnull) = 0; break;\
              }\
              else if (endp != s) {\
                (isnull) = -1; \
              }\
            }\
            k++;\
          }\
          break;\
        }\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) ++level;\
    }\
  }\
} while(0)
01013
/* Leave k on the nearest STK_REPEAT entry numbered id that is seen while
   the call level is 0.  Walking down, STK_RETURN raises the level and
   STK_CALL_FRAME lowers it. */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) {\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME) --level;\
    else if (k->type == STK_RETURN)     ++level;\
  }\
} while(0)

/* Fetch into addr the return address of the nearest STK_CALL_FRAME entry
   that is not already balanced by an STK_RETURN entry above it. */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      else --level;\
    }\
    else if (k->type == STK_RETURN)\
      ++level;\
  }\
} while(0)
01049
01050
/* Compare len bytes at s1 and s2, advancing both pointers and counting
   len down; jumps to the enclosing function's `fail` label at the first
   difference.  All three arguments are modified. */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)

/* Case-insensitive counterpart built on string_cmp_ic(); uses the
   enclosing function's `encode` and jumps to `fail` on a mismatch. */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
    goto fail; \
} while(0)
01061
01062 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
01063 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
01064 {
01065 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01066 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01067 UChar *p1, *p2, *end1, *s2;
01068 int len1, len2;
01069
01070 s2 = *ps2;
01071 end1 = s1 + mblen;
01072 while (s1 < end1) {
01073 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
01074 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
01075 if (len1 != len2) return 0;
01076 p1 = buf1;
01077 p2 = buf2;
01078 while (len1-- > 0) {
01079 if (*p1 != *p2) return 0;
01080 p1++;
01081 p2++;
01082 }
01083 }
01084
01085 *ps2 = s2;
01086 return 1;
01087 }
01088
/* Like STRING_CMP, but reports the outcome in is_fail (1 = mismatch,
   0 = equal) instead of jumping.  s1, s2 and len are modified. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  while (len-- > 0) {\
    if (*s1++ != *s2++) {\
      is_fail = 1; break;\
    }\
  }\
} while(0)

/* Case-insensitive counterpart: is_fail is 1 when string_cmp_ic()
   returns 0, and 0 otherwise.  Uses the enclosing function's `encode`. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) != 0) \
    is_fail = 0; \
  else \
    is_fail = 1; \
} while(0)
01104
01105
/* Position tests against the subject bounds `str` / `end` of the
   enclosing function. */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)

/* Availability checks for the current position `s`: the limit is
   right_range when USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is
   defined and `end` otherwise.  DATA_ENSURE jumps to `fail` when fewer
   than n bytes remain. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#define DATA_ENSURE_CHECK1     (s < right_range)
#define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)
#else
#define DATA_ENSURE_CHECK1     (s < end)
#define DATA_ENSURE_CHECK(n)   (s + (n) <= end)
#endif
#define DATA_ENSURE(n)         if (! DATA_ENSURE_CHECK(n)) goto fail
01118
01119
01120 #ifdef USE_CAPTURE_HISTORY
01121 static int
01122 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
01123 OnigStackType* stk_top, UChar* str, regex_t* reg)
01124 {
01125 int n, r;
01126 OnigCaptureTreeNode* child;
01127 OnigStackType* k = *kp;
01128
01129 while (k < stk_top) {
01130 if (k->type == STK_MEM_START) {
01131 n = k->u.mem.num;
01132 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
01133 BIT_STATUS_AT(reg->capture_history, n) != 0) {
01134 child = history_node_new();
01135 CHECK_NULL_RETURN_MEMERR(child);
01136 child->group = n;
01137 child->beg = k->u.mem.pstr - str;
01138 r = history_tree_add_child(node, child);
01139 if (r != 0) {
01140 history_tree_free(child);
01141 return r;
01142 }
01143 *kp = (k + 1);
01144 r = make_capture_history_tree(child, kp, stk_top, str, reg);
01145 if (r != 0) return r;
01146
01147 k = *kp;
01148 child->end = k->u.mem.pstr - str;
01149 }
01150 }
01151 else if (k->type == STK_MEM_END) {
01152 if (k->u.mem.num == node->group) {
01153 node->end = k->u.mem.pstr - str;
01154 *kp = k;
01155 return 0;
01156 }
01157 }
01158 k++;
01159 }
01160
01161 return 1;
01162 }
01163 #endif
01164
01165 #ifdef USE_BACKREF_WITH_LEVEL
01166 static int mem_is_in_memp(int mem, int num, UChar* memp)
01167 {
01168 int i;
01169 MemNumType m;
01170
01171 for (i = 0; i < num; i++) {
01172 GET_MEMNUM_INC(m, memp);
01173 if (mem == (int )m) return 1;
01174 }
01175 return 0;
01176 }
01177
01178 static int backref_match_at_nested_level(regex_t* reg
01179 , OnigStackType* top, OnigStackType* stk_base
01180 , int ignore_case, int case_fold_flag
01181 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
01182 {
01183 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
01184 int level;
01185 OnigStackType* k;
01186
01187 level = 0;
01188 k = top;
01189 k--;
01190 while (k >= stk_base) {
01191 if (k->type == STK_CALL_FRAME) {
01192 level--;
01193 }
01194 else if (k->type == STK_RETURN) {
01195 level++;
01196 }
01197 else if (level == nest) {
01198 if (k->type == STK_MEM_START) {
01199 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
01200 pstart = k->u.mem.pstr;
01201 if (pend != NULL_UCHARP) {
01202 if (pend - pstart > send - *s) return 0;
01203 p = pstart;
01204 ss = *s;
01205
01206 if (ignore_case != 0) {
01207 if (string_cmp_ic(reg->enc, case_fold_flag,
01208 pstart, &ss, pend - pstart, send) == 0)
01209 return 0;
01210 }
01211 else {
01212 while (p < pend) {
01213 if (*p++ != *ss++) return 0;
01214 }
01215 }
01216
01217 *s = ss;
01218 return 1;
01219 }
01220 }
01221 }
01222 else if (k->type == STK_MEM_END) {
01223 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
01224 pend = k->u.mem.pstr;
01225 }
01226 }
01227 }
01228 k--;
01229 }
01230
01231 return 0;
01232 }
01233 #endif
01234
01235
01236 #ifdef ONIG_DEBUG_STATISTICS
01237
/*
 * Clock used for per-opcode timing.  With USE_TIMEOFDAY (defined right
 * here) GETTIME() samples gettimeofday() and TIMEDIFF() yields the elapsed
 * time in microseconds; otherwise times() is sampled and TIMEDIFF() is the
 * difference of the tms_utime fields.  `ts` and `te` hold the start and end
 * samples.
 */
#define USE_TIMEOFDAY

#ifdef USE_TIMEOFDAY
# ifdef HAVE_SYS_TIME_H
#  include <sys/time.h>
# endif
# ifdef HAVE_UNISTD_H
#  include <unistd.h>
# endif
static struct timeval ts, te;
# define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
# define TIMEDIFF(te,ts)   ((((te).tv_sec - (ts).tv_sec)*1000000) + \
                            ((te).tv_usec - (ts).tv_usec))
#else
# ifdef HAVE_SYS_TIMES_H
#  include <sys/times.h>
# endif
static struct tms ts, te;
# define GETTIME(t)        times(&(t))
# define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
#endif
01259
01260 static int OpCounter[256];
01261 static int OpPrevCounter[256];
01262 static unsigned long OpTime[256];
01263 static int OpCurr = OP_FINISH;
01264 static int OpPrevTarget = OP_FAIL;
01265 static int MaxStackDepth = 0;
01266
01267 #define MOP_IN(opcode) do {\
01268 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
01269 OpCurr = opcode;\
01270 OpCounter[opcode]++;\
01271 GETTIME(ts);\
01272 } while(0)
01273
01274 #define MOP_OUT do {\
01275 GETTIME(te);\
01276 OpTime[OpCurr] += TIMEDIFF(te, ts);\
01277 } while(0)
01278
01279 extern void
01280 onig_statistics_init(void)
01281 {
01282 int i;
01283 for (i = 0; i < 256; i++) {
01284 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
01285 }
01286 MaxStackDepth = 0;
01287 }
01288
01289 extern void
01290 onig_print_statistics(FILE* f)
01291 {
01292 int i;
01293 fprintf(f, " count prev time\n");
01294 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
01295 fprintf(f, "%8d: %8d: %10ld: %s\n",
01296 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
01297 }
01298 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
01299 }
01300
/*
 * Statistics build: advance the stack pointer `stk` by one entry and keep
 * MaxStackDepth at the largest distance from `stk_base` seen so far.
 */
#define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
01306
01307 #else
/* Without ONIG_DEBUG_STATISTICS the profiling hooks expand to nothing ... */
#define MOP_IN(opcode)
#define MOP_OUT

/* ... and advancing the stack pointer is a plain increment. */
#define STACK_INC     stk++
01312 #endif
01313
01314
01315
01316
/*
 * Match-offset record used when results are returned in POSIX style:
 * match_at() reinterprets msa->region as an array of these when
 * IS_POSIX_REGION(msa->options) holds.
 * NOTE(review): presumably this must stay layout-compatible with the
 * regmatch_t of the POSIX wrapper header - confirm.
 */
typedef int regoff_t;

typedef struct {
  regoff_t rm_so, rm_eo;   /* start / end offset relative to the subject start */
} posix_regmatch_t;
01323
01324 void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
01325 OnigEncoding enc);
01326
01327
01328
01329 static OnigPosition
01330 match_at(regex_t* reg, const UChar* str, const UChar* end,
01331 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
01332 const UChar* right_range,
01333 #endif
01334 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
01335 {
01336 static const UChar FinishCode[] = { OP_FINISH };
01337
01338 int i, num_mem, pop_level;
01339 ptrdiff_t n, best_len;
01340 LengthType tlen, tlen2;
01341 MemNumType mem;
01342 RelAddrType addr;
01343 OnigOptionType option = reg->options;
01344 OnigEncoding encode = reg->enc;
01345 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
01346 UChar *s, *q, *sbegin;
01347 UChar *p = reg->p;
01348 UChar *pkeep;
01349 char *alloca_base;
01350 char *xmalloc_base = NULL;
01351 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
01352 OnigStackType *stkp;
01353 OnigStackIndex si;
01354 OnigStackIndex *repeat_stk;
01355 OnigStackIndex *mem_start_stk, *mem_end_stk;
01356 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01357 int scv;
01358 unsigned char* state_check_buff = msa->state_check_buff;
01359 int num_comb_exp_check = reg->num_comb_exp_check;
01360 #endif
01361
01362 #ifdef USE_SUBEXP_CALL
01363
01364 n = reg->num_repeat + (reg->num_mem + 1) * 2;
01365
01366 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
01367 pop_level = reg->stack_pop_level;
01368 num_mem = reg->num_mem;
01369 repeat_stk = (OnigStackIndex* )alloca_base;
01370
01371 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
01372 mem_end_stk = mem_start_stk + (num_mem + 1);
01373 for (i = 0; i <= num_mem; i++) {
01374 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
01375 }
01376 #else
01377
01378 n = reg->num_repeat + reg->num_mem * 2;
01379
01380 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
01381 pop_level = reg->stack_pop_level;
01382 num_mem = reg->num_mem;
01383 repeat_stk = (OnigStackIndex* )alloca_base;
01384
01385 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
01386 mem_end_stk = mem_start_stk + num_mem;
01387 mem_start_stk--;
01388
01389 mem_end_stk--;
01390
01391 for (i = 1; i <= num_mem; i++) {
01392 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
01393 }
01394 #endif
01395
01396 #ifdef ONIG_DEBUG_MATCH
01397 fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n",
01398 (intptr_t)str, str, (intptr_t)end, end, (intptr_t)sstart, sstart, (intptr_t)sprev, sprev);
01399 fprintf(stderr, "size: %d, start offset: %d\n",
01400 (int )(end - str), (int )(sstart - str));
01401 #endif
01402
01403 STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode);
01404 best_len = ONIG_MISMATCH;
01405 s = (UChar* )sstart;
01406 pkeep = (UChar* )sstart;
01407 while (1) {
01408 #ifdef ONIG_DEBUG_MATCH
01409 if (s) {
01410 UChar *q, *bp, buf[50];
01411 int len;
01412 fprintf(stderr, "%4d> \"", (int )(s - str));
01413 bp = buf;
01414 if (*p != OP_FINISH) {
01415 for (i = 0, q = s; i < 7 && q < end; i++) {
01416 len = enclen(encode, q, end);
01417 while (len-- > 0) *bp++ = *q++;
01418 }
01419 }
01420 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
01421 else { xmemcpy(bp, "\"", 1); bp += 1; }
01422 *bp = 0;
01423 fputs((char* )buf, stderr);
01424 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
01425 onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode);
01426 fprintf(stderr, "\n");
01427 }
01428 #endif
01429
01430 sbegin = s;
01431 switch (*p++) {
01432 case OP_END: MOP_IN(OP_END);
01433 n = s - sstart;
01434 if (n > best_len) {
01435 OnigRegion* region;
01436 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
01437 if (IS_FIND_LONGEST(option)) {
01438 if (n > msa->best_len) {
01439 msa->best_len = n;
01440 msa->best_s = (UChar* )sstart;
01441 }
01442 else
01443 goto end_best_len;
01444 }
01445 #endif
01446 best_len = n;
01447 region = msa->region;
01448 if (region) {
01449 #ifdef USE_POSIX_API_REGION_OPTION
01450 if (IS_POSIX_REGION(msa->options)) {
01451 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
01452
01453 rmt[0].rm_so = (regoff_t )(((pkeep > s) ? s : pkeep) - str);
01454 rmt[0].rm_eo = (regoff_t )(s - str);
01455 for (i = 1; i <= num_mem; i++) {
01456 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
01457 if (BIT_STATUS_AT(reg->bt_mem_start, i))
01458 rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
01459 else
01460 rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
01461
01462 rmt[i].rm_eo = (regoff_t )((BIT_STATUS_AT(reg->bt_mem_end, i)
01463 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
01464 : (UChar* )((void* )mem_end_stk[i])) - str);
01465 }
01466 else {
01467 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
01468 }
01469 }
01470 }
01471 else {
01472 #endif
01473 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
01474 region->end[0] = s - str;
01475 for (i = 1; i <= num_mem; i++) {
01476 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
01477 if (BIT_STATUS_AT(reg->bt_mem_start, i))
01478 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
01479 else
01480 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
01481
01482 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
01483 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
01484 : (UChar* )((void* )mem_end_stk[i])) - str;
01485 }
01486 else {
01487 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
01488 }
01489 }
01490
01491 #ifdef USE_CAPTURE_HISTORY
01492 if (reg->capture_history != 0) {
01493 int r;
01494 OnigCaptureTreeNode* node;
01495
01496 if (IS_NULL(region->history_root)) {
01497 region->history_root = node = history_node_new();
01498 CHECK_NULL_RETURN_MEMERR(node);
01499 }
01500 else {
01501 node = region->history_root;
01502 history_tree_clear(node);
01503 }
01504
01505 node->group = 0;
01506 node->beg = ((pkeep > s) ? s : pkeep) - str;
01507 node->end = s - str;
01508
01509 stkp = stk_base;
01510 r = make_capture_history_tree(region->history_root, &stkp,
01511 stk, (UChar* )str, reg);
01512 if (r < 0) {
01513 best_len = r;
01514 goto finish;
01515 }
01516 }
01517 #endif
01518 #ifdef USE_POSIX_API_REGION_OPTION
01519 }
01520 #endif
01521 }
01522 }
01523
01524 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
01525 end_best_len:
01526 #endif
01527 MOP_OUT;
01528
01529 if (IS_FIND_CONDITION(option)) {
01530 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
01531 best_len = ONIG_MISMATCH;
01532 goto fail;
01533 }
01534 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
01535 goto fail;
01536 }
01537 }
01538
01539
01540 goto finish;
01541 break;
01542
01543 case OP_EXACT1: MOP_IN(OP_EXACT1);
01544 #if 0
01545 DATA_ENSURE(1);
01546 if (*p != *s) goto fail;
01547 p++; s++;
01548 #endif
01549 if (*p != *s++) goto fail;
01550 DATA_ENSURE(0);
01551 p++;
01552 MOP_OUT;
01553 break;
01554
01555 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
01556 {
01557 int len;
01558 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01559
01560 DATA_ENSURE(1);
01561 len = ONIGENC_MBC_CASE_FOLD(encode,
01562
01563 case_fold_flag,
01564 &s, end, lowbuf);
01565 DATA_ENSURE(0);
01566 q = lowbuf;
01567 while (len-- > 0) {
01568 if (*p != *q) {
01569 goto fail;
01570 }
01571 p++; q++;
01572 }
01573 }
01574 MOP_OUT;
01575 break;
01576
01577 case OP_EXACT2: MOP_IN(OP_EXACT2);
01578 DATA_ENSURE(2);
01579 if (*p != *s) goto fail;
01580 p++; s++;
01581 if (*p != *s) goto fail;
01582 sprev = s;
01583 p++; s++;
01584 MOP_OUT;
01585 continue;
01586 break;
01587
01588 case OP_EXACT3: MOP_IN(OP_EXACT3);
01589 DATA_ENSURE(3);
01590 if (*p != *s) goto fail;
01591 p++; s++;
01592 if (*p != *s) goto fail;
01593 p++; s++;
01594 if (*p != *s) goto fail;
01595 sprev = s;
01596 p++; s++;
01597 MOP_OUT;
01598 continue;
01599 break;
01600
01601 case OP_EXACT4: MOP_IN(OP_EXACT4);
01602 DATA_ENSURE(4);
01603 if (*p != *s) goto fail;
01604 p++; s++;
01605 if (*p != *s) goto fail;
01606 p++; s++;
01607 if (*p != *s) goto fail;
01608 p++; s++;
01609 if (*p != *s) goto fail;
01610 sprev = s;
01611 p++; s++;
01612 MOP_OUT;
01613 continue;
01614 break;
01615
01616 case OP_EXACT5: MOP_IN(OP_EXACT5);
01617 DATA_ENSURE(5);
01618 if (*p != *s) goto fail;
01619 p++; s++;
01620 if (*p != *s) goto fail;
01621 p++; s++;
01622 if (*p != *s) goto fail;
01623 p++; s++;
01624 if (*p != *s) goto fail;
01625 p++; s++;
01626 if (*p != *s) goto fail;
01627 sprev = s;
01628 p++; s++;
01629 MOP_OUT;
01630 continue;
01631 break;
01632
01633 case OP_EXACTN: MOP_IN(OP_EXACTN);
01634 GET_LENGTH_INC(tlen, p);
01635 DATA_ENSURE(tlen);
01636 while (tlen-- > 0) {
01637 if (*p++ != *s++) goto fail;
01638 }
01639 sprev = s - 1;
01640 MOP_OUT;
01641 continue;
01642 break;
01643
01644 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
01645 {
01646 int len;
01647 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01648
01649 GET_LENGTH_INC(tlen, p);
01650 endp = p + tlen;
01651
01652 while (p < endp) {
01653 sprev = s;
01654 DATA_ENSURE(1);
01655 len = ONIGENC_MBC_CASE_FOLD(encode,
01656
01657 case_fold_flag,
01658 &s, end, lowbuf);
01659 DATA_ENSURE(0);
01660 q = lowbuf;
01661 while (len-- > 0) {
01662 if (*p != *q) goto fail;
01663 p++; q++;
01664 }
01665 }
01666 }
01667
01668 MOP_OUT;
01669 continue;
01670 break;
01671
01672 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
01673 DATA_ENSURE(2);
01674 if (*p != *s) goto fail;
01675 p++; s++;
01676 if (*p != *s) goto fail;
01677 p++; s++;
01678 MOP_OUT;
01679 break;
01680
01681 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
01682 DATA_ENSURE(4);
01683 if (*p != *s) goto fail;
01684 p++; s++;
01685 if (*p != *s) goto fail;
01686 p++; s++;
01687 sprev = s;
01688 if (*p != *s) goto fail;
01689 p++; s++;
01690 if (*p != *s) goto fail;
01691 p++; s++;
01692 MOP_OUT;
01693 continue;
01694 break;
01695
01696 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
01697 DATA_ENSURE(6);
01698 if (*p != *s) goto fail;
01699 p++; s++;
01700 if (*p != *s) goto fail;
01701 p++; s++;
01702 if (*p != *s) goto fail;
01703 p++; s++;
01704 if (*p != *s) goto fail;
01705 p++; s++;
01706 sprev = s;
01707 if (*p != *s) goto fail;
01708 p++; s++;
01709 if (*p != *s) goto fail;
01710 p++; s++;
01711 MOP_OUT;
01712 continue;
01713 break;
01714
01715 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
01716 GET_LENGTH_INC(tlen, p);
01717 DATA_ENSURE(tlen * 2);
01718 while (tlen-- > 0) {
01719 if (*p != *s) goto fail;
01720 p++; s++;
01721 if (*p != *s) goto fail;
01722 p++; s++;
01723 }
01724 sprev = s - 2;
01725 MOP_OUT;
01726 continue;
01727 break;
01728
01729 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
01730 GET_LENGTH_INC(tlen, p);
01731 DATA_ENSURE(tlen * 3);
01732 while (tlen-- > 0) {
01733 if (*p != *s) goto fail;
01734 p++; s++;
01735 if (*p != *s) goto fail;
01736 p++; s++;
01737 if (*p != *s) goto fail;
01738 p++; s++;
01739 }
01740 sprev = s - 3;
01741 MOP_OUT;
01742 continue;
01743 break;
01744
01745 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
01746 GET_LENGTH_INC(tlen, p);
01747 GET_LENGTH_INC(tlen2, p);
01748 tlen2 *= tlen;
01749 DATA_ENSURE(tlen2);
01750 while (tlen2-- > 0) {
01751 if (*p != *s) goto fail;
01752 p++; s++;
01753 }
01754 sprev = s - tlen;
01755 MOP_OUT;
01756 continue;
01757 break;
01758
01759 case OP_CCLASS: MOP_IN(OP_CCLASS);
01760 DATA_ENSURE(1);
01761 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
01762 p += SIZE_BITSET;
01763 s += enclen(encode, s, end);
01764 MOP_OUT;
01765 break;
01766
01767 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
01768 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
01769
01770 cclass_mb:
01771 GET_LENGTH_INC(tlen, p);
01772 {
01773 OnigCodePoint code;
01774 UChar *ss;
01775 int mb_len;
01776
01777 DATA_ENSURE(1);
01778 mb_len = enclen(encode, s, end);
01779 DATA_ENSURE(mb_len);
01780 ss = s;
01781 s += mb_len;
01782 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
01783
01784 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
01785 if (! onig_is_in_code_range(p, code)) goto fail;
01786 #else
01787 q = p;
01788 ALIGNMENT_RIGHT(q);
01789 if (! onig_is_in_code_range(q, code)) goto fail;
01790 #endif
01791 }
01792 p += tlen;
01793 MOP_OUT;
01794 break;
01795
01796 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
01797 DATA_ENSURE(1);
01798 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
01799 p += SIZE_BITSET;
01800 goto cclass_mb;
01801 }
01802 else {
01803 if (BITSET_AT(((BitSetRef )p), *s) == 0)
01804 goto fail;
01805
01806 p += SIZE_BITSET;
01807 GET_LENGTH_INC(tlen, p);
01808 p += tlen;
01809 s++;
01810 }
01811 MOP_OUT;
01812 break;
01813
01814 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
01815 DATA_ENSURE(1);
01816 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
01817 p += SIZE_BITSET;
01818 s += enclen(encode, s, end);
01819 MOP_OUT;
01820 break;
01821
01822 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
01823 DATA_ENSURE(1);
01824 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
01825 s++;
01826 GET_LENGTH_INC(tlen, p);
01827 p += tlen;
01828 goto cc_mb_not_success;
01829 }
01830
01831 cclass_mb_not:
01832 GET_LENGTH_INC(tlen, p);
01833 {
01834 OnigCodePoint code;
01835 UChar *ss;
01836 int mb_len = enclen(encode, s, end);
01837
01838 if (! DATA_ENSURE_CHECK(mb_len)) {
01839 DATA_ENSURE(1);
01840 s = (UChar* )end;
01841 p += tlen;
01842 goto cc_mb_not_success;
01843 }
01844
01845 ss = s;
01846 s += mb_len;
01847 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
01848
01849 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
01850 if (onig_is_in_code_range(p, code)) goto fail;
01851 #else
01852 q = p;
01853 ALIGNMENT_RIGHT(q);
01854 if (onig_is_in_code_range(q, code)) goto fail;
01855 #endif
01856 }
01857 p += tlen;
01858
01859 cc_mb_not_success:
01860 MOP_OUT;
01861 break;
01862
01863 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
01864 DATA_ENSURE(1);
01865 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
01866 p += SIZE_BITSET;
01867 goto cclass_mb_not;
01868 }
01869 else {
01870 if (BITSET_AT(((BitSetRef )p), *s) != 0)
01871 goto fail;
01872
01873 p += SIZE_BITSET;
01874 GET_LENGTH_INC(tlen, p);
01875 p += tlen;
01876 s++;
01877 }
01878 MOP_OUT;
01879 break;
01880
01881 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
01882 {
01883 OnigCodePoint code;
01884 void *node;
01885 int mb_len;
01886 UChar *ss;
01887
01888 DATA_ENSURE(1);
01889 GET_POINTER_INC(node, p);
01890 mb_len = enclen(encode, s, end);
01891 ss = s;
01892 s += mb_len;
01893 DATA_ENSURE(0);
01894 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
01895 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
01896 }
01897 MOP_OUT;
01898 break;
01899
01900 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
01901 DATA_ENSURE(1);
01902 n = enclen(encode, s, end);
01903 DATA_ENSURE(n);
01904 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01905 s += n;
01906 MOP_OUT;
01907 break;
01908
01909 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
01910 DATA_ENSURE(1);
01911 n = enclen(encode, s, end);
01912 DATA_ENSURE(n);
01913 s += n;
01914 MOP_OUT;
01915 break;
01916
01917 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
01918 while (DATA_ENSURE_CHECK1) {
01919 STACK_PUSH_ALT(p, s, sprev, pkeep);
01920 n = enclen(encode, s, end);
01921 DATA_ENSURE(n);
01922 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01923 sprev = s;
01924 s += n;
01925 }
01926 MOP_OUT;
01927 break;
01928
01929 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
01930 while (DATA_ENSURE_CHECK1) {
01931 STACK_PUSH_ALT(p, s, sprev, pkeep);
01932 n = enclen(encode, s, end);
01933 if (n > 1) {
01934 DATA_ENSURE(n);
01935 sprev = s;
01936 s += n;
01937 }
01938 else {
01939 sprev = s;
01940 s++;
01941 }
01942 }
01943 MOP_OUT;
01944 break;
01945
01946 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
01947 while (DATA_ENSURE_CHECK1) {
01948 if (*p == *s) {
01949 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
01950 }
01951 n = enclen(encode, s, end);
01952 DATA_ENSURE(n);
01953 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01954 sprev = s;
01955 s += n;
01956 }
01957 p++;
01958 MOP_OUT;
01959 break;
01960
01961 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01962 while (DATA_ENSURE_CHECK1) {
01963 if (*p == *s) {
01964 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
01965 }
01966 n = enclen(encode, s, end);
01967 if (n > 1) {
01968 DATA_ENSURE(n);
01969 sprev = s;
01970 s += n;
01971 }
01972 else {
01973 sprev = s;
01974 s++;
01975 }
01976 }
01977 p++;
01978 MOP_OUT;
01979 break;
01980
01981 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01982 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
01983 GET_STATE_CHECK_NUM_INC(mem, p);
01984 while (DATA_ENSURE_CHECK1) {
01985 STATE_CHECK_VAL(scv, mem);
01986 if (scv) goto fail;
01987
01988 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
01989 n = enclen(encode, s, end);
01990 DATA_ENSURE(n);
01991 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01992 sprev = s;
01993 s += n;
01994 }
01995 MOP_OUT;
01996 break;
01997
01998 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
01999 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
02000
02001 GET_STATE_CHECK_NUM_INC(mem, p);
02002 while (DATA_ENSURE_CHECK1) {
02003 STATE_CHECK_VAL(scv, mem);
02004 if (scv) goto fail;
02005
02006 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
02007 n = enclen(encode, s, end);
02008 if (n > 1) {
02009 DATA_ENSURE(n);
02010 sprev = s;
02011 s += n;
02012 }
02013 else {
02014 sprev = s;
02015 s++;
02016 }
02017 }
02018 MOP_OUT;
02019 break;
02020 #endif
02021
02022 case OP_WORD: MOP_IN(OP_WORD);
02023 DATA_ENSURE(1);
02024 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
02025 goto fail;
02026
02027 s += enclen(encode, s, end);
02028 MOP_OUT;
02029 break;
02030
02031 case OP_ASCII_WORD: MOP_IN(OP_ASCII_WORD);
02032 DATA_ENSURE(1);
02033 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02034 goto fail;
02035
02036 s += enclen(encode, s, end);
02037 MOP_OUT;
02038 break;
02039
02040 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
02041 DATA_ENSURE(1);
02042 if (ONIGENC_IS_MBC_WORD(encode, s, end))
02043 goto fail;
02044
02045 s += enclen(encode, s, end);
02046 MOP_OUT;
02047 break;
02048
02049 case OP_NOT_ASCII_WORD: MOP_IN(OP_NOT_ASCII_WORD);
02050 DATA_ENSURE(1);
02051 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02052 goto fail;
02053
02054 s += enclen(encode, s, end);
02055 MOP_OUT;
02056 break;
02057
02058 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
02059 if (ON_STR_BEGIN(s)) {
02060 DATA_ENSURE(1);
02061 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
02062 goto fail;
02063 }
02064 else if (ON_STR_END(s)) {
02065 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
02066 goto fail;
02067 }
02068 else {
02069 if (ONIGENC_IS_MBC_WORD(encode, s, end)
02070 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
02071 goto fail;
02072 }
02073 MOP_OUT;
02074 continue;
02075 break;
02076
02077 case OP_ASCII_WORD_BOUND: MOP_IN(OP_ASCII_WORD_BOUND);
02078 if (ON_STR_BEGIN(s)) {
02079 DATA_ENSURE(1);
02080 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02081 goto fail;
02082 }
02083 else if (ON_STR_END(s)) {
02084 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02085 goto fail;
02086 }
02087 else {
02088 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
02089 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02090 goto fail;
02091 }
02092 MOP_OUT;
02093 continue;
02094 break;
02095
02096 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
02097 if (ON_STR_BEGIN(s)) {
02098 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
02099 goto fail;
02100 }
02101 else if (ON_STR_END(s)) {
02102 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
02103 goto fail;
02104 }
02105 else {
02106 if (ONIGENC_IS_MBC_WORD(encode, s, end)
02107 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
02108 goto fail;
02109 }
02110 MOP_OUT;
02111 continue;
02112 break;
02113
02114 case OP_NOT_ASCII_WORD_BOUND: MOP_IN(OP_NOT_ASCII_WORD_BOUND);
02115 if (ON_STR_BEGIN(s)) {
02116 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02117 goto fail;
02118 }
02119 else if (ON_STR_END(s)) {
02120 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02121 goto fail;
02122 }
02123 else {
02124 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
02125 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02126 goto fail;
02127 }
02128 MOP_OUT;
02129 continue;
02130 break;
02131
02132 #ifdef USE_WORD_BEGIN_END
02133 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
02134 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
02135 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
02136 MOP_OUT;
02137 continue;
02138 }
02139 }
02140 goto fail;
02141 break;
02142
02143 case OP_ASCII_WORD_BEGIN: MOP_IN(OP_ASCII_WORD_BEGIN);
02144 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
02145 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
02146 MOP_OUT;
02147 continue;
02148 }
02149 }
02150 goto fail;
02151 break;
02152
02153 case OP_WORD_END: MOP_IN(OP_WORD_END);
02154 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
02155 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
02156 MOP_OUT;
02157 continue;
02158 }
02159 }
02160 goto fail;
02161 break;
02162
02163 case OP_ASCII_WORD_END: MOP_IN(OP_ASCII_WORD_END);
02164 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
02165 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
02166 MOP_OUT;
02167 continue;
02168 }
02169 }
02170 goto fail;
02171 break;
02172 #endif
02173
02174 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
02175 if (! ON_STR_BEGIN(s)) goto fail;
02176
02177 MOP_OUT;
02178 continue;
02179 break;
02180
02181 case OP_END_BUF: MOP_IN(OP_END_BUF);
02182 if (! ON_STR_END(s)) goto fail;
02183
02184 MOP_OUT;
02185 continue;
02186 break;
02187
02188 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
02189 op_begin_line:
02190 if (ON_STR_BEGIN(s)) {
02191 if (IS_NOTBOL(msa->options)) goto fail;
02192 MOP_OUT;
02193 continue;
02194 }
02195 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
02196 #ifdef USE_CRNL_AS_LINE_TERMINATOR
02197 && !(IS_NEWLINE_CRLF(option)
02198 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
02199 #endif
02200 && !ON_STR_END(s)) {
02201 MOP_OUT;
02202 continue;
02203 }
02204 goto fail;
02205 break;
02206
02207 case OP_END_LINE: MOP_IN(OP_END_LINE);
02208 if (ON_STR_END(s)) {
02209 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02210 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
02211 #endif
02212 if (IS_NOTEOL(msa->options)) goto fail;
02213 MOP_OUT;
02214 continue;
02215 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02216 }
02217 #endif
02218 }
02219 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
02220 MOP_OUT;
02221 continue;
02222 }
02223 goto fail;
02224 break;
02225
02226 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
02227 if (ON_STR_END(s)) {
02228 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02229 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
02230 #endif
02231 if (IS_NOTEOL(msa->options)) goto fail;
02232 MOP_OUT;
02233 continue;
02234 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02235 }
02236 #endif
02237 }
02238 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
02239 UChar* ss = s + enclen(encode, s, end);
02240 if (ON_STR_END(ss)) {
02241 MOP_OUT;
02242 continue;
02243 }
02244 #ifdef USE_CRNL_AS_LINE_TERMINATOR
02245 else if (IS_NEWLINE_CRLF(option)
02246 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
02247 ss += enclen(encode, ss, end);
02248 if (ON_STR_END(ss)) {
02249 MOP_OUT;
02250 continue;
02251 }
02252 }
02253 #endif
02254 }
02255 goto fail;
02256 break;
02257
02258 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
02259 if (s != msa->gpos)
02260 goto fail;
02261
02262 MOP_OUT;
02263 continue;
02264 break;
02265
02266 case OP_BEGIN_POS_OR_LINE: MOP_IN(OP_BEGIN_POS_OR_LINE);
02267 if (s != msa->gpos)
02268 goto op_begin_line;
02269
02270 MOP_OUT;
02271 continue;
02272 break;
02273
02274 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
02275 GET_MEMNUM_INC(mem, p);
02276 STACK_PUSH_MEM_START(mem, s);
02277 MOP_OUT;
02278 continue;
02279 break;
02280
02281 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
02282 GET_MEMNUM_INC(mem, p);
02283 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
02284 MOP_OUT;
02285 continue;
02286 break;
02287
02288 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
02289 GET_MEMNUM_INC(mem, p);
02290 STACK_PUSH_MEM_END(mem, s);
02291 MOP_OUT;
02292 continue;
02293 break;
02294
02295 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
02296 GET_MEMNUM_INC(mem, p);
02297 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
02298 MOP_OUT;
02299 continue;
02300 break;
02301
02302 case OP_KEEP: MOP_IN(OP_KEEP);
02303 pkeep = s;
02304 MOP_OUT;
02305 continue;
02306 break;
02307
02308 #ifdef USE_SUBEXP_CALL
02309 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
02310 GET_MEMNUM_INC(mem, p);
02311 STACK_GET_MEM_START(mem, stkp);
02312 STACK_PUSH_MEM_END(mem, s);
02313 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
02314 MOP_OUT;
02315 continue;
02316 break;
02317
02318 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
02319 GET_MEMNUM_INC(mem, p);
02320 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
02321 STACK_GET_MEM_START(mem, stkp);
02322
02323 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02324 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
02325 else
02326 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
02327
02328 STACK_PUSH_MEM_END_MARK(mem);
02329 MOP_OUT;
02330 continue;
02331 break;
02332 #endif
02333
02334 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
02335 mem = 1;
02336 goto backref;
02337 break;
02338
02339 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
02340 mem = 2;
02341 goto backref;
02342 break;
02343
02344 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
02345 GET_MEMNUM_INC(mem, p);
02346 backref:
02347 {
02348 int len;
02349 UChar *pstart, *pend;
02350
02351
02352
02353 if (mem > num_mem) goto fail;
02354 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
02355 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
02356
02357 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02358 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02359 else
02360 pstart = (UChar* )((void* )mem_start_stk[mem]);
02361
02362 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02363 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02364 : (UChar* )((void* )mem_end_stk[mem]));
02365 n = pend - pstart;
02366 DATA_ENSURE(n);
02367 sprev = s;
02368 STRING_CMP(pstart, s, n);
02369 while (sprev + (len = enclen(encode, sprev, end)) < s)
02370 sprev += len;
02371
02372 MOP_OUT;
02373 continue;
02374 }
02375 break;
02376
02377 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
02378 GET_MEMNUM_INC(mem, p);
02379 {
02380 int len;
02381 UChar *pstart, *pend;
02382
02383
02384
02385 if (mem > num_mem) goto fail;
02386 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
02387 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
02388
02389 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02390 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02391 else
02392 pstart = (UChar* )((void* )mem_start_stk[mem]);
02393
02394 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02395 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02396 : (UChar* )((void* )mem_end_stk[mem]));
02397 n = pend - pstart;
02398 DATA_ENSURE(n);
02399 sprev = s;
02400 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
02401 while (sprev + (len = enclen(encode, sprev, end)) < s)
02402 sprev += len;
02403
02404 MOP_OUT;
02405 continue;
02406 }
02407 break;
02408
02409 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
02410 {
02411 int len, is_fail;
02412 UChar *pstart, *pend, *swork;
02413
02414 GET_LENGTH_INC(tlen, p);
02415 for (i = 0; i < tlen; i++) {
02416 GET_MEMNUM_INC(mem, p);
02417
02418 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
02419 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
02420
02421 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02422 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02423 else
02424 pstart = (UChar* )((void* )mem_start_stk[mem]);
02425
02426 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02427 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02428 : (UChar* )((void* )mem_end_stk[mem]));
02429 n = pend - pstart;
02430 DATA_ENSURE(n);
02431 sprev = s;
02432 swork = s;
02433 STRING_CMP_VALUE(pstart, swork, n, is_fail);
02434 if (is_fail) continue;
02435 s = swork;
02436 while (sprev + (len = enclen(encode, sprev, end)) < s)
02437 sprev += len;
02438
02439 p += (SIZE_MEMNUM * (tlen - i - 1));
02440 break;
02441 }
02442 if (i == tlen) goto fail;
02443 MOP_OUT;
02444 continue;
02445 }
02446 break;
02447
02448 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
02449 {
02450 int len, is_fail;
02451 UChar *pstart, *pend, *swork;
02452
02453 GET_LENGTH_INC(tlen, p);
02454 for (i = 0; i < tlen; i++) {
02455 GET_MEMNUM_INC(mem, p);
02456
02457 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
02458 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
02459
02460 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02461 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02462 else
02463 pstart = (UChar* )((void* )mem_start_stk[mem]);
02464
02465 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02466 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02467 : (UChar* )((void* )mem_end_stk[mem]));
02468 n = pend - pstart;
02469 DATA_ENSURE(n);
02470 sprev = s;
02471 swork = s;
02472 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
02473 if (is_fail) continue;
02474 s = swork;
02475 while (sprev + (len = enclen(encode, sprev, end)) < s)
02476 sprev += len;
02477
02478 p += (SIZE_MEMNUM * (tlen - i - 1));
02479 break;
02480 }
02481 if (i == tlen) goto fail;
02482 MOP_OUT;
02483 continue;
02484 }
02485 break;
02486
02487 #ifdef USE_BACKREF_WITH_LEVEL
02488 case OP_BACKREF_WITH_LEVEL:
02489 {
02490 int len;
02491 OnigOptionType ic;
02492 LengthType level;
02493
02494 GET_OPTION_INC(ic, p);
02495 GET_LENGTH_INC(level, p);
02496 GET_LENGTH_INC(tlen, p);
02497
02498 sprev = s;
02499 if (backref_match_at_nested_level(reg, stk, stk_base, ic
02500 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
02501 while (sprev + (len = enclen(encode, sprev, end)) < s)
02502 sprev += len;
02503
02504 p += (SIZE_MEMNUM * tlen);
02505 }
02506 else
02507 goto fail;
02508
02509 MOP_OUT;
02510 continue;
02511 }
02512
02513 break;
02514 #endif
02515
02516 #if 0
02517 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
02518 GET_OPTION_INC(option, p);
02519 STACK_PUSH_ALT(p, s, sprev, pkeep);
02520 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
02521 MOP_OUT;
02522 continue;
02523 break;
02524
02525 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
02526 GET_OPTION_INC(option, p);
02527 MOP_OUT;
02528 continue;
02529 break;
02530 #endif
02531
02532 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
02533 GET_MEMNUM_INC(mem, p);
02534 STACK_PUSH_NULL_CHECK_START(mem, s);
02535 MOP_OUT;
02536 continue;
02537 break;
02538
02539 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
02540 {
02541 int isnull;
02542
02543 GET_MEMNUM_INC(mem, p);
02544 STACK_NULL_CHECK(isnull, mem, s);
02545 if (isnull) {
02546 #ifdef ONIG_DEBUG_MATCH
02547 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIdPTR" (%p)\n",
02548 (int )mem, (intptr_t )s, s);
02549 #endif
02550 null_check_found:
02551
02552 switch (*p++) {
02553 case OP_JUMP:
02554 case OP_PUSH:
02555 p += SIZE_RELADDR;
02556 break;
02557 case OP_REPEAT_INC:
02558 case OP_REPEAT_INC_NG:
02559 case OP_REPEAT_INC_SG:
02560 case OP_REPEAT_INC_NG_SG:
02561 p += SIZE_MEMNUM;
02562 break;
02563 default:
02564 goto unexpected_bytecode_error;
02565 break;
02566 }
02567 }
02568 }
02569 MOP_OUT;
02570 continue;
02571 break;
02572
02573 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02574 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
02575 {
02576 int isnull;
02577
02578 GET_MEMNUM_INC(mem, p);
02579 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
02580 if (isnull) {
02581 #ifdef ONIG_DEBUG_MATCH
02582 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR" (%p)\n",
02583 (int )mem, (intptr_t )s, s);
02584 #endif
02585 if (isnull == -1) goto fail;
02586 goto null_check_found;
02587 }
02588 }
02589 MOP_OUT;
02590 continue;
02591 break;
02592 #endif
02593
02594 #ifdef USE_SUBEXP_CALL
02595 case OP_NULL_CHECK_END_MEMST_PUSH:
02596 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
02597 {
02598 int isnull;
02599
02600 GET_MEMNUM_INC(mem, p);
02601 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02602 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
02603 #else
02604 STACK_NULL_CHECK_REC(isnull, mem, s);
02605 #endif
02606 if (isnull) {
02607 #ifdef ONIG_DEBUG_MATCH
02608 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR" (%p)\n",
02609 (int )mem, (intptr_t )s, s);
02610 #endif
02611 if (isnull == -1) goto fail;
02612 goto null_check_found;
02613 }
02614 else {
02615 STACK_PUSH_NULL_CHECK_END(mem);
02616 }
02617 }
02618 MOP_OUT;
02619 continue;
02620 break;
02621 #endif
02622
02623 case OP_JUMP: MOP_IN(OP_JUMP);
02624 GET_RELADDR_INC(addr, p);
02625 p += addr;
02626 MOP_OUT;
02627 CHECK_INTERRUPT_IN_MATCH_AT;
02628 continue;
02629 break;
02630
02631 case OP_PUSH: MOP_IN(OP_PUSH);
02632 GET_RELADDR_INC(addr, p);
02633 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02634 MOP_OUT;
02635 continue;
02636 break;
02637
02638 #ifdef USE_COMBINATION_EXPLOSION_CHECK
02639 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
02640 GET_STATE_CHECK_NUM_INC(mem, p);
02641 STATE_CHECK_VAL(scv, mem);
02642 if (scv) goto fail;
02643
02644 GET_RELADDR_INC(addr, p);
02645 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
02646 MOP_OUT;
02647 continue;
02648 break;
02649
02650 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
02651 GET_STATE_CHECK_NUM_INC(mem, p);
02652 GET_RELADDR_INC(addr, p);
02653 STATE_CHECK_VAL(scv, mem);
02654 if (scv) {
02655 p += addr;
02656 }
02657 else {
02658 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
02659 }
02660 MOP_OUT;
02661 continue;
02662 break;
02663
02664 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
02665 GET_STATE_CHECK_NUM_INC(mem, p);
02666 STATE_CHECK_VAL(scv, mem);
02667 if (scv) goto fail;
02668
02669 STACK_PUSH_STATE_CHECK(s, mem);
02670 MOP_OUT;
02671 continue;
02672 break;
02673 #endif
02674
02675 case OP_POP: MOP_IN(OP_POP);
02676 STACK_POP_ONE;
02677 MOP_OUT;
02678 continue;
02679 break;
02680
02681 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
02682 GET_RELADDR_INC(addr, p);
02683 if (*p == *s && DATA_ENSURE_CHECK1) {
02684 p++;
02685 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02686 MOP_OUT;
02687 continue;
02688 }
02689 p += (addr + 1);
02690 MOP_OUT;
02691 continue;
02692 break;
02693
02694 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
02695 GET_RELADDR_INC(addr, p);
02696 if (*p == *s) {
02697 p++;
02698 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02699 MOP_OUT;
02700 continue;
02701 }
02702 p++;
02703 MOP_OUT;
02704 continue;
02705 break;
02706
02707 case OP_REPEAT: MOP_IN(OP_REPEAT);
02708 {
02709 GET_MEMNUM_INC(mem, p);
02710 GET_RELADDR_INC(addr, p);
02711
02712 STACK_ENSURE(1);
02713 repeat_stk[mem] = GET_STACK_INDEX(stk);
02714 STACK_PUSH_REPEAT(mem, p);
02715
02716 if (reg->repeat_range[mem].lower == 0) {
02717 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02718 }
02719 }
02720 MOP_OUT;
02721 continue;
02722 break;
02723
02724 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
02725 {
02726 GET_MEMNUM_INC(mem, p);
02727 GET_RELADDR_INC(addr, p);
02728
02729 STACK_ENSURE(1);
02730 repeat_stk[mem] = GET_STACK_INDEX(stk);
02731 STACK_PUSH_REPEAT(mem, p);
02732
02733 if (reg->repeat_range[mem].lower == 0) {
02734 STACK_PUSH_ALT(p, s, sprev, pkeep);
02735 p += addr;
02736 }
02737 }
02738 MOP_OUT;
02739 continue;
02740 break;
02741
02742 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
02743 GET_MEMNUM_INC(mem, p);
02744 si = repeat_stk[mem];
02745 stkp = STACK_AT(si);
02746
02747 repeat_inc:
02748 stkp->u.repeat.count++;
02749 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
02750
02751 }
02752 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
02753 STACK_PUSH_ALT(p, s, sprev, pkeep);
02754 p = STACK_AT(si)->u.repeat.pcode;
02755 }
02756 else {
02757 p = stkp->u.repeat.pcode;
02758 }
02759 STACK_PUSH_REPEAT_INC(si);
02760 MOP_OUT;
02761 CHECK_INTERRUPT_IN_MATCH_AT;
02762 continue;
02763 break;
02764
02765 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
02766 GET_MEMNUM_INC(mem, p);
02767 STACK_GET_REPEAT(mem, stkp);
02768 si = GET_STACK_INDEX(stkp);
02769 goto repeat_inc;
02770 break;
02771
02772 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
02773 GET_MEMNUM_INC(mem, p);
02774 si = repeat_stk[mem];
02775 stkp = STACK_AT(si);
02776
02777 repeat_inc_ng:
02778 stkp->u.repeat.count++;
02779 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
02780 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
02781 UChar* pcode = stkp->u.repeat.pcode;
02782
02783 STACK_PUSH_REPEAT_INC(si);
02784 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
02785 }
02786 else {
02787 p = stkp->u.repeat.pcode;
02788 STACK_PUSH_REPEAT_INC(si);
02789 }
02790 }
02791 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
02792 STACK_PUSH_REPEAT_INC(si);
02793 }
02794 MOP_OUT;
02795 CHECK_INTERRUPT_IN_MATCH_AT;
02796 continue;
02797 break;
02798
02799 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
02800 GET_MEMNUM_INC(mem, p);
02801 STACK_GET_REPEAT(mem, stkp);
02802 si = GET_STACK_INDEX(stkp);
02803 goto repeat_inc_ng;
02804 break;
02805
02806 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
02807 STACK_PUSH_POS(s, sprev, pkeep);
02808 MOP_OUT;
02809 continue;
02810 break;
02811
02812 case OP_POP_POS: MOP_IN(OP_POP_POS);
02813 {
02814 STACK_POS_END(stkp);
02815 s = stkp->u.state.pstr;
02816 sprev = stkp->u.state.pstr_prev;
02817 }
02818 MOP_OUT;
02819 continue;
02820 break;
02821
02822 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
02823 GET_RELADDR_INC(addr, p);
02824 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
02825 MOP_OUT;
02826 continue;
02827 break;
02828
02829 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
02830 STACK_POP_TIL_POS_NOT;
02831 goto fail;
02832 break;
02833
02834 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
02835 STACK_PUSH_STOP_BT;
02836 MOP_OUT;
02837 continue;
02838 break;
02839
02840 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
02841 STACK_STOP_BT_END;
02842 MOP_OUT;
02843 continue;
02844 break;
02845
02846 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
02847 GET_LENGTH_INC(tlen, p);
02848 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
02849 if (IS_NULL(s)) goto fail;
02850 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
02851 MOP_OUT;
02852 continue;
02853 break;
02854
02855 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
02856 GET_RELADDR_INC(addr, p);
02857 GET_LENGTH_INC(tlen, p);
02858 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
02859 if (IS_NULL(q)) {
02860
02861
02862 p += addr;
02863
02864 }
02865 else {
02866 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
02867 s = q;
02868 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
02869 }
02870 MOP_OUT;
02871 continue;
02872 break;
02873
02874 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
02875 STACK_POP_TIL_LOOK_BEHIND_NOT;
02876 goto fail;
02877 break;
02878
02879 #ifdef USE_SUBEXP_CALL
02880 case OP_CALL: MOP_IN(OP_CALL);
02881 GET_ABSADDR_INC(addr, p);
02882 STACK_PUSH_CALL_FRAME(p);
02883 p = reg->p + addr;
02884 MOP_OUT;
02885 continue;
02886 break;
02887
02888 case OP_RETURN: MOP_IN(OP_RETURN);
02889 STACK_RETURN(p);
02890 STACK_PUSH_RETURN;
02891 MOP_OUT;
02892 continue;
02893 break;
02894 #endif
02895
02896 case OP_CONDITION: MOP_IN(OP_CONDITION);
02897 GET_MEMNUM_INC(mem, p);
02898 GET_RELADDR_INC(addr, p);
02899 if ((mem > num_mem) ||
02900 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
02901 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
02902 p += addr;
02903 }
02904 MOP_OUT;
02905 continue;
02906 break;
02907
02908 case OP_FINISH:
02909 goto finish;
02910 break;
02911
02912 fail:
02913 MOP_OUT;
02914
02915 case OP_FAIL: MOP_IN(OP_FAIL);
02916 STACK_POP;
02917 p = stk->u.state.pcode;
02918 s = stk->u.state.pstr;
02919 sprev = stk->u.state.pstr_prev;
02920 pkeep = stk->u.state.pkeep;
02921
02922 #ifdef USE_COMBINATION_EXPLOSION_CHECK
02923 if (stk->u.state.state_check != 0) {
02924 stk->type = STK_STATE_CHECK_MARK;
02925 stk++;
02926 }
02927 #endif
02928
02929 MOP_OUT;
02930 continue;
02931 break;
02932
02933 default:
02934 goto bytecode_error;
02935
02936 }
02937 sprev = sbegin;
02938 }
02939
02940 finish:
02941 STACK_SAVE;
02942 if (xmalloc_base) xfree(xmalloc_base);
02943 return best_len;
02944
02945 #ifdef ONIG_DEBUG
02946 stack_error:
02947 STACK_SAVE;
02948 if (xmalloc_base) xfree(xmalloc_base);
02949 return ONIGERR_STACK_BUG;
02950 #endif
02951
02952 bytecode_error:
02953 STACK_SAVE;
02954 if (xmalloc_base) xfree(xmalloc_base);
02955 return ONIGERR_UNDEFINED_BYTECODE;
02956
02957 unexpected_bytecode_error:
02958 STACK_SAVE;
02959 if (xmalloc_base) xfree(xmalloc_base);
02960 return ONIGERR_UNEXPECTED_BYTECODE;
02961 }
02962
02963
02964 static UChar*
02965 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
02966 const UChar* text, const UChar* text_end, UChar* text_range)
02967 {
02968 UChar *t, *p, *s, *end;
02969
02970 end = (UChar* )text_end;
02971 end -= target_end - target - 1;
02972 if (end > text_range)
02973 end = text_range;
02974
02975 s = (UChar* )text;
02976
02977 if (enc->max_enc_len == enc->min_enc_len) {
02978 int n = enc->max_enc_len;
02979
02980 while (s < end) {
02981 if (*s == *target) {
02982 p = s + 1;
02983 t = target + 1;
02984 if (target_end == t || memcmp(t, p, target_end - t) == 0)
02985 return s;
02986 }
02987 s += n;
02988 }
02989 return (UChar* )NULL;
02990 }
02991 while (s < end) {
02992 if (*s == *target) {
02993 p = s + 1;
02994 t = target + 1;
02995 if (target_end == t || memcmp(t, p, target_end - t) == 0)
02996 return s;
02997 }
02998 s += enclen(enc, s, text_end);
02999 }
03000
03001 return (UChar* )NULL;
03002 }
03003
03004 static int
03005 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
03006 const UChar* t, const UChar* tend,
03007 const UChar* p, const UChar* end)
03008 {
03009 int lowlen;
03010 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03011
03012 while (t < tend) {
03013 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
03014 q = lowbuf;
03015 while (lowlen > 0) {
03016 if (*t++ != *q++) return 0;
03017 lowlen--;
03018 }
03019 }
03020
03021 return 1;
03022 }
03023
03024 static UChar*
03025 slow_search_ic(OnigEncoding enc, int case_fold_flag,
03026 UChar* target, UChar* target_end,
03027 const UChar* text, const UChar* text_end, UChar* text_range)
03028 {
03029 UChar *s, *end;
03030
03031 end = (UChar* )text_end;
03032 end -= target_end - target - 1;
03033 if (end > text_range)
03034 end = text_range;
03035
03036 s = (UChar* )text;
03037
03038 while (s < end) {
03039 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03040 s, text_end))
03041 return s;
03042
03043 s += enclen(enc, s, text_end);
03044 }
03045
03046 return (UChar* )NULL;
03047 }
03048
03049 static UChar*
03050 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
03051 const UChar* text, const UChar* adjust_text,
03052 const UChar* text_end, const UChar* text_start)
03053 {
03054 UChar *t, *p, *s;
03055
03056 s = (UChar* )text_end;
03057 s -= (target_end - target);
03058 if (s > text_start)
03059 s = (UChar* )text_start;
03060 else
03061 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
03062
03063 while (s >= text) {
03064 if (*s == *target) {
03065 p = s + 1;
03066 t = target + 1;
03067 while (t < target_end) {
03068 if (*t != *p++)
03069 break;
03070 t++;
03071 }
03072 if (t == target_end)
03073 return s;
03074 }
03075 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
03076 }
03077
03078 return (UChar* )NULL;
03079 }
03080
03081 static UChar*
03082 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
03083 UChar* target, UChar* target_end,
03084 const UChar* text, const UChar* adjust_text,
03085 const UChar* text_end, const UChar* text_start)
03086 {
03087 UChar *s;
03088
03089 s = (UChar* )text_end;
03090 s -= (target_end - target);
03091 if (s > text_start)
03092 s = (UChar* )text_start;
03093 else
03094 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
03095
03096 while (s >= text) {
03097 if (str_lower_case_match(enc, case_fold_flag,
03098 target, target_end, s, text_end))
03099 return s;
03100
03101 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
03102 }
03103
03104 return (UChar* )NULL;
03105 }
03106
03107 #ifndef USE_SUNDAY_QUICK_SEARCH
03108
03109 static UChar*
03110 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
03111 const UChar* text, const UChar* text_end,
03112 const UChar* text_range)
03113 {
03114 const UChar *s, *se, *t, *p, *end;
03115 const UChar *tail;
03116 ptrdiff_t skip, tlen1;
03117
03118 #ifdef ONIG_DEBUG_SEARCH
03119 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
03120 text, text, text_end, text_end, text_range, text_range);
03121 #endif
03122
03123 tail = target_end - 1;
03124 tlen1 = tail - target;
03125 end = text_range;
03126 if (end + tlen1 > text_end)
03127 end = text_end - tlen1;
03128
03129 s = text;
03130
03131 if (IS_NULL(reg->int_map)) {
03132 while (s < end) {
03133 p = se = s + tlen1;
03134 t = tail;
03135 while (*p == *t) {
03136 if (t == target) return (UChar* )s;
03137 p--; t--;
03138 }
03139 skip = reg->map[*se];
03140 t = s;
03141 do {
03142 s += enclen(reg->enc, s, end);
03143 } while ((s - t) < skip && s < end);
03144 }
03145 }
03146 else {
03147 while (s < end) {
03148 p = se = s + tlen1;
03149 t = tail;
03150 while (*p == *t) {
03151 if (t == target) return (UChar* )s;
03152 p--; t--;
03153 }
03154 skip = reg->int_map[*se];
03155 t = s;
03156 do {
03157 s += enclen(reg->enc, s, end);
03158 } while ((s - t) < skip && s < end);
03159 }
03160 }
03161
03162 return (UChar* )NULL;
03163 }
03164
03165
03166 static UChar*
03167 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
03168 const UChar* text, const UChar* text_end, const UChar* text_range)
03169 {
03170 const UChar *s, *t, *p, *end;
03171 const UChar *tail;
03172
03173 #ifdef ONIG_DEBUG_SEARCH
03174 fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
03175 text, text_end, text_range);
03176 #endif
03177
03178 end = text_range + (target_end - target) - 1;
03179 if (end > text_end)
03180 end = text_end;
03181
03182 tail = target_end - 1;
03183 s = text + (target_end - target) - 1;
03184 if (IS_NULL(reg->int_map)) {
03185 while (s < end) {
03186 p = s;
03187 t = tail;
03188 #ifdef ONIG_DEBUG_SEARCH
03189 fprintf(stderr, "bm_search_loop: pos: %d %s\n",
03190 (int)(s - text), s);
03191 #endif
03192 while (*p == *t) {
03193 if (t == target) return (UChar* )p;
03194 p--; t--;
03195 }
03196 s += reg->map[*s];
03197 }
03198 }
03199 else {
03200 while (s < end) {
03201 p = s;
03202 t = tail;
03203 while (*p == *t) {
03204 if (t == target) return (UChar* )p;
03205 p--; t--;
03206 }
03207 s += reg->int_map[*s];
03208 }
03209 }
03210 return (UChar* )NULL;
03211 }
03212
03213
03214 static UChar*
03215 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03216 const UChar* text, const UChar* text_end,
03217 const UChar* text_range)
03218 {
03219 const UChar *s, *se, *t, *end;
03220 const UChar *tail;
03221 ptrdiff_t skip, tlen1;
03222 OnigEncoding enc = reg->enc;
03223 int case_fold_flag = reg->case_fold_flag;
03224
03225 #ifdef ONIG_DEBUG_SEARCH
03226 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03227 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03228 #endif
03229
03230 tail = target_end - 1;
03231 tlen1 = tail - target;
03232 end = text_range;
03233 if (end + tlen1 > text_end)
03234 end = text_end - tlen1;
03235
03236 s = text;
03237
03238 if (IS_NULL(reg->int_map)) {
03239 while (s < end) {
03240 se = s + tlen1;
03241 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03242 s, se + 1))
03243 return (UChar* )s;
03244 skip = reg->map[*se];
03245 t = s;
03246 do {
03247 s += enclen(reg->enc, s, end);
03248 } while ((s - t) < skip && s < end);
03249 }
03250 }
03251 else {
03252 while (s < end) {
03253 se = s + tlen1;
03254 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03255 s, se + 1))
03256 return (UChar* )s;
03257 skip = reg->int_map[*se];
03258 t = s;
03259 do {
03260 s += enclen(reg->enc, s, end);
03261 } while ((s - t) < skip && s < end);
03262 }
03263 }
03264
03265 return (UChar* )NULL;
03266 }
03267
03268
03269 static UChar*
03270 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03271 const UChar* text, const UChar* text_end, const UChar* text_range)
03272 {
03273 const UChar *s, *p, *end;
03274 const UChar *tail;
03275 OnigEncoding enc = reg->enc;
03276 int case_fold_flag = reg->case_fold_flag;
03277
03278 #ifdef ONIG_DEBUG_SEARCH
03279 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03280 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03281 #endif
03282
03283 end = text_range + (target_end - target) - 1;
03284 if (end > text_end)
03285 end = text_end;
03286
03287 tail = target_end - 1;
03288 s = text + (target_end - target) - 1;
03289 if (IS_NULL(reg->int_map)) {
03290 while (s < end) {
03291 p = s - (target_end - target) + 1;
03292 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03293 p, s + 1))
03294 return (UChar* )p;
03295 s += reg->map[*s];
03296 }
03297 }
03298 else {
03299 while (s < end) {
03300 p = s - (target_end - target) + 1;
03301 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03302 p, s + 1))
03303 return (UChar* )p;
03304 s += reg->int_map[*s];
03305 }
03306 }
03307 return (UChar* )NULL;
03308 }
03309
03310 #else
03311
03312
03313 static UChar*
03314 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
03315 const UChar* text, const UChar* text_end,
03316 const UChar* text_range)
03317 {
03318 const UChar *s, *se, *t, *p, *end;
03319 const UChar *tail;
03320 ptrdiff_t skip, tlen1;
03321 OnigEncoding enc = reg->enc;
03322
03323 #ifdef ONIG_DEBUG_SEARCH
03324 fprintf(stderr, "bm_search_notrev: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03325 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03326 #endif
03327
03328 tail = target_end - 1;
03329 tlen1 = tail - target;
03330 end = text_range;
03331 if (end + tlen1 > text_end)
03332 end = text_end - tlen1;
03333
03334 s = text;
03335
03336 if (IS_NULL(reg->int_map)) {
03337 while (s < end) {
03338 p = se = s + tlen1;
03339 t = tail;
03340 while (*p == *t) {
03341 if (t == target) return (UChar* )s;
03342 p--; t--;
03343 }
03344 if (s + 1 >= end) break;
03345 skip = reg->map[se[1]];
03346 t = s;
03347 do {
03348 s += enclen(enc, s, end);
03349 } while ((s - t) < skip && s < end);
03350 }
03351 }
03352 else {
03353 while (s < end) {
03354 p = se = s + tlen1;
03355 t = tail;
03356 while (*p == *t) {
03357 if (t == target) return (UChar* )s;
03358 p--; t--;
03359 }
03360 if (s + 1 >= end) break;
03361 skip = reg->int_map[se[1]];
03362 t = s;
03363 do {
03364 s += enclen(enc, s, end);
03365 } while ((s - t) < skip && s < end);
03366 }
03367 }
03368
03369 return (UChar* )NULL;
03370 }
03371
03372
03373 static UChar*
03374 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
03375 const UChar* text, const UChar* text_end, const UChar* text_range)
03376 {
03377 const UChar *s, *t, *p, *end;
03378 const UChar *tail;
03379 ptrdiff_t tlen1;
03380
03381 tail = target_end - 1;
03382 tlen1 = tail - target;
03383 end = text_range + tlen1;
03384 if (end > text_end)
03385 end = text_end;
03386
03387 s = text + tlen1;
03388 if (IS_NULL(reg->int_map)) {
03389 while (s < end) {
03390 p = s;
03391 t = tail;
03392 while (*p == *t) {
03393 if (t == target) return (UChar* )p;
03394 p--; t--;
03395 }
03396 if (s + 1 >= end) break;
03397 s += reg->map[s[1]];
03398 }
03399 }
03400 else {
03401 while (s < end) {
03402 p = s;
03403 t = tail;
03404 while (*p == *t) {
03405 if (t == target) return (UChar* )p;
03406 p--; t--;
03407 }
03408 if (s + 1 >= end) break;
03409 s += reg->int_map[s[1]];
03410 }
03411 }
03412 return (UChar* )NULL;
03413 }
03414
03415
03416 static UChar*
03417 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03418 const UChar* text, const UChar* text_end,
03419 const UChar* text_range)
03420 {
03421 const UChar *s, *se, *t, *end;
03422 const UChar *tail;
03423 ptrdiff_t skip, tlen1;
03424 OnigEncoding enc = reg->enc;
03425 int case_fold_flag = reg->case_fold_flag;
03426
03427 #ifdef ONIG_DEBUG_SEARCH
03428 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03429 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03430 #endif
03431
03432 tail = target_end - 1;
03433 tlen1 = tail - target;
03434 end = text_range;
03435 if (end + tlen1 > text_end)
03436 end = text_end - tlen1;
03437
03438 s = text;
03439
03440 if (IS_NULL(reg->int_map)) {
03441 while (s < end) {
03442 se = s + tlen1;
03443 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03444 s, se + 1))
03445 return (UChar* )s;
03446 if (s + 1 >= end) break;
03447 skip = reg->map[se[1]];
03448 t = s;
03449 do {
03450 s += enclen(enc, s, end);
03451 } while ((s - t) < skip && s < end);
03452 }
03453 }
03454 else {
03455 while (s < end) {
03456 se = s + tlen1;
03457 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03458 s, se + 1))
03459 return (UChar* )s;
03460 if (s + 1 >= end) break;
03461 skip = reg->int_map[se[1]];
03462 t = s;
03463 do {
03464 s += enclen(enc, s, end);
03465 } while ((s - t) < skip && s < end);
03466 }
03467 }
03468
03469 return (UChar* )NULL;
03470 }
03471
03472
03473 static UChar*
03474 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03475 const UChar* text, const UChar* text_end, const UChar* text_range)
03476 {
03477 const UChar *s, *p, *end;
03478 const UChar *tail;
03479 ptrdiff_t tlen1;
03480 OnigEncoding enc = reg->enc;
03481 int case_fold_flag = reg->case_fold_flag;
03482
03483 #ifdef ONIG_DEBUG_SEARCH
03484 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03485 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03486 #endif
03487
03488 tail = target_end - 1;
03489 tlen1 = tail - target;
03490 end = text_range + tlen1;
03491 if (end > text_end)
03492 end = text_end;
03493
03494 s = text + tlen1;
03495 if (IS_NULL(reg->int_map)) {
03496 while (s < end) {
03497 p = s - tlen1;
03498 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03499 p, s + 1))
03500 return (UChar* )p;
03501 if (s + 1 >= end) break;
03502 s += reg->map[s[1]];
03503 }
03504 }
03505 else {
03506 while (s < end) {
03507 p = s - tlen1;
03508 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03509 p, s + 1))
03510 return (UChar* )p;
03511 if (s + 1 >= end) break;
03512 s += reg->int_map[s[1]];
03513 }
03514 }
03515 return (UChar* )NULL;
03516 }
03517 #endif
03518
03519 static int
03520 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03521 int** skip)
03522 {
03523 int i, len;
03524
03525 if (IS_NULL(*skip)) {
03526 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03527 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
03528 }
03529
03530 len = (int )(end - s);
03531 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
03532 (*skip)[i] = len;
03533
03534 for (i = len - 1; i > 0; i--)
03535 (*skip)[s[i]] = i;
03536
03537 return 0;
03538 }
03539
03540 static UChar*
03541 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
03542 const UChar* text, const UChar* adjust_text,
03543 const UChar* text_end, const UChar* text_start)
03544 {
03545 const UChar *s, *t, *p;
03546
03547 s = text_end - (target_end - target);
03548 if (text_start < s)
03549 s = text_start;
03550 else
03551 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
03552
03553 while (s >= text) {
03554 p = s;
03555 t = target;
03556 while (t < target_end && *p == *t) {
03557 p++; t++;
03558 }
03559 if (t == target_end)
03560 return (UChar* )s;
03561
03562 s -= reg->int_map_backward[*s];
03563 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
03564 }
03565
03566 return (UChar* )NULL;
03567 }
03568
03569 static UChar*
03570 map_search(OnigEncoding enc, UChar map[],
03571 const UChar* text, const UChar* text_range, const UChar* text_end)
03572 {
03573 const UChar *s = text;
03574
03575 while (s < text_range) {
03576 if (map[*s]) return (UChar* )s;
03577
03578 s += enclen(enc, s, text_end);
03579 }
03580 return (UChar* )NULL;
03581 }
03582
03583 static UChar*
03584 map_search_backward(OnigEncoding enc, UChar map[],
03585 const UChar* text, const UChar* adjust_text,
03586 const UChar* text_start, const UChar* text_end)
03587 {
03588 const UChar *s = text_start;
03589
03590 while (s >= text) {
03591 if (map[*s]) return (UChar* )s;
03592
03593 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
03594 }
03595 return (UChar* )NULL;
03596 }
03597
03598 extern OnigPosition
03599 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
03600 OnigOptionType option)
03601 {
03602 ptrdiff_t r;
03603 UChar *prev;
03604 OnigMatchArg msa;
03605
03606 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
03607 start:
03608 THREAD_ATOMIC_START;
03609 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
03610 ONIG_STATE_INC(reg);
03611 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
03612 onig_chain_reduce(reg);
03613 ONIG_STATE_INC(reg);
03614 }
03615 }
03616 else {
03617 int n;
03618
03619 THREAD_ATOMIC_END;
03620 n = 0;
03621 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
03622 if (++n > THREAD_PASS_LIMIT_COUNT)
03623 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
03624 THREAD_PASS;
03625 }
03626 goto start;
03627 }
03628 THREAD_ATOMIC_END;
03629 #endif
03630
03631 MATCH_ARG_INIT(msa, option, region, at, at);
03632 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03633 {
03634 int offset = at - str;
03635 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
03636 }
03637 #endif
03638
03639 if (region
03640 #ifdef USE_POSIX_API_REGION_OPTION
03641 && !IS_POSIX_REGION(option)
03642 #endif
03643 ) {
03644 r = onig_region_resize_clear(region, reg->num_mem + 1);
03645 }
03646 else
03647 r = 0;
03648
03649 if (r == 0) {
03650 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
03651 r = match_at(reg, str, end,
03652 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
03653 end,
03654 #endif
03655 at, prev, &msa);
03656 }
03657
03658 MATCH_ARG_FREE(msa);
03659 ONIG_STATE_DEC_THREAD(reg);
03660 return r;
03661 }
03662
03663 static int
03664 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
03665 UChar* range, UChar** low, UChar** high, UChar** low_prev)
03666 {
03667 UChar *p, *pprev = (UChar* )NULL;
03668
03669 #ifdef ONIG_DEBUG_SEARCH
03670 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
03671 str, str, end, end, s, s, range, range);
03672 #endif
03673
03674 p = s;
03675 if (reg->dmin > 0) {
03676 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
03677 p += reg->dmin;
03678 }
03679 else {
03680 UChar *q = p + reg->dmin;
03681 while (p < q) p += enclen(reg->enc, p, end);
03682 }
03683 }
03684
03685 retry:
03686 switch (reg->optimize) {
03687 case ONIG_OPTIMIZE_EXACT:
03688 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
03689 break;
03690 case ONIG_OPTIMIZE_EXACT_IC:
03691 p = slow_search_ic(reg->enc, reg->case_fold_flag,
03692 reg->exact, reg->exact_end, p, end, range);
03693 break;
03694
03695 case ONIG_OPTIMIZE_EXACT_BM:
03696 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
03697 break;
03698
03699 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
03700 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
03701 break;
03702
03703 case ONIG_OPTIMIZE_EXACT_BM_IC:
03704 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
03705 break;
03706
03707 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
03708 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
03709 break;
03710
03711 case ONIG_OPTIMIZE_MAP:
03712 p = map_search(reg->enc, reg->map, p, range, end);
03713 break;
03714 }
03715
03716 if (p && p < range) {
03717 if (p - reg->dmin < s) {
03718 retry_gate:
03719 pprev = p;
03720 p += enclen(reg->enc, p, end);
03721 goto retry;
03722 }
03723
03724 if (reg->sub_anchor) {
03725 UChar* prev;
03726
03727 switch (reg->sub_anchor) {
03728 case ANCHOR_BEGIN_LINE:
03729 if (!ON_STR_BEGIN(p)) {
03730 prev = onigenc_get_prev_char_head(reg->enc,
03731 (pprev ? pprev : str), p, end);
03732 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
03733 goto retry_gate;
03734 }
03735 break;
03736
03737 case ANCHOR_END_LINE:
03738 if (ON_STR_END(p)) {
03739 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
03740 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
03741 (pprev ? pprev : str), p);
03742 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
03743 goto retry_gate;
03744 #endif
03745 }
03746 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
03747 goto retry_gate;
03748 break;
03749 }
03750 }
03751
03752 if (reg->dmax == 0) {
03753 *low = p;
03754 if (low_prev) {
03755 if (*low > s)
03756 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
03757 else
03758 *low_prev = onigenc_get_prev_char_head(reg->enc,
03759 (pprev ? pprev : str), p, end);
03760 }
03761 }
03762 else {
03763 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
03764 *low = p - reg->dmax;
03765 if (*low > s) {
03766 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
03767 *low, end, (const UChar** )low_prev);
03768 if (low_prev && IS_NULL(*low_prev))
03769 *low_prev = onigenc_get_prev_char_head(reg->enc,
03770 (pprev ? pprev : s), *low, end);
03771 }
03772 else {
03773 if (low_prev)
03774 *low_prev = onigenc_get_prev_char_head(reg->enc,
03775 (pprev ? pprev : str), *low, end);
03776 }
03777 }
03778 }
03779
03780 *high = p - reg->dmin;
03781
03782 #ifdef ONIG_DEBUG_SEARCH
03783 fprintf(stderr,
03784 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
03785 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
03786 #endif
03787 return 1;
03788 }
03789
03790 return 0;
03791 }
03792
/* Forward declaration; backward_search_range calls this to fill
   reg->int_map_backward and returns its result when it is non-zero. */
03793 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
03794 int** skip));
03795
/* backward_search_range compares (s - range) against this value: below it,
   and with no backward skip map built yet, it uses the plain backward
   search instead of building the map. */
03796 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
03797
03798 static int
03799 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
03800 UChar* s, const UChar* range, UChar* adjrange,
03801 UChar** low, UChar** high)
03802 {
03803 int r;
03804 UChar *p;
03805
03806 range += reg->dmin;
03807 p = s;
03808
03809 retry:
03810 switch (reg->optimize) {
03811 case ONIG_OPTIMIZE_EXACT:
03812 exact_method:
03813 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
03814 range, adjrange, end, p);
03815 break;
03816
03817 case ONIG_OPTIMIZE_EXACT_IC:
03818 case ONIG_OPTIMIZE_EXACT_BM_IC:
03819 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
03820 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
03821 reg->exact, reg->exact_end,
03822 range, adjrange, end, p);
03823 break;
03824
03825 case ONIG_OPTIMIZE_EXACT_BM:
03826 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
03827 if (IS_NULL(reg->int_map_backward)) {
03828 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
03829 goto exact_method;
03830
03831 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
03832 &(reg->int_map_backward));
03833 if (r) return r;
03834 }
03835 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
03836 end, p);
03837 break;
03838
03839 case ONIG_OPTIMIZE_MAP:
03840 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
03841 break;
03842 }
03843
03844 if (p) {
03845 if (reg->sub_anchor) {
03846 UChar* prev;
03847
03848 switch (reg->sub_anchor) {
03849 case ANCHOR_BEGIN_LINE:
03850 if (!ON_STR_BEGIN(p)) {
03851 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
03852 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
03853 p = prev;
03854 goto retry;
03855 }
03856 }
03857 break;
03858
03859 case ANCHOR_END_LINE:
03860 if (ON_STR_END(p)) {
03861 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
03862 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
03863 if (IS_NULL(prev)) goto fail;
03864 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
03865 p = prev;
03866 goto retry;
03867 }
03868 #endif
03869 }
03870 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
03871 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
03872 if (IS_NULL(p)) goto fail;
03873 goto retry;
03874 }
03875 break;
03876 }
03877 }
03878
03879
03880 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
03881 *low = p - reg->dmax;
03882 *high = p - reg->dmin;
03883 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
03884 }
03885
03886 #ifdef ONIG_DEBUG_SEARCH
03887 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
03888 (int )(*low - str), (int )(*high - str));
03889 #endif
03890 return 1;
03891 }
03892
03893 fail:
03894 #ifdef ONIG_DEBUG_SEARCH
03895 fprintf(stderr, "backward_search_range: fail.\n");
03896 #endif
03897 return 0;
03898 }
03899
03900
03901 extern OnigPosition
03902 onig_search(regex_t* reg, const UChar* str, const UChar* end,
03903 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
03904 {
03905 return onig_search_gpos(reg, str, end, start, start, range, region, option);
03906 }
03907
/*
 * Search the subject str..end for a match of reg.
 *
 * Candidate start positions run from `start` towards `range`: forward when
 * range > start, backward otherwise.  `global_pos` is handed to
 * MATCH_ARG_INIT for the main search (NOTE(review): presumably the position
 * used by ANCHOR_BEGIN_POSITION -- confirm against match_at).
 *
 * Returns s - str (offset of the position where match_at succeeded),
 * ONIG_MISMATCH when no position matches, or another negative value taken
 * from onig_region_resize_clear / match_at (or
 * ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT in the multi-thread build).
 */
03908 extern OnigPosition
03909 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
03910 const UChar* global_pos,
03911 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
03912 {
03913 ptrdiff_t r;
03914 UChar *s, *prev;
03915 OnigMatchArg msa;
03916 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
03917 const UChar *orig_start = start;
03918 const UChar *orig_range = range;
03919 #endif
03920
/* Multi-thread + recompile build: wait (at most THREAD_PASS_LIMIT_COUNT
   passes) until the regex state reaches ONIG_STATE_NORMAL, then bump the
   state counter; it is released via ONIG_STATE_DEC_THREAD on every exit. */
03921 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
03922 start:
03923 THREAD_ATOMIC_START;
03924 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
03925 ONIG_STATE_INC(reg);
03926 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
03927 onig_chain_reduce(reg);
03928 ONIG_STATE_INC(reg);
03929 }
03930 }
03931 else {
03932 int n;
03933
03934 THREAD_ATOMIC_END;
03935 n = 0;
03936 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
03937 if (++n > THREAD_PASS_LIMIT_COUNT)
03938 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
03939 THREAD_PASS;
03940 }
03941 goto start;
03942 }
03943 THREAD_ATOMIC_END;
03944 #endif
03945
03946 #ifdef ONIG_DEBUG_SEARCH
03947 fprintf(stderr,
03948 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
03949 str, str, end - str, start - str, range - str);
03950 #endif
03951
/* Size and clear the caller's region for reg->num_mem + 1 entries
   (skipped for POSIX-style regions when that option is compiled in). */
03952 if (region
03953 #ifdef USE_POSIX_API_REGION_OPTION
03954 && !IS_POSIX_REGION(option)
03955 #endif
03956 ) {
03957 r = onig_region_resize_clear(region, reg->num_mem + 1);
03958 if (r) goto finish_no_msa;
03959 }
03960
/* A start position outside [str, end] is reported as a mismatch. */
03961 if (start > end || start < str) goto mismatch_no_msa;
03962
03963
/* MATCH_AND_RETURN_CHECK: run match_at at s.  A result >= 0 jumps to
   `match` (unless the FIND_LONGEST variant keeps scanning), any other
   result except ONIG_MISMATCH jumps to `finish`, and ONIG_MISMATCH falls
   through so the caller's loop can try the next position.  Four variants
   exist, selected by the two USE_* switches below. */
03964 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
03965 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
03966 #define MATCH_AND_RETURN_CHECK(upper_range) \
03967 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
03968 if (r != ONIG_MISMATCH) {\
03969 if (r >= 0) {\
03970 if (! IS_FIND_LONGEST(reg->options)) {\
03971 goto match;\
03972 }\
03973 }\
03974 else goto finish; \
03975 }
03976 #else
03977 #define MATCH_AND_RETURN_CHECK(upper_range) \
03978 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
03979 if (r != ONIG_MISMATCH) {\
03980 if (r >= 0) {\
03981 goto match;\
03982 }\
03983 else goto finish; \
03984 }
03985 #endif
03986 #else
03987 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
03988 #define MATCH_AND_RETURN_CHECK(none) \
03989 r = match_at(reg, str, end, s, prev, &msa);\
03990 if (r != ONIG_MISMATCH) {\
03991 if (r >= 0) {\
03992 if (! IS_FIND_LONGEST(reg->options)) {\
03993 goto match;\
03994 }\
03995 }\
03996 else goto finish; \
03997 }
03998 #else
03999 #define MATCH_AND_RETURN_CHECK(none) \
04000 r = match_at(reg, str, end, s, prev, &msa);\
04001 if (r != ONIG_MISMATCH) {\
04002 if (r >= 0) {\
04003 goto match;\
04004 }\
04005 else goto finish; \
04006 }
04007 #endif
04008 #endif
04009
04010
04011
/* Anchor optimization: for a non-empty subject, use reg->anchor to narrow
   start/range (or reject outright) before any matching is attempted. */
04012 if (reg->anchor != 0 && str < end) {
04013 UChar *min_semi_end, *max_semi_end;
04014
04015 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
04016
/* Only `start` itself is tried: forward range becomes start + 1,
   backward range becomes start. */
04017 begin_position:
04018 if (range > start)
04019 range = start + 1;
04020 else
04021 range = start;
04022 }
04023 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
04024
/* Forward: start must be str, and only str is tried.  Backward: only
   allowed when range reaches str; then start and range collapse to str. */
04025 if (range > start) {
04026 if (start != str) goto mismatch_no_msa;
04027 range = str + 1;
04028 }
04029 else {
04030 if (range <= str) {
04031 start = str;
04032 range = str;
04033 }
04034 else
04035 goto mismatch_no_msa;
04036 }
04037 }
04038 else if (reg->anchor & ANCHOR_END_BUF) {
04039 min_semi_end = max_semi_end = (UChar* )end;
04040
/* Clamp start/range using reg->anchor_dmin / reg->anchor_dmax measured
   back from min_semi_end / max_semi_end. */
04041 end_buf:
04042 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
04043 goto mismatch_no_msa;
04044
04045 if (range > start) {
04046 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
04047 start = min_semi_end - reg->anchor_dmax;
04048 if (start < end)
04049 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
04050 }
04051 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
04052 range = max_semi_end - reg->anchor_dmin + 1;
04053 }
04054
04055 if (start > range) goto mismatch_no_msa;
04056
04057
04058 }
04059 else {
04060 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
04061 range = min_semi_end - reg->anchor_dmax;
04062 }
04063 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
04064 start = max_semi_end - reg->anchor_dmin;
04065 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
04066 }
04067 if (range > start) goto mismatch_no_msa;
04068 }
04069 }
04070 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
04071 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
04072
/* If the last character is a newline, min_semi_end is placed on it (and on
   the preceding CR for a CR-NL pair when the CRLF option is active);
   otherwise both bounds are `end`. */
04073 max_semi_end = (UChar* )end;
04074 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
04075 min_semi_end = pre_end;
04076
04077 #ifdef USE_CRNL_AS_LINE_TERMINATOR
04078 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
04079 if (IS_NOT_NULL(pre_end) &&
04080 IS_NEWLINE_CRLF(reg->options) &&
04081 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
04082 min_semi_end = pre_end;
04083 }
04084 #endif
04085 if (min_semi_end > str && start <= min_semi_end) {
04086 goto end_buf;
04087 }
04088 }
04089 else {
04090 min_semi_end = (UChar* )end;
04091 goto end_buf;
04092 }
04093 }
04094 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
04095 if (! (reg->anchor & ANCHOR_LOOK_BEHIND)) {
04096 goto begin_position;
04097 }
04098 }
04099 }
04100 else if (str == end) {
04101 static const UChar address_for_empty_string[] = "";
04102
04103 #ifdef ONIG_DEBUG_SEARCH
04104 fprintf(stderr, "onig_search: empty string.\n");
04105 #endif
04106
/* Empty subject: when reg->threshold_len is 0, make one match attempt
   against a static empty string; otherwise report a mismatch. */
04107 if (reg->threshold_len == 0) {
04108 start = end = str = address_for_empty_string;
04109 s = (UChar* )start;
04110 prev = (UChar* )NULL;
04111
04112 MATCH_ARG_INIT(msa, option, region, start, start);
04113 #ifdef USE_COMBINATION_EXPLOSION_CHECK
04114 msa.state_check_buff = (void* )0;
04115 msa.state_check_buff_size = 0;
04116 #endif
04117 MATCH_AND_RETURN_CHECK(end);
04118 goto mismatch;
04119 }
04120 goto mismatch_no_msa;
04121 }
04122
04123 #ifdef ONIG_DEBUG_SEARCH
04124 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
04125 (int )(end - str), (int )(start - str), (int )(range - str));
04126 #endif
04127
/* From here on msa is initialized, so exits must go through `match`,
   `mismatch` or `finish`, which call MATCH_ARG_FREE. */
04128 MATCH_ARG_INIT(msa, option, region, start, global_pos);
04129 #ifdef USE_COMBINATION_EXPLOSION_CHECK
04130 {
04131 int offset = (MIN(start, range) - str);
04132 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
04133 }
04134 #endif
04135
04136 s = (UChar* )start;
/* Forward search. */
04137 if (range > start) {
04138 if (s > str)
04139 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
04140 else
04141 prev = (UChar* )NULL;
04142
04143 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
04144 UChar *sch_range, *low, *high, *low_prev;
04145
/* Limit handed to forward_search_range: range extended by dmax (or `end`
   when dmax is infinite), never beyond `end`. */
04146 sch_range = (UChar* )range;
04147 if (reg->dmax != 0) {
04148 if (reg->dmax == ONIG_INFINITE_DISTANCE)
04149 sch_range = (UChar* )end;
04150 else {
04151 sch_range += reg->dmax;
04152 if (sch_range > end) sch_range = (UChar* )end;
04153 }
04154 }
04155
04156 if ((end - start) < reg->threshold_len)
04157 goto mismatch;
04158
/* Finite dmax: repeatedly ask for a [low, high] window and try every
   character position inside it. */
04159 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
04160 do {
04161 if (! forward_search_range(reg, str, end, s, sch_range,
04162 &low, &high, &low_prev)) goto mismatch;
04163 if (s < low) {
04164 s = low;
04165 prev = low_prev;
04166 }
04167 while (s <= high) {
04168 MATCH_AND_RETURN_CHECK(orig_range);
04169 prev = s;
04170 s += enclen(reg->enc, s, end);
04171 }
04172 } while (s < range);
04173 goto mismatch;
04174 }
04175 else {
/* Infinite dmax: one existence check for the key; low/high are not used
   afterwards. */
04176 if (! forward_search_range(reg, str, end, s, sch_range,
04177 &low, &high, (UChar** )NULL)) goto mismatch;
04178
04179 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
04180 do {
04181 if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0)
04182 msa.gpos = s;
04183 MATCH_AND_RETURN_CHECK(orig_range);
04184 prev = s;
04185 s += enclen(reg->enc, s, end);
04186
/* Skip ahead until the previous character is a newline (or range is
   reached) before the next attempt. */
04187 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
04188 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
04189 && s < range) {
04190 prev = s;
04191 s += enclen(reg->enc, s, end);
04192 }
04193 }
04194 } while (s < range);
04195 goto mismatch;
04196 }
04197 }
04198 }
04199
/* Plain forward scan: try each character position in [s, range). */
04200 do {
04201 MATCH_AND_RETURN_CHECK(orig_range);
04202 prev = s;
04203 s += enclen(reg->enc, s, end);
04204 } while (s < range);
04205
/* One extra attempt when s lands exactly on range. */
04206 if (s == range) {
04207 MATCH_AND_RETURN_CHECK(orig_range);
04208 }
04209 }
/* Backward search. */
04210 else {
04211 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
04212 if (orig_start < end)
04213 orig_start += enclen(reg->enc, orig_start, end);
04214 #endif
04215
04216 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
04217 UChar *low, *high, *adjrange, *sch_start;
04218
04219 if (range < end)
04220 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
04221 else
04222 adjrange = (UChar* )end;
04223
/* Finite dmax: repeatedly ask for a [low, high] window and try the
   positions in it from high down to low. */
04224 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
04225 (end - range) >= reg->threshold_len) {
04226 do {
04227 sch_start = s + reg->dmax;
04228 if (sch_start > end) sch_start = (UChar* )end;
04229 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
04230 &low, &high) <= 0)
04231 goto mismatch;
04232
04233 if (s > high)
04234 s = high;
04235
04236 while (s >= low) {
04237 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
04238 MATCH_AND_RETURN_CHECK(orig_start);
04239 s = prev;
04240 }
04241 } while (s >= range);
04242 goto mismatch;
04243 }
04244 else {
04245 if ((end - range) < reg->threshold_len) goto mismatch;
04246
/* One existence check for the key, then fall through to the plain backward
   scan below. */
04247 sch_start = s;
04248 if (reg->dmax != 0) {
04249 if (reg->dmax == ONIG_INFINITE_DISTANCE)
04250 sch_start = (UChar* )end;
04251 else {
04252 sch_start += reg->dmax;
04253 if (sch_start > end) sch_start = (UChar* )end;
04254 else
04255 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
04256 start, sch_start, end);
04257 }
04258 }
04259 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
04260 &low, &high) <= 0) goto mismatch;
04261 }
04262 }
04263
/* Plain backward scan: try each character position from s down to range. */
04264 do {
04265 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
04266 MATCH_AND_RETURN_CHECK(orig_start);
04267 s = prev;
04268 } while (s >= range);
04269 }
04270
/* No position matched.  In the FIND_LONGEST build, a best match recorded
   in msa (best_len >= 0) is still returned as a match. */
04271 mismatch:
04272 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
04273 if (IS_FIND_LONGEST(reg->options)) {
04274 if (msa.best_len >= 0) {
04275 s = msa.best_s;
04276 goto match;
04277 }
04278 }
04279 #endif
04280 r = ONIG_MISMATCH;
04281
/* Mismatch/error exit once msa has been initialized. */
04282 finish:
04283 MATCH_ARG_FREE(msa);
04284 ONIG_STATE_DEC_THREAD(reg);
04285
04286
04287
/* With FIND_NOT_EMPTY set, clear the caller's (non-POSIX) region on this
   exit path. */
04288 if (IS_FIND_NOT_EMPTY(reg->options) && region
04289 #ifdef USE_POSIX_API_REGION_OPTION
04290 && !IS_POSIX_REGION(option)
04291 #endif
04292 ) {
04293 onig_region_clear(region);
04294 }
04295
04296 #ifdef ONIG_DEBUG
04297 if (r != ONIG_MISMATCH)
04298 fprintf(stderr, "onig_search: error %d\n", r);
04299 #endif
04300 return r;
04301
/* Exits taken before msa was initialized: nothing to free. */
04302 mismatch_no_msa:
04303 r = ONIG_MISMATCH;
04304 finish_no_msa:
04305 ONIG_STATE_DEC_THREAD(reg);
04306 #ifdef ONIG_DEBUG
04307 if (r != ONIG_MISMATCH)
04308 fprintf(stderr, "onig_search: error %d\n", r);
04309 #endif
04310 return r;
04311
/* Success: the result is the offset of the matching start position. */
04312 match:
04313 ONIG_STATE_DEC_THREAD(reg);
04314 MATCH_ARG_FREE(msa);
04315 return s - str;
04316 }
04317
04318 extern OnigEncoding
04319 onig_get_encoding(regex_t* reg)
04320 {
04321 return reg->enc;
04322 }
04323
04324 extern OnigOptionType
04325 onig_get_options(regex_t* reg)
04326 {
04327 return reg->options;
04328 }
04329
04330 extern OnigCaseFoldType
04331 onig_get_case_fold_flag(regex_t* reg)
04332 {
04333 return reg->case_fold_flag;
04334 }
04335
04336 extern const OnigSyntaxType*
04337 onig_get_syntax(regex_t* reg)
04338 {
04339 return reg->syntax;
04340 }
04341
04342 extern int
04343 onig_number_of_captures(regex_t* reg)
04344 {
04345 return reg->num_mem;
04346 }
04347
04348 extern int
04349 onig_number_of_capture_histories(regex_t* reg)
04350 {
04351 #ifdef USE_CAPTURE_HISTORY
04352 int i, n;
04353
04354 n = 0;
04355 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
04356 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
04357 n++;
04358 }
04359 return n;
04360 #else
04361 return 0;
04362 #endif
04363 }
04364
04365 extern void
04366 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
04367 {
04368 *to = *from;
04369 }
04370
04371