00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regint.h"
00032
00033
00034
#ifdef USE_CRNL_AS_LINE_TERMINATOR
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))

/*
 * Newline test that understands the CRLF option.
 *
 * Without IS_NEWLINE_CRLF(option) this is just ONIGENC_IS_MBC_NEWLINE().
 * With it:
 *   - an LF (0x0a) counts as a newline, except when check_prev is set and
 *     the character before p is a CR (0x0d);
 *   - a CR followed (before end) by an LF counts as a newline;
 *   - anything else falls back to ONIGENC_IS_MBC_NEWLINE(), reported as 0/1.
 */
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
                  const UChar *end, OnigOptionType option, int check_prev)
{
  const UChar *next;

  if (! IS_NEWLINE_CRLF(option))
    return ONIGENC_IS_MBC_NEWLINE(enc, p, end);

  if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
    if (check_prev) {
      const UChar *before = onigenc_get_prev_char_head(enc, start, p, end);

      if (before != NULL && ONIGENC_MBC_TO_CODE(enc, before, end) == 0x0d)
        return 0;
    }
    return 1;
  }

  next = p + enclen(enc, p, end);
  if (next < end
      && ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d
      && ONIGENC_MBC_TO_CODE(enc, next, end) == 0x0a)
    return 1;

  return ONIGENC_IS_MBC_NEWLINE(enc, p, end) ? 1 : 0;
}
#else
#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
#endif
00076
00077 #ifdef USE_CAPTURE_HISTORY
00078 static void history_tree_free(OnigCaptureTreeNode* node);
00079
00080 static void
00081 history_tree_clear(OnigCaptureTreeNode* node)
00082 {
00083 int i;
00084
00085 if (IS_NOT_NULL(node)) {
00086 for (i = 0; i < node->num_childs; i++) {
00087 if (IS_NOT_NULL(node->childs[i])) {
00088 history_tree_free(node->childs[i]);
00089 }
00090 }
00091 for (i = 0; i < node->allocated; i++) {
00092 node->childs[i] = (OnigCaptureTreeNode* )0;
00093 }
00094 node->num_childs = 0;
00095 node->beg = ONIG_REGION_NOTPOS;
00096 node->end = ONIG_REGION_NOTPOS;
00097 node->group = -1;
00098 xfree(node->childs);
00099 node->childs = (OnigCaptureTreeNode** )0;
00100 }
00101 }
00102
00103 static void
00104 history_tree_free(OnigCaptureTreeNode* node)
00105 {
00106 history_tree_clear(node);
00107 xfree(node);
00108 }
00109
00110 static void
00111 history_root_free(OnigRegion* r)
00112 {
00113 if (IS_NOT_NULL(r->history_root)) {
00114 history_tree_free(r->history_root);
00115 r->history_root = (OnigCaptureTreeNode* )0;
00116 }
00117 }
00118
00119 static OnigCaptureTreeNode*
00120 history_node_new(void)
00121 {
00122 OnigCaptureTreeNode* node;
00123
00124 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
00125 CHECK_NULL_RETURN(node);
00126 node->childs = (OnigCaptureTreeNode** )0;
00127 node->allocated = 0;
00128 node->num_childs = 0;
00129 node->group = -1;
00130 node->beg = ONIG_REGION_NOTPOS;
00131 node->end = ONIG_REGION_NOTPOS;
00132
00133 return node;
00134 }
00135
00136 static int
00137 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
00138 {
00139 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
00140
00141 if (parent->num_childs >= parent->allocated) {
00142 int n, i;
00143
00144 if (IS_NULL(parent->childs)) {
00145 n = HISTORY_TREE_INIT_ALLOC_SIZE;
00146 parent->childs =
00147 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
00148 CHECK_NULL_RETURN_MEMERR(parent->childs);
00149 }
00150 else {
00151 OnigCaptureTreeNode** tmp;
00152 n = parent->allocated * 2;
00153 tmp =
00154 (OnigCaptureTreeNode** )xrealloc(parent->childs,
00155 sizeof(OnigCaptureTreeNode*) * n);
00156 if (tmp == 0) {
00157 history_tree_clear(parent);
00158 return ONIGERR_MEMORY;
00159 }
00160 parent->childs = tmp;
00161 }
00162 for (i = parent->allocated; i < n; i++) {
00163 parent->childs[i] = (OnigCaptureTreeNode* )0;
00164 }
00165 parent->allocated = n;
00166 }
00167
00168 parent->childs[parent->num_childs] = child;
00169 parent->num_childs++;
00170 return 0;
00171 }
00172
00173 static OnigCaptureTreeNode*
00174 history_tree_clone(OnigCaptureTreeNode* node)
00175 {
00176 int i, r;
00177 OnigCaptureTreeNode *clone, *child;
00178
00179 clone = history_node_new();
00180 CHECK_NULL_RETURN(clone);
00181
00182 clone->beg = node->beg;
00183 clone->end = node->end;
00184 for (i = 0; i < node->num_childs; i++) {
00185 child = history_tree_clone(node->childs[i]);
00186 if (IS_NULL(child)) {
00187 history_tree_free(clone);
00188 return (OnigCaptureTreeNode* )0;
00189 }
00190 r = history_tree_add_child(clone, child);
00191 if (r != 0) {
00192 history_tree_free(child);
00193 history_tree_free(clone);
00194 return (OnigCaptureTreeNode* )0;
00195 }
00196 }
00197
00198 return clone;
00199 }
00200
00201 extern OnigCaptureTreeNode*
00202 onig_get_capture_tree(OnigRegion* region)
00203 {
00204 return region->history_root;
00205 }
00206 #endif
00207
00208 extern void
00209 onig_region_clear(OnigRegion* region)
00210 {
00211 int i;
00212
00213 for (i = 0; i < region->num_regs; i++) {
00214 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
00215 }
00216 #ifdef USE_CAPTURE_HISTORY
00217 history_root_free(region);
00218 #endif
00219 }
00220
00221 extern int
00222 onig_region_resize(OnigRegion* region, int n)
00223 {
00224 region->num_regs = n;
00225
00226 if (n < ONIG_NREGION)
00227 n = ONIG_NREGION;
00228
00229 if (region->allocated == 0) {
00230 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
00231 if (region->beg == 0)
00232 return ONIGERR_MEMORY;
00233
00234 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
00235 if (region->end == 0) {
00236 xfree(region->beg);
00237 return ONIGERR_MEMORY;
00238 }
00239
00240 region->allocated = n;
00241 }
00242 else if (region->allocated < n) {
00243 OnigPosition *tmp;
00244
00245 region->allocated = 0;
00246 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
00247 if (tmp == 0) {
00248 xfree(region->beg);
00249 xfree(region->end);
00250 return ONIGERR_MEMORY;
00251 }
00252 region->beg = tmp;
00253 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
00254 if (tmp == 0) {
00255 xfree(region->beg);
00256 xfree(region->end);
00257 return ONIGERR_MEMORY;
00258 }
00259 region->end = tmp;
00260
00261 region->allocated = n;
00262 }
00263
00264 return 0;
00265 }
00266
00267 static int
00268 onig_region_resize_clear(OnigRegion* region, int n)
00269 {
00270 int r;
00271
00272 r = onig_region_resize(region, n);
00273 if (r != 0) return r;
00274 onig_region_clear(region);
00275 return 0;
00276 }
00277
00278 extern int
00279 onig_region_set(OnigRegion* region, int at, int beg, int end)
00280 {
00281 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
00282
00283 if (at >= region->allocated) {
00284 int r = onig_region_resize(region, at + 1);
00285 if (r < 0) return r;
00286 }
00287
00288 region->beg[at] = beg;
00289 region->end[at] = end;
00290 return 0;
00291 }
00292
00293 extern void
00294 onig_region_init(OnigRegion* region)
00295 {
00296 region->num_regs = 0;
00297 region->allocated = 0;
00298 region->beg = (OnigPosition* )0;
00299 region->end = (OnigPosition* )0;
00300 region->history_root = (OnigCaptureTreeNode* )0;
00301 }
00302
00303 extern OnigRegion*
00304 onig_region_new(void)
00305 {
00306 OnigRegion* r;
00307
00308 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
00309 if (r)
00310 onig_region_init(r);
00311 return r;
00312 }
00313
00314 extern void
00315 onig_region_free(OnigRegion* r, int free_self)
00316 {
00317 if (r) {
00318 if (r->allocated > 0) {
00319 if (r->beg) xfree(r->beg);
00320 if (r->end) xfree(r->end);
00321 r->allocated = 0;
00322 }
00323 #ifdef USE_CAPTURE_HISTORY
00324 history_root_free(r);
00325 #endif
00326 if (free_self) xfree(r);
00327 }
00328 }
00329
00330 extern void
00331 onig_region_copy(OnigRegion* to, OnigRegion* from)
00332 {
00333 #define RREGC_SIZE (sizeof(int) * from->num_regs)
00334 int i, r;
00335
00336 if (to == from) return;
00337
00338 r = onig_region_resize(to, from->num_regs);
00339 if (r) return;
00340
00341 for (i = 0; i < from->num_regs; i++) {
00342 to->beg[i] = from->beg[i];
00343 to->end[i] = from->end[i];
00344 }
00345 to->num_regs = from->num_regs;
00346
00347 #ifdef USE_CAPTURE_HISTORY
00348 history_root_free(to);
00349
00350 if (IS_NOT_NULL(from->history_root)) {
00351 to->history_root = history_tree_clone(from->history_root);
00352 }
00353 #endif
00354 }
00355
00356
/* Sentinel stored in mem_end_stk[] by STACK_PUSH_MEM_START to mean "this
   group has no end entry yet".
   NOTE(review): the expansion is an unparenthesized -1. */
00358 #define INVALID_STACK_INDEX -1
00359
00360
00361
/* Stack entry type tags.  The three tags below have a non-zero low byte,
   which is what the STACK_POP loops test with STK_MASK_POP_USED to decide
   where to stop. */
00362 #define STK_ALT 0x0001
00363 #define STK_LOOK_BEHIND_NOT 0x0002
00364 #define STK_POS_NOT 0x0003
00365
/* Tags with a zero low byte: the STACK_POP loops walk past these. */
00366 #define STK_MEM_START 0x0100
00367 #define STK_MEM_END 0x8200
00368 #define STK_REPEAT_INC 0x0300
00369 #define STK_STATE_CHECK_MARK 0x1000
00370
00371 #define STK_NULL_CHECK_START 0x3000
00372 #define STK_NULL_CHECK_END 0x5000
00373 #define STK_MEM_END_MARK 0x8400
00374 #define STK_POS 0x0500
00375 #define STK_STOP_BT 0x0600
00376 #define STK_REPEAT 0x0700
00377 #define STK_CALL_FRAME 0x0800
00378 #define STK_RETURN 0x0900
00379 #define STK_VOID 0x0a00
00380
00381
/* Bit masks applied to the type tag:
   - STK_MASK_POP_USED: low byte; non-zero marks an entry a pop stops at.
   - STK_MASK_TO_VOID_TARGET: tested by IS_TO_VOID_TARGET.
   - STK_MASK_MEM_END_OR_MARK: the 0x8000 bit, present in both STK_MEM_END
     and STK_MEM_END_MARK. */
00382 #define STK_MASK_POP_USED 0x00ff
00383 #define STK_MASK_TO_VOID_TARGET 0x10ff
00384 #define STK_MASK_MEM_END_OR_MARK 0x8000
00385
/* MATCH_ARG_INIT fills in a match-argument structure: no saved stack
   (stack_p = NULL) plus the options, region, start and gpos passed in.
   With USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE it also resets best_len to
   ONIG_MISMATCH. */
00386 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
00387 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
00388 (msa).stack_p = (void* )0;\
00389 (msa).options = (arg_option);\
00390 (msa).region = (arg_region);\
00391 (msa).start = (arg_start);\
00392 (msa).gpos = (arg_gpos);\
00393 (msa).best_len = ONIG_MISMATCH;\
00394 } while(0)
00395 #else
00396 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
00397 (msa).stack_p = (void* )0;\
00398 (msa).options = (arg_option);\
00399 (msa).region = (arg_region);\
00400 (msa).start = (arg_start);\
00401 (msa).gpos = (arg_gpos);\
00402 } while(0)
00403 #endif
00404
00405 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00406
/* Buffers of at least this many bytes are taken from xmalloc (and freed by
   MATCH_ARG_FREE); smaller ones come from xalloca. */
00407 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
00408
/* Set up (msa).state_check_buff, a bit array with one bit per
   (string position, state) pair: size is ((str_len + 1) * state_num + 7) / 8
   bytes.  The buffer is only created when state_num > 0, the string is at
   least STATE_CHECK_STRING_THRESHOLD_LEN long and the size is below
   STATE_CHECK_BUFF_MAX_SIZE; otherwise buff is NULL and size 0.
   Side effects to be aware of:
   - `offset` is an lvalue argument and is overwritten with a byte offset;
     only the bytes from that offset to the end are zeroed.
   - on xmalloc failure CHECK_NULL_RETURN_MEMERR runs (defined elsewhere;
     presumably returns from the enclosing function - confirm).
   - the xalloca result belongs to the enclosing function's frame. */
00409 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
00410 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
00411 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
00412 offset = ((offset) * (state_num)) >> 3;\
00413 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
00414 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
00415 (msa).state_check_buff = (void* )xmalloc(size);\
00416 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
00417 }\
00418 else \
00419 (msa).state_check_buff = (void* )xalloca(size);\
00420 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
00421 (size_t )(size - (offset))); \
00422 (msa).state_check_buff_size = size;\
00423 }\
00424 else {\
00425 (msa).state_check_buff = (void* )0;\
00426 (msa).state_check_buff_size = 0;\
00427 }\
00428 }\
00429 else {\
00430 (msa).state_check_buff = (void* )0;\
00431 (msa).state_check_buff_size = 0;\
00432 }\
00433 } while(0)
00434
/* Free what the match argument owns: the saved stack, and the state-check
   buffer only when its size says it came from xmalloc rather than xalloca. */
00435 #define MATCH_ARG_FREE(msa) do {\
00436 if ((msa).stack_p) xfree((msa).stack_p);\
00437 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
00438 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
00439 }\
00440 } while(0)
00441 #else
/* NOTE(review): this variant expands to a bare `if`, unlike the
   do { } while(0) form above, so it can capture a following `else`. */
00442 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
00443 #endif
00444
00445
00446
/* Set up the match stack for one matching run.  Relies on the caller's
   locals msa, stk_alloc, stk_base, stk and stk_end.
   - If msa already carries a saved heap stack (msa->stack_p), that stack of
     msa->stack_n entries is reused and only the array of ptr_num
     OnigStackIndex values is taken from xalloca.
   - Otherwise a single xalloca block holds the index array followed by
     stack_num stack entries.
   In both cases alloc_addr receives the xalloca block and stk starts at
   stk_base. */
00447 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
00448 if (msa->stack_p) {\
00449 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
00450 stk_alloc = (OnigStackType* )(msa->stack_p);\
00451 stk_base = stk_alloc;\
00452 stk = stk_base;\
00453 stk_end = stk_base + msa->stack_n;\
00454 }\
00455 else {\
00456 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
00457 + sizeof(OnigStackType) * (stack_num));\
00458 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
00459 stk_base = stk_alloc;\
00460 stk = stk_base;\
00461 stk_end = stk_base + (stack_num);\
00462 }\
00463 } while(0)
00464
/* Record the current stack in msa when stk_base no longer equals stk_alloc
   (i.e. the stack has been moved since STACK_INIT), together with its
   capacity in entries. */
00465 #define STACK_SAVE do{\
00466 if (stk_base != stk_alloc) {\
00467 msa->stack_p = stk_base;\
00468 msa->stack_n = stk_end - stk_base; \
00469 };\
00470 } while(0)
00471
00472 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
00473
00474 extern unsigned int
00475 onig_get_match_stack_limit_size(void)
00476 {
00477 return MatchStackLimitSize;
00478 }
00479
00480 extern int
00481 onig_set_match_stack_limit_size(unsigned int size)
00482 {
00483 MatchStackLimitSize = size;
00484 return 0;
00485 }
00486
00487 static int
00488 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
00489 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
00490 {
00491 size_t n;
00492 OnigStackType *x, *stk_base, *stk_end, *stk;
00493
00494 stk_base = *arg_stk_base;
00495 stk_end = *arg_stk_end;
00496 stk = *arg_stk;
00497
00498 n = stk_end - stk_base;
00499 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
00500 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
00501 if (IS_NULL(x)) {
00502 STACK_SAVE;
00503 return ONIGERR_MEMORY;
00504 }
00505 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
00506 n *= 2;
00507 }
00508 else {
00509 unsigned int limit_size = MatchStackLimitSize;
00510 n *= 2;
00511 if (limit_size != 0 && n > limit_size) {
00512 if ((unsigned int )(stk_end - stk_base) == limit_size)
00513 return ONIGERR_MATCH_STACK_LIMIT_OVER;
00514 else
00515 n = limit_size;
00516 }
00517 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
00518 if (IS_NULL(x)) {
00519 STACK_SAVE;
00520 return ONIGERR_MEMORY;
00521 }
00522 }
00523 *arg_stk = x + (stk - stk_base);
00524 *arg_stk_base = x;
00525 *arg_stk_end = x + n;
00526 return 0;
00527 }
00528
/* Make sure at least n free entries remain between stk and stk_end, growing
   the stack with stack_double() if not.  On failure the stack is recorded
   with STACK_SAVE and the ENCLOSING FUNCTION returns the error code.
   Uses the caller's locals stk, stk_base, stk_end, stk_alloc and msa. */
00529 #define STACK_ENSURE(n) do {\
00530 if (stk_end - stk < (n)) {\
00531 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
00532 if (r != 0) { STACK_SAVE; return r; } \
00533 }\
00534 } while(0)
00535
/* Convert between an entry pointer and its index relative to stk_base.
   Indices stay meaningful when the stack is reallocated. */
00536 #define STACK_AT(index) (stk_base + (index))
00537 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
00538
/* Push an entry that carries nothing but its type tag.
   STACK_INC is defined elsewhere (presumably advances stk - confirm). */
00539 #define STACK_PUSH_TYPE(stack_type) do {\
00540 STACK_ENSURE(1);\
00541 stk->type = (stack_type);\
00542 STACK_INC;\
00543 } while(0)
00544
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
00545 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
00546
00547 #ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index in state_check_buff for string position s and state number
   snum (state numbers start at 1).  Uses the caller's str and
   num_comb_exp_check. */
00548 #define STATE_CHECK_POS(s,snum) \
00549 (((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Read the state-check bit for snum into v; v is 0 when there is no buffer.
   Note that the position comes from the caller's variable `s`, which is not
   a macro parameter. */
00550 #define STATE_CHECK_VAL(v,snum) do {\
00551 if (state_check_buff != NULL) {\
00552 int x = STATE_CHECK_POS(s,snum);\
00553 (v) = state_check_buff[x/8] & (1<<(x%8));\
00554 }\
00555 else (v) = 0;\
00556 } while(0)
00557
00558
/* An `else if` arm for the pop loops: when the popped entry is a
   STK_STATE_CHECK_MARK, set the bit for the position/state it recorded.
   Must directly follow an if / else-if chain. */
00559 #define ELSE_IF_STATE_CHECK_MARK(stk) \
00560 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
00561 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
00562 state_check_buff[x/8] |= (1<<(x%8)); \
00563 }
00564
/* Push a state entry: pattern position, string position, previous string
   position and keep position.  state_check is cleared. */
00565 #define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
00566 STACK_ENSURE(1);\
00567 stk->type = (stack_type);\
00568 stk->u.state.pcode = (pat);\
00569 stk->u.state.pstr = (s);\
00570 stk->u.state.pstr_prev = (sprev);\
00571 stk->u.state.state_check = 0;\
00572 stk->u.state.pkeep = (keep);\
00573 STACK_INC;\
00574 } while(0)
00575
/* Push type and pattern position only, WITHOUT calling STACK_ENSURE; the
   caller must already have guaranteed a free slot. */
00576 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
00577 stk->type = (stack_type);\
00578 stk->u.state.pcode = (pat);\
00579 stk->u.state.state_check = 0;\
00580 STACK_INC;\
00581 } while(0)
00582
/* Push a STK_ALT entry that also remembers the state number snum, but only
   when a state-check buffer exists (otherwise 0 is stored). */
00583 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
00584 STACK_ENSURE(1);\
00585 stk->type = STK_ALT;\
00586 stk->u.state.pcode = (pat);\
00587 stk->u.state.pstr = (s);\
00588 stk->u.state.pstr_prev = (sprev);\
00589 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
00590 stk->u.state.pkeep = (keep);\
00591 STACK_INC;\
00592 } while(0)
00593
/* Push a STK_STATE_CHECK_MARK for (s, snum); does nothing when there is no
   state-check buffer. */
00594 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
00595 if (state_check_buff != NULL) {\
00596 STACK_ENSURE(1);\
00597 stk->type = STK_STATE_CHECK_MARK;\
00598 stk->u.state.pstr = (s);\
00599 stk->u.state.state_check = (snum);\
00600 STACK_INC;\
00601 }\
00602 } while(0)
00603
00604 #else
00605
/* Without the combination-explosion check the pop loops need no extra arm,
   and the push macros have no state_check field to maintain. */
00606 #define ELSE_IF_STATE_CHECK_MARK(stk)
00607
00608 #define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
00609 STACK_ENSURE(1);\
00610 stk->type = (stack_type);\
00611 stk->u.state.pcode = (pat);\
00612 stk->u.state.pstr = (s);\
00613 stk->u.state.pstr_prev = (sprev);\
00614 stk->u.state.pkeep = (keep);\
00615 STACK_INC;\
00616 } while(0)
00617
00618 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
00619 stk->type = (stack_type);\
00620 stk->u.state.pcode = (pat);\
00621 STACK_INC;\
00622 } while(0)
00623 #endif
00624
/* Convenience wrappers that push a state entry (or a bare type) with a
   fixed type tag. */
00625 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
00626 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
00627 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
00628 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
00629 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
00630 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
00631
/* Push a STK_REPEAT entry for repeat number id at pattern position pat,
   with its iteration count starting at 0. */
00632 #define STACK_PUSH_REPEAT(id, pat) do {\
00633 STACK_ENSURE(1);\
00634 stk->type = STK_REPEAT;\
00635 stk->u.repeat.num = (id);\
00636 stk->u.repeat.pcode = (pat);\
00637 stk->u.repeat.count = 0;\
00638 STACK_INC;\
00639 } while(0)
00640
/* Push a STK_REPEAT_INC entry holding the stack index of the STK_REPEAT
   entry it belongs to; the pop loops use it to decrement that entry's
   count again. */
00641 #define STACK_PUSH_REPEAT_INC(sindex) do {\
00642 STACK_ENSURE(1);\
00643 stk->type = STK_REPEAT_INC;\
00644 stk->u.repeat_inc.si = (sindex);\
00645 STACK_INC;\
00646 } while(0)
00647
/* Push the start of capture group mnum at string position s.  The previous
   mem_start_stk / mem_end_stk values are saved in the entry so a pop can
   restore them; then mem_start_stk[mnum] is pointed at this entry and
   mem_end_stk[mnum] is marked INVALID_STACK_INDEX. */
00648 #define STACK_PUSH_MEM_START(mnum, s) do {\
00649 STACK_ENSURE(1);\
00650 stk->type = STK_MEM_START;\
00651 stk->u.mem.num = (mnum);\
00652 stk->u.mem.pstr = (s);\
00653 stk->u.mem.start = mem_start_stk[mnum];\
00654 stk->u.mem.end = mem_end_stk[mnum];\
00655 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
00656 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
00657 STACK_INC;\
00658 } while(0)
00659
/* Push the end of capture group mnum at string position s, saving the
   previous start/end indices and pointing mem_end_stk[mnum] at this entry. */
00660 #define STACK_PUSH_MEM_END(mnum, s) do {\
00661 STACK_ENSURE(1);\
00662 stk->type = STK_MEM_END;\
00663 stk->u.mem.num = (mnum);\
00664 stk->u.mem.pstr = (s);\
00665 stk->u.mem.start = mem_start_stk[mnum];\
00666 stk->u.mem.end = mem_end_stk[mnum];\
00667 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
00668 STACK_INC;\
00669 } while(0)
00670
/* Push a marker that only records the group number; it counts as an "end"
   for STACK_GET_MEM_START's nesting level. */
00671 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
00672 STACK_ENSURE(1);\
00673 stk->type = STK_MEM_END_MARK;\
00674 stk->u.mem.num = (mnum);\
00675 STACK_INC;\
00676 } while(0)
00677
/* Scan downwards from the stack top for the STK_MEM_START of group mnum
   that is not already paired with an end entry or end mark above it, and
   leave k pointing at it.  The loop also ends when k reaches stk_base. */
00678 #define STACK_GET_MEM_START(mnum, k) do {\
00679 int level = 0;\
00680 k = stk;\
00681 while (k > stk_base) {\
00682 k--;\
00683 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
00684 && k->u.mem.num == (mnum)) {\
00685 level++;\
00686 }\
00687 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
00688 if (level == 0) break;\
00689 level--;\
00690 }\
00691 }\
00692 } while(0)
00693
/* Scan upwards from k to the stack top and extract the string range of
   group mnum: `start` from the first (outermost) STK_MEM_START, `end` from
   the STK_MEM_END that brings the nesting level back to 0.  k is advanced
   by the scan. */
00694 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
00695 int level = 0;\
00696 while (k < stk) {\
00697 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
00698 if (level == 0) (start) = k->u.mem.pstr;\
00699 level++;\
00700 }\
00701 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
00702 level--;\
00703 if (level == 0) {\
00704 (end) = k->u.mem.pstr;\
00705 break;\
00706 }\
00707 }\
00708 k++;\
00709 }\
00710 } while(0)
00711
/* Push the start of a null (empty-loop) check: check number cnum and the
   string position at which the loop body was entered. */
00712 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
00713 STACK_ENSURE(1);\
00714 stk->type = STK_NULL_CHECK_START;\
00715 stk->u.null_check.num = (cnum);\
00716 stk->u.null_check.pstr = (s);\
00717 STACK_INC;\
00718 } while(0)
00719
/* Push the matching end marker for null check cnum. */
00720 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
00721 STACK_ENSURE(1);\
00722 stk->type = STK_NULL_CHECK_END;\
00723 stk->u.null_check.num = (cnum);\
00724 STACK_INC;\
00725 } while(0)
00726
/* Push a call frame remembering the pattern address to return to. */
00727 #define STACK_PUSH_CALL_FRAME(pat) do {\
00728 STACK_ENSURE(1);\
00729 stk->type = STK_CALL_FRAME;\
00730 stk->u.call_frame.ret_addr = (pat);\
00731 STACK_INC;\
00732 } while(0)
00733
/* Push a return marker; it balances a STK_CALL_FRAME in the level counting
   done by STACK_RETURN and STACK_GET_REPEAT. */
00734 #define STACK_PUSH_RETURN do {\
00735 STACK_ENSURE(1);\
00736 stk->type = STK_RETURN;\
00737 STACK_INC;\
00738 } while(0)
00739
00740
/* Debug-only guard: if p has moved below stk_base, report where and jump to
   the caller's stack_error label.  Compiles to nothing without ONIG_DEBUG,
   so the scanning macros below rely on the expected entry being present. */
00741 #ifdef ONIG_DEBUG
00742 #define STACK_BASE_CHECK(p, at) \
00743 if ((p) < stk_base) {\
00744 fprintf(stderr, "at %s\n", at);\
00745 goto stack_error;\
00746 }
00747 #else
00748 #define STACK_BASE_CHECK(p, at)
00749 #endif
00750
/* Drop exactly one entry from the top of the stack. */
00751 #define STACK_POP_ONE do {\
00752 stk--;\
00753 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
00754 } while(0)
00755
/* Pop entries until one whose type has a non-zero low byte
   (STK_MASK_POP_USED) is reached; stk is left pointing at that entry.
   How much state is restored on the way depends on the caller's pop_level:
   - STACK_POP_LEVEL_FREE: nothing besides state-check marks;
   - STACK_POP_LEVEL_MEM_START: mem_start_stk / mem_end_stk are restored
     from STK_MEM_START entries;
   - anything else: additionally, STK_REPEAT_INC entries decrement the count
     of their STK_REPEAT entry and STK_MEM_END entries restore the indices. */
00756 #define STACK_POP do {\
00757 switch (pop_level) {\
00758 case STACK_POP_LEVEL_FREE:\
00759 while (1) {\
00760 stk--;\
00761 STACK_BASE_CHECK(stk, "STACK_POP"); \
00762 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
00763 ELSE_IF_STATE_CHECK_MARK(stk);\
00764 }\
00765 break;\
00766 case STACK_POP_LEVEL_MEM_START:\
00767 while (1) {\
00768 stk--;\
00769 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
00770 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
00771 else if (stk->type == STK_MEM_START) {\
00772 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00773 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00774 }\
00775 ELSE_IF_STATE_CHECK_MARK(stk);\
00776 }\
00777 break;\
00778 default:\
00779 while (1) {\
00780 stk--;\
00781 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
00782 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
00783 else if (stk->type == STK_MEM_START) {\
00784 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00785 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00786 }\
00787 else if (stk->type == STK_REPEAT_INC) {\
00788 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
00789 }\
00790 else if (stk->type == STK_MEM_END) {\
00791 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00792 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00793 }\
00794 ELSE_IF_STATE_CHECK_MARK(stk);\
00795 }\
00796 break;\
00797 }\
00798 } while(0)
00799
/* Pop down to the nearest STK_POS_NOT entry, always performing the full
   restoration (group indices, repeat counts, state-check marks). */
00800 #define STACK_POP_TIL_POS_NOT do {\
00801 while (1) {\
00802 stk--;\
00803 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
00804 if (stk->type == STK_POS_NOT) break;\
00805 else if (stk->type == STK_MEM_START) {\
00806 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00807 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00808 }\
00809 else if (stk->type == STK_REPEAT_INC) {\
00810 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
00811 }\
00812 else if (stk->type == STK_MEM_END) {\
00813 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00814 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00815 }\
00816 ELSE_IF_STATE_CHECK_MARK(stk);\
00817 }\
00818 } while(0)
00819
/* Same as STACK_POP_TIL_POS_NOT, but stops at the nearest
   STK_LOOK_BEHIND_NOT entry. */
00820 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
00821 while (1) {\
00822 stk--;\
00823 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
00824 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
00825 else if (stk->type == STK_MEM_START) {\
00826 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00827 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00828 }\
00829 else if (stk->type == STK_REPEAT_INC) {\
00830 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
00831 }\
00832 else if (stk->type == STK_MEM_END) {\
00833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
00834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
00835 }\
00836 ELSE_IF_STATE_CHECK_MARK(stk);\
00837 }\
00838 } while(0)
00839
/* Walk down from the stack top to the nearest STK_POS entry, turning that
   entry and every IS_TO_VOID_TARGET entry above it into STK_VOID.  The
   stack pointer itself is not moved; k ends on the former STK_POS entry. */
00840 #define STACK_POS_END(k) do {\
00841 k = stk;\
00842 while (1) {\
00843 k--;\
00844 STACK_BASE_CHECK(k, "STACK_POS_END"); \
00845 if (IS_TO_VOID_TARGET(k)) {\
00846 k->type = STK_VOID;\
00847 }\
00848 else if (k->type == STK_POS) {\
00849 k->type = STK_VOID;\
00850 break;\
00851 }\
00852 }\
00853 } while(0)
00854
/* Same voiding walk, stopping at the nearest STK_STOP_BT entry; uses its
   own local cursor. */
00855 #define STACK_STOP_BT_END do {\
00856 OnigStackType *k = stk;\
00857 while (1) {\
00858 k--;\
00859 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
00860 if (IS_TO_VOID_TARGET(k)) {\
00861 k->type = STK_VOID;\
00862 }\
00863 else if (k->type == STK_STOP_BT) {\
00864 k->type = STK_VOID;\
00865 break;\
00866 }\
00867 }\
00868 } while(0)
00869
/* Find the nearest STK_NULL_CHECK_START for check number id and set isnull
   to whether the string position recorded there equals s. */
00870 #define STACK_NULL_CHECK(isnull,id,s) do {\
00871 OnigStackType* k = stk;\
00872 while (1) {\
00873 k--;\
00874 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
00875 if (k->type == STK_NULL_CHECK_START) {\
00876 if (k->u.null_check.num == (id)) {\
00877 (isnull) = (k->u.null_check.pstr == (s));\
00878 break;\
00879 }\
00880 }\
00881 }\
00882 } while(0)
00883
/* Level-counting variant: start entries for id that are balanced by an end
   entry above them are skipped.
   NOTE(review): here every STK_NULL_CHECK_END raises the level regardless of
   its number, whereas STACK_NULL_CHECK_MEMST_REC only counts end entries
   whose number equals id - confirm this difference is intended. */
00884 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
00885 int level = 0;\
00886 OnigStackType* k = stk;\
00887 while (1) {\
00888 k--;\
00889 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
00890 if (k->type == STK_NULL_CHECK_START) {\
00891 if (k->u.null_check.num == (id)) {\
00892 if (level == 0) {\
00893 (isnull) = (k->u.null_check.pstr == (s));\
00894 break;\
00895 }\
00896 else level--;\
00897 }\
00898 }\
00899 else if (k->type == STK_NULL_CHECK_END) {\
00900 level++;\
00901 }\
00902 }\
00903 } while(0)
00904
/* Null check that also looks at capture groups.  If the recorded position
   differs from s, isnull is 0.  Otherwise the entries between the start
   entry and the stack top are scanned and, for each STK_MEM_START found:
   - isnull becomes 0 (scan stops) if the group has no end yet or its start
     and end positions differ;
   - isnull becomes -1 if the group is empty but its end position is not s.
   With no such group isnull stays 1.  The group end is read either through
   the stack or directly from u.mem.end, depending on reg->bt_mem_end. */
00905 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
00906 OnigStackType* k = stk;\
00907 while (1) {\
00908 k--;\
00909 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
00910 if (k->type == STK_NULL_CHECK_START) {\
00911 if (k->u.null_check.num == (id)) {\
00912 if (k->u.null_check.pstr != (s)) {\
00913 (isnull) = 0;\
00914 break;\
00915 }\
00916 else {\
00917 UChar* endp;\
00918 (isnull) = 1;\
00919 while (k < stk) {\
00920 if (k->type == STK_MEM_START) {\
00921 if (k->u.mem.end == INVALID_STACK_INDEX) {\
00922 (isnull) = 0; break;\
00923 }\
00924 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
00925 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
00926 else\
00927 endp = (UChar* )k->u.mem.end;\
00928 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
00929 (isnull) = 0; break;\
00930 }\
00931 else if (endp != s) {\
00932 (isnull) = -1; \
00933 }\
00934 }\
00935 k++;\
00936 }\
00937 break;\
00938 }\
00939 }\
00940 }\
00941 }\
00942 } while(0)
00943
/* Level-counting variant of STACK_NULL_CHECK_MEMST: start entries for id
   that are balanced by an end entry with the same id are skipped before the
   same position and capture-group test is applied. */
00944 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
00945 int level = 0;\
00946 OnigStackType* k = stk;\
00947 while (1) {\
00948 k--;\
00949 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
00950 if (k->type == STK_NULL_CHECK_START) {\
00951 if (k->u.null_check.num == (id)) {\
00952 if (level == 0) {\
00953 if (k->u.null_check.pstr != (s)) {\
00954 (isnull) = 0;\
00955 break;\
00956 }\
00957 else {\
00958 UChar* endp;\
00959 (isnull) = 1;\
00960 while (k < stk) {\
00961 if (k->type == STK_MEM_START) {\
00962 if (k->u.mem.end == INVALID_STACK_INDEX) {\
00963 (isnull) = 0; break;\
00964 }\
00965 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
00966 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
00967 else\
00968 endp = (UChar* )k->u.mem.end;\
00969 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
00970 (isnull) = 0; break;\
00971 }\
00972 else if (endp != s) {\
00973 (isnull) = -1; \
00974 }\
00975 }\
00976 k++;\
00977 }\
00978 break;\
00979 }\
00980 }\
00981 else {\
00982 level--;\
00983 }\
00984 }\
00985 }\
00986 else if (k->type == STK_NULL_CHECK_END) {\
00987 if (k->u.null_check.num == (id)) level++;\
00988 }\
00989 }\
00990 } while(0)
00991
/* Scan downwards for the STK_REPEAT entry with number id that is at call
   level 0 relative to the stack top, and leave k pointing at it.  Passing a
   STK_RETURN raises the level and passing a STK_CALL_FRAME lowers it. */
00992 #define STACK_GET_REPEAT(id, k) do {\
00993 int level = 0;\
00994 k = stk;\
00995 while (1) {\
00996 k--;\
00997 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
00998 if (k->type == STK_REPEAT) {\
00999 if (level == 0) {\
01000 if (k->u.repeat.num == (id)) {\
01001 break;\
01002 }\
01003 }\
01004 }\
01005 else if (k->type == STK_CALL_FRAME) level--;\
01006 else if (k->type == STK_RETURN) level++;\
01007 }\
01008 } while(0)
01009
/* Find the STK_CALL_FRAME that belongs to the current call (skipping frames
   already balanced by a STK_RETURN above them) and store its return address
   in addr. */
01010 #define STACK_RETURN(addr) do {\
01011 int level = 0;\
01012 OnigStackType* k = stk;\
01013 while (1) {\
01014 k--;\
01015 STACK_BASE_CHECK(k, "STACK_RETURN"); \
01016 if (k->type == STK_CALL_FRAME) {\
01017 if (level == 0) {\
01018 (addr) = k->u.call_frame.ret_addr;\
01019 break;\
01020 }\
01021 else level--;\
01022 }\
01023 else if (k->type == STK_RETURN)\
01024 level++;\
01025 }\
01026 } while(0)
01027
01028
/* Compare len bytes at s1 and s2, jumping to the caller's `fail` label on
   the first difference.  All three arguments must be modifiable lvalues:
   s1 and s2 are advanced and len is counted down. */
01029 #define STRING_CMP(s1,s2,len) do {\
01030 while (len-- > 0) {\
01031 if (*s1++ != *s2++) goto fail;\
01032 }\
01033 } while(0)
01034
/* Case-insensitive comparison through string_cmp_ic(); jumps to `fail` when
   it reports a mismatch.  Uses the caller's `encode` variable, and ps2 is a
   pointer to the second string pointer, which is advanced on success. */
01035 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
01036 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
01037 goto fail; \
01038 } while(0)
01039
01040 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
01041 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
01042 {
01043 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01044 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01045 UChar *p1, *p2, *end1, *s2;
01046 int len1, len2;
01047
01048 s2 = *ps2;
01049 end1 = s1 + mblen;
01050 while (s1 < end1) {
01051 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
01052 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
01053 if (len1 != len2) return 0;
01054 p1 = buf1;
01055 p2 = buf2;
01056 while (len1-- > 0) {
01057 if (*p1 != *p2) return 0;
01058 p1++;
01059 p2++;
01060 }
01061 }
01062
01063 *ps2 = s2;
01064 return 1;
01065 }
01066
/* Like STRING_CMP, but reports the outcome in is_fail (1 on mismatch, 0 on
   match) instead of jumping.  s1, s2 and len are modified. */
01067 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
01068 is_fail = 0;\
01069 while (len-- > 0) {\
01070 if (*s1++ != *s2++) {\
01071 is_fail = 1; break;\
01072 }\
01073 }\
01074 } while(0)
01075
/* Like STRING_CMP_IC, but reports the outcome in is_fail instead of
   jumping.  Uses the caller's `encode` variable. */
01076 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
01077 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
01078 is_fail = 1; \
01079 else \
01080 is_fail = 0; \
01081 } while(0)
01082
01083
/* Position tests against the caller's str / end pointers. */
01084 #define IS_EMPTY_STR (str == end)
01085 #define ON_STR_BEGIN(s) ((s) == str)
01086 #define ON_STR_END(s) ((s) == end)
/* DATA_ENSURE* check that n more bytes are available at the caller's `s`.
   The bound is right_range with
   USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE and end otherwise.
   DATA_ENSURE jumps to `fail` when they are not.
   NOTE(review): DATA_ENSURE expands to a bare `if`, so it should not be
   used directly before an `else`. */
01087 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
01088 #define DATA_ENSURE_CHECK1 (s < right_range)
01089 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
01090 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
01091 #else
01092 #define DATA_ENSURE_CHECK1 (s < end)
01093 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
01094 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
01095 #endif
01096
01097
01098 #ifdef USE_CAPTURE_HISTORY
01099 static int
01100 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
01101 OnigStackType* stk_top, UChar* str, regex_t* reg)
01102 {
01103 int n, r;
01104 OnigCaptureTreeNode* child;
01105 OnigStackType* k = *kp;
01106
01107 while (k < stk_top) {
01108 if (k->type == STK_MEM_START) {
01109 n = k->u.mem.num;
01110 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
01111 BIT_STATUS_AT(reg->capture_history, n) != 0) {
01112 child = history_node_new();
01113 CHECK_NULL_RETURN_MEMERR(child);
01114 child->group = n;
01115 child->beg = k->u.mem.pstr - str;
01116 r = history_tree_add_child(node, child);
01117 if (r != 0) {
01118 history_tree_free(child);
01119 return r;
01120 }
01121 *kp = (k + 1);
01122 r = make_capture_history_tree(child, kp, stk_top, str, reg);
01123 if (r != 0) return r;
01124
01125 k = *kp;
01126 child->end = k->u.mem.pstr - str;
01127 }
01128 }
01129 else if (k->type == STK_MEM_END) {
01130 if (k->u.mem.num == node->group) {
01131 node->end = k->u.mem.pstr - str;
01132 *kp = k;
01133 return 0;
01134 }
01135 }
01136 k++;
01137 }
01138
01139 return 1;
01140 }
01141 #endif
01142
01143 #ifdef USE_BACKREF_WITH_LEVEL
01144 static int mem_is_in_memp(int mem, int num, UChar* memp)
01145 {
01146 int i;
01147 MemNumType m;
01148
01149 for (i = 0; i < num; i++) {
01150 GET_MEMNUM_INC(m, memp);
01151 if (mem == (int )m) return 1;
01152 }
01153 return 0;
01154 }
01155
01156 static int backref_match_at_nested_level(regex_t* reg
01157 , OnigStackType* top, OnigStackType* stk_base
01158 , int ignore_case, int case_fold_flag
01159 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
01160 {
01161 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
01162 int level;
01163 OnigStackType* k;
01164
01165 level = 0;
01166 k = top;
01167 k--;
01168 while (k >= stk_base) {
01169 if (k->type == STK_CALL_FRAME) {
01170 level--;
01171 }
01172 else if (k->type == STK_RETURN) {
01173 level++;
01174 }
01175 else if (level == nest) {
01176 if (k->type == STK_MEM_START) {
01177 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
01178 pstart = k->u.mem.pstr;
01179 if (pend != NULL_UCHARP) {
01180 if (pend - pstart > send - *s) return 0;
01181 p = pstart;
01182 ss = *s;
01183
01184 if (ignore_case != 0) {
01185 if (string_cmp_ic(reg->enc, case_fold_flag,
01186 pstart, &ss, pend - pstart, send) == 0)
01187 return 0;
01188 }
01189 else {
01190 while (p < pend) {
01191 if (*p++ != *ss++) return 0;
01192 }
01193 }
01194
01195 *s = ss;
01196 return 1;
01197 }
01198 }
01199 }
01200 else if (k->type == STK_MEM_END) {
01201 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
01202 pend = k->u.mem.pstr;
01203 }
01204 }
01205 }
01206 k--;
01207 }
01208
01209 return 0;
01210 }
01211 #endif
01212
01213
01214 #ifdef ONIG_DEBUG_STATISTICS
01215
/*
 * Timer backend for the opcode statistics.  USE_TIMEOFDAY is defined
 * unconditionally right here, so the gettimeofday() branch below is the one
 * compiled; the times() branch is only reached if that define is removed.
 */
#define USE_TIMEOFDAY

#ifdef USE_TIMEOFDAY
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/* ts / te: start and end timestamps filled in through GETTIME(). */
static struct timeval ts, te;
#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
/* Elapsed time between two timevals, expressed in microseconds. */
#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
                         (((te).tv_sec - (ts).tv_sec)*1000000))
#else
#ifdef HAVE_SYS_TIMES_H
#include <sys/times.h>
#endif
/* ts / te: start and end samples filled in through GETTIME(). */
static struct tms ts, te;
#define GETTIME(t) times(&(t))
/* Difference of the tms_utime fields of the two samples. */
#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
#endif
01237
01238 static int OpCounter[256];
01239 static int OpPrevCounter[256];
01240 static unsigned long OpTime[256];
01241 static int OpCurr = OP_FINISH;
01242 static int OpPrevTarget = OP_FAIL;
01243 static int MaxStackDepth = 0;
01244
01245 #define MOP_IN(opcode) do {\
01246 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
01247 OpCurr = opcode;\
01248 OpCounter[opcode]++;\
01249 GETTIME(ts);\
01250 } while(0)
01251
01252 #define MOP_OUT do {\
01253 GETTIME(te);\
01254 OpTime[OpCurr] += TIMEDIFF(te, ts);\
01255 } while(0)
01256
01257 extern void
01258 onig_statistics_init(void)
01259 {
01260 int i;
01261 for (i = 0; i < 256; i++) {
01262 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
01263 }
01264 MaxStackDepth = 0;
01265 }
01266
01267 extern void
01268 onig_print_statistics(FILE* f)
01269 {
01270 int i;
01271 fprintf(f, " count prev time\n");
01272 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
01273 fprintf(f, "%8d: %8d: %10ld: %s\n",
01274 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
01275 }
01276 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
01277 }
01278
/*
 * Statistics build: advance the stack pointer `stk` by one entry and record
 * the new depth (stk - stk_base) in MaxStackDepth when it exceeds the
 * previous maximum.  `stk` and `stk_base` must be in scope at the use site.
 */
#define STACK_INC do {\
  stk++;\
  if (stk - stk_base > MaxStackDepth) \
    MaxStackDepth = stk - stk_base;\
} while(0)
01284
01285 #else
/* Non-statistics build: STACK_INC only advances the stack pointer `stk`. */
#define STACK_INC stk++

/* Non-statistics build: the opcode timing hooks expand to nothing. */
#define MOP_IN(opcode)
#define MOP_OUT
01290 #endif
01291
01292
01293
01294
/* Offset type stored in posix_regmatch_t; a plain int here. */
typedef int regoff_t;

/*
 * Start/end offset pair for one group.  When IS_POSIX_REGION(msa->options)
 * holds, match_at() treats msa->region as an array of these and stores each
 * group's start in rm_so and end in rm_eo, as offsets from the beginning of
 * the subject string; both are set to ONIG_REGION_NOTPOS for a group whose
 * end was never recorded.
 */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} posix_regmatch_t;
01301
/*
 * Prototype for the bytecode printer used by the ONIG_DEBUG_MATCH trace in
 * match_at(), which calls it with stderr and the current instruction
 * pointer.  The definition is not in this part of the file.
 */
void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
                                   OnigEncoding enc);
01304
01305
01306
01307 static OnigPosition
01308 match_at(regex_t* reg, const UChar* str, const UChar* end,
01309 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
01310 const UChar* right_range,
01311 #endif
01312 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
01313 {
01314 static const UChar FinishCode[] = { OP_FINISH };
01315
01316 int i, num_mem, pop_level;
01317 ptrdiff_t n, best_len;
01318 LengthType tlen, tlen2;
01319 MemNumType mem;
01320 RelAddrType addr;
01321 OnigOptionType option = reg->options;
01322 OnigEncoding encode = reg->enc;
01323 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
01324 UChar *s, *q, *sbegin;
01325 UChar *p = reg->p;
01326 UChar *pkeep;
01327 char *alloca_base;
01328 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
01329 OnigStackType *stkp;
01330 OnigStackIndex si;
01331 OnigStackIndex *repeat_stk;
01332 OnigStackIndex *mem_start_stk, *mem_end_stk;
01333 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01334 int scv;
01335 unsigned char* state_check_buff = msa->state_check_buff;
01336 int num_comb_exp_check = reg->num_comb_exp_check;
01337 #endif
01338
01339 #ifdef USE_SUBEXP_CALL
01340
01341 n = reg->num_repeat + (reg->num_mem + 1) * 2;
01342
01343 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
01344 pop_level = reg->stack_pop_level;
01345 num_mem = reg->num_mem;
01346 repeat_stk = (OnigStackIndex* )alloca_base;
01347
01348 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
01349 mem_end_stk = mem_start_stk + (num_mem + 1);
01350 for (i = 0; i <= num_mem; i++) {
01351 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
01352 }
01353 #else
01354
01355 n = reg->num_repeat + reg->num_mem * 2;
01356
01357 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
01358 pop_level = reg->stack_pop_level;
01359 num_mem = reg->num_mem;
01360 repeat_stk = (OnigStackIndex* )alloca_base;
01361
01362 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
01363 mem_end_stk = mem_start_stk + num_mem;
01364 mem_start_stk--;
01365
01366 mem_end_stk--;
01367
01368 for (i = 1; i <= num_mem; i++) {
01369 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
01370 }
01371 #endif
01372
01373 #ifdef ONIG_DEBUG_MATCH
01374 fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n",
01375 (intptr_t)str, str, (intptr_t)end, end, (intptr_t)sstart, sstart, (intptr_t)sprev, sprev);
01376 fprintf(stderr, "size: %d, start offset: %d\n",
01377 (int )(end - str), (int )(sstart - str));
01378 #endif
01379
01380 STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode);
01381 best_len = ONIG_MISMATCH;
01382 s = (UChar* )sstart;
01383 pkeep = (UChar* )sstart;
01384 while (1) {
01385 #ifdef ONIG_DEBUG_MATCH
01386 if (s) {
01387 UChar *q, *bp, buf[50];
01388 int len;
01389 fprintf(stderr, "%4d> \"", (int )(s - str));
01390 bp = buf;
01391 if (*p != OP_FINISH) {
01392 for (i = 0, q = s; i < 7 && q < end; i++) {
01393 len = enclen(encode, q, end);
01394 while (len-- > 0) *bp++ = *q++;
01395 }
01396 }
01397 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
01398 else { xmemcpy(bp, "\"", 1); bp += 1; }
01399 *bp = 0;
01400 fputs((char* )buf, stderr);
01401 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
01402 onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode);
01403 fprintf(stderr, "\n");
01404 }
01405 #endif
01406
01407 sbegin = s;
01408 switch (*p++) {
01409 case OP_END: MOP_IN(OP_END);
01410 n = s - sstart;
01411 if (n > best_len) {
01412 OnigRegion* region;
01413 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
01414 if (IS_FIND_LONGEST(option)) {
01415 if (n > msa->best_len) {
01416 msa->best_len = n;
01417 msa->best_s = (UChar* )sstart;
01418 }
01419 else
01420 goto end_best_len;
01421 }
01422 #endif
01423 best_len = n;
01424 region = msa->region;
01425 if (region) {
01426 #ifdef USE_POSIX_API_REGION_OPTION
01427 if (IS_POSIX_REGION(msa->options)) {
01428 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
01429
01430 rmt[0].rm_so = (regoff_t )(((pkeep > s) ? s : pkeep) - str);
01431 rmt[0].rm_eo = (regoff_t )(s - str);
01432 for (i = 1; i <= num_mem; i++) {
01433 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
01434 if (BIT_STATUS_AT(reg->bt_mem_start, i))
01435 rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
01436 else
01437 rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
01438
01439 rmt[i].rm_eo = (regoff_t )((BIT_STATUS_AT(reg->bt_mem_end, i)
01440 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
01441 : (UChar* )((void* )mem_end_stk[i])) - str);
01442 }
01443 else {
01444 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
01445 }
01446 }
01447 }
01448 else {
01449 #endif
01450 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
01451 region->end[0] = s - str;
01452 for (i = 1; i <= num_mem; i++) {
01453 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
01454 if (BIT_STATUS_AT(reg->bt_mem_start, i))
01455 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
01456 else
01457 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
01458
01459 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
01460 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
01461 : (UChar* )((void* )mem_end_stk[i])) - str;
01462 }
01463 else {
01464 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
01465 }
01466 }
01467
01468 #ifdef USE_CAPTURE_HISTORY
01469 if (reg->capture_history != 0) {
01470 int r;
01471 OnigCaptureTreeNode* node;
01472
01473 if (IS_NULL(region->history_root)) {
01474 region->history_root = node = history_node_new();
01475 CHECK_NULL_RETURN_MEMERR(node);
01476 }
01477 else {
01478 node = region->history_root;
01479 history_tree_clear(node);
01480 }
01481
01482 node->group = 0;
01483 node->beg = ((pkeep > s) ? s : pkeep) - str;
01484 node->end = s - str;
01485
01486 stkp = stk_base;
01487 r = make_capture_history_tree(region->history_root, &stkp,
01488 stk, (UChar* )str, reg);
01489 if (r < 0) {
01490 best_len = r;
01491 goto finish;
01492 }
01493 }
01494 #endif
01495 #ifdef USE_POSIX_API_REGION_OPTION
01496 }
01497 #endif
01498 }
01499 }
01500
01501 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
01502 end_best_len:
01503 #endif
01504 MOP_OUT;
01505
01506 if (IS_FIND_CONDITION(option)) {
01507 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
01508 best_len = ONIG_MISMATCH;
01509 goto fail;
01510 }
01511 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
01512 goto fail;
01513 }
01514 }
01515
01516
01517 goto finish;
01518 break;
01519
01520 case OP_EXACT1: MOP_IN(OP_EXACT1);
01521 #if 0
01522 DATA_ENSURE(1);
01523 if (*p != *s) goto fail;
01524 p++; s++;
01525 #endif
01526 if (*p != *s++) goto fail;
01527 DATA_ENSURE(0);
01528 p++;
01529 MOP_OUT;
01530 break;
01531
01532 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
01533 {
01534 int len;
01535 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01536
01537 DATA_ENSURE(1);
01538 len = ONIGENC_MBC_CASE_FOLD(encode,
01539
01540 case_fold_flag,
01541 &s, end, lowbuf);
01542 DATA_ENSURE(0);
01543 q = lowbuf;
01544 while (len-- > 0) {
01545 if (*p != *q) {
01546 goto fail;
01547 }
01548 p++; q++;
01549 }
01550 }
01551 MOP_OUT;
01552 break;
01553
01554 case OP_EXACT2: MOP_IN(OP_EXACT2);
01555 DATA_ENSURE(2);
01556 if (*p != *s) goto fail;
01557 p++; s++;
01558 if (*p != *s) goto fail;
01559 sprev = s;
01560 p++; s++;
01561 MOP_OUT;
01562 continue;
01563 break;
01564
01565 case OP_EXACT3: MOP_IN(OP_EXACT3);
01566 DATA_ENSURE(3);
01567 if (*p != *s) goto fail;
01568 p++; s++;
01569 if (*p != *s) goto fail;
01570 p++; s++;
01571 if (*p != *s) goto fail;
01572 sprev = s;
01573 p++; s++;
01574 MOP_OUT;
01575 continue;
01576 break;
01577
01578 case OP_EXACT4: MOP_IN(OP_EXACT4);
01579 DATA_ENSURE(4);
01580 if (*p != *s) goto fail;
01581 p++; s++;
01582 if (*p != *s) goto fail;
01583 p++; s++;
01584 if (*p != *s) goto fail;
01585 p++; s++;
01586 if (*p != *s) goto fail;
01587 sprev = s;
01588 p++; s++;
01589 MOP_OUT;
01590 continue;
01591 break;
01592
01593 case OP_EXACT5: MOP_IN(OP_EXACT5);
01594 DATA_ENSURE(5);
01595 if (*p != *s) goto fail;
01596 p++; s++;
01597 if (*p != *s) goto fail;
01598 p++; s++;
01599 if (*p != *s) goto fail;
01600 p++; s++;
01601 if (*p != *s) goto fail;
01602 p++; s++;
01603 if (*p != *s) goto fail;
01604 sprev = s;
01605 p++; s++;
01606 MOP_OUT;
01607 continue;
01608 break;
01609
01610 case OP_EXACTN: MOP_IN(OP_EXACTN);
01611 GET_LENGTH_INC(tlen, p);
01612 DATA_ENSURE(tlen);
01613 while (tlen-- > 0) {
01614 if (*p++ != *s++) goto fail;
01615 }
01616 sprev = s - 1;
01617 MOP_OUT;
01618 continue;
01619 break;
01620
01621 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
01622 {
01623 int len;
01624 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
01625
01626 GET_LENGTH_INC(tlen, p);
01627 endp = p + tlen;
01628
01629 while (p < endp) {
01630 sprev = s;
01631 DATA_ENSURE(1);
01632 len = ONIGENC_MBC_CASE_FOLD(encode,
01633
01634 case_fold_flag,
01635 &s, end, lowbuf);
01636 DATA_ENSURE(0);
01637 q = lowbuf;
01638 while (len-- > 0) {
01639 if (*p != *q) goto fail;
01640 p++; q++;
01641 }
01642 }
01643 }
01644
01645 MOP_OUT;
01646 continue;
01647 break;
01648
01649 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
01650 DATA_ENSURE(2);
01651 if (*p != *s) goto fail;
01652 p++; s++;
01653 if (*p != *s) goto fail;
01654 p++; s++;
01655 MOP_OUT;
01656 break;
01657
01658 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
01659 DATA_ENSURE(4);
01660 if (*p != *s) goto fail;
01661 p++; s++;
01662 if (*p != *s) goto fail;
01663 p++; s++;
01664 sprev = s;
01665 if (*p != *s) goto fail;
01666 p++; s++;
01667 if (*p != *s) goto fail;
01668 p++; s++;
01669 MOP_OUT;
01670 continue;
01671 break;
01672
01673 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
01674 DATA_ENSURE(6);
01675 if (*p != *s) goto fail;
01676 p++; s++;
01677 if (*p != *s) goto fail;
01678 p++; s++;
01679 if (*p != *s) goto fail;
01680 p++; s++;
01681 if (*p != *s) goto fail;
01682 p++; s++;
01683 sprev = s;
01684 if (*p != *s) goto fail;
01685 p++; s++;
01686 if (*p != *s) goto fail;
01687 p++; s++;
01688 MOP_OUT;
01689 continue;
01690 break;
01691
01692 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
01693 GET_LENGTH_INC(tlen, p);
01694 DATA_ENSURE(tlen * 2);
01695 while (tlen-- > 0) {
01696 if (*p != *s) goto fail;
01697 p++; s++;
01698 if (*p != *s) goto fail;
01699 p++; s++;
01700 }
01701 sprev = s - 2;
01702 MOP_OUT;
01703 continue;
01704 break;
01705
01706 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
01707 GET_LENGTH_INC(tlen, p);
01708 DATA_ENSURE(tlen * 3);
01709 while (tlen-- > 0) {
01710 if (*p != *s) goto fail;
01711 p++; s++;
01712 if (*p != *s) goto fail;
01713 p++; s++;
01714 if (*p != *s) goto fail;
01715 p++; s++;
01716 }
01717 sprev = s - 3;
01718 MOP_OUT;
01719 continue;
01720 break;
01721
01722 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
01723 GET_LENGTH_INC(tlen, p);
01724 GET_LENGTH_INC(tlen2, p);
01725 tlen2 *= tlen;
01726 DATA_ENSURE(tlen2);
01727 while (tlen2-- > 0) {
01728 if (*p != *s) goto fail;
01729 p++; s++;
01730 }
01731 sprev = s - tlen;
01732 MOP_OUT;
01733 continue;
01734 break;
01735
01736 case OP_CCLASS: MOP_IN(OP_CCLASS);
01737 DATA_ENSURE(1);
01738 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
01739 p += SIZE_BITSET;
01740 s += enclen(encode, s, end);
01741 MOP_OUT;
01742 break;
01743
01744 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
01745 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
01746
01747 cclass_mb:
01748 GET_LENGTH_INC(tlen, p);
01749 {
01750 OnigCodePoint code;
01751 UChar *ss;
01752 int mb_len;
01753
01754 DATA_ENSURE(1);
01755 mb_len = enclen(encode, s, end);
01756 DATA_ENSURE(mb_len);
01757 ss = s;
01758 s += mb_len;
01759 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
01760
01761 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
01762 if (! onig_is_in_code_range(p, code)) goto fail;
01763 #else
01764 q = p;
01765 ALIGNMENT_RIGHT(q);
01766 if (! onig_is_in_code_range(q, code)) goto fail;
01767 #endif
01768 }
01769 p += tlen;
01770 MOP_OUT;
01771 break;
01772
01773 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
01774 DATA_ENSURE(1);
01775 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
01776 p += SIZE_BITSET;
01777 goto cclass_mb;
01778 }
01779 else {
01780 if (BITSET_AT(((BitSetRef )p), *s) == 0)
01781 goto fail;
01782
01783 p += SIZE_BITSET;
01784 GET_LENGTH_INC(tlen, p);
01785 p += tlen;
01786 s++;
01787 }
01788 MOP_OUT;
01789 break;
01790
01791 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
01792 DATA_ENSURE(1);
01793 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
01794 p += SIZE_BITSET;
01795 s += enclen(encode, s, end);
01796 MOP_OUT;
01797 break;
01798
01799 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
01800 DATA_ENSURE(1);
01801 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
01802 s++;
01803 GET_LENGTH_INC(tlen, p);
01804 p += tlen;
01805 goto cc_mb_not_success;
01806 }
01807
01808 cclass_mb_not:
01809 GET_LENGTH_INC(tlen, p);
01810 {
01811 OnigCodePoint code;
01812 UChar *ss;
01813 int mb_len = enclen(encode, s, end);
01814
01815 if (! DATA_ENSURE_CHECK(mb_len)) {
01816 DATA_ENSURE(1);
01817 s = (UChar* )end;
01818 p += tlen;
01819 goto cc_mb_not_success;
01820 }
01821
01822 ss = s;
01823 s += mb_len;
01824 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
01825
01826 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
01827 if (onig_is_in_code_range(p, code)) goto fail;
01828 #else
01829 q = p;
01830 ALIGNMENT_RIGHT(q);
01831 if (onig_is_in_code_range(q, code)) goto fail;
01832 #endif
01833 }
01834 p += tlen;
01835
01836 cc_mb_not_success:
01837 MOP_OUT;
01838 break;
01839
01840 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
01841 DATA_ENSURE(1);
01842 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
01843 p += SIZE_BITSET;
01844 goto cclass_mb_not;
01845 }
01846 else {
01847 if (BITSET_AT(((BitSetRef )p), *s) != 0)
01848 goto fail;
01849
01850 p += SIZE_BITSET;
01851 GET_LENGTH_INC(tlen, p);
01852 p += tlen;
01853 s++;
01854 }
01855 MOP_OUT;
01856 break;
01857
01858 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
01859 {
01860 OnigCodePoint code;
01861 void *node;
01862 int mb_len;
01863 UChar *ss;
01864
01865 DATA_ENSURE(1);
01866 GET_POINTER_INC(node, p);
01867 mb_len = enclen(encode, s, end);
01868 ss = s;
01869 s += mb_len;
01870 DATA_ENSURE(0);
01871 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
01872 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
01873 }
01874 MOP_OUT;
01875 break;
01876
01877 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
01878 DATA_ENSURE(1);
01879 n = enclen(encode, s, end);
01880 DATA_ENSURE(n);
01881 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01882 s += n;
01883 MOP_OUT;
01884 break;
01885
01886 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
01887 DATA_ENSURE(1);
01888 n = enclen(encode, s, end);
01889 DATA_ENSURE(n);
01890 s += n;
01891 MOP_OUT;
01892 break;
01893
01894 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
01895 while (DATA_ENSURE_CHECK1) {
01896 STACK_PUSH_ALT(p, s, sprev, pkeep);
01897 n = enclen(encode, s, end);
01898 DATA_ENSURE(n);
01899 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01900 sprev = s;
01901 s += n;
01902 }
01903 MOP_OUT;
01904 break;
01905
01906 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
01907 while (DATA_ENSURE_CHECK1) {
01908 STACK_PUSH_ALT(p, s, sprev, pkeep);
01909 n = enclen(encode, s, end);
01910 if (n > 1) {
01911 DATA_ENSURE(n);
01912 sprev = s;
01913 s += n;
01914 }
01915 else {
01916 sprev = s;
01917 s++;
01918 }
01919 }
01920 MOP_OUT;
01921 break;
01922
01923 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
01924 while (DATA_ENSURE_CHECK1) {
01925 if (*p == *s) {
01926 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
01927 }
01928 n = enclen(encode, s, end);
01929 DATA_ENSURE(n);
01930 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01931 sprev = s;
01932 s += n;
01933 }
01934 p++;
01935 MOP_OUT;
01936 break;
01937
01938 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01939 while (DATA_ENSURE_CHECK1) {
01940 if (*p == *s) {
01941 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
01942 }
01943 n = enclen(encode, s, end);
01944 if (n > 1) {
01945 DATA_ENSURE(n);
01946 sprev = s;
01947 s += n;
01948 }
01949 else {
01950 sprev = s;
01951 s++;
01952 }
01953 }
01954 p++;
01955 MOP_OUT;
01956 break;
01957
01958 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01959 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
01960 GET_STATE_CHECK_NUM_INC(mem, p);
01961 while (DATA_ENSURE_CHECK1) {
01962 STATE_CHECK_VAL(scv, mem);
01963 if (scv) goto fail;
01964
01965 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
01966 n = enclen(encode, s, end);
01967 DATA_ENSURE(n);
01968 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
01969 sprev = s;
01970 s += n;
01971 }
01972 MOP_OUT;
01973 break;
01974
01975 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
01976 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
01977
01978 GET_STATE_CHECK_NUM_INC(mem, p);
01979 while (DATA_ENSURE_CHECK1) {
01980 STATE_CHECK_VAL(scv, mem);
01981 if (scv) goto fail;
01982
01983 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
01984 n = enclen(encode, s, end);
01985 if (n > 1) {
01986 DATA_ENSURE(n);
01987 sprev = s;
01988 s += n;
01989 }
01990 else {
01991 sprev = s;
01992 s++;
01993 }
01994 }
01995 MOP_OUT;
01996 break;
01997 #endif
01998
01999 case OP_WORD: MOP_IN(OP_WORD);
02000 DATA_ENSURE(1);
02001 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
02002 goto fail;
02003
02004 s += enclen(encode, s, end);
02005 MOP_OUT;
02006 break;
02007
02008 case OP_ASCII_WORD: MOP_IN(OP_ASCII_WORD);
02009 DATA_ENSURE(1);
02010 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02011 goto fail;
02012
02013 s += enclen(encode, s, end);
02014 MOP_OUT;
02015 break;
02016
02017 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
02018 DATA_ENSURE(1);
02019 if (ONIGENC_IS_MBC_WORD(encode, s, end))
02020 goto fail;
02021
02022 s += enclen(encode, s, end);
02023 MOP_OUT;
02024 break;
02025
02026 case OP_NOT_ASCII_WORD: MOP_IN(OP_NOT_ASCII_WORD);
02027 DATA_ENSURE(1);
02028 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02029 goto fail;
02030
02031 s += enclen(encode, s, end);
02032 MOP_OUT;
02033 break;
02034
02035 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
02036 if (ON_STR_BEGIN(s)) {
02037 DATA_ENSURE(1);
02038 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
02039 goto fail;
02040 }
02041 else if (ON_STR_END(s)) {
02042 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
02043 goto fail;
02044 }
02045 else {
02046 if (ONIGENC_IS_MBC_WORD(encode, s, end)
02047 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
02048 goto fail;
02049 }
02050 MOP_OUT;
02051 continue;
02052 break;
02053
02054 case OP_ASCII_WORD_BOUND: MOP_IN(OP_ASCII_WORD_BOUND);
02055 if (ON_STR_BEGIN(s)) {
02056 DATA_ENSURE(1);
02057 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02058 goto fail;
02059 }
02060 else if (ON_STR_END(s)) {
02061 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02062 goto fail;
02063 }
02064 else {
02065 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
02066 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02067 goto fail;
02068 }
02069 MOP_OUT;
02070 continue;
02071 break;
02072
02073 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
02074 if (ON_STR_BEGIN(s)) {
02075 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
02076 goto fail;
02077 }
02078 else if (ON_STR_END(s)) {
02079 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
02080 goto fail;
02081 }
02082 else {
02083 if (ONIGENC_IS_MBC_WORD(encode, s, end)
02084 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
02085 goto fail;
02086 }
02087 MOP_OUT;
02088 continue;
02089 break;
02090
02091 case OP_NOT_ASCII_WORD_BOUND: MOP_IN(OP_NOT_ASCII_WORD_BOUND);
02092 if (ON_STR_BEGIN(s)) {
02093 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
02094 goto fail;
02095 }
02096 else if (ON_STR_END(s)) {
02097 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02098 goto fail;
02099 }
02100 else {
02101 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
02102 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
02103 goto fail;
02104 }
02105 MOP_OUT;
02106 continue;
02107 break;
02108
02109 #ifdef USE_WORD_BEGIN_END
02110 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
02111 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
02112 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
02113 MOP_OUT;
02114 continue;
02115 }
02116 }
02117 goto fail;
02118 break;
02119
02120 case OP_ASCII_WORD_BEGIN: MOP_IN(OP_ASCII_WORD_BEGIN);
02121 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
02122 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
02123 MOP_OUT;
02124 continue;
02125 }
02126 }
02127 goto fail;
02128 break;
02129
02130 case OP_WORD_END: MOP_IN(OP_WORD_END);
02131 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
02132 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
02133 MOP_OUT;
02134 continue;
02135 }
02136 }
02137 goto fail;
02138 break;
02139
02140 case OP_ASCII_WORD_END: MOP_IN(OP_ASCII_WORD_END);
02141 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
02142 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
02143 MOP_OUT;
02144 continue;
02145 }
02146 }
02147 goto fail;
02148 break;
02149 #endif
02150
02151 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
02152 if (! ON_STR_BEGIN(s)) goto fail;
02153
02154 MOP_OUT;
02155 continue;
02156 break;
02157
02158 case OP_END_BUF: MOP_IN(OP_END_BUF);
02159 if (! ON_STR_END(s)) goto fail;
02160
02161 MOP_OUT;
02162 continue;
02163 break;
02164
02165 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
02166 op_begin_line:
02167 if (ON_STR_BEGIN(s)) {
02168 if (IS_NOTBOL(msa->options)) goto fail;
02169 MOP_OUT;
02170 continue;
02171 }
02172 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
02173 #ifdef USE_CRNL_AS_LINE_TERMINATOR
02174 && !(IS_NEWLINE_CRLF(option)
02175 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
02176 #endif
02177 && !ON_STR_END(s)) {
02178 MOP_OUT;
02179 continue;
02180 }
02181 goto fail;
02182 break;
02183
02184 case OP_END_LINE: MOP_IN(OP_END_LINE);
02185 if (ON_STR_END(s)) {
02186 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02187 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
02188 #endif
02189 if (IS_NOTEOL(msa->options)) goto fail;
02190 MOP_OUT;
02191 continue;
02192 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02193 }
02194 #endif
02195 }
02196 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
02197 MOP_OUT;
02198 continue;
02199 }
02200 goto fail;
02201 break;
02202
02203 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
02204 if (ON_STR_END(s)) {
02205 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02206 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
02207 #endif
02208 if (IS_NOTEOL(msa->options)) goto fail;
02209 MOP_OUT;
02210 continue;
02211 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
02212 }
02213 #endif
02214 }
02215 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
02216 UChar* ss = s + enclen(encode, s, end);
02217 if (ON_STR_END(ss)) {
02218 MOP_OUT;
02219 continue;
02220 }
02221 #ifdef USE_CRNL_AS_LINE_TERMINATOR
02222 else if (IS_NEWLINE_CRLF(option)
02223 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
02224 ss += enclen(encode, ss, end);
02225 if (ON_STR_END(ss)) {
02226 MOP_OUT;
02227 continue;
02228 }
02229 }
02230 #endif
02231 }
02232 goto fail;
02233 break;
02234
02235 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
02236 if (s != msa->gpos)
02237 goto fail;
02238
02239 MOP_OUT;
02240 continue;
02241 break;
02242
02243 case OP_BEGIN_POS_OR_LINE: MOP_IN(OP_BEGIN_POS_OR_LINE);
02244 if (s != msa->gpos)
02245 goto op_begin_line;
02246
02247 MOP_OUT;
02248 continue;
02249 break;
02250
02251 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
02252 GET_MEMNUM_INC(mem, p);
02253 STACK_PUSH_MEM_START(mem, s);
02254 MOP_OUT;
02255 continue;
02256 break;
02257
02258 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
02259 GET_MEMNUM_INC(mem, p);
02260 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
02261 MOP_OUT;
02262 continue;
02263 break;
02264
02265 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
02266 GET_MEMNUM_INC(mem, p);
02267 STACK_PUSH_MEM_END(mem, s);
02268 MOP_OUT;
02269 continue;
02270 break;
02271
02272 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
02273 GET_MEMNUM_INC(mem, p);
02274 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
02275 MOP_OUT;
02276 continue;
02277 break;
02278
02279 case OP_KEEP: MOP_IN(OP_KEEP);
02280 pkeep = s;
02281 MOP_OUT;
02282 continue;
02283 break;
02284
02285 #ifdef USE_SUBEXP_CALL
02286 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
02287 GET_MEMNUM_INC(mem, p);
02288 STACK_GET_MEM_START(mem, stkp);
02289 STACK_PUSH_MEM_END(mem, s);
02290 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
02291 MOP_OUT;
02292 continue;
02293 break;
02294
02295 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
02296 GET_MEMNUM_INC(mem, p);
02297 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
02298 STACK_GET_MEM_START(mem, stkp);
02299
02300 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02301 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
02302 else
02303 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
02304
02305 STACK_PUSH_MEM_END_MARK(mem);
02306 MOP_OUT;
02307 continue;
02308 break;
02309 #endif
02310
02311 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
02312 mem = 1;
02313 goto backref;
02314 break;
02315
02316 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
02317 mem = 2;
02318 goto backref;
02319 break;
02320
02321 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
02322 GET_MEMNUM_INC(mem, p);
02323 backref:
02324 {
02325 int len;
02326 UChar *pstart, *pend;
02327
02328
02329
02330 if (mem > num_mem) goto fail;
02331 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
02332 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
02333
02334 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02335 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02336 else
02337 pstart = (UChar* )((void* )mem_start_stk[mem]);
02338
02339 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02340 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02341 : (UChar* )((void* )mem_end_stk[mem]));
02342 n = pend - pstart;
02343 DATA_ENSURE(n);
02344 sprev = s;
02345 STRING_CMP(pstart, s, n);
02346 while (sprev + (len = enclen(encode, sprev, end)) < s)
02347 sprev += len;
02348
02349 MOP_OUT;
02350 continue;
02351 }
02352 break;
02353
02354 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
02355 GET_MEMNUM_INC(mem, p);
02356 {
02357 int len;
02358 UChar *pstart, *pend;
02359
02360
02361
02362 if (mem > num_mem) goto fail;
02363 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
02364 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
02365
02366 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02367 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02368 else
02369 pstart = (UChar* )((void* )mem_start_stk[mem]);
02370
02371 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02372 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02373 : (UChar* )((void* )mem_end_stk[mem]));
02374 n = pend - pstart;
02375 DATA_ENSURE(n);
02376 sprev = s;
02377 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
02378 while (sprev + (len = enclen(encode, sprev, end)) < s)
02379 sprev += len;
02380
02381 MOP_OUT;
02382 continue;
02383 }
02384 break;
02385
02386 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
02387 {
02388 int len, is_fail;
02389 UChar *pstart, *pend, *swork;
02390
02391 GET_LENGTH_INC(tlen, p);
02392 for (i = 0; i < tlen; i++) {
02393 GET_MEMNUM_INC(mem, p);
02394
02395 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
02396 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
02397
02398 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02399 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02400 else
02401 pstart = (UChar* )((void* )mem_start_stk[mem]);
02402
02403 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02404 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02405 : (UChar* )((void* )mem_end_stk[mem]));
02406 n = pend - pstart;
02407 DATA_ENSURE(n);
02408 sprev = s;
02409 swork = s;
02410 STRING_CMP_VALUE(pstart, swork, n, is_fail);
02411 if (is_fail) continue;
02412 s = swork;
02413 while (sprev + (len = enclen(encode, sprev, end)) < s)
02414 sprev += len;
02415
02416 p += (SIZE_MEMNUM * (tlen - i - 1));
02417 break;
02418 }
02419 if (i == tlen) goto fail;
02420 MOP_OUT;
02421 continue;
02422 }
02423 break;
02424
02425 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
02426 {
02427 int len, is_fail;
02428 UChar *pstart, *pend, *swork;
02429
02430 GET_LENGTH_INC(tlen, p);
02431 for (i = 0; i < tlen; i++) {
02432 GET_MEMNUM_INC(mem, p);
02433
02434 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
02435 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
02436
02437 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
02438 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
02439 else
02440 pstart = (UChar* )((void* )mem_start_stk[mem]);
02441
02442 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
02443 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
02444 : (UChar* )((void* )mem_end_stk[mem]));
02445 n = pend - pstart;
02446 DATA_ENSURE(n);
02447 sprev = s;
02448 swork = s;
02449 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
02450 if (is_fail) continue;
02451 s = swork;
02452 while (sprev + (len = enclen(encode, sprev, end)) < s)
02453 sprev += len;
02454
02455 p += (SIZE_MEMNUM * (tlen - i - 1));
02456 break;
02457 }
02458 if (i == tlen) goto fail;
02459 MOP_OUT;
02460 continue;
02461 }
02462 break;
02463
02464 #ifdef USE_BACKREF_WITH_LEVEL
02465 case OP_BACKREF_WITH_LEVEL:
02466 {
02467 int len;
02468 OnigOptionType ic;
02469 LengthType level;
02470
02471 GET_OPTION_INC(ic, p);
02472 GET_LENGTH_INC(level, p);
02473 GET_LENGTH_INC(tlen, p);
02474
02475 sprev = s;
02476 if (backref_match_at_nested_level(reg, stk, stk_base, ic
02477 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
02478 while (sprev + (len = enclen(encode, sprev, end)) < s)
02479 sprev += len;
02480
02481 p += (SIZE_MEMNUM * tlen);
02482 }
02483 else
02484 goto fail;
02485
02486 MOP_OUT;
02487 continue;
02488 }
02489
02490 break;
02491 #endif
02492
02493 #if 0
02494 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
02495 GET_OPTION_INC(option, p);
02496 STACK_PUSH_ALT(p, s, sprev, pkeep);
02497 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
02498 MOP_OUT;
02499 continue;
02500 break;
02501
02502 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
02503 GET_OPTION_INC(option, p);
02504 MOP_OUT;
02505 continue;
02506 break;
02507 #endif
02508
02509 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
02510 GET_MEMNUM_INC(mem, p);
02511 STACK_PUSH_NULL_CHECK_START(mem, s);
02512 MOP_OUT;
02513 continue;
02514 break;
02515
02516 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
02517 {
02518 int isnull;
02519
02520 GET_MEMNUM_INC(mem, p);
02521 STACK_NULL_CHECK(isnull, mem, s);
02522 if (isnull) {
02523 #ifdef ONIG_DEBUG_MATCH
02524 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIdPTR" (%p)\n",
02525 (int )mem, (intptr_t )s, s);
02526 #endif
02527 null_check_found:
02528
02529 switch (*p++) {
02530 case OP_JUMP:
02531 case OP_PUSH:
02532 p += SIZE_RELADDR;
02533 break;
02534 case OP_REPEAT_INC:
02535 case OP_REPEAT_INC_NG:
02536 case OP_REPEAT_INC_SG:
02537 case OP_REPEAT_INC_NG_SG:
02538 p += SIZE_MEMNUM;
02539 break;
02540 default:
02541 goto unexpected_bytecode_error;
02542 break;
02543 }
02544 }
02545 }
02546 MOP_OUT;
02547 continue;
02548 break;
02549
02550 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02551 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
02552 {
02553 int isnull;
02554
02555 GET_MEMNUM_INC(mem, p);
02556 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
02557 if (isnull) {
02558 #ifdef ONIG_DEBUG_MATCH
02559 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR" (%p)\n",
02560 (int )mem, (intptr_t )s, s);
02561 #endif
02562 if (isnull == -1) goto fail;
02563 goto null_check_found;
02564 }
02565 }
02566 MOP_OUT;
02567 continue;
02568 break;
02569 #endif
02570
02571 #ifdef USE_SUBEXP_CALL
02572 case OP_NULL_CHECK_END_MEMST_PUSH:
02573 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
02574 {
02575 int isnull;
02576
02577 GET_MEMNUM_INC(mem, p);
02578 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02579 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
02580 #else
02581 STACK_NULL_CHECK_REC(isnull, mem, s);
02582 #endif
02583 if (isnull) {
02584 #ifdef ONIG_DEBUG_MATCH
02585 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR" (%p)\n",
02586 (int )mem, (intptr_t )s, s);
02587 #endif
02588 if (isnull == -1) goto fail;
02589 goto null_check_found;
02590 }
02591 else {
02592 STACK_PUSH_NULL_CHECK_END(mem);
02593 }
02594 }
02595 MOP_OUT;
02596 continue;
02597 break;
02598 #endif
02599
02600 case OP_JUMP: MOP_IN(OP_JUMP);
02601 GET_RELADDR_INC(addr, p);
02602 p += addr;
02603 MOP_OUT;
02604 CHECK_INTERRUPT_IN_MATCH_AT;
02605 continue;
02606 break;
02607
02608 case OP_PUSH: MOP_IN(OP_PUSH);
02609 GET_RELADDR_INC(addr, p);
02610 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02611 MOP_OUT;
02612 continue;
02613 break;
02614
02615 #ifdef USE_COMBINATION_EXPLOSION_CHECK
02616 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
02617 GET_STATE_CHECK_NUM_INC(mem, p);
02618 STATE_CHECK_VAL(scv, mem);
02619 if (scv) goto fail;
02620
02621 GET_RELADDR_INC(addr, p);
02622 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
02623 MOP_OUT;
02624 continue;
02625 break;
02626
02627 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
02628 GET_STATE_CHECK_NUM_INC(mem, p);
02629 GET_RELADDR_INC(addr, p);
02630 STATE_CHECK_VAL(scv, mem);
02631 if (scv) {
02632 p += addr;
02633 }
02634 else {
02635 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
02636 }
02637 MOP_OUT;
02638 continue;
02639 break;
02640
02641 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
02642 GET_STATE_CHECK_NUM_INC(mem, p);
02643 STATE_CHECK_VAL(scv, mem);
02644 if (scv) goto fail;
02645
02646 STACK_PUSH_STATE_CHECK(s, mem);
02647 MOP_OUT;
02648 continue;
02649 break;
02650 #endif
02651
02652 case OP_POP: MOP_IN(OP_POP);
02653 STACK_POP_ONE;
02654 MOP_OUT;
02655 continue;
02656 break;
02657
02658 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
02659 GET_RELADDR_INC(addr, p);
02660 if (*p == *s && DATA_ENSURE_CHECK1) {
02661 p++;
02662 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02663 MOP_OUT;
02664 continue;
02665 }
02666 p += (addr + 1);
02667 MOP_OUT;
02668 continue;
02669 break;
02670
02671 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
02672 GET_RELADDR_INC(addr, p);
02673 if (*p == *s) {
02674 p++;
02675 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02676 MOP_OUT;
02677 continue;
02678 }
02679 p++;
02680 MOP_OUT;
02681 continue;
02682 break;
02683
02684 case OP_REPEAT: MOP_IN(OP_REPEAT);
02685 {
02686 GET_MEMNUM_INC(mem, p);
02687 GET_RELADDR_INC(addr, p);
02688
02689 STACK_ENSURE(1);
02690 repeat_stk[mem] = GET_STACK_INDEX(stk);
02691 STACK_PUSH_REPEAT(mem, p);
02692
02693 if (reg->repeat_range[mem].lower == 0) {
02694 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
02695 }
02696 }
02697 MOP_OUT;
02698 continue;
02699 break;
02700
02701 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
02702 {
02703 GET_MEMNUM_INC(mem, p);
02704 GET_RELADDR_INC(addr, p);
02705
02706 STACK_ENSURE(1);
02707 repeat_stk[mem] = GET_STACK_INDEX(stk);
02708 STACK_PUSH_REPEAT(mem, p);
02709
02710 if (reg->repeat_range[mem].lower == 0) {
02711 STACK_PUSH_ALT(p, s, sprev, pkeep);
02712 p += addr;
02713 }
02714 }
02715 MOP_OUT;
02716 continue;
02717 break;
02718
02719 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
02720 GET_MEMNUM_INC(mem, p);
02721 si = repeat_stk[mem];
02722 stkp = STACK_AT(si);
02723
02724 repeat_inc:
02725 stkp->u.repeat.count++;
02726 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
02727
02728 }
02729 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
02730 STACK_PUSH_ALT(p, s, sprev, pkeep);
02731 p = STACK_AT(si)->u.repeat.pcode;
02732 }
02733 else {
02734 p = stkp->u.repeat.pcode;
02735 }
02736 STACK_PUSH_REPEAT_INC(si);
02737 MOP_OUT;
02738 CHECK_INTERRUPT_IN_MATCH_AT;
02739 continue;
02740 break;
02741
02742 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
02743 GET_MEMNUM_INC(mem, p);
02744 STACK_GET_REPEAT(mem, stkp);
02745 si = GET_STACK_INDEX(stkp);
02746 goto repeat_inc;
02747 break;
02748
02749 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
02750 GET_MEMNUM_INC(mem, p);
02751 si = repeat_stk[mem];
02752 stkp = STACK_AT(si);
02753
02754 repeat_inc_ng:
02755 stkp->u.repeat.count++;
02756 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
02757 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
02758 UChar* pcode = stkp->u.repeat.pcode;
02759
02760 STACK_PUSH_REPEAT_INC(si);
02761 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
02762 }
02763 else {
02764 p = stkp->u.repeat.pcode;
02765 STACK_PUSH_REPEAT_INC(si);
02766 }
02767 }
02768 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
02769 STACK_PUSH_REPEAT_INC(si);
02770 }
02771 MOP_OUT;
02772 CHECK_INTERRUPT_IN_MATCH_AT;
02773 continue;
02774 break;
02775
02776 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
02777 GET_MEMNUM_INC(mem, p);
02778 STACK_GET_REPEAT(mem, stkp);
02779 si = GET_STACK_INDEX(stkp);
02780 goto repeat_inc_ng;
02781 break;
02782
02783 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
02784 STACK_PUSH_POS(s, sprev, pkeep);
02785 MOP_OUT;
02786 continue;
02787 break;
02788
02789 case OP_POP_POS: MOP_IN(OP_POP_POS);
02790 {
02791 STACK_POS_END(stkp);
02792 s = stkp->u.state.pstr;
02793 sprev = stkp->u.state.pstr_prev;
02794 }
02795 MOP_OUT;
02796 continue;
02797 break;
02798
02799 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
02800 GET_RELADDR_INC(addr, p);
02801 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
02802 MOP_OUT;
02803 continue;
02804 break;
02805
02806 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
02807 STACK_POP_TIL_POS_NOT;
02808 goto fail;
02809 break;
02810
02811 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
02812 STACK_PUSH_STOP_BT;
02813 MOP_OUT;
02814 continue;
02815 break;
02816
02817 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
02818 STACK_STOP_BT_END;
02819 MOP_OUT;
02820 continue;
02821 break;
02822
02823 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
02824 GET_LENGTH_INC(tlen, p);
02825 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
02826 if (IS_NULL(s)) goto fail;
02827 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
02828 MOP_OUT;
02829 continue;
02830 break;
02831
02832 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
02833 GET_RELADDR_INC(addr, p);
02834 GET_LENGTH_INC(tlen, p);
02835 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
02836 if (IS_NULL(q)) {
02837
02838
02839 p += addr;
02840
02841 }
02842 else {
02843 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
02844 s = q;
02845 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
02846 }
02847 MOP_OUT;
02848 continue;
02849 break;
02850
02851 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
02852 STACK_POP_TIL_LOOK_BEHIND_NOT;
02853 goto fail;
02854 break;
02855
02856 #ifdef USE_SUBEXP_CALL
02857 case OP_CALL: MOP_IN(OP_CALL);
02858 GET_ABSADDR_INC(addr, p);
02859 STACK_PUSH_CALL_FRAME(p);
02860 p = reg->p + addr;
02861 MOP_OUT;
02862 continue;
02863 break;
02864
02865 case OP_RETURN: MOP_IN(OP_RETURN);
02866 STACK_RETURN(p);
02867 STACK_PUSH_RETURN;
02868 MOP_OUT;
02869 continue;
02870 break;
02871 #endif
02872
02873 case OP_CONDITION: MOP_IN(OP_CONDITION);
02874 GET_MEMNUM_INC(mem, p);
02875 GET_RELADDR_INC(addr, p);
02876 if ((mem > num_mem) ||
02877 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
02878 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
02879 p += addr;
02880 }
02881 MOP_OUT;
02882 continue;
02883 break;
02884
02885 case OP_FINISH:
02886 goto finish;
02887 break;
02888
02889 fail:
02890 MOP_OUT;
02891
02892 case OP_FAIL: MOP_IN(OP_FAIL);
02893 STACK_POP;
02894 p = stk->u.state.pcode;
02895 s = stk->u.state.pstr;
02896 sprev = stk->u.state.pstr_prev;
02897 pkeep = stk->u.state.pkeep;
02898
02899 #ifdef USE_COMBINATION_EXPLOSION_CHECK
02900 if (stk->u.state.state_check != 0) {
02901 stk->type = STK_STATE_CHECK_MARK;
02902 stk++;
02903 }
02904 #endif
02905
02906 MOP_OUT;
02907 continue;
02908 break;
02909
02910 default:
02911 goto bytecode_error;
02912
02913 }
02914 sprev = sbegin;
02915 }
02916
02917 finish:
02918 STACK_SAVE;
02919 return best_len;
02920
02921 #ifdef ONIG_DEBUG
02922 stack_error:
02923 STACK_SAVE;
02924 return ONIGERR_STACK_BUG;
02925 #endif
02926
02927 bytecode_error:
02928 STACK_SAVE;
02929 return ONIGERR_UNDEFINED_BYTECODE;
02930
02931 unexpected_bytecode_error:
02932 STACK_SAVE;
02933 return ONIGERR_UNEXPECTED_BYTECODE;
02934 }
02935
02936
02937 static UChar*
02938 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
02939 const UChar* text, const UChar* text_end, UChar* text_range)
02940 {
02941 UChar *t, *p, *s, *end;
02942
02943 end = (UChar* )text_end;
02944 end -= target_end - target - 1;
02945 if (end > text_range)
02946 end = text_range;
02947
02948 s = (UChar* )text;
02949
02950 if (enc->max_enc_len == enc->min_enc_len) {
02951 int n = enc->max_enc_len;
02952
02953 while (s < end) {
02954 if (*s == *target) {
02955 p = s + 1;
02956 t = target + 1;
02957 if (target_end == t || memcmp(t, p, target_end - t) == 0)
02958 return s;
02959 }
02960 s += n;
02961 }
02962 return (UChar* )NULL;
02963 }
02964 while (s < end) {
02965 if (*s == *target) {
02966 p = s + 1;
02967 t = target + 1;
02968 if (target_end == t || memcmp(t, p, target_end - t) == 0)
02969 return s;
02970 }
02971 s += enclen(enc, s, text_end);
02972 }
02973
02974 return (UChar* )NULL;
02975 }
02976
02977 static int
02978 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
02979 const UChar* t, const UChar* tend,
02980 const UChar* p, const UChar* end)
02981 {
02982 int lowlen;
02983 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
02984
02985 while (t < tend) {
02986 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
02987 q = lowbuf;
02988 while (lowlen > 0) {
02989 if (*t++ != *q++) return 0;
02990 lowlen--;
02991 }
02992 }
02993
02994 return 1;
02995 }
02996
02997 static UChar*
02998 slow_search_ic(OnigEncoding enc, int case_fold_flag,
02999 UChar* target, UChar* target_end,
03000 const UChar* text, const UChar* text_end, UChar* text_range)
03001 {
03002 UChar *s, *end;
03003
03004 end = (UChar* )text_end;
03005 end -= target_end - target - 1;
03006 if (end > text_range)
03007 end = text_range;
03008
03009 s = (UChar* )text;
03010
03011 while (s < end) {
03012 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03013 s, text_end))
03014 return s;
03015
03016 s += enclen(enc, s, text_end);
03017 }
03018
03019 return (UChar* )NULL;
03020 }
03021
03022 static UChar*
03023 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
03024 const UChar* text, const UChar* adjust_text,
03025 const UChar* text_end, const UChar* text_start)
03026 {
03027 UChar *t, *p, *s;
03028
03029 s = (UChar* )text_end;
03030 s -= (target_end - target);
03031 if (s > text_start)
03032 s = (UChar* )text_start;
03033 else
03034 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
03035
03036 while (s >= text) {
03037 if (*s == *target) {
03038 p = s + 1;
03039 t = target + 1;
03040 while (t < target_end) {
03041 if (*t != *p++)
03042 break;
03043 t++;
03044 }
03045 if (t == target_end)
03046 return s;
03047 }
03048 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
03049 }
03050
03051 return (UChar* )NULL;
03052 }
03053
03054 static UChar*
03055 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
03056 UChar* target, UChar* target_end,
03057 const UChar* text, const UChar* adjust_text,
03058 const UChar* text_end, const UChar* text_start)
03059 {
03060 UChar *s;
03061
03062 s = (UChar* )text_end;
03063 s -= (target_end - target);
03064 if (s > text_start)
03065 s = (UChar* )text_start;
03066 else
03067 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
03068
03069 while (s >= text) {
03070 if (str_lower_case_match(enc, case_fold_flag,
03071 target, target_end, s, text_end))
03072 return s;
03073
03074 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
03075 }
03076
03077 return (UChar* )NULL;
03078 }
03079
03080 #ifndef USE_SUNDAY_QUICK_SEARCH
03081
03082 static UChar*
03083 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
03084 const UChar* text, const UChar* text_end,
03085 const UChar* text_range)
03086 {
03087 const UChar *s, *se, *t, *p, *end;
03088 const UChar *tail;
03089 ptrdiff_t skip, tlen1;
03090
03091 #ifdef ONIG_DEBUG_SEARCH
03092 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
03093 text, text, text_end, text_end, text_range, text_range);
03094 #endif
03095
03096 tail = target_end - 1;
03097 tlen1 = tail - target;
03098 end = text_range;
03099 if (end + tlen1 > text_end)
03100 end = text_end - tlen1;
03101
03102 s = text;
03103
03104 if (IS_NULL(reg->int_map)) {
03105 while (s < end) {
03106 p = se = s + tlen1;
03107 t = tail;
03108 while (*p == *t) {
03109 if (t == target) return (UChar* )s;
03110 p--; t--;
03111 }
03112 skip = reg->map[*se];
03113 t = s;
03114 do {
03115 s += enclen(reg->enc, s, end);
03116 } while ((s - t) < skip && s < end);
03117 }
03118 }
03119 else {
03120 while (s < end) {
03121 p = se = s + tlen1;
03122 t = tail;
03123 while (*p == *t) {
03124 if (t == target) return (UChar* )s;
03125 p--; t--;
03126 }
03127 skip = reg->int_map[*se];
03128 t = s;
03129 do {
03130 s += enclen(reg->enc, s, end);
03131 } while ((s - t) < skip && s < end);
03132 }
03133 }
03134
03135 return (UChar* )NULL;
03136 }
03137
03138
03139 static UChar*
03140 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
03141 const UChar* text, const UChar* text_end, const UChar* text_range)
03142 {
03143 const UChar *s, *t, *p, *end;
03144 const UChar *tail;
03145
03146 #ifdef ONIG_DEBUG_SEARCH
03147 fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
03148 text, text_end, text_range);
03149 #endif
03150
03151 end = text_range + (target_end - target) - 1;
03152 if (end > text_end)
03153 end = text_end;
03154
03155 tail = target_end - 1;
03156 s = text + (target_end - target) - 1;
03157 if (IS_NULL(reg->int_map)) {
03158 while (s < end) {
03159 p = s;
03160 t = tail;
03161 #ifdef ONIG_DEBUG_SEARCH
03162 fprintf(stderr, "bm_search_loop: pos: %d %s\n",
03163 (int)(s - text), s);
03164 #endif
03165 while (*p == *t) {
03166 if (t == target) return (UChar* )p;
03167 p--; t--;
03168 }
03169 s += reg->map[*s];
03170 }
03171 }
03172 else {
03173 while (s < end) {
03174 p = s;
03175 t = tail;
03176 while (*p == *t) {
03177 if (t == target) return (UChar* )p;
03178 p--; t--;
03179 }
03180 s += reg->int_map[*s];
03181 }
03182 }
03183 return (UChar* )NULL;
03184 }
03185
03186
03187 static UChar*
03188 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03189 const UChar* text, const UChar* text_end,
03190 const UChar* text_range)
03191 {
03192 const UChar *s, *se, *t, *end;
03193 const UChar *tail;
03194 ptrdiff_t skip, tlen1;
03195 OnigEncoding enc = reg->enc;
03196 int case_fold_flag = reg->case_fold_flag;
03197
03198 #ifdef ONIG_DEBUG_SEARCH
03199 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03200 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03201 #endif
03202
03203 tail = target_end - 1;
03204 tlen1 = tail - target;
03205 end = text_range;
03206 if (end + tlen1 > text_end)
03207 end = text_end - tlen1;
03208
03209 s = text;
03210
03211 if (IS_NULL(reg->int_map)) {
03212 while (s < end) {
03213 se = s + tlen1;
03214 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03215 s, se + 1))
03216 return (UChar* )s;
03217 skip = reg->map[*se];
03218 t = s;
03219 do {
03220 s += enclen(reg->enc, s, end);
03221 } while ((s - t) < skip && s < end);
03222 }
03223 }
03224 else {
03225 while (s < end) {
03226 se = s + tlen1;
03227 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03228 s, se + 1))
03229 return (UChar* )s;
03230 skip = reg->int_map[*se];
03231 t = s;
03232 do {
03233 s += enclen(reg->enc, s, end);
03234 } while ((s - t) < skip && s < end);
03235 }
03236 }
03237
03238 return (UChar* )NULL;
03239 }
03240
03241
03242 static UChar*
03243 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03244 const UChar* text, const UChar* text_end, const UChar* text_range)
03245 {
03246 const UChar *s, *p, *end;
03247 const UChar *tail;
03248 OnigEncoding enc = reg->enc;
03249 int case_fold_flag = reg->case_fold_flag;
03250
03251 #ifdef ONIG_DEBUG_SEARCH
03252 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03253 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03254 #endif
03255
03256 end = text_range + (target_end - target) - 1;
03257 if (end > text_end)
03258 end = text_end;
03259
03260 tail = target_end - 1;
03261 s = text + (target_end - target) - 1;
03262 if (IS_NULL(reg->int_map)) {
03263 while (s < end) {
03264 p = s - (target_end - target) + 1;
03265 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03266 p, s + 1))
03267 return (UChar* )p;
03268 s += reg->map[*s];
03269 }
03270 }
03271 else {
03272 while (s < end) {
03273 p = s - (target_end - target) + 1;
03274 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03275 p, s + 1))
03276 return (UChar* )p;
03277 s += reg->int_map[*s];
03278 }
03279 }
03280 return (UChar* )NULL;
03281 }
03282
03283 #else
03284
03285
03286 static UChar*
03287 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
03288 const UChar* text, const UChar* text_end,
03289 const UChar* text_range)
03290 {
03291 const UChar *s, *se, *t, *p, *end;
03292 const UChar *tail;
03293 ptrdiff_t skip, tlen1;
03294 OnigEncoding enc = reg->enc;
03295
03296 #ifdef ONIG_DEBUG_SEARCH
03297 fprintf(stderr, "bm_search_notrev: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03298 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03299 #endif
03300
03301 tail = target_end - 1;
03302 tlen1 = tail - target;
03303 end = text_range;
03304 if (end + tlen1 > text_end)
03305 end = text_end - tlen1;
03306
03307 s = text;
03308
03309 if (IS_NULL(reg->int_map)) {
03310 while (s < end) {
03311 p = se = s + tlen1;
03312 t = tail;
03313 while (*p == *t) {
03314 if (t == target) return (UChar* )s;
03315 p--; t--;
03316 }
03317 if (s + 1 >= end) break;
03318 skip = reg->map[se[1]];
03319 t = s;
03320 do {
03321 s += enclen(enc, s, end);
03322 } while ((s - t) < skip && s < end);
03323 }
03324 }
03325 else {
03326 while (s < end) {
03327 p = se = s + tlen1;
03328 t = tail;
03329 while (*p == *t) {
03330 if (t == target) return (UChar* )s;
03331 p--; t--;
03332 }
03333 if (s + 1 >= end) break;
03334 skip = reg->int_map[se[1]];
03335 t = s;
03336 do {
03337 s += enclen(enc, s, end);
03338 } while ((s - t) < skip && s < end);
03339 }
03340 }
03341
03342 return (UChar* )NULL;
03343 }
03344
03345
03346 static UChar*
03347 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
03348 const UChar* text, const UChar* text_end, const UChar* text_range)
03349 {
03350 const UChar *s, *t, *p, *end;
03351 const UChar *tail;
03352 ptrdiff_t tlen1;
03353
03354 tail = target_end - 1;
03355 tlen1 = tail - target;
03356 end = text_range + tlen1;
03357 if (end > text_end)
03358 end = text_end;
03359
03360 s = text + tlen1;
03361 if (IS_NULL(reg->int_map)) {
03362 while (s < end) {
03363 p = s;
03364 t = tail;
03365 while (*p == *t) {
03366 if (t == target) return (UChar* )p;
03367 p--; t--;
03368 }
03369 if (s + 1 >= end) break;
03370 s += reg->map[s[1]];
03371 }
03372 }
03373 else {
03374 while (s < end) {
03375 p = s;
03376 t = tail;
03377 while (*p == *t) {
03378 if (t == target) return (UChar* )p;
03379 p--; t--;
03380 }
03381 if (s + 1 >= end) break;
03382 s += reg->int_map[s[1]];
03383 }
03384 }
03385 return (UChar* )NULL;
03386 }
03387
03388
03389 static UChar*
03390 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03391 const UChar* text, const UChar* text_end,
03392 const UChar* text_range)
03393 {
03394 const UChar *s, *se, *t, *end;
03395 const UChar *tail;
03396 ptrdiff_t skip, tlen1;
03397 OnigEncoding enc = reg->enc;
03398 int case_fold_flag = reg->case_fold_flag;
03399
03400 #ifdef ONIG_DEBUG_SEARCH
03401 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03402 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03403 #endif
03404
03405 tail = target_end - 1;
03406 tlen1 = tail - target;
03407 end = text_range;
03408 if (end + tlen1 > text_end)
03409 end = text_end - tlen1;
03410
03411 s = text;
03412
03413 if (IS_NULL(reg->int_map)) {
03414 while (s < end) {
03415 se = s + tlen1;
03416 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03417 s, se + 1))
03418 return (UChar* )s;
03419 if (s + 1 >= end) break;
03420 skip = reg->map[se[1]];
03421 t = s;
03422 do {
03423 s += enclen(enc, s, end);
03424 } while ((s - t) < skip && s < end);
03425 }
03426 }
03427 else {
03428 while (s < end) {
03429 se = s + tlen1;
03430 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03431 s, se + 1))
03432 return (UChar* )s;
03433 if (s + 1 >= end) break;
03434 skip = reg->int_map[se[1]];
03435 t = s;
03436 do {
03437 s += enclen(enc, s, end);
03438 } while ((s - t) < skip && s < end);
03439 }
03440 }
03441
03442 return (UChar* )NULL;
03443 }
03444
03445
03446 static UChar*
03447 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
03448 const UChar* text, const UChar* text_end, const UChar* text_range)
03449 {
03450 const UChar *s, *p, *end;
03451 const UChar *tail;
03452 ptrdiff_t tlen1;
03453 OnigEncoding enc = reg->enc;
03454 int case_fold_flag = reg->case_fold_flag;
03455
03456 #ifdef ONIG_DEBUG_SEARCH
03457 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
03458 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
03459 #endif
03460
03461 tail = target_end - 1;
03462 tlen1 = tail - target;
03463 end = text_range + tlen1;
03464 if (end > text_end)
03465 end = text_end;
03466
03467 s = text + tlen1;
03468 if (IS_NULL(reg->int_map)) {
03469 while (s < end) {
03470 p = s - tlen1;
03471 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03472 p, s + 1))
03473 return (UChar* )p;
03474 if (s + 1 >= end) break;
03475 s += reg->map[s[1]];
03476 }
03477 }
03478 else {
03479 while (s < end) {
03480 p = s - tlen1;
03481 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
03482 p, s + 1))
03483 return (UChar* )p;
03484 if (s + 1 >= end) break;
03485 s += reg->int_map[s[1]];
03486 }
03487 }
03488 return (UChar* )NULL;
03489 }
03490 #endif
03491
03492 static int
03493 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03494 int** skip)
03495 {
03496 int i, len;
03497
03498 if (IS_NULL(*skip)) {
03499 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03500 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
03501 }
03502
03503 len = (int )(end - s);
03504 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
03505 (*skip)[i] = len;
03506
03507 for (i = len - 1; i > 0; i--)
03508 (*skip)[s[i]] = i;
03509
03510 return 0;
03511 }
03512
03513 static UChar*
03514 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
03515 const UChar* text, const UChar* adjust_text,
03516 const UChar* text_end, const UChar* text_start)
03517 {
03518 const UChar *s, *t, *p;
03519
03520 s = text_end - (target_end - target);
03521 if (text_start < s)
03522 s = text_start;
03523 else
03524 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
03525
03526 while (s >= text) {
03527 p = s;
03528 t = target;
03529 while (t < target_end && *p == *t) {
03530 p++; t++;
03531 }
03532 if (t == target_end)
03533 return (UChar* )s;
03534
03535 s -= reg->int_map_backward[*s];
03536 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
03537 }
03538
03539 return (UChar* )NULL;
03540 }
03541
03542 static UChar*
03543 map_search(OnigEncoding enc, UChar map[],
03544 const UChar* text, const UChar* text_range, const UChar* text_end)
03545 {
03546 const UChar *s = text;
03547
03548 while (s < text_range) {
03549 if (map[*s]) return (UChar* )s;
03550
03551 s += enclen(enc, s, text_end);
03552 }
03553 return (UChar* )NULL;
03554 }
03555
03556 static UChar*
03557 map_search_backward(OnigEncoding enc, UChar map[],
03558 const UChar* text, const UChar* adjust_text,
03559 const UChar* text_start, const UChar* text_end)
03560 {
03561 const UChar *s = text_start;
03562
03563 while (s >= text) {
03564 if (map[*s]) return (UChar* )s;
03565
03566 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
03567 }
03568 return (UChar* )NULL;
03569 }
03570
03571 extern OnigPosition
03572 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
03573 OnigOptionType option)
03574 {
03575 ptrdiff_t r;
03576 UChar *prev;
03577 OnigMatchArg msa;
03578
03579 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
03580 start:
03581 THREAD_ATOMIC_START;
03582 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
03583 ONIG_STATE_INC(reg);
03584 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
03585 onig_chain_reduce(reg);
03586 ONIG_STATE_INC(reg);
03587 }
03588 }
03589 else {
03590 int n;
03591
03592 THREAD_ATOMIC_END;
03593 n = 0;
03594 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
03595 if (++n > THREAD_PASS_LIMIT_COUNT)
03596 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
03597 THREAD_PASS;
03598 }
03599 goto start;
03600 }
03601 THREAD_ATOMIC_END;
03602 #endif
03603
03604 MATCH_ARG_INIT(msa, option, region, at, at);
03605 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03606 {
03607 int offset = at - str;
03608 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
03609 }
03610 #endif
03611
03612 if (region
03613 #ifdef USE_POSIX_API_REGION_OPTION
03614 && !IS_POSIX_REGION(option)
03615 #endif
03616 ) {
03617 r = onig_region_resize_clear(region, reg->num_mem + 1);
03618 }
03619 else
03620 r = 0;
03621
03622 if (r == 0) {
03623 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
03624 r = match_at(reg, str, end,
03625 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
03626 end,
03627 #endif
03628 at, prev, &msa);
03629 }
03630
03631 MATCH_ARG_FREE(msa);
03632 ONIG_STATE_DEC_THREAD(reg);
03633 return r;
03634 }
03635
03636 static int
03637 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
03638 UChar* range, UChar** low, UChar** high, UChar** low_prev)
03639 {
03640 UChar *p, *pprev = (UChar* )NULL;
03641
03642 #ifdef ONIG_DEBUG_SEARCH
03643 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
03644 str, str, end, end, s, s, range, range);
03645 #endif
03646
03647 p = s;
03648 if (reg->dmin > 0) {
03649 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
03650 p += reg->dmin;
03651 }
03652 else {
03653 UChar *q = p + reg->dmin;
03654 while (p < q) p += enclen(reg->enc, p, end);
03655 }
03656 }
03657
03658 retry:
03659 switch (reg->optimize) {
03660 case ONIG_OPTIMIZE_EXACT:
03661 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
03662 break;
03663 case ONIG_OPTIMIZE_EXACT_IC:
03664 p = slow_search_ic(reg->enc, reg->case_fold_flag,
03665 reg->exact, reg->exact_end, p, end, range);
03666 break;
03667
03668 case ONIG_OPTIMIZE_EXACT_BM:
03669 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
03670 break;
03671
03672 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
03673 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
03674 break;
03675
03676 case ONIG_OPTIMIZE_EXACT_BM_IC:
03677 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
03678 break;
03679
03680 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
03681 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
03682 break;
03683
03684 case ONIG_OPTIMIZE_MAP:
03685 p = map_search(reg->enc, reg->map, p, range, end);
03686 break;
03687 }
03688
03689 if (p && p < range) {
03690 if (p - reg->dmin < s) {
03691 retry_gate:
03692 pprev = p;
03693 p += enclen(reg->enc, p, end);
03694 goto retry;
03695 }
03696
03697 if (reg->sub_anchor) {
03698 UChar* prev;
03699
03700 switch (reg->sub_anchor) {
03701 case ANCHOR_BEGIN_LINE:
03702 if (!ON_STR_BEGIN(p)) {
03703 prev = onigenc_get_prev_char_head(reg->enc,
03704 (pprev ? pprev : str), p, end);
03705 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
03706 goto retry_gate;
03707 }
03708 break;
03709
03710 case ANCHOR_END_LINE:
03711 if (ON_STR_END(p)) {
03712 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
03713 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
03714 (pprev ? pprev : str), p);
03715 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
03716 goto retry_gate;
03717 #endif
03718 }
03719 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
03720 goto retry_gate;
03721 break;
03722 }
03723 }
03724
03725 if (reg->dmax == 0) {
03726 *low = p;
03727 if (low_prev) {
03728 if (*low > s)
03729 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
03730 else
03731 *low_prev = onigenc_get_prev_char_head(reg->enc,
03732 (pprev ? pprev : str), p, end);
03733 }
03734 }
03735 else {
03736 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
03737 *low = p - reg->dmax;
03738 if (*low > s) {
03739 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
03740 *low, end, (const UChar** )low_prev);
03741 if (low_prev && IS_NULL(*low_prev))
03742 *low_prev = onigenc_get_prev_char_head(reg->enc,
03743 (pprev ? pprev : s), *low, end);
03744 }
03745 else {
03746 if (low_prev)
03747 *low_prev = onigenc_get_prev_char_head(reg->enc,
03748 (pprev ? pprev : str), *low, end);
03749 }
03750 }
03751 }
03752
03753 *high = p - reg->dmin;
03754
03755 #ifdef ONIG_DEBUG_SEARCH
03756 fprintf(stderr,
03757 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
03758 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
03759 #endif
03760 return 1;
03761 }
03762
03763 return 0;
03764 }
03765
03766 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
03767 int** skip));
03768
03769 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
03770
03771 static int
03772 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
03773 UChar* s, const UChar* range, UChar* adjrange,
03774 UChar** low, UChar** high)
03775 {
03776 int r;
03777 UChar *p;
03778
03779 range += reg->dmin;
03780 p = s;
03781
03782 retry:
03783 switch (reg->optimize) {
03784 case ONIG_OPTIMIZE_EXACT:
03785 exact_method:
03786 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
03787 range, adjrange, end, p);
03788 break;
03789
03790 case ONIG_OPTIMIZE_EXACT_IC:
03791 case ONIG_OPTIMIZE_EXACT_BM_IC:
03792 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
03793 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
03794 reg->exact, reg->exact_end,
03795 range, adjrange, end, p);
03796 break;
03797
03798 case ONIG_OPTIMIZE_EXACT_BM:
03799 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
03800 if (IS_NULL(reg->int_map_backward)) {
03801 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
03802 goto exact_method;
03803
03804 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
03805 &(reg->int_map_backward));
03806 if (r) return r;
03807 }
03808 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
03809 end, p);
03810 break;
03811
03812 case ONIG_OPTIMIZE_MAP:
03813 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
03814 break;
03815 }
03816
03817 if (p) {
03818 if (reg->sub_anchor) {
03819 UChar* prev;
03820
03821 switch (reg->sub_anchor) {
03822 case ANCHOR_BEGIN_LINE:
03823 if (!ON_STR_BEGIN(p)) {
03824 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
03825 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
03826 p = prev;
03827 goto retry;
03828 }
03829 }
03830 break;
03831
03832 case ANCHOR_END_LINE:
03833 if (ON_STR_END(p)) {
03834 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
03835 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
03836 if (IS_NULL(prev)) goto fail;
03837 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
03838 p = prev;
03839 goto retry;
03840 }
03841 #endif
03842 }
03843 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
03844 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
03845 if (IS_NULL(p)) goto fail;
03846 goto retry;
03847 }
03848 break;
03849 }
03850 }
03851
03852
03853 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
03854 *low = p - reg->dmax;
03855 *high = p - reg->dmin;
03856 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
03857 }
03858
03859 #ifdef ONIG_DEBUG_SEARCH
03860 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
03861 (int )(*low - str), (int )(*high - str));
03862 #endif
03863 return 1;
03864 }
03865
03866 fail:
03867 #ifdef ONIG_DEBUG_SEARCH
03868 fprintf(stderr, "backward_search_range: fail.\n");
03869 #endif
03870 return 0;
03871 }
03872
03873
03874 extern OnigPosition
03875 onig_search(regex_t* reg, const UChar* str, const UChar* end,
03876 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
03877 {
03878 return onig_search_gpos(reg, str, end, start, start, range, region, option);
03879 }
03880
03881 extern OnigPosition
03882 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
03883 const UChar* global_pos,
03884 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
03885 {
03886 ptrdiff_t r;
03887 UChar *s, *prev;
03888 OnigMatchArg msa;
03889 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
03890 const UChar *orig_start = start;
03891 const UChar *orig_range = range;
03892 #endif
03893
03894 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
03895 start:
03896 THREAD_ATOMIC_START;
03897 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
03898 ONIG_STATE_INC(reg);
03899 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
03900 onig_chain_reduce(reg);
03901 ONIG_STATE_INC(reg);
03902 }
03903 }
03904 else {
03905 int n;
03906
03907 THREAD_ATOMIC_END;
03908 n = 0;
03909 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
03910 if (++n > THREAD_PASS_LIMIT_COUNT)
03911 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
03912 THREAD_PASS;
03913 }
03914 goto start;
03915 }
03916 THREAD_ATOMIC_END;
03917 #endif
03918
03919 #ifdef ONIG_DEBUG_SEARCH
03920 fprintf(stderr,
03921 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
03922 str, str, end - str, start - str, range - str);
03923 #endif
03924
03925 if (region
03926 #ifdef USE_POSIX_API_REGION_OPTION
03927 && !IS_POSIX_REGION(option)
03928 #endif
03929 ) {
03930 r = onig_region_resize_clear(region, reg->num_mem + 1);
03931 if (r) goto finish_no_msa;
03932 }
03933
03934 if (start > end || start < str) goto mismatch_no_msa;
03935
03936
03937 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
03938 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
03939 #define MATCH_AND_RETURN_CHECK(upper_range) \
03940 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
03941 if (r != ONIG_MISMATCH) {\
03942 if (r >= 0) {\
03943 if (! IS_FIND_LONGEST(reg->options)) {\
03944 goto match;\
03945 }\
03946 }\
03947 else goto finish; \
03948 }
03949 #else
03950 #define MATCH_AND_RETURN_CHECK(upper_range) \
03951 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
03952 if (r != ONIG_MISMATCH) {\
03953 if (r >= 0) {\
03954 goto match;\
03955 }\
03956 else goto finish; \
03957 }
03958 #endif
03959 #else
03960 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
03961 #define MATCH_AND_RETURN_CHECK(none) \
03962 r = match_at(reg, str, end, s, prev, &msa);\
03963 if (r != ONIG_MISMATCH) {\
03964 if (r >= 0) {\
03965 if (! IS_FIND_LONGEST(reg->options)) {\
03966 goto match;\
03967 }\
03968 }\
03969 else goto finish; \
03970 }
03971 #else
03972 #define MATCH_AND_RETURN_CHECK(none) \
03973 r = match_at(reg, str, end, s, prev, &msa);\
03974 if (r != ONIG_MISMATCH) {\
03975 if (r >= 0) {\
03976 goto match;\
03977 }\
03978 else goto finish; \
03979 }
03980 #endif
03981 #endif
03982
03983
03984
03985 if (reg->anchor != 0 && str < end) {
03986 UChar *min_semi_end, *max_semi_end;
03987
03988 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
03989
03990 begin_position:
03991 if (range > start)
03992 range = start + 1;
03993 else
03994 range = start;
03995 }
03996 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
03997
03998 if (range > start) {
03999 if (start != str) goto mismatch_no_msa;
04000 range = str + 1;
04001 }
04002 else {
04003 if (range <= str) {
04004 start = str;
04005 range = str;
04006 }
04007 else
04008 goto mismatch_no_msa;
04009 }
04010 }
04011 else if (reg->anchor & ANCHOR_END_BUF) {
04012 min_semi_end = max_semi_end = (UChar* )end;
04013
04014 end_buf:
04015 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
04016 goto mismatch_no_msa;
04017
04018 if (range > start) {
04019 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
04020 start = min_semi_end - reg->anchor_dmax;
04021 if (start < end)
04022 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
04023 }
04024 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
04025 range = max_semi_end - reg->anchor_dmin + 1;
04026 }
04027
04028 if (start > range) goto mismatch_no_msa;
04029
04030
04031 }
04032 else {
04033 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
04034 range = min_semi_end - reg->anchor_dmax;
04035 }
04036 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
04037 start = max_semi_end - reg->anchor_dmin;
04038 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
04039 }
04040 if (range > start) goto mismatch_no_msa;
04041 }
04042 }
04043 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
04044 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
04045
04046 max_semi_end = (UChar* )end;
04047 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
04048 min_semi_end = pre_end;
04049
04050 #ifdef USE_CRNL_AS_LINE_TERMINATOR
04051 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
04052 if (IS_NOT_NULL(pre_end) &&
04053 IS_NEWLINE_CRLF(reg->options) &&
04054 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
04055 min_semi_end = pre_end;
04056 }
04057 #endif
04058 if (min_semi_end > str && start <= min_semi_end) {
04059 goto end_buf;
04060 }
04061 }
04062 else {
04063 min_semi_end = (UChar* )end;
04064 goto end_buf;
04065 }
04066 }
04067 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
04068 if (! (reg->anchor & ANCHOR_LOOK_BEHIND)) {
04069 goto begin_position;
04070 }
04071 }
04072 }
04073 else if (str == end) {
04074 static const UChar address_for_empty_string[] = "";
04075
04076 #ifdef ONIG_DEBUG_SEARCH
04077 fprintf(stderr, "onig_search: empty string.\n");
04078 #endif
04079
04080 if (reg->threshold_len == 0) {
04081 start = end = str = address_for_empty_string;
04082 s = (UChar* )start;
04083 prev = (UChar* )NULL;
04084
04085 MATCH_ARG_INIT(msa, option, region, start, start);
04086 #ifdef USE_COMBINATION_EXPLOSION_CHECK
04087 msa.state_check_buff = (void* )0;
04088 msa.state_check_buff_size = 0;
04089 #endif
04090 MATCH_AND_RETURN_CHECK(end);
04091 goto mismatch;
04092 }
04093 goto mismatch_no_msa;
04094 }
04095
04096 #ifdef ONIG_DEBUG_SEARCH
04097 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
04098 (int )(end - str), (int )(start - str), (int )(range - str));
04099 #endif
04100
04101 MATCH_ARG_INIT(msa, option, region, start, global_pos);
04102 #ifdef USE_COMBINATION_EXPLOSION_CHECK
04103 {
04104 int offset = (MIN(start, range) - str);
04105 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
04106 }
04107 #endif
04108
04109 s = (UChar* )start;
04110 if (range > start) {
04111 if (s > str)
04112 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
04113 else
04114 prev = (UChar* )NULL;
04115
04116 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
04117 UChar *sch_range, *low, *high, *low_prev;
04118
04119 sch_range = (UChar* )range;
04120 if (reg->dmax != 0) {
04121 if (reg->dmax == ONIG_INFINITE_DISTANCE)
04122 sch_range = (UChar* )end;
04123 else {
04124 sch_range += reg->dmax;
04125 if (sch_range > end) sch_range = (UChar* )end;
04126 }
04127 }
04128
04129 if ((end - start) < reg->threshold_len)
04130 goto mismatch;
04131
04132 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
04133 do {
04134 if (! forward_search_range(reg, str, end, s, sch_range,
04135 &low, &high, &low_prev)) goto mismatch;
04136 if (s < low) {
04137 s = low;
04138 prev = low_prev;
04139 }
04140 while (s <= high) {
04141 MATCH_AND_RETURN_CHECK(orig_range);
04142 prev = s;
04143 s += enclen(reg->enc, s, end);
04144 }
04145 } while (s < range);
04146 goto mismatch;
04147 }
04148 else {
04149 if (! forward_search_range(reg, str, end, s, sch_range,
04150 &low, &high, (UChar** )NULL)) goto mismatch;
04151
04152 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
04153 do {
04154 if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0)
04155 msa.gpos = s;
04156 MATCH_AND_RETURN_CHECK(orig_range);
04157 prev = s;
04158 s += enclen(reg->enc, s, end);
04159
04160 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
04161 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
04162 && s < range) {
04163 prev = s;
04164 s += enclen(reg->enc, s, end);
04165 }
04166 }
04167 } while (s < range);
04168 goto mismatch;
04169 }
04170 }
04171 }
04172
04173 do {
04174 MATCH_AND_RETURN_CHECK(orig_range);
04175 prev = s;
04176 s += enclen(reg->enc, s, end);
04177 } while (s < range);
04178
04179 if (s == range) {
04180 MATCH_AND_RETURN_CHECK(orig_range);
04181 }
04182 }
04183 else {
04184 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
04185 if (orig_start < end)
04186 orig_start += enclen(reg->enc, orig_start, end);
04187 #endif
04188
04189 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
04190 UChar *low, *high, *adjrange, *sch_start;
04191
04192 if (range < end)
04193 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
04194 else
04195 adjrange = (UChar* )end;
04196
04197 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
04198 (end - range) >= reg->threshold_len) {
04199 do {
04200 sch_start = s + reg->dmax;
04201 if (sch_start > end) sch_start = (UChar* )end;
04202 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
04203 &low, &high) <= 0)
04204 goto mismatch;
04205
04206 if (s > high)
04207 s = high;
04208
04209 while (s >= low) {
04210 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
04211 MATCH_AND_RETURN_CHECK(orig_start);
04212 s = prev;
04213 }
04214 } while (s >= range);
04215 goto mismatch;
04216 }
04217 else {
04218 if ((end - range) < reg->threshold_len) goto mismatch;
04219
04220 sch_start = s;
04221 if (reg->dmax != 0) {
04222 if (reg->dmax == ONIG_INFINITE_DISTANCE)
04223 sch_start = (UChar* )end;
04224 else {
04225 sch_start += reg->dmax;
04226 if (sch_start > end) sch_start = (UChar* )end;
04227 else
04228 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
04229 start, sch_start, end);
04230 }
04231 }
04232 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
04233 &low, &high) <= 0) goto mismatch;
04234 }
04235 }
04236
04237 do {
04238 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
04239 MATCH_AND_RETURN_CHECK(orig_start);
04240 s = prev;
04241 } while (s >= range);
04242 }
04243
04244 mismatch:
04245 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
04246 if (IS_FIND_LONGEST(reg->options)) {
04247 if (msa.best_len >= 0) {
04248 s = msa.best_s;
04249 goto match;
04250 }
04251 }
04252 #endif
04253 r = ONIG_MISMATCH;
04254
04255 finish:
04256 MATCH_ARG_FREE(msa);
04257 ONIG_STATE_DEC_THREAD(reg);
04258
04259
04260
04261 if (IS_FIND_NOT_EMPTY(reg->options) && region
04262 #ifdef USE_POSIX_API_REGION_OPTION
04263 && !IS_POSIX_REGION(option)
04264 #endif
04265 ) {
04266 onig_region_clear(region);
04267 }
04268
04269 #ifdef ONIG_DEBUG
04270 if (r != ONIG_MISMATCH)
04271 fprintf(stderr, "onig_search: error %d\n", r);
04272 #endif
04273 return r;
04274
04275 mismatch_no_msa:
04276 r = ONIG_MISMATCH;
04277 finish_no_msa:
04278 ONIG_STATE_DEC_THREAD(reg);
04279 #ifdef ONIG_DEBUG
04280 if (r != ONIG_MISMATCH)
04281 fprintf(stderr, "onig_search: error %d\n", r);
04282 #endif
04283 return r;
04284
04285 match:
04286 ONIG_STATE_DEC_THREAD(reg);
04287 MATCH_ARG_FREE(msa);
04288 return s - str;
04289 }
04290
04291 extern OnigEncoding
04292 onig_get_encoding(regex_t* reg)
04293 {
04294 return reg->enc;
04295 }
04296
04297 extern OnigOptionType
04298 onig_get_options(regex_t* reg)
04299 {
04300 return reg->options;
04301 }
04302
04303 extern OnigCaseFoldType
04304 onig_get_case_fold_flag(regex_t* reg)
04305 {
04306 return reg->case_fold_flag;
04307 }
04308
04309 extern const OnigSyntaxType*
04310 onig_get_syntax(regex_t* reg)
04311 {
04312 return reg->syntax;
04313 }
04314
04315 extern int
04316 onig_number_of_captures(regex_t* reg)
04317 {
04318 return reg->num_mem;
04319 }
04320
04321 extern int
04322 onig_number_of_capture_histories(regex_t* reg)
04323 {
04324 #ifdef USE_CAPTURE_HISTORY
04325 int i, n;
04326
04327 n = 0;
04328 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
04329 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
04330 n++;
04331 }
04332 return n;
04333 #else
04334 return 0;
04335 #endif
04336 }
04337
04338 extern void
04339 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
04340 {
04341 *to = *from;
04342 }
04343
04344