00001 #ifndef ONIGURUMA_REGINT_H
00002 #define ONIGURUMA_REGINT_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
/* Defining any of the fine-grained ONIG_DEBUG_* switches also turns on the
 * umbrella ONIG_DEBUG switch (unless it is already defined). */
00043 #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
00044 defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
00045 defined(ONIG_DEBUG_STATISTICS)
00046 #ifndef ONIG_DEBUG
00047 #define ONIG_DEBUG
00048 #endif
00049 #endif
00050
/* Defined for the listed x86, x86-64 and m68020 compiler targets.  Later in
 * this header it selects the direct-load form of PLATFORM_GET_INC and the
 * unsigned int (rather than unsigned char) representation of Bits. */
00051 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
00052 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
00053 defined(__mc68020__)
00054 #define PLATFORM_UNALIGNED_WORD_ACCESS
00055 #endif
00056
00057
00058
00059 #define USE_NAMED_GROUP
00060 #define USE_SUBEXP_CALL
00061 #define USE_PERL_SUBEXP_CALL
00062 #define USE_CAPITAL_P_NAMED_GROUP
00063 #define USE_BACKREF_WITH_LEVEL
00064 #define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
00065 #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
00066 #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
00067
00068
00069 #define USE_NO_INVALID_QUANTIFIER
00070
00071
00072 #define USE_PARSE_TREE_NODE_RECYCLE
00073 #define USE_OP_PUSH_OR_JUMP_EXACT
00074 #define USE_QTFR_PEEK_NEXT
00075 #define USE_ST_LIBRARY
00076 #define USE_SHARED_CCLASS_TABLE
00077 #define USE_SUNDAY_QUICK_SEARCH
00078
00079 #define INIT_MATCH_STACK_SIZE 160
00080 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0
00081
00082
00083 #if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP)
00084 #if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
00085 #error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
00086 #endif
00087 #endif
00088
00089 #if defined(__GNUC__)
00090 # define ARG_UNUSED __attribute__ ((unused))
00091 #else
00092 # define ARG_UNUSED
00093 #endif
00094
00095
00096
00097 #ifndef RUBY_DEFINES_H
00098 #include "ruby/ruby.h"
00099 #undef xmalloc
00100 #undef xrealloc
00101 #undef xcalloc
00102 #undef xfree
00103 #endif
00104 #ifdef ONIG_ESCAPE_UCHAR_COLLISION
00105 #undef ONIG_ESCAPE_UCHAR_COLLISION
00106 #endif
00107 #define USE_WORD_BEGIN_END
00108 #undef USE_MATCH_RANGE_IS_COMPLETE_RANGE
00109 #undef USE_CAPTURE_HISTORY
00110 #define USE_VARIABLE_META_CHARS
00111 #define USE_POSIX_API_REGION_OPTION
00112 #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
00113
00114
00115
00116 #define THREAD_SYSTEM_INIT
00117 #define THREAD_SYSTEM_END
00118 #define THREAD_ATOMIC_START
00119 #define THREAD_ATOMIC_END
00120 #define THREAD_PASS
00121 #ifndef xmalloc
00122 #define xmalloc malloc
00123 #define xrealloc realloc
00124 #define xcalloc calloc
00125 #define xfree free
00126 #endif
00127
00128 #ifdef RUBY
00129
00130 #define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints()
00131 #define onig_st_init_table st_init_table
00132 #define onig_st_init_table_with_size st_init_table_with_size
00133 #define onig_st_init_numtable st_init_numtable
00134 #define onig_st_init_numtable_with_size st_init_numtable_with_size
00135 #define onig_st_init_strtable st_init_strtable
00136 #define onig_st_init_strtable_with_size st_init_strtable_with_size
00137 #define onig_st_delete st_delete
00138 #define onig_st_delete_safe st_delete_safe
00139 #define onig_st_insert st_insert
00140 #define onig_st_lookup st_lookup
00141 #define onig_st_foreach st_foreach
00142 #define onig_st_add_direct st_add_direct
00143 #define onig_st_free_table st_free_table
00144 #define onig_st_cleanup_safe st_cleanup_safe
00145 #define onig_st_copy st_copy
00146 #define onig_st_nothing_key_clone st_nothing_key_clone
00147 #define onig_st_nothing_key_free st_nothing_key_free
00148 #define onig_st_is_member st_is_member
00149
00150 #define USE_UPPER_CASE_TABLE
00151 #else
00152
00153 #define st_init_table onig_st_init_table
00154 #define st_init_table_with_size onig_st_init_table_with_size
00155 #define st_init_numtable onig_st_init_numtable
00156 #define st_init_numtable_with_size onig_st_init_numtable_with_size
00157 #define st_init_strtable onig_st_init_strtable
00158 #define st_init_strtable_with_size onig_st_init_strtable_with_size
00159 #define st_delete onig_st_delete
00160 #define st_delete_safe onig_st_delete_safe
00161 #define st_insert onig_st_insert
00162 #define st_lookup onig_st_lookup
00163 #define st_foreach onig_st_foreach
00164 #define st_add_direct onig_st_add_direct
00165 #define st_free_table onig_st_free_table
00166 #define st_cleanup_safe onig_st_cleanup_safe
00167 #define st_copy onig_st_copy
00168 #define st_nothing_key_clone onig_st_nothing_key_clone
00169 #define st_nothing_key_free onig_st_nothing_key_free
00170
00171 #define onig_st_is_member st_is_member
00172
00173 #define CHECK_INTERRUPT_IN_MATCH_AT
00174
00175 #endif
00176
00177 #define STATE_CHECK_STRING_THRESHOLD_LEN 7
00178 #define STATE_CHECK_BUFF_MAX_SIZE 0x4000
00179
00180 #define THREAD_PASS_LIMIT_COUNT 8
00181 #define xmemset memset
00182 #define xmemcpy memcpy
00183 #define xmemmove memmove
00184
00185 #if defined(_WIN32) && !defined(__GNUC__)
00186 #define xalloca _alloca
00187 #define xvsnprintf _vsnprintf
00188 #else
00189 #define xalloca alloca
00190 #define xvsnprintf vsnprintf
00191 #endif
00192
00193
/* Increment / decrement helpers for (reg)->state.  They only do something
 * when both USE_RECOMPILE_API and USE_MULTI_THREAD_SYSTEM are defined;
 * otherwise all four macros expand to nothing.  The *_THREAD variants bracket
 * the update with THREAD_ATOMIC_START / THREAD_ATOMIC_END. */
00194 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
00195 #define ONIG_STATE_INC(reg) (reg)->state++
00196 #define ONIG_STATE_DEC(reg) (reg)->state--
00197
00198 #define ONIG_STATE_INC_THREAD(reg) do {\
00199 THREAD_ATOMIC_START;\
00200 (reg)->state++;\
00201 THREAD_ATOMIC_END;\
00202 } while(0)
00203 #define ONIG_STATE_DEC_THREAD(reg) do {\
00204 THREAD_ATOMIC_START;\
00205 (reg)->state--;\
00206 THREAD_ATOMIC_END;\
00207 } while(0)
00208 #else
00209 #define ONIG_STATE_INC(reg)
00210 #define ONIG_STATE_DEC(reg)
00211 #define ONIG_STATE_INC_THREAD(reg)
00212 #define ONIG_STATE_DEC_THREAD(reg)
00213 #endif
00214
00215 #ifdef HAVE_STDLIB_H
00216 #include <stdlib.h>
00217 #endif
00218
00219 #if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
00220 #include <alloca.h>
00221 #endif
00222
00223 #ifdef HAVE_STRING_H
00224 # include <string.h>
00225 #else
00226 # include <strings.h>
00227 #endif
00228
00229 #include <ctype.h>
00230 #ifdef HAVE_SYS_TYPES_H
00231 #include <sys/types.h>
00232 #endif
00233
00234 #ifdef HAVE_STDINT_H
00235 # include <stdint.h>
00236 #endif
00237
00238 #ifdef STDC_HEADERS
00239 # include <stddef.h>
00240 #endif
00241
00242 #ifdef __BORLANDC__
00243 #include <malloc.h>
00244 #endif
00245
00246 #ifdef ONIG_DEBUG
00247 # include <stdio.h>
00248 #endif
00249
00250 #ifdef _WIN32
00251 #if defined(_MSC_VER) && (_MSC_VER < 1300)
00252 #ifndef _INTPTR_T_DEFINED
00253 #define _INTPTR_T_DEFINED
00254 typedef int intptr_t;
00255 #endif
00256 #ifndef _UINTPTR_T_DEFINED
00257 #define _UINTPTR_T_DEFINED
00258 typedef unsigned int uintptr_t;
00259 #endif
00260 #endif
00261 #endif
00262
00263 #include "regenc.h"
00264
00265 RUBY_SYMBOL_EXPORT_BEGIN
00266
/* Discard any MIN/MAX inherited from earlier headers so that the definitions
 * below are the ones in effect (#undef of an undefined name is a no-op). */
#undef MIN
#undef MAX

/* Smaller / larger of two values.  Function-like macros: an argument may be
 * evaluated twice, so do not pass expressions with side effects. */
#define MIN(a,b) (((b) < (a)) ? (b) : (a))
#define MAX(a,b) (((b) > (a)) ? (b) : (a))
00275
/* Pointer nullness tests; the argument is converted to void* first so any
 * object pointer type can be passed. */
#define IS_NULL(p)                    (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)

/* Early-exit guards: leave the *enclosing function* when p is null, returning
 * NULL or ONIGERR_MEMORY respectively.  Use as a statement followed by ';'.
 * They are wrapped in do { } while (0) so that a guard placed directly under
 * an `if` does not capture the `else` that follows it. */
#define CHECK_NULL_RETURN(p)          do { if (IS_NULL(p)) return NULL; } while (0)
#define CHECK_NULL_RETURN_MEMERR(p)   do { if (IS_NULL(p)) return ONIGERR_MEMORY; } while (0)

/* Typed null pointer for UChar* contexts. */
#define NULL_UCHARP                   ((UChar* )0)

/* All-ones value of OnigCodePoint, i.e. the largest representable code point. */
#define ONIG_LAST_CODE_POINT   (~((OnigCodePoint )0))
00283
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS

/* Load one `type` value from the byte cursor p straight through a cast
 * pointer, then advance p by sizeof(type). */
#define PLATFORM_GET_INC(val,p,type) do{\
  (val) = *(type* )(p);\
  (p) += sizeof(type);\
} while(0)

#else

/* Same contract, but the value is copied out bytewise with xmemcpy, so the
 * cursor does not need to be aligned for `type`. */
#define PLATFORM_GET_INC(val,p,type) do{\
  xmemcpy(&(val), (p), sizeof(type));\
  (p) += sizeof(type);\
} while(0)

/* Alignment unit used by the two helpers below. */
#define WORD_ALIGNMENT_SIZE       SIZEOF_LONG

/* pad_size := number of bytes needed to bring addr up to the next multiple
 * of WORD_ALIGNMENT_SIZE (0 when addr is already a multiple). */
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
  (pad_size) = (WORD_ALIGNMENT_SIZE \
                - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE)) \
               % WORD_ALIGNMENT_SIZE;\
} while (0)

/* Round addr up, in place, to a multiple of WORD_ALIGNMENT_SIZE. */
#define ALIGNMENT_RIGHT(addr) do {\
  (addr) += (WORD_ALIGNMENT_SIZE - 1);\
  (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)

#endif
00313
00314
/* Stack pop levels 0..2.  NOTE(review): the names suggest how much saved
 * state a pop must restore; the consumer is not in this header - confirm. */
00315 #define STACK_POP_LEVEL_FREE 0
00316 #define STACK_POP_LEVEL_MEM_START 1
00317 #define STACK_POP_LEVEL_ALL 2
00318
00319
/* Optimization kind codes 0..7 (plain numbering, not bit flags).
 * NOTE(review): presumably the search strategy selected for a compiled
 * pattern (exact / Boyer-Moore / map, with _IC = ignore case) - confirm. */
00320 #define ONIG_OPTIMIZE_NONE 0
00321 #define ONIG_OPTIMIZE_EXACT 1
00322 #define ONIG_OPTIMIZE_EXACT_BM 2
00323 #define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3
00324 #define ONIG_OPTIMIZE_EXACT_IC 4
00325 #define ONIG_OPTIMIZE_MAP 5
00326 #define ONIG_OPTIMIZE_EXACT_BM_IC 6
00327 #define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC 7
00328
00329
/* A small set of per-index flags packed into one unsigned int.  Indices at or
 * beyond BIT_STATUS_BITS_NUM do not fit; the macros below fold them onto
 * bit 0 (except the _SIMPLE variant, which ignores them). */
typedef unsigned int  BitStatusType;

/* Number of flag bits available in a BitStatusType. */
#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)

/* Clear every flag / set every flag. */
#define BIT_STATUS_CLEAR(stats)      (stats) = 0
#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)

/* Non-zero when flag n is set; for out-of-range n the result reflects bit 0.
 * The mask is built from an unsigned 1 so that the top bit is well defined,
 * and n is parenthesized so that expressions such as `a & b` work. */
#define BIT_STATUS_AT(stats,n) \
  ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))

/* Set flag n; an out-of-range n sets bit 0 instead. */
#define BIT_STATUS_ON_AT(stats,n) do {\
  if ((n) < (int )BIT_STATUS_BITS_NUM) \
    (stats) |= ((BitStatusType )1 << (n));\
  else\
    (stats) |= 1;\
} while (0)

/* Set flag n; an out-of-range n is silently ignored. */
#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
  if ((n) < (int )BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
} while (0)
00349
00350
/* Largest positive value of a signed integer SIZEOF_INT bytes wide, computed
 * in unsigned long arithmetic. */
00351 #define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
00352
/* Numeric value of a decimal / octal digit character code.  No range check
 * is done: the caller must already know that code is a digit. */
00353 #define DIGITVAL(code) ((code) - '0')
00354 #define ODIGITVAL(code) DIGITVAL(code)
/* Numeric value of a hexadecimal digit: digits go through DIGITVAL, codes the
 * encoding classifies as upper case are taken relative to 'A', and anything
 * else is treated as lower case relative to 'a'. */
00355 #define XDIGITVAL(enc,code) \
00356 (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
00357 : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
00358
/* Option predicates: each masks the named ONIG_OPTION_* bit(s) out of option.
 * The result is the raw masked value (non-zero when set), not a 0/1 boolean. */
00359 #define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
00360 #define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
00361 #define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
00362 #define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
00363 #define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
00364 #define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
/* Non-zero when either FIND_LONGEST or FIND_NOT_EMPTY is set. */
00365 #define IS_FIND_CONDITION(option) ((option) & \
00366 (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
00367 #define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
00368 #define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
00369 #define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
00370 #define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
00371 #define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
00372 #define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
00373 #define IS_NEWLINE_CRLF(option) ((option) & ONIG_OPTION_NEWLINE_CRLF)
00374
00375
00376
00377
00378
00379
/* Always 0 in this configuration; the argument is not evaluated. */
00380 #define IS_DYNAMIC_OPTION(option) 0
00381
/* case_fold_flag with the INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR bit cleared. */
00382 #define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
00383 ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
00384
/* Sentinel repeat count -1 and its test.  NOTE(review): by its name it marks
 * an unbounded upper repeat limit - confirm against the parser. */
00385 #define REPEAT_INFINITE -1
00386 #define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
00387
00388
/* A BitSet holds one bit per single-byte value (SINGLE_BYTE_SIZE = 256 bits),
 * stored as an array of "rooms" of type Bits. */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       ((int )sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

/* Room type: a whole unsigned int where PLATFORM_UNALIGNED_WORD_ACCESS is
 * defined, otherwise a single byte. */
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int   Bits;
#else
typedef unsigned char  Bits;
#endif
typedef Bits           BitSet[BITSET_SIZE];
typedef Bits*          BitSetRef;

/* Size of a BitSet in bytes, as an int. */
#define SIZE_BITSET        ((int )sizeof(BitSet))

/* Zero every room of bs.  The loop counter has a reserved-looking name so it
 * cannot shadow a variable used inside the argument (e.g. sets[i]). */
#define BITSET_CLEAR(bs) do {\
  int bitset_clear_i_;\
  for (bitset_clear_i_ = 0; bitset_clear_i_ < BITSET_SIZE; bitset_clear_i_++) {\
    (bs)[bitset_clear_i_] = 0;\
  }\
} while (0)

/* Room containing bit pos, and the mask of bit pos within its room.  The
 * mask starts from a Bits-typed 1, so when Bits is unsigned int the top bit
 * is built with unsigned arithmetic rather than a signed shift into the sign
 * bit. */
#define BS_ROOM(bs,pos)            (bs)[(int )(pos) / BITS_IN_ROOM]
#define BS_BIT(pos)                ((Bits )1 << ((int )(pos) % BITS_IN_ROOM))

/* Test (non-zero when set), set, clear and toggle bit pos of bs. */
#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
#define BITSET_CLEAR_BIT(bs, pos)   BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
#define BITSET_INVERT_BIT(bs, pos)  BS_ROOM(bs,pos) ^= BS_BIT(pos)
00416
00417
/* Growable byte buffer manipulated by the BBUF_* macros in this header:
 * p is the storage, used the number of bytes in use and alloc the number of
 * bytes allocated for p. */
00418 typedef struct _BBuf {
00419 UChar* p;
00420 unsigned int used;
00421 unsigned int alloc;
00422 } BBuf;
00423
/*
 * BBuf helpers.  Every macro that may grow the buffer executes
 * `return(ONIGERR_MEMORY)` in the *calling function* when xrealloc fails, so
 * they can only be used as statements inside functions returning an int
 * error code.  On such a failure the buffer is left exactly as it was
 * (pointer, used and alloc unchanged), so the caller can still release it.
 * Macro-local variables carry a bbuf_*_ prefix so they cannot collide with
 * names appearing in the arguments.
 */

/* Initialise buf with an initial capacity of size bytes. */
#define BBUF_INIT(buf,size)    onig_bbuf_init((BBuf* )(buf), (size))

/* Grow the capacity by exactly inc bytes. */
#define BBUF_SIZE_INC(buf,inc) do{\
  unsigned int bbuf_si_cap_ = (buf)->alloc + (inc);\
  UChar* bbuf_si_p_ = (UChar* )xrealloc((buf)->p, bbuf_si_cap_);\
  if (IS_NULL(bbuf_si_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_si_p_;\
  (buf)->alloc = bbuf_si_cap_;\
} while (0)

/* Double the capacity at least once, and keep doubling until it is >= low.
 * If doubling cannot make progress (capacity 0, or unsigned wraparound) the
 * capacity jumps straight to low instead of looping forever. */
#define BBUF_EXPAND(buf,low) do{\
  unsigned int bbuf_ex_need_ = (unsigned int )(low);\
  unsigned int bbuf_ex_cap_  = (buf)->alloc;\
  UChar* bbuf_ex_p_;\
  do {\
    unsigned int bbuf_ex_next_ = bbuf_ex_cap_ * 2;\
    if (bbuf_ex_next_ > bbuf_ex_cap_)      bbuf_ex_cap_ = bbuf_ex_next_;\
    else if (bbuf_ex_need_ > bbuf_ex_cap_) bbuf_ex_cap_ = bbuf_ex_need_;\
    else break;\
  } while (bbuf_ex_cap_ < bbuf_ex_need_);\
  bbuf_ex_p_ = (UChar* )xrealloc((buf)->p, bbuf_ex_cap_);\
  if (IS_NULL(bbuf_ex_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_ex_p_;\
  (buf)->alloc = bbuf_ex_cap_;\
} while (0)

/* Make the capacity at least size bytes (doubling); no-op when it already is. */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_en_need_ = (unsigned int )(size);\
  unsigned int bbuf_en_cap_  = (buf)->alloc;\
  while (bbuf_en_cap_ < bbuf_en_need_) {\
    unsigned int bbuf_en_next_ = bbuf_en_cap_ * 2;\
    bbuf_en_cap_ = (bbuf_en_next_ > bbuf_en_cap_) ? bbuf_en_next_ : bbuf_en_need_;\
  }\
  if ((buf)->alloc != bbuf_en_cap_) {\
    UChar* bbuf_en_p_ = (UChar* )xrealloc((buf)->p, bbuf_en_cap_);\
    if (IS_NULL(bbuf_en_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_en_p_;\
    (buf)->alloc = bbuf_en_cap_;\
  }\
} while (0)

/* Copy n bytes to offset pos, growing the buffer if needed; used is raised
 * to pos + n when the write extends past the current end. */
#define BBUF_WRITE(buf,pos,bytes,n) do{\
  int bbuf_wr_end_ = (pos) + (int )(n);\
  if ((buf)->alloc < (unsigned int )bbuf_wr_end_) BBUF_EXPAND((buf),bbuf_wr_end_);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )bbuf_wr_end_) (buf)->used = bbuf_wr_end_;\
} while (0)

/* Single-byte form of BBUF_WRITE. */
#define BBUF_WRITE1(buf,pos,byte) do{\
  int bbuf_w1_end_ = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )bbuf_w1_end_) BBUF_EXPAND((buf),bbuf_w1_end_);\
  (buf)->p[(pos)] = (UChar )(byte);\
  if ((buf)->used < (unsigned int )bbuf_w1_end_) (buf)->used = bbuf_w1_end_;\
} while (0)

/* Append at the current end; address / offset of the current end. */
#define BBUF_ADD(buf,bytes,n)       BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte)         BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf)    ((buf)->used)

/* Move n bytes from offset from to the higher offset to (regions may
 * overlap), growing the buffer and raising used as needed. */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* Move n bytes from offset from to offset to; used is not changed. */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* Move everything from offset from up to the end down to offset to, and
 * shrink used by (from - to). */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* Insert n bytes at pos, shifting the existing tail right; at or past the
 * current end this is a plain BBUF_WRITE. */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

/* Byte stored at offset pos (usable as an lvalue). */
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
00496
00497
/* Anchor kinds, one bit each so that several can be OR-ed into one mask. */

/* buffer / line / position anchors: bits 0-5 */
#define ANCHOR_BEGIN_BUF        0x00001
#define ANCHOR_BEGIN_LINE       0x00002
#define ANCHOR_BEGIN_POSITION   0x00004
#define ANCHOR_END_BUF          0x00008
#define ANCHOR_SEMI_END_BUF     0x00010
#define ANCHOR_END_LINE         0x00020

/* word boundaries and look-around: bits 6-13 */
#define ANCHOR_WORD_BOUND       0x00040
#define ANCHOR_NOT_WORD_BOUND   0x00080
#define ANCHOR_WORD_BEGIN       0x00100
#define ANCHOR_WORD_END         0x00200
#define ANCHOR_PREC_READ        0x00400
#define ANCHOR_PREC_READ_NOT    0x00800
#define ANCHOR_LOOK_BEHIND      0x01000
#define ANCHOR_LOOK_BEHIND_NOT  0x02000

/* leading any-char-star markers: bits 14-15 */
#define ANCHOR_ANYCHAR_STAR     0x04000
#define ANCHOR_ANYCHAR_STAR_ML  0x08000

/* keep: bit 16 */
#define ANCHOR_KEEP             0x10000
00518
00519
/* Opcodes of the compiled pattern.  Only the first three values are written
 * out; every later enumerator takes the next integer, so inserting, removing
 * or reordering entries renumbers everything after that point.  The group
 * headings below are derived from the enumerator names only. */
00520 enum OpCode {
00521 OP_FINISH = 0,
00522 OP_END = 1,
00523
/* exact (literal) string matches, including multibyte (MB) variants */
00524 OP_EXACT1 = 2,
00525 OP_EXACT2,
00526 OP_EXACT3,
00527 OP_EXACT4,
00528 OP_EXACT5,
00529 OP_EXACTN,
00530 OP_EXACTMB2N1,
00531 OP_EXACTMB2N2,
00532 OP_EXACTMB2N3,
00533 OP_EXACTMB2N,
00534 OP_EXACTMB3N,
00535 OP_EXACTMBN,
00536
/* exact matches, ignore-case (_IC) */
00537 OP_EXACT1_IC,
00538 OP_EXACTN_IC,
00539
/* character classes */
00540 OP_CCLASS,
00541 OP_CCLASS_MB,
00542 OP_CCLASS_MIX,
00543 OP_CCLASS_NOT,
00544 OP_CCLASS_MB_NOT,
00545 OP_CCLASS_MIX_NOT,
00546 OP_CCLASS_NODE,
00547
/* any-character and any-character-star forms */
00548 OP_ANYCHAR,
00549 OP_ANYCHAR_ML,
00550 OP_ANYCHAR_STAR,
00551 OP_ANYCHAR_ML_STAR,
00552 OP_ANYCHAR_STAR_PEEK_NEXT,
00553 OP_ANYCHAR_ML_STAR_PEEK_NEXT,
00554
/* word character tests and word boundaries */
00555 OP_WORD,
00556 OP_NOT_WORD,
00557 OP_WORD_BOUND,
00558 OP_NOT_WORD_BOUND,
00559 OP_WORD_BEGIN,
00560 OP_WORD_END,
00561
/* ASCII-only counterparts of the word opcodes */
00562 OP_ASCII_WORD,
00563 OP_NOT_ASCII_WORD,
00564 OP_ASCII_WORD_BOUND,
00565 OP_NOT_ASCII_WORD_BOUND,
00566 OP_ASCII_WORD_BEGIN,
00567 OP_ASCII_WORD_END,
00568
/* buffer / line / position anchors */
00569 OP_BEGIN_BUF,
00570 OP_END_BUF,
00571 OP_BEGIN_LINE,
00572 OP_END_LINE,
00573 OP_SEMI_END_BUF,
00574 OP_BEGIN_POSITION,
00575 OP_BEGIN_POS_OR_LINE,
00576
/* back-references */
00577 OP_BACKREF1,
00578 OP_BACKREF2,
00579 OP_BACKREFN,
00580 OP_BACKREFN_IC,
00581 OP_BACKREF_MULTI,
00582 OP_BACKREF_MULTI_IC,
00583 OP_BACKREF_WITH_LEVEL,
00584
/* capture group (memory) start / end */
00585 OP_MEMORY_START,
00586 OP_MEMORY_START_PUSH,
00587 OP_MEMORY_END_PUSH,
00588 OP_MEMORY_END_PUSH_REC,
00589 OP_MEMORY_END,
00590 OP_MEMORY_END_REC,
00591
00592 OP_KEEP,
00593
/* control flow, repeats and null (empty-match) checks */
00594 OP_FAIL,
00595 OP_JUMP,
00596 OP_PUSH,
00597 OP_POP,
00598 OP_PUSH_OR_JUMP_EXACT1,
00599 OP_PUSH_IF_PEEK_NEXT,
00600 OP_REPEAT,
00601 OP_REPEAT_NG,
00602 OP_REPEAT_INC,
00603 OP_REPEAT_INC_NG,
00604 OP_REPEAT_INC_SG,
00605 OP_REPEAT_INC_NG_SG,
00606 OP_NULL_CHECK_START,
00607 OP_NULL_CHECK_END,
00608 OP_NULL_CHECK_END_MEMST,
00609 OP_NULL_CHECK_END_MEMST_PUSH,
00610
/* look-ahead / look-behind and stop-backtrack */
00611 OP_PUSH_POS,
00612 OP_POP_POS,
00613 OP_PUSH_POS_NOT,
00614 OP_FAIL_POS,
00615 OP_PUSH_STOP_BT,
00616 OP_POP_STOP_BT,
00617 OP_LOOK_BEHIND,
00618 OP_PUSH_LOOK_BEHIND_NOT,
00619 OP_FAIL_LOOK_BEHIND_NOT,
00620
/* subexpression call / return */
00621 OP_CALL,
00622 OP_RETURN,
00623
00624 OP_CONDITION,
00625
/* state-check opcodes; their SIZE_OP_STATE_CHECK_* sizes are only defined
 * under USE_COMBINATION_EXPLOSION_CHECK, but the enumerators always exist */
00626 OP_STATE_CHECK_PUSH,
00627 OP_STATE_CHECK_PUSH_OR_JUMP,
00628 OP_STATE_CHECK,
00629 OP_STATE_CHECK_ANYCHAR_STAR,
00630 OP_STATE_CHECK_ANYCHAR_ML_STAR,
00631
00632
/* option changes */
00633 OP_SET_OPTION_PUSH,
00634 OP_SET_OPTION
00635 };
00636
/* Operand types embedded in the compiled byte code. */
typedef int    RelAddrType;        /* relative address */
typedef int    AbsAddrType;        /* absolute address */
typedef int    LengthType;         /* length           */
typedef int    RepeatNumType;      /* repeat number    */
typedef short  MemNumType;         /* memory number    */
typedef short  StateCheckNumType;  /* state-check number */
typedef void*  PointerType;        /* raw pointer      */

/* Encoded size in bytes of the opcode itself and of each operand kind,
 * all of type int. */
#define SIZE_OPCODE           1
#define SIZE_RELADDR          ((int )sizeof(RelAddrType))
#define SIZE_ABSADDR          ((int )sizeof(AbsAddrType))
#define SIZE_LENGTH           ((int )sizeof(LengthType))
#define SIZE_MEMNUM           ((int )sizeof(MemNumType))
#define SIZE_STATE_CHECK_NUM  ((int )sizeof(StateCheckNumType))
#define SIZE_REPEATNUM        ((int )sizeof(RepeatNumType))
#define SIZE_OPTION           ((int )sizeof(OnigOptionType))
#define SIZE_CODE_POINT       ((int )sizeof(OnigCodePoint))
#define SIZE_POINTER          ((int )sizeof(PointerType))
00655
00656
/* Typed operand readers: fetch one operand of the given type at the byte
 * cursor p through PLATFORM_GET_INC and advance p past it. */
#define GET_RELADDR_INC(addr,p)         PLATFORM_GET_INC(addr,   p, RelAddrType)
#define GET_ABSADDR_INC(addr,p)         PLATFORM_GET_INC(addr,   p, AbsAddrType)
#define GET_LENGTH_INC(len,p)           PLATFORM_GET_INC(len,    p, LengthType)
#define GET_MEMNUM_INC(num,p)           PLATFORM_GET_INC(num,    p, MemNumType)
#define GET_REPEATNUM_INC(num,p)        PLATFORM_GET_INC(num,    p, RepeatNumType)
#define GET_OPTION_INC(option,p)        PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p)          PLATFORM_GET_INC(ptr,    p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p)  PLATFORM_GET_INC(num,    p, StateCheckNumType)

/* Read an OnigCodePoint at p through a cast pointer; p is NOT advanced. */
#define GET_CODE_POINT(code,p)   (code) = *(OnigCodePoint* )(p)

/* Read the byte at p, then step p forward by one. */
#define GET_BYTE_INC(byte,p) do{\
  (byte) = *(p)++;\
} while(0)
00672
00673
00674
/* Encoded size in bytes of individual instructions: the opcode (SIZE_OPCODE)
 * plus the sizes of its operands.  NOTE(review): the literal "+ 1" entries
 * presumably account for one extra operand byte (the character to compare or
 * peek) - confirm against the code generator. */
00675 #define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
00676 #define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
00677 #define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
00678 #define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
00679 #define SIZE_OP_POP SIZE_OPCODE
00680 #define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
00681 #define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
00682 #define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
00683 #define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
00684 #define SIZE_OP_PUSH_POS SIZE_OPCODE
00685 #define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
00686 #define SIZE_OP_POP_POS SIZE_OPCODE
00687 #define SIZE_OP_FAIL_POS SIZE_OPCODE
00688 #define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
00689 #define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
00690 #define SIZE_OP_FAIL SIZE_OPCODE
00691 #define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
00692 #define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
00693 #define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
00694 #define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
00695 #define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
00696 #define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
00697 #define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
00698 #define SIZE_OP_POP_STOP_BT SIZE_OPCODE
00699 #define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
00700 #define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
00701 #define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
00702 #define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
00703 #define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
00704 #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
00705 #define SIZE_OP_RETURN SIZE_OPCODE
00706 #define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR)
00707
/* Sizes of the state-check instructions; only available when
 * USE_COMBINATION_EXPLOSION_CHECK is defined. */
00708 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00709 #define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
00710 #define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
00711 #define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
00712 #define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
00713 #endif
00714
/* Accessors for the individual entries of a syntax's meta_char_table
 * (syn is a pointer to the syntax object). */
00715 #define MC_ESC(syn) (syn)->meta_char_table.esc
00716 #define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
00717 #define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
00718 #define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
00719 #define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
00720 #define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
00721
/* True when code equals the syntax's escape character and
 * IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_INEFFECTIVE_ESCAPE) is false.
 * NOTE(review): IS_SYNTAX_OP2 is defined elsewhere; presumably it tests that
 * flag in the syntax's op2 set - confirm. */
00722 #define IS_MC_ESC_CODE(code, syn) \
00723 ((code) == MC_ESC(syn) && \
00724 !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
00725
00726
/* Pre-combined ONIG_SYN_OP_* operator flags shared by the POSIX syntaxes
 * (by name). */
00727 #define SYN_POSIX_COMMON_OP \
00728 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
00729 ONIG_SYN_OP_DECIMAL_BACKREF | \
00730 ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
00731 ONIG_SYN_OP_LINE_ANCHOR | \
00732 ONIG_SYN_OP_ESC_CONTROL_CHARS )
00733
/* Pre-combined ONIG_SYN_OP_* operator flags for the GNU regex syntax
 * (by name). */
00734 #define SYN_GNU_REGEX_OP \
00735 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
00736 ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
00737 ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
00738 ONIG_SYN_OP_VBAR_ALT | \
00739 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
00740 ONIG_SYN_OP_QMARK_ZERO_ONE | \
00741 ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
00742 ONIG_SYN_OP_ESC_W_WORD | \
00743 ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
00744 ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
00745 ONIG_SYN_OP_LINE_ANCHOR )
00746
/* Pre-combined ONIG_SYN_* behaviour flags for the GNU regex syntax (by name). */
00747 #define SYN_GNU_REGEX_BV \
00748 ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
00749 ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
00750 ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
00751
00752
/* Flag word of a character-class node, plus generic set / clear / test
 * helpers operating on it.  cc is a pointer to an object with a `flags`
 * member. */
#define NCCLASS_FLAGS(cc)            ((cc)->flags)
#define NCCLASS_FLAG_SET(cc,flag)    (NCCLASS_FLAGS(cc) |= (flag))
#define NCCLASS_FLAG_CLEAR(cc,flag)  (NCCLASS_FLAGS(cc) &= ~(flag))
#define IS_NCCLASS_FLAG_ON(cc,flag)  (0 != (NCCLASS_FLAGS(cc) & (flag)))

/* Individual flag bits: bit 0 = NOT, bit 1 = SHARE. */
#define FLAG_NCCLASS_NOT    (1<<0)
#define FLAG_NCCLASS_SHARE  (1<<1)

/* Named shorthands for the two flags above. */
#define NCCLASS_SET_NOT(nd)    NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
#define NCCLASS_CLEAR_NOT(nd)  NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd)     IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
#define NCCLASS_SET_SHARE(nd)  NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define IS_NCCLASS_SHARE(nd)   IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
00767
/* Common header of a node: only the node type tag. */
00768 typedef struct {
00769 int type;
00770
00771
00772 } NodeBase;
00773
/* Character-class node: flags holds the FLAG_NCCLASS_* bits (accessed through
 * the NCCLASS_* macros), bs is the bit set with one bit per single-byte
 * value, and mbuf is an optional byte buffer.  NOTE(review): mbuf presumably
 * carries the multibyte code ranges - confirm against the parser. */
00774 typedef struct {
00775 NodeBase base;
00776 unsigned int flags;
00777 BitSet bs;
00778 BBuf* mbuf;
00779 } CClassNode;
00780
/* Pointer-sized signed integer used for the stack index fields below. */
00781 typedef intptr_t OnigStackIndex;
00782
/* One entry of the matcher's stack: a type tag plus a union with one payload
 * layout per kind of entry.  NOTE(review): the tag values and the mapping from
 * tag to union member are defined outside this header. */
00783 typedef struct _OnigStackType {
00784 unsigned int type;
00785 union {
/* saved matching state: byte-code position plus string positions */
00786 struct {
00787 UChar *pcode;
00788 UChar *pstr;
00789 UChar *pstr_prev;
00790 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00791 unsigned int state_check;
00792 #endif
00793 UChar *pkeep;
00794 } state;
/* repeat bookkeeping: counter, byte-code position, repeat number */
00795 struct {
00796 int count;
00797 UChar *pcode;
00798 int num;
00799 } repeat;
00800 struct {
00801 OnigStackIndex si;
00802 } repeat_inc;
/* capture (memory) entry: group number, string position and the
 * previous start / end stack indices */
00803 struct {
00804 int num;
00805 UChar *pstr;
00806
00807 OnigStackIndex start;
00808 OnigStackIndex end;
00809 } mem;
00810 struct {
00811 int num;
00812 UChar *pstr;
00813 } null_check;
/* call frame, only present when USE_SUBEXP_CALL is defined */
00814 #ifdef USE_SUBEXP_CALL
00815 struct {
00816 UChar *ret_addr;
00817 int num;
00818 UChar *pstr;
00819 } call_frame;
00820 #endif
00821 } u;
00822 } OnigStackType;
00823
/* Per-match argument/state bundle: stack storage (stack_p / stack_n), the
 * match options, the output region, the start and gpos positions in the
 * subject, plus optional fields enabled by the two feature macros below.
 * NOTE(review): the unit of stack_n (bytes vs. entries) is not visible here. */
00824 typedef struct {
00825 void* stack_p;
00826 size_t stack_n;
00827 OnigOptionType options;
00828 OnigRegion* region;
00829 const UChar* start;
00830 const UChar* gpos;
/* best candidate so far, only with USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
00831 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
00832 OnigPosition best_len;
00833 UChar* best_s;
00834 #endif
/* state-check buffer, only with USE_COMBINATION_EXPLOSION_CHECK */
00835 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00836 void* state_check_buff;
00837 int state_check_buff_size;
00838 #endif
00839 } OnigMatchArg;
00840
00841
/* True when code is an ASCII code point that the encoding also classifies as
 * a word character. */
00842 #define IS_CODE_SB_WORD(enc,code) \
00843 (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
00844
/* Debug-only declarations, compiled in when ONIG_DEBUG is defined. */
00845 #ifdef ONIG_DEBUG
00846
/* One row of the opcode description table: opcode value, printable name and
 * an argument-type code (its values are defined elsewhere). */
00847 typedef struct {
00848 short int opcode;
00849 const char* name;
00850 short int arg_type;
00851 } OnigOpInfoType;
00852
/* The table itself; defined in another translation unit. */
00853 extern OnigOpInfoType OnigOpInfo[];
00854
00855
00856
/* Statistics hooks, only declared with ONIG_DEBUG_STATISTICS. */
00857 #ifdef ONIG_DEBUG_STATISTICS
00858 extern void onig_statistics_init P_((void));
00859 extern void onig_print_statistics P_((FILE* f));
00860 #endif
00861 #endif
00862
/* Library-internal functions implemented in other translation units.  P_ and
 * PV_ wrap the parameter lists (PV_ is used for the variadic prototype);
 * both macros are defined outside this header. */
00863 extern UChar* onig_error_code_to_format P_((OnigPosition code));
00864 extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
00865 extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size));
00866 extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline));
00867 extern void onig_chain_reduce P_((regex_t* reg));
00868 extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
00869 extern void onig_transfer P_((regex_t* to, regex_t* from));
/* Character-class membership tests; the _len variant takes the encoded
 * length (enclen) instead of the encoding. */
00870 extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
00871 extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
00872
00873
/* Hash tables are handled as opaque objects in this header (void). */
00874 typedef void hash_table_type;
/* Value type stored in the table: st_data_t from ruby/st.h when built as part
 * of Ruby, otherwise uintptr_t together with the bundled st.h. */
00875 #ifdef RUBY
00876 #include "ruby/st.h"
00877 typedef st_data_t hash_data_type;
00878 #else
00879 #include "st.h"
00880 typedef uintptr_t hash_data_type;
00881 #endif
00882
/* Table operations whose key is a byte range given as (str_key, end_key)
 * rather than a NUL-terminated string. */
00883 extern hash_table_type* onig_st_init_strend_table_with_size P_((st_index_t size));
00884 extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
00885 extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
00886
00887
/* Register property Name with code ranges CR in the caller's property tables.
 * The expansion relies on names from the calling scope: an int `r`, the
 * objects PropertyNameTable, PropertyList, PropertyListNum and
 * PropertyListSize, and a label `end` that is jumped to when the call fails.
 * NOTE(review): this expands to two statements and is not wrapped in
 * do { } while (0), so it must not be used as the unbraced body of an
 * if/else or loop. */
00888 #define PROPERTY_LIST_ADD_PROP(Name, CR) \
00889 r = onigenc_property_list_add_property((UChar* )Name, CR,\
00890 &PropertyNameTable, &PropertyList, &PropertyListNum,\
00891 &PropertyListSize);\
00892 if (r != 0) goto end
00893
/* When PropertyInited is 0, run onigenc_property_list_init with the caller's
 * init_property_list and return its result from the calling function if it
 * is non-zero.  NOTE(review): expands to a bare `if` block, so an `else`
 * written right after it would bind to this `if`. */
00894 #define PROPERTY_LIST_INIT_CHECK \
00895 if (PropertyInited == 0) {\
00896 int r = onigenc_property_list_init(init_property_list);\
00897 if (r != 0) return r;\
00898 }
00899
/* Backing function of PROPERTY_LIST_ADD_PROP: receives the property name and
 * ranges plus pointers to the caller's name table, list, count and size. */
00900 extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
00901
/* Callback type taking no arguments and returning an int status; passed to
 * onigenc_property_list_init. */
00902 typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
00903
00904 extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
00905
/* Size reporting for a compiled regex and for a match-register set
 * (return type size_t). */
00906 extern size_t onig_memsize P_((const regex_t *reg));
00907 extern size_t onig_region_memsize P_((const struct re_registers *regs));
00908
00909 RUBY_SYMBOL_EXPORT_END
00910
00911 #endif
00912