00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regint.h"
00032 #include <stdio.h>
00033
00034 #ifdef HAVE_STDARG_PROTOTYPES
00035 #include <stdarg.h>
00036 #define va_init_list(a,b) va_start(a,b)
00037 #else
00038 #include <varargs.h>
00039 #define va_init_list(a,b) va_start(a)
00040 #endif
00041
00042 extern UChar*
00043 onig_error_code_to_format(OnigPosition code)
00044 {
00045 const char *p;
00046
00047 if (code >= 0) return (UChar* )0;
00048
00049 switch (code) {
00050 case ONIG_MISMATCH:
00051 p = "mismatch"; break;
00052 case ONIG_NO_SUPPORT_CONFIG:
00053 p = "no support in this configuration"; break;
00054 case ONIGERR_MEMORY:
00055 p = "failed to allocate memory"; break;
00056 case ONIGERR_MATCH_STACK_LIMIT_OVER:
00057 p = "match-stack limit over"; break;
00058 case ONIGERR_TYPE_BUG:
00059 p = "undefined type (bug)"; break;
00060 case ONIGERR_PARSER_BUG:
00061 p = "internal parser error (bug)"; break;
00062 case ONIGERR_STACK_BUG:
00063 p = "stack error (bug)"; break;
00064 case ONIGERR_UNDEFINED_BYTECODE:
00065 p = "undefined bytecode (bug)"; break;
00066 case ONIGERR_UNEXPECTED_BYTECODE:
00067 p = "unexpected bytecode (bug)"; break;
00068 case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
00069 p = "default multibyte-encoding is not set"; break;
00070 case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
00071 p = "can't convert to wide-char on specified multibyte-encoding"; break;
00072 case ONIGERR_INVALID_ARGUMENT:
00073 p = "invalid argument"; break;
00074 case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
00075 p = "end pattern at left brace"; break;
00076 case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
00077 p = "end pattern at left bracket"; break;
00078 case ONIGERR_EMPTY_CHAR_CLASS:
00079 p = "empty char-class"; break;
00080 case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
00081 p = "premature end of char-class"; break;
00082 case ONIGERR_END_PATTERN_AT_ESCAPE:
00083 p = "end pattern at escape"; break;
00084 case ONIGERR_END_PATTERN_AT_META:
00085 p = "end pattern at meta"; break;
00086 case ONIGERR_END_PATTERN_AT_CONTROL:
00087 p = "end pattern at control"; break;
00088 case ONIGERR_META_CODE_SYNTAX:
00089 p = "invalid meta-code syntax"; break;
00090 case ONIGERR_CONTROL_CODE_SYNTAX:
00091 p = "invalid control-code syntax"; break;
00092 case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
00093 p = "char-class value at end of range"; break;
00094 case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
00095 p = "char-class value at start of range"; break;
00096 case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
00097 p = "unmatched range specifier in char-class"; break;
00098 case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
00099 p = "target of repeat operator is not specified"; break;
00100 case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
00101 p = "target of repeat operator is invalid"; break;
00102 case ONIGERR_NESTED_REPEAT_OPERATOR:
00103 p = "nested repeat operator"; break;
00104 case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
00105 p = "unmatched close parenthesis"; break;
00106 case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
00107 p = "end pattern with unmatched parenthesis"; break;
00108 case ONIGERR_END_PATTERN_IN_GROUP:
00109 p = "end pattern in group"; break;
00110 case ONIGERR_UNDEFINED_GROUP_OPTION:
00111 p = "undefined group option"; break;
00112 case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
00113 p = "invalid POSIX bracket type"; break;
00114 case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
00115 p = "invalid pattern in look-behind"; break;
00116 case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
00117 p = "invalid repeat range {lower,upper}"; break;
00118 case ONIGERR_INVALID_CONDITION_PATTERN:
00119 p = "invalid conditional pattern"; break;
00120 case ONIGERR_TOO_BIG_NUMBER:
00121 p = "too big number"; break;
00122 case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
00123 p = "too big number for repeat range"; break;
00124 case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
00125 p = "upper is smaller than lower in repeat range"; break;
00126 case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
00127 p = "empty range in char class"; break;
00128 case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
00129 p = "mismatch multibyte code length in char-class range"; break;
00130 case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
00131 p = "too many multibyte code ranges are specified"; break;
00132 case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
00133 p = "too short multibyte code string"; break;
00134 case ONIGERR_TOO_BIG_BACKREF_NUMBER:
00135 p = "too big backref number"; break;
00136 case ONIGERR_TOO_MANY_CAPTURE_GROUPS:
00137 p = "too many capture groups are specified"; break;
00138 case ONIGERR_INVALID_BACKREF:
00139 #ifdef USE_NAMED_GROUP
00140 p = "invalid backref number/name"; break;
00141 #else
00142 p = "invalid backref number"; break;
00143 #endif
00144 case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
00145 p = "numbered backref/call is not allowed. (use name)"; break;
00146 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
00147 p = "too big wide-char value"; break;
00148 case ONIGERR_TOO_SHORT_DIGITS:
00149 p = "too short digits"; break;
00150 case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
00151 p = "too long wide-char value"; break;
00152 case ONIGERR_INVALID_CODE_POINT_VALUE:
00153 p = "invalid code point value"; break;
00154 case ONIGERR_EMPTY_GROUP_NAME:
00155 p = "group name is empty"; break;
00156 case ONIGERR_INVALID_GROUP_NAME:
00157 p = "invalid group name <%n>"; break;
00158 case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
00159 #ifdef USE_NAMED_GROUP
00160 p = "invalid char in group name <%n>"; break;
00161 #else
00162 p = "invalid char in group number <%n>"; break;
00163 #endif
00164 case ONIGERR_UNDEFINED_NAME_REFERENCE:
00165 p = "undefined name <%n> reference"; break;
00166 case ONIGERR_UNDEFINED_GROUP_REFERENCE:
00167 p = "undefined group <%n> reference"; break;
00168 case ONIGERR_MULTIPLEX_DEFINED_NAME:
00169 p = "multiplex defined name <%n>"; break;
00170 case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
00171 p = "multiplex definition name <%n> call"; break;
00172 case ONIGERR_NEVER_ENDING_RECURSION:
00173 p = "never ending recursion"; break;
00174 case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
00175 p = "group number is too big for capture history"; break;
00176 case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
00177 p = "invalid character property name {%n}"; break;
00178 case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
00179 p = "not supported encoding combination"; break;
00180 case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
00181 p = "invalid combination of options"; break;
00182 case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
00183 p = "over thread pass limit count"; break;
00184
00185 default:
00186 p = "undefined error code"; break;
00187 }
00188
00189 return (UChar* )p;
00190 }
00191
/*
 * Write the low 8 bits of `v` into `s` as exactly two lowercase hex
 * digits followed by a NUL.  `s` must have room for 3 bytes.
 */
static void sprint_byte(char* s, unsigned int v)
{
  const unsigned int low_byte = v & 0xffu;

  sprintf(s, "%02x", low_byte);
}
00196
/*
 * Write the low 8 bits of `v` into `s` as a backslash, an 'x' and two
 * lowercase hex digits, followed by a NUL.  `s` must have room for
 * 5 bytes.
 */
static void sprint_byte_with_x(char* s, unsigned int v)
{
  const unsigned int low_byte = v & 0xffu;

  sprintf(s, "\\x%02x", low_byte);
}
00201
00202 static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
00203 UChar buf[], int buf_size, int *is_over)
00204 {
00205 int len;
00206 UChar *p;
00207 OnigCodePoint code;
00208
00209 if (ONIGENC_MBC_MINLEN(enc) > 1) {
00210 p = s;
00211 len = 0;
00212 while (p < end) {
00213 code = ONIGENC_MBC_TO_CODE(enc, p, end);
00214 if (code >= 0x80) {
00215 if (code > 0xffff && len + 10 <= buf_size) {
00216 sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
00217 sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
00218 sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
00219 sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
00220 len += 10;
00221 }
00222 else if (len + 6 <= buf_size) {
00223 sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
00224 sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
00225 len += 6;
00226 }
00227 else {
00228 break;
00229 }
00230 }
00231 else {
00232 buf[len++] = (UChar )code;
00233 }
00234
00235 p += enclen(enc, p, end);
00236 if (len >= buf_size) break;
00237 }
00238
00239 *is_over = ((p < end) ? 1 : 0);
00240 }
00241 else {
00242 len = (int )MIN((end - s), buf_size);
00243 xmemcpy(buf, s, (size_t )len);
00244 *is_over = ((buf_size < (end - s)) ? 1 : 0);
00245 }
00246
00247 return len;
00248 }
00249
00250
00251
00252 #define MAX_ERROR_PAR_LEN 30
00253
00254 extern int
00255 #ifdef HAVE_STDARG_PROTOTYPES
00256 onig_error_code_to_str(UChar* s, OnigPosition code, ...)
00257 #else
00258 onig_error_code_to_str(s, code, va_alist)
00259 UChar* s;
00260 OnigPosition code;
00261 va_dcl
00262 #endif
00263 {
00264 UChar *p, *q;
00265 OnigErrorInfo* einfo;
00266 size_t len;
00267 int is_over;
00268 UChar parbuf[MAX_ERROR_PAR_LEN];
00269 va_list vargs;
00270
00271 va_init_list(vargs, code);
00272
00273 switch (code) {
00274 case ONIGERR_UNDEFINED_NAME_REFERENCE:
00275 case ONIGERR_UNDEFINED_GROUP_REFERENCE:
00276 case ONIGERR_MULTIPLEX_DEFINED_NAME:
00277 case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
00278 case ONIGERR_INVALID_GROUP_NAME:
00279 case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
00280 case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
00281 einfo = va_arg(vargs, OnigErrorInfo*);
00282 len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
00283 parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
00284 q = onig_error_code_to_format(code);
00285 p = s;
00286 while (*q != '\0') {
00287 if (*q == '%') {
00288 q++;
00289 if (*q == 'n') {
00290 xmemcpy(p, parbuf, len);
00291 p += len;
00292 if (is_over != 0) {
00293 xmemcpy(p, "...", 3);
00294 p += 3;
00295 }
00296 q++;
00297 }
00298 else
00299 goto normal_char;
00300 }
00301 else {
00302 normal_char:
00303 *p++ = *q++;
00304 }
00305 }
00306 *p = '\0';
00307 len = p - s;
00308 break;
00309
00310 default:
00311 q = onig_error_code_to_format(code);
00312 len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
00313 xmemcpy(s, q, len);
00314 s[len] = '\0';
00315 break;
00316 }
00317
00318 va_end(vargs);
00319 return (int )len;
00320 }
00321
00322 void
00323 onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
00324 UChar* pat, UChar* pat_end, const UChar *fmt, va_list args)
00325 {
00326 size_t need;
00327 int n, len;
00328 UChar *p, *s, *bp;
00329 UChar bs[6];
00330
00331 n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
00332
00333 need = (pat_end - pat) * 4 + 4;
00334
00335 if (n + need < (size_t )bufsize) {
00336 strcat((char* )buf, ": /");
00337 s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
00338
00339 p = pat;
00340 while (p < pat_end) {
00341 if (*p == '\\') {
00342 *s++ = *p++;
00343 len = enclen(enc, p, pat_end);
00344 while (len-- > 0) *s++ = *p++;
00345 }
00346 else if (*p == '/') {
00347 *s++ = (unsigned char )'\\';
00348 *s++ = *p++;
00349 }
00350 else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
00351 len = enclen(enc, p, pat_end);
00352 if (ONIGENC_MBC_MINLEN(enc) == 1) {
00353 while (len-- > 0) *s++ = *p++;
00354 }
00355 else {
00356 int blen;
00357
00358 while (len-- > 0) {
00359 sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
00360 blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
00361 bp = bs;
00362 while (blen-- > 0) *s++ = *bp++;
00363 }
00364 }
00365 }
00366 else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
00367 !ONIGENC_IS_CODE_SPACE(enc, *p)) {
00368 sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
00369 len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
00370 bp = bs;
00371 while (len-- > 0) *s++ = *bp++;
00372 }
00373 else {
00374 *s++ = *p++;
00375 }
00376 }
00377
00378 *s++ = '/';
00379 *s = '\0';
00380 }
00381 }
00382
00383 void
00384 #ifdef HAVE_STDARG_PROTOTYPES
00385 onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
00386 UChar* pat, UChar* pat_end, const UChar *fmt, ...)
00387 #else
00388 onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
00389 UChar buf[];
00390 int bufsize;
00391 OnigEncoding enc;
00392 UChar* pat;
00393 UChar* pat_end;
00394 const UChar *fmt;
00395 va_dcl
00396 #endif
00397 {
00398 va_list args;
00399 va_init_list(args, fmt);
00400 onig_vsnprintf_with_pattern(buf, bufsize, enc,
00401 pat, pat_end, fmt, args);
00402 va_end(args);
00403 }
00404
00405