00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regint.h"
00032
/* Global holding the default encoding; returned by onigenc_get_default_encoding()
 * and overwritten by onigenc_set_default_encoding(). */
00033 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
00034
/*
 * Initialisation hook for the encoding layer.
 * There is nothing to set up, so it always reports success (0).
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
00040
00041 extern OnigEncoding
00042 onigenc_get_default_encoding(void)
00043 {
00044 return OnigEncDefaultCharEncoding;
00045 }
00046
00047 extern int
00048 onigenc_set_default_encoding(OnigEncoding enc)
00049 {
00050 OnigEncDefaultCharEncoding = enc;
00051 return 0;
00052 }
00053
00054 extern int
00055 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
00056 {
00057 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
00058 if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
00059 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
00060 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
00061 return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
00062 return 1;
00063 }
00064
00065 extern UChar*
00066 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00067 {
00068 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00069 if (p < s) {
00070 p += enclen(enc, p, end);
00071 }
00072 return p;
00073 }
00074
00075 extern UChar*
00076 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
00077 const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
00078 {
00079 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00080
00081 if (p < s) {
00082 if (prev) *prev = (const UChar* )p;
00083 p += enclen(enc, p, end);
00084 }
00085 else {
00086 if (prev) *prev = (const UChar* )NULL;
00087 }
00088 return p;
00089 }
00090
00091 extern UChar*
00092 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00093 {
00094 if (s <= start)
00095 return (UChar* )NULL;
00096
00097 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00098 }
00099
00100 extern UChar*
00101 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
00102 {
00103 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
00104 if (s <= start)
00105 return (UChar* )NULL;
00106
00107 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00108 }
00109 return (UChar* )s;
00110 }
00111
00112 extern UChar*
00113 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
00114 {
00115 UChar* q = (UChar* )p;
00116 while (n-- > 0) {
00117 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00118 }
00119 return (q <= end ? q : NULL);
00120 }
00121
00122 extern int
00123 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
00124 {
00125 int n = 0;
00126 UChar* q = (UChar* )p;
00127
00128 while (q < end) {
00129 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00130 n++;
00131 }
00132 return n;
00133 }
00134
00135 extern int
00136 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
00137 {
00138 int n = 0;
00139 UChar* p = (UChar* )s;
00140 UChar* e;
00141
00142 while (1) {
00143 if (*p == '\0') {
00144 UChar* q;
00145 int len = ONIGENC_MBC_MINLEN(enc);
00146
00147 if (len == 1) return n;
00148 q = p + 1;
00149 while (len > 1) {
00150 if (*q != '\0') break;
00151 q++;
00152 len--;
00153 }
00154 if (len == 1) return n;
00155 }
00156 e = p + ONIGENC_MBC_MAXLEN(enc);
00157 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00158 n++;
00159 }
00160 }
00161
00162 extern int
00163 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
00164 {
00165 UChar* start = (UChar* )s;
00166 UChar* p = (UChar* )s;
00167 UChar* e;
00168
00169 while (1) {
00170 if (*p == '\0') {
00171 UChar* q;
00172 int len = ONIGENC_MBC_MINLEN(enc);
00173
00174 if (len == 1) return (int )(p - start);
00175 q = p + 1;
00176 while (len > 1) {
00177 if (*q != '\0') break;
00178 q++;
00179 len--;
00180 }
00181 if (len == 1) return (int )(p - start);
00182 }
00183 e = p + ONIGENC_MBC_MAXLEN(enc);
00184 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00185 }
00186 }
00187
00188 const UChar OnigEncAsciiToLowerCaseTable[] = {
00189 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00190 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00191 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00192 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00193 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00194 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00195 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00196 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00197 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00198 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00199 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00200 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00201 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00202 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00203 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00204 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00205 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00206 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00207 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00208 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00209 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00210 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00211 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00212 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00213 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00214 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00215 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00216 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00217 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00218 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00219 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00220 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00221 };
00222
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ASCII (only built with USE_UPPER_CASE_TABLE).
 * 0x61-0x7a ('a'-'z') map to 0x41-0x5a; every other value maps to itself.
 * One row per 16 entries; the leading comment is the index of the first entry.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  /* 0x60 */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  /* 0x70 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
#endif
00259
/*
 * Per-byte character-class bit masks for ASCII.
 * Entries 0x00-0x7f carry a mask; entries 0x80-0xff are all zero.
 * The meaning of the individual bits is defined outside this table
 * (presumably by the ONIGENC_CTYPE_* constants -- confirm in the headers).
 * Each row holds eight entries; the trailing comment gives their index range.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,  /* 0x00-0x07 */
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,  /* 0x08-0x0f */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,  /* 0x10-0x17 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,  /* 0x18-0x1f */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,  /* 0x20-0x27 */
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,  /* 0x28-0x2f */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,  /* 0x30-0x37 */
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,  /* 0x38-0x3f */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,  /* 0x40-0x47 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,  /* 0x48-0x4f */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,  /* 0x50-0x57 */
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,  /* 0x58-0x5f */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,  /* 0x60-0x67 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,  /* 0x68-0x6f */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,  /* 0x70-0x77 */
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,  /* 0x78-0x7f */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0x80-0x87 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0x88-0x8f */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0x90-0x97 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0x98-0x9f */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xa0-0xa7 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xa8-0xaf */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xb0-0xb7 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xb8-0xbf */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xc0-0xc7 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xc8-0xcf */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xd0-0xd7 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xd8-0xdf */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xe0-0xe7 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xe8-0xef */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  /* 0xf0-0xf7 */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000   /* 0xf8-0xff */
};
00294
00295 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
00296 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00297 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00298 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00299 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00300 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00301 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00302 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00303 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00304 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00305 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00306 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00307 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00308 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00309 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00310 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00311 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00312 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00313 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00314 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00315 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00316 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00317 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00318 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00319 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00320 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00321 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00322 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
00323 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
00324 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00325 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00326 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00327 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
00328 };
00329
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ISO-8859-1 (only built with USE_UPPER_CASE_TABLE).
 * 0x61-0x7a map to 0x41-0x5a, and 0xe0-0xfe map to 0xc0-0xde with the
 * exception of 0xf7, which maps to itself.  0xff and every other value map
 * to themselves.
 * One row per 16 entries; the leading comment is the index of the first entry.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  /* 0x60 */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  /* 0x70 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  /* 0xe0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  /* 0xf0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff,
};
#endif
00366
00367 extern void
00368 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
00369 {
00370
00371
00372 }
00373
00374 extern UChar*
00375 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00376 {
00377 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00378 }
00379
00380 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
00381 { 0x41, 0x61 },
00382 { 0x42, 0x62 },
00383 { 0x43, 0x63 },
00384 { 0x44, 0x64 },
00385 { 0x45, 0x65 },
00386 { 0x46, 0x66 },
00387 { 0x47, 0x67 },
00388 { 0x48, 0x68 },
00389 { 0x49, 0x69 },
00390 { 0x4a, 0x6a },
00391 { 0x4b, 0x6b },
00392 { 0x4c, 0x6c },
00393 { 0x4d, 0x6d },
00394 { 0x4e, 0x6e },
00395 { 0x4f, 0x6f },
00396 { 0x50, 0x70 },
00397 { 0x51, 0x71 },
00398 { 0x52, 0x72 },
00399 { 0x53, 0x73 },
00400 { 0x54, 0x74 },
00401 { 0x55, 0x75 },
00402 { 0x56, 0x76 },
00403 { 0x57, 0x77 },
00404 { 0x58, 0x78 },
00405 { 0x59, 0x79 },
00406 { 0x5a, 0x7a }
00407 };
00408
00409 extern int
00410 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00411 OnigApplyAllCaseFoldFunc f, void* arg,
00412 OnigEncoding enc ARG_UNUSED)
00413 {
00414 OnigCodePoint code;
00415 int i, r;
00416
00417 for (i = 0;
00418 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
00419 i++) {
00420 code = OnigAsciiLowerMap[i].to;
00421 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
00422 if (r != 0) return r;
00423
00424 code = OnigAsciiLowerMap[i].from;
00425 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
00426 if (r != 0) return r;
00427 }
00428
00429 return 0;
00430 }
00431
00432 extern int
00433 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
00434 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
00435 OnigEncoding enc ARG_UNUSED)
00436 {
00437 if (0x41 <= *p && *p <= 0x5a) {
00438 items[0].byte_len = 1;
00439 items[0].code_len = 1;
00440 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00441 return 1;
00442 }
00443 else if (0x61 <= *p && *p <= 0x7a) {
00444 items[0].byte_len = 1;
00445 items[0].code_len = 1;
00446 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00447 return 1;
00448 }
00449 else
00450 return 0;
00451 }
00452
00453 static int
00454 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00455 OnigApplyAllCaseFoldFunc f, void* arg)
00456 {
00457 OnigCodePoint ss[] = { 0x73, 0x73 };
00458
00459 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
00460 }
00461
00462 extern int
00463 onigenc_apply_all_case_fold_with_map(int map_size,
00464 const OnigPairCaseFoldCodes map[],
00465 int ess_tsett_flag, OnigCaseFoldType flag,
00466 OnigApplyAllCaseFoldFunc f, void* arg)
00467 {
00468 OnigCodePoint code;
00469 int i, r;
00470
00471 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
00472 if (r != 0) return r;
00473
00474 for (i = 0; i < map_size; i++) {
00475 code = map[i].to;
00476 r = (*f)(map[i].from, &code, 1, arg);
00477 if (r != 0) return r;
00478
00479 code = map[i].from;
00480 r = (*f)(map[i].to, &code, 1, arg);
00481 if (r != 0) return r;
00482 }
00483
00484 if (ess_tsett_flag != 0)
00485 return ss_apply_all_case_fold(flag, f, arg);
00486
00487 return 0;
00488 }
00489
00490 extern int
00491 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
00492 const OnigPairCaseFoldCodes map[],
00493 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
00494 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
00495 {
00496 if (0x41 <= *p && *p <= 0x5a) {
00497 items[0].byte_len = 1;
00498 items[0].code_len = 1;
00499 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00500 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
00501 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00502
00503 items[1].byte_len = 2;
00504 items[1].code_len = 1;
00505 items[1].code[0] = (OnigCodePoint )0xdf;
00506 return 2;
00507 }
00508 else
00509 return 1;
00510 }
00511 else if (0x61 <= *p && *p <= 0x7a) {
00512 items[0].byte_len = 1;
00513 items[0].code_len = 1;
00514 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00515 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
00516 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00517
00518 items[1].byte_len = 2;
00519 items[1].code_len = 1;
00520 items[1].code[0] = (OnigCodePoint )0xdf;
00521 return 2;
00522 }
00523 else
00524 return 1;
00525 }
00526 else if (*p == 0xdf && ess_tsett_flag != 0) {
00527 items[0].byte_len = 1;
00528 items[0].code_len = 2;
00529 items[0].code[0] = (OnigCodePoint )'s';
00530 items[0].code[1] = (OnigCodePoint )'s';
00531
00532 items[1].byte_len = 1;
00533 items[1].code_len = 2;
00534 items[1].code[0] = (OnigCodePoint )'S';
00535 items[1].code[1] = (OnigCodePoint )'S';
00536
00537 items[2].byte_len = 1;
00538 items[2].code_len = 2;
00539 items[2].code[0] = (OnigCodePoint )'s';
00540 items[2].code[1] = (OnigCodePoint )'S';
00541
00542 items[3].byte_len = 1;
00543 items[3].code_len = 2;
00544 items[3].code[0] = (OnigCodePoint )'S';
00545 items[3].code[1] = (OnigCodePoint )'s';
00546
00547 return 4;
00548 }
00549 else {
00550 int i;
00551
00552 for (i = 0; i < map_size; i++) {
00553 if (*p == map[i].from) {
00554 items[0].byte_len = 1;
00555 items[0].code_len = 1;
00556 items[0].code[0] = map[i].to;
00557 return 1;
00558 }
00559 else if (*p == map[i].to) {
00560 items[0].byte_len = 1;
00561 items[0].code_len = 1;
00562 items[0].code[0] = map[i].from;
00563 return 1;
00564 }
00565 }
00566 }
00567
00568 return 0;
00569 }
00570
00571
00572 extern int
00573 onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
00574 OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
00575 OnigEncoding enc)
00576 {
00577 return ONIG_NO_SUPPORT_CONFIG;
00578 }
00579
00580 extern int
00581 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
00582 {
00583 if (p < end) {
00584 if (*p == 0x0a) return 1;
00585 }
00586 return 0;
00587 }
00588
00589
00590 extern int
00591 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
00592 const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
00593 {
00594 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
00595
00596 (*p)++;
00597 return 1;
00598 }
00599
/*
 * Disabled code (compiled out by the surrounding #if 0).
 * As written it advances *pp by one byte and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte it stepped over.
 */
00600 #if 0
00601 extern int
00602 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
00603 const UChar** pp, const UChar* end ARG_UNUSED)
00604 {
00605 const UChar* p = *pp;
00606
00607 (*pp)++;
00608 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
00609 }
00610 #endif
00611
00612 extern int
00613 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
00614 OnigEncoding enc ARG_UNUSED)
00615 {
00616 return 1;
00617 }
00618
00619 extern OnigCodePoint
00620 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
00621 OnigEncoding enc ARG_UNUSED)
00622 {
00623 return (OnigCodePoint )(*p);
00624 }
00625
00626 extern int
00627 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
00628 {
00629 return 1;
00630 }
00631
00632 extern int
00633 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
00634 {
00635 if (code > 0xff)
00636 rb_raise(rb_eRangeError, "%u out of char range", code);
00637 *buf = (UChar )(code & 0xff);
00638 return 1;
00639 }
00640
00641 extern UChar*
00642 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
00643 const UChar* end,
00644 OnigEncoding enc ARG_UNUSED)
00645 {
00646 return (UChar* )s;
00647 }
00648
00649 extern int
00650 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00651 OnigEncoding enc ARG_UNUSED)
00652 {
00653 return TRUE;
00654 }
00655
00656 extern int
00657 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00658 OnigEncoding enc ARG_UNUSED)
00659 {
00660 return FALSE;
00661 }
00662
00663 extern int
00664 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
00665 OnigEncoding enc ARG_UNUSED)
00666 {
00667 if (code < 128)
00668 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00669 else
00670 return FALSE;
00671 }
00672
00673 extern OnigCodePoint
00674 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
00675 {
00676 int c, i, len;
00677 OnigCodePoint n;
00678
00679 len = enclen(enc, p, end);
00680 n = (OnigCodePoint )(*p++);
00681 if (len == 1) return n;
00682
00683 for (i = 1; i < len; i++) {
00684 if (p >= end) break;
00685 c = *p++;
00686 n <<= 8; n += c;
00687 }
00688 return n;
00689 }
00690
00691 extern int
00692 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
00693 const UChar** pp, const UChar* end ARG_UNUSED,
00694 UChar* lower)
00695 {
00696 int len;
00697 const UChar *p = *pp;
00698
00699 if (ONIGENC_IS_MBC_ASCII(p)) {
00700 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
00701 (*pp)++;
00702 return 1;
00703 }
00704 else {
00705 int i;
00706
00707 len = enclen(enc, p, end);
00708 for (i = 0; i < len; i++) {
00709 *lower++ = *p++;
00710 }
00711 (*pp) += len;
00712 return len;
00713 }
00714 }
00715
/*
 * Disabled code (compiled out by the surrounding #if 0).
 * As written it steps *pp over one character and reports case ambiguity only
 * for ASCII bytes.
 * NOTE(review): enclen() is called here with two arguments, whereas the live
 * code in this file passes three (enc, p, end); this block would need
 * updating before it could be re-enabled.
 */
00716 #if 0
00717 extern int
00718 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
00719 const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
00720 {
00721 const UChar* p = *pp;
00722
00723 if (ONIGENC_IS_MBC_ASCII(p)) {
00724 (*pp)++;
00725 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
00726 }
00727
00728 (*pp) += enclen(enc, p);
00729 return FALSE;
00730 }
00731 #endif
00732
00733 extern int
00734 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00735 {
00736 if (code <= 0xff) return 1;
00737 if (code <= 0xffff) return 2;
00738 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
00739 }
00740
00741 extern int
00742 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00743 {
00744 if ((code & 0xff000000) != 0) return 4;
00745 else if ((code & 0xff0000) != 0) return 3;
00746 else if ((code & 0xff00) != 0) return 2;
00747 else return 1;
00748 }
00749
00750 extern int
00751 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00752 {
00753 UChar *p = buf;
00754
00755 if ((code & 0xff00) != 0) {
00756 *p++ = (UChar )((code >> 8) & 0xff);
00757 }
00758 *p++ = (UChar )(code & 0xff);
00759
00760 #if 1
00761 if (enclen(enc, buf, p) != (p - buf))
00762 return ONIGERR_INVALID_CODE_POINT_VALUE;
00763 #endif
00764 return (int )(p - buf);
00765 }
00766
00767 extern int
00768 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00769 {
00770 UChar *p = buf;
00771
00772 if ((code & 0xff000000) != 0) {
00773 *p++ = (UChar )((code >> 24) & 0xff);
00774 }
00775 if ((code & 0xff0000) != 0 || p != buf) {
00776 *p++ = (UChar )((code >> 16) & 0xff);
00777 }
00778 if ((code & 0xff00) != 0 || p != buf) {
00779 *p++ = (UChar )((code >> 8) & 0xff);
00780 }
00781 *p++ = (UChar )(code & 0xff);
00782
00783 #if 1
00784 if (enclen(enc, buf, p) != (p - buf))
00785 return ONIGERR_INVALID_CODE_POINT_VALUE;
00786 #endif
00787 return (int )(p - buf);
00788 }
00789
00790 extern int
00791 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
00792 {
00793 static const PosixBracketEntryType PBS[] = {
00794 PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
00795 PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
00796 PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
00797 PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
00798 PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
00799 PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
00800 PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
00801 PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
00802 PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
00803 PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
00804 PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
00805 PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
00806 PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
00807 PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
00808 };
00809
00810 const PosixBracketEntryType *pb, *pbe;
00811 int len;
00812
00813 len = onigenc_strlen(enc, p, end);
00814 for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
00815 if (len == pb->len &&
00816 onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
00817 return pb->ctype;
00818 }
00819
00820 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
00821 }
00822
00823 extern int
00824 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00825 unsigned int ctype)
00826 {
00827 if (code < 128)
00828 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00829 else {
00830 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00831 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00832 }
00833 }
00834
00835 return FALSE;
00836 }
00837
00838 extern int
00839 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00840 unsigned int ctype)
00841 {
00842 if (code < 128)
00843 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00844 else {
00845 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00846 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00847 }
00848 }
00849
00850 return FALSE;
00851 }
00852
00853 extern int
00854 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
00855 const UChar* sascii , int n)
00856 {
00857 int x, c;
00858
00859 while (n-- > 0) {
00860 if (p >= end) return (int )(*sascii);
00861
00862 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00863 x = *sascii - c;
00864 if (x) return x;
00865
00866 sascii++;
00867 p += enclen(enc, p, end);
00868 }
00869 return 0;
00870 }
00871
00872 extern int
00873 onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
00874 const UChar* sascii , int n)
00875 {
00876 int x, c;
00877
00878 while (n-- > 0) {
00879 if (p >= end) return (int )(*sascii);
00880
00881 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00882 if (ONIGENC_IS_ASCII_CODE(c))
00883 c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
00884 x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
00885 if (x) return x;
00886
00887 sascii++;
00888 p += enclen(enc, p, end);
00889 }
00890 return 0;
00891 }
00892
00893
00894 static int
00895 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
00896 {
00897 size_t size;
00898 const OnigCodePoint **list = *plist;
00899
00900 size = sizeof(OnigCodePoint*) * new_size;
00901 if (IS_NULL(list)) {
00902 list = (const OnigCodePoint** )xmalloc(size);
00903 }
00904 else {
00905 list = (const OnigCodePoint** )xrealloc((void* )list, size);
00906 }
00907
00908 if (IS_NULL(list)) return ONIGERR_MEMORY;
00909
00910 *plist = list;
00911 *psize = new_size;
00912
00913 return 0;
00914 }
00915
00916 extern int
00917 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
00918 hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
00919 int *psize)
00920 {
00921 #define PROP_INIT_SIZE 16
00922
00923 int r;
00924
00925 if (*psize <= *pnum) {
00926 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
00927 r = resize_property_list(new_size, plist, psize);
00928 if (r != 0) return r;
00929 }
00930
00931 (*plist)[*pnum] = prop;
00932
00933 if (ONIG_IS_NULL(*table)) {
00934 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
00935 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
00936 }
00937
00938 *pnum = *pnum + 1;
00939 onig_st_insert_strend(*table, name, name + strlen((char* )name),
00940 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
00941 return 0;
00942 }
00943
00944 extern int
00945 onigenc_property_list_init(int (*f)(void))
00946 {
00947 int r;
00948
00949 THREAD_ATOMIC_START;
00950
00951 r = f();
00952
00953 THREAD_ATOMIC_END;
00954 return r;
00955 }
00956