00001
00002
00003
00004
00005 #include "ruby.h"
00006 #include "ruby/encoding.h"
00007 #include "ruby/re.h"
00008 #include <ctype.h>
00009
/*
 * Weekday names accepted by the %A / %a directives.  Entries 0-6 are the
 * full names and entries 7-13 the three-letter abbreviations, both starting
 * with Sunday, so (index % 7) is the weekday number stored under "wday".
 * Full names come first so that they are tried before their abbreviations.
 */
static const char *day_names[] = {
    /* full names */
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    /* abbreviations */
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat"
};
00016
/*
 * Month names accepted by the %B / %b / %h directives.  Entries 0-11 are
 * the full names and entries 12-23 the three-letter abbreviations, both
 * starting with January, so (index % 12) + 1 is the month number stored
 * under "mon".  "May" appears in both halves on purpose.
 */
static const char *month_names[] = {
    /* full names */
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
    /* abbreviations */
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec"
};
00024
/*
 * Meridian indicators accepted by the %P / %p directives.  Even indices are
 * the "am" spellings and odd indices the "pm" spellings; the parser maps
 * (index % 2) to an hour offset of 0 or 12.
 */
static const char *merid_names[] = {
    "am",
    "pm",
    "a.m.",
    "p.m."
};
00029
/*
 * Extended time-zone directive spellings (the part after '%').  The entry
 * at index i carries i + 1 colons before the 'z'; the parser uses the index
 * as the number of extra format bytes to skip before handling the 'z'.
 */
static const char *extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
00035
/*
 * Element count of a true array object (not a pointer).  The argument is
 * parenthesised so that any array-valued expression is measured correctly.
 */
#define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))

/*
 * Arithmetic on Ruby objects, dispatched as method calls on x.
 * Single-character operators are passed as character constants in the
 * method-ID position; multi-character names go through rb_intern().
 */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/* Comparisons on Ruby objects; each yields the VALUE returned by the call. */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Regexp / MatchData helpers: r.match(s), o[i], o.end(i). */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)

/* True when c is an explicit sign character.  Evaluates c twice. */
#define issign(c) ((c) == '-' || (c) == '+')
00057
/*
 * Report whether the text at s starts with something numeric:
 *   - a literal decimal digit, or
 *   - '%', optionally followed by an 'E' or 'O' modifier, followed by
 *     either a digit or one of the conversion letters listed below.
 *
 * Returns 1 if so and 0 otherwise.  s must be NUL-terminated.
 */
static int
num_pattern_p(const char *s)
{
    const char *p = s;

    if (isdigit((unsigned char)*p))
        return 1;
    if (*p != '%')
        return 0;

    /* Step over '%' and an optional E/O modifier. */
    p++;
    if (*p == 'E' || *p == 'O')
        p++;

    /* strchr() would match the terminating NUL, so rule it out first. */
    if (*p == '\0')
        return 0;
    if (isdigit((unsigned char)*p))
        return 1;
    return strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *p) != NULL;
}

/*
 * Applies num_pattern_p() to the format text right after the directive
 * currently being processed; relies on locals named fmt and fi.
 */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
00076
00077 static long
00078 read_digits(const char *s, VALUE *n, size_t width)
00079 {
00080 size_t l;
00081
00082 l = strspn(s, "0123456789");
00083
00084 if (l == 0)
00085 return 0;
00086
00087 if (width < l)
00088 l = width;
00089
00090 if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
00091 const char *os = s;
00092 long v;
00093
00094 v = 0;
00095 while ((size_t)(s - os) < l) {
00096 v *= 10;
00097 v += *s - '0';
00098 s++;
00099 }
00100 if (os == s)
00101 return 0;
00102 *n = LONG2NUM(v);
00103 return l;
00104 }
00105 else {
00106 char *s2 = ALLOCA_N(char, l + 1);
00107 memcpy(s2, s, l);
00108 s2[l] = '\0';
00109 *n = rb_cstr_to_inum(s2, 10, 0);
00110 return l;
00111 }
00112 }
00113
/*
 * Symbol-keyed access to the result hash.  All three expect a VALUE named
 * `hash` in the enclosing scope; k is a C string naming the key.
 */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))

/*
 * Record a parse failure under "_fail" and return 0 from the enclosing
 * function.  Expands to a bare brace block (not do/while) because call
 * sites use it, and the macros built on it, without a trailing semicolon.
 */
#define fail() \
{ \
    set_hash("_fail", Qtrue); \
    return 0; \
}

/* True once a failure has been recorded in the result hash. */
#define fail_p() (!NIL_P(ref_hash("_fail")))

/*
 * Read up to w digits at str[si] into n and advance si past them; fails
 * the parse when no digit is present.  Expects locals named str and si.
 */
#define READ_DIGITS(n,w) \
{ \
    size_t ndigits_ = read_digits(&str[si], &n, w); \
    if (ndigits_ == 0) \
        fail(); \
    si += ndigits_; \
}

/* READ_DIGITS with effectively no width limit. */
#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
00136
00137 static int
00138 valid_range_p(VALUE v, int a, int b)
00139 {
00140 if (FIXNUM_P(v)) {
00141 int vi = FIX2INT(v);
00142 return !(vi < a || vi > b);
00143 }
00144 return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
00145 }
00146
00147 #define recur(fmt) \
00148 { \
00149 size_t l; \
00150 l = date__strptime_internal(&str[si], slen - si, \
00151 fmt, sizeof fmt - 1, hash); \
00152 if (fail_p()) \
00153 return 0; \
00154 si += l; \
00155 }
00156
00157 VALUE date_zone_to_diff(VALUE);
00158
00159 static size_t
00160 date__strptime_internal(const char *str, size_t slen,
00161 const char *fmt, size_t flen, VALUE hash)
00162 {
00163 size_t si, fi;
00164 int c;
00165
00166 si = fi = 0;
00167
00168 while (fi < flen) {
00169
00170 switch (fmt[fi]) {
00171 case '%':
00172
00173 again:
00174 fi++;
00175 c = fmt[fi];
00176
00177 switch (c) {
00178 case 'E':
00179 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
00180 goto again;
00181 fi--;
00182 goto ordinal;
00183 case 'O':
00184 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
00185 goto again;
00186 fi--;
00187 goto ordinal;
00188 case ':':
00189 {
00190 int i;
00191
00192 for (i = 0; i < (int)sizeof_array(extz_pats); i++)
00193 if (strncmp(extz_pats[i], &fmt[fi],
00194 strlen(extz_pats[i])) == 0) {
00195 fi += i;
00196 goto again;
00197 }
00198 fail();
00199 }
00200
00201 case 'A':
00202 case 'a':
00203 {
00204 int i;
00205
00206 for (i = 0; i < (int)sizeof_array(day_names); i++) {
00207 size_t l = strlen(day_names[i]);
00208 if (strncasecmp(day_names[i], &str[si], l) == 0) {
00209 si += l;
00210 set_hash("wday", INT2FIX(i % 7));
00211 goto matched;
00212 }
00213 }
00214 fail();
00215 }
00216 case 'B':
00217 case 'b':
00218 case 'h':
00219 {
00220 int i;
00221
00222 for (i = 0; i < (int)sizeof_array(month_names); i++) {
00223 size_t l = strlen(month_names[i]);
00224 if (strncasecmp(month_names[i], &str[si], l) == 0) {
00225 si += l;
00226 set_hash("mon", INT2FIX((i % 12) + 1));
00227 goto matched;
00228 }
00229 }
00230 fail();
00231 }
00232
00233 case 'C':
00234 {
00235 VALUE n;
00236
00237 if (NUM_PATTERN_P())
00238 READ_DIGITS(n, 2)
00239 else
00240 READ_DIGITS_MAX(n)
00241 set_hash("_cent", n);
00242 goto matched;
00243 }
00244
00245 case 'c':
00246 recur("%a %b %e %H:%M:%S %Y");
00247 goto matched;
00248
00249 case 'D':
00250 recur("%m/%d/%y");
00251 goto matched;
00252
00253 case 'd':
00254 case 'e':
00255 {
00256 VALUE n;
00257
00258 if (str[si] == ' ') {
00259 si++;
00260 READ_DIGITS(n, 1);
00261 } else {
00262 READ_DIGITS(n, 2);
00263 }
00264 if (!valid_range_p(n, 1, 31))
00265 fail();
00266 set_hash("mday", n);
00267 goto matched;
00268 }
00269
00270 case 'F':
00271 recur("%Y-%m-%d");
00272 goto matched;
00273
00274 case 'G':
00275 {
00276 VALUE n;
00277
00278 if (NUM_PATTERN_P())
00279 READ_DIGITS(n, 4)
00280 else
00281 READ_DIGITS_MAX(n)
00282 set_hash("cwyear", n);
00283 goto matched;
00284 }
00285
00286 case 'g':
00287 {
00288 VALUE n;
00289
00290 READ_DIGITS(n, 2);
00291 if (!valid_range_p(n, 0, 99))
00292 fail();
00293 set_hash("cwyear",n);
00294 set_hash("_cent",
00295 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00296 goto matched;
00297 }
00298
00299 case 'H':
00300 case 'k':
00301 {
00302 VALUE n;
00303
00304 if (str[si] == ' ') {
00305 si++;
00306 READ_DIGITS(n, 1);
00307 } else {
00308 READ_DIGITS(n, 2);
00309 }
00310 if (!valid_range_p(n, 0, 24))
00311 fail();
00312 set_hash("hour", n);
00313 goto matched;
00314 }
00315
00316 case 'I':
00317 case 'l':
00318 {
00319 VALUE n;
00320
00321 if (str[si] == ' ') {
00322 si++;
00323 READ_DIGITS(n, 1);
00324 } else {
00325 READ_DIGITS(n, 2);
00326 }
00327 if (!valid_range_p(n, 1, 12))
00328 fail();
00329 set_hash("hour", n);
00330 goto matched;
00331 }
00332
00333 case 'j':
00334 {
00335 VALUE n;
00336
00337 READ_DIGITS(n, 3);
00338 if (!valid_range_p(n, 1, 366))
00339 fail();
00340 set_hash("yday", n);
00341 goto matched;
00342 }
00343
00344 case 'L':
00345 case 'N':
00346 {
00347 VALUE n;
00348 int sign = 1;
00349 size_t osi;
00350
00351 if (issign(str[si])) {
00352 if (str[si] == '-')
00353 sign = -1;
00354 si++;
00355 }
00356 osi = si;
00357 if (NUM_PATTERN_P())
00358 READ_DIGITS(n, c == 'L' ? 3 : 9)
00359 else
00360 READ_DIGITS_MAX(n)
00361 if (sign == -1)
00362 n = f_negate(n);
00363 set_hash("sec_fraction",
00364 rb_rational_new2(n,
00365 f_expt(INT2FIX(10),
00366 ULONG2NUM(si - osi))));
00367 goto matched;
00368 }
00369
00370 case 'M':
00371 {
00372 VALUE n;
00373
00374 READ_DIGITS(n, 2);
00375 if (!valid_range_p(n, 0, 59))
00376 fail();
00377 set_hash("min", n);
00378 goto matched;
00379 }
00380
00381 case 'm':
00382 {
00383 VALUE n;
00384
00385 READ_DIGITS(n, 2);
00386 if (!valid_range_p(n, 1, 12))
00387 fail();
00388 set_hash("mon", n);
00389 goto matched;
00390 }
00391
00392 case 'n':
00393 case 't':
00394 recur(" ");
00395 goto matched;
00396
00397 case 'P':
00398 case 'p':
00399 {
00400 int i;
00401
00402 for (i = 0; i < 4; i++) {
00403 size_t l = strlen(merid_names[i]);
00404 if (strncasecmp(merid_names[i], &str[si], l) == 0) {
00405 si += l;
00406 set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
00407 goto matched;
00408 }
00409 }
00410 fail();
00411 }
00412
00413 case 'Q':
00414 {
00415 VALUE n;
00416 int sign = 1;
00417
00418 if (str[si] == '-') {
00419 sign = -1;
00420 si++;
00421 }
00422 READ_DIGITS_MAX(n);
00423 if (sign == -1)
00424 n = f_negate(n);
00425 set_hash("seconds",
00426 rb_rational_new2(n,
00427 f_expt(INT2FIX(10),
00428 INT2FIX(3))));
00429 goto matched;
00430 }
00431
00432 case 'R':
00433 recur("%H:%M");
00434 goto matched;
00435
00436 case 'r':
00437 recur("%I:%M:%S %p");
00438 goto matched;
00439
00440 case 'S':
00441 {
00442 VALUE n;
00443
00444 READ_DIGITS(n, 2);
00445 if (!valid_range_p(n, 0, 60))
00446 fail();
00447 set_hash("sec", n);
00448 goto matched;
00449 }
00450
00451 case 's':
00452 {
00453 VALUE n;
00454 int sign = 1;
00455
00456 if (str[si] == '-') {
00457 sign = -1;
00458 si++;
00459 }
00460 READ_DIGITS_MAX(n);
00461 if (sign == -1)
00462 n = f_negate(n);
00463 set_hash("seconds", n);
00464 goto matched;
00465 }
00466
00467 case 'T':
00468 recur("%H:%M:%S");
00469 goto matched;
00470
00471 case 'U':
00472 case 'W':
00473 {
00474 VALUE n;
00475
00476 READ_DIGITS(n, 2);
00477 if (!valid_range_p(n, 0, 53))
00478 fail();
00479 set_hash(c == 'U' ? "wnum0" : "wnum1", n);
00480 goto matched;
00481 }
00482
00483 case 'u':
00484 {
00485 VALUE n;
00486
00487 READ_DIGITS(n, 1);
00488 if (!valid_range_p(n, 1, 7))
00489 fail();
00490 set_hash("cwday", n);
00491 goto matched;
00492 }
00493
00494 case 'V':
00495 {
00496 VALUE n;
00497
00498 READ_DIGITS(n, 2);
00499 if (!valid_range_p(n, 1, 53))
00500 fail();
00501 set_hash("cweek", n);
00502 goto matched;
00503 }
00504
00505 case 'v':
00506 recur("%e-%b-%Y");
00507 goto matched;
00508
00509 case 'w':
00510 {
00511 VALUE n;
00512
00513 READ_DIGITS(n, 1);
00514 if (!valid_range_p(n, 0, 6))
00515 fail();
00516 set_hash("wday", n);
00517 goto matched;
00518 }
00519
00520 case 'X':
00521 recur("%H:%M:%S");
00522 goto matched;
00523
00524 case 'x':
00525 recur("%m/%d/%y");
00526 goto matched;
00527
00528 case 'Y':
00529 {
00530 VALUE n;
00531 int sign = 1;
00532
00533 if (issign(str[si])) {
00534 if (str[si] == '-')
00535 sign = -1;
00536 si++;
00537 }
00538 if (NUM_PATTERN_P())
00539 READ_DIGITS(n, 4)
00540 else
00541 READ_DIGITS_MAX(n)
00542 if (sign == -1)
00543 n = f_negate(n);
00544 set_hash("year", n);
00545 goto matched;
00546 }
00547
00548 case 'y':
00549 {
00550 VALUE n;
00551 int sign = 1;
00552
00553 READ_DIGITS(n, 2);
00554 if (!valid_range_p(n, 0, 99))
00555 fail();
00556 if (sign == -1)
00557 n = f_negate(n);
00558 set_hash("year", n);
00559 set_hash("_cent",
00560 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00561 goto matched;
00562 }
00563
00564 case 'Z':
00565 case 'z':
00566 {
00567 static const char pat_source[] =
00568 "\\A("
00569 "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
00570 "|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b"
00571 "|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b"
00572 ")";
00573 static VALUE pat = Qnil;
00574 VALUE m, b;
00575
00576 if (NIL_P(pat)) {
00577 pat = rb_reg_new(pat_source, sizeof pat_source - 1,
00578 ONIG_OPTION_IGNORECASE);
00579 rb_gc_register_mark_object(pat);
00580 }
00581
00582 b = rb_backref_get();
00583 rb_match_busy(b);
00584 m = f_match(pat, rb_usascii_str_new2(&str[si]));
00585
00586 if (!NIL_P(m)) {
00587 VALUE s, l, o;
00588
00589 s = rb_reg_nth_match(1, m);
00590 l = f_end(m, INT2FIX(0));
00591 o = date_zone_to_diff(s);
00592 si += NUM2LONG(l);
00593 set_hash("zone", s);
00594 set_hash("offset", o);
00595 rb_backref_set(b);
00596 goto matched;
00597 }
00598 rb_backref_set(b);
00599 fail();
00600 }
00601
00602 case '%':
00603 if (str[si] != '%')
00604 fail();
00605 si++;
00606 goto matched;
00607
00608 case '+':
00609 recur("%a %b %e %H:%M:%S %Z %Y");
00610 goto matched;
00611
00612 default:
00613 if (str[si] != '%')
00614 fail();
00615 si++;
00616 if (fi < flen)
00617 if (str[si] != fmt[fi])
00618 fail();
00619 si++;
00620 goto matched;
00621 }
00622 case ' ':
00623 case '\t':
00624 case '\n':
00625 case '\v':
00626 case '\f':
00627 case '\r':
00628 while (isspace((unsigned char)str[si]))
00629 si++;
00630 fi++;
00631 break;
00632 default:
00633 ordinal:
00634 if (str[si] != fmt[fi])
00635 fail();
00636 si++;
00637 fi++;
00638 break;
00639 matched:
00640 fi++;
00641 break;
00642 }
00643 }
00644
00645 return si;
00646 }
00647
00648 VALUE
00649 date__strptime(const char *str, size_t slen,
00650 const char *fmt, size_t flen, VALUE hash)
00651 {
00652 size_t si;
00653 VALUE cent, merid;
00654
00655 si = date__strptime_internal(str, slen, fmt, flen, hash);
00656
00657 if (slen > si) {
00658 VALUE s;
00659
00660 s = rb_usascii_str_new(&str[si], slen - si);
00661 set_hash("leftover", s);
00662 }
00663
00664 if (fail_p())
00665 return Qnil;
00666
00667 cent = ref_hash("_cent");
00668 if (!NIL_P(cent)) {
00669 VALUE year;
00670
00671 year = ref_hash("cwyear");
00672 if (!NIL_P(year))
00673 set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
00674 year = ref_hash("year");
00675 if (!NIL_P(year))
00676 set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
00677 del_hash("_cent");
00678 }
00679
00680 merid = ref_hash("_merid");
00681 if (!NIL_P(merid)) {
00682 VALUE hour;
00683
00684 hour = ref_hash("hour");
00685 if (!NIL_P(hour)) {
00686 hour = f_mod(hour, INT2FIX(12));
00687 set_hash("hour", f_add(hour, merid));
00688 }
00689 del_hash("_merid");
00690 }
00691
00692 return hash;
00693 }
00694
00695
00696
00697
00698
00699
00700