00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "ruby/ruby.h"
00012 #include "ruby/re.h"
00013 #include "ruby/encoding.h"
00014 #include "regint.h"
00015
00016 #define STRSCAN_VERSION "0.7.0"
00017
00018
00019
00020
00021
00022 static VALUE StringScanner;
00023 static VALUE ScanError;
00024 static ID id_byteslice;
00025
00026 struct strscanner
00027 {
00028
00029 unsigned long flags;
00030 #define FLAG_MATCHED (1 << 0)
00031
00032
00033 VALUE str;
00034
00035
00036 long prev;
00037 long curr;
00038
00039
00040 struct re_registers regs;
00041
00042
00043 VALUE regex;
00044 };
00045
/* Match-status helpers; `s` is a struct strscanner pointer. */
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
#define MATCHED(s) (s)->flags |= FLAG_MATCHED
#define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED

/* Accessors for the scanned string's buffer and the scan position. */
#define S_PBEG(s) (RSTRING_PTR((s)->str))
#define S_LEN(s) (RSTRING_LEN((s)->str))
#define S_PEND(s) (S_PBEG(s) + S_LEN(s))
#define CURPTR(s) (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)

/*
 * True when the scan position is at or past the end of the string.
 * Uses the macro argument throughout, so it works regardless of what the
 * caller's scanner variable is called.
 */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))
00057
/*
 * Fetch the struct strscanner wrapped by `obj` into `var`.
 * Raises ArgumentError if the scanner has no string yet.
 */
#define GET_SCANNER(obj,var) do {                                           \
    (var) = check_strscan(obj);                                             \
    if (NIL_P((var)->str))                                                  \
        rb_raise(rb_eArgError, "uninitialized StringScanner object");       \
} while (0)
00062
00063
00064
00065
00066
00067 static VALUE infect _((VALUE str, struct strscanner *p));
00068 static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
00069 static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
00070
00071 static struct strscanner *check_strscan _((VALUE obj));
00072 static void strscan_mark _((void *p));
00073 static void strscan_free _((void *p));
00074 static size_t strscan_memsize _((const void *p));
00075 static VALUE strscan_s_allocate _((VALUE klass));
00076 static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
00077 static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
00078
00079 static VALUE strscan_s_mustc _((VALUE self));
00080 static VALUE strscan_terminate _((VALUE self));
00081 static VALUE strscan_clear _((VALUE self));
00082 static VALUE strscan_get_string _((VALUE self));
00083 static VALUE strscan_set_string _((VALUE self, VALUE str));
00084 static VALUE strscan_concat _((VALUE self, VALUE str));
00085 static VALUE strscan_get_pos _((VALUE self));
00086 static VALUE strscan_set_pos _((VALUE self, VALUE pos));
00087 static VALUE strscan_do_scan _((VALUE self, VALUE regex,
00088 int succptr, int getstr, int headonly));
00089 static VALUE strscan_scan _((VALUE self, VALUE re));
00090 static VALUE strscan_match_p _((VALUE self, VALUE re));
00091 static VALUE strscan_skip _((VALUE self, VALUE re));
00092 static VALUE strscan_check _((VALUE self, VALUE re));
00093 static VALUE strscan_scan_full _((VALUE self, VALUE re,
00094 VALUE succp, VALUE getp));
00095 static VALUE strscan_scan_until _((VALUE self, VALUE re));
00096 static VALUE strscan_skip_until _((VALUE self, VALUE re));
00097 static VALUE strscan_check_until _((VALUE self, VALUE re));
00098 static VALUE strscan_search_full _((VALUE self, VALUE re,
00099 VALUE succp, VALUE getp));
00100 static void adjust_registers_to_matched _((struct strscanner *p));
00101 static VALUE strscan_getch _((VALUE self));
00102 static VALUE strscan_get_byte _((VALUE self));
00103 static VALUE strscan_getbyte _((VALUE self));
00104 static VALUE strscan_peek _((VALUE self, VALUE len));
00105 static VALUE strscan_peep _((VALUE self, VALUE len));
00106 static VALUE strscan_unscan _((VALUE self));
00107 static VALUE strscan_bol_p _((VALUE self));
00108 static VALUE strscan_eos_p _((VALUE self));
00109 static VALUE strscan_empty_p _((VALUE self));
00110 static VALUE strscan_rest_p _((VALUE self));
00111 static VALUE strscan_matched_p _((VALUE self));
00112 static VALUE strscan_matched _((VALUE self));
00113 static VALUE strscan_matched_size _((VALUE self));
00114 static VALUE strscan_aref _((VALUE self, VALUE idx));
00115 static VALUE strscan_pre_match _((VALUE self));
00116 static VALUE strscan_post_match _((VALUE self));
00117 static VALUE strscan_rest _((VALUE self));
00118 static VALUE strscan_rest_size _((VALUE self));
00119
00120 static VALUE strscan_inspect _((VALUE self));
00121 static VALUE inspect1 _((struct strscanner *p));
00122 static VALUE inspect2 _((struct strscanner *p));
00123
00124
00125
00126
00127
00128 static VALUE
00129 infect(VALUE str, struct strscanner *p)
00130 {
00131 OBJ_INFECT(str, p->str);
00132 return str;
00133 }
00134
00135 static VALUE
00136 str_new(struct strscanner *p, const char *ptr, long len)
00137 {
00138 VALUE str = rb_str_new(ptr, len);
00139 rb_enc_copy(str, p->str);
00140 return str;
00141 }
00142
00143 static VALUE
00144 extract_range(struct strscanner *p, long beg_i, long end_i)
00145 {
00146 if (beg_i > S_LEN(p)) return Qnil;
00147 if (end_i > S_LEN(p))
00148 end_i = S_LEN(p);
00149 return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p);
00150 }
00151
00152 static VALUE
00153 extract_beg_len(struct strscanner *p, long beg_i, long len)
00154 {
00155 if (beg_i > S_LEN(p)) return Qnil;
00156 if (beg_i + len > S_LEN(p))
00157 len = S_LEN(p) - beg_i;
00158 return infect(str_new(p, S_PBEG(p) + beg_i, len), p);
00159 }
00160
00161
00162
00163
00164
00165 static void
00166 strscan_mark(void *ptr)
00167 {
00168 struct strscanner *p = ptr;
00169 rb_gc_mark(p->str);
00170 }
00171
00172 static void
00173 strscan_free(void *ptr)
00174 {
00175 struct strscanner *p = ptr;
00176 onig_region_free(&(p->regs), 0);
00177 ruby_xfree(p);
00178 }
00179
00180 static size_t
00181 strscan_memsize(const void *ptr)
00182 {
00183 const struct strscanner *p = ptr;
00184 size_t size = 0;
00185 if (p) {
00186 size = sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
00187 }
00188 return size;
00189 }
00190
00191 static const rb_data_type_t strscanner_type = {
00192 "StringScanner",
00193 {strscan_mark, strscan_free, strscan_memsize},
00194 NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
00195 };
00196
00197 static VALUE
00198 strscan_s_allocate(VALUE klass)
00199 {
00200 struct strscanner *p;
00201
00202 p = ALLOC(struct strscanner);
00203 MEMZERO(p, struct strscanner, 1);
00204 CLEAR_MATCH_STATUS(p);
00205 onig_region_init(&(p->regs));
00206 p->str = Qnil;
00207 return TypedData_Wrap_Struct(klass, &strscanner_type, p);
00208 }
00209
00210
00211
00212
00213
00214
00215
00216 static VALUE
00217 strscan_initialize(int argc, VALUE *argv, VALUE self)
00218 {
00219 struct strscanner *p;
00220 VALUE str, need_dup;
00221
00222 p = check_strscan(self);
00223 rb_scan_args(argc, argv, "11", &str, &need_dup);
00224 StringValue(str);
00225 p->str = str;
00226
00227 return self;
00228 }
00229
00230 static struct strscanner *
00231 check_strscan(VALUE obj)
00232 {
00233 return rb_check_typeddata(obj, &strscanner_type);
00234 }
00235
00236
00237
00238
00239
00240
00241
00242
00243 static VALUE
00244 strscan_init_copy(VALUE vself, VALUE vorig)
00245 {
00246 struct strscanner *self, *orig;
00247
00248 self = check_strscan(vself);
00249 orig = check_strscan(vorig);
00250 if (self != orig) {
00251 self->flags = orig->flags;
00252 self->str = orig->str;
00253 self->prev = orig->prev;
00254 self->curr = orig->curr;
00255 onig_region_copy(&self->regs, &orig->regs);
00256 }
00257
00258 return vself;
00259 }
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270 static VALUE
00271 strscan_s_mustc(VALUE self)
00272 {
00273 return self;
00274 }
00275
00276
00277
00278
00279 static VALUE
00280 strscan_reset(VALUE self)
00281 {
00282 struct strscanner *p;
00283
00284 GET_SCANNER(self, p);
00285 p->curr = 0;
00286 CLEAR_MATCH_STATUS(p);
00287 return self;
00288 }
00289
00290
00291
00292
00293
00294
00295
00296
00297 static VALUE
00298 strscan_terminate(VALUE self)
00299 {
00300 struct strscanner *p;
00301
00302 GET_SCANNER(self, p);
00303 p->curr = S_LEN(p);
00304 CLEAR_MATCH_STATUS(p);
00305 return self;
00306 }
00307
00308
00309
00310
00311
00312 static VALUE
00313 strscan_clear(VALUE self)
00314 {
00315 rb_warning("StringScanner#clear is obsolete; use #terminate instead");
00316 return strscan_terminate(self);
00317 }
00318
00319
00320
00321
00322 static VALUE
00323 strscan_get_string(VALUE self)
00324 {
00325 struct strscanner *p;
00326
00327 GET_SCANNER(self, p);
00328 return p->str;
00329 }
00330
00331
00332
00333
00334
00335
00336
00337 static VALUE
00338 strscan_set_string(VALUE self, VALUE str)
00339 {
00340 struct strscanner *p = check_strscan(self);
00341
00342 StringValue(str);
00343 p->str = str;
00344 p->curr = 0;
00345 CLEAR_MATCH_STATUS(p);
00346 return str;
00347 }
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363 static VALUE
00364 strscan_concat(VALUE self, VALUE str)
00365 {
00366 struct strscanner *p;
00367
00368 GET_SCANNER(self, p);
00369 StringValue(str);
00370 rb_str_append(p->str, str);
00371 return self;
00372 }
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388 static VALUE
00389 strscan_get_pos(VALUE self)
00390 {
00391 struct strscanner *p;
00392
00393 GET_SCANNER(self, p);
00394 return INT2FIX(p->curr);
00395 }
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 static VALUE
00411 strscan_get_charpos(VALUE self)
00412 {
00413 struct strscanner *p;
00414 VALUE substr;
00415
00416 GET_SCANNER(self, p);
00417
00418 substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
00419
00420 return rb_str_length(substr);
00421 }
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432 static VALUE
00433 strscan_set_pos(VALUE self, VALUE v)
00434 {
00435 struct strscanner *p;
00436 long i;
00437
00438 GET_SCANNER(self, p);
00439 i = NUM2INT(v);
00440 if (i < 0) i += S_LEN(p);
00441 if (i < 0) rb_raise(rb_eRangeError, "index out of range");
00442 if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
00443 p->curr = i;
00444 return INT2NUM(i);
00445 }
00446
00447 static VALUE
00448 strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
00449 {
00450 regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
00451 struct strscanner *p;
00452 regex_t *re;
00453 long ret;
00454 int tmpreg;
00455
00456 Check_Type(regex, T_REGEXP);
00457 GET_SCANNER(self, p);
00458
00459 CLEAR_MATCH_STATUS(p);
00460 if (S_RESTLEN(p) < 0) {
00461 return Qnil;
00462 }
00463
00464 p->regex = regex;
00465 re = rb_reg_prepare_re(regex, p->str);
00466 tmpreg = re != RREGEXP(regex)->ptr;
00467 if (!tmpreg) RREGEXP(regex)->usecnt++;
00468
00469 if (headonly) {
00470 ret = onig_match(re, (UChar* )CURPTR(p),
00471 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
00472 (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
00473 }
00474 else {
00475 ret = onig_search(re,
00476 (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
00477 (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
00478 &(p->regs), ONIG_OPTION_NONE);
00479 }
00480 if (!tmpreg) RREGEXP(regex)->usecnt--;
00481 if (tmpreg) {
00482 if (RREGEXP(regex)->usecnt) {
00483 onig_free(re);
00484 }
00485 else {
00486 onig_free(RREGEXP(regex)->ptr);
00487 RREGEXP(regex)->ptr = re;
00488 }
00489 }
00490
00491 if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
00492 if (ret < 0) {
00493
00494 return Qnil;
00495 }
00496
00497 MATCHED(p);
00498 p->prev = p->curr;
00499 if (succptr) {
00500 p->curr += p->regs.end[0];
00501 }
00502 if (getstr) {
00503 return extract_beg_len(p, p->prev, p->regs.end[0]);
00504 }
00505 else {
00506 return INT2FIX(p->regs.end[0]);
00507 }
00508 }
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525 static VALUE
00526 strscan_scan(VALUE self, VALUE re)
00527 {
00528 return strscan_do_scan(self, re, 1, 1, 1);
00529 }
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542 static VALUE
00543 strscan_match_p(VALUE self, VALUE re)
00544 {
00545 return strscan_do_scan(self, re, 0, 0, 1);
00546 }
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565 static VALUE
00566 strscan_skip(VALUE self, VALUE re)
00567 {
00568 return strscan_do_scan(self, re, 1, 0, 1);
00569 }
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586 static VALUE
00587 strscan_check(VALUE self, VALUE re)
00588 {
00589 return strscan_do_scan(self, re, 0, 1, 1);
00590 }
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602 static VALUE
00603 strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
00604 {
00605 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
00606 }
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620 static VALUE
00621 strscan_scan_until(VALUE self, VALUE re)
00622 {
00623 return strscan_do_scan(self, re, 1, 1, 0);
00624 }
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639 static VALUE
00640 strscan_exist_p(VALUE self, VALUE re)
00641 {
00642 return strscan_do_scan(self, re, 0, 0, 0);
00643 }
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661 static VALUE
00662 strscan_skip_until(VALUE self, VALUE re)
00663 {
00664 return strscan_do_scan(self, re, 1, 0, 0);
00665 }
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680 static VALUE
00681 strscan_check_until(VALUE self, VALUE re)
00682 {
00683 return strscan_do_scan(self, re, 0, 1, 0);
00684 }
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695 static VALUE
00696 strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
00697 {
00698 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
00699 }
00700
00701 static void
00702 adjust_registers_to_matched(struct strscanner *p)
00703 {
00704 onig_region_clear(&(p->regs));
00705 onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
00706 }
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722 static VALUE
00723 strscan_getch(VALUE self)
00724 {
00725 struct strscanner *p;
00726 long len;
00727
00728 GET_SCANNER(self, p);
00729 CLEAR_MATCH_STATUS(p);
00730 if (EOS_P(p))
00731 return Qnil;
00732
00733 len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str));
00734 if (p->curr + len > S_LEN(p)) {
00735 len = S_LEN(p) - p->curr;
00736 }
00737 p->prev = p->curr;
00738 p->curr += len;
00739 MATCHED(p);
00740 adjust_registers_to_matched(p);
00741 return extract_range(p, p->prev + p->regs.beg[0],
00742 p->prev + p->regs.end[0]);
00743 }
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761 static VALUE
00762 strscan_get_byte(VALUE self)
00763 {
00764 struct strscanner *p;
00765
00766 GET_SCANNER(self, p);
00767 CLEAR_MATCH_STATUS(p);
00768 if (EOS_P(p))
00769 return Qnil;
00770
00771 p->prev = p->curr;
00772 p->curr++;
00773 MATCHED(p);
00774 adjust_registers_to_matched(p);
00775 return extract_range(p, p->prev + p->regs.beg[0],
00776 p->prev + p->regs.end[0]);
00777 }
00778
00779
00780
00781
00782
00783 static VALUE
00784 strscan_getbyte(VALUE self)
00785 {
00786 rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
00787 return strscan_get_byte(self);
00788 }
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801 static VALUE
00802 strscan_peek(VALUE self, VALUE vlen)
00803 {
00804 struct strscanner *p;
00805 long len;
00806
00807 GET_SCANNER(self, p);
00808
00809 len = NUM2LONG(vlen);
00810 if (EOS_P(p))
00811 return infect(str_new(p, "", 0), p);
00812
00813 if (p->curr + len > S_LEN(p))
00814 len = S_LEN(p) - p->curr;
00815 return extract_beg_len(p, p->curr, len);
00816 }
00817
00818
00819
00820
00821
00822 static VALUE
00823 strscan_peep(VALUE self, VALUE vlen)
00824 {
00825 rb_warning("StringScanner#peep is obsolete; use #peek instead");
00826 return strscan_peek(self, vlen);
00827 }
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840 static VALUE
00841 strscan_unscan(VALUE self)
00842 {
00843 struct strscanner *p;
00844
00845 GET_SCANNER(self, p);
00846 if (! MATCHED_P(p))
00847 rb_raise(ScanError, "unscan failed: previous match record not exist");
00848 p->curr = p->prev;
00849 CLEAR_MATCH_STATUS(p);
00850 return self;
00851 }
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865 static VALUE
00866 strscan_bol_p(VALUE self)
00867 {
00868 struct strscanner *p;
00869
00870 GET_SCANNER(self, p);
00871 if (CURPTR(p) > S_PEND(p)) return Qnil;
00872 if (p->curr == 0) return Qtrue;
00873 return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
00874 }
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886 static VALUE
00887 strscan_eos_p(VALUE self)
00888 {
00889 struct strscanner *p;
00890
00891 GET_SCANNER(self, p);
00892 return EOS_P(p) ? Qtrue : Qfalse;
00893 }
00894
00895
00896
00897
00898
00899 static VALUE
00900 strscan_empty_p(VALUE self)
00901 {
00902 rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
00903 return strscan_eos_p(self);
00904 }
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914 static VALUE
00915 strscan_rest_p(VALUE self)
00916 {
00917 struct strscanner *p;
00918
00919 GET_SCANNER(self, p);
00920 return EOS_P(p) ? Qfalse : Qtrue;
00921 }
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932 static VALUE
00933 strscan_matched_p(VALUE self)
00934 {
00935 struct strscanner *p;
00936
00937 GET_SCANNER(self, p);
00938 return MATCHED_P(p) ? Qtrue : Qfalse;
00939 }
00940
00941
00942
00943
00944
00945
00946
00947
00948 static VALUE
00949 strscan_matched(VALUE self)
00950 {
00951 struct strscanner *p;
00952
00953 GET_SCANNER(self, p);
00954 if (! MATCHED_P(p)) return Qnil;
00955 return extract_range(p, p->prev + p->regs.beg[0],
00956 p->prev + p->regs.end[0]);
00957 }
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969 static VALUE
00970 strscan_matched_size(VALUE self)
00971 {
00972 struct strscanner *p;
00973
00974 GET_SCANNER(self, p);
00975 if (! MATCHED_P(p)) return Qnil;
00976 return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
00977 }
00978
00979 static int
00980 name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end)
00981 {
00982 int num;
00983
00984 num = onig_name_to_backref_number(RREGEXP(regexp)->ptr,
00985 (const unsigned char* )name, (const unsigned char* )name_end, regs);
00986 if (num >= 1) {
00987 return num;
00988 }
00989 else {
00990 VALUE s = rb_str_new(name, (long )(name_end - name));
00991 rb_raise(rb_eIndexError, "undefined group name reference: %s",
00992 StringValuePtr(s));
00993 }
00994
00995 UNREACHABLE;
00996 }
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024 static VALUE
01025 strscan_aref(VALUE self, VALUE idx)
01026 {
01027 const char *name;
01028 struct strscanner *p;
01029 long i;
01030
01031 GET_SCANNER(self, p);
01032 if (! MATCHED_P(p)) return Qnil;
01033
01034 switch (TYPE(idx)) {
01035 case T_SYMBOL:
01036 name = rb_id2name(SYM2ID(idx));
01037 goto name_to_backref;
01038 break;
01039 case T_STRING:
01040 name = StringValuePtr(idx);
01041 name_to_backref:
01042 i = name_to_backref_number(&(p->regs), p->regex, name, name + strlen(name));
01043 break;
01044 default:
01045 i = NUM2LONG(idx);
01046 }
01047
01048 if (i < 0)
01049 i += p->regs.num_regs;
01050 if (i < 0) return Qnil;
01051 if (i >= p->regs.num_regs) return Qnil;
01052 if (p->regs.beg[i] == -1) return Qnil;
01053
01054 return extract_range(p, p->prev + p->regs.beg[i],
01055 p->prev + p->regs.end[i]);
01056 }
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067 static VALUE
01068 strscan_pre_match(VALUE self)
01069 {
01070 struct strscanner *p;
01071
01072 GET_SCANNER(self, p);
01073 if (! MATCHED_P(p)) return Qnil;
01074 return extract_range(p, 0, p->prev + p->regs.beg[0]);
01075 }
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086 static VALUE
01087 strscan_post_match(VALUE self)
01088 {
01089 struct strscanner *p;
01090
01091 GET_SCANNER(self, p);
01092 if (! MATCHED_P(p)) return Qnil;
01093 return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
01094 }
01095
01096
01097
01098
01099
01100 static VALUE
01101 strscan_rest(VALUE self)
01102 {
01103 struct strscanner *p;
01104
01105 GET_SCANNER(self, p);
01106 if (EOS_P(p)) {
01107 return infect(str_new(p, "", 0), p);
01108 }
01109 return extract_range(p, p->curr, S_LEN(p));
01110 }
01111
01112
01113
01114
01115 static VALUE
01116 strscan_rest_size(VALUE self)
01117 {
01118 struct strscanner *p;
01119 long i;
01120
01121 GET_SCANNER(self, p);
01122 if (EOS_P(p)) {
01123 return INT2FIX(0);
01124 }
01125 i = S_LEN(p) - p->curr;
01126 return INT2FIX(i);
01127 }
01128
01129
01130
01131
01132
01133 static VALUE
01134 strscan_restsize(VALUE self)
01135 {
01136 rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
01137 return strscan_rest_size(self);
01138 }
01139
/* Maximum number of bytes shown on each side of the scan position by #inspect. */
#define INSPECT_LENGTH 5
/* NOTE(review): BUFSIZE is not referenced in the visible code; kept for compatibility. */
#define BUFSIZE 256
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154 static VALUE
01155 strscan_inspect(VALUE self)
01156 {
01157 struct strscanner *p;
01158 VALUE a, b;
01159
01160 p = check_strscan(self);
01161 if (NIL_P(p->str)) {
01162 a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
01163 return infect(a, p);
01164 }
01165 if (EOS_P(p)) {
01166 a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
01167 return infect(a, p);
01168 }
01169 if (p->curr == 0) {
01170 b = inspect2(p);
01171 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
01172 rb_obj_class(self),
01173 p->curr, S_LEN(p),
01174 b);
01175 return infect(a, p);
01176 }
01177 a = inspect1(p);
01178 b = inspect2(p);
01179 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
01180 rb_obj_class(self),
01181 p->curr, S_LEN(p),
01182 a, b);
01183 return infect(a, p);
01184 }
01185
01186 static VALUE
01187 inspect1(struct strscanner *p)
01188 {
01189 VALUE str;
01190 long len;
01191
01192 if (p->curr == 0) return rb_str_new2("");
01193 if (p->curr > INSPECT_LENGTH) {
01194 str = rb_str_new_cstr("...");
01195 len = INSPECT_LENGTH;
01196 }
01197 else {
01198 str = rb_str_new(0, 0);
01199 len = p->curr;
01200 }
01201 rb_str_cat(str, CURPTR(p) - len, len);
01202 return rb_str_dump(str);
01203 }
01204
01205 static VALUE
01206 inspect2(struct strscanner *p)
01207 {
01208 VALUE str;
01209 long len;
01210
01211 if (EOS_P(p)) return rb_str_new2("");
01212 len = S_LEN(p) - p->curr;
01213 if (len > INSPECT_LENGTH) {
01214 str = rb_str_new(CURPTR(p), INSPECT_LENGTH);
01215 rb_str_cat2(str, "...");
01216 }
01217 else {
01218 str = rb_str_new(CURPTR(p), len);
01219 }
01220 return rb_str_dump(str);
01221 }
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330 void
01331 Init_strscan()
01332 {
01333 ID id_scanerr = rb_intern("ScanError");
01334 VALUE tmp;
01335
01336 id_byteslice = rb_intern("byteslice");
01337
01338 StringScanner = rb_define_class("StringScanner", rb_cObject);
01339 ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
01340 if (!rb_const_defined(rb_cObject, id_scanerr)) {
01341 rb_const_set(rb_cObject, id_scanerr, ScanError);
01342 }
01343 tmp = rb_str_new2(STRSCAN_VERSION);
01344 rb_obj_freeze(tmp);
01345 rb_const_set(StringScanner, rb_intern("Version"), tmp);
01346 tmp = rb_str_new2("$Id: strscan.c 44903 2014-02-10 11:45:14Z naruse $");
01347 rb_obj_freeze(tmp);
01348 rb_const_set(StringScanner, rb_intern("Id"), tmp);
01349
01350 rb_define_alloc_func(StringScanner, strscan_s_allocate);
01351 rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
01352 rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
01353 rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
01354 rb_define_method(StringScanner, "reset", strscan_reset, 0);
01355 rb_define_method(StringScanner, "terminate", strscan_terminate, 0);
01356 rb_define_method(StringScanner, "clear", strscan_clear, 0);
01357 rb_define_method(StringScanner, "string", strscan_get_string, 0);
01358 rb_define_method(StringScanner, "string=", strscan_set_string, 1);
01359 rb_define_method(StringScanner, "concat", strscan_concat, 1);
01360 rb_define_method(StringScanner, "<<", strscan_concat, 1);
01361 rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
01362 rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
01363 rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
01364 rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
01365 rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
01366
01367 rb_define_method(StringScanner, "scan", strscan_scan, 1);
01368 rb_define_method(StringScanner, "skip", strscan_skip, 1);
01369 rb_define_method(StringScanner, "match?", strscan_match_p, 1);
01370 rb_define_method(StringScanner, "check", strscan_check, 1);
01371 rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3);
01372
01373 rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1);
01374 rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1);
01375 rb_define_method(StringScanner, "exist?", strscan_exist_p, 1);
01376 rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
01377 rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
01378
01379 rb_define_method(StringScanner, "getch", strscan_getch, 0);
01380 rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
01381 rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
01382 rb_define_method(StringScanner, "peek", strscan_peek, 1);
01383 rb_define_method(StringScanner, "peep", strscan_peep, 1);
01384
01385 rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
01386
01387 rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
01388 rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
01389 rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
01390 rb_define_method(StringScanner, "empty?", strscan_empty_p, 0);
01391 rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
01392
01393 rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
01394 rb_define_method(StringScanner, "matched", strscan_matched, 0);
01395 rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
01396 rb_define_method(StringScanner, "[]", strscan_aref, 1);
01397 rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
01398 rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
01399
01400 rb_define_method(StringScanner, "rest", strscan_rest, 0);
01401 rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
01402 rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
01403
01404 rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
01405 }
01406