00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "ruby/ruby.h"
00012 #include "ruby/re.h"
00013 #include "ruby/encoding.h"
00014 #include "regint.h"
00015
/* Version string; Init_strscan publishes it as StringScanner::Version. */
#define STRSCAN_VERSION "0.7.0"



/* The StringScanner class object; assigned in Init_strscan. */
static VALUE StringScanner;
/* The StringScanner::Error exception class; assigned in Init_strscan. */
static VALUE ScanError;
/* Interned ID of "byteslice"; used by strscan_get_charpos. */
static ID id_byteslice;
00025
/* Per-object state wrapped by every StringScanner instance. */
struct strscanner
{
    /* Status bits; FLAG_MATCHED is the only bit defined here. */
    unsigned long flags;
#define FLAG_MATCHED (1 << 0)

    /* The string being scanned; Qnil until the object is initialized. */
    VALUE str;

    /* Byte offsets into str: prev is the scan position recorded just
     * before the last successful match, curr is the current position. */
    long prev;
    long curr;

    /* Registers of the last match; readers add prev to these offsets. */
    struct re_registers regs;
};
00042
/* Query, set and clear the FLAG_MATCHED bit of (s)->flags. */
#define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
#define MATCHED(s)            ((s)->flags |= FLAG_MATCHED)
#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED)

/* Pointer/length views of the scanned string (s)->str. */
#define S_PBEG(s)  (RSTRING_PTR((s)->str))
#define S_LEN(s)  (RSTRING_LEN((s)->str))
#define S_PEND(s)  (S_PBEG(s) + S_LEN(s))
#define CURPTR(s) (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)

/* True when the scan position is at or beyond the end of the string.
 * Only the macro argument is referenced, so the caller's variable may
 * have any name. */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))

/* Fetch the struct strscanner behind obj into var, raising ArgumentError
 * when the scanner has no string yet. */
#define GET_SCANNER(obj,var) do {\
    (var) = check_strscan(obj);\
    if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\
} while (0)
00059
00060
00061
00062
00063
/*
 * Forward declarations of this file's static functions.  The _() macro
 * wrapping each parameter list is defined outside this file
 * (NOTE(review): presumably for pre-ANSI compilers -- confirm).
 */
static VALUE infect _((VALUE str, struct strscanner *p));
static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));

/* Object lifecycle: type check, GC hooks, allocation, initialization. */
static struct strscanner *check_strscan _((VALUE obj));
static void strscan_mark _((void *p));
static void strscan_free _((void *p));
static size_t strscan_memsize _((const void *p));
static VALUE strscan_s_allocate _((VALUE klass));
static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));

/* Method implementations registered in Init_strscan. */
static VALUE strscan_s_mustc _((VALUE self));
static VALUE strscan_terminate _((VALUE self));
static VALUE strscan_clear _((VALUE self));
static VALUE strscan_get_string _((VALUE self));
static VALUE strscan_set_string _((VALUE self, VALUE str));
static VALUE strscan_concat _((VALUE self, VALUE str));
static VALUE strscan_get_pos _((VALUE self));
static VALUE strscan_set_pos _((VALUE self, VALUE pos));
static VALUE strscan_do_scan _((VALUE self, VALUE regex,
                                int succptr, int getstr, int headonly));
static VALUE strscan_scan _((VALUE self, VALUE re));
static VALUE strscan_match_p _((VALUE self, VALUE re));
static VALUE strscan_skip _((VALUE self, VALUE re));
static VALUE strscan_check _((VALUE self, VALUE re));
static VALUE strscan_scan_full _((VALUE self, VALUE re,
                                  VALUE succp, VALUE getp));
static VALUE strscan_scan_until _((VALUE self, VALUE re));
static VALUE strscan_skip_until _((VALUE self, VALUE re));
static VALUE strscan_check_until _((VALUE self, VALUE re));
static VALUE strscan_search_full _((VALUE self, VALUE re,
                                    VALUE succp, VALUE getp));
static void adjust_registers_to_matched _((struct strscanner *p));
static VALUE strscan_getch _((VALUE self));
static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_getbyte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_peep _((VALUE self, VALUE len));
static VALUE strscan_unscan _((VALUE self));
static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
static VALUE strscan_empty_p _((VALUE self));
static VALUE strscan_rest_p _((VALUE self));
static VALUE strscan_matched_p _((VALUE self));
static VALUE strscan_matched _((VALUE self));
static VALUE strscan_matched_size _((VALUE self));
static VALUE strscan_aref _((VALUE self, VALUE idx));
static VALUE strscan_pre_match _((VALUE self));
static VALUE strscan_post_match _((VALUE self));
static VALUE strscan_rest _((VALUE self));
static VALUE strscan_rest_size _((VALUE self));

/* Helpers behind StringScanner#inspect. */
static VALUE strscan_inspect _((VALUE self));
static VALUE inspect1 _((struct strscanner *p));
static VALUE inspect2 _((struct strscanner *p));
00120
00121
00122
00123
00124
00125 static VALUE
00126 infect(VALUE str, struct strscanner *p)
00127 {
00128 OBJ_INFECT(str, p->str);
00129 return str;
00130 }
00131
00132 static VALUE
00133 str_new(struct strscanner *p, const char *ptr, long len)
00134 {
00135 VALUE str = rb_str_new(ptr, len);
00136 rb_enc_copy(str, p->str);
00137 return str;
00138 }
00139
00140 static VALUE
00141 extract_range(struct strscanner *p, long beg_i, long end_i)
00142 {
00143 if (beg_i > S_LEN(p)) return Qnil;
00144 if (end_i > S_LEN(p))
00145 end_i = S_LEN(p);
00146 return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p);
00147 }
00148
00149 static VALUE
00150 extract_beg_len(struct strscanner *p, long beg_i, long len)
00151 {
00152 if (beg_i > S_LEN(p)) return Qnil;
00153 if (beg_i + len > S_LEN(p))
00154 len = S_LEN(p) - beg_i;
00155 return infect(str_new(p, S_PBEG(p) + beg_i, len), p);
00156 }
00157
00158
00159
00160
00161
00162 static void
00163 strscan_mark(void *ptr)
00164 {
00165 struct strscanner *p = ptr;
00166 rb_gc_mark(p->str);
00167 }
00168
00169 static void
00170 strscan_free(void *ptr)
00171 {
00172 struct strscanner *p = ptr;
00173 onig_region_free(&(p->regs), 0);
00174 ruby_xfree(p);
00175 }
00176
00177 static size_t
00178 strscan_memsize(const void *ptr)
00179 {
00180 const struct strscanner *p = ptr;
00181 size_t size = 0;
00182 if (p) {
00183 size = sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
00184 }
00185 return size;
00186 }
00187
/* Typed-data descriptor tying StringScanner objects to their mark,
 * free and memsize hooks. */
static const rb_data_type_t strscanner_type = {
    "StringScanner",
    {strscan_mark, strscan_free, strscan_memsize}
};
00192
00193 static VALUE
00194 strscan_s_allocate(VALUE klass)
00195 {
00196 struct strscanner *p;
00197
00198 p = ALLOC(struct strscanner);
00199 MEMZERO(p, struct strscanner, 1);
00200 CLEAR_MATCH_STATUS(p);
00201 onig_region_init(&(p->regs));
00202 p->str = Qnil;
00203 return TypedData_Wrap_Struct(klass, &strscanner_type, p);
00204 }
00205
00206
00207
00208
00209
00210
00211
00212 static VALUE
00213 strscan_initialize(int argc, VALUE *argv, VALUE self)
00214 {
00215 struct strscanner *p;
00216 VALUE str, need_dup;
00217
00218 p = check_strscan(self);
00219 rb_scan_args(argc, argv, "11", &str, &need_dup);
00220 StringValue(str);
00221 p->str = str;
00222
00223 return self;
00224 }
00225
00226 static struct strscanner *
00227 check_strscan(VALUE obj)
00228 {
00229 return rb_check_typeddata(obj, &strscanner_type);
00230 }
00231
00232
00233
00234
00235
00236
00237
00238
00239 static VALUE
00240 strscan_init_copy(VALUE vself, VALUE vorig)
00241 {
00242 struct strscanner *self, *orig;
00243
00244 self = check_strscan(vself);
00245 orig = check_strscan(vorig);
00246 if (self != orig) {
00247 self->flags = orig->flags;
00248 self->str = orig->str;
00249 self->prev = orig->prev;
00250 self->curr = orig->curr;
00251 onig_region_copy(&self->regs, &orig->regs);
00252 }
00253
00254 return vself;
00255 }
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
/* StringScanner.must_C_version: does nothing and returns the receiver. */
static VALUE
strscan_s_mustc(VALUE self)
{
    return self;
}
00271
00272
00273
00274
00275 static VALUE
00276 strscan_reset(VALUE self)
00277 {
00278 struct strscanner *p;
00279
00280 GET_SCANNER(self, p);
00281 p->curr = 0;
00282 CLEAR_MATCH_STATUS(p);
00283 return self;
00284 }
00285
00286
00287
00288
00289
00290
00291
00292
00293 static VALUE
00294 strscan_terminate(VALUE self)
00295 {
00296 struct strscanner *p;
00297
00298 GET_SCANNER(self, p);
00299 p->curr = S_LEN(p);
00300 CLEAR_MATCH_STATUS(p);
00301 return self;
00302 }
00303
00304
00305
00306
00307
00308 static VALUE
00309 strscan_clear(VALUE self)
00310 {
00311 rb_warning("StringScanner#clear is obsolete; use #terminate instead");
00312 return strscan_terminate(self);
00313 }
00314
00315
00316
00317
00318 static VALUE
00319 strscan_get_string(VALUE self)
00320 {
00321 struct strscanner *p;
00322
00323 GET_SCANNER(self, p);
00324 return p->str;
00325 }
00326
00327
00328
00329
00330
00331
00332
00333 static VALUE
00334 strscan_set_string(VALUE self, VALUE str)
00335 {
00336 struct strscanner *p = check_strscan(self);
00337
00338 StringValue(str);
00339 p->str = str;
00340 p->curr = 0;
00341 CLEAR_MATCH_STATUS(p);
00342 return str;
00343 }
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359 static VALUE
00360 strscan_concat(VALUE self, VALUE str)
00361 {
00362 struct strscanner *p;
00363
00364 GET_SCANNER(self, p);
00365 StringValue(str);
00366 rb_str_append(p->str, str);
00367 return self;
00368 }
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384 static VALUE
00385 strscan_get_pos(VALUE self)
00386 {
00387 struct strscanner *p;
00388
00389 GET_SCANNER(self, p);
00390 return INT2FIX(p->curr);
00391 }
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406 static VALUE
00407 strscan_get_charpos(VALUE self)
00408 {
00409 struct strscanner *p;
00410 VALUE substr;
00411
00412 GET_SCANNER(self, p);
00413
00414 substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
00415
00416 return rb_str_length(substr);
00417 }
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428 static VALUE
00429 strscan_set_pos(VALUE self, VALUE v)
00430 {
00431 struct strscanner *p;
00432 long i;
00433
00434 GET_SCANNER(self, p);
00435 i = NUM2INT(v);
00436 if (i < 0) i += S_LEN(p);
00437 if (i < 0) rb_raise(rb_eRangeError, "index out of range");
00438 if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
00439 p->curr = i;
00440 return INT2NUM(i);
00441 }
00442
/*
 * Shared implementation behind the scan/skip/check/match? family and
 * their *_until / *_full variants.
 *
 *   regex    - the pattern; Check_Type requires a T_REGEXP.
 *   succptr  - non-zero: advance the scan pointer past the match.
 *   getstr   - non-zero: return the substring running from the previous
 *              position to the end of the match; zero: return the end
 *              offset of the match as an Integer.
 *   headonly - non-zero: match only at the current position
 *              (onig_match); zero: search forward (onig_search).
 *
 * Returns nil when the scan pointer is beyond the end of the string or
 * when the matcher reports a negative result other than -2.  Raises
 * ScanError when the matcher returns -2.
 */
static VALUE
strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
{
    regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
    struct strscanner *p;
    regex_t *re;
    long ret;
    int tmpreg;

    Check_Type(regex, T_REGEXP);
    GET_SCANNER(self, p);

    /* Any previous match is forgotten before attempting a new one. */
    CLEAR_MATCH_STATUS(p);
    if (S_RESTLEN(p) < 0) {
        return Qnil;
    }
    re = rb_reg_prepare_re(regex, p->str);
    /* tmpreg is non-zero when the prepared regex_t is not the one
     * currently stored in the Regexp object. */
    tmpreg = re != RREGEXP(regex)->ptr;
    /* NOTE(review): usecnt presumably pins the stored pattern while it
     * is being matched -- confirm against re.c. */
    if (!tmpreg) RREGEXP(regex)->usecnt++;

    if (headonly) {
        ret = onig_match(re, (UChar* )CURPTR(p),
                         (UChar* )(CURPTR(p) + S_RESTLEN(p)),
                         (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
    }
    else {
        ret = onig_search(re,
                          (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
                          (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
                          &(p->regs), ONIG_OPTION_NONE);
    }
    if (!tmpreg) RREGEXP(regex)->usecnt--;
    if (tmpreg) {
        /* Either discard the temporary pattern (stored one still has a
         * non-zero usecnt) or install it in place of the stored one. */
        if (RREGEXP(regex)->usecnt) {
            onig_free(re);
        }
        else {
            onig_free(RREGEXP(regex)->ptr);
            RREGEXP(regex)->ptr = re;
        }
    }

    if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
    if (ret < 0) {
        /* no match */
        return Qnil;
    }

    MATCHED(p);
    /* Registers are relative to the position where matching started. */
    p->prev = p->curr;
    if (succptr) {
        p->curr += p->regs.end[0];
    }
    if (getstr) {
        return extract_beg_len(p, p->prev, p->regs.end[0]);
    }
    else {
        return INT2FIX(p->regs.end[0]);
    }
}
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519 static VALUE
00520 strscan_scan(VALUE self, VALUE re)
00521 {
00522 return strscan_do_scan(self, re, 1, 1, 1);
00523 }
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536 static VALUE
00537 strscan_match_p(VALUE self, VALUE re)
00538 {
00539 return strscan_do_scan(self, re, 0, 0, 1);
00540 }
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559 static VALUE
00560 strscan_skip(VALUE self, VALUE re)
00561 {
00562 return strscan_do_scan(self, re, 1, 0, 1);
00563 }
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580 static VALUE
00581 strscan_check(VALUE self, VALUE re)
00582 {
00583 return strscan_do_scan(self, re, 0, 1, 1);
00584 }
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596 static VALUE
00597 strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
00598 {
00599 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
00600 }
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614 static VALUE
00615 strscan_scan_until(VALUE self, VALUE re)
00616 {
00617 return strscan_do_scan(self, re, 1, 1, 0);
00618 }
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633 static VALUE
00634 strscan_exist_p(VALUE self, VALUE re)
00635 {
00636 return strscan_do_scan(self, re, 0, 0, 0);
00637 }
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655 static VALUE
00656 strscan_skip_until(VALUE self, VALUE re)
00657 {
00658 return strscan_do_scan(self, re, 1, 0, 0);
00659 }
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674 static VALUE
00675 strscan_check_until(VALUE self, VALUE re)
00676 {
00677 return strscan_do_scan(self, re, 0, 1, 0);
00678 }
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689 static VALUE
00690 strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
00691 {
00692 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
00693 }
00694
00695 static void
00696 adjust_registers_to_matched(struct strscanner *p)
00697 {
00698 onig_region_clear(&(p->regs));
00699 onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
00700 }
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716 static VALUE
00717 strscan_getch(VALUE self)
00718 {
00719 struct strscanner *p;
00720 long len;
00721
00722 GET_SCANNER(self, p);
00723 CLEAR_MATCH_STATUS(p);
00724 if (EOS_P(p))
00725 return Qnil;
00726
00727 len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str));
00728 if (p->curr + len > S_LEN(p)) {
00729 len = S_LEN(p) - p->curr;
00730 }
00731 p->prev = p->curr;
00732 p->curr += len;
00733 MATCHED(p);
00734 adjust_registers_to_matched(p);
00735 return extract_range(p, p->prev + p->regs.beg[0],
00736 p->prev + p->regs.end[0]);
00737 }
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755 static VALUE
00756 strscan_get_byte(VALUE self)
00757 {
00758 struct strscanner *p;
00759
00760 GET_SCANNER(self, p);
00761 CLEAR_MATCH_STATUS(p);
00762 if (EOS_P(p))
00763 return Qnil;
00764
00765 p->prev = p->curr;
00766 p->curr++;
00767 MATCHED(p);
00768 adjust_registers_to_matched(p);
00769 return extract_range(p, p->prev + p->regs.beg[0],
00770 p->prev + p->regs.end[0]);
00771 }
00772
00773
00774
00775
00776
00777 static VALUE
00778 strscan_getbyte(VALUE self)
00779 {
00780 rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
00781 return strscan_get_byte(self);
00782 }
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795 static VALUE
00796 strscan_peek(VALUE self, VALUE vlen)
00797 {
00798 struct strscanner *p;
00799 long len;
00800
00801 GET_SCANNER(self, p);
00802
00803 len = NUM2LONG(vlen);
00804 if (EOS_P(p))
00805 return infect(str_new(p, "", 0), p);
00806
00807 if (p->curr + len > S_LEN(p))
00808 len = S_LEN(p) - p->curr;
00809 return extract_beg_len(p, p->curr, len);
00810 }
00811
00812
00813
00814
00815
00816 static VALUE
00817 strscan_peep(VALUE self, VALUE vlen)
00818 {
00819 rb_warning("StringScanner#peep is obsolete; use #peek instead");
00820 return strscan_peek(self, vlen);
00821 }
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834 static VALUE
00835 strscan_unscan(VALUE self)
00836 {
00837 struct strscanner *p;
00838
00839 GET_SCANNER(self, p);
00840 if (! MATCHED_P(p))
00841 rb_raise(ScanError, "unscan failed: previous match record not exist");
00842 p->curr = p->prev;
00843 CLEAR_MATCH_STATUS(p);
00844 return self;
00845 }
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859 static VALUE
00860 strscan_bol_p(VALUE self)
00861 {
00862 struct strscanner *p;
00863
00864 GET_SCANNER(self, p);
00865 if (CURPTR(p) > S_PEND(p)) return Qnil;
00866 if (p->curr == 0) return Qtrue;
00867 return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
00868 }
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880 static VALUE
00881 strscan_eos_p(VALUE self)
00882 {
00883 struct strscanner *p;
00884
00885 GET_SCANNER(self, p);
00886 return EOS_P(p) ? Qtrue : Qfalse;
00887 }
00888
00889
00890
00891
00892
00893 static VALUE
00894 strscan_empty_p(VALUE self)
00895 {
00896 rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
00897 return strscan_eos_p(self);
00898 }
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908 static VALUE
00909 strscan_rest_p(VALUE self)
00910 {
00911 struct strscanner *p;
00912
00913 GET_SCANNER(self, p);
00914 return EOS_P(p) ? Qfalse : Qtrue;
00915 }
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926 static VALUE
00927 strscan_matched_p(VALUE self)
00928 {
00929 struct strscanner *p;
00930
00931 GET_SCANNER(self, p);
00932 return MATCHED_P(p) ? Qtrue : Qfalse;
00933 }
00934
00935
00936
00937
00938
00939
00940
00941
00942 static VALUE
00943 strscan_matched(VALUE self)
00944 {
00945 struct strscanner *p;
00946
00947 GET_SCANNER(self, p);
00948 if (! MATCHED_P(p)) return Qnil;
00949 return extract_range(p, p->prev + p->regs.beg[0],
00950 p->prev + p->regs.end[0]);
00951 }
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963 static VALUE
00964 strscan_matched_size(VALUE self)
00965 {
00966 struct strscanner *p;
00967
00968 GET_SCANNER(self, p);
00969 if (! MATCHED_P(p)) return Qnil;
00970 return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
00971 }
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987 static VALUE
00988 strscan_aref(VALUE self, VALUE idx)
00989 {
00990 struct strscanner *p;
00991 long i;
00992
00993 GET_SCANNER(self, p);
00994 if (! MATCHED_P(p)) return Qnil;
00995
00996 i = NUM2LONG(idx);
00997 if (i < 0)
00998 i += p->regs.num_regs;
00999 if (i < 0) return Qnil;
01000 if (i >= p->regs.num_regs) return Qnil;
01001 if (p->regs.beg[i] == -1) return Qnil;
01002
01003 return extract_range(p, p->prev + p->regs.beg[i],
01004 p->prev + p->regs.end[i]);
01005 }
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016 static VALUE
01017 strscan_pre_match(VALUE self)
01018 {
01019 struct strscanner *p;
01020
01021 GET_SCANNER(self, p);
01022 if (! MATCHED_P(p)) return Qnil;
01023 return extract_range(p, 0, p->prev + p->regs.beg[0]);
01024 }
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035 static VALUE
01036 strscan_post_match(VALUE self)
01037 {
01038 struct strscanner *p;
01039
01040 GET_SCANNER(self, p);
01041 if (! MATCHED_P(p)) return Qnil;
01042 return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
01043 }
01044
01045
01046
01047
01048
01049 static VALUE
01050 strscan_rest(VALUE self)
01051 {
01052 struct strscanner *p;
01053
01054 GET_SCANNER(self, p);
01055 if (EOS_P(p)) {
01056 return infect(str_new(p, "", 0), p);
01057 }
01058 return extract_range(p, p->curr, S_LEN(p));
01059 }
01060
01061
01062
01063
01064 static VALUE
01065 strscan_rest_size(VALUE self)
01066 {
01067 struct strscanner *p;
01068 long i;
01069
01070 GET_SCANNER(self, p);
01071 if (EOS_P(p)) {
01072 return INT2FIX(0);
01073 }
01074 i = S_LEN(p) - p->curr;
01075 return INT2FIX(i);
01076 }
01077
01078
01079
01080
01081
01082 static VALUE
01083 strscan_restsize(VALUE self)
01084 {
01085 rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
01086 return strscan_rest_size(self);
01087 }
01088
/* Maximum number of bytes inspect1/inspect2 show on each side of the
 * scan pointer. */
#define INSPECT_LENGTH 5
/* NOTE(review): BUFSIZE is not referenced by any code visible in this file. */
#define BUFSIZE 256
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103 static VALUE
01104 strscan_inspect(VALUE self)
01105 {
01106 struct strscanner *p;
01107 VALUE a, b;
01108
01109 p = check_strscan(self);
01110 if (NIL_P(p->str)) {
01111 a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
01112 return infect(a, p);
01113 }
01114 if (EOS_P(p)) {
01115 a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
01116 return infect(a, p);
01117 }
01118 if (p->curr == 0) {
01119 b = inspect2(p);
01120 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
01121 rb_obj_class(self),
01122 p->curr, S_LEN(p),
01123 b);
01124 return infect(a, p);
01125 }
01126 a = inspect1(p);
01127 b = inspect2(p);
01128 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
01129 rb_obj_class(self),
01130 p->curr, S_LEN(p),
01131 a, b);
01132 return infect(a, p);
01133 }
01134
01135 static VALUE
01136 inspect1(struct strscanner *p)
01137 {
01138 VALUE str;
01139 long len;
01140
01141 if (p->curr == 0) return rb_str_new2("");
01142 if (p->curr > INSPECT_LENGTH) {
01143 str = rb_str_new_cstr("...");
01144 len = INSPECT_LENGTH;
01145 }
01146 else {
01147 str = rb_str_new(0, 0);
01148 len = p->curr;
01149 }
01150 rb_str_cat(str, CURPTR(p) - len, len);
01151 return rb_str_dump(str);
01152 }
01153
01154 static VALUE
01155 inspect2(struct strscanner *p)
01156 {
01157 VALUE str;
01158 long len;
01159
01160 if (EOS_P(p)) return rb_str_new2("");
01161 len = S_LEN(p) - p->curr;
01162 if (len > INSPECT_LENGTH) {
01163 str = rb_str_new(CURPTR(p), INSPECT_LENGTH);
01164 rb_str_cat2(str, "...");
01165 }
01166 else {
01167 str = rb_str_new(CURPTR(p), len);
01168 }
01169 return rb_str_dump(str);
01170 }
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
/*
 * Extension entry point.  Defines the StringScanner class and its
 * StringScanner::Error exception, publishes the Version and Id
 * constants, and registers the allocator and every method implemented
 * in this file.
 */
void
Init_strscan()
{
    ID id_scanerr = rb_intern("ScanError");
    VALUE tmp;

    id_byteslice = rb_intern("byteslice");

    StringScanner = rb_define_class("StringScanner", rb_cObject);
    ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
    /* Also expose the error class as top-level ScanError unless that
     * constant already exists. */
    if (!rb_const_defined(rb_cObject, id_scanerr)) {
        rb_const_set(rb_cObject, id_scanerr, ScanError);
    }
    tmp = rb_str_new2(STRSCAN_VERSION);
    rb_obj_freeze(tmp);
    rb_const_set(StringScanner, rb_intern("Version"), tmp);
    tmp = rb_str_new2("$Id: strscan.c 44659 2014-01-19 16:28:53Z nagachika $");
    rb_obj_freeze(tmp);
    rb_const_set(StringScanner, rb_intern("Id"), tmp);

    /* Allocation, construction and basic state. */
    rb_define_alloc_func(StringScanner, strscan_s_allocate);
    rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
    rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
    rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
    rb_define_method(StringScanner, "reset",       strscan_reset,       0);
    rb_define_method(StringScanner, "terminate",   strscan_terminate,   0);
    rb_define_method(StringScanner, "clear",       strscan_clear,       0);
    rb_define_method(StringScanner, "string",      strscan_get_string,  0);
    rb_define_method(StringScanner, "string=",     strscan_set_string,  1);
    rb_define_method(StringScanner, "concat",      strscan_concat,      1);
    rb_define_method(StringScanner, "<<",          strscan_concat,      1);
    rb_define_method(StringScanner, "pos",         strscan_get_pos,     0);
    rb_define_method(StringScanner, "pos=",        strscan_set_pos,     1);
    rb_define_method(StringScanner, "charpos",     strscan_get_charpos, 0);
    rb_define_method(StringScanner, "pointer",     strscan_get_pos,     0);
    rb_define_method(StringScanner, "pointer=",    strscan_set_pos,     1);

    /* Matching anchored at the scan pointer. */
    rb_define_method(StringScanner, "scan",        strscan_scan,        1);
    rb_define_method(StringScanner, "skip",        strscan_skip,        1);
    rb_define_method(StringScanner, "match?",      strscan_match_p,     1);
    rb_define_method(StringScanner, "check",       strscan_check,       1);
    rb_define_method(StringScanner, "scan_full",   strscan_scan_full,   3);

    /* Forward-searching variants. */
    rb_define_method(StringScanner, "scan_until",  strscan_scan_until,  1);
    rb_define_method(StringScanner, "skip_until",  strscan_skip_until,  1);
    rb_define_method(StringScanner, "exist?",      strscan_exist_p,     1);
    rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
    rb_define_method(StringScanner, "search_full", strscan_search_full, 3);

    /* Character/byte access. */
    rb_define_method(StringScanner, "getch",       strscan_getch,       0);
    rb_define_method(StringScanner, "get_byte",    strscan_get_byte,    0);
    rb_define_method(StringScanner, "getbyte",     strscan_getbyte,     0);
    rb_define_method(StringScanner, "peek",        strscan_peek,        1);
    rb_define_method(StringScanner, "peep",        strscan_peep,        1);

    rb_define_method(StringScanner, "unscan",      strscan_unscan,      0);

    /* Position predicates. */
    rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
    rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
    rb_define_method(StringScanner, "eos?",        strscan_eos_p,       0);
    rb_define_method(StringScanner, "empty?",      strscan_empty_p,     0);
    rb_define_method(StringScanner, "rest?",       strscan_rest_p,      0);

    /* Access to the last match. */
    rb_define_method(StringScanner, "matched?",    strscan_matched_p,   0);
    rb_define_method(StringScanner, "matched",     strscan_matched,     0);
    rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
    rb_define_method(StringScanner, "[]",          strscan_aref,        1);
    rb_define_method(StringScanner, "pre_match",   strscan_pre_match,   0);
    rb_define_method(StringScanner, "post_match",  strscan_post_match,  0);

    /* Remaining input. */
    rb_define_method(StringScanner, "rest",        strscan_rest,        0);
    rb_define_method(StringScanner, "rest_size",   strscan_rest_size,   0);
    rb_define_method(StringScanner, "restsize",    strscan_restsize,    0);

    rb_define_method(StringScanner, "inspect",     strscan_inspect,     0);
}
01355