00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/encoding.h"
00014 #include "internal.h"
00015 #include <sys/types.h>
00016 #include <ctype.h>
00017 #include <errno.h>
00018
00019
00020
00021
00022
00023
00024
00025
00026
/*
 * natstr: directive letters after which the '_' / '!' modifier is accepted
 * (checked with strchr() in the modifier parsing of pack/unpack).
 * 'q' and 'Q' are listed only when HAVE_TRUE_LONG_LONG is defined.
 */
00027 #ifdef HAVE_TRUE_LONG_LONG
00028 static const char natstr[] = "sSiIlLqQ";
00029 #else
00030 static const char natstr[] = "sSiIlL";
00031 #endif
/* endstr: directive letters after which the '<' / '>' modifier is accepted. */
00032 static const char endstr[] = "sSiIlLqQ";
00033
00034 #ifdef HAVE_TRUE_LONG_LONG
00035
/*
 * NATINT_LEN_Q: byte width used for the 'q' / 'Q' directives.  With a true
 * long long it defers to NATINT_LEN(long long, 8); otherwise it is fixed at 8.
 */
00036 # define NATINT_LEN_Q NATINT_LEN(long long, 8)
00037 #else
00038 # define NATINT_LEN_Q 8
00039 #endif
00040
/*
 * NATINT_PACK is defined when short is not 2 bytes, long is not 4 bytes, or
 * (with HAVE_TRUE_LONG_LONG) long long is not 8 bytes.  Code below uses it to
 * decide whether a per-directive `natint` flag has to be tracked at all.
 */
00041 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
00042 # define NATINT_PACK
00043 #endif
00044
00045 #ifdef DYNAMIC_ENDIAN
00046
00047
/*
 * Detects the byte order of the running machine.
 *
 * The answer is computed on the first call and cached in a function-local
 * static; later calls return the cached value.
 *
 * Returns 1 when the first byte in memory of an int holding 1 is zero
 * (big-endian), 0 otherwise.
 */
static int
is_bigendian(void)
{
    /* Doubles as the "already probed" flag and as the int whose bytes are inspected. */
    static int init = 0;
    static int endian_value;

    if (!init) {
        const char *first_byte;

        init = 1;
        first_byte = (const char *)&init;
        /* A zero first byte means the 1 is stored at the far end. */
        endian_value = (first_byte[0] == 0);
    }
    return endian_value;
}
/*
 * BIGENDIAN_P(): non-zero when the host is big-endian.
 *  - with DYNAMIC_ENDIAN it calls is_bigendian() at run time,
 *  - otherwise it is the compile-time constant 1 (WORDS_BIGENDIAN) or 0.
 */
00060 # define BIGENDIAN_P() (is_bigendian())
00061 #elif defined(WORDS_BIGENDIAN)
00062 # define BIGENDIAN_P() 1
00063 #else
00064 # define BIGENDIAN_P() 0
00065 #endif
00066
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 * With NATINT_PACK it reads a variable named `natint` that must be in scope
 * at the point of use: sizeof(type) when natint is set, otherwise the fixed
 * width `len`.  Without NATINT_PACK it is always sizeof(type).
 */
00067 #ifdef NATINT_PACK
00068 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
00069 #else
00070 # define NATINT_LEN(type,len) ((int)sizeof(type))
00071 #endif
00072
/*
 * INT64toNUM / UINT64toNUM: convert a 64-bit C integer to a Ruby number via
 * whichever C type is 8 bytes wide (long first, then long long).  Neither
 * macro is defined when no such type is available.
 */
00073 #if SIZEOF_LONG == 8
00074 # define INT64toNUM(x) LONG2NUM(x)
00075 # define UINT64toNUM(x) ULONG2NUM(x)
00076 #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
00077 # define INT64toNUM(x) LL2NUM(x)
00078 # define UINT64toNUM(x) ULL2NUM(x)
00079 #endif
00080
/*
 * define_swapx(x, xtype): emits a static function `swap<x>(xtype z)` that
 * returns z with the order of its bytes reversed.
 *
 * The reversal is done on two small stack buffers and the value is moved in
 * and out with memcpy(), so the generated function performs no heap
 * allocation, cannot fail, and never reads an object through a pointer of a
 * different type.
 */
#define define_swapx(x, xtype) \
static xtype \
TOKEN_PASTE(swap,x)(xtype z) \
{ \
    unsigned char src[sizeof(xtype)]; \
    unsigned char dst[sizeof(xtype)]; \
    size_t i; \
    xtype r; \
 \
    memcpy(src, &z, sizeof(xtype)); \
    for (i = 0; i < sizeof(xtype); i++) { \
        dst[sizeof(xtype) - i - 1] = src[i]; \
    } \
    memcpy(&r, dst, sizeof(xtype)); \
    return r; \
}
00102
/*
 * Byte-swap helpers, one per C type.  Each is mapped onto swap16/swap32/swap64
 * when the type has a matching width; otherwise a dedicated function is
 * generated with define_swapx().
 */

/* swaps: byte-swap a short. */
00103 #if SIZEOF_SHORT == 2
00104 # define swaps(x) swap16(x)
00105 #elif SIZEOF_SHORT == 4
00106 # define swaps(x) swap32(x)
00107 #else
00108 define_swapx(s,short)
00109 #endif
00110
/* swapi: byte-swap an int. */
00111 #if SIZEOF_INT == 2
00112 # define swapi(x) swap16(x)
00113 #elif SIZEOF_INT == 4
00114 # define swapi(x) swap32(x)
00115 #else
00116 define_swapx(i,int)
00117 #endif
00118
/* swapl: byte-swap a long. */
00119 #if SIZEOF_LONG == 4
00120 # define swapl(x) swap32(x)
00121 #elif SIZEOF_LONG == 8
00122 # define swapl(x) swap64(x)
00123 #else
00124 define_swapx(l,long)
00125 #endif
00126
/* swapll: byte-swap a LONG_LONG; only provided when HAVE_LONG_LONG is defined. */
00127 #ifdef HAVE_LONG_LONG
00128 # if SIZEOF_LONG_LONG == 8
00129 # define swapll(x) swap64(x)
00130 # else
00131 define_swapx(ll,LONG_LONG)
00132 # endif
00133 #endif
00134
/*
 * swapf: byte-swap a float.  When float is 4 bytes and int32_t exists the swap
 * is done on an integer of type FLOAT_SWAPPER (see the HTONF family of macros,
 * which copy the float's bytes into such an integer first).
 */
00135 #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
00136 # define swapf(x) swap32(x)
00137 # define FLOAT_SWAPPER uint32_t
00138 #else
00139 define_swapx(f,float)
00140 #endif
00141
00142 #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
00143 # define swapd(x) swap64(x)
00144 # define DOUBLE_SWAPPER uint64_t
00145 #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
00146 static double
00147 swapd(const double d)
00148 {
00149 double dtmp = d;
00150 uint32_t utmp[2];
00151 uint32_t utmp0;
00152
00153 utmp[0] = 0; utmp[1] = 0;
00154 memcpy(utmp,&dtmp,sizeof(double));
00155 utmp0 = utmp[0];
00156 utmp[0] = swap32(utmp[1]);
00157 utmp[1] = swap32(utmp0);
00158 memcpy(&dtmp,utmp,sizeof(double));
00159 return dtmp;
00160 }
00161 #else
00162 define_swapx(d, double)
00163 #endif
00164
00165 #undef define_swapx
00166
/*
 * Float/double byte-order conversions.
 *  rb_ntoh?/rb_hton?: value unchanged on a big-endian host, byte-swapped
 *                     otherwise.
 *  rb_htov?/rb_vtoh?: byte-swapped on a big-endian host, unchanged otherwise.
 * The trailing f/d selects swapf (float) or swapd (double).
 */
00167 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
00168 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
00169 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
00170 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
00171 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
00172 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
00173 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
00174 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
00175
/*
 * HTONF/HTOVF/NTOHF/VTOHF(x, y): convert float lvalue x, using y as scratch.
 * When FLOAT_SWAPPER is defined, y is an integer declared with
 * FLOAT_CONVWITH(y): the bytes of x are copied into y, y is converted as an
 * integer, the bytes are copied back into x, and the expression yields x.
 * Otherwise FLOAT_CONVWITH declares nothing, y is ignored and x is converted
 * directly.
 */
00176 #ifdef FLOAT_SWAPPER
00177 # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
00178 # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
00179 (y) = rb_htonf((FLOAT_SWAPPER)(y)), \
00180 memcpy(&(x),&(y),sizeof(float)), \
00181 (x))
00182 # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
00183 (y) = rb_htovf((FLOAT_SWAPPER)(y)), \
00184 memcpy(&(x),&(y),sizeof(float)), \
00185 (x))
00186 # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
00187 (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \
00188 memcpy(&(x),&(y),sizeof(float)), \
00189 (x))
00190 # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
00191 (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \
00192 memcpy(&(x),&(y),sizeof(float)), \
00193 (x))
00194 #else
00195 # define FLOAT_CONVWITH(y)
00196 # define HTONF(x,y) rb_htonf(x)
00197 # define HTOVF(x,y) rb_htovf(x)
00198 # define NTOHF(x,y) rb_ntohf(x)
00199 # define VTOHF(x,y) rb_vtohf(x)
00200 #endif
00201
/*
 * HTOND/HTOVD/NTOHD/VTOHD(x, y): the same scheme for double lvalues, with
 * DOUBLE_SWAPPER as the integer scratch type and DOUBLE_CONVWITH(y) as its
 * declaration.
 */
00202 #ifdef DOUBLE_SWAPPER
00203 # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
00204 # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \
00205 (y) = rb_htond((DOUBLE_SWAPPER)(y)), \
00206 memcpy(&(x),&(y),sizeof(double)), \
00207 (x))
00208 # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
00209 (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \
00210 memcpy(&(x),&(y),sizeof(double)), \
00211 (x))
00212 # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
00213 (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \
00214 memcpy(&(x),&(y),sizeof(double)), \
00215 (x))
00216 # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
00217 (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \
00218 memcpy(&(x),&(y),sizeof(double)), \
00219 (x))
00220 #else
00221 # define DOUBLE_CONVWITH(y)
00222 # define HTOND(x,y) rb_htond(x)
00223 # define HTOVD(x,y) rb_htovd(x)
00224 # define NTOHD(x,y) rb_ntohd(x)
00225 # define VTOHD(x,y) rb_vtohd(x)
00226 #endif
00227
/* Size in bytes of the scratch buffer pack_pack() uses for one packed integer. */
00228 #define MAX_INTEGER_PACK_SIZE 8
00229
/* Message raised (ArgumentError) when the template needs more items than the array holds. */
00230 static const char toofew[] = "too few arguments";
00231
/* Encoders defined further down in this file and used by pack_pack(). */
00232 static void encodes(VALUE,const char*,long,int,int);
00233 static void qpencode(VALUE,VALUE,long);
00234
/* Used by pack_unpack() for the 'U' directive; its definition is not shown in this part of the file. */
00235 static unsigned long utf8_to_uv(const char*,long*);
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347 static VALUE
00348 pack_pack(VALUE ary, VALUE fmt)
00349 {
00350 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
00351 static const char spc10[] = " ";
00352 const char *p, *pend;
00353 VALUE res, from, associates = 0;
00354 char type;
00355 long items, len, idx, plen;
00356 const char *ptr;
00357 int enc_info = 1;
00358 #ifdef NATINT_PACK
00359 int natint;
00360 #endif
00361 int integer_size, bigendian_p;
00362
00363 StringValue(fmt);
00364 p = RSTRING_PTR(fmt);
00365 pend = p + RSTRING_LEN(fmt);
00366 res = rb_str_buf_new(0);
00367
00368 items = RARRAY_LEN(ary);
00369 idx = 0;
00370
00371 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
00372 #define THISFROM (items > 0 ? RARRAY_AREF(ary, idx) : TOO_FEW)
00373 #define NEXTFROM (items-- > 0 ? RARRAY_AREF(ary, idx++) : TOO_FEW)
00374
00375 while (p < pend) {
00376 int explicit_endian = 0;
00377 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
00378 rb_raise(rb_eRuntimeError, "format string modified");
00379 }
00380 type = *p++;
00381 #ifdef NATINT_PACK
00382 natint = 0;
00383 #endif
00384
00385 if (ISSPACE(type)) continue;
00386 if (type == '#') {
00387 while ((p < pend) && (*p != '\n')) {
00388 p++;
00389 }
00390 continue;
00391 }
00392
00393 {
00394 modifiers:
00395 switch (*p) {
00396 case '_':
00397 case '!':
00398 if (strchr(natstr, type)) {
00399 #ifdef NATINT_PACK
00400 natint = 1;
00401 #endif
00402 p++;
00403 }
00404 else {
00405 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
00406 }
00407 goto modifiers;
00408
00409 case '<':
00410 case '>':
00411 if (!strchr(endstr, type)) {
00412 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
00413 }
00414 if (explicit_endian) {
00415 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
00416 }
00417 explicit_endian = *p++;
00418 goto modifiers;
00419 }
00420 }
00421
00422 if (*p == '*') {
00423 len = strchr("@Xxu", type) ? 0
00424 : strchr("PMm", type) ? 1
00425 : items;
00426 p++;
00427 }
00428 else if (ISDIGIT(*p)) {
00429 errno = 0;
00430 len = STRTOUL(p, (char**)&p, 10);
00431 if (errno) {
00432 rb_raise(rb_eRangeError, "pack length too big");
00433 }
00434 }
00435 else {
00436 len = 1;
00437 }
00438
00439 switch (type) {
00440 case 'U':
00441
00442 if (enc_info == 1) enc_info = 2;
00443 break;
00444 case 'm': case 'M': case 'u':
00445
00446 break;
00447 default:
00448
00449 enc_info = 0;
00450 break;
00451 }
00452 switch (type) {
00453 case 'A': case 'a': case 'Z':
00454 case 'B': case 'b':
00455 case 'H': case 'h':
00456 from = NEXTFROM;
00457 if (NIL_P(from)) {
00458 ptr = "";
00459 plen = 0;
00460 }
00461 else {
00462 StringValue(from);
00463 ptr = RSTRING_PTR(from);
00464 plen = RSTRING_LEN(from);
00465 OBJ_INFECT(res, from);
00466 }
00467
00468 if (p[-1] == '*')
00469 len = plen;
00470
00471 switch (type) {
00472 case 'a':
00473 case 'A':
00474 case 'Z':
00475 if (plen >= len) {
00476 rb_str_buf_cat(res, ptr, len);
00477 if (p[-1] == '*' && type == 'Z')
00478 rb_str_buf_cat(res, nul10, 1);
00479 }
00480 else {
00481 rb_str_buf_cat(res, ptr, plen);
00482 len -= plen;
00483 while (len >= 10) {
00484 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
00485 len -= 10;
00486 }
00487 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
00488 }
00489 break;
00490
00491 #define castchar(from) (char)((from) & 0xff)
00492
00493 case 'b':
00494 {
00495 int byte = 0;
00496 long i, j = 0;
00497
00498 if (len > plen) {
00499 j = (len - plen + 1)/2;
00500 len = plen;
00501 }
00502 for (i=0; i++ < len; ptr++) {
00503 if (*ptr & 1)
00504 byte |= 128;
00505 if (i & 7)
00506 byte >>= 1;
00507 else {
00508 char c = castchar(byte);
00509 rb_str_buf_cat(res, &c, 1);
00510 byte = 0;
00511 }
00512 }
00513 if (len & 7) {
00514 char c;
00515 byte >>= 7 - (len & 7);
00516 c = castchar(byte);
00517 rb_str_buf_cat(res, &c, 1);
00518 }
00519 len = j;
00520 goto grow;
00521 }
00522 break;
00523
00524 case 'B':
00525 {
00526 int byte = 0;
00527 long i, j = 0;
00528
00529 if (len > plen) {
00530 j = (len - plen + 1)/2;
00531 len = plen;
00532 }
00533 for (i=0; i++ < len; ptr++) {
00534 byte |= *ptr & 1;
00535 if (i & 7)
00536 byte <<= 1;
00537 else {
00538 char c = castchar(byte);
00539 rb_str_buf_cat(res, &c, 1);
00540 byte = 0;
00541 }
00542 }
00543 if (len & 7) {
00544 char c;
00545 byte <<= 7 - (len & 7);
00546 c = castchar(byte);
00547 rb_str_buf_cat(res, &c, 1);
00548 }
00549 len = j;
00550 goto grow;
00551 }
00552 break;
00553
00554 case 'h':
00555 {
00556 int byte = 0;
00557 long i, j = 0;
00558
00559 if (len > plen) {
00560 j = (len + 1) / 2 - (plen + 1) / 2;
00561 len = plen;
00562 }
00563 for (i=0; i++ < len; ptr++) {
00564 if (ISALPHA(*ptr))
00565 byte |= (((*ptr & 15) + 9) & 15) << 4;
00566 else
00567 byte |= (*ptr & 15) << 4;
00568 if (i & 1)
00569 byte >>= 4;
00570 else {
00571 char c = castchar(byte);
00572 rb_str_buf_cat(res, &c, 1);
00573 byte = 0;
00574 }
00575 }
00576 if (len & 1) {
00577 char c = castchar(byte);
00578 rb_str_buf_cat(res, &c, 1);
00579 }
00580 len = j;
00581 goto grow;
00582 }
00583 break;
00584
00585 case 'H':
00586 {
00587 int byte = 0;
00588 long i, j = 0;
00589
00590 if (len > plen) {
00591 j = (len + 1) / 2 - (plen + 1) / 2;
00592 len = plen;
00593 }
00594 for (i=0; i++ < len; ptr++) {
00595 if (ISALPHA(*ptr))
00596 byte |= ((*ptr & 15) + 9) & 15;
00597 else
00598 byte |= *ptr & 15;
00599 if (i & 1)
00600 byte <<= 4;
00601 else {
00602 char c = castchar(byte);
00603 rb_str_buf_cat(res, &c, 1);
00604 byte = 0;
00605 }
00606 }
00607 if (len & 1) {
00608 char c = castchar(byte);
00609 rb_str_buf_cat(res, &c, 1);
00610 }
00611 len = j;
00612 goto grow;
00613 }
00614 break;
00615 }
00616 break;
00617
00618 case 'c':
00619 case 'C':
00620 integer_size = 1;
00621 bigendian_p = BIGENDIAN_P();
00622 goto pack_integer;
00623
00624 case 's':
00625 integer_size = NATINT_LEN(short, 2);
00626 bigendian_p = BIGENDIAN_P();
00627 goto pack_integer;
00628
00629 case 'S':
00630 integer_size = NATINT_LEN(short, 2);
00631 bigendian_p = BIGENDIAN_P();
00632 goto pack_integer;
00633
00634 case 'i':
00635 integer_size = (int)sizeof(int);
00636 bigendian_p = BIGENDIAN_P();
00637 goto pack_integer;
00638
00639 case 'I':
00640 integer_size = (int)sizeof(int);
00641 bigendian_p = BIGENDIAN_P();
00642 goto pack_integer;
00643
00644 case 'l':
00645 integer_size = NATINT_LEN(long, 4);
00646 bigendian_p = BIGENDIAN_P();
00647 goto pack_integer;
00648
00649 case 'L':
00650 integer_size = NATINT_LEN(long, 4);
00651 bigendian_p = BIGENDIAN_P();
00652 goto pack_integer;
00653
00654 case 'q':
00655 integer_size = NATINT_LEN_Q;
00656 bigendian_p = BIGENDIAN_P();
00657 goto pack_integer;
00658
00659 case 'Q':
00660 integer_size = NATINT_LEN_Q;
00661 bigendian_p = BIGENDIAN_P();
00662 goto pack_integer;
00663
00664 case 'n':
00665 integer_size = 2;
00666 bigendian_p = 1;
00667 goto pack_integer;
00668
00669 case 'N':
00670 integer_size = 4;
00671 bigendian_p = 1;
00672 goto pack_integer;
00673
00674 case 'v':
00675 integer_size = 2;
00676 bigendian_p = 0;
00677 goto pack_integer;
00678
00679 case 'V':
00680 integer_size = 4;
00681 bigendian_p = 0;
00682 goto pack_integer;
00683
00684 pack_integer:
00685 if (explicit_endian) {
00686 bigendian_p = explicit_endian == '>';
00687 }
00688 if (integer_size > MAX_INTEGER_PACK_SIZE)
00689 rb_bug("unexpected intger size for pack: %d", integer_size);
00690 while (len-- > 0) {
00691 char intbuf[MAX_INTEGER_PACK_SIZE];
00692
00693 from = NEXTFROM;
00694 rb_integer_pack(from, intbuf, integer_size, 1, 0,
00695 INTEGER_PACK_2COMP |
00696 (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
00697 rb_str_buf_cat(res, intbuf, integer_size);
00698 }
00699 break;
00700
00701 case 'f':
00702 case 'F':
00703 while (len-- > 0) {
00704 float f;
00705
00706 from = NEXTFROM;
00707 f = (float)RFLOAT_VALUE(rb_to_float(from));
00708 rb_str_buf_cat(res, (char*)&f, sizeof(float));
00709 }
00710 break;
00711
00712 case 'e':
00713 while (len-- > 0) {
00714 float f;
00715 FLOAT_CONVWITH(ftmp);
00716
00717 from = NEXTFROM;
00718 f = (float)RFLOAT_VALUE(rb_to_float(from));
00719 f = HTOVF(f,ftmp);
00720 rb_str_buf_cat(res, (char*)&f, sizeof(float));
00721 }
00722 break;
00723
00724 case 'E':
00725 while (len-- > 0) {
00726 double d;
00727 DOUBLE_CONVWITH(dtmp);
00728
00729 from = NEXTFROM;
00730 d = RFLOAT_VALUE(rb_to_float(from));
00731 d = HTOVD(d,dtmp);
00732 rb_str_buf_cat(res, (char*)&d, sizeof(double));
00733 }
00734 break;
00735
00736 case 'd':
00737 case 'D':
00738 while (len-- > 0) {
00739 double d;
00740
00741 from = NEXTFROM;
00742 d = RFLOAT_VALUE(rb_to_float(from));
00743 rb_str_buf_cat(res, (char*)&d, sizeof(double));
00744 }
00745 break;
00746
00747 case 'g':
00748 while (len-- > 0) {
00749 float f;
00750 FLOAT_CONVWITH(ftmp);
00751
00752 from = NEXTFROM;
00753 f = (float)RFLOAT_VALUE(rb_to_float(from));
00754 f = HTONF(f,ftmp);
00755 rb_str_buf_cat(res, (char*)&f, sizeof(float));
00756 }
00757 break;
00758
00759 case 'G':
00760 while (len-- > 0) {
00761 double d;
00762 DOUBLE_CONVWITH(dtmp);
00763
00764 from = NEXTFROM;
00765 d = RFLOAT_VALUE(rb_to_float(from));
00766 d = HTOND(d,dtmp);
00767 rb_str_buf_cat(res, (char*)&d, sizeof(double));
00768 }
00769 break;
00770
00771 case 'x':
00772 grow:
00773 while (len >= 10) {
00774 rb_str_buf_cat(res, nul10, 10);
00775 len -= 10;
00776 }
00777 rb_str_buf_cat(res, nul10, len);
00778 break;
00779
00780 case 'X':
00781 shrink:
00782 plen = RSTRING_LEN(res);
00783 if (plen < len)
00784 rb_raise(rb_eArgError, "X outside of string");
00785 rb_str_set_len(res, plen - len);
00786 break;
00787
00788 case '@':
00789 len -= RSTRING_LEN(res);
00790 if (len > 0) goto grow;
00791 len = -len;
00792 if (len > 0) goto shrink;
00793 break;
00794
00795 case '%':
00796 rb_raise(rb_eArgError, "%% is not supported");
00797 break;
00798
00799 case 'U':
00800 while (len-- > 0) {
00801 SIGNED_VALUE l;
00802 char buf[8];
00803 int le;
00804
00805 from = NEXTFROM;
00806 from = rb_to_int(from);
00807 l = NUM2LONG(from);
00808 if (l < 0) {
00809 rb_raise(rb_eRangeError, "pack(U): value out of range");
00810 }
00811 le = rb_uv_to_utf8(buf, l);
00812 rb_str_buf_cat(res, (char*)buf, le);
00813 }
00814 break;
00815
00816 case 'u':
00817 case 'm':
00818 from = NEXTFROM;
00819 StringValue(from);
00820 ptr = RSTRING_PTR(from);
00821 plen = RSTRING_LEN(from);
00822
00823 if (len == 0 && type == 'm') {
00824 encodes(res, ptr, plen, type, 0);
00825 ptr += plen;
00826 break;
00827 }
00828 if (len <= 2)
00829 len = 45;
00830 else if (len > 63 && type == 'u')
00831 len = 63;
00832 else
00833 len = len / 3 * 3;
00834 while (plen > 0) {
00835 long todo;
00836
00837 if (plen > len)
00838 todo = len;
00839 else
00840 todo = plen;
00841 encodes(res, ptr, todo, type, 1);
00842 plen -= todo;
00843 ptr += todo;
00844 }
00845 break;
00846
00847 case 'M':
00848 from = rb_obj_as_string(NEXTFROM);
00849 if (len <= 1)
00850 len = 72;
00851 qpencode(res, from, len);
00852 break;
00853
00854 case 'P':
00855 from = THISFROM;
00856 if (!NIL_P(from)) {
00857 StringValue(from);
00858 if (RSTRING_LEN(from) < len) {
00859 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
00860 RSTRING_LEN(from), len);
00861 }
00862 }
00863 len = 1;
00864
00865 case 'p':
00866 while (len-- > 0) {
00867 char *t;
00868 from = NEXTFROM;
00869 if (NIL_P(from)) {
00870 t = 0;
00871 }
00872 else {
00873 t = StringValuePtr(from);
00874 }
00875 if (!associates) {
00876 associates = rb_ary_new();
00877 }
00878 rb_ary_push(associates, from);
00879 rb_obj_taint(from);
00880 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
00881 }
00882 break;
00883
00884 case 'w':
00885 while (len-- > 0) {
00886 VALUE buf = rb_str_new(0, 0);
00887 size_t numbytes;
00888 int sign;
00889 char *cp;
00890
00891 from = NEXTFROM;
00892 from = rb_to_int(from);
00893 numbytes = rb_absint_numwords(from, 7, NULL);
00894 if (numbytes == 0)
00895 numbytes = 1;
00896 buf = rb_str_new(NULL, numbytes);
00897
00898 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
00899
00900 if (sign < 0)
00901 rb_raise(rb_eArgError, "can't compress negative numbers");
00902 if (sign == 2)
00903 rb_bug("buffer size problem?");
00904
00905 cp = RSTRING_PTR(buf);
00906 while (1 < numbytes) {
00907 *cp |= 0x80;
00908 cp++;
00909 numbytes--;
00910 }
00911
00912 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
00913 }
00914 break;
00915
00916 default:
00917 rb_warning("unknown pack directive '%c' in '%s'",
00918 type, RSTRING_PTR(fmt));
00919 break;
00920 }
00921 }
00922
00923 if (associates) {
00924 rb_str_associate(res, associates);
00925 }
00926 OBJ_INFECT(res, fmt);
00927 switch (enc_info) {
00928 case 1:
00929 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
00930 break;
00931 case 2:
00932 rb_enc_set_index(res, rb_utf8_encindex());
00933 break;
00934 default:
00935
00936 break;
00937 }
00938 return res;
00939 }
00940
00941 static const char uu_table[] =
00942 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
00943 static const char b64_table[] =
00944 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
00945
00946 static void
00947 encodes(VALUE str, const char *s, long len, int type, int tail_lf)
00948 {
00949 enum {buff_size = 4096, encoded_unit = 4};
00950 char buff[buff_size + 1];
00951 long i = 0;
00952 const char *trans = type == 'u' ? uu_table : b64_table;
00953 char padding;
00954
00955 if (type == 'u') {
00956 buff[i++] = (char)len + ' ';
00957 padding = '`';
00958 }
00959 else {
00960 padding = '=';
00961 }
00962 while (len >= 3) {
00963 while (len >= 3 && buff_size-i >= encoded_unit) {
00964 buff[i++] = trans[077 & (*s >> 2)];
00965 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
00966 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
00967 buff[i++] = trans[077 & s[2]];
00968 s += 3;
00969 len -= 3;
00970 }
00971 if (buff_size-i < encoded_unit) {
00972 rb_str_buf_cat(str, buff, i);
00973 i = 0;
00974 }
00975 }
00976
00977 if (len == 2) {
00978 buff[i++] = trans[077 & (*s >> 2)];
00979 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
00980 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
00981 buff[i++] = padding;
00982 }
00983 else if (len == 1) {
00984 buff[i++] = trans[077 & (*s >> 2)];
00985 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
00986 buff[i++] = padding;
00987 buff[i++] = padding;
00988 }
00989 if (tail_lf) buff[i++] = '\n';
00990 rb_str_buf_cat(str, buff, i);
00991 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
00992 }
00993
00994 static const char hex_table[] = "0123456789ABCDEF";
00995
00996 static void
00997 qpencode(VALUE str, VALUE from, long len)
00998 {
00999 char buff[1024];
01000 long i = 0, n = 0, prev = EOF;
01001 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
01002 unsigned char *send = s + RSTRING_LEN(from);
01003
01004 while (s < send) {
01005 if ((*s > 126) ||
01006 (*s < 32 && *s != '\n' && *s != '\t') ||
01007 (*s == '=')) {
01008 buff[i++] = '=';
01009 buff[i++] = hex_table[*s >> 4];
01010 buff[i++] = hex_table[*s & 0x0f];
01011 n += 3;
01012 prev = EOF;
01013 }
01014 else if (*s == '\n') {
01015 if (prev == ' ' || prev == '\t') {
01016 buff[i++] = '=';
01017 buff[i++] = *s;
01018 }
01019 buff[i++] = *s;
01020 n = 0;
01021 prev = *s;
01022 }
01023 else {
01024 buff[i++] = *s;
01025 n++;
01026 prev = *s;
01027 }
01028 if (n > len) {
01029 buff[i++] = '=';
01030 buff[i++] = '\n';
01031 n = 0;
01032 prev = '\n';
01033 }
01034 if (i > 1024 - 5) {
01035 rb_str_buf_cat(str, buff, i);
01036 i = 0;
01037 }
01038 s++;
01039 }
01040 if (n > 0) {
01041 buff[i++] = '=';
01042 buff[i++] = '\n';
01043 }
01044 if (i > 0) {
01045 rb_str_buf_cat(str, buff, i);
01046 }
01047 }
01048
01049 static inline int
01050 hex2num(char c)
01051 {
01052 int n;
01053 n = ruby_digit36_to_number_table[(unsigned char)c];
01054 if (16 <= n)
01055 n = -1;
01056 return n;
01057 }
01058
/*
 * PACK_LENGTH_ADJUST_SIZE(sz): clamp `len` to the number of whole sz-byte
 * items left between `s` and `send`.  When the count was not '*' (`star` is
 * zero) the number of items that could not be read is saved in `tmp_len`;
 * otherwise tmp_len is 0.  Uses the caller's locals len, tmp_len, star, s
 * and send.
 */
01059 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
01060 tmp_len = 0; \
01061 if (len > (long)((send-s)/(sz))) { \
01062 if (!star) { \
01063 tmp_len = len-(send-s)/(sz); \
01064 } \
01065 len = (send-s)/(sz); \
01066 } \
01067 } while (0)
01068
/*
 * PACK_ITEM_ADJUST(): when items were missing (tmp_len > 0) and no block was
 * given, extend `ary` by tmp_len slots by storing nil at the new last index.
 * Uses the caller's locals tmp_len, block_p and ary.
 */
01069 #define PACK_ITEM_ADJUST() do { \
01070 if (tmp_len > 0 && !block_p) \
01071 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
01072 } while (0)
01073
01074 static VALUE
01075 infected_str_new(const char *ptr, long len, VALUE str)
01076 {
01077 VALUE s = rb_str_new(ptr, len);
01078
01079 OBJ_INFECT(s, str);
01080 return s;
01081 }
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197 static VALUE
01198 pack_unpack(VALUE str, VALUE fmt)
01199 {
01200 static const char hexdigits[] = "0123456789abcdef";
01201 char *s, *send;
01202 char *p, *pend;
01203 VALUE ary;
01204 char type;
01205 long len, tmp_len;
01206 int star;
01207 #ifdef NATINT_PACK
01208 int natint;
01209 #endif
01210 int block_p = rb_block_given_p();
01211 int signed_p, integer_size, bigendian_p;
01212 #define UNPACK_PUSH(item) do {\
01213 VALUE item_val = (item);\
01214 if (block_p) {\
01215 rb_yield(item_val);\
01216 }\
01217 else {\
01218 rb_ary_push(ary, item_val);\
01219 }\
01220 } while (0)
01221
01222 StringValue(str);
01223 StringValue(fmt);
01224 s = RSTRING_PTR(str);
01225 send = s + RSTRING_LEN(str);
01226 p = RSTRING_PTR(fmt);
01227 pend = p + RSTRING_LEN(fmt);
01228
01229 ary = block_p ? Qnil : rb_ary_new();
01230 while (p < pend) {
01231 int explicit_endian = 0;
01232 type = *p++;
01233 #ifdef NATINT_PACK
01234 natint = 0;
01235 #endif
01236
01237 if (ISSPACE(type)) continue;
01238 if (type == '#') {
01239 while ((p < pend) && (*p != '\n')) {
01240 p++;
01241 }
01242 continue;
01243 }
01244
01245 star = 0;
01246 {
01247 modifiers:
01248 switch (*p) {
01249 case '_':
01250 case '!':
01251
01252 if (strchr(natstr, type)) {
01253 #ifdef NATINT_PACK
01254 natint = 1;
01255 #endif
01256 p++;
01257 }
01258 else {
01259 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
01260 }
01261 goto modifiers;
01262
01263 case '<':
01264 case '>':
01265 if (!strchr(endstr, type)) {
01266 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
01267 }
01268 if (explicit_endian) {
01269 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
01270 }
01271 explicit_endian = *p++;
01272 goto modifiers;
01273 }
01274 }
01275
01276 if (p >= pend)
01277 len = 1;
01278 else if (*p == '*') {
01279 star = 1;
01280 len = send - s;
01281 p++;
01282 }
01283 else if (ISDIGIT(*p)) {
01284 errno = 0;
01285 len = STRTOUL(p, (char**)&p, 10);
01286 if (errno) {
01287 rb_raise(rb_eRangeError, "pack length too big");
01288 }
01289 }
01290 else {
01291 len = (type != '@');
01292 }
01293
01294 switch (type) {
01295 case '%':
01296 rb_raise(rb_eArgError, "%% is not supported");
01297 break;
01298
01299 case 'A':
01300 if (len > send - s) len = send - s;
01301 {
01302 long end = len;
01303 char *t = s + len - 1;
01304
01305 while (t >= s) {
01306 if (*t != ' ' && *t != '\0') break;
01307 t--; len--;
01308 }
01309 UNPACK_PUSH(infected_str_new(s, len, str));
01310 s += end;
01311 }
01312 break;
01313
01314 case 'Z':
01315 {
01316 char *t = s;
01317
01318 if (len > send-s) len = send-s;
01319 while (t < s+len && *t) t++;
01320 UNPACK_PUSH(infected_str_new(s, t-s, str));
01321 if (t < send) t++;
01322 s = star ? t : s+len;
01323 }
01324 break;
01325
01326 case 'a':
01327 if (len > send - s) len = send - s;
01328 UNPACK_PUSH(infected_str_new(s, len, str));
01329 s += len;
01330 break;
01331
01332 case 'b':
01333 {
01334 VALUE bitstr;
01335 char *t;
01336 int bits;
01337 long i;
01338
01339 if (p[-1] == '*' || len > (send - s) * 8)
01340 len = (send - s) * 8;
01341 bits = 0;
01342 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
01343 t = RSTRING_PTR(bitstr);
01344 for (i=0; i<len; i++) {
01345 if (i & 7) bits >>= 1;
01346 else bits = *s++;
01347 *t++ = (bits & 1) ? '1' : '0';
01348 }
01349 }
01350 break;
01351
01352 case 'B':
01353 {
01354 VALUE bitstr;
01355 char *t;
01356 int bits;
01357 long i;
01358
01359 if (p[-1] == '*' || len > (send - s) * 8)
01360 len = (send - s) * 8;
01361 bits = 0;
01362 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
01363 t = RSTRING_PTR(bitstr);
01364 for (i=0; i<len; i++) {
01365 if (i & 7) bits <<= 1;
01366 else bits = *s++;
01367 *t++ = (bits & 128) ? '1' : '0';
01368 }
01369 }
01370 break;
01371
01372 case 'h':
01373 {
01374 VALUE bitstr;
01375 char *t;
01376 int bits;
01377 long i;
01378
01379 if (p[-1] == '*' || len > (send - s) * 2)
01380 len = (send - s) * 2;
01381 bits = 0;
01382 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
01383 t = RSTRING_PTR(bitstr);
01384 for (i=0; i<len; i++) {
01385 if (i & 1)
01386 bits >>= 4;
01387 else
01388 bits = *s++;
01389 *t++ = hexdigits[bits & 15];
01390 }
01391 }
01392 break;
01393
01394 case 'H':
01395 {
01396 VALUE bitstr;
01397 char *t;
01398 int bits;
01399 long i;
01400
01401 if (p[-1] == '*' || len > (send - s) * 2)
01402 len = (send - s) * 2;
01403 bits = 0;
01404 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
01405 t = RSTRING_PTR(bitstr);
01406 for (i=0; i<len; i++) {
01407 if (i & 1)
01408 bits <<= 4;
01409 else
01410 bits = *s++;
01411 *t++ = hexdigits[(bits >> 4) & 15];
01412 }
01413 }
01414 break;
01415
01416 case 'c':
01417 signed_p = 1;
01418 integer_size = 1;
01419 bigendian_p = BIGENDIAN_P();
01420 goto unpack_integer;
01421
01422 case 'C':
01423 signed_p = 0;
01424 integer_size = 1;
01425 bigendian_p = BIGENDIAN_P();
01426 goto unpack_integer;
01427
01428 case 's':
01429 signed_p = 1;
01430 integer_size = NATINT_LEN(short, 2);
01431 bigendian_p = BIGENDIAN_P();
01432 goto unpack_integer;
01433
01434 case 'S':
01435 signed_p = 0;
01436 integer_size = NATINT_LEN(short, 2);
01437 bigendian_p = BIGENDIAN_P();
01438 goto unpack_integer;
01439
01440 case 'i':
01441 signed_p = 1;
01442 integer_size = (int)sizeof(int);
01443 bigendian_p = BIGENDIAN_P();
01444 goto unpack_integer;
01445
01446 case 'I':
01447 signed_p = 0;
01448 integer_size = (int)sizeof(int);
01449 bigendian_p = BIGENDIAN_P();
01450 goto unpack_integer;
01451
01452 case 'l':
01453 signed_p = 1;
01454 integer_size = NATINT_LEN(long, 4);
01455 bigendian_p = BIGENDIAN_P();
01456 goto unpack_integer;
01457
01458 case 'L':
01459 signed_p = 0;
01460 integer_size = NATINT_LEN(long, 4);
01461 bigendian_p = BIGENDIAN_P();
01462 goto unpack_integer;
01463
01464 case 'q':
01465 signed_p = 1;
01466 integer_size = NATINT_LEN_Q;
01467 bigendian_p = BIGENDIAN_P();
01468 goto unpack_integer;
01469
01470 case 'Q':
01471 signed_p = 0;
01472 integer_size = NATINT_LEN_Q;
01473 bigendian_p = BIGENDIAN_P();
01474 goto unpack_integer;
01475
01476 case 'n':
01477 signed_p = 0;
01478 integer_size = 2;
01479 bigendian_p = 1;
01480 goto unpack_integer;
01481
01482 case 'N':
01483 signed_p = 0;
01484 integer_size = 4;
01485 bigendian_p = 1;
01486 goto unpack_integer;
01487
01488 case 'v':
01489 signed_p = 0;
01490 integer_size = 2;
01491 bigendian_p = 0;
01492 goto unpack_integer;
01493
01494 case 'V':
01495 signed_p = 0;
01496 integer_size = 4;
01497 bigendian_p = 0;
01498 goto unpack_integer;
01499
01500 unpack_integer:
01501 if (explicit_endian) {
01502 bigendian_p = explicit_endian == '>';
01503 }
01504 PACK_LENGTH_ADJUST_SIZE(integer_size);
01505 while (len-- > 0) {
01506 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
01507 VALUE val;
01508 if (signed_p)
01509 flags |= INTEGER_PACK_2COMP;
01510 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
01511 UNPACK_PUSH(val);
01512 s += integer_size;
01513 }
01514 PACK_ITEM_ADJUST();
01515 break;
01516
01517 case 'f':
01518 case 'F':
01519 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
01520 while (len-- > 0) {
01521 float tmp;
01522 memcpy(&tmp, s, sizeof(float));
01523 s += sizeof(float);
01524 UNPACK_PUSH(DBL2NUM((double)tmp));
01525 }
01526 PACK_ITEM_ADJUST();
01527 break;
01528
01529 case 'e':
01530 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
01531 while (len-- > 0) {
01532 float tmp;
01533 FLOAT_CONVWITH(ftmp);
01534
01535 memcpy(&tmp, s, sizeof(float));
01536 s += sizeof(float);
01537 tmp = VTOHF(tmp,ftmp);
01538 UNPACK_PUSH(DBL2NUM((double)tmp));
01539 }
01540 PACK_ITEM_ADJUST();
01541 break;
01542
01543 case 'E':
01544 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
01545 while (len-- > 0) {
01546 double tmp;
01547 DOUBLE_CONVWITH(dtmp);
01548
01549 memcpy(&tmp, s, sizeof(double));
01550 s += sizeof(double);
01551 tmp = VTOHD(tmp,dtmp);
01552 UNPACK_PUSH(DBL2NUM(tmp));
01553 }
01554 PACK_ITEM_ADJUST();
01555 break;
01556
01557 case 'D':
01558 case 'd':
01559 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
01560 while (len-- > 0) {
01561 double tmp;
01562 memcpy(&tmp, s, sizeof(double));
01563 s += sizeof(double);
01564 UNPACK_PUSH(DBL2NUM(tmp));
01565 }
01566 PACK_ITEM_ADJUST();
01567 break;
01568
01569 case 'g':
01570 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
01571 while (len-- > 0) {
01572 float tmp;
01573 FLOAT_CONVWITH(ftmp);
01574
01575 memcpy(&tmp, s, sizeof(float));
01576 s += sizeof(float);
01577 tmp = NTOHF(tmp,ftmp);
01578 UNPACK_PUSH(DBL2NUM((double)tmp));
01579 }
01580 PACK_ITEM_ADJUST();
01581 break;
01582
01583 case 'G':
01584 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
01585 while (len-- > 0) {
01586 double tmp;
01587 DOUBLE_CONVWITH(dtmp);
01588
01589 memcpy(&tmp, s, sizeof(double));
01590 s += sizeof(double);
01591 tmp = NTOHD(tmp,dtmp);
01592 UNPACK_PUSH(DBL2NUM(tmp));
01593 }
01594 PACK_ITEM_ADJUST();
01595 break;
01596
01597 case 'U':
01598 if (len > send - s) len = send - s;
01599 while (len > 0 && s < send) {
01600 long alen = send - s;
01601 unsigned long l;
01602
01603 l = utf8_to_uv(s, &alen);
01604 s += alen; len--;
01605 UNPACK_PUSH(ULONG2NUM(l));
01606 }
01607 break;
01608
01609 case 'u':
01610 {
01611 VALUE buf = infected_str_new(0, (send - s)*3/4, str);
01612 char *ptr = RSTRING_PTR(buf);
01613 long total = 0;
01614
01615 while (s < send && *s > ' ' && *s < 'a') {
01616 long a,b,c,d;
01617 char hunk[4];
01618
01619 hunk[3] = '\0';
01620 len = (*s++ - ' ') & 077;
01621 total += len;
01622 if (total > RSTRING_LEN(buf)) {
01623 len -= total - RSTRING_LEN(buf);
01624 total = RSTRING_LEN(buf);
01625 }
01626
01627 while (len > 0) {
01628 long mlen = len > 3 ? 3 : len;
01629
01630 if (s < send && *s >= ' ')
01631 a = (*s++ - ' ') & 077;
01632 else
01633 a = 0;
01634 if (s < send && *s >= ' ')
01635 b = (*s++ - ' ') & 077;
01636 else
01637 b = 0;
01638 if (s < send && *s >= ' ')
01639 c = (*s++ - ' ') & 077;
01640 else
01641 c = 0;
01642 if (s < send && *s >= ' ')
01643 d = (*s++ - ' ') & 077;
01644 else
01645 d = 0;
01646 hunk[0] = (char)(a << 2 | b >> 4);
01647 hunk[1] = (char)(b << 4 | c >> 2);
01648 hunk[2] = (char)(c << 6 | d);
01649 memcpy(ptr, hunk, mlen);
01650 ptr += mlen;
01651 len -= mlen;
01652 }
01653 if (*s == '\r') s++;
01654 if (*s == '\n') s++;
01655 else if (s < send && (s+1 == send || s[1] == '\n'))
01656 s += 2;
01657 }
01658
01659 rb_str_set_len(buf, total);
01660 UNPACK_PUSH(buf);
01661 }
01662 break;
01663
01664 case 'm':
01665 {
01666 VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str);
01667 char *ptr = RSTRING_PTR(buf);
01668 int a = -1,b = -1,c = 0,d = 0;
01669 static signed char b64_xtable[256];
01670
01671 if (b64_xtable['/'] <= 0) {
01672 int i;
01673
01674 for (i = 0; i < 256; i++) {
01675 b64_xtable[i] = -1;
01676 }
01677 for (i = 0; i < 64; i++) {
01678 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
01679 }
01680 }
01681 if (len == 0) {
01682 while (s < send) {
01683 a = b = c = d = -1;
01684 a = b64_xtable[(unsigned char)*s++];
01685 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
01686 b = b64_xtable[(unsigned char)*s++];
01687 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
01688 if (*s == '=') {
01689 if (s + 2 == send && *(s + 1) == '=') break;
01690 rb_raise(rb_eArgError, "invalid base64");
01691 }
01692 c = b64_xtable[(unsigned char)*s++];
01693 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
01694 if (s + 1 == send && *s == '=') break;
01695 d = b64_xtable[(unsigned char)*s++];
01696 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
01697 *ptr++ = castchar(a << 2 | b >> 4);
01698 *ptr++ = castchar(b << 4 | c >> 2);
01699 *ptr++ = castchar(c << 6 | d);
01700 }
01701 if (c == -1) {
01702 *ptr++ = castchar(a << 2 | b >> 4);
01703 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
01704 }
01705 else if (d == -1) {
01706 *ptr++ = castchar(a << 2 | b >> 4);
01707 *ptr++ = castchar(b << 4 | c >> 2);
01708 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
01709 }
01710 }
01711 else {
01712 while (s < send) {
01713 a = b = c = d = -1;
01714 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
01715 if (s >= send) break;
01716 s++;
01717 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
01718 if (s >= send) break;
01719 s++;
01720 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
01721 if (*s == '=' || s >= send) break;
01722 s++;
01723 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
01724 if (*s == '=' || s >= send) break;
01725 s++;
01726 *ptr++ = castchar(a << 2 | b >> 4);
01727 *ptr++ = castchar(b << 4 | c >> 2);
01728 *ptr++ = castchar(c << 6 | d);
01729 a = -1;
01730 }
01731 if (a != -1 && b != -1) {
01732 if (c == -1)
01733 *ptr++ = castchar(a << 2 | b >> 4);
01734 else {
01735 *ptr++ = castchar(a << 2 | b >> 4);
01736 *ptr++ = castchar(b << 4 | c >> 2);
01737 }
01738 }
01739 }
01740 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
01741 UNPACK_PUSH(buf);
01742 }
01743 break;
01744
01745 case 'M':
01746 {
01747 VALUE buf = infected_str_new(0, send - s, str);
01748 char *ptr = RSTRING_PTR(buf), *ss = s;
01749 int c1, c2;
01750
01751 while (s < send) {
01752 if (*s == '=') {
01753 if (++s == send) break;
01754 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
01755 s++;
01756 if (*s != '\n') {
01757 if ((c1 = hex2num(*s)) == -1) break;
01758 if (++s == send) break;
01759 if ((c2 = hex2num(*s)) == -1) break;
01760 *ptr++ = castchar(c1 << 4 | c2);
01761 }
01762 }
01763 else {
01764 *ptr++ = *s;
01765 }
01766 s++;
01767 ss = s;
01768 }
01769 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
01770 rb_str_buf_cat(buf, ss, send-ss);
01771 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
01772 UNPACK_PUSH(buf);
01773 }
01774 break;
01775
01776 case '@':
01777 if (len > RSTRING_LEN(str))
01778 rb_raise(rb_eArgError, "@ outside of string");
01779 s = RSTRING_PTR(str) + len;
01780 break;
01781
01782 case 'X':
01783 if (len > s - RSTRING_PTR(str))
01784 rb_raise(rb_eArgError, "X outside of string");
01785 s -= len;
01786 break;
01787
01788 case 'x':
01789 if (len > send - s)
01790 rb_raise(rb_eArgError, "x outside of string");
01791 s += len;
01792 break;
01793
01794 case 'P':
01795 if (sizeof(char *) <= (size_t)(send - s)) {
01796 VALUE tmp = Qnil;
01797 char *t;
01798
01799 memcpy(&t, s, sizeof(char *));
01800 s += sizeof(char *);
01801
01802 if (t) {
01803 VALUE a;
01804 const VALUE *p, *pend;
01805
01806 if (!(a = rb_str_associated(str))) {
01807 rb_raise(rb_eArgError, "no associated pointer");
01808 }
01809 p = RARRAY_CONST_PTR(a);
01810 pend = p + RARRAY_LEN(a);
01811 while (p < pend) {
01812 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
01813 if (len < RSTRING_LEN(*p)) {
01814 tmp = rb_tainted_str_new(t, len);
01815 rb_str_associate(tmp, a);
01816 }
01817 else {
01818 tmp = *p;
01819 }
01820 break;
01821 }
01822 p++;
01823 }
01824 if (p == pend) {
01825 rb_raise(rb_eArgError, "non associated pointer");
01826 }
01827 }
01828 UNPACK_PUSH(tmp);
01829 }
01830 break;
01831
01832 case 'p':
01833 if (len > (long)((send - s) / sizeof(char *)))
01834 len = (send - s) / sizeof(char *);
01835 while (len-- > 0) {
01836 if ((size_t)(send - s) < sizeof(char *))
01837 break;
01838 else {
01839 VALUE tmp = Qnil;
01840 char *t;
01841
01842 memcpy(&t, s, sizeof(char *));
01843 s += sizeof(char *);
01844
01845 if (t) {
01846 VALUE a;
01847 const VALUE *p, *pend;
01848
01849 if (!(a = rb_str_associated(str))) {
01850 rb_raise(rb_eArgError, "no associated pointer");
01851 }
01852 p = RARRAY_CONST_PTR(a);
01853 pend = p + RARRAY_LEN(a);
01854 while (p < pend) {
01855 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
01856 tmp = *p;
01857 break;
01858 }
01859 p++;
01860 }
01861 if (p == pend) {
01862 rb_raise(rb_eArgError, "non associated pointer");
01863 }
01864 }
01865 UNPACK_PUSH(tmp);
01866 }
01867 }
01868 break;
01869
01870 case 'w':
01871 {
01872 char *s0 = s;
01873 while (len > 0 && s < send) {
01874 if (*s & 0x80) {
01875 s++;
01876 }
01877 else {
01878 s++;
01879 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
01880 len--;
01881 s0 = s;
01882 }
01883 }
01884 }
01885 break;
01886
01887 default:
01888 rb_warning("unknown unpack directive '%c' in '%s'",
01889 type, RSTRING_PTR(fmt));
01890 break;
01891 }
01892 }
01893
01894 return ary;
01895 }
01896
01897 #define BYTEWIDTH 8
01898
01899 int
01900 rb_uv_to_utf8(char buf[6], unsigned long uv)
01901 {
01902 if (uv <= 0x7f) {
01903 buf[0] = (char)uv;
01904 return 1;
01905 }
01906 if (uv <= 0x7ff) {
01907 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
01908 buf[1] = castchar((uv&0x3f)|0x80);
01909 return 2;
01910 }
01911 if (uv <= 0xffff) {
01912 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
01913 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
01914 buf[2] = castchar((uv&0x3f)|0x80);
01915 return 3;
01916 }
01917 if (uv <= 0x1fffff) {
01918 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
01919 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
01920 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
01921 buf[3] = castchar((uv&0x3f)|0x80);
01922 return 4;
01923 }
01924 if (uv <= 0x3ffffff) {
01925 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
01926 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
01927 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
01928 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
01929 buf[4] = castchar((uv&0x3f)|0x80);
01930 return 5;
01931 }
01932 if (uv <= 0x7fffffff) {
01933 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
01934 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
01935 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
01936 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
01937 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
01938 buf[5] = castchar((uv&0x3f)|0x80);
01939 return 6;
01940 }
01941 rb_raise(rb_eRangeError, "pack(U): value out of range");
01942
01943 UNREACHABLE;
01944 }
01945
/*
 * utf8_limits[k] is the smallest value that needs more than k bytes,
 * i.e. the smallest value legitimately encoded with k+1 bytes.
 * utf8_to_uv() indexes it with (sequence length - 1) to reject
 * sequences that are longer than necessary.
 */
static const unsigned long utf8_limits[] = {
    0UL,            /* 1 byte  */
    1UL << 7,       /* 2 bytes */
    1UL << 11,      /* 3 bytes */
    1UL << 16,      /* 4 bytes */
    1UL << 21,      /* 5 bytes */
    1UL << 26,      /* 6 bytes */
    1UL << 31,      /* first value beyond the 6-byte range */
};
01955
01956 static unsigned long
01957 utf8_to_uv(const char *p, long *lenp)
01958 {
01959 int c = *p++ & 0xff;
01960 unsigned long uv = c;
01961 long n;
01962
01963 if (!(uv & 0x80)) {
01964 *lenp = 1;
01965 return uv;
01966 }
01967 if (!(uv & 0x40)) {
01968 *lenp = 1;
01969 rb_raise(rb_eArgError, "malformed UTF-8 character");
01970 }
01971
01972 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
01973 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
01974 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
01975 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
01976 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
01977 else {
01978 *lenp = 1;
01979 rb_raise(rb_eArgError, "malformed UTF-8 character");
01980 }
01981 if (n > *lenp) {
01982 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
01983 n, *lenp);
01984 }
01985 *lenp = n--;
01986 if (n != 0) {
01987 while (n--) {
01988 c = *p++ & 0xff;
01989 if ((c & 0xc0) != 0x80) {
01990 *lenp -= n + 1;
01991 rb_raise(rb_eArgError, "malformed UTF-8 character");
01992 }
01993 else {
01994 c &= 0x3f;
01995 uv = uv << 6 | c;
01996 }
01997 }
01998 }
01999 n = *lenp - 1;
02000 if (uv < utf8_limits[n]) {
02001 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
02002 }
02003 return uv;
02004 }
02005
02006 void
02007 Init_pack(void)
02008 {
02009 rb_define_method(rb_cArray, "pack", pack_pack, 1);
02010 rb_define_method(rb_cString, "unpack", pack_unpack, 1);
02011 }
02012