00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/io.h"
00014 #include "ruby/st.h"
00015 #include "ruby/util.h"
00016 #include "ruby/encoding.h"
00017 #include "internal.h"
00018
00019 #include <math.h>
00020 #ifdef HAVE_FLOAT_H
00021 #include <float.h>
00022 #endif
00023 #ifdef HAVE_IEEEFP_H
00024 #include <ieeefp.h>
00025 #endif
00026
/* Bignums are written out in units of 2*CHAR_BIT bits ("shorts"). */
#define BITSPERSHORT (2*CHAR_BIT)
#define SHORTMASK ((1<<BITSPERSHORT)-1)
#define SHORTDN(x) RSHIFT((x),BITSPERSHORT)

#if SIZEOF_SHORT == SIZEOF_BDIGITS
#define SHORTLEN(x) (x)
#else
/*
 * Returns how many shorts are needed to write a bignum made of `len`
 * BDIGITs stored at `ds`: every digit below the top one counts as
 * sizeof(BDIGIT)/2 shorts, and the top digit only contributes the shorts
 * that remain until it has been shifted down to zero.
 */
static long
shortlen(long len, BDIGIT *ds)
{
    BDIGIT top = ds[len-1];
    int used = 0;

    for (; top; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: expands to a use of a variable named `d` in the caller's scope. */
#define SHORTLEN(x) shortlen((x),d)
#endif
00049
/* Format version; marshal_dump writes these as the first two bytes. */
#define MARSHAL_MAJOR   4
#define MARSHAL_MINOR   8

/* One-byte type tags that introduce each value in the stream. */
#define TYPE_NIL        '0'     /* nil */
#define TYPE_TRUE       'T'     /* true */
#define TYPE_FALSE      'F'     /* false */
#define TYPE_FIXNUM     'i'     /* fixnum, followed by a w_long value */

#define TYPE_EXTENDED   'e'     /* module name found as an ICLASS (see w_extended) */
#define TYPE_UCLASS     'C'     /* real class differs from the builtin base (see w_uclass) */
#define TYPE_OBJECT     'o'     /* T_OBJECT: class name plus instance variables */
#define TYPE_DATA       'd'     /* T_DATA dumped through s_dump_data */
#define TYPE_USERDEF    'u'     /* object dumped through s_dump (string payload) */
#define TYPE_USRMARSHAL 'U'     /* object dumped through s_mdump */
#define TYPE_FLOAT      'f'     /* float, written as text by w_float */
#define TYPE_BIGNUM     'l'     /* bignum: sign byte, short count, shorts */
#define TYPE_STRING     '"'     /* string bytes */
#define TYPE_REGEXP     '/'     /* regexp source plus one options byte */
#define TYPE_ARRAY      '['     /* array: length, then elements */
#define TYPE_HASH       '{'     /* hash without a default value */
#define TYPE_HASH_DEF   '}'     /* hash followed by its default value */
#define TYPE_STRUCT     'S'     /* struct: class, member count, name/value pairs */
#define TYPE_MODULE_OLD 'M'     /* not written by the dump code in this section;
                                 * presumably a legacy tag for loading - verify */
#define TYPE_CLASS      'c'     /* class, written as its path */
#define TYPE_MODULE     'm'     /* module, written as its path */

#define TYPE_SYMBOL     ':'     /* symbol name bytes */
#define TYPE_SYMLINK    ';'     /* index of a symbol written earlier */

#define TYPE_IVAR       'I'     /* the next value is followed by an ivar/encoding list */
#define TYPE_LINK       '@'     /* index of an object written earlier */
00081
00082 static ID s_dump, s_load, s_mdump, s_mload;
00083 static ID s_dump_data, s_load_data, s_alloc, s_call;
00084 static ID s_getbyte, s_read, s_write, s_binmode;
00085
00086 typedef struct {
00087 VALUE newclass;
00088 VALUE oldclass;
00089 VALUE (*dumper)(VALUE);
00090 VALUE (*loader)(VALUE, VALUE);
00091 } marshal_compat_t;
00092
00093 static st_table *compat_allocator_tbl;
00094 static VALUE compat_allocator_tbl_wrapper;
00095
00096 static int
00097 mark_marshal_compat_i(st_data_t key, st_data_t value)
00098 {
00099 marshal_compat_t *p = (marshal_compat_t *)value;
00100 rb_gc_mark(p->newclass);
00101 rb_gc_mark(p->oldclass);
00102 return ST_CONTINUE;
00103 }
00104
00105 static void
00106 mark_marshal_compat_t(void *tbl)
00107 {
00108 if (!tbl) return;
00109 st_foreach(tbl, mark_marshal_compat_i, 0);
00110 }
00111
00112 void
00113 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
00114 {
00115 marshal_compat_t *compat;
00116 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
00117
00118 if (!allocator) {
00119 rb_raise(rb_eTypeError, "no allocator");
00120 }
00121
00122 compat = ALLOC(marshal_compat_t);
00123 compat->newclass = Qnil;
00124 compat->oldclass = Qnil;
00125 compat->newclass = newclass;
00126 compat->oldclass = oldclass;
00127 compat->dumper = dumper;
00128 compat->loader = loader;
00129
00130 st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
00131 }
00132
00133 #define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED)
00134 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
00135
00136 struct dump_arg {
00137 VALUE str, dest;
00138 st_table *symbols;
00139 st_table *data;
00140 st_table *compat_tbl;
00141 st_table *encodings;
00142 int infection;
00143 };
00144
00145 struct dump_call_arg {
00146 VALUE obj;
00147 struct dump_arg *arg;
00148 int limit;
00149 };
00150
00151 static void
00152 check_dump_arg(struct dump_arg *arg, ID sym)
00153 {
00154 if (!arg->symbols) {
00155 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
00156 rb_id2name(sym));
00157 }
00158 }
00159
00160 static void clear_dump_arg(struct dump_arg *arg);
00161
00162 static void
00163 mark_dump_arg(void *ptr)
00164 {
00165 struct dump_arg *p = ptr;
00166 if (!p->symbols)
00167 return;
00168 rb_mark_set(p->data);
00169 rb_mark_hash(p->compat_tbl);
00170 rb_gc_mark(p->str);
00171 }
00172
/* GC free function: releases the tables, then the struct itself. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *dump = ptr;

    clear_dump_arg(dump);
    xfree(dump);
}
00179
00180 static size_t
00181 memsize_dump_arg(const void *ptr)
00182 {
00183 return ptr ? sizeof(struct dump_arg) : 0;
00184 }
00185
00186 static const rb_data_type_t dump_arg_data = {
00187 "dump_arg",
00188 {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
00189 };
00190
00191 static const char *
00192 must_not_be_anonymous(const char *type, VALUE path)
00193 {
00194 char *n = RSTRING_PTR(path);
00195
00196 if (!rb_enc_asciicompat(rb_enc_get(path))) {
00197
00198 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
00199 }
00200 if (n[0] == '#') {
00201 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
00202 (int)RSTRING_LEN(path), n);
00203 }
00204 return n;
00205 }
00206
00207 static VALUE
00208 class2path(VALUE klass)
00209 {
00210 VALUE path = rb_class_path(klass);
00211 const char *n;
00212
00213 n = must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
00214 if (rb_path_to_class(path) != rb_class_real(klass)) {
00215 rb_raise(rb_eTypeError, "%s can't be referred to", n);
00216 }
00217 return path;
00218 }
00219
00220 static void w_long(long, struct dump_arg*);
00221 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
00222
00223 static void
00224 w_nbyte(const char *s, long n, struct dump_arg *arg)
00225 {
00226 VALUE buf = arg->str;
00227 rb_str_buf_cat(buf, s, n);
00228 RBASIC(buf)->flags |= arg->infection;
00229 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
00230 rb_io_write(arg->dest, buf);
00231 rb_str_resize(buf, 0);
00232 }
00233 }
00234
/* Writes a single byte. */
static void
w_byte(char c, struct dump_arg *arg)
{
    char byte = c;

    w_nbyte(&byte, 1, arg);
}
00240
/* Writes a length-prefixed byte sequence: w_long(n) followed by n bytes. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    w_long(n, arg);
    w_nbyte(s, n, arg);
}

/* Length-prefixed write of a NUL-terminated C string (evaluates s twice). */
#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
00249
/* Writes the low 16 bits of x, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    int shift;

    for (shift = 0; shift <= 8; shift += 8) {
        w_byte((char)((x >> shift) & 0xff), arg);
    }
}
00256
/*
 * Writes an integer in the variable-length marshal encoding:
 *   0          -> one byte 0
 *   1..122     -> one byte x+5
 *   -123..-1   -> one byte (x-5)&0xff
 *   otherwise  -> a count byte (+n or -n) followed by n little-endian bytes
 *
 * When long is wider than 4 bytes, values that do not fit in 32 signed
 * bits raise TypeError.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char buf[sizeof(long)+1];
    int nbytes, pos;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        rb_raise(rb_eTypeError, "long too big to dump");
    }
#endif

    /* single-byte forms */
    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (0 < x && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (-124 < x && x < 0) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* multi-byte form: peel off bytes until only the sign remains */
    for (nbytes = 1; nbytes < (int)sizeof(long)+1; nbytes++) {
        buf[nbytes] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            buf[0] = nbytes;
            break;
        }
        if (x == -1) {
            buf[0] = -nbytes;
            break;
        }
    }
    for (pos = 0; pos <= nbytes; pos++) {
        w_byte(buf[pos], arg);
    }
}
00299
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16)    /* mantissa bits kept from the decimal value */

/* Number of mantissa bits consumed per loop iteration (at most 32). */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines `d` with extra binary mantissa bytes found in `buf`.
 *
 * d:   value already parsed from the decimal text
 * buf: `len` bytes; the extra bytes are only used when more than one byte
 *      is present and the first byte is zero
 *
 * The magnitude of d is truncated to DECIMAL_MANT mantissa bits, the
 * bytes after the leading zero are appended below those bits (most
 * significant byte first), and the original exponent and sign are
 * restored.  In every other case d is returned unchanged.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exp2, negative, shift = 0;
    unsigned long chunk;

    if (len == 0) return d;
    len--;
    if (len <= 0 || *buf++ != 0) return d;

    negative = d < 0;
    /* keep only the integer part of the mantissa scaled to DECIMAL_MANT bits */
    modf(ldexp(frexp(fabs(d), &exp2), DECIMAL_MANT), &d);
    do {
        chunk = 0;
        /* gather up to MANT_BITS/8 bytes; the cases fall through on purpose */
        switch (len) {
          default: chunk = *buf++ & 0xff;
#if MANT_BITS > 24
          case 3: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 16
          case 2: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 8
          case 1: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
        }
        shift -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)chunk, shift);
        len -= MANT_BITS / 8;
    } while (len > 0);

    d = ldexp(d, exp2 - DECIMAL_MANT);
    return negative ? -d : d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
00347
00348 #ifdef DBL_DIG
00349 #define FLOAT_DIG (DBL_DIG+2)
00350 #else
00351 #define FLOAT_DIG 17
00352 #endif
00353
00354 static void
00355 w_float(double d, struct dump_arg *arg)
00356 {
00357 char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve);
00358 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
00359
00360 if (isinf(d)) {
00361 if (d < 0) w_cstr("-inf", arg);
00362 else w_cstr("inf", arg);
00363 }
00364 else if (isnan(d)) {
00365 w_cstr("nan", arg);
00366 }
00367 else if (d == 0.0) {
00368 if (1.0/d < 0) w_cstr("-0", arg);
00369 else w_cstr("0", arg);
00370 }
00371 else {
00372 int decpt, sign, digs, len = 0;
00373 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
00374 if (sign) buf[len++] = '-';
00375 digs = (int)(e - p);
00376 if (decpt < -3 || decpt > digs) {
00377 buf[len++] = p[0];
00378 if (--digs > 0) buf[len++] = '.';
00379 memcpy(buf + len, p + 1, digs);
00380 len += digs;
00381 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
00382 }
00383 else if (decpt > 0) {
00384 memcpy(buf + len, p, decpt);
00385 len += decpt;
00386 if ((digs -= decpt) > 0) {
00387 buf[len++] = '.';
00388 memcpy(buf + len, p + decpt, digs);
00389 len += digs;
00390 }
00391 }
00392 else {
00393 buf[len++] = '0';
00394 buf[len++] = '.';
00395 if (decpt) {
00396 memset(buf + len, '0', -decpt);
00397 len -= decpt;
00398 }
00399 memcpy(buf + len, p, digs);
00400 len += digs;
00401 }
00402 xfree(p);
00403 w_bytes(buf, len, arg);
00404 }
00405 }
00406
00407 static void
00408 w_symbol(ID id, struct dump_arg *arg)
00409 {
00410 VALUE sym;
00411 st_data_t num;
00412 int encidx = -1;
00413
00414 if (st_lookup(arg->symbols, id, &num)) {
00415 w_byte(TYPE_SYMLINK, arg);
00416 w_long((long)num, arg);
00417 }
00418 else {
00419 sym = rb_id2str(id);
00420 if (!sym) {
00421 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id);
00422 }
00423 encidx = rb_enc_get_index(sym);
00424 if (encidx == rb_usascii_encindex() ||
00425 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
00426 encidx = -1;
00427 }
00428 else {
00429 w_byte(TYPE_IVAR, arg);
00430 }
00431 w_byte(TYPE_SYMBOL, arg);
00432 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
00433 st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00434 if (encidx != -1) {
00435 struct dump_call_arg c_arg;
00436 c_arg.limit = 1;
00437 c_arg.arg = arg;
00438 w_encoding(sym, 0, &c_arg);
00439 }
00440 }
00441 }
00442
00443 static void
00444 w_unique(VALUE s, struct dump_arg *arg)
00445 {
00446 must_not_be_anonymous("class", s);
00447 w_symbol(rb_intern_str(s), arg);
00448 }
00449
00450 static void w_object(VALUE,struct dump_arg*,int);
00451
00452 static int
00453 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
00454 {
00455 w_object(key, arg->arg, arg->limit);
00456 w_object(value, arg->arg, arg->limit);
00457 return ST_CONTINUE;
00458 }
00459
00460 #define SINGLETON_DUMP_UNABLE_P(klass) \
00461 (RCLASS_M_TBL(klass)->num_entries || \
00462 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1))
00463
00464 static void
00465 w_extended(VALUE klass, struct dump_arg *arg, int check)
00466 {
00467 if (check && FL_TEST(klass, FL_SINGLETON)) {
00468 VALUE origin = RCLASS_ORIGIN(klass);
00469 if (SINGLETON_DUMP_UNABLE_P(klass) ||
00470 (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
00471 rb_raise(rb_eTypeError, "singleton can't be dumped");
00472 }
00473 klass = RCLASS_SUPER(klass);
00474 }
00475 while (BUILTIN_TYPE(klass) == T_ICLASS) {
00476 VALUE path = rb_class_name(RBASIC(klass)->klass);
00477 w_byte(TYPE_EXTENDED, arg);
00478 w_unique(path, arg);
00479 klass = RCLASS_SUPER(klass);
00480 }
00481 }
00482
00483 static void
00484 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
00485 {
00486 VALUE path;
00487 st_data_t real_obj;
00488 VALUE klass;
00489
00490 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
00491 obj = (VALUE)real_obj;
00492 }
00493 klass = CLASS_OF(obj);
00494 w_extended(klass, arg, check);
00495 w_byte(type, arg);
00496 path = class2path(rb_class_real(klass));
00497 w_unique(path, arg);
00498 }
00499
00500 static void
00501 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
00502 {
00503 VALUE klass = CLASS_OF(obj);
00504
00505 w_extended(klass, arg, TRUE);
00506 klass = rb_class_real(klass);
00507 if (klass != super) {
00508 w_byte(TYPE_UCLASS, arg);
00509 w_unique(class2path(klass), arg);
00510 }
00511 }
00512
00513 static int
00514 w_obj_each(st_data_t key, st_data_t val, st_data_t a)
00515 {
00516 ID id = (ID)key;
00517 VALUE value = (VALUE)val;
00518 struct dump_call_arg *arg = (struct dump_call_arg *)a;
00519
00520 if (id == rb_id_encoding()) return ST_CONTINUE;
00521 if (id == rb_intern("E")) return ST_CONTINUE;
00522 w_symbol(id, arg->arg);
00523 w_object(value, arg->arg, arg->limit);
00524 return ST_CONTINUE;
00525 }
00526
00527 static void
00528 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
00529 {
00530 int encidx = rb_enc_get_index(obj);
00531 rb_encoding *enc = 0;
00532 st_data_t name;
00533
00534 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
00535 w_long(num, arg->arg);
00536 return;
00537 }
00538 w_long(num + 1, arg->arg);
00539
00540
00541 if (encidx == rb_usascii_encindex()) {
00542 w_symbol(rb_intern("E"), arg->arg);
00543 w_object(Qfalse, arg->arg, arg->limit + 1);
00544 return;
00545 }
00546 else if (encidx == rb_utf8_encindex()) {
00547 w_symbol(rb_intern("E"), arg->arg);
00548 w_object(Qtrue, arg->arg, arg->limit + 1);
00549 return;
00550 }
00551
00552 w_symbol(rb_id_encoding(), arg->arg);
00553 do {
00554 if (!arg->arg->encodings)
00555 arg->arg->encodings = st_init_strcasetable();
00556 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
00557 break;
00558 name = (st_data_t)rb_str_new2(rb_enc_name(enc));
00559 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
00560 } while (0);
00561 w_object(name, arg->arg, arg->limit + 1);
00562 }
00563
00564 static void
00565 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
00566 {
00567 long num = tbl ? tbl->num_entries : 0;
00568
00569 w_encoding(obj, num, arg);
00570 if (tbl) {
00571 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00572 }
00573 }
00574
00575 static void
00576 w_objivar(VALUE obj, struct dump_call_arg *arg)
00577 {
00578 VALUE *ptr;
00579 long i, len, num;
00580
00581 len = ROBJECT_NUMIV(obj);
00582 ptr = ROBJECT_IVPTR(obj);
00583 num = 0;
00584 for (i = 0; i < len; i++)
00585 if (ptr[i] != Qundef)
00586 num += 1;
00587
00588 w_encoding(obj, num, arg);
00589 if (num != 0) {
00590 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
00591 }
00592 }
00593
/*
 * Writes one object to the dump stream.
 *
 * obj:   the value to write
 * arg:   shared dump state (output buffer, symbol/object tables, ...)
 * limit: remaining nesting depth; ArgumentError is raised when it is 0 on
 *        entry.  It is decremented before nested values are written.
 *
 * Order of checks: back-reference to an already written object, immediate
 * values (nil/true/false/fixnum/symbol/flonum), objects responding to
 * s_mdump, objects responding to s_dump, and finally a switch on the
 * builtin type.  Raises TypeError for values that cannot be dumped.
 */
00594 static void
00595 w_object(VALUE obj, struct dump_arg *arg, int limit)
00596 {
00597 struct dump_call_arg c_arg;
00598 st_table *ivtbl = 0;
00599 st_data_t num;
00600 int hasiv = 0;
      /* has_ivars(obj, ivtbl) stores obj's generic ivar table in ivtbl and is
       * true when that table exists or when obj is a non-special object
       * whose encoding is not ASCII-8BIT. */
00601 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \
00602 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
00603
00604 if (limit == 0) {
00605 rb_raise(rb_eArgError, "exceed depth limit");
00606 }
00607
      /* nested values are written with one level less */
00608 limit--;
00609 c_arg.limit = limit;
00610 c_arg.arg = arg;
00611
      /* already written: emit a back-reference by index */
00612 if (st_lookup(arg->data, obj, &num)) {
00613 w_byte(TYPE_LINK, arg);
00614 w_long((long)num, arg);
00615 return;
00616 }
00617
00618 if (obj == Qnil) {
00619 w_byte(TYPE_NIL, arg);
00620 }
00621 else if (obj == Qtrue) {
00622 w_byte(TYPE_TRUE, arg);
00623 }
00624 else if (obj == Qfalse) {
00625 w_byte(TYPE_FALSE, arg);
00626 }
00627 else if (FIXNUM_P(obj)) {
00628 #if SIZEOF_LONG <= 4
00629 w_byte(TYPE_FIXNUM, arg);
00630 w_long(FIX2INT(obj), arg);
00631 #else
      /* the test is made on the tagged VALUE; fixnums that fail it are
       * converted to a bignum and written through the T_BIGNUM path */
00632 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
00633 w_byte(TYPE_FIXNUM, arg);
00634 w_long(FIX2LONG(obj), arg);
00635 }
00636 else {
00637 w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
00638 }
00639 #endif
00640 }
00641 else if (SYMBOL_P(obj)) {
00642 w_symbol(SYM2ID(obj), arg);
00643 }
00644 else if (FLONUM_P(obj)) {
      /* flonums are registered in the object table like heap floats */
00645 st_add_direct(arg->data, obj, arg->data->num_entries);
00646 w_byte(TYPE_FLOAT, arg);
00647 w_float(RFLOAT_VALUE(obj), arg);
00648 }
00649 else {
00650 VALUE v;
00651
      /* remember taint/untrust flags of everything that gets dumped */
00652 arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);
00653
      /* s_mdump: the object supplies a replacement value to dump */
00654 if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
00655 st_add_direct(arg->data, obj, arg->data->num_entries);
00656
00657 v = rb_funcall2(obj, s_mdump, 0, 0);
      /* the callback may have re-entered and cleared the dump state */
00658 check_dump_arg(arg, s_mdump);
00659 hasiv = has_ivars(v, ivtbl);
00660 if (hasiv) w_byte(TYPE_IVAR, arg);
00661 w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
00662 w_object(v, arg, limit);
00663 if (hasiv) w_ivar(v, ivtbl, &c_arg);
00664 return;
00665 }
      /* s_dump: the object supplies its payload as a string */
00666 if (rb_obj_respond_to(obj, s_dump, TRUE)) {
00667 st_table *ivtbl2 = 0;
00668 int hasiv2;
00669
00670 v = INT2NUM(limit);
00671 v = rb_funcall2(obj, s_dump, 1, &v);
00672 check_dump_arg(arg, s_dump);
00673 if (!RB_TYPE_P(v, T_STRING)) {
00674 rb_raise(rb_eTypeError, "_dump() must return string");
00675 }
      /* TYPE_IVAR is written once if either the object or the returned
       * string has ivars; the string's ivars take precedence below */
00676 hasiv = has_ivars(obj, ivtbl);
00677 if (hasiv) w_byte(TYPE_IVAR, arg);
00678 if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
00679 w_byte(TYPE_IVAR, arg);
00680 }
00681 w_class(TYPE_USERDEF, obj, arg, FALSE);
00682 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
00683 if (hasiv2) {
00684 w_ivar(v, ivtbl2, &c_arg);
00685 }
00686 else if (hasiv) {
00687 w_ivar(obj, ivtbl, &c_arg);
00688 }
      /* on this path the object is registered only after it was written */
00689 st_add_direct(arg->data, obj, arg->data->num_entries);
00690 return;
00691 }
00692
      /* generic path: register first so nested references become links */
00693 st_add_direct(arg->data, obj, arg->data->num_entries);
00694
00695 hasiv = has_ivars(obj, ivtbl);
00696 {
      /* if a compat record exists for the object's allocator, dump the
       * converted object and remember which original it stands for */
00697 st_data_t compat_data;
00698 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
00699 if (st_lookup(compat_allocator_tbl,
00700 (st_data_t)allocator,
00701 &compat_data)) {
00702 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
00703 VALUE real_obj = obj;
00704 obj = compat->dumper(real_obj);
00705 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
00706 if (obj != real_obj && !ivtbl) hasiv = 0;
00707 }
00708 }
00709 if (hasiv) w_byte(TYPE_IVAR, arg);
00710
00711 switch (BUILTIN_TYPE(obj)) {
00712 case T_CLASS:
00713 if (FL_TEST(obj, FL_SINGLETON)) {
00714 rb_raise(rb_eTypeError, "singleton class can't be dumped");
00715 }
00716 w_byte(TYPE_CLASS, arg);
00717 {
00718 VALUE path = class2path(obj);
00719 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00720 RB_GC_GUARD(path);
00721 }
00722 break;
00723
00724 case T_MODULE:
00725 w_byte(TYPE_MODULE, arg);
00726 {
00727 VALUE path = class2path(obj);
00728 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00729 RB_GC_GUARD(path);
00730 }
00731 break;
00732
00733 case T_FLOAT:
00734 w_byte(TYPE_FLOAT, arg);
00735 w_float(RFLOAT_VALUE(obj), arg);
00736 break;
00737
00738 case T_BIGNUM:
      /* sign byte, number of shorts, then the shorts from least
       * significant upward */
00739 w_byte(TYPE_BIGNUM, arg);
00740 {
00741 char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
00742 long len = RBIGNUM_LEN(obj);
00743 BDIGIT *d = RBIGNUM_DIGITS(obj);
00744
00745 w_byte(sign, arg);
      /* SHORTLEN may expand to shortlen((len), d), which uses `d` above */
00746 w_long(SHORTLEN(len), arg);
00747 while (len--) {
00748 #if SIZEOF_BDIGITS > SIZEOF_SHORT
00749 BDIGIT num = *d;
00750 int i;
00751
00752 for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
00753 w_short(num & SHORTMASK, arg);
00754 num = SHORTDN(num);
      /* in the last digit, stop once no non-zero bits remain */
00755 if (len == 0 && num == 0) break;
00756 }
00757 #else
00758 w_short(*d, arg);
00759 #endif
00760 d++;
00761 }
00762 }
00763 break;
00764
00765 case T_STRING:
00766 w_uclass(obj, rb_cString, arg);
00767 w_byte(TYPE_STRING, arg);
00768 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
00769 break;
00770
00771 case T_REGEXP:
00772 w_uclass(obj, rb_cRegexp, arg);
00773 w_byte(TYPE_REGEXP, arg);
00774 {
00775 int opts = rb_reg_options(obj);
00776 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
00777 w_byte((char)opts, arg);
00778 }
00779 break;
00780
00781 case T_ARRAY:
00782 w_uclass(obj, rb_cArray, arg);
00783 w_byte(TYPE_ARRAY, arg);
00784 {
00785 long i, len = RARRAY_LEN(obj);
00786
00787 w_long(len, arg);
00788 for (i=0; i<RARRAY_LEN(obj); i++) {
00789 w_object(RARRAY_PTR(obj)[i], arg, limit);
      /* dumping an element can run Ruby code; the length already
       * written must still match */
00790 if (len != RARRAY_LEN(obj)) {
00791 rb_raise(rb_eRuntimeError, "array modified during dump");
00792 }
00793 }
00794 }
00795 break;
00796
00797 case T_HASH:
00798 w_uclass(obj, rb_cHash, arg);
00799 if (NIL_P(RHASH_IFNONE(obj))) {
00800 w_byte(TYPE_HASH, arg);
00801 }
      /* NOTE(review): FL_USER2 presumably marks a default proc, as the
       * error message suggests - confirm against the hash implementation */
00802 else if (FL_TEST(obj, FL_USER2)) {
00803
00804 rb_raise(rb_eTypeError, "can't dump hash with default proc");
00805 }
00806 else {
00807 w_byte(TYPE_HASH_DEF, arg);
00808 }
00809 w_long(RHASH_SIZE(obj), arg);
00810 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
      /* a non-nil default value follows the pairs */
00811 if (!NIL_P(RHASH_IFNONE(obj))) {
00812 w_object(RHASH_IFNONE(obj), arg, limit);
00813 }
00814 break;
00815
00816 case T_STRUCT:
00817 w_class(TYPE_STRUCT, obj, arg, TRUE);
00818 {
00819 long len = RSTRUCT_LEN(obj);
00820 VALUE mem;
00821 long i;
00822
00823 w_long(len, arg);
00824 mem = rb_struct_members(obj);
      /* each member is written as name symbol followed by its value */
00825 for (i=0; i<len; i++) {
00826 w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg);
00827 w_object(RSTRUCT_PTR(obj)[i], arg, limit);
00828 }
00829 }
00830 break;
00831
00832 case T_OBJECT:
00833 w_class(TYPE_OBJECT, obj, arg, TRUE);
00834 w_objivar(obj, &c_arg);
00835 break;
00836
00837 case T_DATA:
00838 {
00839 VALUE v;
00840
00841 if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
00842 rb_raise(rb_eTypeError,
00843 "no _dump_data is defined for class %s",
00844 rb_obj_classname(obj));
00845 }
00846 v = rb_funcall2(obj, s_dump_data, 0, 0);
00847 check_dump_arg(arg, s_dump_data);
00848 w_class(TYPE_DATA, obj, arg, TRUE);
00849 w_object(v, arg, limit);
00850 }
00851 break;
00852
00853 default:
00854 rb_raise(rb_eTypeError, "can't dump %s",
00855 rb_obj_classname(obj));
00856 break;
00857 }
00858 RB_GC_GUARD(obj);
00859 }
      /* the ivar list announced by TYPE_IVAR follows the value itself */
00860 if (hasiv) {
00861 w_ivar(obj, ivtbl, &c_arg);
00862 }
00863 }
00864
00865 static void
00866 clear_dump_arg(struct dump_arg *arg)
00867 {
00868 if (!arg->symbols) return;
00869 st_free_table(arg->symbols);
00870 arg->symbols = 0;
00871 st_free_table(arg->data);
00872 arg->data = 0;
00873 st_free_table(arg->compat_tbl);
00874 arg->compat_tbl = 0;
00875 if (arg->encodings) {
00876 st_free_table(arg->encodings);
00877 arg->encodings = 0;
00878 }
00879 }
00880
00881 NORETURN(static inline void io_needed(void));
00882 static inline void
00883 io_needed(void)
00884 {
00885 rb_raise(rb_eTypeError, "instance of IO needed");
00886 }
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922 static VALUE
00923 marshal_dump(int argc, VALUE *argv)
00924 {
00925 VALUE obj, port, a1, a2;
00926 int limit = -1;
00927 struct dump_arg *arg;
00928 volatile VALUE wrapper;
00929
00930 port = Qnil;
00931 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
00932 if (argc == 3) {
00933 if (!NIL_P(a2)) limit = NUM2INT(a2);
00934 if (NIL_P(a1)) io_needed();
00935 port = a1;
00936 }
00937 else if (argc == 2) {
00938 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
00939 else if (NIL_P(a1)) io_needed();
00940 else port = a1;
00941 }
00942 RB_GC_GUARD(wrapper) = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
00943 arg->dest = 0;
00944 arg->symbols = st_init_numtable();
00945 arg->data = st_init_numtable();
00946 arg->infection = 0;
00947 arg->compat_tbl = st_init_numtable();
00948 arg->encodings = 0;
00949 arg->str = rb_str_buf_new(0);
00950 if (!NIL_P(port)) {
00951 if (!rb_respond_to(port, s_write)) {
00952 io_needed();
00953 }
00954 arg->dest = port;
00955 if (rb_check_funcall(port, s_binmode, 0, 0) != Qundef) {
00956 check_dump_arg(arg, s_binmode);
00957 }
00958 }
00959 else {
00960 port = arg->str;
00961 }
00962
00963 w_byte(MARSHAL_MAJOR, arg);
00964 w_byte(MARSHAL_MINOR, arg);
00965
00966 w_object(obj, arg, limit);
00967 if (arg->dest) {
00968 rb_io_write(arg->dest, arg->str);
00969 rb_str_resize(arg->str, 0);
00970 }
00971 clear_dump_arg(arg);
00972 RB_GC_GUARD(wrapper);
00973
00974 return port;
00975 }
00976
00977 struct load_arg {
00978 VALUE src;
00979 char *buf;
00980 long buflen;
00981 long readable;
00982 long offset;
00983 st_table *symbols;
00984 st_table *data;
00985 VALUE proc;
00986 st_table *compat_tbl;
00987 int infection;
00988 };
00989
00990 static void
00991 check_load_arg(struct load_arg *arg, ID sym)
00992 {
00993 if (!arg->symbols) {
00994 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
00995 rb_id2name(sym));
00996 }
00997 }
00998
00999 static void clear_load_arg(struct load_arg *arg);
01000
01001 static void
01002 mark_load_arg(void *ptr)
01003 {
01004 struct load_arg *p = ptr;
01005 if (!p->symbols)
01006 return;
01007 rb_mark_tbl(p->data);
01008 rb_mark_hash(p->compat_tbl);
01009 }
01010
/* GC free function: releases the load tables, then the struct itself. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *load = ptr;

    clear_load_arg(load);
    xfree(load);
}
01017
01018 static size_t
01019 memsize_load_arg(const void *ptr)
01020 {
01021 return ptr ? sizeof(struct load_arg) : 0;
01022 }
01023
01024 static const rb_data_type_t load_arg_data = {
01025 "load_arg",
01026 {mark_load_arg, free_load_arg, memsize_load_arg,},
01027 };
01028
01029 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
01030 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
01031 static VALUE r_object(struct load_arg *arg);
01032 static ID r_symbol(struct load_arg *arg);
01033 static VALUE path2class(VALUE path);
01034
01035 NORETURN(static void too_short(void));
01036 static void
01037 too_short(void)
01038 {
01039 rb_raise(rb_eArgError, "marshal data too short");
01040 }
01041
01042 static st_index_t
01043 r_prepare(struct load_arg *arg)
01044 {
01045 st_index_t idx = arg->data->num_entries;
01046
01047 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
01048 return idx;
01049 }
01050
01051 static unsigned char
01052 r_byte1_buffered(struct load_arg *arg)
01053 {
01054 if (arg->buflen == 0) {
01055 long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
01056 VALUE str, n = LONG2NUM(readable);
01057
01058 str = rb_funcall2(arg->src, s_read, 1, &n);
01059
01060 check_load_arg(arg, s_read);
01061 if (NIL_P(str)) too_short();
01062 StringValue(str);
01063 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01064 memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
01065 arg->offset = 0;
01066 arg->buflen = RSTRING_LEN(str);
01067 }
01068 arg->buflen--;
01069 return arg->buf[arg->offset++];
01070 }
01071
01072 static int
01073 r_byte(struct load_arg *arg)
01074 {
01075 int c;
01076
01077 if (RB_TYPE_P(arg->src, T_STRING)) {
01078 if (RSTRING_LEN(arg->src) > arg->offset) {
01079 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
01080 }
01081 else {
01082 too_short();
01083 }
01084 }
01085 else {
01086 if (arg->readable >0 || arg->buflen > 0) {
01087 c = r_byte1_buffered(arg);
01088 }
01089 else {
01090 VALUE v = rb_funcall2(arg->src, s_getbyte, 0, 0);
01091 check_load_arg(arg, s_getbyte);
01092 if (NIL_P(v)) rb_eof_error();
01093 c = (unsigned char)NUM2CHR(v);
01094 }
01095 }
01096 return c;
01097 }
01098
01099 static void
01100 long_toobig(int size)
01101 {
01102 rb_raise(rb_eTypeError, "long too big for this architecture (size "
01103 STRINGIZE(SIZEOF_LONG)", given %d)", size);
01104 }
01105
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else
/* fallback for compilers without `signed char` */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif

/*
 * Reads an integer written by w_long.
 *
 * The first byte c is sign-extended:
 *   c == 0          -> 0
 *   5..127          -> c - 5
 *   -128..-5        -> c + 5
 *   1..4 / -1..-4   -> |c| little-endian bytes follow; for negative c the
 *                      missing high bytes are all ones
 * A byte count larger than sizeof(long) raises TypeError.
 *
 * The value is assembled in an unsigned long: shifting a byte into the
 * top byte of a signed long overflows, which is undefined behaviour.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long bits;
    int c = SIGN_EXTEND_CHAR(r_byte(arg));
    int i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = 0;
        for (i = 0; i < c; i++) {
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = ~0UL;
        for (i = 0; i < c; i++) {
            /* replace byte i of the all-ones pattern with the stored byte */
            bits &= ~(0xffUL << (8*i));
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)bits;
}
01146
01147 static VALUE
01148 r_bytes1(long len, struct load_arg *arg)
01149 {
01150 VALUE str, n = LONG2NUM(len);
01151
01152 str = rb_funcall2(arg->src, s_read, 1, &n);
01153 check_load_arg(arg, s_read);
01154 if (NIL_P(str)) too_short();
01155 StringValue(str);
01156 if (RSTRING_LEN(str) != len) too_short();
01157 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01158
01159 return str;
01160 }
01161
01162 static VALUE
01163 r_bytes1_buffered(long len, struct load_arg *arg)
01164 {
01165 VALUE str;
01166
01167 if (len <= arg->buflen) {
01168 str = rb_str_new(arg->buf+arg->offset, len);
01169 arg->offset += len;
01170 arg->buflen -= len;
01171 }
01172 else {
01173 long buflen = arg->buflen;
01174 long readable = arg->readable + 1;
01175 long tmp_len, read_len, need_len = len - buflen;
01176 VALUE tmp, n;
01177
01178 readable = readable < BUFSIZ ? readable : BUFSIZ;
01179 read_len = need_len > readable ? need_len : readable;
01180 n = LONG2NUM(read_len);
01181 tmp = rb_funcall2(arg->src, s_read, 1, &n);
01182
01183 check_load_arg(arg, s_read);
01184 if (NIL_P(tmp)) too_short();
01185 StringValue(tmp);
01186
01187 tmp_len = RSTRING_LEN(tmp);
01188
01189 if (tmp_len < need_len) too_short();
01190 arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
01191
01192 str = rb_str_new(arg->buf+arg->offset, buflen);
01193 rb_str_cat(str, RSTRING_PTR(tmp), need_len);
01194
01195 if (tmp_len > need_len) {
01196 buflen = tmp_len - need_len;
01197 memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
01198 arg->buflen = buflen;
01199 }
01200 else {
01201 arg->buflen = 0;
01202 }
01203 arg->offset = 0;
01204 }
01205
01206 return str;
01207 }
01208
01209 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
01210
01211 static VALUE
01212 r_bytes0(long len, struct load_arg *arg)
01213 {
01214 VALUE str;
01215
01216 if (len == 0) return rb_str_new(0, 0);
01217 if (RB_TYPE_P(arg->src, T_STRING)) {
01218 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
01219 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
01220 arg->offset += len;
01221 }
01222 else {
01223 too_short();
01224 }
01225 }
01226 else {
01227 if (arg->readable > 0 || arg->buflen > 0) {
01228 str = r_bytes1_buffered(len, arg);
01229 }
01230 else {
01231 str = r_bytes1(len, arg);
01232 }
01233 }
01234 return str;
01235 }
01236
01237 static int
01238 id2encidx(ID id, VALUE val)
01239 {
01240 if (id == rb_id_encoding()) {
01241 int idx = rb_enc_find_index(StringValueCStr(val));
01242 return idx;
01243 }
01244 else if (id == rb_intern("E")) {
01245 if (val == Qfalse) return rb_usascii_encindex();
01246 else if (val == Qtrue) return rb_utf8_encindex();
01247
01248 }
01249 return -1;
01250 }
01251
01252 static ID
01253 r_symlink(struct load_arg *arg)
01254 {
01255 st_data_t id;
01256 long num = r_long(arg);
01257
01258 if (!st_lookup(arg->symbols, num, &id)) {
01259 rb_raise(rb_eArgError, "bad symbol");
01260 }
01261 return (ID)id;
01262 }
01263
01264 static ID
01265 r_symreal(struct load_arg *arg, int ivar)
01266 {
01267 VALUE s = r_bytes(arg);
01268 ID id;
01269 int idx = -1;
01270 st_index_t n = arg->symbols->num_entries;
01271
01272 st_insert(arg->symbols, (st_data_t)n, (st_data_t)0);
01273 if (ivar) {
01274 long num = r_long(arg);
01275 while (num-- > 0) {
01276 id = r_symbol(arg);
01277 idx = id2encidx(id, r_object(arg));
01278 }
01279 }
01280 if (idx > 0) rb_enc_associate_index(s, idx);
01281 id = rb_intern_str(s);
01282 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
01283
01284 return id;
01285 }
01286
01287 static ID
01288 r_symbol(struct load_arg *arg)
01289 {
01290 int type, ivar = 0;
01291
01292 again:
01293 switch ((type = r_byte(arg))) {
01294 default:
01295 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
01296 case TYPE_IVAR:
01297 ivar = 1;
01298 goto again;
01299 case TYPE_SYMBOL:
01300 return r_symreal(arg, ivar);
01301 case TYPE_SYMLINK:
01302 if (ivar) {
01303 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
01304 }
01305 return r_symlink(arg);
01306 }
01307 }
01308
01309 static VALUE
01310 r_unique(struct load_arg *arg)
01311 {
01312 return rb_id2str(r_symbol(arg));
01313 }
01314
01315 static VALUE
01316 r_string(struct load_arg *arg)
01317 {
01318 return r_bytes(arg);
01319 }
01320
01321 static VALUE
01322 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
01323 {
01324 st_data_t real_obj = (VALUE)Qundef;
01325 if (st_lookup(arg->compat_tbl, v, &real_obj)) {
01326 st_insert(arg->data, num, (st_data_t)real_obj);
01327 }
01328 else {
01329 st_insert(arg->data, num, (st_data_t)v);
01330 }
01331 if (arg->infection &&
01332 !RB_TYPE_P(v, T_CLASS) && !RB_TYPE_P(v, T_MODULE)) {
01333 FL_SET(v, arg->infection);
01334 if ((VALUE)real_obj != Qundef)
01335 FL_SET((VALUE)real_obj, arg->infection);
01336 }
01337 return v;
01338 }
01339
01340 static VALUE
01341 r_leave(VALUE v, struct load_arg *arg)
01342 {
01343 st_data_t data;
01344 if (st_lookup(arg->compat_tbl, v, &data)) {
01345 VALUE real_obj = (VALUE)data;
01346 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
01347 st_data_t key = v;
01348 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01349 marshal_compat_t *compat = (marshal_compat_t*)data;
01350 compat->loader(real_obj, v);
01351 }
01352 st_delete(arg->compat_tbl, &key, 0);
01353 v = real_obj;
01354 }
01355 if (arg->proc) {
01356 v = rb_funcall(arg->proc, s_call, 1, v);
01357 check_load_arg(arg, s_call);
01358 }
01359 return v;
01360 }
01361
01362 static void
01363 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
01364 {
01365 long len;
01366
01367 len = r_long(arg);
01368 if (len > 0) {
01369 do {
01370 ID id = r_symbol(arg);
01371 VALUE val = r_object(arg);
01372 int idx = id2encidx(id, val);
01373 if (idx >= 0) {
01374 rb_enc_associate_index(obj, idx);
01375 if (has_encoding) *has_encoding = TRUE;
01376 }
01377 else {
01378 rb_ivar_set(obj, id, val);
01379 }
01380 } while (--len > 0);
01381 }
01382 }
01383
01384 static VALUE
01385 path2class(VALUE path)
01386 {
01387 VALUE v = rb_path_to_class(path);
01388
01389 if (!RB_TYPE_P(v, T_CLASS)) {
01390 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
01391 }
01392 return v;
01393 }
01394
01395 static VALUE
01396 path2module(VALUE path)
01397 {
01398 VALUE v = rb_path_to_class(path);
01399
01400 if (!RB_TYPE_P(v, T_MODULE)) {
01401 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
01402 }
01403 return v;
01404 }
01405
01406 static VALUE
01407 obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
01408 {
01409 st_data_t data;
01410 rb_alloc_func_t allocator;
01411
01412 allocator = rb_get_alloc_func(klass);
01413 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01414 marshal_compat_t *compat = (marshal_compat_t*)data;
01415 VALUE real_obj = rb_obj_alloc(klass);
01416 VALUE obj = rb_obj_alloc(compat->oldclass);
01417 if (oldclass) *oldclass = compat->oldclass;
01418 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
01419 return obj;
01420 }
01421
01422 return rb_obj_alloc(klass);
01423 }
01424
01425 static VALUE
01426 obj_alloc_by_path(VALUE path, struct load_arg *arg)
01427 {
01428 return obj_alloc_by_klass(path2class(path), arg, 0);
01429 }
01430
01431 static VALUE
01432 append_extmod(VALUE obj, VALUE extmod)
01433 {
01434 long i = RARRAY_LEN(extmod);
01435 while (i > 0) {
01436 VALUE m = RARRAY_PTR(extmod)[--i];
01437 rb_extend_object(obj, m);
01438 }
01439 return obj;
01440 }
01441
01442 #define prohibit_ivar(type, str) do { \
01443 if (!ivp || !*ivp) break; \
01444 rb_raise(rb_eTypeError, \
01445 "can't override instance variable of "type" `%"PRIsVALUE"'", \
01446 (str)); \
01447 } while (0)
01448
01449 static VALUE
01450 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
01451 {
01452 VALUE v = Qnil;
01453 int type = r_byte(arg);
01454 long id;
01455 st_data_t link;
01456
01457 switch (type) {
01458 case TYPE_LINK:
01459 id = r_long(arg);
01460 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
01461 rb_raise(rb_eArgError, "dump format error (unlinked)");
01462 }
01463 v = (VALUE)link;
01464 if (arg->proc) {
01465 v = rb_funcall(arg->proc, s_call, 1, v);
01466 check_load_arg(arg, s_call);
01467 }
01468 break;
01469
01470 case TYPE_IVAR:
01471 {
01472 int ivar = TRUE;
01473
01474 v = r_object0(arg, &ivar, extmod);
01475 if (ivar) r_ivar(v, NULL, arg);
01476 }
01477 break;
01478
01479 case TYPE_EXTENDED:
01480 {
01481 VALUE m = path2module(r_unique(arg));
01482
01483 if (NIL_P(extmod)) extmod = rb_ary_tmp_new(0);
01484 rb_ary_push(extmod, m);
01485
01486 v = r_object0(arg, 0, extmod);
01487 while (RARRAY_LEN(extmod) > 0) {
01488 m = rb_ary_pop(extmod);
01489 rb_extend_object(v, m);
01490 }
01491 }
01492 break;
01493
01494 case TYPE_UCLASS:
01495 {
01496 VALUE c = path2class(r_unique(arg));
01497
01498 if (FL_TEST(c, FL_SINGLETON)) {
01499 rb_raise(rb_eTypeError, "singleton can't be loaded");
01500 }
01501 v = r_object0(arg, 0, extmod);
01502 if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
01503 format_error:
01504 rb_raise(rb_eArgError, "dump format error (user class)");
01505 }
01506 if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01507 VALUE tmp = rb_obj_alloc(c);
01508
01509 if (TYPE(v) != TYPE(tmp)) goto format_error;
01510 }
01511 RBASIC(v)->klass = c;
01512 }
01513 break;
01514
01515 case TYPE_NIL:
01516 v = Qnil;
01517 v = r_leave(v, arg);
01518 break;
01519
01520 case TYPE_TRUE:
01521 v = Qtrue;
01522 v = r_leave(v, arg);
01523 break;
01524
01525 case TYPE_FALSE:
01526 v = Qfalse;
01527 v = r_leave(v, arg);
01528 break;
01529
01530 case TYPE_FIXNUM:
01531 {
01532 long i = r_long(arg);
01533 v = LONG2FIX(i);
01534 }
01535 v = r_leave(v, arg);
01536 break;
01537
01538 case TYPE_FLOAT:
01539 {
01540 double d;
01541 VALUE str = r_bytes(arg);
01542 const char *ptr = RSTRING_PTR(str);
01543
01544 if (strcmp(ptr, "nan") == 0) {
01545 d = NAN;
01546 }
01547 else if (strcmp(ptr, "inf") == 0) {
01548 d = INFINITY;
01549 }
01550 else if (strcmp(ptr, "-inf") == 0) {
01551 d = -INFINITY;
01552 }
01553 else {
01554 char *e;
01555 d = strtod(ptr, &e);
01556 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
01557 }
01558 v = DBL2NUM(d);
01559 v = r_entry(v, arg);
01560 v = r_leave(v, arg);
01561 }
01562 break;
01563
01564 case TYPE_BIGNUM:
01565 {
01566 long len;
01567 BDIGIT *digits;
01568 VALUE data;
01569
01570 NEWOBJ_OF(big, struct RBignum, rb_cBignum, T_BIGNUM);
01571 RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+'));
01572 len = r_long(arg);
01573 data = r_bytes0(len * 2, arg);
01574 #if SIZEOF_BDIGITS == SIZEOF_SHORT
01575 rb_big_resize((VALUE)big, len);
01576 #else
01577 rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT));
01578 #endif
01579 digits = RBIGNUM_DIGITS(big);
01580 MEMCPY(digits, RSTRING_PTR(data), char, len * 2);
01581 rb_str_resize(data, 0L);
01582 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01583 MEMZERO((char *)digits + len * 2, char,
01584 RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2);
01585 #endif
01586 len = RBIGNUM_LEN(big);
01587 while (len > 0) {
01588 unsigned char *p = (unsigned char *)digits;
01589 BDIGIT num = 0;
01590 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01591 int shift = 0;
01592 int i;
01593
01594 for (i=0; i<SIZEOF_BDIGITS; i++) {
01595 num |= (int)p[i] << shift;
01596 shift += 8;
01597 }
01598 #else
01599 num = p[0] | (p[1] << 8);
01600 #endif
01601 *digits++ = num;
01602 len--;
01603 }
01604 v = rb_big_norm((VALUE)big);
01605 v = r_entry(v, arg);
01606 v = r_leave(v, arg);
01607 }
01608 break;
01609
01610 case TYPE_STRING:
01611 v = r_entry(r_string(arg), arg);
01612 v = r_leave(v, arg);
01613 break;
01614
01615 case TYPE_REGEXP:
01616 {
01617 VALUE str = r_bytes(arg);
01618 int options = r_byte(arg);
01619 int has_encoding = FALSE;
01620 st_index_t idx = r_prepare(arg);
01621
01622 if (ivp) {
01623 r_ivar(str, &has_encoding, arg);
01624 *ivp = FALSE;
01625 }
01626 if (!has_encoding) {
01627
01628 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
01629 long len = RSTRING_LEN(str);
01630 long bs = 0;
01631 for (; len-- > 0; *dst++ = *src++) {
01632 switch (*src) {
01633 case '\\': bs++; break;
01634 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
01635 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
01636 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
01637 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
01638 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
01639 if (bs & 1) --dst;
01640 default: bs = 0; break;
01641 }
01642 }
01643 rb_str_set_len(str, dst - ptr);
01644 }
01645 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
01646 v = r_leave(v, arg);
01647 }
01648 break;
01649
01650 case TYPE_ARRAY:
01651 {
01652 volatile long len = r_long(arg);
01653
01654 v = rb_ary_new2(len);
01655 v = r_entry(v, arg);
01656 arg->readable += len - 1;
01657 while (len--) {
01658 rb_ary_push(v, r_object(arg));
01659 arg->readable--;
01660 }
01661 v = r_leave(v, arg);
01662 arg->readable++;
01663 }
01664 break;
01665
01666 case TYPE_HASH:
01667 case TYPE_HASH_DEF:
01668 {
01669 long len = r_long(arg);
01670
01671 v = rb_hash_new();
01672 v = r_entry(v, arg);
01673 arg->readable += (len - 1) * 2;
01674 while (len--) {
01675 VALUE key = r_object(arg);
01676 VALUE value = r_object(arg);
01677 rb_hash_aset(v, key, value);
01678 arg->readable -= 2;
01679 }
01680 arg->readable += 2;
01681 if (type == TYPE_HASH_DEF) {
01682 RHASH_IFNONE(v) = r_object(arg);
01683 }
01684 v = r_leave(v, arg);
01685 }
01686 break;
01687
01688 case TYPE_STRUCT:
01689 {
01690 VALUE mem, values;
01691 volatile long i;
01692 ID slot;
01693 st_index_t idx = r_prepare(arg);
01694 VALUE klass = path2class(r_unique(arg));
01695 long len = r_long(arg);
01696
01697 v = rb_obj_alloc(klass);
01698 if (!RB_TYPE_P(v, T_STRUCT)) {
01699 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
01700 }
01701 mem = rb_struct_s_members(klass);
01702 if (RARRAY_LEN(mem) != len) {
01703 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
01704 rb_class2name(klass));
01705 }
01706
01707 arg->readable += (len - 1) * 2;
01708 v = r_entry0(v, idx, arg);
01709 values = rb_ary_new2(len);
01710 for (i=0; i<len; i++) {
01711 slot = r_symbol(arg);
01712
01713 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) {
01714 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01715 rb_class2name(klass),
01716 rb_id2name(slot),
01717 rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
01718 }
01719 rb_ary_push(values, r_object(arg));
01720 arg->readable -= 2;
01721 }
01722 rb_struct_initialize(v, values);
01723 v = r_leave(v, arg);
01724 arg->readable += 2;
01725 }
01726 break;
01727
01728 case TYPE_USERDEF:
01729 {
01730 VALUE klass = path2class(r_unique(arg));
01731 VALUE data;
01732
01733 if (!rb_obj_respond_to(klass, s_load, TRUE)) {
01734 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01735 rb_class2name(klass));
01736 }
01737 data = r_string(arg);
01738 if (ivp) {
01739 r_ivar(data, NULL, arg);
01740 *ivp = FALSE;
01741 }
01742 v = rb_funcall2(klass, s_load, 1, &data);
01743 check_load_arg(arg, s_load);
01744 v = r_entry(v, arg);
01745 v = r_leave(v, arg);
01746 }
01747 break;
01748
01749 case TYPE_USRMARSHAL:
01750 {
01751 VALUE klass = path2class(r_unique(arg));
01752 VALUE oldclass = 0;
01753 VALUE data;
01754
01755 v = obj_alloc_by_klass(klass, arg, &oldclass);
01756 if (!NIL_P(extmod)) {
01757
01758 append_extmod(v, extmod);
01759 }
01760 if (!rb_obj_respond_to(v, s_mload, TRUE)) {
01761 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01762 rb_class2name(klass));
01763 }
01764 v = r_entry(v, arg);
01765 data = r_object(arg);
01766 rb_funcall2(v, s_mload, 1, &data);
01767 check_load_arg(arg, s_mload);
01768 v = r_leave(v, arg);
01769 if (!NIL_P(extmod)) {
01770 if (oldclass) append_extmod(v, extmod);
01771 rb_ary_clear(extmod);
01772 }
01773 }
01774 break;
01775
01776 case TYPE_OBJECT:
01777 {
01778 st_index_t idx = r_prepare(arg);
01779 v = obj_alloc_by_path(r_unique(arg), arg);
01780 if (!RB_TYPE_P(v, T_OBJECT)) {
01781 rb_raise(rb_eArgError, "dump format error");
01782 }
01783 v = r_entry0(v, idx, arg);
01784 r_ivar(v, NULL, arg);
01785 v = r_leave(v, arg);
01786 }
01787 break;
01788
01789 case TYPE_DATA:
01790 {
01791 VALUE klass = path2class(r_unique(arg));
01792 VALUE oldclass = 0;
01793 VALUE r;
01794
01795 v = obj_alloc_by_klass(klass, arg, &oldclass);
01796 if (!RB_TYPE_P(v, T_DATA)) {
01797 rb_raise(rb_eArgError, "dump format error");
01798 }
01799 v = r_entry(v, arg);
01800 if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
01801 rb_raise(rb_eTypeError,
01802 "class %s needs to have instance method `_load_data'",
01803 rb_class2name(klass));
01804 }
01805 r = r_object0(arg, 0, extmod);
01806 rb_funcall2(v, s_load_data, 1, &r);
01807 check_load_arg(arg, s_load_data);
01808 v = r_leave(v, arg);
01809 }
01810 break;
01811
01812 case TYPE_MODULE_OLD:
01813 {
01814 VALUE str = r_bytes(arg);
01815
01816 v = rb_path_to_class(str);
01817 prohibit_ivar("class/module", str);
01818 v = r_entry(v, arg);
01819 v = r_leave(v, arg);
01820 }
01821 break;
01822
01823 case TYPE_CLASS:
01824 {
01825 VALUE str = r_bytes(arg);
01826
01827 v = path2class(str);
01828 prohibit_ivar("class", str);
01829 v = r_entry(v, arg);
01830 v = r_leave(v, arg);
01831 }
01832 break;
01833
01834 case TYPE_MODULE:
01835 {
01836 VALUE str = r_bytes(arg);
01837
01838 v = path2module(str);
01839 prohibit_ivar("module", str);
01840 v = r_entry(v, arg);
01841 v = r_leave(v, arg);
01842 }
01843 break;
01844
01845 case TYPE_SYMBOL:
01846 if (ivp) {
01847 v = ID2SYM(r_symreal(arg, *ivp));
01848 *ivp = FALSE;
01849 }
01850 else {
01851 v = ID2SYM(r_symreal(arg, 0));
01852 }
01853 v = r_leave(v, arg);
01854 break;
01855
01856 case TYPE_SYMLINK:
01857 v = ID2SYM(r_symlink(arg));
01858 break;
01859
01860 default:
01861 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01862 break;
01863 }
01864 return v;
01865 }
01866
01867 static VALUE
01868 r_object(struct load_arg *arg)
01869 {
01870 return r_object0(arg, 0, Qnil);
01871 }
01872
01873 static void
01874 clear_load_arg(struct load_arg *arg)
01875 {
01876 if (arg->buf) {
01877 xfree(arg->buf);
01878 arg->buf = 0;
01879 }
01880 arg->buflen = 0;
01881 arg->offset = 0;
01882 arg->readable = 0;
01883 if (!arg->symbols) return;
01884 st_free_table(arg->symbols);
01885 arg->symbols = 0;
01886 st_free_table(arg->data);
01887 arg->data = 0;
01888 st_free_table(arg->compat_tbl);
01889 arg->compat_tbl = 0;
01890 }
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904
01905
01906 static VALUE
01907 marshal_load(int argc, VALUE *argv)
01908 {
01909 VALUE port, proc;
01910 int major, minor, infection = 0;
01911 VALUE v;
01912 volatile VALUE wrapper;
01913 struct load_arg *arg;
01914
01915 rb_scan_args(argc, argv, "11", &port, &proc);
01916 v = rb_check_string_type(port);
01917 if (!NIL_P(v)) {
01918 infection = (int)FL_TEST(port, MARSHAL_INFECTION);
01919 port = v;
01920 }
01921 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
01922 rb_check_funcall(port, s_binmode, 0, 0);
01923 infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED));
01924 }
01925 else {
01926 io_needed();
01927 }
01928 RB_GC_GUARD(wrapper) = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
01929 arg->infection = infection;
01930 arg->src = port;
01931 arg->offset = 0;
01932 arg->symbols = st_init_numtable();
01933 arg->data = st_init_numtable();
01934 arg->compat_tbl = st_init_numtable();
01935 arg->proc = 0;
01936 arg->readable = 0;
01937
01938 if (NIL_P(v))
01939 arg->buf = xmalloc(BUFSIZ);
01940 else
01941 arg->buf = 0;
01942
01943 major = r_byte(arg);
01944 minor = r_byte(arg);
01945 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01946 clear_load_arg(arg);
01947 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01948 \tformat version %d.%d required; %d.%d given",
01949 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01950 }
01951 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01952 rb_warn("incompatible marshal file format (can be read)\n\
01953 \tformat version %d.%d required; %d.%d given",
01954 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01955 }
01956
01957 if (!NIL_P(proc)) arg->proc = proc;
01958 v = r_object(arg);
01959 clear_load_arg(arg);
01960 RB_GC_GUARD(wrapper);
01961
01962 return v;
01963 }
01964
01965
01966
01967
01968
01969
01970
01971
01972
01973
01974
01975
01976
01977
01978
01979
01980
01981
01982
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992
01993
01994
01995
01996
01997
01998
01999
02000
02001
02002
02003
02004
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
02041
02042
02043
02044
02045
02046
02047
02048
02049
02050
02051
02052
02053
02054
02055
02056
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073
02074
02075
02076 void
02077 Init_marshal(void)
02078 {
02079 #undef rb_intern
02080 #define rb_intern(str) rb_intern_const(str)
02081
02082 VALUE rb_mMarshal = rb_define_module("Marshal");
02083
02084 s_dump = rb_intern("_dump");
02085 s_load = rb_intern("_load");
02086 s_mdump = rb_intern("marshal_dump");
02087 s_mload = rb_intern("marshal_load");
02088 s_dump_data = rb_intern("_dump_data");
02089 s_load_data = rb_intern("_load_data");
02090 s_alloc = rb_intern("_alloc");
02091 s_call = rb_intern("call");
02092 s_getbyte = rb_intern("getbyte");
02093 s_read = rb_intern("read");
02094 s_write = rb_intern("write");
02095 s_binmode = rb_intern("binmode");
02096
02097 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
02098 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
02099 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
02100
02101 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
02102 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
02103
02104 compat_allocator_tbl = st_init_numtable();
02105 compat_allocator_tbl_wrapper =
02106 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
02107 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
02108 }
02109
02110 VALUE
02111 rb_marshal_dump(VALUE obj, VALUE port)
02112 {
02113 int argc = 1;
02114 VALUE argv[2];
02115
02116 argv[0] = obj;
02117 argv[1] = port;
02118 if (!NIL_P(port)) argc = 2;
02119 return marshal_dump(argc, argv);
02120 }
02121
02122 VALUE
02123 rb_marshal_load(VALUE port)
02124 {
02125 return marshal_load(1, &port);
02126 }
02127