00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/io.h"
00014 #include "ruby/st.h"
00015 #include "ruby/util.h"
00016 #include "ruby/encoding.h"
00017 #include "internal.h"
00018
00019 #include <math.h>
00020 #ifdef HAVE_FLOAT_H
00021 #include <float.h>
00022 #endif
00023 #ifdef HAVE_IEEEFP_H
00024 #include <ieeefp.h>
00025 #endif
00026
00027 #define BITSPERSHORT (2*CHAR_BIT)
00028 #define SHORTMASK ((1<<BITSPERSHORT)-1)
00029 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
00030
00031 #if SIZEOF_SHORT == SIZEOF_BDIGITS
00032 #define SHORTLEN(x) (x)
00033 #else
00034 static long
00035 shortlen(long len, BDIGIT *ds)
00036 {
00037 BDIGIT num;
00038 int offset = 0;
00039
00040 num = ds[len-1];
00041 while (num) {
00042 num = SHORTDN(num);
00043 offset++;
00044 }
00045 return (len - 1)*SIZEOF_BDIGITS/2 + offset;
00046 }
00047 #define SHORTLEN(x) shortlen((x),d)
00048 #endif
00049
/* Format version; marshal_dump writes these two bytes first. */
00050 #define MARSHAL_MAJOR 4
00051 #define MARSHAL_MINOR 8
00052
/* One-byte tags for immediates (see the first branches of w_object). */
00053 #define TYPE_NIL '0'
00054 #define TYPE_TRUE 'T'
00055 #define TYPE_FALSE 'F'
00056 #define TYPE_FIXNUM 'i'
00057
/*
 * Tags for heap objects.  TYPE_EXTENDED is written by w_extended,
 * TYPE_UCLASS by w_uclass, and the rest by the per-type branches of
 * w_object.  TYPE_MODULE_OLD is not written by any code in this view.
 */
00058 #define TYPE_EXTENDED 'e'
00059 #define TYPE_UCLASS 'C'
00060 #define TYPE_OBJECT 'o'
00061 #define TYPE_DATA 'd'
00062 #define TYPE_USERDEF 'u'
00063 #define TYPE_USRMARSHAL 'U'
00064 #define TYPE_FLOAT 'f'
00065 #define TYPE_BIGNUM 'l'
00066 #define TYPE_STRING '"'
00067 #define TYPE_REGEXP '/'
00068 #define TYPE_ARRAY '['
00069 #define TYPE_HASH '{'
00070 #define TYPE_HASH_DEF '}'
00071 #define TYPE_STRUCT 'S'
00072 #define TYPE_MODULE_OLD 'M'
00073 #define TYPE_CLASS 'c'
00074 #define TYPE_MODULE 'm'
00075
/* A symbol written in full, and a back-reference to one already written (w_symbol). */
00076 #define TYPE_SYMBOL ':'
00077 #define TYPE_SYMLINK ';'
00078
/* TYPE_IVAR prefixes a value followed by ivars/encoding; TYPE_LINK refers back to an already dumped object. */
00079 #define TYPE_IVAR 'I'
00080 #define TYPE_LINK '@'
00081
00082 static ID s_dump, s_load, s_mdump, s_mload;
00083 static ID s_dump_data, s_load_data, s_alloc, s_call;
00084 static ID s_getbyte, s_read, s_write, s_binmode;
00085
/*
 * One compatibility registration created by rb_marshal_define_compat
 * and stored in compat_allocator_tbl keyed by newclass's allocator.
 */
00086 typedef struct {
/* Class whose allocator keys this entry. */
00087 VALUE newclass;
/* The other class of the pair; only GC-marked in the code visible here. */
00088 VALUE oldclass;
/* Called by w_object to obtain the object that is actually dumped. */
00089 VALUE (*dumper)(VALUE);
/* Load-side counterpart; its call site is outside this view. */
00090 VALUE (*loader)(VALUE, VALUE);
00091 } marshal_compat_t;
00092
00093 static st_table *compat_allocator_tbl;
00094 static VALUE compat_allocator_tbl_wrapper;
00095
00096 static int
00097 mark_marshal_compat_i(st_data_t key, st_data_t value)
00098 {
00099 marshal_compat_t *p = (marshal_compat_t *)value;
00100 rb_gc_mark(p->newclass);
00101 rb_gc_mark(p->oldclass);
00102 return ST_CONTINUE;
00103 }
00104
00105 static void
00106 mark_marshal_compat_t(void *tbl)
00107 {
00108 if (!tbl) return;
00109 st_foreach(tbl, mark_marshal_compat_i, 0);
00110 }
00111
00112 static st_table *compat_allocator_table(void);
00113
00114 void
00115 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
00116 {
00117 marshal_compat_t *compat;
00118 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
00119
00120 if (!allocator) {
00121 rb_raise(rb_eTypeError, "no allocator");
00122 }
00123
00124 compat = ALLOC(marshal_compat_t);
00125 compat->newclass = Qnil;
00126 compat->oldclass = Qnil;
00127 compat->newclass = newclass;
00128 compat->oldclass = oldclass;
00129 compat->dumper = dumper;
00130 compat->loader = loader;
00131
00132 st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
00133 }
00134
00135 #define MARSHAL_INFECTION FL_TAINT
00136 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
00137
/* State shared by all writer functions during one Marshal.dump call. */
00138 struct dump_arg {
/* str: output buffer appended to by w_nbyte; dest: port to flush to, or 0 when dumping to a string. */
00139 VALUE str, dest;
/* ID -> index of symbols already written (w_symbol); also doubles as the "still active" flag. */
00140 st_table *symbols;
/* object -> index of objects already written, used for TYPE_LINK (w_object). */
00141 st_table *data;
/* substitute object produced by a compat dumper -> the real object (w_object, w_class). */
00142 st_table *compat_tbl;
/* encoding name -> String holding that name; created lazily by w_encoding. */
00143 st_table *encodings;
/* Accumulated MARSHAL_INFECTION flags, OR'ed onto the output buffer by w_nbyte. */
00144 int infection;
00145 };
00146
/* Bundle handed to iteration callbacks (hash_each, w_obj_each) and to w_encoding/w_ivar. */
00147 struct dump_call_arg {
/* Not assigned by any code visible in this view. */
00148 VALUE obj;
/* The shared dump state. */
00149 struct dump_arg *arg;
/* Remaining nesting depth passed on to w_object. */
00150 int limit;
00151 };
00152
00153 static void
00154 check_dump_arg(struct dump_arg *arg, ID sym)
00155 {
00156 if (!arg->symbols) {
00157 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
00158 rb_id2name(sym));
00159 }
00160 }
00161
00162 static void clear_dump_arg(struct dump_arg *arg);
00163
00164 static void
00165 mark_dump_arg(void *ptr)
00166 {
00167 struct dump_arg *p = ptr;
00168 if (!p->symbols)
00169 return;
00170 rb_mark_set(p->data);
00171 rb_mark_hash(p->compat_tbl);
00172 rb_gc_mark(p->str);
00173 }
00174
/*
 * GC free function for the dump_arg wrapper: releases the tables the
 * state still owns, then the struct itself.
 */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *state = ptr;

    clear_dump_arg(state);
    xfree(state);
}
00181
00182 static size_t
00183 memsize_dump_arg(const void *ptr)
00184 {
00185 return ptr ? sizeof(struct dump_arg) : 0;
00186 }
00187
/*
 * Typed-data descriptor for the wrapper object that marshal_dump
 * creates around struct dump_arg: type name, then the mark / free /
 * memsize callbacks defined above.
 */
00188 static const rb_data_type_t dump_arg_data = {
00189 "dump_arg",
00190 {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
00191 NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
00192 };
00193
00194 static const char *
00195 must_not_be_anonymous(const char *type, VALUE path)
00196 {
00197 char *n = RSTRING_PTR(path);
00198
00199 if (!rb_enc_asciicompat(rb_enc_get(path))) {
00200
00201 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
00202 }
00203 if (n[0] == '#') {
00204 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
00205 (int)RSTRING_LEN(path), n);
00206 }
00207 return n;
00208 }
00209
00210 static VALUE
00211 class2path(VALUE klass)
00212 {
00213 VALUE path = rb_class_path(klass);
00214 const char *n;
00215
00216 n = must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
00217 if (rb_path_to_class(path) != rb_class_real(klass)) {
00218 rb_raise(rb_eTypeError, "%s can't be referred to", n);
00219 }
00220 return path;
00221 }
00222
00223 static void w_long(long, struct dump_arg*);
00224 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
00225
00226 static void
00227 w_nbyte(const char *s, long n, struct dump_arg *arg)
00228 {
00229 VALUE buf = arg->str;
00230 rb_str_buf_cat(buf, s, n);
00231 RBASIC(buf)->flags |= arg->infection;
00232 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
00233 rb_io_write(arg->dest, buf);
00234 rb_str_resize(buf, 0);
00235 }
00236 }
00237
/* Writes the single byte `c` to the output. */
static void
w_byte(char c, struct dump_arg *arg)
{
    char one[1];

    one[0] = c;
    w_nbyte(one, 1, arg);
}
00243
/*
 * Writes a length-prefixed byte sequence: the length `n` in w_long
 * encoding, followed by the `n` bytes at `s`.
 */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    const long count = n;

    w_long(count, arg);     /* length prefix */
    w_nbyte(s, count, arg); /* payload */
}
00250
00251 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
00252
/* Writes the low 16 bits of `x` as two bytes, least significant first. */
static void
w_short(int x, struct dump_arg *arg)
{
    int shift;

    for (shift = 0; shift <= 8; shift += 8) {
        w_byte((char)((x >> shift) & 0xff), arg);
    }
}
00259
/*
 * Writes `x` in the variable-length integer encoding:
 *   - 0 is the single byte 0;
 *   - 1..122 is the single byte x+5, -123..-1 the single byte x-5;
 *   - anything else is a count byte (positive for non-negative values,
 *     negated for negative ones) followed by that many value bytes,
 *     least significant first.
 * Where long is wider than 4 bytes, a value that does not survive the
 * 31-bit shift test raises TypeError.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char buf[sizeof(long)+1];
    int count, k;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        /* big long does not fit in 4 bytes */
        rb_raise(rb_eTypeError, "long too big to dump");
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (0 < x && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (-124 < x && x < 0) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* Peel off bytes until only the sign extension is left. */
    count = 0;
    while (count < (int)sizeof(long)) {
        count++;
        buf[count] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            buf[0] = (char)count;
            break;
        }
        if (x == -1) {
            buf[0] = (char)-count;
            break;
        }
    }
    for (k = 0; k <= count; k++) {
        w_byte(buf[k], arg);
    }
}
00302
00303 #ifdef DBL_MANT_DIG
00304 #define DECIMAL_MANT (53-16)
00305
00306 #if DBL_MANT_DIG > 32
00307 #define MANT_BITS 32
00308 #elif DBL_MANT_DIG > 24
00309 #define MANT_BITS 24
00310 #elif DBL_MANT_DIG > 16
00311 #define MANT_BITS 16
00312 #else
00313 #define MANT_BITS 8
00314 #endif
00315
/*
 * Refines `d` with extra mantissa bytes found in `buf`.
 *
 * Nothing happens (d is returned unchanged) when `len` is 0, when only
 * one byte is present, or when the first byte is not NUL.  Otherwise
 * the bytes after the leading NUL are consumed in groups of
 * MANT_BITS/8 bytes, most significant byte first, and added to the
 * magnitude of `d` at successively lower bit positions.
 */
00316 static double
00317 load_mantissa(double d, const char *buf, long len)
00318 {
00319 if (!len) return d;
/* Requires a NUL marker byte followed by at least one data byte. */
00320 if (--len > 0 && !*buf++) {
00321 int e, s = d < 0, dig = 0;
00322 unsigned long m;
00323
/* Split |d| into fraction and exponent, scale the fraction by 2**DECIMAL_MANT and keep only its integer part in d. */
00324 modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
00325 do {
00326 m = 0;
/* Deliberate fallthrough: assembles up to MANT_BITS/8 bytes into m; a short final group enters at its own case label. */
00327 switch (len) {
00328 default: m = *buf++ & 0xff;
00329 #if MANT_BITS > 24
00330 case 3: m = (m << 8) | (*buf++ & 0xff);
00331 #endif
00332 #if MANT_BITS > 16
00333 case 2: m = (m << 8) | (*buf++ & 0xff);
00334 #endif
00335 #if MANT_BITS > 8
00336 case 1: m = (m << 8) | (*buf++ & 0xff);
00337 #endif
00338 }
/* Each group sits below the previous one: lower the exponent by the number of bits just read. */
00339 dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
00340 d += ldexp((double)m, dig);
00341 } while ((len -= MANT_BITS / 8) > 0);
/* Undo the scaling and restore the original sign. */
00342 d = ldexp(d, e - DECIMAL_MANT);
00343 if (s) d = -d;
00344 }
00345 return d;
00346 }
00347 #else
00348 #define load_mantissa(d, buf, len) (d)
00349 #endif
00350
00351 #ifdef DBL_DIG
00352 #define FLOAT_DIG (DBL_DIG+2)
00353 #else
00354 #define FLOAT_DIG 17
00355 #endif
00356
00357 static void
00358 w_float(double d, struct dump_arg *arg)
00359 {
00360 char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve);
00361 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
00362
00363 if (isinf(d)) {
00364 if (d < 0) w_cstr("-inf", arg);
00365 else w_cstr("inf", arg);
00366 }
00367 else if (isnan(d)) {
00368 w_cstr("nan", arg);
00369 }
00370 else if (d == 0.0) {
00371 if (1.0/d < 0) w_cstr("-0", arg);
00372 else w_cstr("0", arg);
00373 }
00374 else {
00375 int decpt, sign, digs, len = 0;
00376 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
00377 if (sign) buf[len++] = '-';
00378 digs = (int)(e - p);
00379 if (decpt < -3 || decpt > digs) {
00380 buf[len++] = p[0];
00381 if (--digs > 0) buf[len++] = '.';
00382 memcpy(buf + len, p + 1, digs);
00383 len += digs;
00384 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
00385 }
00386 else if (decpt > 0) {
00387 memcpy(buf + len, p, decpt);
00388 len += decpt;
00389 if ((digs -= decpt) > 0) {
00390 buf[len++] = '.';
00391 memcpy(buf + len, p + decpt, digs);
00392 len += digs;
00393 }
00394 }
00395 else {
00396 buf[len++] = '0';
00397 buf[len++] = '.';
00398 if (decpt) {
00399 memset(buf + len, '0', -decpt);
00400 len -= decpt;
00401 }
00402 memcpy(buf + len, p, digs);
00403 len += digs;
00404 }
00405 xfree(p);
00406 w_bytes(buf, len, arg);
00407 }
00408 }
00409
00410 static void
00411 w_symbol(ID id, struct dump_arg *arg)
00412 {
00413 VALUE sym;
00414 st_data_t num;
00415 int encidx = -1;
00416
00417 if (st_lookup(arg->symbols, id, &num)) {
00418 w_byte(TYPE_SYMLINK, arg);
00419 w_long((long)num, arg);
00420 }
00421 else {
00422 sym = rb_id2str(id);
00423 if (!sym) {
00424 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id);
00425 }
00426 encidx = rb_enc_get_index(sym);
00427 if (encidx == rb_usascii_encindex() ||
00428 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
00429 encidx = -1;
00430 }
00431 else {
00432 w_byte(TYPE_IVAR, arg);
00433 }
00434 w_byte(TYPE_SYMBOL, arg);
00435 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
00436 st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00437 if (encidx != -1) {
00438 struct dump_call_arg c_arg;
00439 c_arg.limit = 1;
00440 c_arg.arg = arg;
00441 w_encoding(sym, 0, &c_arg);
00442 }
00443 }
00444 }
00445
00446 static void
00447 w_unique(VALUE s, struct dump_arg *arg)
00448 {
00449 must_not_be_anonymous("class", s);
00450 w_symbol(rb_intern_str(s), arg);
00451 }
00452
00453 static void w_object(VALUE,struct dump_arg*,int);
00454
00455 static int
00456 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
00457 {
00458 w_object(key, arg->arg, arg->limit);
00459 w_object(value, arg->arg, arg->limit);
00460 return ST_CONTINUE;
00461 }
00462
00463 #define SINGLETON_DUMP_UNABLE_P(klass) \
00464 (RCLASS_M_TBL(klass)->num_entries || \
00465 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1))
00466
00467 static void
00468 w_extended(VALUE klass, struct dump_arg *arg, int check)
00469 {
00470 if (check && FL_TEST(klass, FL_SINGLETON)) {
00471 VALUE origin = RCLASS_ORIGIN(klass);
00472 if (SINGLETON_DUMP_UNABLE_P(klass) ||
00473 (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
00474 rb_raise(rb_eTypeError, "singleton can't be dumped");
00475 }
00476 klass = RCLASS_SUPER(klass);
00477 }
00478 while (BUILTIN_TYPE(klass) == T_ICLASS) {
00479 VALUE path = rb_class_name(RBASIC(klass)->klass);
00480 w_byte(TYPE_EXTENDED, arg);
00481 w_unique(path, arg);
00482 klass = RCLASS_SUPER(klass);
00483 }
00484 }
00485
00486 static void
00487 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
00488 {
00489 VALUE path;
00490 st_data_t real_obj;
00491 VALUE klass;
00492
00493 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
00494 obj = (VALUE)real_obj;
00495 }
00496 klass = CLASS_OF(obj);
00497 w_extended(klass, arg, check);
00498 w_byte(type, arg);
00499 path = class2path(rb_class_real(klass));
00500 w_unique(path, arg);
00501 }
00502
00503 static void
00504 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
00505 {
00506 VALUE klass = CLASS_OF(obj);
00507
00508 w_extended(klass, arg, TRUE);
00509 klass = rb_class_real(klass);
00510 if (klass != super) {
00511 w_byte(TYPE_UCLASS, arg);
00512 w_unique(class2path(klass), arg);
00513 }
00514 }
00515
00516 static int
00517 w_obj_each(st_data_t key, st_data_t val, st_data_t a)
00518 {
00519 ID id = (ID)key;
00520 VALUE value = (VALUE)val;
00521 struct dump_call_arg *arg = (struct dump_call_arg *)a;
00522
00523 if (id == rb_id_encoding()) return ST_CONTINUE;
00524 if (id == rb_intern("E")) return ST_CONTINUE;
00525 w_symbol(id, arg->arg);
00526 w_object(value, arg->arg, arg->limit);
00527 return ST_CONTINUE;
00528 }
00529
00530 static void
00531 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
00532 {
00533 int encidx = rb_enc_get_index(obj);
00534 rb_encoding *enc = 0;
00535 st_data_t name;
00536
00537 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
00538 w_long(num, arg->arg);
00539 return;
00540 }
00541 w_long(num + 1, arg->arg);
00542
00543
00544 if (encidx == rb_usascii_encindex()) {
00545 w_symbol(rb_intern("E"), arg->arg);
00546 w_object(Qfalse, arg->arg, arg->limit + 1);
00547 return;
00548 }
00549 else if (encidx == rb_utf8_encindex()) {
00550 w_symbol(rb_intern("E"), arg->arg);
00551 w_object(Qtrue, arg->arg, arg->limit + 1);
00552 return;
00553 }
00554
00555 w_symbol(rb_id_encoding(), arg->arg);
00556 do {
00557 if (!arg->arg->encodings)
00558 arg->arg->encodings = st_init_strcasetable();
00559 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
00560 break;
00561 name = (st_data_t)rb_str_new2(rb_enc_name(enc));
00562 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
00563 } while (0);
00564 w_object(name, arg->arg, arg->limit + 1);
00565 }
00566
00567 static void
00568 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
00569 {
00570 long num = tbl ? tbl->num_entries : 0;
00571
00572 w_encoding(obj, num, arg);
00573 if (tbl) {
00574 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00575 }
00576 }
00577
00578 static void
00579 w_objivar(VALUE obj, struct dump_call_arg *arg)
00580 {
00581 VALUE *ptr;
00582 long i, len, num;
00583
00584 len = ROBJECT_NUMIV(obj);
00585 ptr = ROBJECT_IVPTR(obj);
00586 num = 0;
00587 for (i = 0; i < len; i++)
00588 if (ptr[i] != Qundef)
00589 num += 1;
00590
00591 w_encoding(obj, num, arg);
00592 if (num != 0) {
00593 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
00594 }
00595 }
00596
/*
 * Writes one object (recursively) to the dump.
 *
 * obj   - the value to dump
 * arg   - shared dump state
 * limit - remaining nesting depth; ArgumentError is raised when it is
 *         exactly 0, so a negative value never triggers the check
 *
 * Objects already present in arg->data are written as TYPE_LINK plus
 * their index.  Immediates are written inline.  Other objects are
 * handled, in this order, by a custom marshal_dump method, a custom
 * _dump method, or the built-in per-type encoding, optionally wrapped
 * in TYPE_IVAR and followed by their instance variables.
 */
00597 static void
00598 w_object(VALUE obj, struct dump_arg *arg, int limit)
00599 {
00600 struct dump_call_arg c_arg;
00601 st_table *ivtbl = 0;
00602 st_data_t num;
00603 int hasiv = 0;
/* has_ivars: true when obj has a generic ivar table (stored into ivtbl as a side effect), or is a non-special-const whose encoding is not ASCII-8BIT. */
00604 #define has_ivars(obj, ivtbl) ((((ivtbl) = rb_generic_ivar_table(obj)) != 0) || \
00605 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
00606
00607 if (limit == 0) {
00608 rb_raise(rb_eArgError, "exceed depth limit");
00609 }
00610
00611 limit--;
00612 c_arg.limit = limit;
00613 c_arg.arg = arg;
00614
/* Already dumped: emit a back-reference instead of the object. */
00615 if (st_lookup(arg->data, obj, &num)) {
00616 w_byte(TYPE_LINK, arg);
00617 w_long((long)num, arg);
00618 return;
00619 }
00620
00621 if (obj == Qnil) {
00622 w_byte(TYPE_NIL, arg);
00623 }
00624 else if (obj == Qtrue) {
00625 w_byte(TYPE_TRUE, arg);
00626 }
00627 else if (obj == Qfalse) {
00628 w_byte(TYPE_FALSE, arg);
00629 }
00630 else if (FIXNUM_P(obj)) {
00631 #if SIZEOF_LONG <= 4
00632 w_byte(TYPE_FIXNUM, arg);
00633 w_long(FIX2INT(obj), arg);
00634 #else
/* With a wide long, a fixnum failing this 31-bit shift test is converted and dumped as a bignum instead. */
00635 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
00636 w_byte(TYPE_FIXNUM, arg);
00637 w_long(FIX2LONG(obj), arg);
00638 }
00639 else {
00640 w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
00641 }
00642 #endif
00643 }
00644 else if (SYMBOL_P(obj)) {
00645 w_symbol(SYM2ID(obj), arg);
00646 }
00647 else if (FLONUM_P(obj)) {
/* Flonums are registered in the object table like heap floats. */
00648 st_add_direct(arg->data, obj, arg->data->num_entries);
00649 w_byte(TYPE_FLOAT, arg);
00650 w_float(RFLOAT_VALUE(obj), arg);
00651 }
00652 else {
00653 VALUE v;
00654
00655 if (!RBASIC_CLASS(obj)) {
00656 rb_raise(rb_eTypeError, "can't dump internal %s",
00657 rb_builtin_type_name(BUILTIN_TYPE(obj)));
00658 }
00659
00660 arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);
00661
/* Custom marshal_dump: the object is registered before the method is called, then its class and the returned value are written. */
00662 if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
00663 st_add_direct(arg->data, obj, arg->data->num_entries);
00664
00665 v = rb_funcall2(obj, s_mdump, 0, 0);
00666 check_dump_arg(arg, s_mdump);
00667 w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
00668 w_object(v, arg, limit);
00669 return;
00670 }
/* Custom _dump: must return a String, which is written raw after the class. */
00671 if (rb_obj_respond_to(obj, s_dump, TRUE)) {
00672 st_table *ivtbl2 = 0;
00673 int hasiv2;
00674
00675 v = INT2NUM(limit);
00676 v = rb_funcall2(obj, s_dump, 1, &v);
00677 check_dump_arg(arg, s_dump);
00678 if (!RB_TYPE_P(v, T_STRING)) {
00679 rb_raise(rb_eTypeError, "_dump() must return string");
00680 }
/* TYPE_IVAR is written once if either obj or the returned string has ivars. */
00681 hasiv = has_ivars(obj, ivtbl);
00682 if (hasiv) w_byte(TYPE_IVAR, arg);
00683 if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
00684 w_byte(TYPE_IVAR, arg);
00685 }
00686 w_class(TYPE_USERDEF, obj, arg, FALSE);
00687 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
/* The returned string's ivars take precedence over obj's. */
00688 if (hasiv2) {
00689 w_ivar(v, ivtbl2, &c_arg);
00690 }
00691 else if (hasiv) {
00692 w_ivar(obj, ivtbl, &c_arg);
00693 }
/* Unlike the marshal_dump path, obj is registered only after it has been written. */
00694 st_add_direct(arg->data, obj, arg->data->num_entries);
00695 return;
00696 }
00697
00698 st_add_direct(arg->data, obj, arg->data->num_entries);
00699
00700 hasiv = has_ivars(obj, ivtbl);
00701 {
/* Compat handling: if the class's allocator has a registered entry, dump the object its dumper returns and remember substitute -> real in compat_tbl. */
00702 st_data_t compat_data;
00703 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
00704 if (st_lookup(compat_allocator_tbl,
00705 (st_data_t)allocator,
00706 &compat_data)) {
00707 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
00708 VALUE real_obj = obj;
00709 obj = compat->dumper(real_obj);
00710 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
00711 if (obj != real_obj && !ivtbl) hasiv = 0;
00712 }
00713 }
00714 if (hasiv) w_byte(TYPE_IVAR, arg);
00715
00716 switch (BUILTIN_TYPE(obj)) {
00717 case T_CLASS:
00718 if (FL_TEST(obj, FL_SINGLETON)) {
00719 rb_raise(rb_eTypeError, "singleton class can't be dumped");
00720 }
00721 w_byte(TYPE_CLASS, arg);
00722 {
00723 VALUE path = class2path(obj);
00724 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00725 RB_GC_GUARD(path);
00726 }
00727 break;
00728
00729 case T_MODULE:
00730 w_byte(TYPE_MODULE, arg);
00731 {
00732 VALUE path = class2path(obj);
00733 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00734 RB_GC_GUARD(path);
00735 }
00736 break;
00737
00738 case T_FLOAT:
00739 w_byte(TYPE_FLOAT, arg);
00740 w_float(RFLOAT_VALUE(obj), arg);
00741 break;
00742
00743 case T_BIGNUM:
/* Bignum: sign byte, length counted in shorts, then the digits as shorts, least significant first. */
00744 w_byte(TYPE_BIGNUM, arg);
00745 {
00746 char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
00747 long len = RBIGNUM_LEN(obj);
00748 BDIGIT *d = RBIGNUM_DIGITS(obj);
00749
00750 w_byte(sign, arg);
00751 w_long(SHORTLEN(len), arg);
00752 while (len--) {
00753 #if SIZEOF_BDIGITS > SIZEOF_SHORT
00754 BDIGIT num = *d;
00755 int i;
00756
00757 for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
00758 w_short(num & SHORTMASK, arg);
00759 num = SHORTDN(num);
/* Stop early in the most significant digit once only zero shorts remain. */
00760 if (len == 0 && num == 0) break;
00761 }
00762 #else
00763 w_short(*d, arg);
00764 #endif
00765 d++;
00766 }
00767 }
00768 break;
00769
00770 case T_STRING:
00771 w_uclass(obj, rb_cString, arg);
00772 w_byte(TYPE_STRING, arg);
00773 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
00774 break;
00775
00776 case T_REGEXP:
00777 w_uclass(obj, rb_cRegexp, arg);
00778 w_byte(TYPE_REGEXP, arg);
00779 {
00780 int opts = rb_reg_options(obj);
00781 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
00782 w_byte((char)opts, arg);
00783 }
00784 break;
00785
00786 case T_ARRAY:
00787 w_uclass(obj, rb_cArray, arg);
00788 w_byte(TYPE_ARRAY, arg);
00789 {
00790 long i, len = RARRAY_LEN(obj);
00791
00792 w_long(len, arg);
00793 for (i=0; i<RARRAY_LEN(obj); i++) {
00794 w_object(RARRAY_AREF(obj, i), arg, limit);
/* Dumping an element can run Ruby code; a length change would make the already written count wrong. */
00795 if (len != RARRAY_LEN(obj)) {
00796 rb_raise(rb_eRuntimeError, "array modified during dump");
00797 }
00798 }
00799 }
00800 break;
00801
00802 case T_HASH:
00803 w_uclass(obj, rb_cHash, arg);
/* A hash with a default value uses TYPE_HASH_DEF and appends the default after the pairs; a default proc cannot be dumped. */
00804 if (NIL_P(RHASH_IFNONE(obj))) {
00805 w_byte(TYPE_HASH, arg);
00806 }
00807 else if (FL_TEST(obj, HASH_PROC_DEFAULT)) {
00808 rb_raise(rb_eTypeError, "can't dump hash with default proc");
00809 }
00810 else {
00811 w_byte(TYPE_HASH_DEF, arg);
00812 }
00813 w_long(RHASH_SIZE(obj), arg);
00814 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
00815 if (!NIL_P(RHASH_IFNONE(obj))) {
00816 w_object(RHASH_IFNONE(obj), arg, limit);
00817 }
00818 break;
00819
00820 case T_STRUCT:
00821 w_class(TYPE_STRUCT, obj, arg, TRUE);
00822 {
00823 long len = RSTRUCT_LEN(obj);
00824 VALUE mem;
00825 long i;
00826
00827 w_long(len, arg);
00828 mem = rb_struct_members(obj);
00829 for (i=0; i<len; i++) {
00830 w_symbol(SYM2ID(RARRAY_AREF(mem, i)), arg);
00831 w_object(RSTRUCT_GET(obj, i), arg, limit);
00832 }
00833 }
00834 break;
00835
00836 case T_OBJECT:
00837 w_class(TYPE_OBJECT, obj, arg, TRUE);
00838 w_objivar(obj, &c_arg);
00839 break;
00840
00841 case T_DATA:
00842 {
00843 VALUE v;
00844
00845 if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
00846 rb_raise(rb_eTypeError,
00847 "no _dump_data is defined for class %s",
00848 rb_obj_classname(obj));
00849 }
00850 v = rb_funcall2(obj, s_dump_data, 0, 0);
00851 check_dump_arg(arg, s_dump_data);
00852 w_class(TYPE_DATA, obj, arg, TRUE);
00853 w_object(v, arg, limit);
00854 }
00855 break;
00856
00857 default:
00858 rb_raise(rb_eTypeError, "can't dump %s",
00859 rb_obj_classname(obj));
00860 break;
00861 }
00862 RB_GC_GUARD(obj);
00863 }
/* Trailing ivar section matching the TYPE_IVAR prefix written above. */
00864 if (hasiv) {
00865 w_ivar(obj, ivtbl, &c_arg);
00866 }
00867 }
00868
00869 static void
00870 clear_dump_arg(struct dump_arg *arg)
00871 {
00872 if (!arg->symbols) return;
00873 st_free_table(arg->symbols);
00874 arg->symbols = 0;
00875 st_free_table(arg->data);
00876 arg->data = 0;
00877 st_free_table(arg->compat_tbl);
00878 arg->compat_tbl = 0;
00879 if (arg->encodings) {
00880 st_free_table(arg->encodings);
00881 arg->encodings = 0;
00882 }
00883 }
00884
00885 NORETURN(static inline void io_needed(void));
00886 static inline void
00887 io_needed(void)
00888 {
00889 rb_raise(rb_eTypeError, "instance of IO needed");
00890 }
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926 static VALUE
00927 marshal_dump(int argc, VALUE *argv)
00928 {
00929 VALUE obj, port, a1, a2;
00930 int limit = -1;
00931 struct dump_arg *arg;
00932 volatile VALUE wrapper;
00933
00934 port = Qnil;
00935 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
00936 if (argc == 3) {
00937 if (!NIL_P(a2)) limit = NUM2INT(a2);
00938 if (NIL_P(a1)) io_needed();
00939 port = a1;
00940 }
00941 else if (argc == 2) {
00942 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
00943 else if (NIL_P(a1)) io_needed();
00944 else port = a1;
00945 }
00946 RB_GC_GUARD(wrapper) = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
00947 arg->dest = 0;
00948 arg->symbols = st_init_numtable();
00949 arg->data = st_init_numtable();
00950 arg->infection = 0;
00951 arg->compat_tbl = st_init_numtable();
00952 arg->encodings = 0;
00953 arg->str = rb_str_buf_new(0);
00954 if (!NIL_P(port)) {
00955 if (!rb_respond_to(port, s_write)) {
00956 io_needed();
00957 }
00958 arg->dest = port;
00959 if (rb_check_funcall(port, s_binmode, 0, 0) != Qundef) {
00960 check_dump_arg(arg, s_binmode);
00961 }
00962 }
00963 else {
00964 port = arg->str;
00965 }
00966
00967 w_byte(MARSHAL_MAJOR, arg);
00968 w_byte(MARSHAL_MINOR, arg);
00969
00970 w_object(obj, arg, limit);
00971 if (arg->dest) {
00972 rb_io_write(arg->dest, arg->str);
00973 rb_str_resize(arg->str, 0);
00974 }
00975 clear_dump_arg(arg);
00976 RB_GC_GUARD(wrapper);
00977
00978 return port;
00979 }
00980
/* State shared by all reader functions during one load. */
00981 struct load_arg {
/* Input: either a String read in place or an object that responds to read/getbyte (see r_byte). */
00982 VALUE src;
/* Read-ahead buffer used for non-String sources; allocated outside this view. */
00983 char *buf;
/* Number of unread bytes currently in buf. */
00984 long buflen;
/* Read-ahead allowance used to size buffered reads; maintained outside this view. */
00985 long readable;
/* Read position: index into src for String input, index into buf otherwise. */
00986 long offset;
/* index -> ID of symbols read so far; also doubles as the "still active" flag. */
00987 st_table *symbols;
/* index -> object read so far (see r_entry0). */
00988 st_table *data;
/* Not referenced by any code visible in this view. */
00989 VALUE proc;
/* Maps an object to the real object recorded for it (see r_entry0). */
00990 st_table *compat_tbl;
/* Accumulated MARSHAL_INFECTION flags from the input, applied to loaded objects by r_entry0. */
00991 int infection;
00992 };
00993
00994 static void
00995 check_load_arg(struct load_arg *arg, ID sym)
00996 {
00997 if (!arg->symbols) {
00998 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
00999 rb_id2name(sym));
01000 }
01001 }
01002
01003 static void clear_load_arg(struct load_arg *arg);
01004
01005 static void
01006 mark_load_arg(void *ptr)
01007 {
01008 struct load_arg *p = ptr;
01009 if (!p->symbols)
01010 return;
01011 rb_mark_tbl(p->data);
01012 rb_mark_hash(p->compat_tbl);
01013 }
01014
/*
 * GC free function for the load_arg wrapper: clears the state via
 * clear_load_arg, then frees the struct itself.
 */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
    xfree(state);
}
01021
01022 static size_t
01023 memsize_load_arg(const void *ptr)
01024 {
01025 return ptr ? sizeof(struct load_arg) : 0;
01026 }
01027
/*
 * Typed-data descriptor for the wrapper object around struct
 * load_arg: type name, then the mark / free / memsize callbacks
 * defined above.
 */
01028 static const rb_data_type_t load_arg_data = {
01029 "load_arg",
01030 {mark_load_arg, free_load_arg, memsize_load_arg,},
01031 NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
01032 };
01033
01034 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
01035 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
01036 static VALUE r_object(struct load_arg *arg);
01037 static ID r_symbol(struct load_arg *arg);
01038 static VALUE path2class(VALUE path);
01039
01040 NORETURN(static void too_short(void));
01041 static void
01042 too_short(void)
01043 {
01044 rb_raise(rb_eArgError, "marshal data too short");
01045 }
01046
01047 static st_index_t
01048 r_prepare(struct load_arg *arg)
01049 {
01050 st_index_t idx = arg->data->num_entries;
01051
01052 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
01053 return idx;
01054 }
01055
01056 static unsigned char
01057 r_byte1_buffered(struct load_arg *arg)
01058 {
01059 if (arg->buflen == 0) {
01060 long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
01061 VALUE str, n = LONG2NUM(readable);
01062
01063 str = rb_funcall2(arg->src, s_read, 1, &n);
01064
01065 check_load_arg(arg, s_read);
01066 if (NIL_P(str)) too_short();
01067 StringValue(str);
01068 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01069 memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
01070 arg->offset = 0;
01071 arg->buflen = RSTRING_LEN(str);
01072 }
01073 arg->buflen--;
01074 return arg->buf[arg->offset++];
01075 }
01076
01077 static int
01078 r_byte(struct load_arg *arg)
01079 {
01080 int c;
01081
01082 if (RB_TYPE_P(arg->src, T_STRING)) {
01083 if (RSTRING_LEN(arg->src) > arg->offset) {
01084 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
01085 }
01086 else {
01087 too_short();
01088 }
01089 }
01090 else {
01091 if (arg->readable >0 || arg->buflen > 0) {
01092 c = r_byte1_buffered(arg);
01093 }
01094 else {
01095 VALUE v = rb_funcall2(arg->src, s_getbyte, 0, 0);
01096 check_load_arg(arg, s_getbyte);
01097 if (NIL_P(v)) rb_eof_error();
01098 c = (unsigned char)NUM2CHR(v);
01099 }
01100 }
01101 return c;
01102 }
01103
01104 static void
01105 long_toobig(int size)
01106 {
01107 rb_raise(rb_eTypeError, "long too big for this architecture (size "
01108 STRINGIZE(SIZEOF_LONG)", given %d)", size);
01109 }
01110
01111 #undef SIGN_EXTEND_CHAR
01112 #if __STDC__
01113 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
01114 #else
01115
01116 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
01117 #endif
01118
/*
 * Reads an integer in the variable-length encoding written by w_long.
 *
 * The first byte, taken as signed, is either 0 (value 0), a small
 * value biased by +5 / -5, or a byte count: positive counts are
 * followed by that many bytes of a non-negative value, negative
 * counts by that many bytes of a negative value, least significant
 * byte first.  A count larger than sizeof(long) raises TypeError.
 */
static long
r_long(struct load_arg *arg)
{
    long value;
    long k;
    int head = SIGN_EXTEND_CHAR(r_byte(arg));

    if (head == 0) return 0;

    if (head > 0) {
        if (4 < head && head < 128) {
            return head - 5;
        }
        if (head > (int)sizeof(long)) long_toobig(head);
        value = 0;
        for (k = 0; k < head; k++) {
            value |= (long)r_byte(arg) << (8*k);
        }
        return value;
    }

    if (-129 < head && head < -4) {
        return head + 5;
    }
    head = -head;
    if (head > (int)sizeof(long)) long_toobig(head);
    /* start from all ones so the unread high bytes stay sign-extended */
    value = -1;
    for (k = 0; k < head; k++) {
        value &= ~((long)0xff << (8*k));
        value |= (long)r_byte(arg) << (8*k);
    }
    return value;
}
01151
01152 static VALUE
01153 r_bytes1(long len, struct load_arg *arg)
01154 {
01155 VALUE str, n = LONG2NUM(len);
01156
01157 str = rb_funcall2(arg->src, s_read, 1, &n);
01158 check_load_arg(arg, s_read);
01159 if (NIL_P(str)) too_short();
01160 StringValue(str);
01161 if (RSTRING_LEN(str) != len) too_short();
01162 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01163
01164 return str;
01165 }
01166
01167 static VALUE
01168 r_bytes1_buffered(long len, struct load_arg *arg)
01169 {
01170 VALUE str;
01171
01172 if (len <= arg->buflen) {
01173 str = rb_str_new(arg->buf+arg->offset, len);
01174 arg->offset += len;
01175 arg->buflen -= len;
01176 }
01177 else {
01178 long buflen = arg->buflen;
01179 long readable = arg->readable + 1;
01180 long tmp_len, read_len, need_len = len - buflen;
01181 VALUE tmp, n;
01182
01183 readable = readable < BUFSIZ ? readable : BUFSIZ;
01184 read_len = need_len > readable ? need_len : readable;
01185 n = LONG2NUM(read_len);
01186 tmp = rb_funcall2(arg->src, s_read, 1, &n);
01187
01188 check_load_arg(arg, s_read);
01189 if (NIL_P(tmp)) too_short();
01190 StringValue(tmp);
01191
01192 tmp_len = RSTRING_LEN(tmp);
01193
01194 if (tmp_len < need_len) too_short();
01195 arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
01196
01197 str = rb_str_new(arg->buf+arg->offset, buflen);
01198 rb_str_cat(str, RSTRING_PTR(tmp), need_len);
01199
01200 if (tmp_len > need_len) {
01201 buflen = tmp_len - need_len;
01202 memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
01203 arg->buflen = buflen;
01204 }
01205 else {
01206 arg->buflen = 0;
01207 }
01208 arg->offset = 0;
01209 }
01210
01211 return str;
01212 }
01213
01214 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
01215
01216 static VALUE
01217 r_bytes0(long len, struct load_arg *arg)
01218 {
01219 VALUE str;
01220
01221 if (len == 0) return rb_str_new(0, 0);
01222 if (RB_TYPE_P(arg->src, T_STRING)) {
01223 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
01224 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
01225 arg->offset += len;
01226 }
01227 else {
01228 too_short();
01229 }
01230 }
01231 else {
01232 if (arg->readable > 0 || arg->buflen > 0) {
01233 str = r_bytes1_buffered(len, arg);
01234 }
01235 else {
01236 str = r_bytes1(len, arg);
01237 }
01238 }
01239 return str;
01240 }
01241
01242 static int
01243 id2encidx(ID id, VALUE val)
01244 {
01245 if (id == rb_id_encoding()) {
01246 int idx = rb_enc_find_index(StringValueCStr(val));
01247 return idx;
01248 }
01249 else if (id == rb_intern("E")) {
01250 if (val == Qfalse) return rb_usascii_encindex();
01251 else if (val == Qtrue) return rb_utf8_encindex();
01252
01253 }
01254 return -1;
01255 }
01256
01257 static ID
01258 r_symlink(struct load_arg *arg)
01259 {
01260 st_data_t id;
01261 long num = r_long(arg);
01262
01263 if (!st_lookup(arg->symbols, num, &id)) {
01264 rb_raise(rb_eArgError, "bad symbol");
01265 }
01266 return (ID)id;
01267 }
01268
01269 static ID
01270 r_symreal(struct load_arg *arg, int ivar)
01271 {
01272 VALUE s = r_bytes(arg);
01273 ID id;
01274 int idx = -1;
01275 st_index_t n = arg->symbols->num_entries;
01276
01277 if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII);
01278 id = rb_intern_str(s);
01279 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
01280 if (ivar) {
01281 long num = r_long(arg);
01282 while (num-- > 0) {
01283 id = r_symbol(arg);
01284 idx = id2encidx(id, r_object(arg));
01285 }
01286 }
01287 if (idx > 0) rb_enc_associate_index(s, idx);
01288 id = rb_intern_str(s);
01289
01290 return id;
01291 }
01292
01293 static ID
01294 r_symbol(struct load_arg *arg)
01295 {
01296 int type, ivar = 0;
01297
01298 again:
01299 switch ((type = r_byte(arg))) {
01300 default:
01301 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
01302 case TYPE_IVAR:
01303 ivar = 1;
01304 goto again;
01305 case TYPE_SYMBOL:
01306 return r_symreal(arg, ivar);
01307 case TYPE_SYMLINK:
01308 if (ivar) {
01309 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
01310 }
01311 return r_symlink(arg);
01312 }
01313 }
01314
01315 static VALUE
01316 r_unique(struct load_arg *arg)
01317 {
01318 return rb_id2str(r_symbol(arg));
01319 }
01320
01321 static VALUE
01322 r_string(struct load_arg *arg)
01323 {
01324 return r_bytes(arg);
01325 }
01326
01327 static VALUE
01328 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
01329 {
01330 st_data_t real_obj = (VALUE)Qundef;
01331 if (st_lookup(arg->compat_tbl, v, &real_obj)) {
01332 st_insert(arg->data, num, (st_data_t)real_obj);
01333 }
01334 else {
01335 st_insert(arg->data, num, (st_data_t)v);
01336 }
01337 if (arg->infection &&
01338 !RB_TYPE_P(v, T_CLASS) && !RB_TYPE_P(v, T_MODULE)) {
01339 FL_SET(v, arg->infection);
01340 if ((VALUE)real_obj != Qundef)
01341 FL_SET((VALUE)real_obj, arg->infection);
01342 }
01343 return v;
01344 }
01345
01346 static VALUE
01347 r_fixup_compat(VALUE v, struct load_arg *arg)
01348 {
01349 st_data_t data;
01350 if (st_lookup(arg->compat_tbl, v, &data)) {
01351 VALUE real_obj = (VALUE)data;
01352 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
01353 st_data_t key = v;
01354 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01355 marshal_compat_t *compat = (marshal_compat_t*)data;
01356 compat->loader(real_obj, v);
01357 }
01358 st_delete(arg->compat_tbl, &key, 0);
01359 v = real_obj;
01360 }
01361 return v;
01362 }
01363
01364 static VALUE
01365 r_post_proc(VALUE v, struct load_arg *arg)
01366 {
01367 if (arg->proc) {
01368 v = rb_funcall(arg->proc, s_call, 1, v);
01369 check_load_arg(arg, s_call);
01370 }
01371 return v;
01372 }
01373
01374 static VALUE
01375 r_leave(VALUE v, struct load_arg *arg)
01376 {
01377 v = r_fixup_compat(v, arg);
01378 v = r_post_proc(v, arg);
01379 return v;
01380 }
01381
01382 static int
01383 copy_ivar_i(st_data_t key, st_data_t val, st_data_t arg)
01384 {
01385 VALUE obj = (VALUE)arg, value = (VALUE)val;
01386 ID vid = (ID)key;
01387
01388 if (!rb_ivar_defined(obj, vid))
01389 rb_ivar_set(obj, vid, value);
01390 return ST_CONTINUE;
01391 }
01392
01393 static VALUE
01394 r_copy_ivar(VALUE v, VALUE data)
01395 {
01396 rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
01397 return v;
01398 }
01399
01400 static void
01401 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
01402 {
01403 long len;
01404
01405 len = r_long(arg);
01406 if (len > 0) {
01407 do {
01408 ID id = r_symbol(arg);
01409 VALUE val = r_object(arg);
01410 int idx = id2encidx(id, val);
01411 if (idx >= 0) {
01412 rb_enc_associate_index(obj, idx);
01413 if (has_encoding) *has_encoding = TRUE;
01414 }
01415 else {
01416 rb_ivar_set(obj, id, val);
01417 }
01418 } while (--len > 0);
01419 }
01420 }
01421
01422 static VALUE
01423 path2class(VALUE path)
01424 {
01425 VALUE v = rb_path_to_class(path);
01426
01427 if (!RB_TYPE_P(v, T_CLASS)) {
01428 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
01429 }
01430 return v;
01431 }
01432
01433 #define path2module(path) must_be_module(rb_path_to_class(path), path)
01434
01435 static VALUE
01436 must_be_module(VALUE v, VALUE path)
01437 {
01438 if (!RB_TYPE_P(v, T_MODULE)) {
01439 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
01440 }
01441 return v;
01442 }
01443
01444 static VALUE
01445 obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
01446 {
01447 st_data_t data;
01448 rb_alloc_func_t allocator;
01449
01450 allocator = rb_get_alloc_func(klass);
01451 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01452 marshal_compat_t *compat = (marshal_compat_t*)data;
01453 VALUE real_obj = rb_obj_alloc(klass);
01454 VALUE obj = rb_obj_alloc(compat->oldclass);
01455 if (oldclass) *oldclass = compat->oldclass;
01456 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
01457 return obj;
01458 }
01459
01460 return rb_obj_alloc(klass);
01461 }
01462
01463 static VALUE
01464 obj_alloc_by_path(VALUE path, struct load_arg *arg)
01465 {
01466 return obj_alloc_by_klass(path2class(path), arg, 0);
01467 }
01468
01469 static VALUE
01470 append_extmod(VALUE obj, VALUE extmod)
01471 {
01472 long i = RARRAY_LEN(extmod);
01473 while (i > 0) {
01474 VALUE m = RARRAY_AREF(extmod, --i);
01475 rb_extend_object(obj, m);
01476 }
01477 return obj;
01478 }
01479
/*
 * Raises TypeError when the enclosing function's `ivp` flag is set, i.e.
 * when `ivp` is non-NULL and points to a non-zero value.  `type` must be a
 * string literal (it is pasted into the message); `str` is formatted with
 * PRIsVALUE.  Expands to a single statement.
 */
#define prohibit_ivar(type, str) do { \
        if (ivp && *ivp) { \
            rb_raise(rb_eTypeError, \
                     "can't override instance variable of "type" `%"PRIsVALUE"'", \
                     (str)); \
        } \
    } while (0)
01486
01487 static VALUE
01488 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
01489 {
01490 VALUE v = Qnil;
01491 int type = r_byte(arg);
01492 long id;
01493 st_data_t link;
01494
01495 switch (type) {
01496 case TYPE_LINK:
01497 id = r_long(arg);
01498 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
01499 rb_raise(rb_eArgError, "dump format error (unlinked)");
01500 }
01501 v = (VALUE)link;
01502 v = r_post_proc(v, arg);
01503 break;
01504
01505 case TYPE_IVAR:
01506 {
01507 int ivar = TRUE;
01508
01509 v = r_object0(arg, &ivar, extmod);
01510 if (ivar) r_ivar(v, NULL, arg);
01511 }
01512 break;
01513
01514 case TYPE_EXTENDED:
01515 {
01516 VALUE path = r_unique(arg);
01517 VALUE m = rb_path_to_class(path);
01518 if (NIL_P(extmod)) extmod = rb_ary_tmp_new(0);
01519
01520 if (RB_TYPE_P(m, T_CLASS)) {
01521 VALUE c;
01522
01523 v = r_object0(arg, 0, Qnil);
01524 c = CLASS_OF(v);
01525 if (c != m || FL_TEST(c, FL_SINGLETON)) {
01526 rb_raise(rb_eArgError,
01527 "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
01528 path, rb_class_name(c));
01529 }
01530 c = rb_singleton_class(v);
01531 while (RARRAY_LEN(extmod) > 0) {
01532 m = rb_ary_pop(extmod);
01533 rb_prepend_module(c, m);
01534 }
01535 }
01536 else {
01537 must_be_module(m, path);
01538 rb_ary_push(extmod, m);
01539
01540 v = r_object0(arg, 0, extmod);
01541 while (RARRAY_LEN(extmod) > 0) {
01542 m = rb_ary_pop(extmod);
01543 rb_extend_object(v, m);
01544 }
01545 }
01546 }
01547 break;
01548
01549 case TYPE_UCLASS:
01550 {
01551 VALUE c = path2class(r_unique(arg));
01552
01553 if (FL_TEST(c, FL_SINGLETON)) {
01554 rb_raise(rb_eTypeError, "singleton can't be loaded");
01555 }
01556 v = r_object0(arg, 0, extmod);
01557 if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
01558 format_error:
01559 rb_raise(rb_eArgError, "dump format error (user class)");
01560 }
01561 if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01562 VALUE tmp = rb_obj_alloc(c);
01563
01564 if (TYPE(v) != TYPE(tmp)) goto format_error;
01565 }
01566 RBASIC_SET_CLASS(v, c);
01567 }
01568 break;
01569
01570 case TYPE_NIL:
01571 v = Qnil;
01572 v = r_leave(v, arg);
01573 break;
01574
01575 case TYPE_TRUE:
01576 v = Qtrue;
01577 v = r_leave(v, arg);
01578 break;
01579
01580 case TYPE_FALSE:
01581 v = Qfalse;
01582 v = r_leave(v, arg);
01583 break;
01584
01585 case TYPE_FIXNUM:
01586 {
01587 long i = r_long(arg);
01588 v = LONG2FIX(i);
01589 }
01590 v = r_leave(v, arg);
01591 break;
01592
01593 case TYPE_FLOAT:
01594 {
01595 double d;
01596 VALUE str = r_bytes(arg);
01597 const char *ptr = RSTRING_PTR(str);
01598
01599 if (strcmp(ptr, "nan") == 0) {
01600 d = NAN;
01601 }
01602 else if (strcmp(ptr, "inf") == 0) {
01603 d = INFINITY;
01604 }
01605 else if (strcmp(ptr, "-inf") == 0) {
01606 d = -INFINITY;
01607 }
01608 else {
01609 char *e;
01610 d = strtod(ptr, &e);
01611 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
01612 }
01613 v = DBL2NUM(d);
01614 v = r_entry(v, arg);
01615 v = r_leave(v, arg);
01616 }
01617 break;
01618
01619 case TYPE_BIGNUM:
01620 {
01621 long len;
01622 VALUE data;
01623 int sign;
01624
01625 sign = r_byte(arg);
01626 len = r_long(arg);
01627 data = r_bytes0(len * 2, arg);
01628 v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
01629 INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
01630 rb_str_resize(data, 0L);
01631 v = r_entry(v, arg);
01632 v = r_leave(v, arg);
01633 }
01634 break;
01635
01636 case TYPE_STRING:
01637 v = r_entry(r_string(arg), arg);
01638 v = r_leave(v, arg);
01639 break;
01640
01641 case TYPE_REGEXP:
01642 {
01643 VALUE str = r_bytes(arg);
01644 int options = r_byte(arg);
01645 int has_encoding = FALSE;
01646 st_index_t idx = r_prepare(arg);
01647
01648 if (ivp) {
01649 r_ivar(str, &has_encoding, arg);
01650 *ivp = FALSE;
01651 }
01652 if (!has_encoding) {
01653
01654 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
01655 long len = RSTRING_LEN(str);
01656 long bs = 0;
01657 for (; len-- > 0; *dst++ = *src++) {
01658 switch (*src) {
01659 case '\\': bs++; break;
01660 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
01661 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
01662 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
01663 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
01664 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
01665 if (bs & 1) --dst;
01666 default: bs = 0; break;
01667 }
01668 }
01669 rb_str_set_len(str, dst - ptr);
01670 }
01671 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
01672 v = r_leave(v, arg);
01673 }
01674 break;
01675
01676 case TYPE_ARRAY:
01677 {
01678 volatile long len = r_long(arg);
01679
01680 v = rb_ary_new2(len);
01681 v = r_entry(v, arg);
01682 arg->readable += len - 1;
01683 while (len--) {
01684 rb_ary_push(v, r_object(arg));
01685 arg->readable--;
01686 }
01687 v = r_leave(v, arg);
01688 arg->readable++;
01689 }
01690 break;
01691
01692 case TYPE_HASH:
01693 case TYPE_HASH_DEF:
01694 {
01695 long len = r_long(arg);
01696
01697 v = rb_hash_new();
01698 v = r_entry(v, arg);
01699 arg->readable += (len - 1) * 2;
01700 while (len--) {
01701 VALUE key = r_object(arg);
01702 VALUE value = r_object(arg);
01703 rb_hash_aset(v, key, value);
01704 arg->readable -= 2;
01705 }
01706 arg->readable += 2;
01707 if (type == TYPE_HASH_DEF) {
01708 RHASH_SET_IFNONE(v, r_object(arg));
01709 }
01710 v = r_leave(v, arg);
01711 }
01712 break;
01713
01714 case TYPE_STRUCT:
01715 {
01716 VALUE mem, values;
01717 volatile long i;
01718 ID slot;
01719 st_index_t idx = r_prepare(arg);
01720 VALUE klass = path2class(r_unique(arg));
01721 long len = r_long(arg);
01722
01723 v = rb_obj_alloc(klass);
01724 if (!RB_TYPE_P(v, T_STRUCT)) {
01725 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
01726 }
01727 mem = rb_struct_s_members(klass);
01728 if (RARRAY_LEN(mem) != len) {
01729 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
01730 rb_class2name(klass));
01731 }
01732
01733 arg->readable += (len - 1) * 2;
01734 v = r_entry0(v, idx, arg);
01735 values = rb_ary_new2(len);
01736 for (i=0; i<len; i++) {
01737 slot = r_symbol(arg);
01738
01739 if (RARRAY_AREF(mem, i) != ID2SYM(slot)) {
01740 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01741 rb_class2name(klass),
01742 rb_id2name(slot),
01743 rb_id2name(SYM2ID(RARRAY_AREF(mem, i))));
01744 }
01745 rb_ary_push(values, r_object(arg));
01746 arg->readable -= 2;
01747 }
01748 rb_struct_initialize(v, values);
01749 v = r_leave(v, arg);
01750 arg->readable += 2;
01751 }
01752 break;
01753
01754 case TYPE_USERDEF:
01755 {
01756 VALUE klass = path2class(r_unique(arg));
01757 VALUE data;
01758 st_data_t d;
01759
01760 if (!rb_obj_respond_to(klass, s_load, TRUE)) {
01761 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01762 rb_class2name(klass));
01763 }
01764 data = r_string(arg);
01765 if (ivp) {
01766 r_ivar(data, NULL, arg);
01767 *ivp = FALSE;
01768 }
01769 v = rb_funcall2(klass, s_load, 1, &data);
01770 check_load_arg(arg, s_load);
01771 v = r_entry(v, arg);
01772 if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
01773 marshal_compat_t *compat = (marshal_compat_t*)d;
01774 v = compat->loader(klass, v);
01775 }
01776 v = r_post_proc(v, arg);
01777 }
01778 break;
01779
01780 case TYPE_USRMARSHAL:
01781 {
01782 VALUE klass = path2class(r_unique(arg));
01783 VALUE oldclass = 0;
01784 VALUE data;
01785
01786 v = obj_alloc_by_klass(klass, arg, &oldclass);
01787 if (!NIL_P(extmod)) {
01788
01789 append_extmod(v, extmod);
01790 }
01791 if (!rb_obj_respond_to(v, s_mload, TRUE)) {
01792 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01793 rb_class2name(klass));
01794 }
01795 v = r_entry(v, arg);
01796 data = r_object(arg);
01797 rb_funcall2(v, s_mload, 1, &data);
01798 check_load_arg(arg, s_mload);
01799 v = r_fixup_compat(v, arg);
01800 v = r_copy_ivar(v, data);
01801 v = r_post_proc(v, arg);
01802 if (!NIL_P(extmod)) {
01803 if (oldclass) append_extmod(v, extmod);
01804 rb_ary_clear(extmod);
01805 }
01806 }
01807 break;
01808
01809 case TYPE_OBJECT:
01810 {
01811 st_index_t idx = r_prepare(arg);
01812 v = obj_alloc_by_path(r_unique(arg), arg);
01813 if (!RB_TYPE_P(v, T_OBJECT)) {
01814 rb_raise(rb_eArgError, "dump format error");
01815 }
01816 v = r_entry0(v, idx, arg);
01817 r_ivar(v, NULL, arg);
01818 v = r_leave(v, arg);
01819 }
01820 break;
01821
01822 case TYPE_DATA:
01823 {
01824 VALUE klass = path2class(r_unique(arg));
01825 VALUE oldclass = 0;
01826 VALUE r;
01827
01828 v = obj_alloc_by_klass(klass, arg, &oldclass);
01829 if (!RB_TYPE_P(v, T_DATA)) {
01830 rb_raise(rb_eArgError, "dump format error");
01831 }
01832 v = r_entry(v, arg);
01833 if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
01834 rb_raise(rb_eTypeError,
01835 "class %s needs to have instance method `_load_data'",
01836 rb_class2name(klass));
01837 }
01838 r = r_object0(arg, 0, extmod);
01839 rb_funcall2(v, s_load_data, 1, &r);
01840 check_load_arg(arg, s_load_data);
01841 v = r_leave(v, arg);
01842 }
01843 break;
01844
01845 case TYPE_MODULE_OLD:
01846 {
01847 VALUE str = r_bytes(arg);
01848
01849 v = rb_path_to_class(str);
01850 prohibit_ivar("class/module", str);
01851 v = r_entry(v, arg);
01852 v = r_leave(v, arg);
01853 }
01854 break;
01855
01856 case TYPE_CLASS:
01857 {
01858 VALUE str = r_bytes(arg);
01859
01860 v = path2class(str);
01861 prohibit_ivar("class", str);
01862 v = r_entry(v, arg);
01863 v = r_leave(v, arg);
01864 }
01865 break;
01866
01867 case TYPE_MODULE:
01868 {
01869 VALUE str = r_bytes(arg);
01870
01871 v = path2module(str);
01872 prohibit_ivar("module", str);
01873 v = r_entry(v, arg);
01874 v = r_leave(v, arg);
01875 }
01876 break;
01877
01878 case TYPE_SYMBOL:
01879 if (ivp) {
01880 v = ID2SYM(r_symreal(arg, *ivp));
01881 *ivp = FALSE;
01882 }
01883 else {
01884 v = ID2SYM(r_symreal(arg, 0));
01885 }
01886 v = r_leave(v, arg);
01887 break;
01888
01889 case TYPE_SYMLINK:
01890 v = ID2SYM(r_symlink(arg));
01891 break;
01892
01893 default:
01894 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01895 break;
01896 }
01897
01898 if (v == Qundef) {
01899 rb_raise(rb_eArgError, "dump format error (bad link)");
01900 }
01901
01902 return v;
01903 }
01904
01905 static VALUE
01906 r_object(struct load_arg *arg)
01907 {
01908 return r_object0(arg, 0, Qnil);
01909 }
01910
01911 static void
01912 clear_load_arg(struct load_arg *arg)
01913 {
01914 if (arg->buf) {
01915 xfree(arg->buf);
01916 arg->buf = 0;
01917 }
01918 arg->buflen = 0;
01919 arg->offset = 0;
01920 arg->readable = 0;
01921 if (!arg->symbols) return;
01922 st_free_table(arg->symbols);
01923 arg->symbols = 0;
01924 st_free_table(arg->data);
01925 arg->data = 0;
01926 st_free_table(arg->compat_tbl);
01927 arg->compat_tbl = 0;
01928 }
01929
01930
01931
01932
01933
01934
01935
01936
01937
01938
01939
01940
01941
01942
01943
01944 static VALUE
01945 marshal_load(int argc, VALUE *argv)
01946 {
01947 VALUE port, proc;
01948 int major, minor, infection = 0;
01949 VALUE v;
01950 volatile VALUE wrapper;
01951 struct load_arg *arg;
01952
01953 rb_scan_args(argc, argv, "11", &port, &proc);
01954 v = rb_check_string_type(port);
01955 if (!NIL_P(v)) {
01956 infection = (int)FL_TEST(port, MARSHAL_INFECTION);
01957 port = v;
01958 }
01959 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
01960 rb_check_funcall(port, s_binmode, 0, 0);
01961 infection = (int)FL_TAINT;
01962 }
01963 else {
01964 io_needed();
01965 }
01966 RB_GC_GUARD(wrapper) = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
01967 arg->infection = infection;
01968 arg->src = port;
01969 arg->offset = 0;
01970 arg->symbols = st_init_numtable();
01971 arg->data = st_init_numtable();
01972 arg->compat_tbl = st_init_numtable();
01973 arg->proc = 0;
01974 arg->readable = 0;
01975
01976 if (NIL_P(v))
01977 arg->buf = xmalloc(BUFSIZ);
01978 else
01979 arg->buf = 0;
01980
01981 major = r_byte(arg);
01982 minor = r_byte(arg);
01983 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01984 clear_load_arg(arg);
01985 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01986 \tformat version %d.%d required; %d.%d given",
01987 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01988 }
01989 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01990 rb_warn("incompatible marshal file format (can be read)\n\
01991 \tformat version %d.%d required; %d.%d given",
01992 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01993 }
01994
01995 if (!NIL_P(proc)) arg->proc = proc;
01996 v = r_object(arg);
01997 clear_load_arg(arg);
01998 RB_GC_GUARD(wrapper);
01999
02000 return v;
02001 }
02002
02003
02004
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
02041
02042
02043
02044
02045
02046
02047
02048
02049
02050
02051
02052
02053
02054
02055
02056
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073
02074
02075
02076
02077
02078
02079
02080
02081
02082
02083
02084
02085
02086
02087
02088
02089
02090
02091
02092
02093
02094
02095
02096
02097
02098
02099
02100
02101
02102
02103
02104
02105
02106
02107
02108
02109
02110
02111
02112
02113
02114 void
02115 Init_marshal(void)
02116 {
02117 #undef rb_intern
02118 #define rb_intern(str) rb_intern_const(str)
02119
02120 VALUE rb_mMarshal = rb_define_module("Marshal");
02121
02122 s_dump = rb_intern("_dump");
02123 s_load = rb_intern("_load");
02124 s_mdump = rb_intern("marshal_dump");
02125 s_mload = rb_intern("marshal_load");
02126 s_dump_data = rb_intern("_dump_data");
02127 s_load_data = rb_intern("_load_data");
02128 s_alloc = rb_intern("_alloc");
02129 s_call = rb_intern("call");
02130 s_getbyte = rb_intern("getbyte");
02131 s_read = rb_intern("read");
02132 s_write = rb_intern("write");
02133 s_binmode = rb_intern("binmode");
02134
02135 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
02136 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
02137 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
02138
02139
02140 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
02141
02142 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
02143 }
02144
02145 static st_table *
02146 compat_allocator_table(void)
02147 {
02148 if (compat_allocator_tbl) return compat_allocator_tbl;
02149 compat_allocator_tbl = st_init_numtable();
02150 compat_allocator_tbl_wrapper =
02151 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
02152 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
02153 return compat_allocator_tbl;
02154 }
02155
02156 VALUE
02157 rb_marshal_dump(VALUE obj, VALUE port)
02158 {
02159 int argc = 1;
02160 VALUE argv[2];
02161
02162 argv[0] = obj;
02163 argv[1] = port;
02164 if (!NIL_P(port)) argc = 2;
02165 return marshal_dump(argc, argv);
02166 }
02167
02168 VALUE
02169 rb_marshal_load(VALUE port)
02170 {
02171 return marshal_load(1, &port);
02172 }
02173