00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/encoding.h"
00014 #include "internal.h"
00015 #include "transcode_data.h"
00016 #include <ctype.h>
00017
/* Feature switch: the #ifdef further down only tests that this macro is defined. */
00018 #define ENABLE_ECONV_NEWLINE_OPTION 1
00019
00020
/*
 * Public VALUE globals. The names suggest the three conversion error classes
 * and the converter class; they are not assigned in the visible part of the
 * file -- TODO confirm where they are initialised.
 */
00021 VALUE rb_eUndefinedConversionError;
00022 VALUE rb_eInvalidByteSequenceError;
00023 VALUE rb_eConverterNotFoundError;
00024
00025 VALUE rb_cEncodingConverter;
00026
/*
 * File-local VALUE slots. Presumably cached Symbols for option keys and
 * option values (each is named after what it would hold) -- verify against
 * the initialisation code, which is outside this chunk.
 */
00027 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback, sym_aref;
00028 static VALUE sym_xml, sym_text, sym_attr;
00029 static VALUE sym_universal_newline;
00030 static VALUE sym_crlf_newline;
00031 static VALUE sym_cr_newline;
/* Only declared while ENABLE_ECONV_NEWLINE_OPTION is defined (see above). */
00032 #ifdef ENABLE_ECONV_NEWLINE_OPTION
00033 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
00034 #endif
00035 static VALUE sym_partial_input;
00036
/* File-local VALUE slots named after the rb_econv_result_t outcomes (presumably their Symbol forms). */
00037 static VALUE sym_invalid_byte_sequence;
00038 static VALUE sym_undefined_conversion;
00039 static VALUE sym_destination_buffer_full;
00040 static VALUE sym_source_buffer_empty;
00041 static VALUE sym_finished;
00042 static VALUE sym_after_output;
00043 static VALUE sym_incomplete_input;
00044
/* Forward declaration; the definition is not part of the visible chunk. */
00045 static unsigned char *
00046 allocate_converted_string(const char *sname, const char *dname,
00047 const unsigned char *str, size_t len,
00048 unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
00049 size_t *dst_len_ptr);
00050
00051
00052
/*
 * Per-transcoder conversion state. One instance is created by
 * rb_transcoding_open_by_transcoder() and driven by transcode_restartable0(),
 * which stores everything it needs to resume after a suspension here.
 */
00053 typedef struct rb_transcoding {
/* The transcoder (tables and callbacks) this state belongs to. */
00054 const rb_transcoder *transcoder;
00055
/* Copied from the flags argument of rb_transcoding_open_by_transcoder(). */
00056 int flags;
00057
/* Number of the resume label transcode_restartable0() jumps to on the next call (0 = start from the top). */
00058 int resume_position;
/* Lookup progress for the character in flight; kept here so it survives a suspension. */
00059 unsigned int next_table;
00060 VALUE next_info;
00061 unsigned char next_byte;
/* Index of the next byte to emit while a STR1 output string is being written. */
00062 unsigned int output_index;
00063
/* Bytes of the current character already saved at the start of readbuf. */
00064 ssize_t recognized_len;
/* Bytes stored in readbuf right after recognized_len that must be fed to the converter again. */
00065 ssize_t readagain_len;
/* Inline array when transcoder->max_input fits in it, heap pointer otherwise; access via TRANSCODING_READBUF. */
00066 union {
00067 unsigned char ary[8];
00068 unsigned char *ptr;
00069 } readbuf;
00070
/* Read cursor and fill level of the staged output held in writebuf. */
00071 ssize_t writebuf_off;
00072 ssize_t writebuf_len;
/* Inline array when transcoder->max_output fits in it, heap pointer otherwise; access via TRANSCODING_WRITEBUF. */
00073 union {
00074 unsigned char ary[8];
00075 unsigned char *ptr;
00076 } writebuf;
00077
/*
 * Transcoder-private state: embedded in ary when transcoder->state_size fits,
 * otherwise a heap block referenced by ptr. The double member only forces
 * alignment (as its name says).
 */
00078 union rb_transcoding_state_t {
00079 void *ptr;
00080 char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)];
00081 double dummy_for_alignment;
00082 } state;
00083 } rb_transcoding;
/* Address of the read buffer: the inline array or the heap block, chosen by max_input. */
00084 #define TRANSCODING_READBUF(tc) \
00085 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
00086 (tc)->readbuf.ary : \
00087 (tc)->readbuf.ptr)
/* Address of the write buffer: the inline array or the heap block, chosen by max_output. */
00088 #define TRANSCODING_WRITEBUF(tc) \
00089 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
00090 (tc)->writebuf.ary : \
00091 (tc)->writebuf.ptr)
/* Capacity of the buffer TRANSCODING_WRITEBUF yields: the array size, or max_output for the heap block. */
00092 #define TRANSCODING_WRITEBUF_SIZE(tc) \
00093 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
00094 sizeof((tc)->writebuf.ary) : \
00095 (size_t)(tc)->transcoder->max_output)
/* Largest state_size that is stored inline in the state union. */
00096 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
/* Address of the transcoder-private state: embedded bytes or the heap block, chosen by state_size. */
00097 #define TRANSCODING_STATE(tc) \
00098 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
00099 (tc)->state.ary : \
00100 (tc)->state.ptr)
00101
/*
 * One stage of a converter pipeline: a transcoding plus the buffer that
 * receives its output. rb_econv_add_transcoder_at() allocates the buffer and
 * starts with out_data_start == out_data_end == out_buf_start (no data).
 */
00102 typedef struct {
00103 struct rb_transcoding *tc;
/* [out_buf_start, out_buf_end) is the allocated buffer. */
00104 unsigned char *out_buf_start;
/* [out_data_start, out_data_end) is the produced data not yet consumed. */
00105 unsigned char *out_data_start;
00106 unsigned char *out_data_end;
00107 unsigned char *out_buf_end;
/* Result of the most recent conversion call for this stage; starts as econv_source_buffer_empty. */
00108 rb_econv_result_t last_result;
00109 } rb_econv_elem_t;
00110
/*
 * A converter: an ordered array of stages (elems) plus bookkeeping.
 * rb_econv_alloc() sets every field to 0/NULL except num_allocated and
 * last_error.result.
 */
00111 struct rb_econv_t {
/* ECONV_* flags; filled in by rb_econv_open0() and rb_econv_open(). */
00112 int flags;
/* Names given to rb_econv_open0() (both "" for a converter without transcoders). */
00113 const char *source_encoding_name;
00114 const char *destination_encoding_name;
00115
/* Initialised to 0 by rb_econv_alloc(); presumably set once conversion begins -- not shown in this chunk. */
00116 int started;
00117
/* Replacement string settings; all cleared by rb_econv_alloc(), configured elsewhere. */
00118 const unsigned char *replacement_str;
00119 size_t replacement_len;
00120 const char *replacement_enc;
00121 int replacement_allocated;
00122
/* Input-side buffer pointers; NULL after allocation. NOTE(review): usage is outside this chunk. */
00123 unsigned char *in_buf_start;
00124 unsigned char *in_data_start;
00125 unsigned char *in_data_end;
00126 unsigned char *in_buf_end;
/* Stage array: num_allocated slots, the first num_trans in use. */
00127 rb_econv_elem_t *elems;
00128 int num_allocated;
00129 int num_trans;
/* Compared with the stage index in trans_sweep() to decide whether a stage still gets ECONV_PARTIAL_INPUT. */
00130 int num_finished;
/* Last stage whose transcoder is not a decorator; maintained by rb_econv_add_transcoder_at(). */
00131 struct rb_transcoding *last_tc;
00132
00133
/* Details of the most recent error; result starts as econv_source_buffer_empty, everything else as NULL/0. */
00134 struct {
00135 rb_econv_result_t result;
00136 struct rb_transcoding *error_tc;
00137 const char *source_encoding;
00138 const char *destination_encoding;
00139 const unsigned char *error_bytes_start;
00140 size_t error_bytes_len;
00141 size_t readagain_len;
00142 } last_error;
00143
00144
00145
/* Encoding objects; NULL after rb_econv_alloc(), assigned outside this chunk. */
00146 rb_encoding *source_encoding;
00147 rb_encoding *destination_encoding;
00148 };
00149
00150
00151
00152
00153
/* True when the source name is the empty string; the dname argument is not evaluated. */
00154 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
00155
/* Registry record for one (source, destination) encoding pair. */
00156 typedef struct {
00157 const char *sname;
00158 const char *dname;
/* Library name recorded by declare_transcoder(); load_transcoder_entry() requires it on demand. NULL when none. */
00159 const char *lib;
/* Set by rb_register_transcoder(); NULL until a transcoder has been registered for the pair. */
00160 const rb_transcoder *transcoder;
00161 } transcoder_entry_t;
00162
/*
 * Two-level registry: source name -> (st_table: destination name ->
 * transcoder_entry_t *). The table itself is created outside this chunk.
 */
00163 static st_table *transcoder_table;
00164
00165 static transcoder_entry_t *
00166 make_transcoder_entry(const char *sname, const char *dname)
00167 {
00168 st_data_t val;
00169 st_table *table2;
00170
00171 if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
00172 val = (st_data_t)st_init_strcasetable();
00173 st_add_direct(transcoder_table, (st_data_t)sname, val);
00174 }
00175 table2 = (st_table *)val;
00176 if (!st_lookup(table2, (st_data_t)dname, &val)) {
00177 transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
00178 entry->sname = sname;
00179 entry->dname = dname;
00180 entry->lib = NULL;
00181 entry->transcoder = NULL;
00182 val = (st_data_t)entry;
00183 st_add_direct(table2, (st_data_t)dname, val);
00184 }
00185 return (transcoder_entry_t *)val;
00186 }
00187
00188 static transcoder_entry_t *
00189 get_transcoder_entry(const char *sname, const char *dname)
00190 {
00191 st_data_t val;
00192 st_table *table2;
00193
00194 if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
00195 return NULL;
00196 }
00197 table2 = (st_table *)val;
00198 if (!st_lookup(table2, (st_data_t)dname, &val)) {
00199 return NULL;
00200 }
00201 return (transcoder_entry_t *)val;
00202 }
00203
00204 void
00205 rb_register_transcoder(const rb_transcoder *tr)
00206 {
00207 const char *const sname = tr->src_encoding;
00208 const char *const dname = tr->dst_encoding;
00209
00210 transcoder_entry_t *entry;
00211
00212 entry = make_transcoder_entry(sname, dname);
00213 if (entry->transcoder) {
00214 rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
00215 sname, dname);
00216 }
00217
00218 entry->transcoder = tr;
00219 }
00220
00221 static void
00222 declare_transcoder(const char *sname, const char *dname, const char *lib)
00223 {
00224 transcoder_entry_t *entry;
00225
00226 entry = make_transcoder_entry(sname, dname);
00227 entry->lib = lib;
00228 }
00229
00230 static const char transcoder_lib_prefix[] = "enc/trans/";
00231
00232 void
00233 rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
00234 {
00235 if (!lib) {
00236 rb_raise(rb_eArgError, "invalid library name - (null)");
00237 }
00238 declare_transcoder(enc1, enc2, lib);
00239 }
00240
/* Case-insensitive comparison of two encoding names. */
00241 #define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
00242
/* Singly linked FIFO node holding an encoding name that still has to be expanded by the search. */
00243 typedef struct search_path_queue_tag {
00244 struct search_path_queue_tag *next;
00245 const char *enc;
00246 } search_path_queue_t;
00247
/* Working state of the breadth-first search in transcode_search_path(). */
00248 typedef struct {
/* Encoding name -> name of the encoding it was reached from (NULL for the start). */
00249 st_table *visited;
/* Head of the pending queue. */
00250 search_path_queue_t *queue;
/* Address of the link where the next node must be appended (tail pointer). */
00251 search_path_queue_t **queue_last_ptr;
/* Encoding currently being expanded; read by transcode_search_path_i(). */
00252 const char *base_enc;
00253 } search_path_bfs_t;
00254
00255 static int
00256 transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
00257 {
00258 const char *dname = (const char *)key;
00259 search_path_bfs_t *bfs = (search_path_bfs_t *)arg;
00260 search_path_queue_t *q;
00261
00262 if (st_lookup(bfs->visited, (st_data_t)dname, &val)) {
00263 return ST_CONTINUE;
00264 }
00265
00266 q = ALLOC(search_path_queue_t);
00267 q->enc = dname;
00268 q->next = NULL;
00269 *bfs->queue_last_ptr = q;
00270 bfs->queue_last_ptr = &q->next;
00271
00272 st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
00273 return ST_CONTINUE;
00274 }
00275
00276 static int
00277 transcode_search_path(const char *sname, const char *dname,
00278 void (*callback)(const char *sname, const char *dname, int depth, void *arg),
00279 void *arg)
00280 {
00281 search_path_bfs_t bfs;
00282 search_path_queue_t *q;
00283 st_data_t val;
00284 st_table *table2;
00285 int found;
00286 int pathlen = -1;
00287
00288 if (encoding_equal(sname, dname))
00289 return -1;
00290
00291 q = ALLOC(search_path_queue_t);
00292 q->enc = sname;
00293 q->next = NULL;
00294 bfs.queue_last_ptr = &q->next;
00295 bfs.queue = q;
00296
00297 bfs.visited = st_init_strcasetable();
00298 st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
00299
00300 while (bfs.queue) {
00301 q = bfs.queue;
00302 bfs.queue = q->next;
00303 if (!bfs.queue)
00304 bfs.queue_last_ptr = &bfs.queue;
00305
00306 if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
00307 xfree(q);
00308 continue;
00309 }
00310 table2 = (st_table *)val;
00311
00312 if (st_lookup(table2, (st_data_t)dname, &val)) {
00313 st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
00314 xfree(q);
00315 found = 1;
00316 goto cleanup;
00317 }
00318
00319 bfs.base_enc = q->enc;
00320 st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
00321 bfs.base_enc = NULL;
00322
00323 xfree(q);
00324 }
00325 found = 0;
00326
00327 cleanup:
00328 while (bfs.queue) {
00329 q = bfs.queue;
00330 bfs.queue = q->next;
00331 xfree(q);
00332 }
00333
00334 if (found) {
00335 const char *enc = dname;
00336 int depth;
00337 pathlen = 0;
00338 while (1) {
00339 st_lookup(bfs.visited, (st_data_t)enc, &val);
00340 if (!val)
00341 break;
00342 pathlen++;
00343 enc = (const char *)val;
00344 }
00345 depth = pathlen;
00346 enc = dname;
00347 while (1) {
00348 st_lookup(bfs.visited, (st_data_t)enc, &val);
00349 if (!val)
00350 break;
00351 callback((const char *)val, enc, --depth, arg);
00352 enc = (const char *)val;
00353 }
00354 }
00355
00356 st_free_table(bfs.visited);
00357
00358 return pathlen;
00359 }
00360
00361 static const rb_transcoder *
00362 load_transcoder_entry(transcoder_entry_t *entry)
00363 {
00364 if (entry->transcoder)
00365 return entry->transcoder;
00366
00367 if (entry->lib) {
00368 const char *const lib = entry->lib;
00369 const size_t len = strlen(lib);
00370 const size_t total_len = sizeof(transcoder_lib_prefix) - 1 + len;
00371 const VALUE fn = rb_str_new(0, total_len);
00372 char *const path = RSTRING_PTR(fn);
00373 const int safe = rb_safe_level();
00374
00375 memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
00376 memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len);
00377 rb_str_set_len(fn, total_len);
00378 FL_UNSET(fn, FL_TAINT);
00379 OBJ_FREEZE(fn);
00380 rb_require_safe(fn, safe > 3 ? 3 : safe);
00381 }
00382
00383 if (entry->transcoder)
00384 return entry->transcoder;
00385
00386 return NULL;
00387 }
00388
00389 static const char*
00390 get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
00391 {
00392 if (encoding_equal(encname, "UTF-8")) {
00393 *len_ret = 3;
00394 *repl_encname_ptr = "UTF-8";
00395 return "\xEF\xBF\xBD";
00396 }
00397 else {
00398 *len_ret = 1;
00399 *repl_encname_ptr = "US-ASCII";
00400 return "?";
00401 }
00402 }
00403
00404
00405
00406
00407
00408 static const unsigned char *
00409 transcode_char_start(rb_transcoding *tc,
00410 const unsigned char *in_start,
00411 const unsigned char *inchar_start,
00412 const unsigned char *in_p,
00413 size_t *char_len_ptr)
00414 {
00415 const unsigned char *ptr;
00416 if (inchar_start - in_start < tc->recognized_len) {
00417 MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len,
00418 inchar_start, unsigned char, in_p - inchar_start);
00419 ptr = TRANSCODING_READBUF(tc);
00420 }
00421 else {
00422 ptr = inchar_start - tc->recognized_len;
00423 }
00424 *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
00425 return ptr;
00426 }
00427
/*
 * Core conversion loop of one transcoding, written as a resumable routine.
 *
 * in_pos/in_stop:   current input position (updated) and end of input
 * out_pos/out_stop: current output position (updated) and end of output
 * tc:               conversion state; everything needed to resume lives here
 * opt:              ECONV_PARTIAL_INPUT / ECONV_AFTER_OUTPUT are tested below
 *
 * Every return goes through SUSPEND, which stores a label number in
 * tc->resume_position and places a matching label right after the return.
 * The next call dispatches on that number and continues where it left off.
 * The numbers used by the SUSPEND* calls therefore have to stay unique and in
 * sync with the dispatch switch.
 *
 * Returns the rb_econv_result_t passed to the SUSPEND that ended the call.
 */
00428 static rb_econv_result_t
00429 transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
00430 const unsigned char *in_stop, unsigned char *out_stop,
00431 rb_transcoding *tc,
00432 const int opt)
00433 {
00434 const rb_transcoder *tr = tc->transcoder;
00435 int unitlen = tr->input_unit_length;
00436 ssize_t readagain_len = 0;
00437
/* inchar_start marks the first byte of the current character inside this call's input; in_p is the read cursor. */
00438 const unsigned char *inchar_start;
00439 const unsigned char *in_p;
00440
00441 unsigned char *out_p;
00442
00443 in_p = inchar_start = *in_pos;
00444
00445 out_p = *out_pos;
00446
/*
 * SUSPEND(ret, num): record resume label `num`, append the bytes of the
 * current character consumed in this call to the read buffer (after the
 * tc->recognized_len bytes already there), publish in_p/out_p to the caller,
 * add those bytes to recognized_len and, if readagain_len is set, move that
 * many trailing bytes from "recognized" to "to be read again". Then return
 * `ret`; the label that follows is the re-entry point.
 */
00447 #define SUSPEND(ret, num) \
00448 do { \
00449 tc->resume_position = (num); \
00450 if (0 < in_p - inchar_start) \
00451 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
00452 inchar_start, unsigned char, in_p - inchar_start); \
00453 *in_pos = in_p; \
00454 *out_pos = out_p; \
00455 tc->recognized_len += in_p - inchar_start; \
00456 if (readagain_len) { \
00457 tc->recognized_len -= readagain_len; \
00458 tc->readagain_len = readagain_len; \
00459 } \
00460 return (ret); \
00461 resume_label ## num:; \
00462 } while (0)
/* SUSPEND_OBUF(num): keep suspending with econv_destination_buffer_full until at least one output byte fits. */
00463 #define SUSPEND_OBUF(num) \
00464 do { \
00465 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
00466 } while (0)
00467
/* SUSPEND_AFTER_OUTPUT(num): with ECONV_AFTER_OUTPUT set, suspend once this call has produced any output. */
00468 #define SUSPEND_AFTER_OUTPUT(num) \
00469 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
00470 SUSPEND(econv_after_output, num); \
00471 }
00472
/* These names alias fields of tc so that their values survive a suspension. */
00473 #define next_table (tc->next_table)
00474 #define next_info (tc->next_info)
00475 #define next_byte (tc->next_byte)
00476 #define writebuf_len (tc->writebuf_len)
00477 #define writebuf_off (tc->writebuf_off)
00478
/* Re-entry dispatch: jump to the label recorded by the SUSPEND that ended the previous call. */
00479 switch (tc->resume_position) {
00480 case 0: break;
00481 case 1: goto resume_label1;
00482 case 2: goto resume_label2;
00483 case 3: goto resume_label3;
00484 case 4: goto resume_label4;
00485 case 5: goto resume_label5;
00486 case 6: goto resume_label6;
00487 case 7: goto resume_label7;
00488 case 8: goto resume_label8;
00489 case 9: goto resume_label9;
00490 case 10: goto resume_label10;
00491 case 11: goto resume_label11;
00492 case 12: goto resume_label12;
00493 case 13: goto resume_label13;
00494 case 14: goto resume_label14;
00495 case 15: goto resume_label15;
00496 case 16: goto resume_label16;
00497 case 17: goto resume_label17;
00498 case 18: goto resume_label18;
00499 case 19: goto resume_label19;
00500 case 20: goto resume_label20;
00501 case 21: goto resume_label21;
00502 case 22: goto resume_label22;
00503 case 23: goto resume_label23;
00504 case 24: goto resume_label24;
00505 case 25: goto resume_label25;
00506 case 26: goto resume_label26;
00507 case 27: goto resume_label27;
00508 case 28: goto resume_label28;
00509 case 29: goto resume_label29;
00510 case 30: goto resume_label30;
00511 case 31: goto resume_label31;
00512 case 32: goto resume_label32;
00513 case 33: goto resume_label33;
00514 case 34: goto resume_label34;
00515 }
00516
/* Main loop: each iteration starts a new character at the root lookup table. */
00517 while (1) {
00518 inchar_start = in_p;
00519 tc->recognized_len = 0;
00520 next_table = tr->conv_tree_start;
00521
00522 SUSPEND_AFTER_OUTPUT(24);
00523
/* Input exhausted at a character boundary: leave the loop (final input) or wait for more (partial input). */
00524 if (in_stop <= in_p) {
00525 if (!(opt & ECONV_PARTIAL_INPUT))
00526 break;
00527 SUSPEND(econv_source_buffer_empty, 7);
00528 continue;
00529 }
00530
/* Accessors for the lookup node selected by next_table: byte 0 and 1 of its base hold the accepted byte range, the following bytes map a byte to an index into its info words. */
00531 #define BYTE_ADDR(index) (tr->byte_array + (index))
00532 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
00533 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
00534 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
00535 #define BL_MIN_BYTE (BL_BASE[0])
00536 #define BL_MAX_BYTE (BL_BASE[1])
00537 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
00538 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
00539
00540 next_byte = (unsigned char)*in_p++;
/* Look next_byte up in the current table; a byte outside the table's range is INVALID. */
00541 follow_byte:
00542 if (next_byte < BL_MIN_BYTE || BL_MAX_BYTE < next_byte)
00543 next_info = INVALID;
00544 else {
00545 next_info = (VALUE)BL_ACTION(next_byte);
00546 }
/* Act on next_info; its low five bits select the instruction. */
00547 follow_info:
00548 switch (next_info & 0x1F) {
/* NOMAP: stage the input bytes of this character in the write buffer and emit them unchanged. */
00549 case NOMAP:
00550 {
00551 const unsigned char *p = inchar_start;
00552 writebuf_off = 0;
00553 while (p < in_p) {
00554 TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++;
00555 }
00556 writebuf_len = writebuf_off;
00557 writebuf_off = 0;
00558 while (writebuf_off < writebuf_len) {
00559 SUSPEND_OBUF(3);
00560 *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00561 }
00562 }
00563 continue;
/* Low two bits clear: next_info designates the next lookup table, so another input byte is needed. */
00564 case 0x00: case 0x04: case 0x08: case 0x0C:
00565 case 0x10: case 0x14: case 0x18: case 0x1C:
00566 SUSPEND_AFTER_OUTPUT(25);
/* No more input in the middle of a character: incomplete (final input) or wait for more (partial input). */
00567 while (in_p >= in_stop) {
00568 if (!(opt & ECONV_PARTIAL_INPUT))
00569 goto incomplete;
00570 SUSPEND(econv_source_buffer_empty, 5);
00571 }
00572 next_byte = (unsigned char)*in_p++;
00573 next_table = (unsigned int)next_info;
00574 goto follow_byte;
/* ZERObt: the character produces no output. */
00575 case ZERObt:
00576 continue;
/* ONEbt .. GB4bt: the output bytes are extracted from next_info itself, one SUSPEND_OBUF per byte. */
00577 case ONEbt:
00578 SUSPEND_OBUF(9); *out_p++ = getBT1(next_info);
00579 continue;
00580 case TWObt:
00581 SUSPEND_OBUF(10); *out_p++ = getBT1(next_info);
00582 SUSPEND_OBUF(21); *out_p++ = getBT2(next_info);
00583 continue;
00584 case THREEbt:
00585 SUSPEND_OBUF(11); *out_p++ = getBT1(next_info);
00586 SUSPEND_OBUF(15); *out_p++ = getBT2(next_info);
00587 SUSPEND_OBUF(16); *out_p++ = getBT3(next_info);
00588 continue;
00589 case FOURbt:
00590 SUSPEND_OBUF(12); *out_p++ = getBT0(next_info);
00591 SUSPEND_OBUF(17); *out_p++ = getBT1(next_info);
00592 SUSPEND_OBUF(18); *out_p++ = getBT2(next_info);
00593 SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
00594 continue;
00595 case GB4bt:
00596 SUSPEND_OBUF(29); *out_p++ = getGB4bt0(next_info);
00597 SUSPEND_OBUF(30); *out_p++ = getGB4bt1(next_info);
00598 SUSPEND_OBUF(31); *out_p++ = getGB4bt2(next_info);
00599 SUSPEND_OBUF(32); *out_p++ = getGB4bt3(next_info);
00600 continue;
/* STR1: emit a byte string stored in byte_array; tc->output_index tracks progress across suspensions. */
00601 case STR1:
00602 tc->output_index = 0;
00603 while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(next_info)))) {
00604 SUSPEND_OBUF(28); *out_p++ = BYTE_ADDR(STR1_BYTEINDEX(next_info))[1+tc->output_index];
00605 tc->output_index++;
00606 }
00607 continue;
/* FUNii / FUNsi: a transcoder callback computes a new next_info (from the info word / from the character bytes), which is then interpreted again. */
00608 case FUNii:
00609 next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
00610 goto follow_info;
00611 case FUNsi:
00612 {
00613 const unsigned char *char_start;
00614 size_t char_len;
00615 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00616 next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
00617 goto follow_info;
00618 }
/*
 * FUNio / FUNso / FUNsio: a transcoder callback writes the output. It writes
 * straight into the caller's buffer when at least max_output bytes are free;
 * otherwise into the write buffer, which is then drained byte by byte.
 * The `break` leaves the switch and reaches the `continue` after it.
 */
00619 case FUNio:
00620 SUSPEND_OBUF(13);
00621 if (tr->max_output <= out_stop - out_p)
00622 out_p += tr->func_io(TRANSCODING_STATE(tc),
00623 next_info, out_p, out_stop - out_p);
00624 else {
00625 writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
00626 next_info,
00627 TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00628 writebuf_off = 0;
00629 while (writebuf_off < writebuf_len) {
00630 SUSPEND_OBUF(20);
00631 *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00632 }
00633 }
00634 break;
00635 case FUNso:
00636 {
00637 const unsigned char *char_start;
00638 size_t char_len;
00639 SUSPEND_OBUF(14);
00640 if (tr->max_output <= out_stop - out_p) {
00641 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00642 out_p += tr->func_so(TRANSCODING_STATE(tc),
00643 char_start, (size_t)char_len,
00644 out_p, out_stop - out_p);
00645 }
00646 else {
00647 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00648 writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
00649 char_start, (size_t)char_len,
00650 TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00651 writebuf_off = 0;
00652 while (writebuf_off < writebuf_len) {
00653 SUSPEND_OBUF(22);
00654 *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00655 }
00656 }
00657 break;
00658 }
00659 case FUNsio:
00660 {
00661 const unsigned char *char_start;
00662 size_t char_len;
00663 SUSPEND_OBUF(33);
00664 if (tr->max_output <= out_stop - out_p) {
00665 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00666 out_p += tr->func_sio(TRANSCODING_STATE(tc),
00667 char_start, (size_t)char_len, next_info,
00668 out_p, out_stop - out_p);
00669 }
00670 else {
00671 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00672 writebuf_len = tr->func_sio(TRANSCODING_STATE(tc),
00673 char_start, (size_t)char_len, next_info,
00674 TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00675 writebuf_off = 0;
00676 while (writebuf_off < writebuf_len) {
00677 SUSPEND_OBUF(34);
00678 *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00679 }
00680 }
00681 break;
00682 }
/*
 * INVALID: decide how many bytes the error covers, in multiples of the
 * transcoder's input unit length (unitlen).
 *  - At most one unit seen so far: extend the span to a full unit, waiting
 *    for more input under ECONV_PARTIAL_INPUT, or to the end of the input if
 *    less than a unit is available.
 *  - More than one unit seen: only whole units before the offending unit are
 *    discarded; the remaining bytes are scheduled to be read again
 *    (readagain_len, applied by SUSPEND).
 */
00683 case INVALID:
00684 if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
00685 if (tc->recognized_len + (in_p - inchar_start) < unitlen)
00686 SUSPEND_AFTER_OUTPUT(26);
00687 while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
00688 in_p = in_stop;
00689 SUSPEND(econv_source_buffer_empty, 8);
00690 }
00691 if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
00692 in_p = in_stop;
00693 }
00694 else {
00695 in_p = inchar_start + (unitlen - tc->recognized_len);
00696 }
00697 }
00698 else {
/* invalid_len counts every byte of the character so far, including the one that made it invalid. */
00699 ssize_t invalid_len;
00700 ssize_t discard_len;
00701 invalid_len = tc->recognized_len + (in_p - inchar_start);
00702 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
00703 readagain_len = invalid_len - discard_len;
00704 }
00705 goto invalid;
00706 case UNDEF:
00707 goto undef;
00708 default:
00709 rb_raise(rb_eRuntimeError, "unknown transcoding instruction");
00710 }
00711 continue;
00712
/* Error exits: report the condition; when the caller resumes, carry on with the next character. */
00713 invalid:
00714 SUSPEND(econv_invalid_byte_sequence, 1);
00715 continue;
00716
00717 incomplete:
00718 SUSPEND(econv_incomplete_input, 27);
00719 continue;
00720
00721 undef:
00722 SUSPEND(econv_undefined_conversion, 2);
00723 continue;
00724 }
00725
00726
/* All input consumed: let the transcoder emit its trailing bytes, directly or through the write buffer as above. */
00727 if (tr->finish_func) {
00728 SUSPEND_OBUF(4);
00729 if (tr->max_output <= out_stop - out_p) {
00730 out_p += tr->finish_func(TRANSCODING_STATE(tc),
00731 out_p, out_stop - out_p);
00732 }
00733 else {
00734 writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
00735 TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00736 writebuf_off = 0;
00737 while (writebuf_off < writebuf_len) {
00738 SUSPEND_OBUF(23);
00739 *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00740 }
00741 }
00742 }
/* Terminal state: this call and every later call return econv_finished. */
00743 while (1)
00744 SUSPEND(econv_finished, 6);
00745 #undef SUSPEND
00746 #undef next_table
00747 #undef next_info
00748 #undef next_byte
00749 #undef writebuf_len
00750 #undef writebuf_off
00751 }
00752
00753 static rb_econv_result_t
00754 transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
00755 const unsigned char *in_stop, unsigned char *out_stop,
00756 rb_transcoding *tc,
00757 const int opt)
00758 {
00759 if (tc->readagain_len) {
00760 unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
00761 const unsigned char *readagain_pos = readagain_buf;
00762 const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
00763 rb_econv_result_t res;
00764
00765 MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
00766 unsigned char, tc->readagain_len);
00767 tc->readagain_len = 0;
00768 res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
00769 if (res != econv_source_buffer_empty) {
00770 MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len,
00771 readagain_pos, unsigned char, readagain_stop - readagain_pos);
00772 tc->readagain_len += readagain_stop - readagain_pos;
00773 return res;
00774 }
00775 }
00776 return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
00777 }
00778
00779 static rb_transcoding *
00780 rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
00781 {
00782 rb_transcoding *tc;
00783
00784 tc = ALLOC(rb_transcoding);
00785 tc->transcoder = tr;
00786 tc->flags = flags;
00787 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
00788 tc->state.ptr = xmalloc(tr->state_size);
00789 if (tr->state_init_func) {
00790 (tr->state_init_func)(TRANSCODING_STATE(tc));
00791 }
00792 tc->resume_position = 0;
00793 tc->recognized_len = 0;
00794 tc->readagain_len = 0;
00795 tc->writebuf_len = 0;
00796 tc->writebuf_off = 0;
00797 if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
00798 tc->readbuf.ptr = xmalloc(tr->max_input);
00799 }
00800 if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
00801 tc->writebuf.ptr = xmalloc(tr->max_output);
00802 }
00803 return tc;
00804 }
00805
00806 static rb_econv_result_t
00807 rb_transcoding_convert(rb_transcoding *tc,
00808 const unsigned char **input_ptr, const unsigned char *input_stop,
00809 unsigned char **output_ptr, unsigned char *output_stop,
00810 int flags)
00811 {
00812 return transcode_restartable(
00813 input_ptr, output_ptr,
00814 input_stop, output_stop,
00815 tc, flags);
00816 }
00817
00818 static void
00819 rb_transcoding_close(rb_transcoding *tc)
00820 {
00821 const rb_transcoder *tr = tc->transcoder;
00822 if (tr->state_fini_func) {
00823 (tr->state_fini_func)(TRANSCODING_STATE(tc));
00824 }
00825 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
00826 xfree(tc->state.ptr);
00827 if ((int)sizeof(tc->readbuf.ary) < tr->max_input)
00828 xfree(tc->readbuf.ptr);
00829 if ((int)sizeof(tc->writebuf.ary) < tr->max_output)
00830 xfree(tc->writebuf.ptr);
00831 xfree(tc);
00832 }
00833
00834 static size_t
00835 rb_transcoding_memsize(rb_transcoding *tc)
00836 {
00837 size_t size = sizeof(rb_transcoding);
00838 const rb_transcoder *tr = tc->transcoder;
00839
00840 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
00841 size += tr->state_size;
00842 }
00843 if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
00844 size += tr->max_input;
00845 }
00846 if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
00847 size += tr->max_output;
00848 }
00849 return size;
00850 }
00851
00852 static rb_econv_t *
00853 rb_econv_alloc(int n_hint)
00854 {
00855 rb_econv_t *ec;
00856
00857 if (n_hint <= 0)
00858 n_hint = 1;
00859
00860 ec = ALLOC(rb_econv_t);
00861 ec->flags = 0;
00862 ec->source_encoding_name = NULL;
00863 ec->destination_encoding_name = NULL;
00864 ec->started = 0;
00865 ec->replacement_str = NULL;
00866 ec->replacement_len = 0;
00867 ec->replacement_enc = NULL;
00868 ec->replacement_allocated = 0;
00869 ec->in_buf_start = NULL;
00870 ec->in_data_start = NULL;
00871 ec->in_data_end = NULL;
00872 ec->in_buf_end = NULL;
00873 ec->num_allocated = n_hint;
00874 ec->num_trans = 0;
00875 ec->elems = ALLOC_N(rb_econv_elem_t, ec->num_allocated);
00876 ec->num_finished = 0;
00877 ec->last_tc = NULL;
00878 ec->last_error.result = econv_source_buffer_empty;
00879 ec->last_error.error_tc = NULL;
00880 ec->last_error.source_encoding = NULL;
00881 ec->last_error.destination_encoding = NULL;
00882 ec->last_error.error_bytes_start = NULL;
00883 ec->last_error.error_bytes_len = 0;
00884 ec->last_error.readagain_len = 0;
00885 ec->source_encoding = NULL;
00886 ec->destination_encoding = NULL;
00887 return ec;
00888 }
00889
00890 static int
00891 rb_econv_add_transcoder_at(rb_econv_t *ec, const rb_transcoder *tr, int i)
00892 {
00893 int n, j;
00894 int bufsize = 4096;
00895 unsigned char *p;
00896
00897 if (ec->num_trans == ec->num_allocated) {
00898 n = ec->num_allocated * 2;
00899 REALLOC_N(ec->elems, rb_econv_elem_t, n);
00900 ec->num_allocated = n;
00901 }
00902
00903 p = xmalloc(bufsize);
00904
00905 MEMMOVE(ec->elems+i+1, ec->elems+i, rb_econv_elem_t, ec->num_trans-i);
00906
00907 ec->elems[i].tc = rb_transcoding_open_by_transcoder(tr, 0);
00908 ec->elems[i].out_buf_start = p;
00909 ec->elems[i].out_buf_end = p + bufsize;
00910 ec->elems[i].out_data_start = p;
00911 ec->elems[i].out_data_end = p;
00912 ec->elems[i].last_result = econv_source_buffer_empty;
00913
00914 ec->num_trans++;
00915
00916 if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
00917 for (j = ec->num_trans-1; i <= j; j--) {
00918 rb_transcoding *tc = ec->elems[j].tc;
00919 const rb_transcoder *tr2 = tc->transcoder;
00920 if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
00921 ec->last_tc = tc;
00922 break;
00923 }
00924 }
00925
00926 return 0;
00927 }
00928
00929 static rb_econv_t *
00930 rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
00931 {
00932 rb_econv_t *ec;
00933 int i, ret;
00934
00935 for (i = 0; i < n; i++) {
00936 const rb_transcoder *tr;
00937 tr = load_transcoder_entry(entries[i]);
00938 if (!tr)
00939 return NULL;
00940 }
00941
00942 ec = rb_econv_alloc(n);
00943
00944 for (i = 0; i < n; i++) {
00945 const rb_transcoder *tr = load_transcoder_entry(entries[i]);
00946 ret = rb_econv_add_transcoder_at(ec, tr, ec->num_trans);
00947 if (ret == -1) {
00948 rb_econv_close(ec);
00949 return NULL;
00950 }
00951 }
00952
00953 return ec;
00954 }
00955
/* Accumulator handed to trans_open_i() through transcode_search_path(). */
00956 struct trans_open_t {
/* Entries of the path, indexed by depth; allocated by the first callback invocation (NULL until then). */
00957 transcoder_entry_t **entries;
/* Extra slots to reserve beyond the path itself when the array is allocated. */
00958 int num_additional;
00959 };
00960
00961 static void
00962 trans_open_i(const char *sname, const char *dname, int depth, void *arg)
00963 {
00964 struct trans_open_t *toarg = arg;
00965
00966 if (!toarg->entries) {
00967 toarg->entries = ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional);
00968 }
00969 toarg->entries[depth] = get_transcoder_entry(sname, dname);
00970 }
00971
00972 static rb_econv_t *
00973 rb_econv_open0(const char *sname, const char *dname, int ecflags)
00974 {
00975 transcoder_entry_t **entries = NULL;
00976 int num_trans;
00977 rb_econv_t *ec;
00978
00979 int sidx, didx;
00980
00981 if (*sname) {
00982 sidx = rb_enc_find_index(sname);
00983 if (0 <= sidx) {
00984 rb_enc_from_index(sidx);
00985 }
00986 }
00987
00988 if (*dname) {
00989 didx = rb_enc_find_index(dname);
00990 if (0 <= didx) {
00991 rb_enc_from_index(didx);
00992 }
00993 }
00994
00995 if (*sname == '\0' && *dname == '\0') {
00996 num_trans = 0;
00997 entries = NULL;
00998 sname = dname = "";
00999 }
01000 else {
01001 struct trans_open_t toarg;
01002 toarg.entries = NULL;
01003 toarg.num_additional = 0;
01004 num_trans = transcode_search_path(sname, dname, trans_open_i, (void *)&toarg);
01005 entries = toarg.entries;
01006 if (num_trans < 0) {
01007 xfree(entries);
01008 return NULL;
01009 }
01010 }
01011
01012 ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
01013 xfree(entries);
01014 if (!ec)
01015 return NULL;
01016
01017 ec->flags = ecflags;
01018 ec->source_encoding_name = sname;
01019 ec->destination_encoding_name = dname;
01020
01021 return ec;
01022 }
01023
01024 #define MAX_ECFLAGS_DECORATORS 32
01025
01026 static int
01027 decorator_names(int ecflags, const char **decorators_ret)
01028 {
01029 int num_decorators;
01030
01031 switch (ecflags & ECONV_NEWLINE_DECORATOR_MASK) {
01032 case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
01033 case ECONV_CRLF_NEWLINE_DECORATOR:
01034 case ECONV_CR_NEWLINE_DECORATOR:
01035 case 0:
01036 break;
01037 default:
01038 return -1;
01039 }
01040
01041 if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
01042 (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
01043 return -1;
01044
01045 num_decorators = 0;
01046
01047 if (ecflags & ECONV_XML_TEXT_DECORATOR)
01048 decorators_ret[num_decorators++] = "xml_text_escape";
01049 if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
01050 decorators_ret[num_decorators++] = "xml_attr_content_escape";
01051 if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
01052 decorators_ret[num_decorators++] = "xml_attr_quote";
01053
01054 if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
01055 decorators_ret[num_decorators++] = "crlf_newline";
01056 if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
01057 decorators_ret[num_decorators++] = "cr_newline";
01058 if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
01059 decorators_ret[num_decorators++] = "universal_newline";
01060
01061 return num_decorators;
01062 }
01063
01064 rb_econv_t *
01065 rb_econv_open(const char *sname, const char *dname, int ecflags)
01066 {
01067 rb_econv_t *ec;
01068 int num_decorators;
01069 const char *decorators[MAX_ECFLAGS_DECORATORS];
01070 int i;
01071
01072 num_decorators = decorator_names(ecflags, decorators);
01073 if (num_decorators == -1)
01074 return NULL;
01075
01076 ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
01077 if (!ec)
01078 return NULL;
01079
01080 for (i = 0; i < num_decorators; i++)
01081 if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
01082 rb_econv_close(ec);
01083 return NULL;
01084 }
01085
01086 ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
01087
01088 return ec;
01089 }
01090
01091 static int
01092 trans_sweep(rb_econv_t *ec,
01093 const unsigned char **input_ptr, const unsigned char *input_stop,
01094 unsigned char **output_ptr, unsigned char *output_stop,
01095 int flags,
01096 int start)
01097 {
01098 int try;
01099 int i, f;
01100
01101 const unsigned char **ipp, *is, *iold;
01102 unsigned char **opp, *os, *oold;
01103 rb_econv_result_t res;
01104
01105 try = 1;
01106 while (try) {
01107 try = 0;
01108 for (i = start; i < ec->num_trans; i++) {
01109 rb_econv_elem_t *te = &ec->elems[i];
01110
01111 if (i == 0) {
01112 ipp = input_ptr;
01113 is = input_stop;
01114 }
01115 else {
01116 rb_econv_elem_t *prev_te = &ec->elems[i-1];
01117 ipp = (const unsigned char **)&prev_te->out_data_start;
01118 is = prev_te->out_data_end;
01119 }
01120
01121 if (i == ec->num_trans-1) {
01122 opp = output_ptr;
01123 os = output_stop;
01124 }
01125 else {
01126 if (te->out_buf_start != te->out_data_start) {
01127 ssize_t len = te->out_data_end - te->out_data_start;
01128 ssize_t off = te->out_data_start - te->out_buf_start;
01129 MEMMOVE(te->out_buf_start, te->out_data_start, unsigned char, len);
01130 te->out_data_start = te->out_buf_start;
01131 te->out_data_end -= off;
01132 }
01133 opp = &te->out_data_end;
01134 os = te->out_buf_end;
01135 }
01136
01137 f = flags;
01138 if (ec->num_finished != i)
01139 f |= ECONV_PARTIAL_INPUT;
01140 if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
01141 start = 1;
01142 flags &= ~ECONV_AFTER_OUTPUT;
01143 }
01144 if (i != 0)
01145 f &= ~ECONV_AFTER_OUTPUT;
01146 iold = *ipp;
01147 oold = *opp;
01148 te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
01149 if (iold != *ipp || oold != *opp)
01150 try = 1;
01151
01152 switch (res) {
01153 case econv_invalid_byte_sequence:
01154 case econv_incomplete_input:
01155 case econv_undefined_conversion:
01156 case econv_after_output:
01157 return i;
01158
01159 case econv_destination_buffer_full:
01160 case econv_source_buffer_empty:
01161 break;
01162
01163 case econv_finished:
01164 ec->num_finished = i+1;
01165 break;
01166 }
01167 }
01168 }
01169 return -1;
01170 }
01171
01172 static rb_econv_result_t
01173 rb_trans_conv(rb_econv_t *ec,
01174 const unsigned char **input_ptr, const unsigned char *input_stop,
01175 unsigned char **output_ptr, unsigned char *output_stop,
01176 int flags,
01177 int *result_position_ptr)
01178 {
01179 int i;
01180 int needreport_index;
01181 int sweep_start;
01182
01183 unsigned char empty_buf;
01184 unsigned char *empty_ptr = &empty_buf;
01185
01186 if (!input_ptr) {
01187 input_ptr = (const unsigned char **)&empty_ptr;
01188 input_stop = empty_ptr;
01189 }
01190
01191 if (!output_ptr) {
01192 output_ptr = &empty_ptr;
01193 output_stop = empty_ptr;
01194 }
01195
01196 if (ec->elems[0].last_result == econv_after_output)
01197 ec->elems[0].last_result = econv_source_buffer_empty;
01198
01199 needreport_index = -1;
01200 for (i = ec->num_trans-1; 0 <= i; i--) {
01201 switch (ec->elems[i].last_result) {
01202 case econv_invalid_byte_sequence:
01203 case econv_incomplete_input:
01204 case econv_undefined_conversion:
01205 case econv_after_output:
01206 case econv_finished:
01207 sweep_start = i+1;
01208 needreport_index = i;
01209 goto found_needreport;
01210
01211 case econv_destination_buffer_full:
01212 case econv_source_buffer_empty:
01213 break;
01214
01215 default:
01216 rb_bug("unexpected transcode last result");
01217 }
01218 }
01219
01220
01221
01222 if (ec->elems[ec->num_trans-1].last_result == econv_destination_buffer_full &&
01223 (flags & ECONV_AFTER_OUTPUT)) {
01224 rb_econv_result_t res;
01225
01226 res = rb_trans_conv(ec, NULL, NULL, output_ptr, output_stop,
01227 (flags & ~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT,
01228 result_position_ptr);
01229
01230 if (res == econv_source_buffer_empty)
01231 return econv_after_output;
01232 return res;
01233 }
01234
01235 sweep_start = 0;
01236
01237 found_needreport:
01238
01239 do {
01240 needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
01241 sweep_start = needreport_index + 1;
01242 } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
01243
01244 for (i = ec->num_trans-1; 0 <= i; i--) {
01245 if (ec->elems[i].last_result != econv_source_buffer_empty) {
01246 rb_econv_result_t res = ec->elems[i].last_result;
01247 if (res == econv_invalid_byte_sequence ||
01248 res == econv_incomplete_input ||
01249 res == econv_undefined_conversion ||
01250 res == econv_after_output) {
01251 ec->elems[i].last_result = econv_source_buffer_empty;
01252 }
01253 if (result_position_ptr)
01254 *result_position_ptr = i;
01255 return res;
01256 }
01257 }
01258 if (result_position_ptr)
01259 *result_position_ptr = -1;
01260 return econv_source_buffer_empty;
01261 }
01262
01263 static rb_econv_result_t
01264 rb_econv_convert0(rb_econv_t *ec,
01265 const unsigned char **input_ptr, const unsigned char *input_stop,
01266 unsigned char **output_ptr, unsigned char *output_stop,
01267 int flags)
01268 {
01269 rb_econv_result_t res;
01270 int result_position;
01271 int has_output = 0;
01272
01273 memset(&ec->last_error, 0, sizeof(ec->last_error));
01274
01275 if (ec->num_trans == 0) {
01276 size_t len;
01277 if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) {
01278 if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) {
01279 len = output_stop - *output_ptr;
01280 memcpy(*output_ptr, ec->in_data_start, len);
01281 *output_ptr = output_stop;
01282 ec->in_data_start += len;
01283 res = econv_destination_buffer_full;
01284 goto gotresult;
01285 }
01286 len = ec->in_data_end - ec->in_data_start;
01287 memcpy(*output_ptr, ec->in_data_start, len);
01288 *output_ptr += len;
01289 ec->in_data_start = ec->in_data_end = ec->in_buf_start;
01290 if (flags & ECONV_AFTER_OUTPUT) {
01291 res = econv_after_output;
01292 goto gotresult;
01293 }
01294 }
01295 if (output_stop - *output_ptr < input_stop - *input_ptr) {
01296 len = output_stop - *output_ptr;
01297 }
01298 else {
01299 len = input_stop - *input_ptr;
01300 }
01301 if (0 < len && (flags & ECONV_AFTER_OUTPUT)) {
01302 *(*output_ptr)++ = *(*input_ptr)++;
01303 res = econv_after_output;
01304 goto gotresult;
01305 }
01306 memcpy(*output_ptr, *input_ptr, len);
01307 *output_ptr += len;
01308 *input_ptr += len;
01309 if (*input_ptr != input_stop)
01310 res = econv_destination_buffer_full;
01311 else if (flags & ECONV_PARTIAL_INPUT)
01312 res = econv_source_buffer_empty;
01313 else
01314 res = econv_finished;
01315 goto gotresult;
01316 }
01317
01318 if (ec->elems[ec->num_trans-1].out_data_start) {
01319 unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
01320 unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
01321 if (data_start != data_end) {
01322 size_t len;
01323 if (output_stop - *output_ptr < data_end - data_start) {
01324 len = output_stop - *output_ptr;
01325 memcpy(*output_ptr, data_start, len);
01326 *output_ptr = output_stop;
01327 ec->elems[ec->num_trans-1].out_data_start += len;
01328 res = econv_destination_buffer_full;
01329 goto gotresult;
01330 }
01331 len = data_end - data_start;
01332 memcpy(*output_ptr, data_start, len);
01333 *output_ptr += len;
01334 ec->elems[ec->num_trans-1].out_data_start =
01335 ec->elems[ec->num_trans-1].out_data_end =
01336 ec->elems[ec->num_trans-1].out_buf_start;
01337 has_output = 1;
01338 }
01339 }
01340
01341 if (ec->in_buf_start &&
01342 ec->in_data_start != ec->in_data_end) {
01343 res = rb_trans_conv(ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
01344 (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
01345 if (res != econv_source_buffer_empty)
01346 goto gotresult;
01347 }
01348
01349 if (has_output &&
01350 (flags & ECONV_AFTER_OUTPUT) &&
01351 *input_ptr != input_stop) {
01352 input_stop = *input_ptr;
01353 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
01354 if (res == econv_source_buffer_empty)
01355 res = econv_after_output;
01356 }
01357 else if ((flags & ECONV_AFTER_OUTPUT) ||
01358 ec->num_trans == 1) {
01359 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
01360 }
01361 else {
01362 flags |= ECONV_AFTER_OUTPUT;
01363 do {
01364 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
01365 } while (res == econv_after_output);
01366 }
01367
01368 gotresult:
01369 ec->last_error.result = res;
01370 if (res == econv_invalid_byte_sequence ||
01371 res == econv_incomplete_input ||
01372 res == econv_undefined_conversion) {
01373 rb_transcoding *error_tc = ec->elems[result_position].tc;
01374 ec->last_error.error_tc = error_tc;
01375 ec->last_error.source_encoding = error_tc->transcoder->src_encoding;
01376 ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding;
01377 ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc);
01378 ec->last_error.error_bytes_len = error_tc->recognized_len;
01379 ec->last_error.readagain_len = error_tc->readagain_len;
01380 }
01381
01382 return res;
01383 }
01384
01385 static int output_replacement_character(rb_econv_t *ec);
01386
01387 static int
01388 output_hex_charref(rb_econv_t *ec)
01389 {
01390 int ret;
01391 unsigned char utfbuf[1024];
01392 const unsigned char *utf;
01393 size_t utf_len;
01394 int utf_allocated = 0;
01395 char charef_buf[16];
01396 const unsigned char *p;
01397
01398 if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) {
01399 utf = ec->last_error.error_bytes_start;
01400 utf_len = ec->last_error.error_bytes_len;
01401 }
01402 else {
01403 utf = allocate_converted_string(ec->last_error.source_encoding, "UTF-32BE",
01404 ec->last_error.error_bytes_start, ec->last_error.error_bytes_len,
01405 utfbuf, sizeof(utfbuf),
01406 &utf_len);
01407 if (!utf)
01408 return -1;
01409 if (utf != utfbuf && utf != ec->last_error.error_bytes_start)
01410 utf_allocated = 1;
01411 }
01412
01413 if (utf_len % 4 != 0)
01414 goto fail;
01415
01416 p = utf;
01417 while (4 <= utf_len) {
01418 unsigned int u = 0;
01419 u += p[0] << 24;
01420 u += p[1] << 16;
01421 u += p[2] << 8;
01422 u += p[3];
01423 snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u);
01424
01425 ret = rb_econv_insert_output(ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII");
01426 if (ret == -1)
01427 goto fail;
01428
01429 p += 4;
01430 utf_len -= 4;
01431 }
01432
01433 if (utf_allocated)
01434 xfree((void *)utf);
01435 return 0;
01436
01437 fail:
01438 if (utf_allocated)
01439 xfree((void *)utf);
01440 return -1;
01441 }
01442
01443 rb_econv_result_t
01444 rb_econv_convert(rb_econv_t *ec,
01445 const unsigned char **input_ptr, const unsigned char *input_stop,
01446 unsigned char **output_ptr, unsigned char *output_stop,
01447 int flags)
01448 {
01449 rb_econv_result_t ret;
01450
01451 unsigned char empty_buf;
01452 unsigned char *empty_ptr = &empty_buf;
01453
01454 ec->started = 1;
01455
01456 if (!input_ptr) {
01457 input_ptr = (const unsigned char **)&empty_ptr;
01458 input_stop = empty_ptr;
01459 }
01460
01461 if (!output_ptr) {
01462 output_ptr = &empty_ptr;
01463 output_stop = empty_ptr;
01464 }
01465
01466 resume:
01467 ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
01468
01469 if (ret == econv_invalid_byte_sequence ||
01470 ret == econv_incomplete_input) {
01471
01472
01473 switch (ec->flags & ECONV_INVALID_MASK) {
01474 case ECONV_INVALID_REPLACE:
01475 if (output_replacement_character(ec) == 0)
01476 goto resume;
01477 }
01478 }
01479
01480 if (ret == econv_undefined_conversion) {
01481
01482
01483
01484 switch (ec->flags & ECONV_UNDEF_MASK) {
01485 case ECONV_UNDEF_REPLACE:
01486 if (output_replacement_character(ec) == 0)
01487 goto resume;
01488 break;
01489
01490 case ECONV_UNDEF_HEX_CHARREF:
01491 if (output_hex_charref(ec) == 0)
01492 goto resume;
01493 break;
01494 }
01495 }
01496
01497 return ret;
01498 }
01499
01500 const char *
01501 rb_econv_encoding_to_insert_output(rb_econv_t *ec)
01502 {
01503 rb_transcoding *tc = ec->last_tc;
01504 const rb_transcoder *tr;
01505
01506 if (tc == NULL)
01507 return "";
01508
01509 tr = tc->transcoder;
01510
01511 if (tr->asciicompat_type == asciicompat_encoder)
01512 return tr->src_encoding;
01513 return tr->dst_encoding;
01514 }
01515
01516 static unsigned char *
01517 allocate_converted_string(const char *sname, const char *dname,
01518 const unsigned char *str, size_t len,
01519 unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
01520 size_t *dst_len_ptr)
01521 {
01522 unsigned char *dst_str;
01523 size_t dst_len;
01524 size_t dst_bufsize;
01525
01526 rb_econv_t *ec;
01527 rb_econv_result_t res;
01528
01529 const unsigned char *sp;
01530 unsigned char *dp;
01531
01532 if (caller_dst_buf)
01533 dst_bufsize = caller_dst_bufsize;
01534 else if (len == 0)
01535 dst_bufsize = 1;
01536 else
01537 dst_bufsize = len;
01538
01539 ec = rb_econv_open(sname, dname, 0);
01540 if (ec == NULL)
01541 return NULL;
01542 if (caller_dst_buf)
01543 dst_str = caller_dst_buf;
01544 else
01545 dst_str = xmalloc(dst_bufsize);
01546 dst_len = 0;
01547 sp = str;
01548 dp = dst_str+dst_len;
01549 res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
01550 dst_len = dp - dst_str;
01551 while (res == econv_destination_buffer_full) {
01552 if (SIZE_MAX/2 < dst_bufsize) {
01553 goto fail;
01554 }
01555 dst_bufsize *= 2;
01556 if (dst_str == caller_dst_buf) {
01557 unsigned char *tmp;
01558 tmp = xmalloc(dst_bufsize);
01559 memcpy(tmp, dst_str, dst_bufsize/2);
01560 dst_str = tmp;
01561 }
01562 else {
01563 dst_str = xrealloc(dst_str, dst_bufsize);
01564 }
01565 dp = dst_str+dst_len;
01566 res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
01567 dst_len = dp - dst_str;
01568 }
01569 if (res != econv_finished) {
01570 goto fail;
01571 }
01572 rb_econv_close(ec);
01573 *dst_len_ptr = dst_len;
01574 return dst_str;
01575
01576 fail:
01577 if (dst_str != caller_dst_buf)
01578 xfree(dst_str);
01579 rb_econv_close(ec);
01580 return NULL;
01581 }
01582
01583
01584 int
01585 rb_econv_insert_output(rb_econv_t *ec,
01586 const unsigned char *str, size_t len, const char *str_encoding)
01587 {
01588 const char *insert_encoding = rb_econv_encoding_to_insert_output(ec);
01589 unsigned char insert_buf[4096];
01590 const unsigned char *insert_str = NULL;
01591 size_t insert_len;
01592
01593 int last_trans_index;
01594 rb_transcoding *tc;
01595
01596 unsigned char **buf_start_p;
01597 unsigned char **data_start_p;
01598 unsigned char **data_end_p;
01599 unsigned char **buf_end_p;
01600
01601 size_t need;
01602
01603 ec->started = 1;
01604
01605 if (len == 0)
01606 return 0;
01607
01608 if (encoding_equal(insert_encoding, str_encoding)) {
01609 insert_str = str;
01610 insert_len = len;
01611 }
01612 else {
01613 insert_str = allocate_converted_string(str_encoding, insert_encoding,
01614 str, len, insert_buf, sizeof(insert_buf), &insert_len);
01615 if (insert_str == NULL)
01616 return -1;
01617 }
01618
01619 need = insert_len;
01620
01621 last_trans_index = ec->num_trans-1;
01622 if (ec->num_trans == 0) {
01623 tc = NULL;
01624 buf_start_p = &ec->in_buf_start;
01625 data_start_p = &ec->in_data_start;
01626 data_end_p = &ec->in_data_end;
01627 buf_end_p = &ec->in_buf_end;
01628 }
01629 else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) {
01630 tc = ec->elems[last_trans_index].tc;
01631 need += tc->readagain_len;
01632 if (need < insert_len)
01633 goto fail;
01634 if (last_trans_index == 0) {
01635 buf_start_p = &ec->in_buf_start;
01636 data_start_p = &ec->in_data_start;
01637 data_end_p = &ec->in_data_end;
01638 buf_end_p = &ec->in_buf_end;
01639 }
01640 else {
01641 rb_econv_elem_t *ee = &ec->elems[last_trans_index-1];
01642 buf_start_p = &ee->out_buf_start;
01643 data_start_p = &ee->out_data_start;
01644 data_end_p = &ee->out_data_end;
01645 buf_end_p = &ee->out_buf_end;
01646 }
01647 }
01648 else {
01649 rb_econv_elem_t *ee = &ec->elems[last_trans_index];
01650 buf_start_p = &ee->out_buf_start;
01651 data_start_p = &ee->out_data_start;
01652 data_end_p = &ee->out_data_end;
01653 buf_end_p = &ee->out_buf_end;
01654 tc = ec->elems[last_trans_index].tc;
01655 }
01656
01657 if (*buf_start_p == NULL) {
01658 unsigned char *buf = xmalloc(need);
01659 *buf_start_p = buf;
01660 *data_start_p = buf;
01661 *data_end_p = buf;
01662 *buf_end_p = buf+need;
01663 }
01664 else if ((size_t)(*buf_end_p - *data_end_p) < need) {
01665 MEMMOVE(*buf_start_p, *data_start_p, unsigned char, *data_end_p - *data_start_p);
01666 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
01667 *data_start_p = *buf_start_p;
01668 if ((size_t)(*buf_end_p - *data_end_p) < need) {
01669 unsigned char *buf;
01670 size_t s = (*data_end_p - *buf_start_p) + need;
01671 if (s < need)
01672 goto fail;
01673 buf = xrealloc(*buf_start_p, s);
01674 *data_start_p = buf;
01675 *data_end_p = buf + (*data_end_p - *buf_start_p);
01676 *buf_start_p = buf;
01677 *buf_end_p = buf + s;
01678 }
01679 }
01680
01681 memcpy(*data_end_p, insert_str, insert_len);
01682 *data_end_p += insert_len;
01683 if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
01684 memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
01685 *data_end_p += tc->readagain_len;
01686 tc->readagain_len = 0;
01687 }
01688
01689 if (insert_str != str && insert_str != insert_buf)
01690 xfree((void*)insert_str);
01691 return 0;
01692
01693 fail:
01694 if (insert_str != str && insert_str != insert_buf)
01695 xfree((void*)insert_str);
01696 return -1;
01697 }
01698
01699 void
01700 rb_econv_close(rb_econv_t *ec)
01701 {
01702 int i;
01703
01704 if (ec->replacement_allocated) {
01705 xfree((void *)ec->replacement_str);
01706 }
01707 for (i = 0; i < ec->num_trans; i++) {
01708 rb_transcoding_close(ec->elems[i].tc);
01709 if (ec->elems[i].out_buf_start)
01710 xfree(ec->elems[i].out_buf_start);
01711 }
01712 xfree(ec->in_buf_start);
01713 xfree(ec->elems);
01714 xfree(ec);
01715 }
01716
01717 size_t
01718 rb_econv_memsize(rb_econv_t *ec)
01719 {
01720 size_t size = sizeof(rb_econv_t);
01721 int i;
01722
01723 if (ec->replacement_allocated) {
01724 size += ec->replacement_len;
01725 }
01726 for (i = 0; i < ec->num_trans; i++) {
01727 size += rb_transcoding_memsize(ec->elems[i].tc);
01728
01729 if (ec->elems[i].out_buf_start) {
01730 size += ec->elems[i].out_buf_end - ec->elems[i].out_buf_start;
01731 }
01732 }
01733 size += ec->in_buf_end - ec->in_buf_start;
01734 size += sizeof(rb_econv_elem_t) * ec->num_allocated;
01735
01736 return size;
01737 }
01738
01739 int
01740 rb_econv_putbackable(rb_econv_t *ec)
01741 {
01742 if (ec->num_trans == 0)
01743 return 0;
01744 #if SIZEOF_SIZE_T > SIZEOF_INT
01745 if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX;
01746 #endif
01747 return (int)ec->elems[0].tc->readagain_len;
01748 }
01749
01750 void
01751 rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
01752 {
01753 rb_transcoding *tc;
01754 if (ec->num_trans == 0 || n == 0)
01755 return;
01756 tc = ec->elems[0].tc;
01757 memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n);
01758 tc->readagain_len -= n;
01759 }
01760
/*
 * State shared between rb_econv_asciicompat_encoding() and its st_foreach
 * callback asciicompat_encoding_i().
 */
struct asciicompat_encoding_t {
    /* Result: destination encoding stored by the callback; NULL until then. */
    const char *ascii_compat_name;
    /* Query: the encoding name being looked up. */
    const char *ascii_incompat_name;
};
01765
01766 static int
01767 asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
01768 {
01769 struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg;
01770 transcoder_entry_t *entry = (transcoder_entry_t *)val;
01771 const rb_transcoder *tr;
01772
01773 if (DECORATOR_P(entry->sname, entry->dname))
01774 return ST_CONTINUE;
01775 tr = load_transcoder_entry(entry);
01776 if (tr && tr->asciicompat_type == asciicompat_decoder) {
01777 data->ascii_compat_name = tr->dst_encoding;
01778 return ST_STOP;
01779 }
01780 return ST_CONTINUE;
01781 }
01782
01783 const char *
01784 rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
01785 {
01786 st_data_t v;
01787 st_table *table2;
01788 struct asciicompat_encoding_t data;
01789
01790 if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
01791 return NULL;
01792 table2 = (st_table *)v;
01793
01794
01795
01796
01797
01798
01799
01800
01801 if (table2->num_entries != 1)
01802 return NULL;
01803
01804 data.ascii_incompat_name = ascii_incompat_name;
01805 data.ascii_compat_name = NULL;
01806 st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
01807 return data.ascii_compat_name;
01808 }
01809
01810 VALUE
01811 rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
01812 {
01813 unsigned const char *sp, *se;
01814 unsigned char *ds, *dp, *de;
01815 rb_econv_result_t res;
01816 int max_output;
01817
01818 if (NIL_P(dst)) {
01819 dst = rb_str_buf_new(len);
01820 if (ec->destination_encoding)
01821 rb_enc_associate(dst, ec->destination_encoding);
01822 }
01823
01824 if (ec->last_tc)
01825 max_output = ec->last_tc->transcoder->max_output;
01826 else
01827 max_output = 1;
01828
01829 do {
01830 long dlen = RSTRING_LEN(dst);
01831 if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
01832 unsigned long new_capa = (unsigned long)dlen + len + max_output;
01833 if (LONG_MAX < new_capa)
01834 rb_raise(rb_eArgError, "too long string");
01835 rb_str_resize(dst, new_capa);
01836 rb_str_set_len(dst, dlen);
01837 }
01838 sp = (const unsigned char *)ss;
01839 se = sp + len;
01840 ds = (unsigned char *)RSTRING_PTR(dst);
01841 de = ds + rb_str_capacity(dst);
01842 dp = ds += dlen;
01843 res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
01844 len -= (const char *)sp - ss;
01845 ss = (const char *)sp;
01846 rb_str_set_len(dst, dlen + (dp - ds));
01847 rb_econv_check_error(ec);
01848 } while (res == econv_destination_buffer_full);
01849
01850 return dst;
01851 }
01852
01853 VALUE
01854 rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
01855 {
01856 src = rb_str_new_frozen(src);
01857 dst = rb_econv_append(ec, RSTRING_PTR(src) + off, len, dst, flags);
01858 RB_GC_GUARD(src);
01859 OBJ_INFECT_RAW(dst, src);
01860 return dst;
01861 }
01862
01863 VALUE
01864 rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
01865 {
01866 return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), dst, flags);
01867 }
01868
01869 VALUE
01870 rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
01871 {
01872 return rb_econv_substr_append(ec, src, byteoff, bytesize, Qnil, flags);
01873 }
01874
01875 VALUE
01876 rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
01877 {
01878 return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags);
01879 }
01880
01881 static int
01882 rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
01883 {
01884 transcoder_entry_t *entry;
01885 const rb_transcoder *tr;
01886
01887 if (ec->started != 0)
01888 return -1;
01889
01890 entry = get_transcoder_entry(sname, dname);
01891 if (!entry)
01892 return -1;
01893
01894 tr = load_transcoder_entry(entry);
01895 if (!tr) return -1;
01896
01897 return rb_econv_add_transcoder_at(ec, tr, n);
01898 }
01899
01900 static int
01901 rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n)
01902 {
01903 return rb_econv_add_converter(ec, "", decorator_name, n);
01904 }
01905
01906 int
01907 rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
01908 {
01909 const rb_transcoder *tr;
01910
01911 if (ec->num_trans == 0)
01912 return rb_econv_decorate_at(ec, decorator_name, 0);
01913
01914 tr = ec->elems[0].tc->transcoder;
01915
01916 if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
01917 tr->asciicompat_type == asciicompat_decoder)
01918 return rb_econv_decorate_at(ec, decorator_name, 1);
01919
01920 return rb_econv_decorate_at(ec, decorator_name, 0);
01921 }
01922
01923 int
01924 rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
01925 {
01926 const rb_transcoder *tr;
01927
01928 if (ec->num_trans == 0)
01929 return rb_econv_decorate_at(ec, decorator_name, 0);
01930
01931 tr = ec->elems[ec->num_trans-1].tc->transcoder;
01932
01933 if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
01934 tr->asciicompat_type == asciicompat_encoder)
01935 return rb_econv_decorate_at(ec, decorator_name, ec->num_trans-1);
01936
01937 return rb_econv_decorate_at(ec, decorator_name, ec->num_trans);
01938 }
01939
01940 void
01941 rb_econv_binmode(rb_econv_t *ec)
01942 {
01943 const char *dname = 0;
01944
01945 switch (ec->flags & ECONV_NEWLINE_DECORATOR_MASK) {
01946 case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
01947 dname = "universal_newline";
01948 break;
01949 case ECONV_CRLF_NEWLINE_DECORATOR:
01950 dname = "crlf_newline";
01951 break;
01952 case ECONV_CR_NEWLINE_DECORATOR:
01953 dname = "cr_newline";
01954 break;
01955 }
01956
01957 if (dname) {
01958 const rb_transcoder *transcoder = get_transcoder_entry("", dname)->transcoder;
01959 int num_trans = ec->num_trans;
01960 int i, j = 0;
01961
01962 for (i=0; i < num_trans; i++) {
01963 if (transcoder == ec->elems[i].tc->transcoder) {
01964 rb_transcoding_close(ec->elems[i].tc);
01965 xfree(ec->elems[i].out_buf_start);
01966 ec->num_trans--;
01967 }
01968 else
01969 ec->elems[j++] = ec->elems[i];
01970 }
01971 }
01972
01973 ec->flags &= ~ECONV_NEWLINE_DECORATOR_MASK;
01974 }
01975
01976 static VALUE
01977 econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
01978 {
01979 int has_description = 0;
01980
01981 if (NIL_P(mesg))
01982 mesg = rb_str_new(NULL, 0);
01983
01984 if (*sname != '\0' || *dname != '\0') {
01985 if (*sname == '\0')
01986 rb_str_cat2(mesg, dname);
01987 else if (*dname == '\0')
01988 rb_str_cat2(mesg, sname);
01989 else
01990 rb_str_catf(mesg, "%s to %s", sname, dname);
01991 has_description = 1;
01992 }
01993
01994 if (ecflags & (ECONV_NEWLINE_DECORATOR_MASK|
01995 ECONV_XML_TEXT_DECORATOR|
01996 ECONV_XML_ATTR_CONTENT_DECORATOR|
01997 ECONV_XML_ATTR_QUOTE_DECORATOR)) {
01998 const char *pre = "";
01999 if (has_description)
02000 rb_str_cat2(mesg, " with ");
02001 if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
02002 rb_str_cat2(mesg, pre); pre = ",";
02003 rb_str_cat2(mesg, "universal_newline");
02004 }
02005 if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) {
02006 rb_str_cat2(mesg, pre); pre = ",";
02007 rb_str_cat2(mesg, "crlf_newline");
02008 }
02009 if (ecflags & ECONV_CR_NEWLINE_DECORATOR) {
02010 rb_str_cat2(mesg, pre); pre = ",";
02011 rb_str_cat2(mesg, "cr_newline");
02012 }
02013 if (ecflags & ECONV_XML_TEXT_DECORATOR) {
02014 rb_str_cat2(mesg, pre); pre = ",";
02015 rb_str_cat2(mesg, "xml_text");
02016 }
02017 if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) {
02018 rb_str_cat2(mesg, pre); pre = ",";
02019 rb_str_cat2(mesg, "xml_attr_content");
02020 }
02021 if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) {
02022 rb_str_cat2(mesg, pre); pre = ",";
02023 rb_str_cat2(mesg, "xml_attr_quote");
02024 }
02025 has_description = 1;
02026 }
02027 if (!has_description) {
02028 rb_str_cat2(mesg, "no-conversion");
02029 }
02030
02031 return mesg;
02032 }
02033
02034 VALUE
02035 rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
02036 {
02037 VALUE mesg, exc;
02038 mesg = rb_str_new_cstr("code converter not found (");
02039 econv_description(sname, dname, ecflags, mesg);
02040 rb_str_cat2(mesg, ")");
02041 exc = rb_exc_new3(rb_eConverterNotFoundError, mesg);
02042 return exc;
02043 }
02044
02045 static VALUE
02046 make_econv_exception(rb_econv_t *ec)
02047 {
02048 VALUE mesg, exc;
02049 if (ec->last_error.result == econv_invalid_byte_sequence ||
02050 ec->last_error.result == econv_incomplete_input) {
02051 const char *err = (const char *)ec->last_error.error_bytes_start;
02052 size_t error_len = ec->last_error.error_bytes_len;
02053 VALUE bytes = rb_str_new(err, error_len);
02054 VALUE dumped = rb_str_dump(bytes);
02055 size_t readagain_len = ec->last_error.readagain_len;
02056 VALUE bytes2 = Qnil;
02057 VALUE dumped2;
02058 int idx;
02059 if (ec->last_error.result == econv_incomplete_input) {
02060 mesg = rb_sprintf("incomplete %s on %s",
02061 StringValueCStr(dumped),
02062 ec->last_error.source_encoding);
02063 }
02064 else if (readagain_len) {
02065 bytes2 = rb_str_new(err+error_len, readagain_len);
02066 dumped2 = rb_str_dump(bytes2);
02067 mesg = rb_sprintf("%s followed by %s on %s",
02068 StringValueCStr(dumped),
02069 StringValueCStr(dumped2),
02070 ec->last_error.source_encoding);
02071 }
02072 else {
02073 mesg = rb_sprintf("%s on %s",
02074 StringValueCStr(dumped),
02075 ec->last_error.source_encoding);
02076 }
02077
02078 exc = rb_exc_new3(rb_eInvalidByteSequenceError, mesg);
02079 rb_ivar_set(exc, rb_intern("error_bytes"), bytes);
02080 rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2);
02081 rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse);
02082
02083 set_encs:
02084 rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding));
02085 rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding));
02086 idx = rb_enc_find_index(ec->last_error.source_encoding);
02087 if (0 <= idx)
02088 rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
02089 idx = rb_enc_find_index(ec->last_error.destination_encoding);
02090 if (0 <= idx)
02091 rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
02092 return exc;
02093 }
02094 if (ec->last_error.result == econv_undefined_conversion) {
02095 VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start,
02096 ec->last_error.error_bytes_len);
02097 VALUE dumped = Qnil;
02098 int idx;
02099 if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) {
02100 rb_encoding *utf8 = rb_utf8_encoding();
02101 const char *start, *end;
02102 int n;
02103 start = (const char *)ec->last_error.error_bytes_start;
02104 end = start + ec->last_error.error_bytes_len;
02105 n = rb_enc_precise_mbclen(start, end, utf8);
02106 if (MBCLEN_CHARFOUND_P(n) &&
02107 (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
02108 unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8);
02109 dumped = rb_sprintf("U+%04X", cc);
02110 }
02111 }
02112 if (dumped == Qnil)
02113 dumped = rb_str_dump(bytes);
02114 if (strcmp(ec->last_error.source_encoding,
02115 ec->source_encoding_name) == 0 &&
02116 strcmp(ec->last_error.destination_encoding,
02117 ec->destination_encoding_name) == 0) {
02118 mesg = rb_sprintf("%s from %s to %s",
02119 StringValueCStr(dumped),
02120 ec->last_error.source_encoding,
02121 ec->last_error.destination_encoding);
02122 }
02123 else {
02124 int i;
02125 mesg = rb_sprintf("%s to %s in conversion from %s",
02126 StringValueCStr(dumped),
02127 ec->last_error.destination_encoding,
02128 ec->source_encoding_name);
02129 for (i = 0; i < ec->num_trans; i++) {
02130 const rb_transcoder *tr = ec->elems[i].tc->transcoder;
02131 if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
02132 rb_str_catf(mesg, " to %s",
02133 ec->elems[i].tc->transcoder->dst_encoding);
02134 }
02135 }
02136 exc = rb_exc_new3(rb_eUndefinedConversionError, mesg);
02137 idx = rb_enc_find_index(ec->last_error.source_encoding);
02138 if (0 <= idx)
02139 rb_enc_associate_index(bytes, idx);
02140 rb_ivar_set(exc, rb_intern("error_char"), bytes);
02141 goto set_encs;
02142 }
02143 return Qnil;
02144 }
02145
02146 static void
02147 more_output_buffer(
02148 VALUE destination,
02149 unsigned char *(*resize_destination)(VALUE, size_t, size_t),
02150 int max_output,
02151 unsigned char **out_start_ptr,
02152 unsigned char **out_pos,
02153 unsigned char **out_stop_ptr)
02154 {
02155 size_t len = (*out_pos - *out_start_ptr);
02156 size_t new_len = (len + max_output) * 2;
02157 *out_start_ptr = resize_destination(destination, len, new_len);
02158 *out_pos = *out_start_ptr + len;
02159 *out_stop_ptr = *out_start_ptr + new_len;
02160 }
02161
02162 static int
02163 make_replacement(rb_econv_t *ec)
02164 {
02165 rb_transcoding *tc;
02166 const rb_transcoder *tr;
02167 const unsigned char *replacement;
02168 const char *repl_enc;
02169 const char *ins_enc;
02170 size_t len;
02171
02172 if (ec->replacement_str)
02173 return 0;
02174
02175 ins_enc = rb_econv_encoding_to_insert_output(ec);
02176
02177 tc = ec->last_tc;
02178 if (*ins_enc) {
02179 tr = tc->transcoder;
02180 rb_enc_find(tr->dst_encoding);
02181 replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc);
02182 }
02183 else {
02184 replacement = (unsigned char *)"?";
02185 len = 1;
02186 repl_enc = "";
02187 }
02188
02189 ec->replacement_str = replacement;
02190 ec->replacement_len = len;
02191 ec->replacement_enc = repl_enc;
02192 ec->replacement_allocated = 0;
02193 return 0;
02194 }
02195
02196 int
02197 rb_econv_set_replacement(rb_econv_t *ec,
02198 const unsigned char *str, size_t len, const char *encname)
02199 {
02200 unsigned char *str2;
02201 size_t len2;
02202 const char *encname2;
02203
02204 encname2 = rb_econv_encoding_to_insert_output(ec);
02205
02206 if (!*encname2 || encoding_equal(encname, encname2)) {
02207 str2 = xmalloc(len);
02208 MEMCPY(str2, str, unsigned char, len);
02209 len2 = len;
02210 encname2 = encname;
02211 }
02212 else {
02213 str2 = allocate_converted_string(encname, encname2, str, len, NULL, 0, &len2);
02214 if (!str2)
02215 return -1;
02216 }
02217
02218 if (ec->replacement_allocated) {
02219 xfree((void *)ec->replacement_str);
02220 }
02221 ec->replacement_allocated = 1;
02222 ec->replacement_str = str2;
02223 ec->replacement_len = len2;
02224 ec->replacement_enc = encname2;
02225 return 0;
02226 }
02227
02228 static int
02229 output_replacement_character(rb_econv_t *ec)
02230 {
02231 int ret;
02232
02233 if (make_replacement(ec) == -1)
02234 return -1;
02235
02236 ret = rb_econv_insert_output(ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc);
02237 if (ret == -1)
02238 return -1;
02239
02240 return 0;
02241 }
02242
02243 #if 1
02244 #define hash_fallback rb_hash_aref
02245
02246 static VALUE
02247 proc_fallback(VALUE fallback, VALUE c)
02248 {
02249 return rb_proc_call(fallback, rb_ary_new4(1, &c));
02250 }
02251
02252 static VALUE
02253 method_fallback(VALUE fallback, VALUE c)
02254 {
02255 return rb_method_call(1, &c, fallback);
02256 }
02257
02258 static VALUE
02259 aref_fallback(VALUE fallback, VALUE c)
02260 {
02261 return rb_funcall3(fallback, sym_aref, 1, &c);
02262 }
02263
02264 static void
02265 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
02266 const unsigned char *in_stop, unsigned char *out_stop,
02267 VALUE destination,
02268 unsigned char *(*resize_destination)(VALUE, size_t, size_t),
02269 const char *src_encoding,
02270 const char *dst_encoding,
02271 int ecflags,
02272 VALUE ecopts)
02273 {
02274 rb_econv_t *ec;
02275 rb_transcoding *last_tc;
02276 rb_econv_result_t ret;
02277 unsigned char *out_start = *out_pos;
02278 int max_output;
02279 VALUE exc;
02280 VALUE fallback = Qnil;
02281 VALUE (*fallback_func)(VALUE, VALUE) = 0;
02282
02283 ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
02284 if (!ec)
02285 rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
02286
02287 if (!NIL_P(ecopts) && RB_TYPE_P(ecopts, T_HASH)) {
02288 fallback = rb_hash_aref(ecopts, sym_fallback);
02289 if (RB_TYPE_P(fallback, T_HASH)) {
02290 fallback_func = hash_fallback;
02291 }
02292 else if (rb_obj_is_proc(fallback)) {
02293 fallback_func = proc_fallback;
02294 }
02295 else if (rb_obj_is_method(fallback)) {
02296 fallback_func = method_fallback;
02297 }
02298 else {
02299 fallback_func = aref_fallback;
02300 }
02301 }
02302 last_tc = ec->last_tc;
02303 max_output = last_tc ? last_tc->transcoder->max_output : 1;
02304
02305 resume:
02306 ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
02307
02308 if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
02309 VALUE rep = rb_enc_str_new(
02310 (const char *)ec->last_error.error_bytes_start,
02311 ec->last_error.error_bytes_len,
02312 rb_enc_find(ec->last_error.source_encoding));
02313 rep = (*fallback_func)(fallback, rep);
02314 if (rep != Qundef && !NIL_P(rep)) {
02315 StringValue(rep);
02316 ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
02317 RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
02318 if ((int)ret == -1) {
02319 rb_raise(rb_eArgError, "too big fallback string");
02320 }
02321 goto resume;
02322 }
02323 }
02324
02325 if (ret == econv_invalid_byte_sequence ||
02326 ret == econv_incomplete_input ||
02327 ret == econv_undefined_conversion) {
02328 exc = make_econv_exception(ec);
02329 rb_econv_close(ec);
02330 rb_exc_raise(exc);
02331 }
02332
02333 if (ret == econv_destination_buffer_full) {
02334 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
02335 goto resume;
02336 }
02337
02338 rb_econv_close(ec);
02339 return;
02340 }
02341 #else
02342
02343 static void
02344 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
02345 const unsigned char *in_stop, unsigned char *out_stop,
02346 VALUE destination,
02347 unsigned char *(*resize_destination)(VALUE, size_t, size_t),
02348 const char *src_encoding,
02349 const char *dst_encoding,
02350 int ecflags,
02351 VALUE ecopts)
02352 {
02353 rb_econv_t *ec;
02354 rb_transcoding *last_tc;
02355 rb_econv_result_t ret;
02356 unsigned char *out_start = *out_pos;
02357 const unsigned char *ptr;
02358 int max_output;
02359 VALUE exc;
02360
02361 ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
02362 if (!ec)
02363 rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
02364
02365 last_tc = ec->last_tc;
02366 max_output = last_tc ? last_tc->transcoder->max_output : 1;
02367
02368 ret = econv_source_buffer_empty;
02369 ptr = *in_pos;
02370 while (ret != econv_finished) {
02371 unsigned char input_byte;
02372 const unsigned char *p = &input_byte;
02373
02374 if (ret == econv_source_buffer_empty) {
02375 if (ptr < in_stop) {
02376 input_byte = *ptr;
02377 ret = rb_econv_convert(ec, &p, p+1, out_pos, out_stop, ECONV_PARTIAL_INPUT);
02378 }
02379 else {
02380 ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, 0);
02381 }
02382 }
02383 else {
02384 ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, ECONV_PARTIAL_INPUT);
02385 }
02386 if (&input_byte != p)
02387 ptr += p - &input_byte;
02388 switch (ret) {
02389 case econv_invalid_byte_sequence:
02390 case econv_incomplete_input:
02391 case econv_undefined_conversion:
02392 exc = make_econv_exception(ec);
02393 rb_econv_close(ec);
02394 rb_exc_raise(exc);
02395 break;
02396
02397 case econv_destination_buffer_full:
02398 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
02399 break;
02400
02401 case econv_source_buffer_empty:
02402 break;
02403
02404 case econv_finished:
02405 break;
02406 }
02407 }
02408 rb_econv_close(ec);
02409 *in_pos = in_stop;
02410 return;
02411 }
02412 #endif
02413
02414
02415
02416
02417
02418
02419 static unsigned char *
02420 str_transcoding_resize(VALUE destination, size_t len, size_t new_len)
02421 {
02422 rb_str_resize(destination, new_len);
02423 return (unsigned char *)RSTRING_PTR(destination);
02424 }
02425
02426 static int
02427 econv_opts(VALUE opt, int ecflags)
02428 {
02429 VALUE v;
02430
02431 v = rb_hash_aref(opt, sym_invalid);
02432 if (NIL_P(v)) {
02433 }
02434 else if (v==sym_replace) {
02435 ecflags |= ECONV_INVALID_REPLACE;
02436 }
02437 else {
02438 rb_raise(rb_eArgError, "unknown value for invalid character option");
02439 }
02440
02441 v = rb_hash_aref(opt, sym_undef);
02442 if (NIL_P(v)) {
02443 }
02444 else if (v==sym_replace) {
02445 ecflags |= ECONV_UNDEF_REPLACE;
02446 }
02447 else {
02448 rb_raise(rb_eArgError, "unknown value for undefined character option");
02449 }
02450
02451 v = rb_hash_aref(opt, sym_replace);
02452 if (!NIL_P(v) && !(ecflags & ECONV_INVALID_REPLACE)) {
02453 ecflags |= ECONV_UNDEF_REPLACE;
02454 }
02455
02456 v = rb_hash_aref(opt, sym_xml);
02457 if (!NIL_P(v)) {
02458 if (v==sym_text) {
02459 ecflags |= ECONV_XML_TEXT_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
02460 }
02461 else if (v==sym_attr) {
02462 ecflags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
02463 }
02464 else if (RB_TYPE_P(v, T_SYMBOL)) {
02465 rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v)));
02466 }
02467 else {
02468 rb_raise(rb_eArgError, "unexpected value for xml option");
02469 }
02470 }
02471
02472 #ifdef ENABLE_ECONV_NEWLINE_OPTION
02473 v = rb_hash_aref(opt, sym_newline);
02474 if (!NIL_P(v)) {
02475 ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
02476 if (v == sym_universal) {
02477 ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
02478 }
02479 else if (v == sym_crlf) {
02480 ecflags |= ECONV_CRLF_NEWLINE_DECORATOR;
02481 }
02482 else if (v == sym_cr) {
02483 ecflags |= ECONV_CR_NEWLINE_DECORATOR;
02484 }
02485 else if (v == sym_lf) {
02486
02487 }
02488 else if (SYMBOL_P(v)) {
02489 rb_raise(rb_eArgError, "unexpected value for newline option: %s",
02490 rb_id2name(SYM2ID(v)));
02491 }
02492 else {
02493 rb_raise(rb_eArgError, "unexpected value for newline option");
02494 }
02495 }
02496 else
02497 #endif
02498 {
02499 int setflags = 0, newlineflag = 0;
02500
02501 v = rb_hash_aref(opt, sym_universal_newline);
02502 if (RTEST(v))
02503 setflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
02504 newlineflag |= !NIL_P(v);
02505
02506 v = rb_hash_aref(opt, sym_crlf_newline);
02507 if (RTEST(v))
02508 setflags |= ECONV_CRLF_NEWLINE_DECORATOR;
02509 newlineflag |= !NIL_P(v);
02510
02511 v = rb_hash_aref(opt, sym_cr_newline);
02512 if (RTEST(v))
02513 setflags |= ECONV_CR_NEWLINE_DECORATOR;
02514 newlineflag |= !NIL_P(v);
02515
02516 if (newlineflag) {
02517 ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
02518 ecflags |= setflags;
02519 }
02520 }
02521
02522 return ecflags;
02523 }
02524
02525 int
02526 rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
02527 {
02528 VALUE newhash = Qnil;
02529 VALUE v;
02530
02531 if (NIL_P(opthash)) {
02532 *opts = Qnil;
02533 return ecflags;
02534 }
02535 ecflags = econv_opts(opthash, ecflags);
02536
02537 v = rb_hash_aref(opthash, sym_replace);
02538 if (!NIL_P(v)) {
02539 StringValue(v);
02540 if (rb_enc_str_coderange(v) == ENC_CODERANGE_BROKEN) {
02541 VALUE dumped = rb_str_dump(v);
02542 rb_raise(rb_eArgError, "replacement string is broken: %s as %s",
02543 StringValueCStr(dumped),
02544 rb_enc_name(rb_enc_get(v)));
02545 }
02546 v = rb_str_new_frozen(v);
02547 newhash = rb_hash_new();
02548 rb_hash_aset(newhash, sym_replace, v);
02549 }
02550
02551 v = rb_hash_aref(opthash, sym_fallback);
02552 if (!NIL_P(v)) {
02553 VALUE h = rb_check_hash_type(v);
02554 if (NIL_P(h)
02555 ? (rb_obj_is_proc(v) || rb_obj_is_method(v) || rb_respond_to(v, sym_aref))
02556 : (v = h, 1)) {
02557 if (NIL_P(newhash))
02558 newhash = rb_hash_new();
02559 rb_hash_aset(newhash, sym_fallback, v);
02560 }
02561 }
02562
02563 if (!NIL_P(newhash))
02564 rb_hash_freeze(newhash);
02565 *opts = newhash;
02566
02567 return ecflags;
02568 }
02569
02570 int
02571 rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
02572 {
02573 return rb_econv_prepare_options(opthash, opts, 0);
02574 }
02575
02576 rb_econv_t *
02577 rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
02578 {
02579 rb_econv_t *ec;
02580 VALUE replacement;
02581
02582 if (NIL_P(opthash)) {
02583 replacement = Qnil;
02584 }
02585 else {
02586 if (!RB_TYPE_P(opthash, T_HASH) || !OBJ_FROZEN(opthash))
02587 rb_bug("rb_econv_open_opts called with invalid opthash");
02588 replacement = rb_hash_aref(opthash, sym_replace);
02589 }
02590
02591 ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
02592 if (!ec)
02593 return ec;
02594
02595 if (!NIL_P(replacement)) {
02596 int ret;
02597 rb_encoding *enc = rb_enc_get(replacement);
02598
02599 ret = rb_econv_set_replacement(ec,
02600 (const unsigned char *)RSTRING_PTR(replacement),
02601 RSTRING_LEN(replacement),
02602 rb_enc_name(enc));
02603 if (ret == -1) {
02604 rb_econv_close(ec);
02605 return NULL;
02606 }
02607 }
02608 return ec;
02609 }
02610
02611 static int
02612 enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p)
02613 {
02614 rb_encoding *enc;
02615 const char *n;
02616 int encidx;
02617 VALUE encval;
02618
02619 if (((encidx = rb_to_encoding_index(encval = *arg)) < 0) ||
02620 !(enc = rb_enc_from_index(encidx))) {
02621 enc = NULL;
02622 encidx = 0;
02623 n = StringValueCStr(*arg);
02624 }
02625 else {
02626 n = rb_enc_name(enc);
02627 }
02628
02629 *name_p = n;
02630 *enc_p = enc;
02631
02632 return encidx;
02633 }
02634
02635 static int
02636 str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2,
02637 const char **sname_p, rb_encoding **senc_p,
02638 const char **dname_p, rb_encoding **denc_p)
02639 {
02640 rb_encoding *senc, *denc;
02641 const char *sname, *dname;
02642 int sencidx, dencidx;
02643
02644 dencidx = enc_arg(arg1, &dname, &denc);
02645
02646 if (NIL_P(*arg2)) {
02647 sencidx = rb_enc_get_index(str);
02648 senc = rb_enc_from_index(sencidx);
02649 sname = rb_enc_name(senc);
02650 }
02651 else {
02652 sencidx = enc_arg(arg2, &sname, &senc);
02653 }
02654
02655 *sname_p = sname;
02656 *senc_p = senc;
02657 *dname_p = dname;
02658 *denc_p = denc;
02659 return dencidx;
02660 }
02661
02662 static int
02663 str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
02664 {
02665 VALUE dest;
02666 VALUE str = *self;
02667 volatile VALUE arg1, arg2;
02668 long blen, slen;
02669 unsigned char *buf, *bp, *sp;
02670 const unsigned char *fromp;
02671 rb_encoding *senc, *denc;
02672 const char *sname, *dname;
02673 int dencidx;
02674 int explicitly_invalid_replace = TRUE;
02675
02676 rb_check_arity(argc, 0, 2);
02677
02678 if (argc == 0) {
02679 arg1 = rb_enc_default_internal();
02680 if (NIL_P(arg1)) {
02681 if (!ecflags) return -1;
02682 arg1 = rb_obj_encoding(str);
02683 }
02684 if (!(ecflags & ECONV_INVALID_MASK)) {
02685 explicitly_invalid_replace = FALSE;
02686 }
02687 ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
02688 }
02689 else {
02690 arg1 = argv[0];
02691 }
02692 arg2 = argc<=1 ? Qnil : argv[1];
02693 dencidx = str_transcode_enc_args(str, &arg1, &arg2, &sname, &senc, &dname, &denc);
02694
02695 if ((ecflags & (ECONV_NEWLINE_DECORATOR_MASK|
02696 ECONV_XML_TEXT_DECORATOR|
02697 ECONV_XML_ATTR_CONTENT_DECORATOR|
02698 ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) {
02699 if (senc && senc == denc) {
02700 if ((ecflags & ECONV_INVALID_MASK) && explicitly_invalid_replace) {
02701 VALUE rep = Qnil;
02702 if (!NIL_P(ecopts)) {
02703 rep = rb_hash_aref(ecopts, sym_replace);
02704 }
02705 dest = rb_str_scrub(str, rep);
02706 if (NIL_P(dest)) dest = str;
02707 *self = dest;
02708 return dencidx;
02709 }
02710 return NIL_P(arg2) ? -1 : dencidx;
02711 }
02712 if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) {
02713 if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
02714 return dencidx;
02715 }
02716 }
02717 if (encoding_equal(sname, dname)) {
02718 return NIL_P(arg2) ? -1 : dencidx;
02719 }
02720 }
02721 else {
02722 if (encoding_equal(sname, dname)) {
02723 sname = "";
02724 dname = "";
02725 }
02726 }
02727
02728 fromp = sp = (unsigned char *)RSTRING_PTR(str);
02729 slen = RSTRING_LEN(str);
02730 blen = slen + 30;
02731 dest = rb_str_tmp_new(blen);
02732 bp = (unsigned char *)RSTRING_PTR(dest);
02733
02734 transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
02735 if (fromp != sp+slen) {
02736 rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
02737 }
02738 buf = (unsigned char *)RSTRING_PTR(dest);
02739 *bp = '\0';
02740 rb_str_set_len(dest, bp - buf);
02741
02742
02743 if (!denc) {
02744 dencidx = rb_define_dummy_encoding(dname);
02745 }
02746 *self = dest;
02747
02748 return dencidx;
02749 }
02750
02751 static int
02752 str_transcode(int argc, VALUE *argv, VALUE *self)
02753 {
02754 VALUE opt;
02755 int ecflags = 0;
02756 VALUE ecopts = Qnil;
02757
02758 argc = rb_scan_args(argc, argv, "02:", NULL, NULL, &opt);
02759 if (!NIL_P(opt)) {
02760 ecflags = rb_econv_prepare_opts(opt, &ecopts);
02761 }
02762 return str_transcode0(argc, argv, self, ecflags, ecopts);
02763 }
02764
02765 static inline VALUE
02766 str_encode_associate(VALUE str, int encidx)
02767 {
02768 int cr = 0;
02769
02770 rb_enc_associate_index(str, encidx);
02771
02772
02773 if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {
02774 rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr);
02775 }
02776 else {
02777 cr = ENC_CODERANGE_VALID;
02778 }
02779 ENC_CODERANGE_SET(str, cr);
02780 return str;
02781 }
02782
02783
02784
02785
02786
02787
02788
02789
02790
02791
02792
02793
02794
02795
02796
02797 static VALUE
02798 str_encode_bang(int argc, VALUE *argv, VALUE str)
02799 {
02800 VALUE newstr;
02801 int encidx;
02802
02803 rb_check_frozen(str);
02804
02805 newstr = str;
02806 encidx = str_transcode(argc, argv, &newstr);
02807
02808 if (encidx < 0) return str;
02809 if (newstr == str) {
02810 rb_enc_associate_index(str, encidx);
02811 return str;
02812 }
02813 rb_str_shared_replace(str, newstr);
02814 return str_encode_associate(str, encidx);
02815 }
02816
02817 static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx);
02818
02819
02820
02821
02822
02823
02824
02825
02826
02827
02828
02829
02830
02831
02832
02833
02834
02835
02836
02837
02838
02839
02840
02841
02842
02843
02844
02845
02846
02847
02848
02849
02850
02851
02852
02853
02854
02855
02856
02857
02858
02859
02860
02861
02862
02863
02864
02865
02866
02867
02868
02869
02870
02871
02872
02873
02874
02875 static VALUE
02876 str_encode(int argc, VALUE *argv, VALUE str)
02877 {
02878 VALUE newstr = str;
02879 int encidx = str_transcode(argc, argv, &newstr);
02880 return encoded_dup(newstr, str, encidx);
02881 }
02882
02883 VALUE
02884 rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
02885 {
02886 int argc = 1;
02887 VALUE *argv = &to;
02888 VALUE newstr = str;
02889 int encidx = str_transcode0(argc, argv, &newstr, ecflags, ecopts);
02890 return encoded_dup(newstr, str, encidx);
02891 }
02892
02893 static VALUE
02894 encoded_dup(VALUE newstr, VALUE str, int encidx)
02895 {
02896 if (encidx < 0) return rb_str_dup(str);
02897 if (newstr == str) {
02898 newstr = rb_str_dup(str);
02899 rb_enc_associate_index(newstr, encidx);
02900 return newstr;
02901 }
02902 else {
02903 RBASIC_SET_CLASS(newstr, rb_obj_class(str));
02904 }
02905 return str_encode_associate(newstr, encidx);
02906 }
02907
02908 static void
02909 econv_free(void *ptr)
02910 {
02911 rb_econv_t *ec = ptr;
02912 rb_econv_close(ec);
02913 }
02914
02915 static size_t
02916 econv_memsize(const void *ptr)
02917 {
02918 return ptr ? sizeof(rb_econv_t) : 0;
02919 }
02920
02921 static const rb_data_type_t econv_data_type = {
02922 "econv",
02923 {NULL, econv_free, econv_memsize,},
02924 NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
02925 };
02926
02927 static VALUE
02928 econv_s_allocate(VALUE klass)
02929 {
02930 return TypedData_Wrap_Struct(klass, &econv_data_type, NULL);
02931 }
02932
02933 static rb_encoding *
02934 make_dummy_encoding(const char *name)
02935 {
02936 rb_encoding *enc;
02937 int idx;
02938 idx = rb_define_dummy_encoding(name);
02939 enc = rb_enc_from_index(idx);
02940 return enc;
02941 }
02942
02943 static rb_encoding *
02944 make_encoding(const char *name)
02945 {
02946 rb_encoding *enc;
02947 enc = rb_enc_find(name);
02948 if (!enc)
02949 enc = make_dummy_encoding(name);
02950 return enc;
02951 }
02952
02953 static VALUE
02954 make_encobj(const char *name)
02955 {
02956 return rb_enc_from_encoding(make_encoding(name));
02957 }
02958
02959
02960
02961
02962
02963
02964
02965
02966
02967
02968
02969
02970
02971
02972
02973
02974
02975
02976
02977 static VALUE
02978 econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
02979 {
02980 const char *arg_name, *result_name;
02981 rb_encoding *arg_enc, *result_enc;
02982
02983 enc_arg(&arg, &arg_name, &arg_enc);
02984
02985 result_name = rb_econv_asciicompat_encoding(arg_name);
02986
02987 if (result_name == NULL)
02988 return Qnil;
02989
02990 result_enc = make_encoding(result_name);
02991
02992 return rb_enc_from_encoding(result_enc);
02993 }
02994
02995 static void
02996 econv_args(int argc, VALUE *argv,
02997 volatile VALUE *snamev_p, volatile VALUE *dnamev_p,
02998 const char **sname_p, const char **dname_p,
02999 rb_encoding **senc_p, rb_encoding **denc_p,
03000 int *ecflags_p,
03001 VALUE *ecopts_p)
03002 {
03003 VALUE opt, flags_v, ecopts;
03004 int sidx, didx;
03005 const char *sname, *dname;
03006 rb_encoding *senc, *denc;
03007 int ecflags;
03008
03009 argc = rb_scan_args(argc, argv, "21:", snamev_p, dnamev_p, &flags_v, &opt);
03010
03011 if (!NIL_P(flags_v)) {
03012 if (!NIL_P(opt)) {
03013 rb_error_arity(argc + 1, 2, 3);
03014 }
03015 ecflags = NUM2INT(rb_to_int(flags_v));
03016 ecopts = Qnil;
03017 }
03018 else if (!NIL_P(opt)) {
03019 ecflags = rb_econv_prepare_opts(opt, &ecopts);
03020 }
03021 else {
03022 ecflags = 0;
03023 ecopts = Qnil;
03024 }
03025
03026 senc = NULL;
03027 sidx = rb_to_encoding_index(*snamev_p);
03028 if (0 <= sidx) {
03029 senc = rb_enc_from_index(sidx);
03030 }
03031 else {
03032 StringValue(*snamev_p);
03033 }
03034
03035 denc = NULL;
03036 didx = rb_to_encoding_index(*dnamev_p);
03037 if (0 <= didx) {
03038 denc = rb_enc_from_index(didx);
03039 }
03040 else {
03041 StringValue(*dnamev_p);
03042 }
03043
03044 sname = senc ? rb_enc_name(senc) : StringValueCStr(*snamev_p);
03045 dname = denc ? rb_enc_name(denc) : StringValueCStr(*dnamev_p);
03046
03047 *sname_p = sname;
03048 *dname_p = dname;
03049 *senc_p = senc;
03050 *denc_p = denc;
03051 *ecflags_p = ecflags;
03052 *ecopts_p = ecopts;
03053 }
03054
03055 static int
03056 decorate_convpath(VALUE convpath, int ecflags)
03057 {
03058 int num_decorators;
03059 const char *decorators[MAX_ECFLAGS_DECORATORS];
03060 int i;
03061 int n, len;
03062
03063 num_decorators = decorator_names(ecflags, decorators);
03064 if (num_decorators == -1)
03065 return -1;
03066
03067 len = n = RARRAY_LENINT(convpath);
03068 if (n != 0) {
03069 VALUE pair = RARRAY_AREF(convpath, n-1);
03070 if (RB_TYPE_P(pair, T_ARRAY)) {
03071 const char *sname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 0)));
03072 const char *dname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 1)));
03073 transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
03074 const rb_transcoder *tr = load_transcoder_entry(entry);
03075 if (!tr)
03076 return -1;
03077 if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
03078 tr->asciicompat_type == asciicompat_encoder) {
03079 n--;
03080 rb_ary_store(convpath, len + num_decorators - 1, pair);
03081 }
03082 }
03083 else {
03084 rb_ary_store(convpath, len + num_decorators - 1, pair);
03085 }
03086 }
03087
03088 for (i = 0; i < num_decorators; i++)
03089 rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i]));
03090
03091 return 0;
03092 }
03093
03094 static void
03095 search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
03096 {
03097 VALUE *ary_p = arg;
03098 VALUE v;
03099
03100 if (*ary_p == Qnil) {
03101 *ary_p = rb_ary_new();
03102 }
03103
03104 if (DECORATOR_P(sname, dname)) {
03105 v = rb_str_new_cstr(dname);
03106 }
03107 else {
03108 v = rb_assoc_new(make_encobj(sname), make_encobj(dname));
03109 }
03110 rb_ary_store(*ary_p, depth, v);
03111 }
03112
03113
03114
03115
03116
03117
03118
03119
03120
03121
03122
03123
03124
03125
03126
03127
03128
03129
03130
03131
03132
03133
03134
03135
03136
03137
03138 static VALUE
03139 econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
03140 {
03141 volatile VALUE snamev, dnamev;
03142 const char *sname, *dname;
03143 rb_encoding *senc, *denc;
03144 int ecflags;
03145 VALUE ecopts;
03146 VALUE convpath;
03147
03148 econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
03149
03150 convpath = Qnil;
03151 transcode_search_path(sname, dname, search_convpath_i, &convpath);
03152
03153 if (NIL_P(convpath))
03154 rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
03155
03156 if (decorate_convpath(convpath, ecflags) == -1)
03157 rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
03158
03159 return convpath;
03160 }
03161
03162
03163
03164
03165
03166
03167 int
03168 rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding)
03169 {
03170 VALUE convpath = Qnil;
03171 transcode_search_path(from_encoding, to_encoding, search_convpath_i,
03172 &convpath);
03173 return RTEST(convpath);
03174 }
03175
03176 struct rb_econv_init_by_convpath_t {
03177 rb_econv_t *ec;
03178 int index;
03179 int ret;
03180 };
03181
03182 static void
03183 rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg)
03184 {
03185 struct rb_econv_init_by_convpath_t *a = (struct rb_econv_init_by_convpath_t *)arg;
03186 int ret;
03187
03188 if (a->ret == -1)
03189 return;
03190
03191 ret = rb_econv_add_converter(a->ec, sname, dname, a->index);
03192
03193 a->ret = ret;
03194 return;
03195 }
03196
03197 static rb_econv_t *
03198 rb_econv_init_by_convpath(VALUE self, VALUE convpath,
03199 const char **sname_p, const char **dname_p,
03200 rb_encoding **senc_p, rb_encoding**denc_p)
03201 {
03202 rb_econv_t *ec;
03203 long i;
03204 int ret, first=1;
03205 VALUE elt;
03206 rb_encoding *senc = 0, *denc = 0;
03207 const char *sname, *dname;
03208
03209 ec = rb_econv_alloc(RARRAY_LENINT(convpath));
03210 DATA_PTR(self) = ec;
03211
03212 for (i = 0; i < RARRAY_LEN(convpath); i++) {
03213 volatile VALUE snamev, dnamev;
03214 VALUE pair;
03215 elt = rb_ary_entry(convpath, i);
03216 if (!NIL_P(pair = rb_check_array_type(elt))) {
03217 if (RARRAY_LEN(pair) != 2)
03218 rb_raise(rb_eArgError, "not a 2-element array in convpath");
03219 snamev = rb_ary_entry(pair, 0);
03220 enc_arg(&snamev, &sname, &senc);
03221 dnamev = rb_ary_entry(pair, 1);
03222 enc_arg(&dnamev, &dname, &denc);
03223 }
03224 else {
03225 sname = "";
03226 dname = StringValueCStr(elt);
03227 }
03228 if (DECORATOR_P(sname, dname)) {
03229 ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans);
03230 if (ret == -1)
03231 rb_raise(rb_eArgError, "decoration failed: %s", dname);
03232 }
03233 else {
03234 int j = ec->num_trans;
03235 struct rb_econv_init_by_convpath_t arg;
03236 arg.ec = ec;
03237 arg.index = ec->num_trans;
03238 arg.ret = 0;
03239 ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
03240 if (ret == -1 || arg.ret == -1)
03241 rb_raise(rb_eArgError, "adding conversion failed: %s to %s", sname, dname);
03242 if (first) {
03243 first = 0;
03244 *senc_p = senc;
03245 *sname_p = ec->elems[j].tc->transcoder->src_encoding;
03246 }
03247 *denc_p = denc;
03248 *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding;
03249 }
03250 }
03251
03252 if (first) {
03253 *senc_p = NULL;
03254 *denc_p = NULL;
03255 *sname_p = "";
03256 *dname_p = "";
03257 }
03258
03259 ec->source_encoding_name = *sname_p;
03260 ec->destination_encoding_name = *dname_p;
03261
03262 return ec;
03263 }
03264
03265
03266
03267
03268
03269
03270
03271
03272
03273
03274
03275
03276
03277
03278
03279
03280
03281
03282
03283
03284
03285
03286
03287
03288
03289
03290
03291
03292
03293
03294
03295
03296
03297
03298
03299
03300
03301
03302
03303
03304
03305
03306
03307
03308
03309
03310
03311
03312
03313
03314
03315
03316
03317
03318
03319
03320
03321
03322
03323
03324
03325
03326
03327
03328
03329
03330
03331
03332
03333
03334
03335
03336
03337
03338
03339
03340
03341
03342
03343
03344
03345
03346
03347
03348
03349
03350
03351
03352
03353
03354
03355
03356
03357
03358
03359
03360
03361
03362
03363
03364
03365
03366
03367
03368
03369
03370
03371 static VALUE
03372 econv_init(int argc, VALUE *argv, VALUE self)
03373 {
03374 VALUE ecopts;
03375 volatile VALUE snamev, dnamev;
03376 const char *sname, *dname;
03377 rb_encoding *senc, *denc;
03378 rb_econv_t *ec;
03379 int ecflags;
03380 VALUE convpath;
03381
03382 if (rb_check_typeddata(self, &econv_data_type)) {
03383 rb_raise(rb_eTypeError, "already initialized");
03384 }
03385
03386 if (argc == 1 && !NIL_P(convpath = rb_check_array_type(argv[0]))) {
03387 ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc);
03388 ecflags = 0;
03389 ecopts = Qnil;
03390 }
03391 else {
03392 econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
03393 ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
03394 }
03395
03396 if (!ec) {
03397 VALUE exc = rb_econv_open_exc(sname, dname, ecflags);
03398 RB_GC_GUARD(snamev);
03399 RB_GC_GUARD(dnamev);
03400 rb_exc_raise(exc);
03401 }
03402
03403 if (!DECORATOR_P(sname, dname)) {
03404 if (!senc)
03405 senc = make_dummy_encoding(sname);
03406 if (!denc)
03407 denc = make_dummy_encoding(dname);
03408 RB_GC_GUARD(snamev);
03409 RB_GC_GUARD(dnamev);
03410 }
03411
03412 ec->source_encoding = senc;
03413 ec->destination_encoding = denc;
03414
03415 DATA_PTR(self) = ec;
03416
03417 return self;
03418 }
03419
03420
03421
03422
03423
03424
03425
03426
03427
03428
03429
03430 static VALUE
03431 econv_inspect(VALUE self)
03432 {
03433 const char *cname = rb_obj_classname(self);
03434 rb_econv_t *ec;
03435
03436 TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
03437 if (!ec)
03438 return rb_sprintf("#<%s: uninitialized>", cname);
03439 else {
03440 const char *sname = ec->source_encoding_name;
03441 const char *dname = ec->destination_encoding_name;
03442 VALUE str;
03443 str = rb_sprintf("#<%s: ", cname);
03444 econv_description(sname, dname, ec->flags, str);
03445 rb_str_cat2(str, ">");
03446 return str;
03447 }
03448 }
03449
03450 static rb_econv_t *
03451 check_econv(VALUE self)
03452 {
03453 rb_econv_t *ec;
03454
03455 TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
03456 if (!ec) {
03457 rb_raise(rb_eTypeError, "uninitialized encoding converter");
03458 }
03459 return ec;
03460 }
03461
03462
03463
03464
03465
03466
03467
03468 static VALUE
03469 econv_source_encoding(VALUE self)
03470 {
03471 rb_econv_t *ec = check_econv(self);
03472 if (!ec->source_encoding)
03473 return Qnil;
03474 return rb_enc_from_encoding(ec->source_encoding);
03475 }
03476
03477
03478
03479
03480
03481
03482
03483 static VALUE
03484 econv_destination_encoding(VALUE self)
03485 {
03486 rb_econv_t *ec = check_econv(self);
03487 if (!ec->destination_encoding)
03488 return Qnil;
03489 return rb_enc_from_encoding(ec->destination_encoding);
03490 }
03491
03492
03493
03494
03495
03496
03497
03498
03499
03500
03501
03502
03503
03504
03505
03506
03507
03508
03509
03510
03511
03512
03513
03514 static VALUE
03515 econv_convpath(VALUE self)
03516 {
03517 rb_econv_t *ec = check_econv(self);
03518 VALUE result;
03519 int i;
03520
03521 result = rb_ary_new();
03522 for (i = 0; i < ec->num_trans; i++) {
03523 const rb_transcoder *tr = ec->elems[i].tc->transcoder;
03524 VALUE v;
03525 if (DECORATOR_P(tr->src_encoding, tr->dst_encoding))
03526 v = rb_str_new_cstr(tr->dst_encoding);
03527 else
03528 v = rb_assoc_new(make_encobj(tr->src_encoding), make_encobj(tr->dst_encoding));
03529 rb_ary_push(result, v);
03530 }
03531 return result;
03532 }
03533
03534
03535
03536
03537
03538 static VALUE
03539 econv_equal(VALUE self, VALUE other)
03540 {
03541 rb_econv_t *ec1 = check_econv(self);
03542 rb_econv_t *ec2;
03543 int i;
03544
03545 if (!rb_typeddata_is_kind_of(other, &econv_data_type)) {
03546 return Qnil;
03547 }
03548 ec2 = DATA_PTR(other);
03549 if (!ec2) return Qfalse;
03550 if (ec1->source_encoding_name != ec2->source_encoding_name &&
03551 strcmp(ec1->source_encoding_name, ec2->source_encoding_name))
03552 return Qfalse;
03553 if (ec1->destination_encoding_name != ec2->destination_encoding_name &&
03554 strcmp(ec1->destination_encoding_name, ec2->destination_encoding_name))
03555 return Qfalse;
03556 if (ec1->flags != ec2->flags) return Qfalse;
03557 if (ec1->replacement_enc != ec2->replacement_enc &&
03558 strcmp(ec1->replacement_enc, ec2->replacement_enc))
03559 return Qfalse;
03560 if (ec1->replacement_len != ec2->replacement_len) return Qfalse;
03561 if (ec1->replacement_str != ec2->replacement_str &&
03562 memcmp(ec1->replacement_str, ec2->replacement_str, ec2->replacement_len))
03563 return Qfalse;
03564
03565 if (ec1->num_trans != ec2->num_trans) return Qfalse;
03566 for (i = 0; i < ec1->num_trans; i++) {
03567 if (ec1->elems[i].tc->transcoder != ec2->elems[i].tc->transcoder)
03568 return Qfalse;
03569 }
03570 return Qtrue;
03571 }
03572
03573 static VALUE
03574 econv_result_to_symbol(rb_econv_result_t res)
03575 {
03576 switch (res) {
03577 case econv_invalid_byte_sequence: return sym_invalid_byte_sequence;
03578 case econv_incomplete_input: return sym_incomplete_input;
03579 case econv_undefined_conversion: return sym_undefined_conversion;
03580 case econv_destination_buffer_full: return sym_destination_buffer_full;
03581 case econv_source_buffer_empty: return sym_source_buffer_empty;
03582 case econv_finished: return sym_finished;
03583 case econv_after_output: return sym_after_output;
03584 default: return INT2NUM(res);
03585 }
03586 }
03587
03588
03589
03590
03591
03592
03593
03594
03595
03596
03597
03598
03599
03600
03601
03602
03603
03604
03605
03606
03607
03608
03609
03610
03611
03612
03613
03614
03615
03616
03617
03618
03619
03620
03621
03622
03623
03624
03625
03626
03627
03628
03629
03630
03631
03632
03633
03634
03635
03636
03637
03638
03639
03640
03641
03642
03643
03644
03645
03646
03647
03648
03649
03650
03651
03652
03653
03654
03655
03656
03657
03658
03659
03660
03661
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671
03672
03673
03674
03675
03676
03677
03678
03679
03680
03681
03682 static VALUE
03683 econv_primitive_convert(int argc, VALUE *argv, VALUE self)
03684 {
03685 VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
03686 rb_econv_t *ec = check_econv(self);
03687 rb_econv_result_t res;
03688 const unsigned char *ip, *is;
03689 unsigned char *op, *os;
03690 long output_byteoffset, output_bytesize;
03691 unsigned long output_byteend;
03692 int flags;
03693
03694 argc = rb_scan_args(argc, argv, "23:", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v, &opt);
03695
03696 if (NIL_P(output_byteoffset_v))
03697 output_byteoffset = 0;
03698 else
03699 output_byteoffset = NUM2LONG(output_byteoffset_v);
03700
03701 if (NIL_P(output_bytesize_v))
03702 output_bytesize = 0;
03703 else
03704 output_bytesize = NUM2LONG(output_bytesize_v);
03705
03706 if (!NIL_P(flags_v)) {
03707 if (!NIL_P(opt)) {
03708 rb_error_arity(argc + 1, 2, 5);
03709 }
03710 flags = NUM2INT(rb_to_int(flags_v));
03711 }
03712 else if (!NIL_P(opt)) {
03713 VALUE v;
03714 flags = 0;
03715 v = rb_hash_aref(opt, sym_partial_input);
03716 if (RTEST(v))
03717 flags |= ECONV_PARTIAL_INPUT;
03718 v = rb_hash_aref(opt, sym_after_output);
03719 if (RTEST(v))
03720 flags |= ECONV_AFTER_OUTPUT;
03721 }
03722 else {
03723 flags = 0;
03724 }
03725
03726 StringValue(output);
03727 if (!NIL_P(input))
03728 StringValue(input);
03729 rb_str_modify(output);
03730
03731 if (NIL_P(output_bytesize_v)) {
03732 output_bytesize = RSTRING_EMBED_LEN_MAX;
03733 if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input))
03734 output_bytesize = RSTRING_LEN(input);
03735 }
03736
03737 retry:
03738
03739 if (NIL_P(output_byteoffset_v))
03740 output_byteoffset = RSTRING_LEN(output);
03741
03742 if (output_byteoffset < 0)
03743 rb_raise(rb_eArgError, "negative output_byteoffset");
03744
03745 if (RSTRING_LEN(output) < output_byteoffset)
03746 rb_raise(rb_eArgError, "output_byteoffset too big");
03747
03748 if (output_bytesize < 0)
03749 rb_raise(rb_eArgError, "negative output_bytesize");
03750
03751 output_byteend = (unsigned long)output_byteoffset +
03752 (unsigned long)output_bytesize;
03753
03754 if (output_byteend < (unsigned long)output_byteoffset ||
03755 LONG_MAX < output_byteend)
03756 rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big");
03757
03758 if (rb_str_capacity(output) < output_byteend)
03759 rb_str_resize(output, output_byteend);
03760
03761 if (NIL_P(input)) {
03762 ip = is = NULL;
03763 }
03764 else {
03765 ip = (const unsigned char *)RSTRING_PTR(input);
03766 is = ip + RSTRING_LEN(input);
03767 }
03768
03769 op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
03770 os = op + output_bytesize;
03771
03772 res = rb_econv_convert(ec, &ip, is, &op, os, flags);
03773 rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
03774 if (!NIL_P(input)) {
03775 OBJ_INFECT_RAW(output, input);
03776 rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
03777 }
03778
03779 if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) {
03780 if (LONG_MAX / 2 < output_bytesize)
03781 rb_raise(rb_eArgError, "too long conversion result");
03782 output_bytesize *= 2;
03783 output_byteoffset_v = Qnil;
03784 goto retry;
03785 }
03786
03787 if (ec->destination_encoding) {
03788 rb_enc_associate(output, ec->destination_encoding);
03789 }
03790
03791 return econv_result_to_symbol(res);
03792 }
03793
03794
03795
03796
03797
03798
03799
03800
03801
03802
03803
03804
03805
03806
03807
03808
03809
03810
03811
03812
03813
03814
03815
03816
03817
03818
03819
03820
03821
03822
03823
03824
03825
03826
03827
03828 static VALUE
03829 econv_convert(VALUE self, VALUE source_string)
03830 {
03831 VALUE ret, dst;
03832 VALUE av[5];
03833 int ac;
03834 rb_econv_t *ec = check_econv(self);
03835
03836 StringValue(source_string);
03837
03838 dst = rb_str_new(NULL, 0);
03839
03840 av[0] = rb_str_dup(source_string);
03841 av[1] = dst;
03842 av[2] = Qnil;
03843 av[3] = Qnil;
03844 av[4] = INT2NUM(ECONV_PARTIAL_INPUT);
03845 ac = 5;
03846
03847 ret = econv_primitive_convert(ac, av, self);
03848
03849 if (ret == sym_invalid_byte_sequence ||
03850 ret == sym_undefined_conversion ||
03851 ret == sym_incomplete_input) {
03852 VALUE exc = make_econv_exception(ec);
03853 rb_exc_raise(exc);
03854 }
03855
03856 if (ret == sym_finished) {
03857 rb_raise(rb_eArgError, "converter already finished");
03858 }
03859
03860 if (ret != sym_source_buffer_empty) {
03861 rb_bug("unexpected result of econv_primitive_convert");
03862 }
03863
03864 return dst;
03865 }
03866
03867
03868
03869
03870
03871
03872
03873
03874
03875
03876
03877
03878 static VALUE
03879 econv_finish(VALUE self)
03880 {
03881 VALUE ret, dst;
03882 VALUE av[5];
03883 int ac;
03884 rb_econv_t *ec = check_econv(self);
03885
03886 dst = rb_str_new(NULL, 0);
03887
03888 av[0] = Qnil;
03889 av[1] = dst;
03890 av[2] = Qnil;
03891 av[3] = Qnil;
03892 av[4] = INT2FIX(0);
03893 ac = 5;
03894
03895 ret = econv_primitive_convert(ac, av, self);
03896
03897 if (ret == sym_invalid_byte_sequence ||
03898 ret == sym_undefined_conversion ||
03899 ret == sym_incomplete_input) {
03900 VALUE exc = make_econv_exception(ec);
03901 rb_exc_raise(exc);
03902 }
03903
03904 if (ret != sym_finished) {
03905 rb_bug("unexpected result of econv_primitive_convert");
03906 }
03907
03908 return dst;
03909 }
03910
03911
03912
03913
03914
03915
03916
03917
03918
03919
03920
03921
03922
03923
03924
03925
03926
03927
03928
03929
03930
03931
03932
03933
03934
03935
03936
03937
03938
03939
03940
03941
03942
03943
03944
03945
03946
03947
03948
03949
03950
03951
03952
03953
03954
03955
03956
03957
03958
03959
03960
03961
03962
03963
03964
03965
03966
03967
03968
03969
03970
03971
03972
03973
03974
03975
03976
03977
03978
03979
03980
03981
03982
03983
03984
03985
03986 static VALUE
03987 econv_primitive_errinfo(VALUE self)
03988 {
03989 rb_econv_t *ec = check_econv(self);
03990
03991 VALUE ary;
03992
03993 ary = rb_ary_new2(5);
03994
03995 rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result));
03996 rb_ary_store(ary, 4, Qnil);
03997
03998 if (ec->last_error.source_encoding)
03999 rb_ary_store(ary, 1, rb_str_new2(ec->last_error.source_encoding));
04000
04001 if (ec->last_error.destination_encoding)
04002 rb_ary_store(ary, 2, rb_str_new2(ec->last_error.destination_encoding));
04003
04004 if (ec->last_error.error_bytes_start) {
04005 rb_ary_store(ary, 3, rb_str_new((const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len));
04006 rb_ary_store(ary, 4, rb_str_new((const char *)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len));
04007 }
04008
04009 return ary;
04010 }
04011
04012
04013
04014
04015
04016
04017
04018
04019
04020
04021
04022
04023
04024
04025
04026
04027
04028
04029
04030
04031
04032
04033
04034
04035
04036
04037
04038
04039
04040
04041
04042
04043
04044 static VALUE
04045 econv_insert_output(VALUE self, VALUE string)
04046 {
04047 const char *insert_enc;
04048
04049 int ret;
04050
04051 rb_econv_t *ec = check_econv(self);
04052
04053 StringValue(string);
04054 insert_enc = rb_econv_encoding_to_insert_output(ec);
04055 string = rb_str_encode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0, Qnil);
04056
04057 ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
04058 if (ret == -1) {
04059 rb_raise(rb_eArgError, "too big string");
04060 }
04061
04062 return Qnil;
04063 }
04064
04065
04066
04067
04068
04069
04070
04071
04072
04073
04074
04075
04076
04077
04078
04079
04080
04081
04082
04083
04084
04085
04086
04087
04088
04089 static VALUE
04090 econv_putback(int argc, VALUE *argv, VALUE self)
04091 {
04092 rb_econv_t *ec = check_econv(self);
04093 int n;
04094 int putbackable;
04095 VALUE str, max;
04096
04097 rb_scan_args(argc, argv, "01", &max);
04098
04099 if (NIL_P(max))
04100 n = rb_econv_putbackable(ec);
04101 else {
04102 n = NUM2INT(max);
04103 putbackable = rb_econv_putbackable(ec);
04104 if (putbackable < n)
04105 n = putbackable;
04106 }
04107
04108 str = rb_str_new(NULL, n);
04109 rb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n);
04110
04111 if (ec->source_encoding) {
04112 rb_enc_associate(str, ec->source_encoding);
04113 }
04114
04115 return str;
04116 }
04117
04118
04119
04120
04121
04122
04123
04124
04125
04126
04127
04128
04129
04130
04131
04132
04133
04134
04135
04136
04137
04138 static VALUE
04139 econv_last_error(VALUE self)
04140 {
04141 rb_econv_t *ec = check_econv(self);
04142 VALUE exc;
04143
04144 exc = make_econv_exception(ec);
04145 if (NIL_P(exc))
04146 return Qnil;
04147 return exc;
04148 }
04149
04150
04151
04152
04153
04154
04155
04156
04157
04158
04159
04160
04161
04162 static VALUE
04163 econv_get_replacement(VALUE self)
04164 {
04165 rb_econv_t *ec = check_econv(self);
04166 int ret;
04167 rb_encoding *enc;
04168
04169 ret = make_replacement(ec);
04170 if (ret == -1) {
04171 rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
04172 }
04173
04174 enc = rb_enc_find(ec->replacement_enc);
04175 return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc);
04176 }
04177
04178
04179
04180
04181
04182
04183
04184
04185
04186
04187
04188 static VALUE
04189 econv_set_replacement(VALUE self, VALUE arg)
04190 {
04191 rb_econv_t *ec = check_econv(self);
04192 VALUE string = arg;
04193 int ret;
04194 rb_encoding *enc;
04195
04196 StringValue(string);
04197 enc = rb_enc_get(string);
04198
04199 ret = rb_econv_set_replacement(ec,
04200 (const unsigned char *)RSTRING_PTR(string),
04201 RSTRING_LEN(string),
04202 rb_enc_name(enc));
04203
04204 if (ret == -1) {
04205
04206 rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
04207 }
04208
04209 return arg;
04210 }
04211
04212 VALUE
04213 rb_econv_make_exception(rb_econv_t *ec)
04214 {
04215 return make_econv_exception(ec);
04216 }
04217
04218 void
04219 rb_econv_check_error(rb_econv_t *ec)
04220 {
04221 VALUE exc;
04222
04223 exc = make_econv_exception(ec);
04224 if (NIL_P(exc))
04225 return;
04226 rb_exc_raise(exc);
04227 }
04228
04229
04230
04231
04232
04233
04234
04235 static VALUE
04236 ecerr_source_encoding_name(VALUE self)
04237 {
04238 return rb_attr_get(self, rb_intern("source_encoding_name"));
04239 }
04240
04241
04242
04243
04244
04245
04246
04247
04248
04249
04250
04251
04252
04253
04254
04255
04256
04257
04258
04259
04260
04261 static VALUE
04262 ecerr_source_encoding(VALUE self)
04263 {
04264 return rb_attr_get(self, rb_intern("source_encoding"));
04265 }
04266
04267
04268
04269
04270
04271
04272
04273 static VALUE
04274 ecerr_destination_encoding_name(VALUE self)
04275 {
04276 return rb_attr_get(self, rb_intern("destination_encoding_name"));
04277 }
04278
04279
04280
04281
04282
04283
04284
04285 static VALUE
04286 ecerr_destination_encoding(VALUE self)
04287 {
04288 return rb_attr_get(self, rb_intern("destination_encoding"));
04289 }
04290
04291
04292
04293
04294
04295
04296
04297
04298
04299
04300
04301
04302
04303
04304
04305
04306 static VALUE
04307 ecerr_error_char(VALUE self)
04308 {
04309 return rb_attr_get(self, rb_intern("error_char"));
04310 }
04311
04312
04313
04314
04315
04316
04317
04318
04319
04320
04321
04322
04323
04324
04325
04326
04327 static VALUE
04328 ecerr_error_bytes(VALUE self)
04329 {
04330 return rb_attr_get(self, rb_intern("error_bytes"));
04331 }
04332
04333
04334
04335
04336
04337
04338
04339 static VALUE
04340 ecerr_readagain_bytes(VALUE self)
04341 {
04342 return rb_attr_get(self, rb_intern("readagain_bytes"));
04343 }
04344
04345
04346
04347
04348
04349
04350
04351
04352
04353
04354
04355
04356
04357
04358
04359
04360
04361
04362
04363
04364
04365
04366
04367
04368
04369 static VALUE
04370 ecerr_incomplete_input(VALUE self)
04371 {
04372 return rb_attr_get(self, rb_intern("incomplete_input"));
04373 }
04374
04375
04376
04377
04378
04379
04380
04381
04382
04383
04384
04385
04386
04387
04388
04389
04390
04391
04392
04393
04394
04395
04396
04397 void
04398 Init_transcode(void)
04399 {
04400 rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
04401 rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
04402 rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);
04403
04404 transcoder_table = st_init_strcasetable();
04405
04406 sym_invalid = ID2SYM(rb_intern("invalid"));
04407 sym_undef = ID2SYM(rb_intern("undef"));
04408 sym_replace = ID2SYM(rb_intern("replace"));
04409 sym_fallback = ID2SYM(rb_intern("fallback"));
04410 sym_aref = ID2SYM(rb_intern("[]"));
04411 sym_xml = ID2SYM(rb_intern("xml"));
04412 sym_text = ID2SYM(rb_intern("text"));
04413 sym_attr = ID2SYM(rb_intern("attr"));
04414
04415 sym_invalid_byte_sequence = ID2SYM(rb_intern("invalid_byte_sequence"));
04416 sym_undefined_conversion = ID2SYM(rb_intern("undefined_conversion"));
04417 sym_destination_buffer_full = ID2SYM(rb_intern("destination_buffer_full"));
04418 sym_source_buffer_empty = ID2SYM(rb_intern("source_buffer_empty"));
04419 sym_finished = ID2SYM(rb_intern("finished"));
04420 sym_after_output = ID2SYM(rb_intern("after_output"));
04421 sym_incomplete_input = ID2SYM(rb_intern("incomplete_input"));
04422 sym_universal_newline = ID2SYM(rb_intern("universal_newline"));
04423 sym_crlf_newline = ID2SYM(rb_intern("crlf_newline"));
04424 sym_cr_newline = ID2SYM(rb_intern("cr_newline"));
04425 sym_partial_input = ID2SYM(rb_intern("partial_input"));
04426
04427 #ifdef ENABLE_ECONV_NEWLINE_OPTION
04428 sym_newline = ID2SYM(rb_intern("newline"));
04429 sym_universal = ID2SYM(rb_intern("universal"));
04430 sym_crlf = ID2SYM(rb_intern("crlf"));
04431 sym_cr = ID2SYM(rb_intern("cr"));
04432 sym_lf = ID2SYM(rb_intern("lf"));
04433 #endif
04434
04435 rb_define_method(rb_cString, "encode", str_encode, -1);
04436 rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
04437
04438 rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
04439 rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
04440 rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
04441 rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1);
04442 rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
04443 rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
04444 rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0);
04445 rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
04446 rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
04447 rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
04448 rb_define_method(rb_cEncodingConverter, "convert", econv_convert, 1);
04449 rb_define_method(rb_cEncodingConverter, "finish", econv_finish, 0);
04450 rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0);
04451 rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
04452 rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
04453 rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
04454 rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
04455 rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
04456 rb_define_method(rb_cEncodingConverter, "==", econv_equal, 1);
04457
04458
04459
04460
04461
04462 rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
04463
04464
04465
04466
04467
04468 rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
04469
04470
04471
04472
04473
04474
04475 rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK));
04476
04477
04478
04479
04480
04481 rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE));
04482
04483
04484
04485
04486
04487
04488
04489 rb_define_const(rb_cEncodingConverter, "UNDEF_HEX_CHARREF", INT2FIX(ECONV_UNDEF_HEX_CHARREF));
04490
04491
04492
04493
04494
04495
04496 rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
04497
04498
04499
04500
04501
04502
04503 rb_define_const(rb_cEncodingConverter, "AFTER_OUTPUT", INT2FIX(ECONV_AFTER_OUTPUT));
04504
04505
04506
04507
04508
04509 rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
04510
04511
04512
04513
04514
04515 rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_DECORATOR", INT2FIX(ECONV_CRLF_NEWLINE_DECORATOR));
04516
04517
04518
04519
04520
04521 rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_DECORATOR", INT2FIX(ECONV_CR_NEWLINE_DECORATOR));
04522
04523
04524
04525
04526
04527 rb_define_const(rb_cEncodingConverter, "XML_TEXT_DECORATOR", INT2FIX(ECONV_XML_TEXT_DECORATOR));
04528
04529
04530
04531
04532
04533 rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_DECORATOR", INT2FIX(ECONV_XML_ATTR_CONTENT_DECORATOR));
04534
04535
04536
04537
04538
04539 rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_DECORATOR", INT2FIX(ECONV_XML_ATTR_QUOTE_DECORATOR));
04540
04541 rb_define_method(rb_eUndefinedConversionError, "source_encoding_name", ecerr_source_encoding_name, 0);
04542 rb_define_method(rb_eUndefinedConversionError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
04543 rb_define_method(rb_eUndefinedConversionError, "source_encoding", ecerr_source_encoding, 0);
04544 rb_define_method(rb_eUndefinedConversionError, "destination_encoding", ecerr_destination_encoding, 0);
04545 rb_define_method(rb_eUndefinedConversionError, "error_char", ecerr_error_char, 0);
04546
04547 rb_define_method(rb_eInvalidByteSequenceError, "source_encoding_name", ecerr_source_encoding_name, 0);
04548 rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
04549 rb_define_method(rb_eInvalidByteSequenceError, "source_encoding", ecerr_source_encoding, 0);
04550 rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding", ecerr_destination_encoding, 0);
04551 rb_define_method(rb_eInvalidByteSequenceError, "error_bytes", ecerr_error_bytes, 0);
04552 rb_define_method(rb_eInvalidByteSequenceError, "readagain_bytes", ecerr_readagain_bytes, 0);
04553 rb_define_method(rb_eInvalidByteSequenceError, "incomplete_input?", ecerr_incomplete_input, 0);
04554
04555 Init_newline();
04556 }
04557