00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
/* Release identification. NKF_VERSION and NKF_RELEASE_DATE are pasted into
 * the banners printed by version() and show_configuration(); COPY_RIGHT is
 * the two-line copyright notice appended by version(). */
00023 #define NKF_VERSION "2.1.3"
00024 #define NKF_RELEASE_DATE "2012-11-22"
00025 #define COPY_RIGHT \
00026 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
00027 "Copyright (C) 1996-2012, The nkf Project."
00028
00029 #include "config.h"
00030 #include "nkf.h"
00031 #include "utf8tbl.h"
00032 #ifdef __WIN32__
00033 #include <windows.h>
00034 #include <locale.h>
00035 #endif
00036 #if defined(__OS2__)
00037 # define INCL_DOS
00038 # define INCL_DOSERRORS
00039 # include <os2.h>
00040 #endif
00041 #include <assert.h>
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
/* Named values for MIME handling modes.
 * NOTE(review): no use of these is visible in this part of the file;
 * presumably they are values stored in mime_f -- confirm. */
00057 #define FIXED_MIME 7
00058 #define STRICT_MIME 8
00059
00060
/* Byte-order identifiers. input_endian and output_endian (declared further
 * down) are both initialised to ENDIAN_BIG.
 * NOTE(review): ENDIAN_2143 / ENDIAN_3412 presumably name mixed byte orders
 * of a four-byte unit -- confirm against the UTF-32 reader. */
00061 enum byte_order {
00062 ENDIAN_BIG = 1,
00063 ENDIAN_LITTLE = 2,
00064 ENDIAN_2143 = 3,
00065 ENDIAN_3412 = 4
00066 };
00067
00068
00069
/* Byte values of the control characters this file refers to by name.
 * BS..DEL, SI and SO are the usual ASCII code points; SS2/SS3 are the 8-bit
 * values 0x8e/0x8f (is_eucg3() compares a lead byte against SS3). */
00070 #define BS 0x08
00071 #define TAB 0x09
00072 #define LF 0x0a
00073 #define CR 0x0d
00074 #define ESC 0x1b
00075 #define SP 0x20
00076 #define DEL 0x7f
00077 #define SI 0x0f
00078 #define SO 0x0e
00079 #define SS2 0x8e
00080 #define SS3 0x8f
/* Not a character: a two-byte marker (CR followed by LF) that
 * show_configuration() compares DEFAULT_NEWLINE against. */
00081 #define CRLF 0x0D0A
00082
00083
00084
00085
/* Identifiers of the encodings nkf knows about.
 *
 * ASCII .. BINARY take consecutive values starting at 0 and are used directly
 * as indices into nkf_encoding_table (see nkf_enc_from_index), so the order
 * here must match the order of that table. NKF_ENCODING_TABLE_SIZE is the
 * number of such entries and serves as the exclusive upper bound. */
00086 enum nkf_encodings {
00087 ASCII,
00088 ISO_8859_1,
00089 ISO_2022_JP,
00090 CP50220,
00091 CP50221,
00092 CP50222,
00093 ISO_2022_JP_1,
00094 ISO_2022_JP_3,
00095 ISO_2022_JP_2004,
00096 SHIFT_JIS,
00097 WINDOWS_31J,
00098 CP10001,
00099 EUC_JP,
00100 EUCJP_NKF,
00101 CP51932,
00102 EUCJP_MS,
00103 EUCJP_ASCII,
00104 SHIFT_JISX0213,
00105 SHIFT_JIS_2004,
00106 EUC_JISX0213,
00107 EUC_JIS_2004,
00108 UTF_8,
00109 UTF_8N,
00110 UTF_8_BOM,
00111 UTF8_MAC,
00112 UTF_16,
00113 UTF_16BE,
00114 UTF_16BE_BOM,
00115 UTF_16LE,
00116 UTF_16LE_BOM,
00117 UTF_32,
00118 UTF_32BE,
00119 UTF_32BE_BOM,
00120 UTF_32LE,
00121 UTF_32LE_BOM,
00122 BINARY,
00123 NKF_ENCODING_TABLE_SIZE,
/* The remaining constants carry explicit values well above the table range,
 * so they can never be mistaken for a table index.
 * NOTE(review): presumably these identify coded character sets (used as
 * input/output mode values) rather than whole encodings -- confirm. */
00124 JIS_X_0201_1976_K = 0x1013,
00125
00126
00127
00128 JIS_X_0208 = 0x1168,
00129 JIS_X_0212 = 0x1159,
00130
00131 JIS_X_0213_2 = 0x1229,
00132 JIS_X_0213_1 = 0x1233
00133 };
00134
/* Forward declarations of the per-family converters referenced by the
 * nkf_native_encoding records below. The *_iconv functions take three
 * nkf_char arguments and return an nkf_char; the *_oconv functions take two
 * and return nothing. Their definitions are outside this part of the file. */
00135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
00139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
00140 static void j_oconv(nkf_char c2, nkf_char c1);
00141 static void s_oconv(nkf_char c2, nkf_char c1);
00142 static void e_oconv(nkf_char c2, nkf_char c1);
00143 static void w_oconv(nkf_char c2, nkf_char c1);
00144 static void w_oconv16(nkf_char c2, nkf_char c1);
00145 static void w_oconv32(nkf_char c2, nkf_char c1);
00146
/* A "native" (base) encoding: a display name plus the input and output
 * converter shared by every concrete encoding built on it. */
00147 typedef struct {
00148 const char *name;
00149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
00150 void (*oconv)(nkf_char c2, nkf_char c1);
00151 } nkf_native_encoding;
00152
/* The seven base encodings. Note that ASCII and ISO-2022-JP both read through
 * e_iconv; they differ only in the output converter. */
00153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
00154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
00155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
00156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
00157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
00158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
00159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
00160
/* A concrete encoding: its enum nkf_encodings id, its canonical name and the
 * base encoding that supplies the converters. */
00161 typedef struct {
00162 const int id;
00163 const char *name;
00164 const nkf_native_encoding *base_encoding;
00165 } nkf_encoding;
00166
/* One record per encoding id, in enum order: nkf_enc_from_index() returns
 * &nkf_encoding_table[idx] without searching, so entry i must have id i.
 * The final {-1, NULL, NULL} record is a terminator and sits at index
 * NKF_ENCODING_TABLE_SIZE, which nkf_enc_from_index() refuses to return. */
00167 nkf_encoding nkf_encoding_table[] = {
00168 {ASCII, "US-ASCII", &NkfEncodingASCII},
00169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
00170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
00171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
00172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
00173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
00174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
00175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
00176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
00177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
00178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
00179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
00180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
00181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
00182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
00183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
00184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
00185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
00186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
00187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
00188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
00189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
00190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
00191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
00192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
00193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
00194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
00195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
00196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
00197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
00198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
00199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
00200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
00201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
00202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
00203 {BINARY, "BINARY", &NkfEncodingASCII},
00204 {-1, NULL, NULL}
00205 };
00206
/* Alias table mapping user-visible encoding names to enum nkf_encodings ids.
 * Several names may map to the same id. nkf_enc_find_index() scans it from
 * the top with a case-insensitive comparison and stops at the first record
 * whose id is negative, i.e. the {NULL, -1} terminator. */
00207 struct {
00208 const char *name;
00209 const int id;
00210 } encoding_name_to_id_table[] = {
00211 {"US-ASCII", ASCII},
00212 {"ASCII", ASCII},
00213 {"646", ASCII},
00214 {"ROMAN8", ASCII},
00215 {"ISO-2022-JP", ISO_2022_JP},
00216 {"ISO2022JP-CP932", CP50220},
00217 {"CP50220", CP50220},
00218 {"CP50221", CP50221},
00219 {"CSISO2022JP", CP50221},
00220 {"CP50222", CP50222},
00221 {"ISO-2022-JP-1", ISO_2022_JP_1},
00222 {"ISO-2022-JP-3", ISO_2022_JP_3},
00223 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
00224 {"SHIFT_JIS", SHIFT_JIS},
00225 {"SJIS", SHIFT_JIS},
00226 {"MS_Kanji", SHIFT_JIS},
00227 {"PCK", SHIFT_JIS},
00228 {"WINDOWS-31J", WINDOWS_31J},
00229 {"CSWINDOWS31J", WINDOWS_31J},
00230 {"CP932", WINDOWS_31J},
00231 {"MS932", WINDOWS_31J},
00232 {"CP10001", CP10001},
00233 {"EUCJP", EUC_JP},
00234 {"EUC-JP", EUC_JP},
00235 {"EUCJP-NKF", EUCJP_NKF},
00236 {"CP51932", CP51932},
00237 {"EUC-JP-MS", EUCJP_MS},
00238 {"EUCJP-MS", EUCJP_MS},
00239 {"EUCJPMS", EUCJP_MS},
00240 {"EUC-JP-ASCII", EUCJP_ASCII},
00241 {"EUCJP-ASCII", EUCJP_ASCII},
00242 {"SHIFT_JISX0213", SHIFT_JISX0213},
00243 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
00244 {"EUC-JISX0213", EUC_JISX0213},
00245 {"EUC-JIS-2004", EUC_JIS_2004},
00246 {"UTF-8", UTF_8},
00247 {"UTF-8N", UTF_8N},
00248 {"UTF-8-BOM", UTF_8_BOM},
00249 {"UTF8-MAC", UTF8_MAC},
00250 {"UTF-8-MAC", UTF8_MAC},
00251 {"UTF-16", UTF_16},
00252 {"UTF-16BE", UTF_16BE},
00253 {"UTF-16BE-BOM", UTF_16BE_BOM},
00254 {"UTF-16LE", UTF_16LE},
00255 {"UTF-16LE-BOM", UTF_16LE_BOM},
00256 {"UTF-32", UTF_32},
00257 {"UTF-32BE", UTF_32BE},
00258 {"UTF-32BE-BOM", UTF_32BE_BOM},
00259 {"UTF-32LE", UTF_32LE},
00260 {"UTF-32LE-BOM", UTF_32LE_BOM},
00261 {"BINARY", BINARY},
00262 {NULL, -1}
00263 };
00264
/* Translate the build-time DEFAULT_CODE_* selection into an encoding id.
 * DEFAULT_ENCIDX stays undefined when none of the listed macros is set;
 * nkf_default_encoding() then falls back to UTF-8 (or to the locale when
 * DEFAULT_CODE_LOCALE is defined). */
00265 #if defined(DEFAULT_CODE_JIS)
00266 #define DEFAULT_ENCIDX ISO_2022_JP
00267 #elif defined(DEFAULT_CODE_SJIS)
00268 #define DEFAULT_ENCIDX SHIFT_JIS
00269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
00270 #define DEFAULT_ENCIDX WINDOWS_31J
00271 #elif defined(DEFAULT_CODE_EUC)
00272 #define DEFAULT_ENCIDX EUC_JP
00273 #elif defined(DEFAULT_CODE_UTF8)
00274 #define DEFAULT_ENCIDX UTF_8
00275 #endif
00276
00277
/*
 * Locale-independent ASCII classification and conversion helpers.
 *
 * Every macro argument is parenthesised so that an expression argument
 * (for example hex2bin(x | 0) or nkf_toupper(cond ? a : b)) expands with the
 * intended precedence. Arguments are still evaluated more than once, so do
 * not pass expressions with side effects.
 */

/* Non-zero when c is an ASCII letter or decimal digit. */
#define is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* Upper-case an ASCII lower-case letter; any other value is returned as is. */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')

/* Value of one hexadecimal digit character; 0 for anything that is not one. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when bits 8..15 of c2 (taken as an unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and for bytes strictly between SP and DEL other than
 * ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

/* True when c2 lies within [CP932_TABLE_BEGIN, CP932_TABLE_END]. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
/* True when c lies within [SP, 0x5F]. */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
00303
/* Buffer sizes. IOBUF_SIZE dimensions stdibuf/stdobuf below and is reduced
 * when the build declares INT_IS_SHORT.
 * NOTE(review): the consumer of HOLD_SIZE is not visible here. */
00304 #define HOLD_SIZE 1024
00305 #if defined(INT_IS_SHORT)
00306 #define IOBUF_SIZE 2048
00307 #else
00308 #define IOBUF_SIZE 16384
00309 #endif
00310
/* Initial values of kanji_intro and ascii_intro respectively. */
00311 #define DEFAULT_J 'B'
00312 #define DEFAULT_R 'B'
00313
00314
/* NOTE(review): presumably the two bytes of the substitute ("geta") character
 * written for unconvertible input -- usage is outside this view; confirm. */
00315 #define GETA1 0x22
00316 #define GETA2 0x2e
00317
00318
00319
00320
/* EASYWIN builds only: variable owned by the runtime, declared here so that
 * this file can refer to it. */
00321 #ifdef EASYWIN
00322 extern POINT _BufferSize;
00323 #endif
00324
/* State record kept for one candidate input encoding; input_code_list holds
 * one of these per candidate. name is the encoding name, status_func and
 * iconv_func are the hooks installed for that candidate.
 * NOTE(review): stat/score/index/buf are presumably the scanner state,
 * penalty score and pending bytes used while guessing -- their use is not
 * visible in this part of the file; confirm. */
00325 struct input_code{
00326 const char *name;
00327 nkf_char stat;
00328 nkf_char score;
00329 nkf_char index;
00330 nkf_char buf[3];
00331 void (*status_func)(struct input_code *, nkf_char);
00332 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
00333 int _file_stat;
00334 };
00335
/* Name of the input encoding and the selected input/output encoding records;
 * all three start out unset (NULL). */
00336 static const char *input_codename = NULL;
00337 static nkf_encoding *input_encoding = NULL;
00338 static nkf_encoding *output_encoding = NULL;
00339
/* Unicode-related option state, compiled in only when UTF-8 input and/or
 * output support is enabled. */
00340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
00341
00342
00343
00344
00345
00346
/* Values for ms_ucs_map_f, which defaults to UCS_MAP_ASCII.
 * NOTE(review): presumably selects which vendor mapping table is used for the
 * code points where the tables disagree -- confirm at the point of use. */
00347 #define UCS_MAP_ASCII 0
00348 #define UCS_MAP_MS 1
00349 #define UCS_MAP_CP932 2
00350 #define UCS_MAP_CP10001 3
00351 static int ms_ucs_map_f = UCS_MAP_ASCII;
00352 #endif
00353 #ifdef UTF8_INPUT_ENABLE
00354
00355 static int no_cp932ext_f = FALSE;
00356
00357 static int no_best_fit_chars_f = FALSE;
/* Input side: assumed byte order (see enum byte_order) and BOM flag. */
00358 static int input_endian = ENDIAN_BIG;
00359 static int input_bom_f = FALSE;
/* Character emitted by encode_fallback_subchar(); '?' unless changed. */
00360 static nkf_char unicode_subchar = '?';
/* Optional hook; NULL by default. The encode_fallback_* functions later in
 * this file have the matching signature. */
00361 static void (*encode_fallback)(nkf_char c) = NULL;
00362 static void w_status(struct input_code *, nkf_char);
00363 #endif
00364 #ifdef UTF8_OUTPUT_ENABLE
/* Output side: BOM flag and byte order. */
00365 static int output_bom_f = FALSE;
00366 static int output_endian = ENDIAN_BIG;
00367 #endif
00368
/* Prototypes of the character I/O primitives that the function-pointer hooks
 * further down are initialised with. Definitions are outside this view. */
00369 static void std_putc(nkf_char c);
00370 static nkf_char std_getc(FILE *f);
00371 static nkf_char std_ungetc(nkf_char c,FILE *f);
00372
00373 static nkf_char broken_getc(FILE *f);
00374 static nkf_char broken_ungetc(nkf_char c,FILE *f);
00375
00376 static nkf_char mime_getc(FILE *f);
00377
00378 static void mime_putc(nkf_char c);
00379
00380
00381
/* Two IOBUF_SIZE-byte buffers, omitted from PERL_XS and WIN32DLL builds.
 * NOTE(review): presumably handed to setvbuf() for stdin/stdout -- the call
 * site is not visible here; confirm. */
00382 #if !defined(PERL_XS) && !defined(WIN32DLL)
00383 static unsigned char stdibuf[IOBUF_SIZE];
00384 static unsigned char stdobuf[IOBUF_SIZE];
00385 #endif
00386
/* Marker for "option not given": the negation of TRUE, so it differs from
 * both FALSE and TRUE. Used below as the initial value of x0201_f. */
00387 #define NKF_UNSPECIFIED (-TRUE)
00388
00389
/* Global option flags with their built-in defaults. They are assigned
 * elsewhere (option parsing is outside this part of the file); names mirror
 * the command-line features they control. */
00390 static int unbuf_f = FALSE;
00391 static int estab_f = FALSE;
00392 static int nop_f = FALSE;
00393 static int binmode_f = TRUE;
00394 static int rot_f = FALSE;
00395 static int hira_f = FALSE;
00396 static int alpha_f = FALSE;
/* Initialised from the build-time MIME_DECODE_DEFAULT setting. */
00397 static int mime_f = MIME_DECODE_DEFAULT;
00398 static int mime_decode_f = FALSE;
00399 static int mimebuf_f = FALSE;
00400 static int broken_f = FALSE;
00401 static int iso8859_f = FALSE;
00402 static int mimeout_f = FALSE;
00403 static int x0201_f = NKF_UNSPECIFIED;
00404 static int iso2022jp_f = FALSE;
00405
/* Optional input stages. Each stage has an enable flag plus a getc/ungetc
 * hook pair that defaults to the plain std_getc/std_ungetc primitives.
 * NOTE(review): presumably each pair points at the next lower stage of the
 * input chain once the stage is enabled -- wiring is outside this view. */
00406 #ifdef UNICODE_NORMALIZATION
00407 static int nfc_f = FALSE;
00408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc;
00409 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
00410 #endif
00411
00412 #ifdef INPUT_OPTION
/* --cap-input stage. */
00413 static int cap_f = FALSE;
00414 static nkf_char (*i_cgetc)(FILE *) = std_getc;
00415 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
00416
/* --url-input stage. */
00417 static int url_f = FALSE;
00418 static nkf_char (*i_ugetc)(FILE *) = std_getc;
00419 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
00420 #endif
00421
/*
 * Layout of an nkf_char that carries more than a plain byte.
 *
 * The top eight bits (CLASS_MASK) hold a class tag and the low 24 bits
 * (VALUE_MASK) hold the value; CLASS_UNICODE tags a Unicode scalar value.
 * PREFIX_EUCG3 is OR-ed into a value by nkf_char_euc3_new().
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* The predicates parenthesise their argument so that an expression such as
 * (a | b) is masked as a whole rather than only its last operand. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)

/* Combine a UTF-16 surrogate pair into one code point. The constant is
 * (0xD800 << 10) + 0xDC00 - 0x10000, i.e. both surrogate offsets and the
 * supplementary-plane base folded into a single subtraction. */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
00435
/* --numchar-input stage: enable flag and getc/ungetc hook pair, defaulting to
 * the plain stdio primitives. */
00436 #ifdef NUMCHAR_OPTION
00437 static int numchar_f = FALSE;
00438 static nkf_char (*i_ngetc)(FILE *) = std_getc;
00439 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
00440 #endif
00441
/* Diagnostic options (CHECK_OPTION builds only). */
00442 #ifdef CHECK_OPTION
00443 static int noout_f = FALSE;
00444 static void no_putc(nkf_char c);
00445 static int debug_f = FALSE;
00446 static void debug(const char *str);
00447 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
00448 #endif
00449
/* Flag for the -g/--guess option listed in usage(). */
00450 static int guess_f = 0;
00451 static void set_input_codename(const char *codename);
00452
00453 #ifdef EXEC_IO
00454 static int exec_f = 0;
00455 #endif
00456
/* CP932-specific behaviour switches (SHIFTJIS_CP932 builds only). Note that
 * cp932inv_f is the one flag here that defaults to TRUE. */
00457 #ifdef SHIFTJIS_CP932
00458
00459 static int cp51932_f = FALSE;
00460
00461
00462 static int cp932inv_f = TRUE;
00463
00464
00465 #endif
00466
/* NOTE(review): presumably enable JIS X 0212 / JIS X 0213 handling; the code
 * that reads them is outside this view. */
00467 static int x0212_f = FALSE;
00468 static int x0213_f = FALSE;
00469
/* One entry per possible byte value.
 * NOTE(review): filled in and consulted outside this part of the file. */
00470 static unsigned char prefix_table[256];
00471
00472 static void e_status(struct input_code *, nkf_char);
00473 static void s_status(struct input_code *, nkf_char);
00474
/* Candidate input encodings, each with zeroed state and its status and
 * conversion hooks. The UTF-16 and UTF-32 entries have no status function
 * (NULL). The list ends with an entry whose name is NULL. */
00475 struct input_code input_code_list[] = {
00476 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
00477 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
00478 #ifdef UTF8_INPUT_ENABLE
00479 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
00480 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
00481 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
00482 #endif
00483 {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
00484 };
00485
/* MIME output state.
 * NOTE(review): maintained by the MIME encoder, which is outside this view. */
00486 static int mimeout_mode = 0;
00487 static int base64_count = 0;
00488
00489
00490
00491
/* Line-folding state and options (the -f / -F options listed in usage()). */
00492 static int f_line = 0;
00493 static int f_prev = 0;
00494 static int fold_preserve_f = FALSE;
00495 static int fold_f = FALSE;
00496 static int fold_len = 0;
00497
00498
/* Both start as 'B' (DEFAULT_J / DEFAULT_R).
 * NOTE(review): presumably the final bytes of the escape sequences that
 * switch to kanji and back to ASCII on JIS output -- confirm in j_oconv. */
00499 static unsigned char kanji_intro = DEFAULT_J;
00500 static unsigned char ascii_intro = DEFAULT_R;
00501
00502
00503
/* Defaults for folding; usage() documents "-f60-10 (fold margin 10)". */
00504 #define FOLD_MARGIN 10
00505 #define DEFAULT_FOLD 60
00506
00507 static int fold_margin = FOLD_MARGIN;
00508
00509
00510
00511 static nkf_char
00512 no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
00513 {
00514 fprintf(stderr,"nkf internal module connection failure.\n");
00515 exit(EXIT_FAILURE);
00516 return 0;
00517 }
00518
00519 static void
00520 no_connection(nkf_char c2, nkf_char c1)
00521 {
00522 no_connection2(c2,c1,0);
00523 }
00524
/* Active input and output converters. Both start at the no_connection*
 * placeholders, which abort the program if they are ever called. */
00525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
00526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
00527
/* Hooks for the optional output filter stages, likewise initialised to the
 * aborting placeholder.
 * NOTE(review): presumably each stage calls the next through its hook once
 * the chain is assembled -- the wiring is outside this view. */
00528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
00529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
00530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
00531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
00532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
00533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
00534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
00535
00536
00537
/* Byte-level output and input hooks; all default to the std_* primitives. */
00538 static void (*o_putc)(nkf_char c) = std_putc;
00539
00540 static nkf_char (*i_getc)(FILE *f) = std_getc;
00541 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
00542
00543 static nkf_char (*i_bgetc)(FILE *) = std_getc;
00544 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
00545
00546 static void (*o_mputc)(nkf_char c) = std_putc ;
00547
00548 static nkf_char (*i_mgetc)(FILE *) = std_getc;
00549 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
00550
00551
00552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc;
00553 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
00554
00555
/* Current output/input mode, initially ASCII, and the MIME decoder state. */
00556 static int output_mode = ASCII;
00557 static int input_mode = ASCII;
00558 static int mime_decode_mode = FALSE;
00559
00560
00561
00562
00563
/* Table of 64 two-byte entries followed by a 0x00,0x00 terminator. Every
 * first byte is 0x21 or 0x25.
 * NOTE(review): presumably maps a JIS X 0201 (half-width) katakana code to
 * the two bytes of its JIS X 0208 full-width equivalent -- the lookup code is
 * outside this view; confirm index base and layout there. */
00564 static const unsigned char cv[]= {
00565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
00566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
00567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
00568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
00569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
00570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
00571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
00572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
00573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
00574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
00575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
00576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
00577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
00578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
00579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
00580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
00581 0x00,0x00};
00582
00583
00584
00585
/* Same size and layout as cv (64 two-byte entries plus terminator), but most
 * entries are 0x00,0x00.
 * NOTE(review): presumably the full-width form to use when the half-width
 * katakana is followed by a voiced sound mark (dakuten), with 0,0 meaning
 * "no combined form" -- confirm at the point of use. */
00586 static const unsigned char dv[]= {
00587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
00592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
00593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
00594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
00595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
00596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
00597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
00598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
00599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00603 0x00,0x00};
00604
00605
00606
/* Same size and layout as cv; only five consecutive entries are non-zero.
 * NOTE(review): presumably the full-width form to use when the half-width
 * katakana is followed by a semi-voiced sound mark (handakuten) -- confirm at
 * the point of use. */
00607 static const unsigned char ev[]= {
00608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
00619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
00620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00624 0x00,0x00};
00625
00626
00627
/* Same size and layout as ev, with a different set of non-zero entries
 * (second bytes 0x77..0x7e).
 * NOTE(review): presumably the additional semi-voiced combinations that only
 * exist in JIS X 0213, consulted when x0213_f is set -- confirm at the point
 * of use. */
00628 static const unsigned char ev_x0213[]= {
00629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00634 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
00635 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
00636 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
00637 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
00638 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
00639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00645 0x00,0x00};
00646
00647
00648
00649
/* 96 single-byte entries; the non-zero ones are ASCII punctuation codes
 * (0x2c ',', 0x2e '.', 0x3a ':' ...).
 * NOTE(review): presumably maps full-width JIS X 0208 symbols to their ASCII
 * equivalents, 0 meaning "no equivalent" -- index base and consumer are
 * outside this view; confirm. */
00650 static const unsigned char fv[] = {
00651
00652 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
00653 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
00654 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
00655 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
00656 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
00657 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
00658 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
00659 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
00660 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
00661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00662 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
00663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
00664 } ;
00665
00666
00667
/* Option-parser and file-output state. */
00668 static int option_mode = 0;
00669 static int file_out_f = FALSE;
/* In-place rewrite support (the --in-place / --overwrite options listed in
 * usage()). backup_suffix is the SUF argument; get_backup_filename() treats
 * every '*' in it as a placeholder for the original file name. */
00670 #ifdef OVERWRITE
00671 static int overwrite_f = FALSE;
00672 static int preserve_time_f = FALSE;
00673 static int backup_f = FALSE;
00674 static char *backup_suffix = "";
00675 #endif
00676
/* End-of-line conversion state (the -L option listed in usage()).
 * NOTE(review): prev_cr presumably remembers a pending CR while scanning for
 * CRLF -- the reader is outside this view. */
00677 static int eolmode_f = 0;
00678 static int input_eol = 0;
00679 static nkf_char prev_cr = 0;
00680 #ifdef EASYWIN
00681 static int end_check;
00682 #endif
00683
/*
 * malloc() wrapper that never returns NULL.
 *
 * A request for zero bytes is rounded up to one byte so that the result is
 * always a distinct, freeable pointer. If the allocation fails the reason is
 * printed with perror() and the process exits with EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size ? size : 1);

    if (!mem) {
	perror("can't malloc");
	exit(EXIT_FAILURE);
    }
    return mem;
}
00699
/*
 * realloc() wrapper that never returns NULL.
 *
 * A zero size is rounded up to one byte, so the call never acts as a free.
 * On failure the reason is printed with perror() and the process exits with
 * EXIT_FAILURE; the old block is therefore never leaked to a caller.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size == 0 ? 1 : size);

    if (!resized) {
	perror("can't realloc");
	exit(EXIT_FAILURE);
    }
    return resized;
}
00713
/* Release counterpart of nkf_xmalloc()/nkf_xrealloc(); expands directly to
 * free(). */
00714 #define nkf_xfree(ptr) free(ptr)
00715
00716 static int
00717 nkf_str_caseeql(const char *src, const char *target)
00718 {
00719 int i;
00720 for (i = 0; src[i] && target[i]; i++) {
00721 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
00722 }
00723 if (src[i] || target[i]) return FALSE;
00724 else return TRUE;
00725 }
00726
00727 static nkf_encoding*
00728 nkf_enc_from_index(int idx)
00729 {
00730 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
00731 return 0;
00732 }
00733 return &nkf_encoding_table[idx];
00734 }
00735
00736 static int
00737 nkf_enc_find_index(const char *name)
00738 {
00739 int i;
00740 if (name[0] == 'X' && *(name+1) == '-') name += 2;
00741 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
00742 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
00743 return encoding_name_to_id_table[i].id;
00744 }
00745 }
00746 return -1;
00747 }
00748
00749 static nkf_encoding*
00750 nkf_enc_find(const char *name)
00751 {
00752 int idx = -1;
00753 idx = nkf_enc_find_index(name);
00754 if (idx < 0) return 0;
00755 return nkf_enc_from_index(idx);
00756 }
00757
/* Accessors for an nkf_encoding pointer: canonical name, numeric id, base
 * encoding, and the base encoding's input/output converter. */
00758 #define nkf_enc_name(enc) (enc)->name
00759 #define nkf_enc_to_index(enc) (enc)->id
00760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
00761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
00762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is ASCII or ISO-2022-JP. */
00763 #define nkf_enc_asciicompat(enc) (\
00764 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
00765 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is UTF-8, UTF-16 or UTF-32. */
00766 #define nkf_enc_unicode_p(enc) (\
00767 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
00768 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
00769 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
00770 #define nkf_enc_cp5022x_p(enc) (\
00771 nkf_enc_to_index(enc) == CP50220 ||\
00772 nkf_enc_to_index(enc) == CP50221 ||\
00773 nkf_enc_to_index(enc) == CP50222)
00774
00775 #ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the character encoding used by the current locale, or
 * NULL when the platform offers no way to ask.
 *
 *  - HAVE_LANGINFO_H: the string returned by nl_langinfo(CODESET).
 *  - Windows:         "CP<n>" built from the ANSI code page number.
 *  - OS/2 (32-bit):   "Shift_JIS" for code pages 932/943, otherwise "CP<n>".
 *  - anything else, including OS/2 builds with INT_IS_SHORT: NULL.
 *
 * On Windows and OS/2 the result lives in a static buffer that is overwritten
 * by the next call.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    /* GetACP() returns an unsigned value, so print it with %u. */
    sprintf(buf, "CP%u", (unsigned int)GetACP());
    return buf;
#elif defined(__OS2__) && !defined(INT_IS_SHORT)
    static char buf[16];
    ULONG ulCP[1], ulncp;
    /* Only the first slot (the current code page) is requested. */
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
	strcpy(buf, "Shift_JIS");
    else
	sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
#else
    return NULL;
#endif
}
00803
00804 static nkf_encoding*
00805 nkf_locale_encoding()
00806 {
00807 nkf_encoding *enc = 0;
00808 const char *encname = nkf_locale_charmap();
00809 if (encname)
00810 enc = nkf_enc_find(encname);
00811 return enc;
00812 }
00813 #endif
00814
00815 static nkf_encoding*
00816 nkf_utf8_encoding()
00817 {
00818 return &nkf_encoding_table[UTF_8];
00819 }
00820
00821 static nkf_encoding*
00822 nkf_default_encoding()
00823 {
00824 nkf_encoding *enc = 0;
00825 #ifdef DEFAULT_CODE_LOCALE
00826 enc = nkf_locale_encoding();
00827 #elif defined(DEFAULT_ENCIDX)
00828 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
00829 #endif
00830 if (!enc) enc = nkf_utf8_encoding();
00831 return enc;
00832 }
00833
/* Fixed-capacity stack of nkf_char values.
 *   capa - number of elements ptr can hold (set once by nkf_buf_new)
 *   len  - number of elements currently stored
 *   ptr  - element storage */
00834 typedef struct {
00835 long capa;
00836 long len;
00837 nkf_char *ptr;
00838 } nkf_buf_t;
00839
00840 static nkf_buf_t *
00841 nkf_buf_new(int length)
00842 {
00843 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
00844 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
00845 buf->capa = length;
00846 buf->len = 0;
00847 return buf;
00848 }
00849
/* Disabled (#if 0) destructor for nkf_buf_t: would free the element array and
 * then the header. As compiled, buffers are never released. */
00850 #if 0
00851 static void
00852 nkf_buf_dispose(nkf_buf_t *buf)
00853 {
00854 nkf_xfree(buf->ptr);
00855 nkf_xfree(buf);
00856 }
00857 #endif
00858
/* Number of stored elements, and a predicate that is true when there are
 * none. */
00859 #define nkf_buf_length(buf) ((buf)->len)
00860 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
00861
00862 static nkf_char
00863 nkf_buf_at(nkf_buf_t *buf, int index)
00864 {
00865 assert(index <= buf->len);
00866 return buf->ptr[index];
00867 }
00868
00869 static void
00870 nkf_buf_clear(nkf_buf_t *buf)
00871 {
00872 buf->len = 0;
00873 }
00874
00875 static void
00876 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
00877 {
00878 if (buf->capa <= buf->len) {
00879 exit(EXIT_FAILURE);
00880 }
00881 buf->ptr[buf->len++] = c;
00882 }
00883
00884 static nkf_char
00885 nkf_buf_pop(nkf_buf_t *buf)
00886 {
00887 assert(!nkf_buf_empty_p(buf));
00888 return buf->ptr[--buf->len];
00889 }
00890
00891
00892 #ifndef PERL_XS
/* In WIN32DLL builds every fprintf call from here on is redirected to
 * dllprintf (declared elsewhere). */
00893 #ifdef WIN32DLL
00894 #define fprintf dllprintf
00895 #endif
00896
00897 static void
00898 version(void)
00899 {
00900 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
00901 }
00902
00903 static void
00904 usage(void)
00905 {
00906 fprintf(HELP_OUTPUT,
00907 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
00908 #ifdef UTF8_OUTPUT_ENABLE
00909 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00910 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
00911 #else
00912 #endif
00913 #ifdef UTF8_INPUT_ENABLE
00914 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00915 " UTF option is -W[8,[16,32][B,L]]\n"
00916 #else
00917 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00918 #endif
00919 );
00920 fprintf(HELP_OUTPUT,
00921 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
00922 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
00923 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
00924 );
00925 fprintf(HELP_OUTPUT,
00926 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
00927 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
00928 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
00929 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
00930 );
00931 fprintf(HELP_OUTPUT,
00932 " O Output to File (DEFAULT 'nkf.out')\n"
00933 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
00934 );
00935 fprintf(HELP_OUTPUT,
00936 " --ic=<encoding> Specify the input encoding\n"
00937 " --oc=<encoding> Specify the output encoding\n"
00938 " --hiragana --katakana Hiragana/Katakana Conversion\n"
00939 " --katakana-hiragana Converts each other\n"
00940 );
00941 fprintf(HELP_OUTPUT,
00942 #ifdef INPUT_OPTION
00943 " --{cap, url}-input Convert hex after ':' or '%%'\n"
00944 #endif
00945 #ifdef NUMCHAR_OPTION
00946 " --numchar-input Convert Unicode Character Reference\n"
00947 #endif
00948 #ifdef UTF8_INPUT_ENABLE
00949 " --fb-{skip, html, xml, perl, java, subchar}\n"
00950 " Specify unassigned character's replacement\n"
00951 #endif
00952 );
00953 fprintf(HELP_OUTPUT,
00954 #ifdef OVERWRITE
00955 " --in-place[=SUF] Overwrite original files\n"
00956 " --overwrite[=SUF] Preserve timestamp of original files\n"
00957 #endif
00958 " -g --guess Guess the input code\n"
00959 " -v --version Print the version\n"
00960 " --help/-V Print this help / configuration\n"
00961 );
00962 version();
00963 }
00964
/* Print a summary of the compile-time configuration to HELP_OUTPUT: version
 * and release date, build date/time, default output encoding, default end of
 * line, and the MIME-decode, JIS X 0201 and help-output settings. Each value
 * is selected by the preprocessor, so the text reflects how this binary was
 * built rather than any run-time option. */
00965 static void
00966 show_configuration(void)
00967 {
00968 fprintf(HELP_OUTPUT,
00969 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
00970 " Compile-time options:\n"
00971 " Compiled at: " __DATE__ " " __TIME__ "\n"
00972 );
/* The first two branches consume one %s argument (the name of the default
 * encoding); the last branch has no conversion and no argument. Note that
 * "NONE" only means no default was configured: nkf_default_encoding() still
 * falls back to UTF-8 in that case. */
00973 fprintf(HELP_OUTPUT,
00974 " Default output encoding: "
00975 #ifdef DEFAULT_CODE_LOCALE
00976 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
00977 #elif defined(DEFAULT_ENCIDX)
00978 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
00979 #else
00980 "NONE\n"
00981 #endif
00982 );
00983 fprintf(HELP_OUTPUT,
00984 " Default output end of line: "
/* DEFAULT_NEWLINE is compared against the CR (0x0d) and CRLF (0x0D0A) macro
 * values; anything else is reported as LF. */
00985 #if DEFAULT_NEWLINE == CR
00986 "CR"
00987 #elif DEFAULT_NEWLINE == CRLF
00988 "CRLF"
00989 #else
00990 "LF"
00991 #endif
00992 "\n"
00993 " Decode MIME encoded string: "
00994 #if MIME_DECODE_DEFAULT
00995 "ON"
00996 #else
00997 "OFF"
00998 #endif
00999 "\n"
01000 " Convert JIS X 0201 Katakana: "
01001 #if X0201_DEFAULT
01002 "ON"
01003 #else
01004 "OFF"
01005 #endif
01006 "\n"
01007 " --help, --version output: "
/* NOTE(review): HELP_OUTPUT_HELP_OUTPUT is not defined anywhere in the visible
 * source. An undefined identifier evaluates to 0 in #if, in which case
 * "STDOUT" is always printed -- confirm the intended macro name and label. */
01008 #if HELP_OUTPUT_HELP_OUTPUT
01009 "HELP_OUTPUT"
01010 #else
01011 "STDOUT"
01012 #endif
01013 "\n");
01014 }
01015 #endif
01016
01017 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * If `suffix` contains no '*', the result is filename followed by suffix.
 * Otherwise the result is suffix with every '*' replaced by filename.
 *
 * Returns a string allocated with nkf_xmalloc(); the caller owns it and
 * releases it with nkf_xfree(). Never returns NULL (nkf_xmalloc exits on
 * allocation failure).
 *
 * Lengths are kept in size_t rather than int, and each piece is copied
 * straight to its destination offset instead of rescanning the partial
 * result with strncat for every '*'.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
	if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
	backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
	memcpy(backup_filename, filename, filename_length);
	/* +1 also copies the terminating NUL of suffix */
	memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
	return backup_filename;
    }

    /* Each one-byte '*' is replaced by filename_length bytes. */
    backup_filename = nkf_xmalloc(suffix_length - asterisk_count
				  + asterisk_count * filename_length + 1);
    for (i = 0, j = 0; i < suffix_length; i++) {
	if (suffix[i] == '*') {
	    memcpy(backup_filename + j, filename, filename_length);
	    j += filename_length;
	} else {
	    backup_filename[j++] = suffix[i];
	}
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
01052 #endif
01053
01054 #ifdef UTF8_INPUT_ENABLE
01055 static void
01056 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
01057 {
01058 int shift = 20;
01059 c &= VALUE_MASK;
01060 while(shift >= 0){
01061 if(c >= NKF_INT32_C(1)<<shift){
01062 while(shift >= 0){
01063 (*f)(0, bin2hex(c>>shift));
01064 shift -= 4;
01065 }
01066 }else{
01067 shift -= 4;
01068 }
01069 }
01070 return;
01071 }
01072
01073 static void
01074 encode_fallback_html(nkf_char c)
01075 {
01076 (*oconv)(0, '&');
01077 (*oconv)(0, '#');
01078 c &= VALUE_MASK;
01079 if(c >= NKF_INT32_C(1000000))
01080 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
01081 if(c >= NKF_INT32_C(100000))
01082 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
01083 if(c >= 10000)
01084 (*oconv)(0, 0x30+(c/10000 )%10);
01085 if(c >= 1000)
01086 (*oconv)(0, 0x30+(c/1000 )%10);
01087 if(c >= 100)
01088 (*oconv)(0, 0x30+(c/100 )%10);
01089 if(c >= 10)
01090 (*oconv)(0, 0x30+(c/10 )%10);
01091 if(c >= 0)
01092 (*oconv)(0, 0x30+ c %10);
01093 (*oconv)(0, ';');
01094 return;
01095 }
01096
01097 static void
01098 encode_fallback_xml(nkf_char c)
01099 {
01100 (*oconv)(0, '&');
01101 (*oconv)(0, '#');
01102 (*oconv)(0, 'x');
01103 nkf_each_char_to_hex(oconv, c);
01104 (*oconv)(0, ';');
01105 return;
01106 }
01107
01108 static void
01109 encode_fallback_java(nkf_char c)
01110 {
01111 (*oconv)(0, '\\');
01112 c &= VALUE_MASK;
01113 if(!nkf_char_unicode_bmp_p(c)){
01114 (*oconv)(0, 'U');
01115 (*oconv)(0, '0');
01116 (*oconv)(0, '0');
01117 (*oconv)(0, bin2hex(c>>20));
01118 (*oconv)(0, bin2hex(c>>16));
01119 }else{
01120 (*oconv)(0, 'u');
01121 }
01122 (*oconv)(0, bin2hex(c>>12));
01123 (*oconv)(0, bin2hex(c>> 8));
01124 (*oconv)(0, bin2hex(c>> 4));
01125 (*oconv)(0, bin2hex(c ));
01126 return;
01127 }
01128
01129 static void
01130 encode_fallback_perl(nkf_char c)
01131 {
01132 (*oconv)(0, '\\');
01133 (*oconv)(0, 'x');
01134 (*oconv)(0, '{');
01135 nkf_each_char_to_hex(oconv, c);
01136 (*oconv)(0, '}');
01137 return;
01138 }
01139
01140 static void
01141 encode_fallback_subchar(nkf_char c)
01142 {
01143 c = unicode_subchar;
01144 (*oconv)((c>>8)&0xFF, c&0xFF);
01145 return;
01146 }
01147 #endif
01148
/*
 * Table of long option names.
 *
 *   name  - the long option spelling; several names end in '='
 *           (presumably options that take a value -- confirm against the
 *           option parser, which is outside this table).
 *   alias - a string associated with the name; for many entries it looks
 *           like the equivalent short-option letters.  It is empty for
 *           entries that have no such string (presumably handled by name
 *           in the parser -- confirm).
 *
 * Entries are compiled in or out by the same feature macros that guard
 * the corresponding functionality.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=", ""},
    {"oc=", ""},
    {"base64","jMB"},
    {"euc","e"},
    {"euc-input","E"},
    {"fj","jm"},
    {"help",""},
    {"jis","j"},
    {"jis-input","J"},
    {"mac","sLm"},
    {"mime","jM"},
    {"mime-input","m"},
    {"msdos","sLw"},
    {"sjis","s"},
    {"sjis-input","S"},
    {"unix","eLu"},
    {"version","v"},
    {"windows","sLw"},
    {"hiragana","h1"},
    {"katakana","h2"},
    {"katakana-hiragana","h3"},
    {"guess=", ""},
    {"guess", "g2"},
    {"cp932", ""},
    {"no-cp932", ""},
#ifdef X0212_ENABLE
    {"x0212", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8", "w"},
    {"utf16", "w16"},
    {"ms-ucs-map", ""},
    {"fb-skip", ""},
    {"fb-html", ""},
    {"fb-xml", ""},
    {"fb-perl", ""},
    {"fb-java", ""},
    {"fb-subchar", ""},
    {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input", "W"},
    {"utf16-input", "W16"},
    {"no-cp932ext", ""},
    {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
    {"overwrite", ""},
    {"overwrite=", ""},
    {"in-place", ""},
    {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input", ""},
    {"url-input", ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input", ""},
#endif
#ifdef CHECK_OPTION
    {"no-output", ""},
    {"debug", ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv", ""},
#endif
#ifdef EXEC_IO
    {"exec-in", ""},
    {"exec-out", ""},
#endif
    {"prefix=", ""},
};
01228
01229 static void
01230 set_input_encoding(nkf_encoding *enc)
01231 {
01232 switch (nkf_enc_to_index(enc)) {
01233 case ISO_8859_1:
01234 iso8859_f = TRUE;
01235 break;
01236 case CP50221:
01237 case CP50222:
01238 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01239 case CP50220:
01240 #ifdef SHIFTJIS_CP932
01241 cp51932_f = TRUE;
01242 #endif
01243 #ifdef UTF8_OUTPUT_ENABLE
01244 ms_ucs_map_f = UCS_MAP_CP932;
01245 #endif
01246 break;
01247 case ISO_2022_JP_1:
01248 x0212_f = TRUE;
01249 break;
01250 case ISO_2022_JP_3:
01251 x0212_f = TRUE;
01252 x0213_f = TRUE;
01253 break;
01254 case ISO_2022_JP_2004:
01255 x0212_f = TRUE;
01256 x0213_f = TRUE;
01257 break;
01258 case SHIFT_JIS:
01259 break;
01260 case WINDOWS_31J:
01261 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01262 #ifdef SHIFTJIS_CP932
01263 cp51932_f = TRUE;
01264 #endif
01265 #ifdef UTF8_OUTPUT_ENABLE
01266 ms_ucs_map_f = UCS_MAP_CP932;
01267 #endif
01268 break;
01269 break;
01270 case CP10001:
01271 #ifdef SHIFTJIS_CP932
01272 cp51932_f = TRUE;
01273 #endif
01274 #ifdef UTF8_OUTPUT_ENABLE
01275 ms_ucs_map_f = UCS_MAP_CP10001;
01276 #endif
01277 break;
01278 case EUC_JP:
01279 break;
01280 case EUCJP_NKF:
01281 break;
01282 case CP51932:
01283 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01284 #ifdef SHIFTJIS_CP932
01285 cp51932_f = TRUE;
01286 #endif
01287 #ifdef UTF8_OUTPUT_ENABLE
01288 ms_ucs_map_f = UCS_MAP_CP932;
01289 #endif
01290 break;
01291 case EUCJP_MS:
01292 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01293 #ifdef SHIFTJIS_CP932
01294 cp51932_f = FALSE;
01295 #endif
01296 #ifdef UTF8_OUTPUT_ENABLE
01297 ms_ucs_map_f = UCS_MAP_MS;
01298 #endif
01299 break;
01300 case EUCJP_ASCII:
01301 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01302 #ifdef SHIFTJIS_CP932
01303 cp51932_f = FALSE;
01304 #endif
01305 #ifdef UTF8_OUTPUT_ENABLE
01306 ms_ucs_map_f = UCS_MAP_ASCII;
01307 #endif
01308 break;
01309 case SHIFT_JISX0213:
01310 case SHIFT_JIS_2004:
01311 x0213_f = TRUE;
01312 #ifdef SHIFTJIS_CP932
01313 cp51932_f = FALSE;
01314 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01315 #endif
01316 break;
01317 case EUC_JISX0213:
01318 case EUC_JIS_2004:
01319 x0213_f = TRUE;
01320 #ifdef SHIFTJIS_CP932
01321 cp51932_f = FALSE;
01322 #endif
01323 break;
01324 #ifdef UTF8_INPUT_ENABLE
01325 #ifdef UNICODE_NORMALIZATION
01326 case UTF8_MAC:
01327 nfc_f = TRUE;
01328 break;
01329 #endif
01330 case UTF_16:
01331 case UTF_16BE:
01332 case UTF_16BE_BOM:
01333 input_endian = ENDIAN_BIG;
01334 break;
01335 case UTF_16LE:
01336 case UTF_16LE_BOM:
01337 input_endian = ENDIAN_LITTLE;
01338 break;
01339 case UTF_32:
01340 case UTF_32BE:
01341 case UTF_32BE_BOM:
01342 input_endian = ENDIAN_BIG;
01343 break;
01344 case UTF_32LE:
01345 case UTF_32LE_BOM:
01346 input_endian = ENDIAN_LITTLE;
01347 break;
01348 #endif
01349 }
01350 }
01351
01352 static void
01353 set_output_encoding(nkf_encoding *enc)
01354 {
01355 switch (nkf_enc_to_index(enc)) {
01356 case CP50220:
01357 #ifdef SHIFTJIS_CP932
01358 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01359 #endif
01360 #ifdef UTF8_OUTPUT_ENABLE
01361 ms_ucs_map_f = UCS_MAP_CP932;
01362 #endif
01363 break;
01364 case CP50221:
01365 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01366 #ifdef SHIFTJIS_CP932
01367 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01368 #endif
01369 #ifdef UTF8_OUTPUT_ENABLE
01370 ms_ucs_map_f = UCS_MAP_CP932;
01371 #endif
01372 break;
01373 case ISO_2022_JP:
01374 #ifdef SHIFTJIS_CP932
01375 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01376 #endif
01377 break;
01378 case ISO_2022_JP_1:
01379 x0212_f = TRUE;
01380 #ifdef SHIFTJIS_CP932
01381 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01382 #endif
01383 break;
01384 case ISO_2022_JP_3:
01385 case ISO_2022_JP_2004:
01386 x0212_f = TRUE;
01387 x0213_f = TRUE;
01388 #ifdef SHIFTJIS_CP932
01389 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01390 #endif
01391 break;
01392 case SHIFT_JIS:
01393 break;
01394 case WINDOWS_31J:
01395 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01396 #ifdef UTF8_OUTPUT_ENABLE
01397 ms_ucs_map_f = UCS_MAP_CP932;
01398 #endif
01399 break;
01400 case CP10001:
01401 #ifdef UTF8_OUTPUT_ENABLE
01402 ms_ucs_map_f = UCS_MAP_CP10001;
01403 #endif
01404 break;
01405 case EUC_JP:
01406 x0212_f = TRUE;
01407 #ifdef SHIFTJIS_CP932
01408 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01409 #endif
01410 #ifdef UTF8_OUTPUT_ENABLE
01411 ms_ucs_map_f = UCS_MAP_ASCII;
01412 #endif
01413 break;
01414 case EUCJP_NKF:
01415 x0212_f = FALSE;
01416 #ifdef SHIFTJIS_CP932
01417 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01418 #endif
01419 #ifdef UTF8_OUTPUT_ENABLE
01420 ms_ucs_map_f = UCS_MAP_ASCII;
01421 #endif
01422 break;
01423 case CP51932:
01424 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01425 #ifdef SHIFTJIS_CP932
01426 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01427 #endif
01428 #ifdef UTF8_OUTPUT_ENABLE
01429 ms_ucs_map_f = UCS_MAP_CP932;
01430 #endif
01431 break;
01432 case EUCJP_MS:
01433 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01434 x0212_f = TRUE;
01435 #ifdef UTF8_OUTPUT_ENABLE
01436 ms_ucs_map_f = UCS_MAP_MS;
01437 #endif
01438 break;
01439 case EUCJP_ASCII:
01440 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01441 x0212_f = TRUE;
01442 #ifdef UTF8_OUTPUT_ENABLE
01443 ms_ucs_map_f = UCS_MAP_ASCII;
01444 #endif
01445 break;
01446 case SHIFT_JISX0213:
01447 case SHIFT_JIS_2004:
01448 x0213_f = TRUE;
01449 #ifdef SHIFTJIS_CP932
01450 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01451 #endif
01452 break;
01453 case EUC_JISX0213:
01454 case EUC_JIS_2004:
01455 x0212_f = TRUE;
01456 x0213_f = TRUE;
01457 #ifdef SHIFTJIS_CP932
01458 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01459 #endif
01460 break;
01461 #ifdef UTF8_OUTPUT_ENABLE
01462 case UTF_8_BOM:
01463 output_bom_f = TRUE;
01464 break;
01465 case UTF_16:
01466 case UTF_16BE_BOM:
01467 output_bom_f = TRUE;
01468 break;
01469 case UTF_16LE:
01470 output_endian = ENDIAN_LITTLE;
01471 output_bom_f = FALSE;
01472 break;
01473 case UTF_16LE_BOM:
01474 output_endian = ENDIAN_LITTLE;
01475 output_bom_f = TRUE;
01476 break;
01477 case UTF_32:
01478 case UTF_32BE_BOM:
01479 output_bom_f = TRUE;
01480 break;
01481 case UTF_32LE:
01482 output_endian = ENDIAN_LITTLE;
01483 output_bom_f = FALSE;
01484 break;
01485 case UTF_32LE_BOM:
01486 output_endian = ENDIAN_LITTLE;
01487 output_bom_f = TRUE;
01488 break;
01489 #endif
01490 }
01491 }
01492
01493 static struct input_code*
01494 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01495 {
01496 if (iconv_func){
01497 struct input_code *p = input_code_list;
01498 while (p->name){
01499 if (iconv_func == p->iconv_func){
01500 return p;
01501 }
01502 p++;
01503 }
01504 }
01505 return 0;
01506 }
01507
01508 static void
01509 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01510 {
01511 #ifdef INPUT_CODE_FIX
01512 if (f || !input_encoding)
01513 #endif
01514 if (estab_f != f){
01515 estab_f = f;
01516 }
01517
01518 if (iconv_func
01519 #ifdef INPUT_CODE_FIX
01520 && (f == -TRUE || !input_encoding)
01521 #endif
01522 ){
01523 iconv = iconv_func;
01524 }
01525 #ifdef CHECK_OPTION
01526 if (estab_f && iconv_for_check != iconv){
01527 struct input_code *p = find_inputcode_byfunc(iconv);
01528 if (p){
01529 set_input_codename(p->name);
01530 debug(p->name);
01531 }
01532 iconv_for_check = iconv;
01533 }
01534 #endif
01535 }
01536
01537 #ifdef X0212_ENABLE
01538 static nkf_char
01539 x0212_shift(nkf_char c)
01540 {
01541 nkf_char ret = c;
01542 c &= 0x7f;
01543 if (is_eucg3(ret)){
01544 if (0x75 <= c && c <= 0x7f){
01545 ret = c + (0x109 - 0x75);
01546 }
01547 }else{
01548 if (0x75 <= c && c <= 0x7f){
01549 ret = c + (0x113 - 0x75);
01550 }
01551 }
01552 return ret;
01553 }
01554
01555
01556 static nkf_char
01557 x0212_unshift(nkf_char c)
01558 {
01559 nkf_char ret = c;
01560 if (0x7f <= c && c <= 0x88){
01561 ret = c + (0x75 - 0x7f);
01562 }else if (0x89 <= c && c <= 0x92){
01563 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
01564 }
01565 return ret;
01566 }
01567 #endif
01568
01569 static int
01570 is_x0213_2_in_x0212(nkf_char c1)
01571 {
01572 static const char x0213_2_table[] =
01573 {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
01574 int ku = c1 - 0x20;
01575 if (ku <= 15)
01576 return x0213_2_table[ku];
01577 if (78 <= ku && ku <= 94)
01578 return 1;
01579 return 0;
01580 }
01581
/*
 * Convert the code pair (c2, c1) into a two-byte result stored through
 * p2 and p1 (either pointer may be null, in which case that half is not
 * stored).  Judging by the name and the arithmetic this is the
 * EUC/JIS -> Shift_JIS direction -- confirm against callers.
 *
 * Returns 0 when a result was produced and 1 when the input cannot be
 * converted.
 */
static nkf_char
e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
{
    nkf_char ndx;
    if (is_eucg3(c2)){
        /* G3-prefixed input: ndx is the row byte without the prefix. */
        ndx = c2 & 0x7f;
        if (x0213_f && is_x0213_2_in_x0212(ndx)){
            /* Rows flagged by is_x0213_2_in_x0212() are computed directly;
             * a flagged row outside the two ranges below is rejected. */
            if((0x21 <= ndx && ndx <= 0x2F)){
                if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
                if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
                return 0;
            }else if(0x6E <= ndx && ndx <= 0x7E){
                if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
                if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
                return 0;
            }
            return 1;
        }
#ifdef X0212_ENABLE
        else if(nkf_isgraph(ndx)){
            /* Table lookup via x0212_shiftjis; a zero or missing entry
             * falls back to x0212_shift() and the generic formula below. */
            nkf_char val = 0;
            const unsigned short *ptr;
            ptr = x0212_shiftjis[ndx - 0x21];
            if (ptr){
                /* NOTE(review): the index is negative if (c1 & 0x7f) < 0x21;
                 * presumably callers never pass such c1 -- confirm. */
                val = ptr[(c1 & 0x7f) - 0x21];
            }
            if (val){
                c2 = val >> 8;
                c1 = val & 0xff;
                if (p2) *p2 = c2;
                if (p1) *p1 = c1;
                return 0;
            }
            c2 = x0212_shift(c2);
        }
#endif
    }
    /* Anything still above 0x7F at this point is not convertible. */
    if(0x7F < c2) return 1;
    /* Generic row/cell arithmetic: two rows share one lead byte, and the
     * parity of c2 selects which half of the trail-byte range is used. */
    if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
    if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
    return 0;
}
01624
/*
 * Convert the byte pair (c2, c1) into a code pair stored through p2 and
 * p1 (either pointer may be null).  Judging by the name and arithmetic
 * this is the Shift_JIS -> EUC/JIS direction -- confirm against callers.
 *
 * Returns 1 when c1 is above 0xFC, otherwise 0.
 */
static nkf_char
s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
{
#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
    nkf_char val;
#endif
    /* Indexed by [c2 - 0xF0][c1 > 0x9E] in the x0213 branch below. */
    static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
    if (0xFC < c1) return 1;
#ifdef SHIFTJIS_CP932
    /* NOTE(review): the table lookups below index with c1 - 0x40, which is
     * negative for c1 < 0x40; presumably callers guarantee c1 >= 0x40 --
     * confirm. */
    if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
        /* Remap through shiftjis_cp932 when it has a non-zero entry. */
        val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
        if (val){
            c2 = val >> 8;
            c1 = val & 0xff;
        }
    }
    if (cp932inv_f
        && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
        /* Remap through cp932inv when it has a non-zero entry. */
        val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
        if (val){
            c2 = val >> 8;
            c1 = val & 0xff;
        }
    }
#endif
#ifdef X0212_ENABLE
    if (!x0213_f && is_ibmext_in_sjis(c2)){
        /* A non-zero shiftjis_x0212 entry is the final answer; entries
         * above 0x7FFF are returned with the G3 prefix. */
        val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
        if (val){
            if (val > 0x7FFF){
                c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
                c1 = val & 0xff;
            }else{
                c2 = val >> 8;
                c1 = val & 0xff;
            }
            if (p2) *p2 = c2;
            if (p1) *p1 = c1;
            return 0;
        }
    }
#endif
    if(c2 >= 0x80){
        if(x0213_f && c2 >= 0xF0){
            /* Lead bytes 0xF0 and up under x0213_f yield G3-prefixed rows. */
            if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
                c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
            }else{
                c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
                if (0x9E < c1) c2++;
            }
        }else{
            /* Each lead byte covers two rows; the offset depends on whether
             * the lead byte is at or below 0x9F. */
#define SJ0162 0x00e1
#define SJ6394 0x0161
            c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
            if (0x9E < c1) c2++;
        }
        /* Trail byte: below 0x9F is the first row of the pair (skipping
         * the gap at DEL), otherwise the second row. */
        if (c1 < 0x9F)
            c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
        else {
            c1 = c1 - 0x7E;
        }
    }

#ifdef X0212_ENABLE
    c2 = x0212_unshift(c2);
#endif
    if (p2) *p2 = c2;
    if (p1) *p1 = c1;
    return 0;
}
01695
01696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
01697 static void
01698 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
01699 {
01700 val &= VALUE_MASK;
01701 if (val < 0x80){
01702 *p1 = val;
01703 *p2 = 0;
01704 *p3 = 0;
01705 *p4 = 0;
01706 }else if (val < 0x800){
01707 *p1 = 0xc0 | (val >> 6);
01708 *p2 = 0x80 | (val & 0x3f);
01709 *p3 = 0;
01710 *p4 = 0;
01711 } else if (nkf_char_unicode_bmp_p(val)) {
01712 *p1 = 0xe0 | (val >> 12);
01713 *p2 = 0x80 | ((val >> 6) & 0x3f);
01714 *p3 = 0x80 | ( val & 0x3f);
01715 *p4 = 0;
01716 } else if (nkf_char_unicode_value_p(val)) {
01717 *p1 = 0xf0 | (val >> 18);
01718 *p2 = 0x80 | ((val >> 12) & 0x3f);
01719 *p3 = 0x80 | ((val >> 6) & 0x3f);
01720 *p4 = 0x80 | ( val & 0x3f);
01721 } else {
01722 *p1 = 0;
01723 *p2 = 0;
01724 *p3 = 0;
01725 *p4 = 0;
01726 }
01727 }
01728
01729 static nkf_char
01730 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
01731 {
01732 nkf_char wc;
01733 if (c1 <= 0x7F) {
01734
01735 wc = c1;
01736 }
01737 else if (c1 <= 0xC1) {
01738
01739 return -1;
01740 }
01741 else if (c1 <= 0xDF) {
01742
01743 wc = (c1 & 0x1F) << 6;
01744 wc |= (c2 & 0x3F);
01745 }
01746 else if (c1 <= 0xEF) {
01747
01748 wc = (c1 & 0x0F) << 12;
01749 wc |= (c2 & 0x3F) << 6;
01750 wc |= (c3 & 0x3F);
01751 }
01752 else if (c2 <= 0xF4) {
01753
01754 wc = (c1 & 0x0F) << 18;
01755 wc |= (c2 & 0x3F) << 12;
01756 wc |= (c3 & 0x3F) << 6;
01757 wc |= (c4 & 0x3F);
01758 }
01759 else {
01760 return -1;
01761 }
01762 return wc;
01763 }
01764 #endif
01765
01766 #ifdef UTF8_INPUT_ENABLE
/*
 * Second-level table lookup shared by unicode_to_jis_common().
 *
 * c1 and c0 are two consecutive UTF-8 trail bytes (0x80 is subtracted
 * from each to form an index).  pp is an array of psize row pointers;
 * pp[c1 - 0x80] is the row and row[c0 - 0x80] the 16-bit table value.
 *
 * Returns 0 and stores the decoded pair through p2/p1 (each may be
 * null) on success, or 1 when the table has no entry, an index is out
 * of range, or the entry is filtered out by no_cp932ext_f.
 */
static int
unicode_to_jis_common2(nkf_char c1, nkf_char c0,
                       const unsigned short *const *pp, nkf_char psize,
                       nkf_char *p2, nkf_char *p1)
{
    nkf_char c2;
    const unsigned short *p;
    unsigned short val;

    if (pp == 0) return 1;

    c1 -= 0x80;
    if (c1 < 0 || psize <= c1) return 1;
    p = pp[c1];
    if (p == 0) return 1;

    c0 -= 0x80;
    /* The row length is bounded by sizeof_utf8_to_euc_C2, not by psize. */
    if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
    val = p[c0];
    if (val == 0) return 1;
    /* With no_cp932ext_f set, reject entries in row 0x2D and entries
     * above 0xF300 (presumably the CP932 extension areas -- confirm). */
    if (no_cp932ext_f && (
                          (val>>8) == 0x2D ||
                          val > NKF_INT32_C(0xF300)
                          )) return 1;

    c2 = val >> 8;
    if (val > 0x7FFF){
        /* High bit set in the table value: mark the result as G3. */
        c2 &= 0x7f;
        c2 |= PREFIX_EUCG3;
    }
    /* A high byte equal to SO stands for JIS X 0201 katakana. */
    if (c2 == SO) c2 = JIS_X_0201_1976_K;
    c1 = val & 0xFF;
    if (p2) *p2 = c2;
    if (p1) *p1 = c1;
    return 0;
}
01803
01804 static int
01805 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
01806 {
01807 const unsigned short *const *pp;
01808 const unsigned short *const *const *ppp;
01809 static const char no_best_fit_chars_table_C2[] =
01810 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01811 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01812 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
01813 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
01814 static const char no_best_fit_chars_table_C2_ms[] =
01815 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01816 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01817 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
01818 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
01819 static const char no_best_fit_chars_table_932_C2[] =
01820 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01822 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
01823 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
01824 static const char no_best_fit_chars_table_932_C3[] =
01825 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01826 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
01827 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01828 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
01829 nkf_char ret = 0;
01830
01831 if(c2 < 0x80){
01832 *p2 = 0;
01833 *p1 = c2;
01834 }else if(c2 < 0xe0){
01835 if(no_best_fit_chars_f){
01836 if(ms_ucs_map_f == UCS_MAP_CP932){
01837 switch(c2){
01838 case 0xC2:
01839 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
01840 break;
01841 case 0xC3:
01842 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01843 break;
01844 }
01845 }else if(!cp932inv_f){
01846 switch(c2){
01847 case 0xC2:
01848 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
01849 break;
01850 case 0xC3:
01851 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01852 break;
01853 }
01854 }else if(ms_ucs_map_f == UCS_MAP_MS){
01855 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
01856 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01857 switch(c2){
01858 case 0xC2:
01859 switch(c1){
01860 case 0xA2:
01861 case 0xA3:
01862 case 0xA5:
01863 case 0xA6:
01864 case 0xAC:
01865 case 0xAF:
01866 case 0xB8:
01867 return 1;
01868 }
01869 break;
01870 }
01871 }
01872 }
01873 pp =
01874 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
01875 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
01876 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
01877 x0213_f ? utf8_to_euc_2bytes_x0213 :
01878 utf8_to_euc_2bytes;
01879 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
01880 }else if(c0 < 0xF0){
01881 if(no_best_fit_chars_f){
01882 if(ms_ucs_map_f == UCS_MAP_CP932){
01883 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
01884 }else if(ms_ucs_map_f == UCS_MAP_MS){
01885 switch(c2){
01886 case 0xE2:
01887 switch(c1){
01888 case 0x80:
01889 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
01890 break;
01891 case 0x88:
01892 if(c0 == 0x92) return 1;
01893 break;
01894 }
01895 break;
01896 case 0xE3:
01897 if(c1 == 0x80 || c0 == 0x9C) return 1;
01898 break;
01899 }
01900 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01901 switch(c2){
01902 case 0xE3:
01903 switch(c1){
01904 case 0x82:
01905 if(c0 == 0x94) return 1;
01906 break;
01907 case 0x83:
01908 if(c0 == 0xBB) return 1;
01909 break;
01910 }
01911 break;
01912 }
01913 }else{
01914 switch(c2){
01915 case 0xE2:
01916 switch(c1){
01917 case 0x80:
01918 if(c0 == 0x95) return 1;
01919 break;
01920 case 0x88:
01921 if(c0 == 0xA5) return 1;
01922 break;
01923 }
01924 break;
01925 case 0xEF:
01926 switch(c1){
01927 case 0xBC:
01928 if(c0 == 0x8D) return 1;
01929 break;
01930 case 0xBD:
01931 if(c0 == 0x9E && !cp932inv_f) return 1;
01932 break;
01933 case 0xBF:
01934 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
01935 break;
01936 }
01937 break;
01938 }
01939 }
01940 }
01941 ppp =
01942 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
01943 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
01944 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
01945 x0213_f ? utf8_to_euc_3bytes_x0213 :
01946 utf8_to_euc_3bytes;
01947 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
01948 }else return -1;
01949 #ifdef SHIFTJIS_CP932
01950 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
01951 nkf_char s2, s1;
01952 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
01953 s2e_conv(s2, s1, p2, p1);
01954 }else{
01955 ret = 1;
01956 }
01957 }
01958 #endif
01959 return ret;
01960 }
01961
01962 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Linear search helper: scans the first `size` rows of `tbl` for a row
 * whose column 0 equals `euc` and, on the first match, assigns that
 * row's column 2 to the variable `low`.  `low` is not a macro parameter;
 * it must already be declared in the scope where the macro is used, and
 * it is left untouched when no row matches.
 */
#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
    int i; \
    for (i = 0; i < size; i++) \
        if (tbl[i][0] == euc) { \
            low = tbl[i][2]; \
            break; \
        } \
    } while (0)
01971
/*
 * Map the code pair (c2, c1) to a Unicode value using the euc_to_utf8 /
 * x0212_to_utf8 tables selected by c2, x0213_f and ms_ucs_map_f.
 *
 * Returns the table value, or a combined value built with
 * UTF16_TO_UTF32() when x0213_f is set and the table value lies in
 * 0xD800..0xDBFF, or 0 when there is no mapping.
 */
static nkf_char
e2w_conv(nkf_char c2, nkf_char c1)
{
    const unsigned short *p;

    if (c2 == JIS_X_0201_1976_K) {
        if (ms_ucs_map_f == UCS_MAP_CP10001) {
            /* Two cells have fixed results under the CP10001 map. */
            switch (c1) {
            case 0x20:
                return 0xA0;
            case 0x7D:
                return 0xA9;
            }
        }
        p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
    } else if (is_eucg3(c2)){
        /* One fixed result under the ASCII map. */
        if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
            return 0xA6;
        }
        /* Row index: low seven bits, rebased so that 0x21 becomes 0. */
        c2 = (c2&0x7f) - 0x21;
        if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
            p =
                x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
                x0212_to_utf8_2bytes[c2];
        else
            return 0;
#endif
    } else {
        c2 &= 0x7f;
        c2 = (c2&0x7f) - 0x21;
        if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
            p =
                x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
                ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
                ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
                euc_to_utf8_2bytes_ms[c2];
        else
            return 0;
    }
    if (!p) return 0;
    /* Cell index, rebased the same way as the row. */
    c1 = (c1 & 0x7f) - 0x21;
    if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
        nkf_char val = p[c1];
        if (x0213_f && 0xD800<=val && val<=0xDBFF) {
            /* The table holds only the first half of a pair; the second
             * half is looked up by code in the matching surrogate table. */
            nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
            nkf_char low = 0;
            if (p==x0212_to_utf8_2bytes_x0213[c2]) {
                X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
            } else {
                X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
            }
            if (!low) return 0;
            return UTF16_TO_UTF32(val, low);
        } else {
            return val;
        }
    }
    return 0;
}
02032
02033 static nkf_char
02034 e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
02035 {
02036 nkf_char euc;
02037 int i;
02038 for (i = 0; i < sizeof_x0213_combining_chars; i++)
02039 if (x0213_combining_chars[i] == comb)
02040 break;
02041 if (i >= sizeof_x0213_combining_chars)
02042 return 0;
02043 euc = (c2&0x7f)<<8 | (c1&0x7f);
02044 for (i = 0; i < sizeof_x0213_combining_table; i++)
02045 if (x0213_combining_table[i][0] == euc)
02046 return x0213_combining_table[i][1];
02047 return 0;
02048 }
02049 #endif
02050
02051 static nkf_char
02052 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
02053 {
02054 nkf_char ret = 0;
02055
02056 if (!c1){
02057 *p2 = 0;
02058 *p1 = c2;
02059 }else if (0xc0 <= c2 && c2 <= 0xef) {
02060 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
02061 #ifdef NUMCHAR_OPTION
02062 if (ret > 0){
02063 if (p2) *p2 = 0;
02064 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
02065 ret = 0;
02066 }
02067 #endif
02068 }
02069 return ret;
02070 }
02071
02072 #ifdef UTF8_INPUT_ENABLE
/*
 * Convert the Unicode value (val & VALUE_MASK) to a code pair stored
 * through p2/p1.
 *
 *   val < 0x80 : returned as (0, val).
 *   BMP        : re-encoded as UTF-8 and passed to
 *                unicode_to_jis_common(); a positive result from it is
 *                replaced by (0, nkf_char_unicode_new(val)) and success.
 *   otherwise  : with x0213_f, the value is split into a UTF-16 pair
 *                and searched in the two surrogate tables; if not found
 *                (or without x0213_f) the result is
 *                (0, nkf_char_unicode_new(val)).
 *
 * Returns 0 on success; a negative result from unicode_to_jis_common()
 * is passed through.
 */
static nkf_char
w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
{
    nkf_char c1, c2, c3, c4;
    nkf_char ret = 0;
    val &= VALUE_MASK;
    if (val < 0x80) {
        *p2 = 0;
        *p1 = val;
    }
    else if (nkf_char_unicode_bmp_p(val)){
        nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
        ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
        if (ret > 0){
            /* No table mapping: hand the raw Unicode value downstream. */
            *p2 = 0;
            *p1 = nkf_char_unicode_new(val);
            ret = 0;
        }
    }
    else {
        int i;
        if (x0213_f) {
            /* c1/c2 are reused here as the high and low UTF-16 halves. */
            c1 = (val >> 10) + NKF_INT32_C(0xD7C0);
            c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00);
            for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
                if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
                    val = x0213_1_surrogate_table[i][0];
                    *p2 = val >> 8;
                    *p1 = val & 0xFF;
                    return 0;
                }
            for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
                if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
                    val = x0213_2_surrogate_table[i][0];
                    /* Entries of the second table are returned G3-prefixed. */
                    *p2 = PREFIX_EUCG3 | (val >> 8);
                    *p1 = val & 0xFF;
                    return 0;
                }
        }
        *p2 = 0;
        *p1 = nkf_char_unicode_new(val);
    }
    return ret;
}
02117 #endif
02118
/*
 * Input converter (EUC-style, judging by the name and the SS2/0x8f
 * handling -- confirm).  Normalises the incoming bytes (c2, c1, c0) and
 * forwards one code pair to (*oconv).
 *
 * Returns -1 when c2 is 0x8f and the third byte c0 is still 0
 * (presumably "need one more byte" -- confirm against the caller),
 * otherwise 0.
 */
static nkf_char
e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
{
    if (c2 == JIS_X_0201_1976_K || c2 == SS2){
        if (iso2022jp_f && !x0201_f) {
            /* Replaced by the GETA1/GETA2 pair in this mode. */
            c2 = GETA1; c1 = GETA2;
        } else {
            c2 = JIS_X_0201_1976_K;
            c1 &= 0x7f;
        }
#ifdef X0212_ENABLE
    }else if (c2 == 0x8f){
        if (c0 == 0){
            return -1;
        }
        if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
            /* Rows 0xF5..0xFE are mapped linearly (94 cells per row) onto
             * Unicode values starting at 0xE3AC. */
            c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
            c2 = 0;
        } else {
            /* Fold the 0x8f prefix and the row byte into c2. */
            c2 = (c2 << 8) | (c1 & 0x7f);
            c1 = c0 & 0x7f;
#ifdef SHIFTJIS_CP932
            if (cp51932_f){
                /* Round-trip through e2s_conv()/s2e_conv(). */
                nkf_char s2, s1;
                if (e2s_conv(c2, c1, &s2, &s1) == 0){
                    s2e_conv(s2, s1, &c2, &c1);
                    if (c2 < 0x100){
                        c1 &= 0x7f;
                        c2 &= 0x7f;
                    }
                }
            }
#endif
        }
#endif
    } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
        /* Passed to oconv unchanged. */
    } else {
        if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
            /* Rows 0xF5..0xFE are mapped linearly (94 cells per row) onto
             * Unicode values starting at 0xE000. */
            c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
            c2 = 0;
        } else {
            c1 &= 0x7f;
            c2 &= 0x7f;
#ifdef SHIFTJIS_CP932
            if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
                /* Rows 0x79..0x7c are round-tripped through
                 * e2s_conv()/s2e_conv(). */
                nkf_char s2, s1;
                if (e2s_conv(c2, c1, &s2, &s1) == 0){
                    s2e_conv(s2, s1, &c2, &c1);
                    if (c2 < 0x100){
                        c1 &= 0x7f;
                        c2 &= 0x7f;
                    }
                }
            }
#endif
        }
    }
    (*oconv)(c2, c1);
    return 0;
}
02182
/*
 * Input converter (Shift_JIS-style, judging by the name and the use of
 * s2e_conv() -- confirm).  Normalises (c2, c1) and forwards one code
 * pair to (*oconv).  c0 is not used.
 *
 * Returns 0 normally (including when a pair is silently skipped), or
 * the non-zero result of s2e_conv() when that conversion fails.
 *
 * NOTE(review): c2 carries ARG_UNUSED although it is read throughout the
 * body.
 */
static nkf_char
s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
{
    if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
        if (iso2022jp_f && !x0201_f) {
            /* Replaced by the GETA1/GETA2 pair in this mode. */
            c2 = GETA1; c1 = GETA2;
        } else {
            c1 &= 0x7f;
        }
    } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
        /* Passed to oconv unchanged. */
    } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
        /* Lead bytes 0xF0..0xF9 are mapped linearly (188 cells per lead
         * byte, skipping trail byte 0x7F) onto Unicode values from 0xE000. */
        if(c1 == 0x7F) return 0;
        c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
        c2 = 0;
    } else {
        nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
        if (ret) return ret;
    }
    (*oconv)(c2, c1);
    return 0;
}
02206
02207 static int
02208 x0213_wait_combining_p(nkf_char wc)
02209 {
02210 int i;
02211 for (i = 0; i < sizeof_x0213_combining_table; i++) {
02212 if (x0213_combining_table[i][1] == wc) {
02213 return TRUE;
02214 }
02215 }
02216 return FALSE;
02217 }
02218
02219 static int
02220 x0213_combining_p(nkf_char wc)
02221 {
02222 int i;
02223 for (i = 0; i < sizeof_x0213_combining_chars; i++) {
02224 if (x0213_combining_chars[i] == wc) {
02225 return TRUE;
02226 }
02227 }
02228 return FALSE;
02229 }
02230
/*
 * Input converter for UTF-8 byte sequences.
 *
 * c1 is the lead byte, c2 the second byte.  c3 carries the third byte,
 * or -- when it is greater than 0xFF -- the third byte in its upper part
 * and the fourth byte in its low 8 bits.
 *
 * Return values visible in this function:
 *    0  sequence consumed (converted and sent to oconv, or rejected and
 *       dropped)
 *   -1  a three-byte lead was seen but c3 is still 0
 *   -2  a four-byte lead was seen but c3 is still 0
 *   -3  x0213_f is set and the decoded value may be followed by a
 *       combining character (x0213_wait_combining_p)
 *   otherwise the result of w2e_conv()
 * (The meaning the caller attaches to the negative values is not visible
 * here -- confirm.)
 */
static nkf_char
w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
{
    nkf_char ret = 0, c4 = 0;
    /* Classification of lead bytes 0xC0..0xFF, indexed by (c1 - 0xC0).
     * The tens digit matches the sequence length handled in the switch
     * below (2x, 3x, 4x); values without a case label fall to `default`
     * and are rejected. */
    static const char w_iconv_utf8_1st_byte[] =
    {
        20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
        40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};

    /* Unpack a combined third/fourth byte. */
    if (c3 > 0xFF) {
        c4 = c3 & 0xFF;
        c3 >>= 8;
    }

    if (c1 < 0 || 0xff < c1) {
        /* Not a byte value: no validation, handled further below. */
    }else if (c1 == 0) {
        c3 = 0;
    } else if ((c1 & 0xC0) == 0x80) {
        /* 0x80..0xBF cannot start a sequence. */
        return 0;
    } else{
        /* NOTE(review): c1 in 0x01..0x7F is not excluded by the branches
         * above, so c1 - 0xC0 is negative here and the table is read out
         * of bounds.  Whether such values should be dropped or passed
         * through depends on the callers -- confirm before adding a guard. */
        switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
        case 21:
            if (c2 < 0x80 || 0xBF < c2) return 0;
            break;
        case 30:
            /* Lead 0xE0: second byte restricted to 0xA0..0xBF. */
            if (c3 == 0) return -1;
            if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
                return 0;
            break;
        case 31:
        case 33:
            if (c3 == 0) return -1;
            if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
                return 0;
            break;
        case 32:
            /* Lead 0xED: second byte restricted to 0x80..0x9F. */
            if (c3 == 0) return -1;
            if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
                return 0;
            break;
        case 40:
            /* Lead 0xF0: second byte restricted to 0x90..0xBF. */
            if (c3 == 0) return -2;
            if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
                return 0;
            break;
        case 41:
            if (c3 == 0) return -2;
            if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
                return 0;
            break;
        case 42:
            /* Lead 0xF4: second byte restricted to 0x80..0x8F. */
            if (c3 == 0) return -2;
            if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
                return 0;
            break;
        default:
            return 0;
            break;
        }
    }
    if (c1 == 0 || c1 == EOF){
        /* Passed to oconv unchanged. */
    } else if ((c1 & 0xf8) == 0xf0) {
        /* Four-byte sequence: forwarded as a raw Unicode value. */
        c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
        c1 = 0;
    } else {
        if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
            return -3;
        ret = w2e_conv(c1, c2, c3, &c1, &c2);
    }
    if (ret == 0){
        (*oconv)(c1, c2);
    }
    return ret;
}
02307
02308 static nkf_char
02309 w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
02310 {
02311
02312 nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
02313 if (ret == 0){
02314 (*oconv)(c1, c2);
02315 }
02316 return ret;
02317 }
02318
02319 #define NKF_ICONV_INVALID_CODE_RANGE -13
02320 #define NKF_ICONV_WAIT_COMBINING_CHAR -14
02321 #define NKF_ICONV_NOT_COMBINED -15
02322 static size_t
02323 unicode_iconv(nkf_char wc, int nocombine)
02324 {
02325 nkf_char c1, c2;
02326 int ret = 0;
02327
02328 if (wc < 0x80) {
02329 c2 = 0;
02330 c1 = wc;
02331 }else if ((wc>>11) == 27) {
02332
02333 return NKF_ICONV_INVALID_CODE_RANGE;
02334 }else if (wc < 0xFFFF) {
02335 if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
02336 return NKF_ICONV_WAIT_COMBINING_CHAR;
02337 ret = w16e_conv(wc, &c2, &c1);
02338 if (ret) return ret;
02339 }else if (wc < 0x10FFFF) {
02340 c2 = 0;
02341 c1 = nkf_char_unicode_new(wc);
02342 } else {
02343 return NKF_ICONV_INVALID_CODE_RANGE;
02344 }
02345 (*oconv)(c2, c1);
02346 return 0;
02347 }
02348
02349 static nkf_char
02350 unicode_iconv_combine(nkf_char wc, nkf_char wc2)
02351 {
02352 nkf_char c1, c2;
02353 int i;
02354
02355 if (wc2 < 0x80) {
02356 return NKF_ICONV_NOT_COMBINED;
02357 }else if ((wc2>>11) == 27) {
02358
02359 return NKF_ICONV_INVALID_CODE_RANGE;
02360 }else if (wc2 < 0xFFFF) {
02361 if (!x0213_combining_p(wc2))
02362 return NKF_ICONV_NOT_COMBINED;
02363 for (i = 0; i < sizeof_x0213_combining_table; i++) {
02364 if (x0213_combining_table[i][1] == wc &&
02365 x0213_combining_table[i][2] == wc2) {
02366 c2 = x0213_combining_table[i][0] >> 8;
02367 c1 = x0213_combining_table[i][0] & 0x7f;
02368 (*oconv)(c2, c1);
02369 return 0;
02370 }
02371 }
02372 }else if (wc2 < 0x10FFFF) {
02373 return NKF_ICONV_NOT_COMBINED;
02374 } else {
02375 return NKF_ICONV_INVALID_CODE_RANGE;
02376 }
02377 return NKF_ICONV_NOT_COMBINED;
02378 }
02379
02380 static nkf_char
02381 w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
02382 {
02383 nkf_char wc, wc2;
02384 wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
02385 wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
02386 if (wc2 < 0)
02387 return wc2;
02388 return unicode_iconv_combine(wc, wc2);
02389 }
02390
02391 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
02392 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
02393 static size_t
02394 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02395 {
02396 nkf_char wc;
02397
02398 if (c1 == EOF) {
02399 (*oconv)(EOF, 0);
02400 return 0;
02401 }
02402
02403 if (input_endian == ENDIAN_BIG) {
02404 if (0xD8 <= c1 && c1 <= 0xDB) {
02405 if (0xDC <= c3 && c3 <= 0xDF) {
02406 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
02407 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02408 } else {
02409 wc = c1 << 8 | c2;
02410 }
02411 } else {
02412 if (0xD8 <= c2 && c2 <= 0xDB) {
02413 if (0xDC <= c4 && c4 <= 0xDF) {
02414 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
02415 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02416 } else {
02417 wc = c2 << 8 | c1;
02418 }
02419 }
02420
02421 return (*unicode_iconv)(wc, FALSE);
02422 }
02423
02424 static size_t
02425 nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02426 {
02427 nkf_char wc, wc2;
02428
02429 if (input_endian == ENDIAN_BIG) {
02430 if (0xD8 <= c3 && c3 <= 0xDB) {
02431 return NKF_ICONV_NOT_COMBINED;
02432 } else {
02433 wc = c1 << 8 | c2;
02434 wc2 = c3 << 8 | c4;
02435 }
02436 } else {
02437 if (0xD8 <= c2 && c2 <= 0xDB) {
02438 return NKF_ICONV_NOT_COMBINED;
02439 } else {
02440 wc = c2 << 8 | c1;
02441 wc2 = c4 << 8 | c3;
02442 }
02443 }
02444
02445 return unicode_iconv_combine(wc, wc2);
02446 }
02447
02448 static size_t
02449 nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
02450 {
02451 nkf_char wc;
02452 if (input_endian == ENDIAN_BIG)
02453 wc = c1 << 8 | c2;
02454 else
02455 wc = c2 << 8 | c1;
02456 return (*unicode_iconv)(wc, TRUE);
02457 }
02458
02459 static nkf_char
02460 w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
02461 {
02462 (*oconv)(c2, c1);
02463 return 16;
02464 }
02465
02466 static nkf_char
02467 w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
02468 {
02469 (*oconv)(c2, c1);
02470 return 32;
02471 }
02472
02473 static nkf_char
02474 utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02475 {
02476 nkf_char wc;
02477
02478 switch(input_endian){
02479 case ENDIAN_BIG:
02480 wc = c2 << 16 | c3 << 8 | c4;
02481 break;
02482 case ENDIAN_LITTLE:
02483 wc = c3 << 16 | c2 << 8 | c1;
02484 break;
02485 case ENDIAN_2143:
02486 wc = c1 << 16 | c4 << 8 | c3;
02487 break;
02488 case ENDIAN_3412:
02489 wc = c4 << 16 | c1 << 8 | c2;
02490 break;
02491 default:
02492 return NKF_ICONV_INVALID_CODE_RANGE;
02493 }
02494 return wc;
02495 }
02496
02497 static size_t
02498 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02499 {
02500 nkf_char wc;
02501
02502 if (c1 == EOF) {
02503 (*oconv)(EOF, 0);
02504 return 0;
02505 }
02506
02507 wc = utf32_to_nkf_char(c1, c2, c3, c4);
02508 if (wc < 0)
02509 return wc;
02510
02511 return (*unicode_iconv)(wc, FALSE);
02512 }
02513
02514 static nkf_char
02515 nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
02516 {
02517 nkf_char wc, wc2;
02518
02519 wc = utf32_to_nkf_char(c1, c2, c3, c4);
02520 if (wc < 0)
02521 return wc;
02522 wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
02523 if (wc2 < 0)
02524 return wc2;
02525
02526 return unicode_iconv_combine(wc, wc2);
02527 }
02528
02529 static size_t
02530 nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02531 {
02532 nkf_char wc;
02533
02534 wc = utf32_to_nkf_char(c1, c2, c3, c4);
02535 return (*unicode_iconv)(wc, TRUE);
02536 }
02537 #endif
02538
/*
 * If output_mode is neither ASCII nor ISO_8859_1, emit the three bytes
 * ESC '(' ascii_intro through o_putc and set output_mode to `mode`.
 * Otherwise do nothing (output_mode is left as it is).
 */
02539 #define output_ascii_escape_sequence(mode) do { \
02540 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
02541 (*o_putc)(ESC); \
02542 (*o_putc)('('); \
02543 (*o_putc)(ascii_intro); \
02544 output_mode = mode; \
02545 } \
02546 } while (0)
02547
02548 static void
02549 output_escape_sequence(int mode)
02550 {
02551 if (output_mode == mode)
02552 return;
02553 switch(mode) {
02554 case ISO_8859_1:
02555 (*o_putc)(ESC);
02556 (*o_putc)('.');
02557 (*o_putc)('A');
02558 break;
02559 case JIS_X_0201_1976_K:
02560 (*o_putc)(ESC);
02561 (*o_putc)('(');
02562 (*o_putc)('I');
02563 break;
02564 case JIS_X_0208:
02565 (*o_putc)(ESC);
02566 (*o_putc)('$');
02567 (*o_putc)(kanji_intro);
02568 break;
02569 case JIS_X_0212:
02570 (*o_putc)(ESC);
02571 (*o_putc)('$');
02572 (*o_putc)('(');
02573 (*o_putc)('D');
02574 break;
02575 case JIS_X_0213_1:
02576 (*o_putc)(ESC);
02577 (*o_putc)('$');
02578 (*o_putc)('(');
02579 (*o_putc)('Q');
02580 break;
02581 case JIS_X_0213_2:
02582 (*o_putc)(ESC);
02583 (*o_putc)('$');
02584 (*o_putc)('(');
02585 (*o_putc)('P');
02586 break;
02587 }
02588 output_mode = mode;
02589 }
02590
02591 static void
02592 j_oconv(nkf_char c2, nkf_char c1)
02593 {
02594 #ifdef NUMCHAR_OPTION
02595 if (c2 == 0 && nkf_char_unicode_p(c1)){
02596 w16e_conv(c1, &c2, &c1);
02597 if (c2 == 0 && nkf_char_unicode_p(c1)){
02598 c2 = c1 & VALUE_MASK;
02599 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
02600
02601 c1 &= 0xFFF;
02602 c2 = 0x7F + c1 / 94;
02603 c1 = 0x21 + c1 % 94;
02604 } else {
02605 if (encode_fallback) (*encode_fallback)(c1);
02606 return;
02607 }
02608 }
02609 }
02610 #endif
02611 if (c2 == 0) {
02612 output_ascii_escape_sequence(ASCII);
02613 (*o_putc)(c1);
02614 }
02615 else if (c2 == EOF) {
02616 output_ascii_escape_sequence(ASCII);
02617 (*o_putc)(EOF);
02618 }
02619 else if (c2 == ISO_8859_1) {
02620 output_ascii_escape_sequence(ISO_8859_1);
02621 (*o_putc)(c1|0x80);
02622 }
02623 else if (c2 == JIS_X_0201_1976_K) {
02624 output_escape_sequence(JIS_X_0201_1976_K);
02625 (*o_putc)(c1);
02626 #ifdef X0212_ENABLE
02627 } else if (is_eucg3(c2)){
02628 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
02629 (*o_putc)(c2 & 0x7f);
02630 (*o_putc)(c1);
02631 #endif
02632 } else {
02633 if(ms_ucs_map_f
02634 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
02635 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
02636 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
02637 (*o_putc)(c2);
02638 (*o_putc)(c1);
02639 }
02640 }
02641
02642 static void
02643 e_oconv(nkf_char c2, nkf_char c1)
02644 {
02645 if (c2 == 0 && nkf_char_unicode_p(c1)){
02646 w16e_conv(c1, &c2, &c1);
02647 if (c2 == 0 && nkf_char_unicode_p(c1)){
02648 c2 = c1 & VALUE_MASK;
02649 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
02650
02651 c1 &= 0xFFF;
02652 c2 = c1 / 94;
02653 c2 += c2 < 10 ? 0x75 : 0x8FEB;
02654 c1 = 0x21 + c1 % 94;
02655 if (is_eucg3(c2)){
02656 (*o_putc)(0x8f);
02657 (*o_putc)((c2 & 0x7f) | 0x080);
02658 (*o_putc)(c1 | 0x080);
02659 }else{
02660 (*o_putc)((c2 & 0x7f) | 0x080);
02661 (*o_putc)(c1 | 0x080);
02662 }
02663 return;
02664 } else {
02665 if (encode_fallback) (*encode_fallback)(c1);
02666 return;
02667 }
02668 }
02669 }
02670
02671 if (c2 == EOF) {
02672 (*o_putc)(EOF);
02673 } else if (c2 == 0) {
02674 output_mode = ASCII;
02675 (*o_putc)(c1);
02676 } else if (c2 == JIS_X_0201_1976_K) {
02677 output_mode = EUC_JP;
02678 (*o_putc)(SS2); (*o_putc)(c1|0x80);
02679 } else if (c2 == ISO_8859_1) {
02680 output_mode = ISO_8859_1;
02681 (*o_putc)(c1 | 0x080);
02682 #ifdef X0212_ENABLE
02683 } else if (is_eucg3(c2)){
02684 output_mode = EUC_JP;
02685 #ifdef SHIFTJIS_CP932
02686 if (!cp932inv_f){
02687 nkf_char s2, s1;
02688 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02689 s2e_conv(s2, s1, &c2, &c1);
02690 }
02691 }
02692 #endif
02693 if (c2 == 0) {
02694 output_mode = ASCII;
02695 (*o_putc)(c1);
02696 }else if (is_eucg3(c2)){
02697 if (x0212_f){
02698 (*o_putc)(0x8f);
02699 (*o_putc)((c2 & 0x7f) | 0x080);
02700 (*o_putc)(c1 | 0x080);
02701 }
02702 }else{
02703 (*o_putc)((c2 & 0x7f) | 0x080);
02704 (*o_putc)(c1 | 0x080);
02705 }
02706 #endif
02707 } else {
02708 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
02709 set_iconv(FALSE, 0);
02710 return;
02711 }
02712 output_mode = EUC_JP;
02713 (*o_putc)(c2 | 0x080);
02714 (*o_putc)(c1 | 0x080);
02715 }
02716 }
02717
02718 static void
02719 s_oconv(nkf_char c2, nkf_char c1)
02720 {
02721 #ifdef NUMCHAR_OPTION
02722 if (c2 == 0 && nkf_char_unicode_p(c1)){
02723 w16e_conv(c1, &c2, &c1);
02724 if (c2 == 0 && nkf_char_unicode_p(c1)){
02725 c2 = c1 & VALUE_MASK;
02726 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
02727
02728 c1 &= 0xFFF;
02729 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
02730 c1 = c1 % 188;
02731 c1 += 0x40 + (c1 > 0x3e);
02732 (*o_putc)(c2);
02733 (*o_putc)(c1);
02734 return;
02735 } else {
02736 if(encode_fallback)(*encode_fallback)(c1);
02737 return;
02738 }
02739 }
02740 }
02741 #endif
02742 if (c2 == EOF) {
02743 (*o_putc)(EOF);
02744 return;
02745 } else if (c2 == 0) {
02746 output_mode = ASCII;
02747 (*o_putc)(c1);
02748 } else if (c2 == JIS_X_0201_1976_K) {
02749 output_mode = SHIFT_JIS;
02750 (*o_putc)(c1|0x80);
02751 } else if (c2 == ISO_8859_1) {
02752 output_mode = ISO_8859_1;
02753 (*o_putc)(c1 | 0x080);
02754 #ifdef X0212_ENABLE
02755 } else if (is_eucg3(c2)){
02756 output_mode = SHIFT_JIS;
02757 if (e2s_conv(c2, c1, &c2, &c1) == 0){
02758 (*o_putc)(c2);
02759 (*o_putc)(c1);
02760 }
02761 #endif
02762 } else {
02763 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
02764 set_iconv(FALSE, 0);
02765 return;
02766 }
02767 output_mode = SHIFT_JIS;
02768 e2s_conv(c2, c1, &c2, &c1);
02769
02770 #ifdef SHIFTJIS_CP932
02771 if (cp932inv_f
02772 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
02773 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
02774 if (c){
02775 c2 = c >> 8;
02776 c1 = c & 0xff;
02777 }
02778 }
02779 #endif
02780
02781 (*o_putc)(c2);
02782 if (prefix_table[(unsigned char)c1]){
02783 (*o_putc)(prefix_table[(unsigned char)c1]);
02784 }
02785 (*o_putc)(c1);
02786 }
02787 }
02788
02789 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Encode `val` with nkf_unicode_to_utf8() and write the resulting bytes
 * through o_putc.  The first byte is always written; the second to fourth
 * are written only when non-zero.
 *
 * The expansion uses variables named c1, c2, c3 and c4 from the calling
 * scope as scratch storage and overwrites them.
 */
02790 #define OUTPUT_UTF8(val) do { \
02791 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
02792 (*o_putc)(c1); \
02793 if (c2) (*o_putc)(c2); \
02794 if (c3) (*o_putc)(c3); \
02795 if (c4) (*o_putc)(c4); \
02796 } while (0)
02797
02798 static void
02799 w_oconv(nkf_char c2, nkf_char c1)
02800 {
02801 nkf_char c3, c4;
02802 nkf_char val, val2;
02803
02804 if (output_bom_f) {
02805 output_bom_f = FALSE;
02806 (*o_putc)('\357');
02807 (*o_putc)('\273');
02808 (*o_putc)('\277');
02809 }
02810
02811 if (c2 == EOF) {
02812 (*o_putc)(EOF);
02813 return;
02814 }
02815
02816 if (c2 == 0 && nkf_char_unicode_p(c1)){
02817 val = c1 & VALUE_MASK;
02818 OUTPUT_UTF8(val);
02819 return;
02820 }
02821
02822 if (c2 == 0) {
02823 (*o_putc)(c1);
02824 } else {
02825 val = e2w_conv(c2, c1);
02826 if (val){
02827 val2 = e2w_combining(val, c2, c1);
02828 if (val2)
02829 OUTPUT_UTF8(val2);
02830 OUTPUT_UTF8(val);
02831 }
02832 }
02833 }
02834
/*
 * Write two bytes through o_putc: c1 then c2 when output_endian is
 * ENDIAN_LITTLE, c2 then c1 otherwise.  Callers in this file pass the
 * low byte as c1 and the high byte as c2.
 */
02835 #define OUTPUT_UTF16_BYTES(c1, c2) do { \
02836 if (output_endian == ENDIAN_LITTLE){ \
02837 (*o_putc)(c1); \
02838 (*o_putc)(c2); \
02839 }else{ \
02840 (*o_putc)(c2); \
02841 (*o_putc)(c1); \
02842 } \
02843 } while (0)
02844
/*
 * Write `val` as UTF-16 using OUTPUT_UTF16_BYTES.
 *
 * When nkf_char_unicode_bmp_p(val) holds, one 16-bit unit is written.
 * Otherwise val is masked with VALUE_MASK (val must be a modifiable
 * lvalue) and, if it does not exceed UNICODE_MAX, written as a surrogate
 * pair; larger values produce no output.
 *
 * The expansion uses variables named c1 and c2 from the calling scope as
 * scratch storage and overwrites them.
 */
02845 #define OUTPUT_UTF16(val) do { \
02846 if (nkf_char_unicode_bmp_p(val)) { \
02847 c2 = (val >> 8) & 0xff; \
02848 c1 = val & 0xff; \
02849 OUTPUT_UTF16_BYTES(c1, c2); \
02850 } else { \
02851 val &= VALUE_MASK; \
02852 if (val <= UNICODE_MAX) { \
02853 c2 = (val >> 10) + NKF_INT32_C(0xD7C0); \
02854 c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); \
02855 OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
02856 OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
02857 } \
02858 } \
02859 } while (0)
02860
02861 static void
02862 w_oconv16(nkf_char c2, nkf_char c1)
02863 {
02864 if (output_bom_f) {
02865 output_bom_f = FALSE;
02866 OUTPUT_UTF16_BYTES(0xFF, 0xFE);
02867 }
02868
02869 if (c2 == EOF) {
02870 (*o_putc)(EOF);
02871 return;
02872 }
02873
02874 if (c2 == 0 && nkf_char_unicode_p(c1)) {
02875 OUTPUT_UTF16(c1);
02876 } else if (c2) {
02877 nkf_char val, val2;
02878 val = e2w_conv(c2, c1);
02879 if (!val) return;
02880 val2 = e2w_combining(val, c2, c1);
02881 if (val2)
02882 OUTPUT_UTF16(val2);
02883 OUTPUT_UTF16(val);
02884 } else {
02885 OUTPUT_UTF16_BYTES(c1, c2);
02886 }
02887 }
02888
/*
 * Write `c` as four bytes through o_putc: least significant byte first
 * when output_endian is ENDIAN_LITTLE, most significant first otherwise.
 * Only the low 24 bits of c are used; the most significant byte written
 * is always 0.
 */
02889 #define OUTPUT_UTF32(c) do { \
02890 if (output_endian == ENDIAN_LITTLE){ \
02891 (*o_putc)( (c) & 0xFF); \
02892 (*o_putc)(((c) >> 8) & 0xFF); \
02893 (*o_putc)(((c) >> 16) & 0xFF); \
02894 (*o_putc)(0); \
02895 }else{ \
02896 (*o_putc)(0); \
02897 (*o_putc)(((c) >> 16) & 0xFF); \
02898 (*o_putc)(((c) >> 8) & 0xFF); \
02899 (*o_putc)( (c) & 0xFF); \
02900 } \
02901 } while (0)
02902
02903 static void
02904 w_oconv32(nkf_char c2, nkf_char c1)
02905 {
02906 if (output_bom_f) {
02907 output_bom_f = FALSE;
02908 if (output_endian == ENDIAN_LITTLE){
02909 (*o_putc)(0xFF);
02910 (*o_putc)(0xFE);
02911 (*o_putc)(0);
02912 (*o_putc)(0);
02913 }else{
02914 (*o_putc)(0);
02915 (*o_putc)(0);
02916 (*o_putc)(0xFE);
02917 (*o_putc)(0xFF);
02918 }
02919 }
02920
02921 if (c2 == EOF) {
02922 (*o_putc)(EOF);
02923 return;
02924 }
02925
02926 if (c2 == ISO_8859_1) {
02927 c1 |= 0x80;
02928 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
02929 c1 &= VALUE_MASK;
02930 } else if (c2) {
02931 nkf_char val, val2;
02932 val = e2w_conv(c2, c1);
02933 if (!val) return;
02934 val2 = e2w_combining(val, c2, c1);
02935 if (val2)
02936 OUTPUT_UTF32(val2);
02937 c1 = val;
02938 }
02939 OUTPUT_UTF32(c1);
02940 }
02941 #endif
02942
/*
 * Bit flags that set_code_score() ORs into an input_code's score.  Each
 * flag is the previous one shifted left by one bit, so flags defined
 * later carry more weight; h_conv() picks the candidate with the
 * numerically smallest score.
 */
02943 #define SCORE_L2 (1)
02944 #define SCORE_KANA (SCORE_L2 << 1)
02945 #define SCORE_DEPEND (SCORE_KANA << 1)
02946 #define SCORE_CP932 (SCORE_DEPEND << 1)
02947 #define SCORE_X0212 (SCORE_CP932 << 1)
02948 #define SCORE_X0213 (SCORE_X0212 << 1)
02949 #define SCORE_NO_EXIST (SCORE_X0213 << 1)
02950 #define SCORE_iMIME (SCORE_NO_EXIST << 1)
02951 #define SCORE_ERROR (SCORE_iMIME << 1)
02952
/* Score assigned by status_reset(). */
02953 #define SCORE_INIT (SCORE_iMIME)
02954
/*
 * Score flags looked up by code_score() for a two-byte character whose
 * first byte satisfies (c2 & 0x70) == 0x20; the index is c2 & 0x0f.
 */
02955 static const nkf_char score_table_A0[] = {
02956 0, 0, 0, 0,
02957 0, 0, 0, 0,
02958 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
02959 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_X0213,
02960 };
02961
/*
 * Score flags looked up by code_score() for a two-byte character whose
 * first byte satisfies (c2 & 0x70) == 0x70; the index is c2 & 0x0f.
 */
02962 static const nkf_char score_table_F0[] = {
02963 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
02964 SCORE_L2, SCORE_DEPEND, SCORE_X0213, SCORE_X0213,
02965 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
02966 SCORE_CP932, SCORE_X0213, SCORE_X0213, SCORE_ERROR,
02967 };
02968
/*
 * Score flags looked up by code_score() when the first buffered byte is
 * 0x8f and the second satisfies (c1 & 0x70) == 0x20; the index is
 * c1 & 0x0f.
 */
02969 static const nkf_char score_table_8FA0[] = {
02970 0, SCORE_X0213, SCORE_X0212, SCORE_X0213,
02971 SCORE_X0213, SCORE_X0213, SCORE_X0212, SCORE_X0212,
02972 SCORE_X0213, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02973 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02974 };
02975
/*
 * Score flags looked up by code_score() when the first buffered byte is
 * 0x8f and the second satisfies (c1 & 0x70) == 0x60; the index is
 * c1 & 0x0f.
 */
02976 static const nkf_char score_table_8FE0[] = {
02977 SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02978 SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02979 SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02980 SCORE_X0212, SCORE_X0212, SCORE_X0213, SCORE_X0213,
02981 };
02982
/*
 * Score flags looked up by code_score() when the first buffered byte is
 * 0x8f and the second satisfies (c1 & 0x70) == 0x70; the index is
 * c1 & 0x0f.
 */
02983 static const nkf_char score_table_8FF0[] = {
02984 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0212,
02985 SCORE_X0212, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02986 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02987 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02988 };
02989
02990 static void
02991 set_code_score(struct input_code *ptr, nkf_char score)
02992 {
02993 if (ptr){
02994 ptr->score |= score;
02995 }
02996 }
02997
02998 static void
02999 clr_code_score(struct input_code *ptr, nkf_char score)
03000 {
03001 if (ptr){
03002 ptr->score &= ~score;
03003 }
03004 }
03005
03006 static void
03007 code_score(struct input_code *ptr)
03008 {
03009 nkf_char c2 = ptr->buf[0];
03010 nkf_char c1 = ptr->buf[1];
03011 if (c2 < 0){
03012 set_code_score(ptr, SCORE_ERROR);
03013 }else if (c2 == SS2){
03014 set_code_score(ptr, SCORE_KANA);
03015 }else if (c2 == 0x8f){
03016 if ((c1 & 0x70) == 0x20){
03017 set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
03018 }else if ((c1 & 0x70) == 0x60){
03019 set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
03020 }else if ((c1 & 0x70) == 0x70){
03021 set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
03022 }else{
03023 set_code_score(ptr, SCORE_X0212);
03024 }
03025 #ifdef UTF8_OUTPUT_ENABLE
03026 }else if (!e2w_conv(c2, c1)){
03027 set_code_score(ptr, SCORE_NO_EXIST);
03028 #endif
03029 }else if ((c2 & 0x70) == 0x20){
03030 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
03031 }else if ((c2 & 0x70) == 0x70){
03032 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
03033 }else if ((c2 & 0x70) >= 0x50){
03034 set_code_score(ptr, SCORE_L2);
03035 }
03036 }
03037
03038 static void
03039 status_disable(struct input_code *ptr)
03040 {
03041 ptr->stat = -1;
03042 ptr->buf[0] = -1;
03043 code_score(ptr);
03044 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
03045 }
03046
03047 static void
03048 status_push_ch(struct input_code *ptr, nkf_char c)
03049 {
03050 ptr->buf[ptr->index++] = c;
03051 }
03052
03053 static void
03054 status_clear(struct input_code *ptr)
03055 {
03056 ptr->stat = 0;
03057 ptr->index = 0;
03058 }
03059
03060 static void
03061 status_reset(struct input_code *ptr)
03062 {
03063 status_clear(ptr);
03064 ptr->score = SCORE_INIT;
03065 }
03066
03067 static void
03068 status_reinit(struct input_code *ptr)
03069 {
03070 status_reset(ptr);
03071 ptr->_file_stat = 0;
03072 }
03073
03074 static void
03075 status_check(struct input_code *ptr, nkf_char c)
03076 {
03077 if (c <= DEL && estab_f){
03078 status_reset(ptr);
03079 }
03080 }
03081
03082 static void
03083 s_status(struct input_code *ptr, nkf_char c)
03084 {
03085 switch(ptr->stat){
03086 case -1:
03087 status_check(ptr, c);
03088 break;
03089 case 0:
03090 if (c <= DEL){
03091 break;
03092 }else if (nkf_char_unicode_p(c)){
03093 break;
03094 }else if (0xa1 <= c && c <= 0xdf){
03095 status_push_ch(ptr, SS2);
03096 status_push_ch(ptr, c);
03097 code_score(ptr);
03098 status_clear(ptr);
03099 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
03100 ptr->stat = 1;
03101 status_push_ch(ptr, c);
03102 }else if (0xed <= c && c <= 0xee){
03103 ptr->stat = 3;
03104 status_push_ch(ptr, c);
03105 #ifdef SHIFTJIS_CP932
03106 }else if (is_ibmext_in_sjis(c)){
03107 ptr->stat = 2;
03108 status_push_ch(ptr, c);
03109 #endif
03110 #ifdef X0212_ENABLE
03111 }else if (0xf0 <= c && c <= 0xfc){
03112 ptr->stat = 1;
03113 status_push_ch(ptr, c);
03114 #endif
03115 }else{
03116 status_disable(ptr);
03117 }
03118 break;
03119 case 1:
03120 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
03121 status_push_ch(ptr, c);
03122 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
03123 code_score(ptr);
03124 status_clear(ptr);
03125 }else{
03126 status_disable(ptr);
03127 }
03128 break;
03129 case 2:
03130 #ifdef SHIFTJIS_CP932
03131 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
03132 status_push_ch(ptr, c);
03133 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
03134 set_code_score(ptr, SCORE_CP932);
03135 status_clear(ptr);
03136 break;
03137 }
03138 }
03139 #endif
03140 status_disable(ptr);
03141 break;
03142 case 3:
03143 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
03144 status_push_ch(ptr, c);
03145 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
03146 set_code_score(ptr, SCORE_CP932);
03147 status_clear(ptr);
03148 }else{
03149 status_disable(ptr);
03150 }
03151 break;
03152 }
03153 }
03154
03155 static void
03156 e_status(struct input_code *ptr, nkf_char c)
03157 {
03158 switch (ptr->stat){
03159 case -1:
03160 status_check(ptr, c);
03161 break;
03162 case 0:
03163 if (c <= DEL){
03164 break;
03165 }else if (nkf_char_unicode_p(c)){
03166 break;
03167 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
03168 ptr->stat = 1;
03169 status_push_ch(ptr, c);
03170 #ifdef X0212_ENABLE
03171 }else if (0x8f == c){
03172 ptr->stat = 2;
03173 status_push_ch(ptr, c);
03174 #endif
03175 }else{
03176 status_disable(ptr);
03177 }
03178 break;
03179 case 1:
03180 if (0xa1 <= c && c <= 0xfe){
03181 status_push_ch(ptr, c);
03182 code_score(ptr);
03183 status_clear(ptr);
03184 }else{
03185 status_disable(ptr);
03186 }
03187 break;
03188 #ifdef X0212_ENABLE
03189 case 2:
03190 if (0xa1 <= c && c <= 0xfe){
03191 ptr->stat = 1;
03192 status_push_ch(ptr, c);
03193 }else{
03194 status_disable(ptr);
03195 }
03196 #endif
03197 }
03198 }
03199
03200 #ifdef UTF8_INPUT_ENABLE
03201 static void
03202 w_status(struct input_code *ptr, nkf_char c)
03203 {
03204 switch (ptr->stat){
03205 case -1:
03206 status_check(ptr, c);
03207 break;
03208 case 0:
03209 if (c <= DEL){
03210 break;
03211 }else if (nkf_char_unicode_p(c)){
03212 break;
03213 }else if (0xc0 <= c && c <= 0xdf){
03214 ptr->stat = 1;
03215 status_push_ch(ptr, c);
03216 }else if (0xe0 <= c && c <= 0xef){
03217 ptr->stat = 2;
03218 status_push_ch(ptr, c);
03219 }else if (0xf0 <= c && c <= 0xf4){
03220 ptr->stat = 3;
03221 status_push_ch(ptr, c);
03222 }else{
03223 status_disable(ptr);
03224 }
03225 break;
03226 case 1:
03227 case 2:
03228 if (0x80 <= c && c <= 0xbf){
03229 status_push_ch(ptr, c);
03230 if (ptr->index > ptr->stat){
03231 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
03232 && ptr->buf[2] == 0xbf);
03233 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
03234 &ptr->buf[0], &ptr->buf[1]);
03235 if (!bom){
03236 code_score(ptr);
03237 }
03238 status_clear(ptr);
03239 }
03240 }else{
03241 status_disable(ptr);
03242 }
03243 break;
03244 case 3:
03245 if (0x80 <= c && c <= 0xbf){
03246 if (ptr->index < ptr->stat){
03247 status_push_ch(ptr, c);
03248 } else {
03249 status_clear(ptr);
03250 }
03251 }else{
03252 status_disable(ptr);
03253 }
03254 break;
03255 }
03256 }
03257 #endif
03258
03259 static void
03260 code_status(nkf_char c)
03261 {
03262 int action_flag = 1;
03263 struct input_code *result = 0;
03264 struct input_code *p = input_code_list;
03265 while (p->name){
03266 if (!p->status_func) {
03267 ++p;
03268 continue;
03269 }
03270 if (!p->status_func)
03271 continue;
03272 (p->status_func)(p, c);
03273 if (p->stat > 0){
03274 action_flag = 0;
03275 }else if(p->stat == 0){
03276 if (result){
03277 action_flag = 0;
03278 }else{
03279 result = p;
03280 }
03281 }
03282 ++p;
03283 }
03284
03285 if (action_flag){
03286 if (result && !estab_f){
03287 set_iconv(TRUE, result->iconv_func);
03288 }else if (c <= DEL){
03289 struct input_code *ptr = input_code_list;
03290 while (ptr->name){
03291 status_reset(ptr);
03292 ++ptr;
03293 }
03294 }
03295 }
03296 }
03297
/*
 * Working state shared by the input helpers; the single instance is
 * created or cleared by nkf_state_init().
 */
03298 typedef struct {
/* Pushed-back characters: filled by std_ungetc(), drained by std_getc(). */
03299 nkf_buf_t *std_gc_buf;
/* Last character returned by broken_getc()'s pass-through path, or 0. */
03300 nkf_char broken_state;
/* Characters broken_getc() has queued to return on its next calls. */
03301 nkf_buf_t *broken_buf;
/* Zeroed by nkf_state_init(); not otherwise referenced in this section. */
03302 nkf_char mimeout_state;
/* Cleared by nkf_state_init(); not otherwise referenced in this section. */
03303 nkf_buf_t *nfc_buf;
03304 } nkf_state_t;
03305
03306 static nkf_state_t *nkf_state = NULL;
03307
03308 #define STD_GC_BUFSIZE (256)
03309
03310 static void
03311 nkf_state_init(void)
03312 {
03313 if (nkf_state) {
03314 nkf_buf_clear(nkf_state->std_gc_buf);
03315 nkf_buf_clear(nkf_state->broken_buf);
03316 nkf_buf_clear(nkf_state->nfc_buf);
03317 }
03318 else {
03319 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
03320 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
03321 nkf_state->broken_buf = nkf_buf_new(3);
03322 nkf_state->nfc_buf = nkf_buf_new(9);
03323 }
03324 nkf_state->broken_state = 0;
03325 nkf_state->mimeout_state = 0;
03326 }
03327
03328 #ifndef WIN32DLL
03329 static nkf_char
03330 std_getc(FILE *f)
03331 {
03332 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
03333 return nkf_buf_pop(nkf_state->std_gc_buf);
03334 }
03335 return getc(f);
03336 }
03337 #endif
03338
03339 static nkf_char
03340 std_ungetc(nkf_char c, ARG_UNUSED FILE *f)
03341 {
03342 nkf_buf_push(nkf_state->std_gc_buf, c);
03343 return c;
03344 }
03345
03346 #ifndef WIN32DLL
03347 static void
03348 std_putc(nkf_char c)
03349 {
03350 if(c!=EOF)
03351 putchar(c);
03352 }
03353 #endif
03354
03355 static nkf_char hold_buf[HOLD_SIZE*2];
03356 static int hold_count = 0;
03357 static nkf_char
03358 push_hold_buf(nkf_char c2)
03359 {
03360 if (hold_count >= HOLD_SIZE*2)
03361 return (EOF);
03362 hold_buf[hold_count++] = c2;
03363 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
03364 }
03365
/*
 * Buffer input while the input encoding is still undecided, choose a
 * converter, then replay the buffered bytes through it.
 *
 * c1 and c2 are the first two bytes placed in hold_buf.  More bytes are
 * read with i_getc and passed to code_status() until EOF, an ESC (which
 * is pushed back), hold_buf filling up, or estab_f becoming set.  If
 * estab_f is still unset afterwards, the input_code_list entry that has
 * a status_func and the smallest score is installed with set_iconv().
 *
 * The held bytes are then run through iconv/oconv.  When iconv asks for
 * more input (negative return), the missing bytes are taken from
 * hold_buf first and from the stream after that; fromhold_count tracks
 * how many bytes of the current character came from hold_buf so that
 * unused look-ahead can be returned to the right place.
 *
 * Returns the last value read by the buffering loop, or EOF when the
 * input ended in the middle of a character in the -1 / -2 paths.
 */
03366 static int
03367 h_conv(FILE *f, nkf_char c1, nkf_char c2)
03368 {
03369 int ret;
03370 int hold_index;
03371 int fromhold_count;
03372 nkf_char c3, c4;
03373
03378 hold_count = 0;
03379 push_hold_buf(c1);
03380 push_hold_buf(c2);
03381
/* Phase 1: collect bytes until the encoding is decided or we must stop. */
03382 while ((c2 = (*i_getc)(f)) != EOF) {
03383 if (c2 == ESC){
03384 (*i_ungetc)(c2,f);
03385 break;
03386 }
03387 code_status(c2);
03388 if (push_hold_buf(c2) == EOF || estab_f) {
03389 break;
03390 }
03391 }
03392
/* Phase 2: nothing established yet, so take the lowest-scoring candidate. */
03393 if (!estab_f) {
03394 struct input_code *p = input_code_list;
03395 struct input_code *result = p;
03396 if (c2 == EOF) {
03397 code_status(c2);
03398 }
03399 while (p->name) {
03400 if (p->status_func && p->score < result->score) {
03401 result = p;
03402 }
03403 p++;
03404 }
03405 set_iconv(TRUE, result->iconv_func);
03406 }
03407
03408
/* Phase 3: replay the held bytes through the chosen converter. */
03418 ret = c2;
03419 hold_index = 0;
03420 while (hold_index < hold_count){
03421 c1 = hold_buf[hold_index++];
03422 if (nkf_char_unicode_p(c1)) {
03423 (*oconv)(0, c1);
03424 continue;
03425 }
03426 else if (c1 <= DEL){
03427 (*iconv)(0, c1, 0);
03428 continue;
03429 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
03430 (*iconv)(JIS_X_0201_1976_K, c1, 0);
03431 continue;
03432 }
03433 fromhold_count = 1;
03434 if (hold_index < hold_count){
03435 c2 = hold_buf[hold_index++];
03436 fromhold_count++;
03437 }else{
03438 c2 = (*i_getc)(f);
03439 if (c2 == EOF){
03440 c4 = EOF;
03441 break;
03442 }
03443 code_status(c2);
03444 }
03445 c3 = 0;
03446 switch ((*iconv)(c1, c2, 0)) {
03447 case -2:
03448
/* Two more bytes wanted; they are passed packed as (c3<<8)|c4. */
03449 if (hold_index < hold_count){
03450 c3 = hold_buf[hold_index++];
03451 } else if ((c3 = (*i_getc)(f)) == EOF) {
03452 ret = EOF;
03453 break;
03454 }
03455 code_status(c3);
03456 if (hold_index < hold_count){
03457 c4 = hold_buf[hold_index++];
03458 } else if ((c4 = (*i_getc)(f)) == EOF) {
03459 c3 = ret = EOF;
03460 break;
03461 }
03462 code_status(c4);
03463 (*iconv)(c1, c2, (c3<<8)|c4);
03464 break;
03465 case -3:
03466
/*
 * Two-byte character that may combine with the next one: look ahead two
 * bytes, try w_iconv_combine(), and on failure emit (c1, c2) alone and
 * give the look-ahead back.
 */
03467 if (hold_index < hold_count){
03468 c3 = hold_buf[hold_index++];
03469 fromhold_count++;
03470 } else if ((c3 = (*i_getc)(f)) == EOF) {
03471 w_iconv_nocombine(c1, c2, 0);
03472 break;
03473 }
03474 if (hold_index < hold_count){
03475 c4 = hold_buf[hold_index++];
03476 fromhold_count++;
03477 } else if ((c4 = (*i_getc)(f)) == EOF) {
03478 w_iconv_nocombine(c1, c2, 0);
03479 if (fromhold_count <= 2)
03480 (*i_ungetc)(c3,f);
03481 else
03482 hold_index--;
03483 continue;
03484 }
03485 if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
03486 w_iconv_nocombine(c1, c2, 0);
03487 if (fromhold_count <= 2) {
03488 (*i_ungetc)(c4,f);
03489 (*i_ungetc)(c3,f);
03490 } else if (fromhold_count == 3) {
03491 (*i_ungetc)(c4,f);
03492 hold_index--;
03493 } else {
03494 hold_index -= 2;
03495 }
03496 }
03497 break;
03498 case -1:
03499
/* One more byte wanted (third byte of the character). */
03500 if (hold_index < hold_count){
03501 c3 = hold_buf[hold_index++];
03502 fromhold_count++;
03503 } else if ((c3 = (*i_getc)(f)) == EOF) {
03504 ret = EOF;
03505 break;
03506 } else {
03507 code_status(c3);
03508 }
03509 if ((*iconv)(c1, c2, c3) == -3) {
03510
/*
 * Three-byte character that may combine with the next one: look ahead
 * three bytes (c4..c6), try w_iconv_combine(), and on failure emit
 * (c1, c2, c3) alone and give the look-ahead back.
 */
03511 nkf_char c5, c6;
03512 if (hold_index < hold_count){
03513 c4 = hold_buf[hold_index++];
03514 fromhold_count++;
03515 } else if ((c4 = (*i_getc)(f)) == EOF) {
03516 w_iconv_nocombine(c1, c2, c3);
03517 continue;
03518 }
03519 if (hold_index < hold_count){
03520 c5 = hold_buf[hold_index++];
03521 fromhold_count++;
03522 } else if ((c5 = (*i_getc)(f)) == EOF) {
03523 w_iconv_nocombine(c1, c2, c3);
03524 if (fromhold_count == 4)
03525 hold_index--;
03526 else
03527 (*i_ungetc)(c4,f);
03528 continue;
03529 }
03530 if (hold_index < hold_count){
03531 c6 = hold_buf[hold_index++];
03532 fromhold_count++;
03533 } else if ((c6 = (*i_getc)(f)) == EOF) {
03534 w_iconv_nocombine(c1, c2, c3);
03535 if (fromhold_count == 5) {
03536 hold_index -= 2;
03537 } else if (fromhold_count == 4) {
03538 hold_index--;
03539 (*i_ungetc)(c5,f);
03540 } else {
03541 (*i_ungetc)(c5,f);
03542 (*i_ungetc)(c4,f);
03543 }
03544 continue;
03545 }
03546 if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
03547 w_iconv_nocombine(c1, c2, c3);
03548 if (fromhold_count == 6) {
03549 hold_index -= 3;
03550 } else if (fromhold_count == 5) {
03551 hold_index -= 2;
03552 (*i_ungetc)(c6,f);
03553 } else if (fromhold_count == 4) {
03554 hold_index--;
03555 (*i_ungetc)(c6,f);
03556 (*i_ungetc)(c5,f);
03557 } else {
03558 (*i_ungetc)(c6,f);
03559 (*i_ungetc)(c5,f);
03560 (*i_ungetc)(c4,f);
03561 }
03562 }
03563 }
03564 break;
03565 }
/* c3 == EOF means the input ended inside a character: stop replaying. */
03566 if (c3 == EOF) break;
03567 }
03568 return ret;
03569 }
03570
03571
03572
03573
03574 static void
03575 check_bom(FILE *f)
03576 {
03577 int c2;
03578 switch(c2 = (*i_getc)(f)){
03579 case 0x00:
03580 if((c2 = (*i_getc)(f)) == 0x00){
03581 if((c2 = (*i_getc)(f)) == 0xFE){
03582 if((c2 = (*i_getc)(f)) == 0xFF){
03583 if(!input_encoding){
03584 set_iconv(TRUE, w_iconv32);
03585 }
03586 if (iconv == w_iconv32) {
03587 input_bom_f = TRUE;
03588 input_endian = ENDIAN_BIG;
03589 return;
03590 }
03591 (*i_ungetc)(0xFF,f);
03592 }else (*i_ungetc)(c2,f);
03593 (*i_ungetc)(0xFE,f);
03594 }else if(c2 == 0xFF){
03595 if((c2 = (*i_getc)(f)) == 0xFE){
03596 if(!input_encoding){
03597 set_iconv(TRUE, w_iconv32);
03598 }
03599 if (iconv == w_iconv32) {
03600 input_endian = ENDIAN_2143;
03601 return;
03602 }
03603 (*i_ungetc)(0xFF,f);
03604 }else (*i_ungetc)(c2,f);
03605 (*i_ungetc)(0xFF,f);
03606 }else (*i_ungetc)(c2,f);
03607 (*i_ungetc)(0x00,f);
03608 }else (*i_ungetc)(c2,f);
03609 (*i_ungetc)(0x00,f);
03610 break;
03611 case 0xEF:
03612 if((c2 = (*i_getc)(f)) == 0xBB){
03613 if((c2 = (*i_getc)(f)) == 0xBF){
03614 if(!input_encoding){
03615 set_iconv(TRUE, w_iconv);
03616 }
03617 if (iconv == w_iconv) {
03618 input_bom_f = TRUE;
03619 return;
03620 }
03621 (*i_ungetc)(0xBF,f);
03622 }else (*i_ungetc)(c2,f);
03623 (*i_ungetc)(0xBB,f);
03624 }else (*i_ungetc)(c2,f);
03625 (*i_ungetc)(0xEF,f);
03626 break;
03627 case 0xFE:
03628 if((c2 = (*i_getc)(f)) == 0xFF){
03629 if((c2 = (*i_getc)(f)) == 0x00){
03630 if((c2 = (*i_getc)(f)) == 0x00){
03631 if(!input_encoding){
03632 set_iconv(TRUE, w_iconv32);
03633 }
03634 if (iconv == w_iconv32) {
03635 input_endian = ENDIAN_3412;
03636 return;
03637 }
03638 (*i_ungetc)(0x00,f);
03639 }else (*i_ungetc)(c2,f);
03640 (*i_ungetc)(0x00,f);
03641 }else (*i_ungetc)(c2,f);
03642 if(!input_encoding){
03643 set_iconv(TRUE, w_iconv16);
03644 }
03645 if (iconv == w_iconv16) {
03646 input_endian = ENDIAN_BIG;
03647 input_bom_f = TRUE;
03648 return;
03649 }
03650 (*i_ungetc)(0xFF,f);
03651 }else (*i_ungetc)(c2,f);
03652 (*i_ungetc)(0xFE,f);
03653 break;
03654 case 0xFF:
03655 if((c2 = (*i_getc)(f)) == 0xFE){
03656 if((c2 = (*i_getc)(f)) == 0x00){
03657 if((c2 = (*i_getc)(f)) == 0x00){
03658 if(!input_encoding){
03659 set_iconv(TRUE, w_iconv32);
03660 }
03661 if (iconv == w_iconv32) {
03662 input_endian = ENDIAN_LITTLE;
03663 input_bom_f = TRUE;
03664 return;
03665 }
03666 (*i_ungetc)(0x00,f);
03667 }else (*i_ungetc)(c2,f);
03668 (*i_ungetc)(0x00,f);
03669 }else (*i_ungetc)(c2,f);
03670 if(!input_encoding){
03671 set_iconv(TRUE, w_iconv16);
03672 }
03673 if (iconv == w_iconv16) {
03674 input_endian = ENDIAN_LITTLE;
03675 input_bom_f = TRUE;
03676 return;
03677 }
03678 (*i_ungetc)(0xFE,f);
03679 }else (*i_ungetc)(c2,f);
03680 (*i_ungetc)(0xFF,f);
03681 break;
03682 default:
03683 (*i_ungetc)(c2,f);
03684 break;
03685 }
03686 }
03687
03688 static nkf_char
03689 broken_getc(FILE *f)
03690 {
03691 nkf_char c, c1;
03692
03693 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
03694 return nkf_buf_pop(nkf_state->broken_buf);
03695 }
03696 c = (*i_bgetc)(f);
03697 if (c=='$' && nkf_state->broken_state != ESC
03698 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
03699 c1= (*i_bgetc)(f);
03700 nkf_state->broken_state = 0;
03701 if (c1=='@'|| c1=='B') {
03702 nkf_buf_push(nkf_state->broken_buf, c1);
03703 nkf_buf_push(nkf_state->broken_buf, c);
03704 return ESC;
03705 } else {
03706 (*i_bungetc)(c1,f);
03707 return c;
03708 }
03709 } else if (c=='(' && nkf_state->broken_state != ESC
03710 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
03711 c1= (*i_bgetc)(f);
03712 nkf_state->broken_state = 0;
03713 if (c1=='J'|| c1=='B') {
03714 nkf_buf_push(nkf_state->broken_buf, c1);
03715 nkf_buf_push(nkf_state->broken_buf, c);
03716 return ESC;
03717 } else {
03718 (*i_bungetc)(c1,f);
03719 return c;
03720 }
03721 } else {
03722 nkf_state->broken_state = c;
03723 return c;
03724 }
03725 }
03726
03727 static nkf_char
03728 broken_ungetc(nkf_char c, ARG_UNUSED FILE *f)
03729 {
03730 if (nkf_buf_length(nkf_state->broken_buf) < 2)
03731 nkf_buf_push(nkf_state->broken_buf, c);
03732 return c;
03733 }
03734
03735 static void
03736 eol_conv(nkf_char c2, nkf_char c1)
03737 {
03738 if (guess_f && input_eol != EOF) {
03739 if (c2 == 0 && c1 == LF) {
03740 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
03741 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
03742 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
03743 else if (!prev_cr);
03744 else if (!input_eol) input_eol = CR;
03745 else if (input_eol != CR) input_eol = EOF;
03746 }
03747 if (prev_cr || (c2 == 0 && c1 == LF)) {
03748 prev_cr = 0;
03749 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
03750 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
03751 }
03752 if (c2 == 0 && c1 == CR) prev_cr = CR;
03753 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
03754 }
03755
03756 static void
03757 put_newline(void (*func)(nkf_char))
03758 {
03759 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03760 case CRLF:
03761 (*func)(0x0D);
03762 (*func)(0x0A);
03763 break;
03764 case CR:
03765 (*func)(0x0D);
03766 break;
03767 case LF:
03768 (*func)(0x0A);
03769 break;
03770 }
03771 }
03772
03773 static void
03774 oconv_newline(void (*func)(nkf_char, nkf_char))
03775 {
03776 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03777 case CRLF:
03778 (*func)(0, 0x0D);
03779 (*func)(0, 0x0A);
03780 break;
03781 case CR:
03782 (*func)(0, 0x0D);
03783 break;
03784 case LF:
03785 (*func)(0, 0x0A);
03786 break;
03787 }
03788 }
03789
03790
03791
03792
03793
03794
03795
03796
03797
03798
03799
03800
03801
03802
03803
03804
03805
03806
03807
03808
03809
03810 #define char_size(c2,c1) (c2?2:1)
03811
03812 static void
03813 fold_conv(nkf_char c2, nkf_char c1)
03814 {
03815 nkf_char prev0;
03816 nkf_char fold_state;
03817
03818 if (c1== CR && !fold_preserve_f) {
03819 fold_state=0;
03820 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
03821 f_prev = LF;
03822 fold_state=0;
03823 } else if (c1== BS) {
03824 if (f_line>0) f_line--;
03825 fold_state = 1;
03826 } else if (c2==EOF && f_line != 0) {
03827 fold_state = LF;
03828 } else if ((c1==LF && !fold_preserve_f)
03829 || ((c1==CR||(c1==LF&&f_prev!=CR))
03830 && fold_preserve_f)) {
03831
03832 if (fold_preserve_f) {
03833 f_prev = c1;
03834 f_line = 0;
03835 fold_state = CR;
03836 } else if ((f_prev == c1 && !fold_preserve_f)
03837 || (f_prev == LF && fold_preserve_f)
03838 ) {
03839 if (f_line) {
03840 f_line = 0;
03841 fold_state = LF;
03842 } else {
03843 f_line = 0;
03844 fold_state = 1;
03845 }
03846 } else {
03847 if (f_prev&0x80) {
03848 f_prev = c1;
03849 fold_state = 0;
03850 } else if (f_prev==SP) {
03851 fold_state = 0;
03852 } else {
03853 f_prev = c1;
03854 if (++f_line<=fold_len)
03855 fold_state = SP;
03856 else {
03857 f_line = 0;
03858 fold_state = CR;
03859 }
03860 }
03861 }
03862 } else if (c1=='\f') {
03863 f_prev = LF;
03864 f_line = 0;
03865 fold_state = LF;
03866 } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
03867
03868 if (f_prev == SP) {
03869 fold_state = 0;
03870 } else {
03871 f_prev = SP;
03872 if (++f_line<=fold_len)
03873 fold_state = SP;
03874 else {
03875 f_prev = SP; f_line = 0;
03876 fold_state = CR;
03877 }
03878 }
03879 } else {
03880 prev0 = f_prev;
03881 f_prev = c1;
03882 if (c2 || c2 == JIS_X_0201_1976_K)
03883 f_prev |= 0x80;
03884 f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
03885 if (f_line<=fold_len) {
03886 fold_state = 1;
03887 } else {
03888 if (f_line>fold_len+fold_margin) {
03889 f_line = char_size(c2,c1);
03890 fold_state = LF;
03891 } else if (c2 == JIS_X_0201_1976_K) {
03892
03893 if (c1==(0xde&0x7f)) fold_state = 1;
03894 else if (c1==(0xdf&0x7f)) fold_state = 1;
03895 else if (c1==(0xa4&0x7f)) fold_state = 1;
03896 else if (c1==(0xa3&0x7f)) fold_state = 1;
03897 else if (c1==(0xa1&0x7f)) fold_state = 1;
03898 else if (c1==(0xb0&0x7f)) fold_state = 1;
03899 else if (SP<=c1 && c1<=(0xdf&0x7f)) {
03900 f_line = 1;
03901 fold_state = LF;
03902 } else {
03903 f_line = 1;
03904 fold_state = LF;
03905 }
03906 } else if (c2==0) {
03907
03908 if ( c1==')'||
03909 c1==']'||
03910 c1=='}'||
03911 c1=='.'||
03912 c1==','||
03913 c1=='!'||
03914 c1=='?'||
03915 c1=='/'||
03916 c1==':'||
03917 c1==';') {
03918 fold_state = 1;
03919
03920 } else if (!is_alnum(prev0)) {
03921 f_line = char_size(c2,c1);
03922 fold_state = LF;
03923 } else if ((prev0==SP) ||
03924 (prev0==LF)||
03925 (prev0&0x80)) {
03926 f_line = char_size(c2,c1);
03927 fold_state = LF;
03928 } else {
03929 fold_state = 1;
03930 }
03931 } else {
03932 if (c2=='!') {
03933 if (c1=='"') fold_state = 1;
03934 else if (c1=='#') fold_state = 1;
03935 else if (c1=='W') fold_state = 1;
03936 else if (c1=='K') fold_state = 1;
03937 else if (c1=='$') fold_state = 1;
03938 else if (c1=='%') fold_state = 1;
03939 else if (c1=='\'') fold_state = 1;
03940 else if (c1=='(') fold_state = 1;
03941 else if (c1==')') fold_state = 1;
03942 else if (c1=='*') fold_state = 1;
03943 else if (c1=='+') fold_state = 1;
03944 else if (c1==',') fold_state = 1;
03945
03946 else {
03947 fold_state = LF;
03948 f_line = char_size(c2,c1);
03949
03950 }
03951 } else {
03952 f_line = char_size(c2,c1);
03953 fold_state = LF;
03954
03955 }
03956 }
03957 }
03958 }
03959
03960 switch(fold_state) {
03961 case LF:
03962 oconv_newline(o_fconv);
03963 (*o_fconv)(c2,c1);
03964 break;
03965 case 0:
03966 return;
03967 case CR:
03968 oconv_newline(o_fconv);
03969 break;
03970 case TAB:
03971 case SP:
03972 (*o_fconv)(0,SP);
03973 break;
03974 default:
03975 (*o_fconv)(c2,c1);
03976 }
03977 }
03978
03979 static nkf_char z_prev2=0,z_prev1=0;
03980
03981 static void
03982 z_conv(nkf_char c2, nkf_char c1)
03983 {
03984
03985
03986
03987 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
03988 (*o_zconv)(c2,c1);
03989 return;
03990 }
03991
03992 if (x0201_f) {
03993 if (z_prev2 == JIS_X_0201_1976_K) {
03994 if (c2 == JIS_X_0201_1976_K) {
03995 if (c1 == (0xde&0x7f)) {
03996 z_prev2 = 0;
03997 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
03998 return;
03999 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) {
04000 z_prev2 = 0;
04001 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
04002 return;
04003 } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) {
04004 z_prev2 = 0;
04005 (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
04006 return;
04007 }
04008 }
04009 z_prev2 = 0;
04010 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
04011 }
04012 if (c2 == JIS_X_0201_1976_K) {
04013 if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
04014
04015 z_prev1 = c1;
04016 z_prev2 = c2;
04017 return;
04018 } else {
04019 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
04020 return;
04021 }
04022 }
04023 }
04024
04025 if (c2 == EOF) {
04026 (*o_zconv)(c2, c1);
04027 return;
04028 }
04029
04030 if (alpha_f&1 && c2 == 0x23) {
04031
04032 c2 = 0;
04033 } else if (c2 == 0x21) {
04034
04035 if (0x21==c1) {
04036 if (alpha_f&2) {
04037 c2 = 0;
04038 c1 = SP;
04039 } else if (alpha_f&4) {
04040 (*o_zconv)(0, SP);
04041 (*o_zconv)(0, SP);
04042 return;
04043 }
04044 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
04045 c2 = 0;
04046 c1 = fv[c1-0x20];
04047 }
04048 }
04049
04050 if (alpha_f&8 && c2 == 0) {
04051
04052 const char *entity = 0;
04053 switch (c1){
04054 case '>': entity = ">"; break;
04055 case '<': entity = "<"; break;
04056 case '\"': entity = """; break;
04057 case '&': entity = "&"; break;
04058 }
04059 if (entity){
04060 while (*entity) (*o_zconv)(0, *entity++);
04061 return;
04062 }
04063 }
04064
04065 if (alpha_f & 16) {
04066
04067 if (c2 == 0x21) {
04068 nkf_char c = 0;
04069 switch (c1) {
04070 case 0x23:
04071
04072 c = 0xA1;
04073 break;
04074 case 0x56:
04075
04076 c = 0xA2;
04077 break;
04078 case 0x57:
04079
04080 c = 0xA3;
04081 break;
04082 case 0x22:
04083
04084 c = 0xA4;
04085 break;
04086 case 0x26:
04087
04088 c = 0xA5;
04089 break;
04090 case 0x3C:
04091
04092 c = 0xB0;
04093 break;
04094 case 0x2B:
04095
04096 c = 0xDE;
04097 break;
04098 case 0x2C:
04099
04100 c = 0xDF;
04101 break;
04102 }
04103 if (c) {
04104 (*o_zconv)(JIS_X_0201_1976_K, c);
04105 return;
04106 }
04107 } else if (c2 == 0x25) {
04108
04109 static const int fullwidth_to_halfwidth[] =
04110 {
04111 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
04112 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
04113 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
04114 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
04115 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
04116 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
04117 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
04118 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
04119 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
04120 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
04121 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
04122 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
04123 };
04124 if (fullwidth_to_halfwidth[c1-0x20]){
04125 c2 = fullwidth_to_halfwidth[c1-0x20];
04126 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
04127 if (c2 & 0xFF) {
04128 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
04129 }
04130 return;
04131 }
04132 } else if (c2 == 0 && nkf_char_unicode_p(c1) &&
04133 ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) {
04134 (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
04135 return;
04136 }
04137 }
04138 (*o_zconv)(c2,c1);
04139 }
04140
04141
/*
 * rot13(c): rotate ASCII letters by 13 places; anything else is
 * returned unchanged.
 * rot47(c): rotate the printable range '!'..'~' by 47 places; anything
 * else is returned unchanged.
 *
 * The argument is parenthesised at every use so that an expression such
 * as (x & 0x7f) is handled correctly.  It is still evaluated more than
 * once, so it must not have side effects.
 */
#define rot13(c)  ( \
                   ((c) <  'A') ? (c): \
                   ((c) <= 'M') ? ((c) + 13): \
                   ((c) <= 'Z') ? ((c) - 13): \
                   ((c) <  'a') ? (c): \
                   ((c) <= 'm') ? ((c) + 13): \
                   ((c) <= 'z') ? ((c) - 13): \
                   (c) \
                  )

#define rot47(c) ( \
                  ((c) <  '!') ? (c): \
                  ((c) <= 'O') ? ((c) + 47) : \
                  ((c) <= '~') ? ((c) - 47) : \
                  (c) \
                 )
04158
04159 static void
04160 rot_conv(nkf_char c2, nkf_char c1)
04161 {
04162 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
04163 c1 = rot13(c1);
04164 } else if (c2) {
04165 c1 = rot47(c1);
04166 c2 = rot47(c2);
04167 }
04168 (*o_rot_conv)(c2,c1);
04169 }
04170
04171 static void
04172 hira_conv(nkf_char c2, nkf_char c1)
04173 {
04174 if (hira_f & 1) {
04175 if (c2 == 0x25) {
04176 if (0x20 < c1 && c1 < 0x74) {
04177 c2 = 0x24;
04178 (*o_hira_conv)(c2,c1);
04179 return;
04180 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
04181 c2 = 0;
04182 c1 = nkf_char_unicode_new(0x3094);
04183 (*o_hira_conv)(c2,c1);
04184 return;
04185 }
04186 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
04187 c1 += 2;
04188 (*o_hira_conv)(c2,c1);
04189 return;
04190 }
04191 }
04192 if (hira_f & 2) {
04193 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
04194 c2 = 0x25;
04195 c1 = 0x74;
04196 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
04197 c2 = 0x25;
04198 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
04199 c1 -= 2;
04200 }
04201 }
04202 (*o_hira_conv)(c2,c1);
04203 }
04204
04205
04206 static void
04207 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
04208 {
04209 #define RANGE_NUM_MAX 18
04210 static const nkf_char range[RANGE_NUM_MAX][2] = {
04211 {0x222f, 0x2239,},
04212 {0x2242, 0x2249,},
04213 {0x2251, 0x225b,},
04214 {0x226b, 0x2271,},
04215 {0x227a, 0x227d,},
04216 {0x2321, 0x232f,},
04217 {0x233a, 0x2340,},
04218 {0x235b, 0x2360,},
04219 {0x237b, 0x237e,},
04220 {0x2474, 0x247e,},
04221 {0x2577, 0x257e,},
04222 {0x2639, 0x2640,},
04223 {0x2659, 0x267e,},
04224 {0x2742, 0x2750,},
04225 {0x2772, 0x277e,},
04226 {0x2841, 0x287e,},
04227 {0x4f54, 0x4f7e,},
04228 {0x7425, 0x747e},
04229 };
04230 nkf_char i;
04231 nkf_char start, end, c;
04232
04233 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
04234 c2 = GETA1;
04235 c1 = GETA2;
04236 }
04237 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
04238 c2 = GETA1;
04239 c1 = GETA2;
04240 }
04241
04242 for (i = 0; i < RANGE_NUM_MAX; i++) {
04243 start = range[i][0];
04244 end = range[i][1];
04245 c = (c2 << 8) + c1;
04246 if (c >= start && c <= end) {
04247 c2 = GETA1;
04248 c1 = GETA2;
04249 }
04250 }
04251 (*o_iso2022jp_check_conv)(c2,c1);
04252 }
04253
04254
04255
04256
04257 static const unsigned char *mime_pattern[] = {
04258 (const unsigned char *)"\075?EUC-JP?B?",
04259 (const unsigned char *)"\075?SHIFT_JIS?B?",
04260 (const unsigned char *)"\075?ISO-8859-1?Q?",
04261 (const unsigned char *)"\075?ISO-8859-1?B?",
04262 (const unsigned char *)"\075?ISO-2022-JP?B?",
04263 (const unsigned char *)"\075?ISO-2022-JP?B?",
04264 (const unsigned char *)"\075?ISO-2022-JP?Q?",
04265 #if defined(UTF8_INPUT_ENABLE)
04266 (const unsigned char *)"\075?UTF-8?B?",
04267 (const unsigned char *)"\075?UTF-8?Q?",
04268 #endif
04269 (const unsigned char *)"\075?US-ASCII?Q?",
04270 NULL
04271 };
04272
04273
04274
04275 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
04276 e_iconv, s_iconv, 0, 0, 0, 0, 0,
04277 #if defined(UTF8_INPUT_ENABLE)
04278 w_iconv, w_iconv,
04279 #endif
04280 0,
04281 };
04282
04283 static const nkf_char mime_encode[] = {
04284 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
04285 #if defined(UTF8_INPUT_ENABLE)
04286 UTF_8, UTF_8,
04287 #endif
04288 ASCII,
04289 0
04290 };
04291
04292 static const nkf_char mime_encode_method[] = {
04293 'B', 'B','Q', 'B', 'B', 'B', 'Q',
04294 #if defined(UTF8_INPUT_ENABLE)
04295 'B', 'Q',
04296 #endif
04297 'Q',
04298 0
04299 };
04300
04301
04302
04303
04304 #define MIME_BUF_SIZE (1024)
04305 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
04306 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
04307 static struct {
04308 unsigned char buf[MIME_BUF_SIZE];
04309 unsigned int top;
04310 unsigned int last;
04311 unsigned int input;
04312 } mime_input_state;
04313 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
04314
04315 #define MAXRECOVER 20
04316
04317 static void
04318 mime_input_buf_unshift(nkf_char c)
04319 {
04320 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
04321 }
04322
04323 static nkf_char
04324 mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
04325 {
04326 mime_input_buf_unshift(c);
04327 return c;
04328 }
04329
04330 static nkf_char
04331 mime_ungetc_buf(nkf_char c, FILE *f)
04332 {
04333 if (mimebuf_f)
04334 (*i_mungetc_buf)(c,f);
04335 else
04336 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
04337 return c;
04338 }
04339
04340 static nkf_char
04341 mime_getc_buf(FILE *f)
04342 {
04343
04344
04345 return ((mimebuf_f)?
04346 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
04347 }
04348
04349 static void
04350 switch_mime_getc(void)
04351 {
04352 if (i_getc!=mime_getc) {
04353 i_mgetc = i_getc; i_getc = mime_getc;
04354 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
04355 if(mime_f==STRICT_MIME) {
04356 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
04357 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
04358 }
04359 }
04360 }
04361
04362 static void
04363 unswitch_mime_getc(void)
04364 {
04365 if(mime_f==STRICT_MIME) {
04366 i_mgetc = i_mgetc_buf;
04367 i_mungetc = i_mungetc_buf;
04368 }
04369 i_getc = i_mgetc;
04370 i_ungetc = i_mungetc;
04371 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
04372 mime_iconv_back = NULL;
04373 }
04374
04375 static nkf_char
04376 mime_integrity(FILE *f, const unsigned char *p)
04377 {
04378 nkf_char c,d;
04379 unsigned int q;
04380
04381
04382 mime_input_state.input = mime_input_state.top;
04383 mime_input_state.last = mime_input_state.top;
04384
04385 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
04386 d = 0;
04387 q = mime_input_state.input;
04388 while((c=(*i_getc)(f))!=EOF) {
04389 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
04390 break;
04391 }
04392 if (c=='=' && d=='?') {
04393
04394 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04395
04396 mime_input_state.input = q;
04397 switch_mime_getc();
04398 return 1;
04399 }
04400 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
04401 break;
04402
04403 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04404 d=c;
04405 }
04406
04407 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04408 mime_input_state.last = mime_input_state.input;
04409 mime_decode_mode = 1;
04410 switch_mime_getc();
04411 return 1;
04412 }
04413
04414 static nkf_char
04415 mime_begin_strict(FILE *f)
04416 {
04417 nkf_char c1 = 0;
04418 int i,j,k;
04419 const unsigned char *p,*q;
04420 nkf_char r[MAXRECOVER];
04421
04422 mime_decode_mode = FALSE;
04423
04424 j = 0;
04425 p = mime_pattern[j];
04426 r[0]='='; r[1]='?';
04427
04428 for(i=2;p[i]>SP;i++) {
04429 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
04430
04431 q = p;
04432 while (mime_pattern[++j]) {
04433 p = mime_pattern[j];
04434 for(k=2;k<i;k++)
04435 if (p[k]!=q[k]) break;
04436 if (k==i && nkf_toupper(c1)==p[k]) break;
04437 }
04438 p = mime_pattern[j];
04439 if (p) continue;
04440
04441 (*i_ungetc)(c1,f);
04442 for(j=0;j<i;j++) {
04443 (*oconv)(0,r[j]);
04444 }
04445 return c1;
04446 }
04447 }
04448 mime_decode_mode = p[i-2];
04449
04450 mime_iconv_back = iconv;
04451 set_iconv(FALSE, mime_priority_func[j]);
04452 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
04453
04454 if (mime_decode_mode=='B') {
04455 mimebuf_f = unbuf_f;
04456 if (!unbuf_f) {
04457
04458 return mime_integrity(f,mime_pattern[j]);
04459 }
04460 }
04461 switch_mime_getc();
04462 mimebuf_f = TRUE;
04463 return c1;
04464 }
04465
04466 static nkf_char
04467 mime_begin(FILE *f)
04468 {
04469 nkf_char c1 = 0;
04470 int i,k;
04471
04472
04473
04474
04475
04476 k = mime_input_state.last;
04477 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
04478 for(i=2;i<MAXRECOVER;i++) {
04479
04480 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04481 if (c1==LF||c1==SP||c1==CR||
04482 c1=='-'||c1=='_'||is_alnum(c1)) continue;
04483 if (c1=='=') {
04484
04485 (*i_ungetc)(c1,f);
04486 mime_input_state.last--;
04487 break;
04488 }
04489 if (c1!='?') break;
04490 else {
04491
04492 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04493 if (!(++i<MAXRECOVER) || c1==EOF) break;
04494 if (c1=='b'||c1=='B') {
04495 mime_decode_mode = 'B';
04496 } else if (c1=='q'||c1=='Q') {
04497 mime_decode_mode = 'Q';
04498 } else {
04499 break;
04500 }
04501 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04502 if (!(++i<MAXRECOVER) || c1==EOF) break;
04503 if (c1!='?') {
04504 mime_decode_mode = FALSE;
04505 }
04506 break;
04507 }
04508 }
04509 switch_mime_getc();
04510 if (!mime_decode_mode) {
04511
04512 mime_decode_mode = 1;
04513
04514
04515 return c1;
04516 }
04517
04518 mime_input_state.last = k;
04519
04520 return c1;
04521 }
04522
04523 #ifdef CHECK_OPTION
04524 static void
04525 no_putc(ARG_UNUSED nkf_char c)
04526 {
04527 ;
04528 }
04529
04530 static void
04531 debug(const char *str)
04532 {
04533 if (debug_f){
04534 fprintf(stderr, "%s\n", str ? str : "NULL");
04535 }
04536 }
04537 #endif
04538
04539 static void
04540 set_input_codename(const char *codename)
04541 {
04542 if (!input_codename) {
04543 input_codename = codename;
04544 } else if (strcmp(codename, input_codename) != 0) {
04545 input_codename = "";
04546 }
04547 }
04548
04549 static const char*
04550 get_guessed_code(void)
04551 {
04552 if (input_codename && !*input_codename) {
04553 input_codename = "BINARY";
04554 } else {
04555 struct input_code *p = find_inputcode_byfunc(iconv);
04556 if (!input_codename) {
04557 input_codename = "ASCII";
04558 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
04559 if (p->score & (SCORE_DEPEND|SCORE_CP932))
04560 input_codename = "CP932";
04561 } else if (strcmp(input_codename, "EUC-JP") == 0) {
04562 if (p->score & SCORE_X0213)
04563 input_codename = "EUC-JIS-2004";
04564 else if (p->score & (SCORE_X0212))
04565 input_codename = "EUCJP-MS";
04566 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04567 input_codename = "CP51932";
04568 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
04569 if (p->score & (SCORE_KANA))
04570 input_codename = "CP50221";
04571 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04572 input_codename = "CP50220";
04573 }
04574 }
04575 return input_codename;
04576 }
04577
04578 #if !defined(PERL_XS) && !defined(WIN32DLL)
04579 static void
04580 print_guessed_code(char *filename)
04581 {
04582 if (filename != NULL) printf("%s: ", filename);
04583 if (input_codename && !*input_codename) {
04584 printf("BINARY\n");
04585 } else {
04586 input_codename = get_guessed_code();
04587 if (guess_f == 1) {
04588 printf("%s\n", input_codename);
04589 } else {
04590 printf("%s%s%s%s\n",
04591 input_codename,
04592 iconv != w_iconv16 && iconv != w_iconv32 ? "" :
04593 input_endian == ENDIAN_LITTLE ? " LE" :
04594 input_endian == ENDIAN_BIG ? " BE" :
04595 "[BUG]",
04596 input_bom_f ? " (BOM)" : "",
04597 input_eol == CR ? " (CR)" :
04598 input_eol == LF ? " (LF)" :
04599 input_eol == CRLF ? " (CRLF)" :
04600 input_eol == EOF ? " (MIXED NL)" :
04601 "");
04602 }
04603 }
04604 }
04605 #endif
04606
04607 #ifdef INPUT_OPTION
04608
04609 static nkf_char
04610 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
04611 {
04612 nkf_char c1, c2, c3;
04613 c1 = (*g)(f);
04614 if (c1 != ch){
04615 return c1;
04616 }
04617 c2 = (*g)(f);
04618 if (!nkf_isxdigit(c2)){
04619 (*u)(c2, f);
04620 return c1;
04621 }
04622 c3 = (*g)(f);
04623 if (!nkf_isxdigit(c3)){
04624 (*u)(c2, f);
04625 (*u)(c3, f);
04626 return c1;
04627 }
04628 return (hex2bin(c2) << 4) | hex2bin(c3);
04629 }
04630
04631 static nkf_char
04632 cap_getc(FILE *f)
04633 {
04634 return hex_getc(':', f, i_cgetc, i_cungetc);
04635 }
04636
04637 static nkf_char
04638 cap_ungetc(nkf_char c, FILE *f)
04639 {
04640 return (*i_cungetc)(c, f);
04641 }
04642
04643 static nkf_char
04644 url_getc(FILE *f)
04645 {
04646 return hex_getc('%', f, i_ugetc, i_uungetc);
04647 }
04648
04649 static nkf_char
04650 url_ungetc(nkf_char c, FILE *f)
04651 {
04652 return (*i_uungetc)(c, f);
04653 }
04654 #endif
04655
04656 #ifdef NUMCHAR_OPTION
04657 static nkf_char
04658 numchar_getc(FILE *f)
04659 {
04660 nkf_char (*g)(FILE *) = i_ngetc;
04661 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
04662 int i = 0, j;
04663 nkf_char buf[12];
04664 nkf_char c = -1;
04665
04666 buf[i] = (*g)(f);
04667 if (buf[i] == '&'){
04668 buf[++i] = (*g)(f);
04669 if (buf[i] == '#'){
04670 c = 0;
04671 buf[++i] = (*g)(f);
04672 if (buf[i] == 'x' || buf[i] == 'X'){
04673 for (j = 0; j < 7; j++){
04674 buf[++i] = (*g)(f);
04675 if (!nkf_isxdigit(buf[i])){
04676 if (buf[i] != ';'){
04677 c = -1;
04678 }
04679 break;
04680 }
04681 c <<= 4;
04682 c |= hex2bin(buf[i]);
04683 }
04684 }else{
04685 for (j = 0; j < 8; j++){
04686 if (j){
04687 buf[++i] = (*g)(f);
04688 }
04689 if (!nkf_isdigit(buf[i])){
04690 if (buf[i] != ';'){
04691 c = -1;
04692 }
04693 break;
04694 }
04695 c *= 10;
04696 c += hex2bin(buf[i]);
04697 }
04698 }
04699 }
04700 }
04701 if (c != -1){
04702 return nkf_char_unicode_new(c);
04703 }
04704 while (i > 0){
04705 (*u)(buf[i], f);
04706 --i;
04707 }
04708 return buf[0];
04709 }
04710
04711 static nkf_char
04712 numchar_ungetc(nkf_char c, FILE *f)
04713 {
04714 return (*i_nungetc)(c, f);
04715 }
04716 #endif
04717
04718 #ifdef UNICODE_NORMALIZATION
04719
04720 static nkf_char
04721 nfc_getc(FILE *f)
04722 {
04723 nkf_char (*g)(FILE *f) = i_nfc_getc;
04724 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
04725 nkf_buf_t *buf = nkf_state->nfc_buf;
04726 const unsigned char *array;
04727 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
04728 nkf_char c = (*g)(f);
04729
04730 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
04731
04732 nkf_buf_push(buf, c);
04733 do {
04734 while (lower <= upper) {
04735 int mid = (lower+upper) / 2;
04736 int len;
04737 array = normalization_table[mid].nfd;
04738 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
04739 if (len >= nkf_buf_length(buf)) {
04740 c = (*g)(f);
04741 if (c == EOF) {
04742 len = 0;
04743 lower = 1, upper = 0;
04744 break;
04745 }
04746 nkf_buf_push(buf, c);
04747 }
04748 if (array[len] != nkf_buf_at(buf, len)) {
04749 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
04750 else upper = mid - 1;
04751 len = 0;
04752 break;
04753 }
04754 }
04755 if (len > 0) {
04756 int i;
04757 array = normalization_table[mid].nfc;
04758 nkf_buf_clear(buf);
04759 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
04760 nkf_buf_push(buf, array[i]);
04761 break;
04762 }
04763 }
04764 } while (lower <= upper);
04765
04766 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
04767 c = nkf_buf_pop(buf);
04768
04769 return c;
04770 }
04771
04772 static nkf_char
04773 nfc_ungetc(nkf_char c, FILE *f)
04774 {
04775 return (*i_nfc_ungetc)(c, f);
04776 }
04777 #endif
04778
04779
04780 static nkf_char
04781 base64decode(nkf_char c)
04782 {
04783 int i;
04784 if (c > '@') {
04785 if (c < '[') {
04786 i = c - 'A';
04787 } else if (c == '_') {
04788 i = '?' ;
04789 } else {
04790 i = c - 'G' ;
04791 }
04792 } else if (c > '/') {
04793 i = c - '0' + '4' ;
04794 } else if (c == '+' || c == '-') {
04795 i = '>' ;
04796 } else {
04797 i = '?' ;
04798 }
04799 return (i);
04800 }
04801
04802 static nkf_char
04803 mime_getc(FILE *f)
04804 {
04805 nkf_char c1, c2, c3, c4, cc;
04806 nkf_char t1, t2, t3, t4, mode, exit_mode;
04807 nkf_char lwsp_count;
04808 char *lwsp_buf;
04809 char *lwsp_buf_new;
04810 nkf_char lwsp_size = 128;
04811
04812 if (mime_input_state.top != mime_input_state.last) {
04813 return mime_input_buf(mime_input_state.top++);
04814 }
04815 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
04816 mime_decode_mode=FALSE;
04817 unswitch_mime_getc();
04818 return (*i_getc)(f);
04819 }
04820
04821 if (mimebuf_f == FIXED_MIME)
04822 exit_mode = mime_decode_mode;
04823 else
04824 exit_mode = FALSE;
04825 if (mime_decode_mode == 'Q') {
04826 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
04827 restart_mime_q:
04828 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
04829 if (c1<=SP || DEL<=c1) {
04830 mime_decode_mode = exit_mode;
04831 return c1;
04832 }
04833 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
04834 return c1;
04835 }
04836
04837 mime_decode_mode = exit_mode;
04838 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
04839 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
04840
04841 input_mode = exit_mode;
04842 lwsp_count = 0;
04843 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
04844 while ((c1=(*i_getc)(f))!=EOF) {
04845 switch (c1) {
04846 case LF:
04847 case CR:
04848 if (c1==LF) {
04849 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04850 i_ungetc(SP,f);
04851 continue;
04852 } else {
04853 i_ungetc(c1,f);
04854 }
04855 c1 = LF;
04856 } else {
04857 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
04858 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04859 i_ungetc(SP,f);
04860 continue;
04861 } else {
04862 i_ungetc(c1,f);
04863 }
04864 i_ungetc(LF,f);
04865 } else {
04866 i_ungetc(c1,f);
04867 }
04868 c1 = CR;
04869 }
04870 break;
04871 case SP:
04872 case TAB:
04873 lwsp_buf[lwsp_count] = (unsigned char)c1;
04874 if (lwsp_count++>lwsp_size){
04875 lwsp_size <<= 1;
04876 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
04877 lwsp_buf = lwsp_buf_new;
04878 }
04879 continue;
04880 }
04881 break;
04882 }
04883 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
04884 i_ungetc(c1,f);
04885 for(lwsp_count--;lwsp_count>0;lwsp_count--)
04886 i_ungetc(lwsp_buf[lwsp_count],f);
04887 c1 = lwsp_buf[0];
04888 }
04889 nkf_xfree(lwsp_buf);
04890 return c1;
04891 }
04892 if (c1=='='&&c2<SP) {
04893 while((c1 = (*i_mgetc)(f)) <=SP) {
04894 if (c1 == EOF) return (EOF);
04895 }
04896 mime_decode_mode = 'Q';
04897 goto restart_mime_q;
04898 }
04899 if (c1=='?') {
04900 mime_decode_mode = 'Q';
04901 (*i_mungetc)(c2,f);
04902 return c1;
04903 }
04904 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
04905 if (c2<=SP) return c2;
04906 mime_decode_mode = 'Q';
04907 return ((hex2bin(c2)<<4) + hex2bin(c3));
04908 }
04909
04910 if (mime_decode_mode != 'B') {
04911 mime_decode_mode = FALSE;
04912 return (*i_mgetc)(f);
04913 }
04914
04915
04916
04917
04918
04919
04920
04921
04922
04923
04924 mode = mime_decode_mode;
04925 mime_decode_mode = exit_mode;
04926
04927 while ((c1 = (*i_mgetc)(f))<=SP) {
04928 if (c1==EOF)
04929 return (EOF);
04930 }
04931 mime_c2_retry:
04932 if ((c2 = (*i_mgetc)(f))<=SP) {
04933 if (c2==EOF)
04934 return (EOF);
04935 if (mime_f != STRICT_MIME) goto mime_c2_retry;
04936 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
04937 return c2;
04938 }
04939 if ((c1 == '?') && (c2 == '=')) {
04940 input_mode = ASCII;
04941 lwsp_count = 0;
04942 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
04943 while ((c1=(*i_getc)(f))!=EOF) {
04944 switch (c1) {
04945 case LF:
04946 case CR:
04947 if (c1==LF) {
04948 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04949 i_ungetc(SP,f);
04950 continue;
04951 } else {
04952 i_ungetc(c1,f);
04953 }
04954 c1 = LF;
04955 } else {
04956 if ((c1=(*i_getc)(f))!=EOF) {
04957 if (c1==SP) {
04958 i_ungetc(SP,f);
04959 continue;
04960 } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04961 i_ungetc(SP,f);
04962 continue;
04963 } else {
04964 i_ungetc(c1,f);
04965 }
04966 i_ungetc(LF,f);
04967 } else {
04968 i_ungetc(c1,f);
04969 }
04970 c1 = CR;
04971 }
04972 break;
04973 case SP:
04974 case TAB:
04975 lwsp_buf[lwsp_count] = (unsigned char)c1;
04976 if (lwsp_count++>lwsp_size){
04977 lwsp_size <<= 1;
04978 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
04979 lwsp_buf = lwsp_buf_new;
04980 }
04981 continue;
04982 }
04983 break;
04984 }
04985 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
04986 i_ungetc(c1,f);
04987 for(lwsp_count--;lwsp_count>0;lwsp_count--)
04988 i_ungetc(lwsp_buf[lwsp_count],f);
04989 c1 = lwsp_buf[0];
04990 }
04991 nkf_xfree(lwsp_buf);
04992 return c1;
04993 }
04994 mime_c3_retry:
04995 if ((c3 = (*i_mgetc)(f))<=SP) {
04996 if (c3==EOF)
04997 return (EOF);
04998 if (mime_f != STRICT_MIME) goto mime_c3_retry;
04999 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
05000 return c3;
05001 }
05002 mime_c4_retry:
05003 if ((c4 = (*i_mgetc)(f))<=SP) {
05004 if (c4==EOF)
05005 return (EOF);
05006 if (mime_f != STRICT_MIME) goto mime_c4_retry;
05007 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
05008 return c4;
05009 }
05010
05011 mime_decode_mode = mode;
05012
05013
05014
05015 t1 = 0x3f & base64decode(c1);
05016 t2 = 0x3f & base64decode(c2);
05017 t3 = 0x3f & base64decode(c3);
05018 t4 = 0x3f & base64decode(c4);
05019 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
05020 if (c2 != '=') {
05021 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
05022 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
05023 if (c3 != '=') {
05024 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
05025 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
05026 if (c4 != '=')
05027 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
05028 }
05029 } else {
05030 return c1;
05031 }
05032 return mime_input_buf(mime_input_state.top++);
05033 }
05034
/* Base64 alphabet: index with a 6-bit value (0..63) to get the output character. */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Capacity (in bytes) of the MIME output hold buffer below. */
#define MIMEOUT_BUF_LENGTH 74
/*
 * Bytes that the MIME output stage has accepted but not yet emitted.
 * mime_putc() appends to it and open_mime()/mime_putc() drain it.
 */
static struct {
    unsigned char buf[MIMEOUT_BUF_LENGTH+1]; /* held bytes; one spare slot beyond the nominal capacity */
    int count;                               /* number of valid bytes currently in buf */
} mimeout_state;
05043
05044
05045
05046 static void
05047 open_mime(nkf_char mode)
05048 {
05049 const unsigned char *p;
05050 int i;
05051 int j;
05052 p = mime_pattern[0];
05053 for(i=0;mime_pattern[i];i++) {
05054 if (mode == mime_encode[i]) {
05055 p = mime_pattern[i];
05056 break;
05057 }
05058 }
05059 mimeout_mode = mime_encode_method[i];
05060 i = 0;
05061 if (base64_count>45) {
05062 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
05063 (*o_mputc)(mimeout_state.buf[i]);
05064 i++;
05065 }
05066 put_newline(o_mputc);
05067 (*o_mputc)(SP);
05068 base64_count = 1;
05069 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
05070 i++;
05071 }
05072 }
05073 for (;i<mimeout_state.count;i++) {
05074 if (nkf_isspace(mimeout_state.buf[i])) {
05075 (*o_mputc)(mimeout_state.buf[i]);
05076 base64_count ++;
05077 } else {
05078 break;
05079 }
05080 }
05081 while(*p) {
05082 (*o_mputc)(*p++);
05083 base64_count ++;
05084 }
05085 j = mimeout_state.count;
05086 mimeout_state.count = 0;
05087 for (;i<j;i++) {
05088 mime_putc(mimeout_state.buf[i]);
05089 }
05090 }
05091
05092 static void
05093 mime_prechar(nkf_char c2, nkf_char c1)
05094 {
05095 if (mimeout_mode > 0){
05096 if (c2 == EOF){
05097 if (base64_count + mimeout_state.count/3*4> 73){
05098 (*o_base64conv)(EOF,0);
05099 oconv_newline(o_base64conv);
05100 (*o_base64conv)(0,SP);
05101 base64_count = 1;
05102 }
05103 } else {
05104 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
05105 (*o_base64conv)(EOF,0);
05106 oconv_newline(o_base64conv);
05107 (*o_base64conv)(0,SP);
05108 base64_count = 1;
05109 mimeout_mode = -1;
05110 }
05111 }
05112 } else if (c2) {
05113 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
05114 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
05115 open_mime(output_mode);
05116 (*o_base64conv)(EOF,0);
05117 oconv_newline(o_base64conv);
05118 (*o_base64conv)(0,SP);
05119 base64_count = 1;
05120 mimeout_mode = -1;
05121 }
05122 }
05123 }
05124
/*
 * Terminate the current MIME encoded-word: write the two-character trailer
 * "?=" through o_mputc, add those two columns to base64_count and reset
 * mimeout_mode to 0.
 */
static void
close_mime(void)
{
    (*o_mputc)('?');
    (*o_mputc)('=');
    base64_count += 2;
    mimeout_mode = 0;
}
05133
05134 static void
05135 eof_mime(void)
05136 {
05137 switch(mimeout_mode) {
05138 case 'Q':
05139 case 'B':
05140 break;
05141 case 2:
05142 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
05143 (*o_mputc)('=');
05144 (*o_mputc)('=');
05145 base64_count += 3;
05146 break;
05147 case 1:
05148 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
05149 (*o_mputc)('=');
05150 base64_count += 2;
05151 break;
05152 }
05153 if (mimeout_mode > 0) {
05154 if (mimeout_f!=FIXED_MIME) {
05155 close_mime();
05156 } else if (mimeout_mode != 'Q')
05157 mimeout_mode = 'B';
05158 }
05159 }
05160
05161 static void
05162 mimeout_addchar(nkf_char c)
05163 {
05164 switch(mimeout_mode) {
05165 case 'Q':
05166 if (c==CR||c==LF) {
05167 (*o_mputc)(c);
05168 base64_count = 0;
05169 } else if(!nkf_isalnum(c)) {
05170 (*o_mputc)('=');
05171 (*o_mputc)(bin2hex(((c>>4)&0xf)));
05172 (*o_mputc)(bin2hex((c&0xf)));
05173 base64_count += 3;
05174 } else {
05175 (*o_mputc)(c);
05176 base64_count++;
05177 }
05178 break;
05179 case 'B':
05180 nkf_state->mimeout_state=c;
05181 (*o_mputc)(basis_64[c>>2]);
05182 mimeout_mode=2;
05183 base64_count ++;
05184 break;
05185 case 2:
05186 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
05187 nkf_state->mimeout_state=c;
05188 mimeout_mode=1;
05189 base64_count ++;
05190 break;
05191 case 1:
05192 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
05193 (*o_mputc)(basis_64[c & 0x3F]);
05194 mimeout_mode='B';
05195 base64_count += 2;
05196 break;
05197 default:
05198 (*o_mputc)(c);
05199 base64_count++;
05200 break;
05201 }
05202 }
05203
05204 static void
05205 mime_putc(nkf_char c)
05206 {
05207 int i, j;
05208 nkf_char lastchar;
05209
05210 if (mimeout_f == FIXED_MIME){
05211 if (mimeout_mode == 'Q'){
05212 if (base64_count > 71){
05213 if (c!=CR && c!=LF) {
05214 (*o_mputc)('=');
05215 put_newline(o_mputc);
05216 }
05217 base64_count = 0;
05218 }
05219 }else{
05220 if (base64_count > 71){
05221 eof_mime();
05222 put_newline(o_mputc);
05223 base64_count = 0;
05224 }
05225 if (c == EOF) {
05226 eof_mime();
05227 }
05228 }
05229 if (c != EOF) {
05230 mimeout_addchar(c);
05231 }
05232 return;
05233 }
05234
05235
05236
05237 if (c == EOF) {
05238 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
05239 j = mimeout_state.count;
05240 mimeout_state.count = 0;
05241 i = 0;
05242 if (mimeout_mode > 0) {
05243 if (!nkf_isblank(mimeout_state.buf[j-1])) {
05244 for (;i<j;i++) {
05245 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
05246 break;
05247 }
05248 mimeout_addchar(mimeout_state.buf[i]);
05249 }
05250 eof_mime();
05251 for (;i<j;i++) {
05252 mimeout_addchar(mimeout_state.buf[i]);
05253 }
05254 } else {
05255 for (;i<j;i++) {
05256 mimeout_addchar(mimeout_state.buf[i]);
05257 }
05258 eof_mime();
05259 }
05260 } else {
05261 for (;i<j;i++) {
05262 mimeout_addchar(mimeout_state.buf[i]);
05263 }
05264 }
05265 return;
05266 }
05267
05268 if (mimeout_state.count > 0){
05269 lastchar = mimeout_state.buf[mimeout_state.count - 1];
05270 }else{
05271 lastchar = -1;
05272 }
05273
05274 if (mimeout_mode=='Q') {
05275 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
05276 if (c == CR || c == LF) {
05277 close_mime();
05278 (*o_mputc)(c);
05279 base64_count = 0;
05280 return;
05281 } else if (c <= SP) {
05282 close_mime();
05283 if (base64_count > 70) {
05284 put_newline(o_mputc);
05285 base64_count = 0;
05286 }
05287 if (!nkf_isblank(c)) {
05288 (*o_mputc)(SP);
05289 base64_count++;
05290 }
05291 } else {
05292 if (base64_count > 70) {
05293 close_mime();
05294 put_newline(o_mputc);
05295 (*o_mputc)(SP);
05296 base64_count = 1;
05297 open_mime(output_mode);
05298 }
05299 if (!nkf_noescape_mime(c)) {
05300 mimeout_addchar(c);
05301 return;
05302 }
05303 }
05304 if (c != 0x1B) {
05305 (*o_mputc)(c);
05306 base64_count++;
05307 return;
05308 }
05309 }
05310 }
05311
05312 if (mimeout_mode <= 0) {
05313 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
05314 output_mode == UTF_8)) {
05315 if (nkf_isspace(c)) {
05316 int flag = 0;
05317 if (mimeout_mode == -1) {
05318 flag = 1;
05319 }
05320 if (c==CR || c==LF) {
05321 if (flag) {
05322 open_mime(output_mode);
05323 output_mode = 0;
05324 } else {
05325 base64_count = 0;
05326 }
05327 }
05328 for (i=0;i<mimeout_state.count;i++) {
05329 (*o_mputc)(mimeout_state.buf[i]);
05330 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
05331 base64_count = 0;
05332 }else{
05333 base64_count++;
05334 }
05335 }
05336 if (flag) {
05337 eof_mime();
05338 base64_count = 0;
05339 mimeout_mode = 0;
05340 }
05341 mimeout_state.buf[0] = (char)c;
05342 mimeout_state.count = 1;
05343 }else{
05344 if (base64_count > 1
05345 && base64_count + mimeout_state.count > 76
05346 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
05347 static const char *str = "boundary=\"";
05348 static int len = 10;
05349 i = 0;
05350
05351 for (; i < mimeout_state.count - len; ++i) {
05352 if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
05353 i += len - 2;
05354 break;
05355 }
05356 }
05357
05358 if (i == 0 || i == mimeout_state.count - len) {
05359 put_newline(o_mputc);
05360 base64_count = 0;
05361 if (!nkf_isspace(mimeout_state.buf[0])){
05362 (*o_mputc)(SP);
05363 base64_count++;
05364 }
05365 }
05366 else {
05367 int j;
05368 for (j = 0; j <= i; ++j) {
05369 (*o_mputc)(mimeout_state.buf[j]);
05370 }
05371 put_newline(o_mputc);
05372 base64_count = 1;
05373 for (; j <= mimeout_state.count; ++j) {
05374 mimeout_state.buf[j - i] = mimeout_state.buf[j];
05375 }
05376 mimeout_state.count -= i;
05377 }
05378 }
05379 mimeout_state.buf[mimeout_state.count++] = (char)c;
05380 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05381 open_mime(output_mode);
05382 }
05383 }
05384 return;
05385 }else{
05386 if (lastchar==CR || lastchar == LF){
05387 for (i=0;i<mimeout_state.count;i++) {
05388 (*o_mputc)(mimeout_state.buf[i]);
05389 }
05390 base64_count = 0;
05391 mimeout_state.count = 0;
05392 }
05393 if (lastchar==SP) {
05394 for (i=0;i<mimeout_state.count-1;i++) {
05395 (*o_mputc)(mimeout_state.buf[i]);
05396 base64_count++;
05397 }
05398 mimeout_state.buf[0] = SP;
05399 mimeout_state.count = 1;
05400 }
05401 open_mime(output_mode);
05402 }
05403 }else{
05404
05405 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
05406 output_mode == UTF_8)) {
05407 if (lastchar == CR || lastchar == LF){
05408 if (nkf_isblank(c)) {
05409 for (i=0;i<mimeout_state.count;i++) {
05410 mimeout_addchar(mimeout_state.buf[i]);
05411 }
05412 mimeout_state.count = 0;
05413 } else {
05414 eof_mime();
05415 for (i=0;i<mimeout_state.count;i++) {
05416 (*o_mputc)(mimeout_state.buf[i]);
05417 }
05418 base64_count = 0;
05419 mimeout_state.count = 0;
05420 }
05421 mimeout_state.buf[mimeout_state.count++] = (char)c;
05422 return;
05423 }
05424 if (nkf_isspace(c)) {
05425 for (i=0;i<mimeout_state.count;i++) {
05426 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
05427 eof_mime();
05428 for (i=0;i<mimeout_state.count;i++) {
05429 (*o_mputc)(mimeout_state.buf[i]);
05430 base64_count++;
05431 }
05432 mimeout_state.count = 0;
05433 }
05434 }
05435 mimeout_state.buf[mimeout_state.count++] = (char)c;
05436 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05437 eof_mime();
05438 for (i=0;i<mimeout_state.count;i++) {
05439 (*o_mputc)(mimeout_state.buf[i]);
05440 base64_count++;
05441 }
05442 mimeout_state.count = 0;
05443 }
05444 return;
05445 }
05446 if (mimeout_state.count>0 && SP<c && c!='=') {
05447 mimeout_state.buf[mimeout_state.count++] = (char)c;
05448 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05449 j = mimeout_state.count;
05450 mimeout_state.count = 0;
05451 for (i=0;i<j;i++) {
05452 mimeout_addchar(mimeout_state.buf[i]);
05453 }
05454 }
05455 return;
05456 }
05457 }
05458 }
05459 if (mimeout_state.count>0) {
05460 j = mimeout_state.count;
05461 mimeout_state.count = 0;
05462 for (i=0;i<j;i++) {
05463 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
05464 break;
05465 mimeout_addchar(mimeout_state.buf[i]);
05466 }
05467 if (i<j) {
05468 eof_mime();
05469 base64_count=0;
05470 for (;i<j;i++) {
05471 (*o_mputc)(mimeout_state.buf[i]);
05472 }
05473 open_mime(output_mode);
05474 }
05475 }
05476 mimeout_addchar(c);
05477 }
05478
/*
 * Output-converter stage for MIME output: gives mime_prechar() the chance to
 * fold the line or open an encoded-word for (c2, c1), then forwards the same
 * pair unchanged to the saved downstream converter o_base64conv.
 */
static void
base64_conv(nkf_char c2, nkf_char c1)
{
    mime_prechar(c2, c1);
    (*o_base64conv)(c2,c1);
}
05485
05486 #ifdef HAVE_ICONV_H
/*
 * State for streaming conversion through the system iconv(3) library.
 * (The original declaration lacked both the typedef name and the closing
 * semicolon, so nothing after it could compile.)
 */
typedef struct nkf_iconv_t {
    iconv_t cd;                 /* conversion descriptor from iconv_open() */
    char *input_buffer;         /* bytes read from the input, not yet converted */
    size_t input_buffer_size;   /* capacity of input_buffer in bytes */
    char *output_buffer;        /* scratch area receiving converted bytes */
    size_t output_buffer_size;  /* capacity of output_buffer in bytes */
} nkf_iconv_t;
05494
05495 static nkf_iconv_t
05496 nkf_iconv_new(char *tocode, char *fromcode)
05497 {
05498 nkf_iconv_t converter;
05499
05500 converter->input_buffer_size = IOBUF_SIZE;
05501 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
05502 converter->output_buffer_size = IOBUF_SIZE * 2;
05503 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
05504 converter->cd = iconv_open(tocode, fromcode);
05505 if (converter->cd == (iconv_t)-1)
05506 {
05507 switch (errno) {
05508 case EINVAL:
05509 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
05510 return -1;
05511 default:
05512 perror("can't iconv_open");
05513 }
05514 }
05515 }
05516
05517 static size_t
05518 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
05519 {
05520 size_t invalid = (size_t)0;
05521 char *input_buffer = converter->input_buffer;
05522 size_t input_length = (size_t)0;
05523 char *output_buffer = converter->output_buffer;
05524 size_t output_length = converter->output_buffer_size;
05525 int c;
05526
05527 do {
05528 if (c != EOF) {
05529 while ((c = (*i_getc)(f)) != EOF) {
05530 input_buffer[input_length++] = c;
05531 if (input_length < converter->input_buffer_size) break;
05532 }
05533 }
05534
05535 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
05536 while (output_length-- > 0) {
05537 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
05538 }
05539 if (ret == (size_t) - 1) {
05540 switch (errno) {
05541 case EINVAL:
05542 if (input_buffer != converter->input_buffer)
05543 memmove(converter->input_buffer, input_buffer, input_length);
05544 break;
05545 case E2BIG:
05546 converter->output_buffer_size *= 2;
05547 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
05548 if (output_buffer == NULL) {
05549 perror("can't realloc");
05550 return -1;
05551 }
05552 converter->output_buffer = output_buffer;
05553 break;
05554 default:
05555 perror("can't iconv");
05556 return -1;
05557 }
05558 } else {
05559 invalid += ret;
05560 }
05561 } while (1);
05562
05563 return invalid;
05564 }
05565
05566
05567 static void
05568 nkf_iconv_close(nkf_iconv_t *convert)
05569 {
05570 nkf_xfree(converter->inbuf);
05571 nkf_xfree(converter->outbuf);
05572 iconv_close(converter->cd);
05573 }
05574 #endif
05575
05576
/*
 * Reset every option flag, converter hook and piece of per-run state to its
 * default value so that a new conversion starts from a clean slate.
 * Takes no arguments and returns nothing; it only assigns file-scope state.
 */
static void
reinit(void)
{
    /* Reset the per-encoding detection state of every known input code. */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
    /* Option flags. */
    unbuf_f = FALSE;
    estab_f = FALSE;
    nop_f = FALSE;
    binmode_f = TRUE;
    rot_f = FALSE;
    hira_f = FALSE;
    alpha_f = FALSE;
    mime_f = MIME_DECODE_DEFAULT;
    mime_decode_f = FALSE;
    mimebuf_f = FALSE;
    broken_f = FALSE;
    iso8859_f = FALSE;
    mimeout_f = FALSE;
    x0201_f = NKF_UNSPECIFIED;  /* resolved to X0201_DEFAULT in module_connection() */
    iso2022jp_f = FALSE;
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
    ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
    no_cp932ext_f = FALSE;
    no_best_fit_chars_f = FALSE;
    encode_fallback = NULL;
    unicode_subchar = '?';
    input_endian = ENDIAN_BIG;
#endif
#ifdef UTF8_OUTPUT_ENABLE
    output_bom_f = FALSE;
    output_endian = ENDIAN_BIG;
#endif
#ifdef UNICODE_NORMALIZATION
    nfc_f = FALSE;
#endif
#ifdef INPUT_OPTION
    cap_f = FALSE;
    url_f = FALSE;
    numchar_f = FALSE;
#endif
#ifdef CHECK_OPTION
    noout_f = FALSE;
    debug_f = FALSE;
#endif
    guess_f = 0;
#ifdef EXEC_IO
    exec_f = 0;
#endif
#ifdef SHIFTJIS_CP932
    cp51932_f = TRUE;
    cp932inv_f = TRUE;
#endif
#ifdef X0212_ENABLE
    x0212_f = FALSE;
    x0213_f = FALSE;
#endif
    /* Clear all 256 entries of prefix_table. */
    {
        int i;
        for (i = 0; i < 256; i++){
            prefix_table[i] = 0;
        }
    }
    /* Buffering, MIME output and line-folding state. */
    hold_count = 0;
    mimeout_state.count = 0;
    mimeout_mode = 0;
    base64_count = 0;
    f_line = 0;
    f_prev = 0;
    fold_preserve_f = FALSE;
    fold_f = FALSE;
    fold_len = 0;
    kanji_intro = DEFAULT_J;
    ascii_intro = DEFAULT_R;
    fold_margin = FOLD_MARGIN;
    /* Detach every optional output-converter stage. */
    o_zconv = no_connection;
    o_fconv = no_connection;
    o_eol_conv = no_connection;
    o_rot_conv = no_connection;
    o_hira_conv = no_connection;
    o_base64conv = no_connection;
    o_iso2022jp_check_conv = no_connection;
    /* Point every byte-level input/output hook at the std_* defaults. */
    o_putc = std_putc;
    i_getc = std_getc;
    i_ungetc = std_ungetc;
    i_bgetc = std_getc;
    i_bungetc = std_ungetc;
    o_mputc = std_putc;
    i_mgetc = std_getc;
    i_mungetc = std_ungetc;
    i_mgetc_buf = std_getc;
    i_mungetc_buf = std_ungetc;
    /* Conversion-loop state. */
    output_mode = ASCII;
    input_mode = ASCII;
    mime_decode_mode = FALSE;
    file_out_f = FALSE;
    eolmode_f = 0;
    input_eol = 0;
    prev_cr = 0;
    option_mode = 0;
    z_prev2=0,z_prev1=0;
#ifdef CHECK_OPTION
    iconv_for_check = 0;
#endif
    /* Forget any previously selected or detected encodings. */
    input_codename = NULL;
    input_encoding = NULL;
    output_encoding = NULL;
    nkf_state_init();
#ifdef WIN32DLL
    reinitdll();
#endif
}
05694
/*
 * Build the output-converter chain and the input getc/ungetc chain from the
 * current option flags.
 *
 * Output side: `oconv` starts as the converter for the output encoding; each
 * enabled feature saves the current `oconv` in its own o_* pointer and
 * installs itself as the new `oconv`, so the stage installed last is the one
 * called first.  Input side: each enabled feature saves the current
 * i_getc/i_ungetc pair in its own hooks and installs its own pair.
 *
 * Returns 0 on success, or -1 when no output encoding is set, none can be
 * obtained from nkf_default_encoding(), and neither noout_f nor guess_f is on.
 */
static int
module_connection(void)
{
    if (input_encoding) set_input_encoding(input_encoding);
    if (!output_encoding) {
        output_encoding = nkf_default_encoding();
    }
    if (!output_encoding) {
        /* With no real output needed, ISO-2022-JP is used as a stand-in. */
        if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
        else return -1;
    }
    set_output_encoding(output_encoding);
    oconv = nkf_enc_to_oconv(output_encoding);
    o_putc = std_putc;
    if (nkf_enc_unicode_p(output_encoding))
        output_mode = UTF_8;

    if (x0201_f == NKF_UNSPECIFIED) {
        x0201_f = X0201_DEFAULT;
    }

    /* replace continuation module, from output side */

    /* output redirection */
#ifdef CHECK_OPTION
    if (noout_f || guess_f){
        o_putc = no_putc;
    }
#endif
    if (mimeout_f) {
        /* mime_putc becomes the byte sink; the previous sink is kept in o_mputc. */
        o_mputc = o_putc;
        o_putc = mime_putc;
        if (mimeout_f == TRUE) {
            o_base64conv = oconv; oconv = base64_conv;
        }
        /* base64_count = 0; */
    }

    if (eolmode_f || guess_f) {
        o_eol_conv = oconv; oconv = eol_conv;
    }
    if (rot_f) {
        o_rot_conv = oconv; oconv = rot_conv;
    }
    if (iso2022jp_f) {
        o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
    }
    if (hira_f) {
        o_hira_conv = oconv; oconv = hira_conv;
    }
    if (fold_f) {
        o_fconv = oconv; oconv = fold_conv;
        f_line = 0;
    }
    if (alpha_f || x0201_f) {
        o_zconv = oconv; oconv = z_conv;
    }

    /* Input side: start from the plain reader and wrap it feature by feature. */
    i_getc = std_getc;
    i_ungetc = std_ungetc;
    /* input redirection */
#ifdef INPUT_OPTION
    if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
        i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
    }
    if (url_f){
        i_ugetc = i_getc; i_getc = url_getc;
        i_uungetc = i_ungetc; i_ungetc= url_ungetc;
    }
#endif
#ifdef NUMCHAR_OPTION
    if (numchar_f){
        i_ngetc = i_getc; i_getc = numchar_getc;
        i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
    }
#endif
#ifdef UNICODE_NORMALIZATION
    if (nfc_f){
        i_nfc_getc = i_getc; i_getc = nfc_getc;
        i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
    }
#endif
    /* The MIME reader is only installed up front when mimebuf_f is FIXED_MIME. */
    if (mime_f && mimebuf_f==FIXED_MIME) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
    }
    if (broken_f & 1) {
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
    }
    /* Select the input converter: the one for the given encoding, else e_iconv. */
    if (input_encoding) {
        set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
    } else {
        set_iconv(FALSE, e_iconv);
    }

    /* Reset the detection state of every known input code. */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
    return 0;
}
05800
05801
05802
05803
05804
05805 #if !defined(PERL_XS) && !defined(WIN32DLL)
05806 static nkf_char
05807 noconvert(FILE *f)
05808 {
05809 nkf_char c;
05810
05811 if (nop_f == 2)
05812 module_connection();
05813 while ((c = (*i_getc)(f)) != EOF)
05814 (*o_putc)(c);
05815 (*o_putc)(EOF);
05816 return 1;
05817 }
05818 #endif
05819
/*
 * Control-flow shorthands for the main loop of kanji_convert().  They expand
 * to bare `continue`/`break` and refer to that function's locals, so they are
 * only meaningful directly inside its loop.  SKIP and MORE expand to TWO
 * statements: always use them inside braces, never as the sole body of an
 * unbraced if/else.
 */
/* Go on to the next input byte. */
#define NEXT continue
/* Drop any pending first byte and go on to the next input byte. */
#define SKIP c2=0;continue
/* Remember c1 as the pending first byte and fetch the next byte. */
#define MORE c2=c1;continue
/* No-op: fall through to the dispatch switch at the bottom of the loop body. */
#define SEND (void)0
/* Leave the enclosing loop. */
#define LAST break
/*
 * Switch the input character set to `mode`, clear shift_mode, and record and
 * debug-log the input code name "ISO-2022-JP".
 */
#define set_input_mode(mode) do { \
    input_mode = mode; \
    shift_mode = 0; \
    set_input_codename("ISO-2022-JP"); \
    debug("ISO-2022-JP"); \
} while (0)
05831
/*
 * Main conversion loop: read the stream `f` through i_getc, recognise
 * character-set switches and multi-byte sequences, and feed characters to the
 * input converter (*iconv) or straight to the output chain (*oconv).
 *
 * Local conventions: c1 is the byte just read; c2 is a pending first byte of
 * a two-byte character (0 when none) or a character-set tag such as
 * JIS_X_0201_1976_K / ISO_8859_1.  The NEXT/SKIP/MORE/SEND/LAST macros steer
 * the loop; SEND falls through to the switch on input_mode at the bottom.
 *
 * Returns -1 if module_connection() fails, 0 otherwise.
 */
static int
kanji_convert(FILE *f)
{
    nkf_char c1=0, c2=0, c3=0, c4=0;
    int shift_mode = 0; /* set to 1 by SO and by ESC ( I; cleared by SI and set_input_mode() */
    int g2 = 0;         /* set to ISO_8859_1 by ESC . A; consulted by ESC N */
    int is_8bit = FALSE;

    if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
        is_8bit = TRUE;
    }

    input_mode = ASCII;
    output_mode = ASCII;

    if (module_connection() < 0) {
#if !defined(PERL_XS) && !defined(WIN32DLL)
        fprintf(stderr, "no output encoding given\n");
#endif
        return -1;
    }
    check_bom(f);

#ifdef UTF8_INPUT_ENABLE
    /* UTF-32 input: consume four bytes per code unit. */
    if(iconv == w_iconv32){
        while ((c1 = (*i_getc)(f)) != EOF &&
               (c2 = (*i_getc)(f)) != EOF &&
               (c3 = (*i_getc)(f)) != EOF &&
               (c4 = (*i_getc)(f)) != EOF) {
            nkf_char c5, c6, c7, c8;
            if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
                /* The converter wants to see the following unit before deciding. */
                if ((c5 = (*i_getc)(f)) != EOF &&
                    (c6 = (*i_getc)(f)) != EOF &&
                    (c7 = (*i_getc)(f)) != EOF &&
                    (c8 = (*i_getc)(f)) != EOF) {
                    if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
                        /* Non-zero result: push the second unit back and emit the first alone. */
                        (*i_ungetc)(c8, f);
                        (*i_ungetc)(c7, f);
                        (*i_ungetc)(c6, f);
                        (*i_ungetc)(c5, f);
                        nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
                    }
                } else {
                    nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
                }
            }
        }
        goto finished;
    }
    /* UTF-16 input: two bytes per unit, four when the converter asks for two more. */
    else if (iconv == w_iconv16) {
        while ((c1 = (*i_getc)(f)) != EOF &&
               (c2 = (*i_getc)(f)) != EOF) {
            size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
            if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
                (c3 = (*i_getc)(f)) != EOF &&
                (c4 = (*i_getc)(f)) != EOF) {
                nkf_iconv_utf_16(c1, c2, c3, c4);
            } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
                if ((c3 = (*i_getc)(f)) != EOF &&
                    (c4 = (*i_getc)(f)) != EOF) {
                    if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
                        (*i_ungetc)(c4, f);
                        (*i_ungetc)(c3, f);
                        nkf_iconv_utf_16_nocombine(c1, c2);
                    }
                } else {
                    nkf_iconv_utf_16_nocombine(c1, c2);
                }
            }
        }
        goto finished;
    }
#endif

    /* Byte-oriented input. */
    while ((c1 = (*i_getc)(f)) != EOF) {
#ifdef INPUT_CODE_FIX
        if (!input_encoding)
#endif
            code_status(c1);
        if (c2) {
            /* c1 is the second byte of a pair whose first byte is in c2 */
            if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
                /* first byte is above the 7-bit range (above 0x92 for CP5022x input) */
                if (!estab_f&&!mime_decode_mode) {
                    /* estab_f not set (presumably: input code not established yet);
                     * hand the pair to h_conv() */
                    if (h_conv(f, c2, c1)==EOF) {
                        LAST;
                    }
                    else {
                        SKIP;
                    }
                }
                else {
                    /* estab_f set, or inside MIME decoding */
                    if (c1 < 0x40) {
                        /* second byte below 0x40: drop the pair */
                        SKIP;
                    } else {
                        SEND;
                    }
                }
            }
            else {
                /* first byte within the 7-bit range */
                SEND;
            }
        }
        else if (nkf_char_unicode_p(c1)) {
            /* already a Unicode-tagged nkf_char: pass it straight to the output chain */
            (*oconv)(0, c1);
            NEXT;
        }
        else {
            /* c1 is the first byte of a new character */
            if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
                /* 0x7F..0x91 as a lead byte while in JIS X 0208 mode (mapped to
                 * U+E000.. in the JIS_X_0208 case below when ms_ucs_map_f is set) */
                MORE;
            }else if (input_codename && input_codename[0] == 'I' &&
                      0xA1 <= c1 && c1 <= 0xDF) {
                /* input code name starts with 'I' and byte is 0xA1..0xDF:
                 * treat as JIS X 0201 kana */
                c2 = JIS_X_0201_1976_K;
                c1 &= 0x7f;
                SEND;
            } else if (c1 > DEL) {
                /* 8-bit byte */
                if (!estab_f && !iso8859_f) {
                    /* neither estab_f nor iso8859_f set: wait for the second byte */
                    MORE;
                } else { /* estab_f or iso8859_f is set */
                    if (iso8859_f) {
                        c2 = ISO_8859_1;
                        c1 &= 0x7f;
                        SEND;
                    }
                    else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
                             (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
                        /* single-byte case under s_iconv (0xA0..0xDF) or CP10001 (0xFD/0xFE) */
                        c2 = JIS_X_0201_1976_K;
                        c1 &= 0x7f;
                        SEND;
                    }
                    else {
                        /* first byte of a two-byte character */
                        MORE;
                    }
                }
            } else if (SP < c1 && c1 < DEL) {
                /* printable 7-bit byte */
                if (shift_mode) {
                    /* shift_mode is on (SO or ESC ( I) */
                    if (iso8859_f) {
                        c2 = ISO_8859_1;
                        SEND;
                    } else if (nkf_byte_jisx0201_katakana_p(c1)){
                        /* send it as a JIS X 0201 kana byte */
                        c2 = JIS_X_0201_1976_K;
                        SEND;
                    } else {
                        /* not a valid kana byte: drop it */
                        SKIP;
                    }
                } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
                           input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
                    /* a two-byte set is selected: this is its first byte */
                    MORE;
                } else if (c1 == '=' && mime_f && !mime_decode_mode) {
                    /* possible start of a MIME encoded-word ("=?") */
                    if ((c1 = (*i_getc)(f)) == EOF) {
                        (*oconv)(0, '=');
                        LAST;
                    } else if (c1 == '?') {
                        /* "=?" seen */
                        if(mime_f == STRICT_MIME) {
                            /* strict header check */
                            if (mime_begin_strict(f) == EOF)
                                LAST;
                            SKIP;
                        } else if (mime_begin(f) == EOF)
                            LAST;
                        SKIP;
                    } else {
                        /* plain '=': emit it and re-read the following byte */
                        (*oconv)(0, '=');
                        (*i_ungetc)(c1,f);
                        SKIP;
                    }
                } else {
                    /* ordinary single-byte character */
                    SEND;
                }
            } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
                shift_mode = 0;
                SKIP;
            } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
                shift_mode = 1;
                SKIP;
            } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                /* ISO-2022 style escape sequence */
                if ((c1 = (*i_getc)(f)) == EOF) {
                    (*oconv)(0, ESC);
                    LAST;
                }
                else if (c1 == '&') {
                    /* ESC & x : the byte after '&' is read and discarded */
                    if ((c1 = (*i_getc)(f)) == EOF) {
                        LAST;
                    } else {
                        SKIP;
                    }
                }
                else if (c1 == '$') {
                    /* ESC $ ... : selects a two-byte set */
                    if ((c1 = (*i_getc)(f)) == EOF) {
                        /* truncated sequence at end of input: nothing is emitted */
                        LAST;
                    } else if (c1 == '@' || c1 == 'B') {
                        /* ESC $ @ / ESC $ B */
                        set_input_mode(JIS_X_0208);
                        SKIP;
                    } else if (c1 == '(') {
                        /* ESC $ ( x */
                        if ((c1 = (*i_getc)(f)) == EOF) {
                            /* truncated sequence at end of input: nothing is emitted */
                            LAST;
                        } else if (c1 == '@'|| c1 == 'B') {
                            /* ESC $ ( @ / ESC $ ( B */
                            set_input_mode(JIS_X_0208);
                            SKIP;
#ifdef X0212_ENABLE
                        } else if (c1 == 'D'){
                            set_input_mode(JIS_X_0212);
                            SKIP;
#endif /* X0212_ENABLE */
                        } else if (c1 == 'O' || c1 == 'Q'){
                            set_input_mode(JIS_X_0213_1);
                            SKIP;
                        } else if (c1 == 'P'){
                            set_input_mode(JIS_X_0213_2);
                            SKIP;
                        } else {
                            /* unrecognised final byte: pass the sequence through */
                            (*oconv)(0, ESC);
                            (*oconv)(0, '$');
                            (*oconv)(0, '(');
                            (*oconv)(0, c1);
                            SKIP;
                        }
                    } else if (broken_f&0x2) {
                        /* broken_f bit 1: accept any other final byte as JIS X 0208 */
                        input_mode = JIS_X_0208;
                        shift_mode = 0;
                        SKIP;
                    } else {
                        (*oconv)(0, ESC);
                        (*oconv)(0, '$');
                        (*oconv)(0, c1);
                        SKIP;
                    }
                } else if (c1 == '(') {
                    /* ESC ( x : selects a one-byte set */
                    if ((c1 = (*i_getc)(f)) == EOF) {
                        /* truncated sequence at end of input: nothing is emitted */
                        LAST;
                    }
                    else if (c1 == 'I') {
                        /* ESC ( I : JIS X 0201 kana, with shift_mode switched on */
                        set_input_mode(JIS_X_0201_1976_K);
                        shift_mode = 1;
                        SKIP;
                    }
                    else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
                        /* ESC ( B / J / H : back to ASCII mode */
                        set_input_mode(ASCII);
                        SKIP;
                    }
                    else if (broken_f&0x2) {
                        set_input_mode(ASCII);
                        SKIP;
                    }
                    else {
                        (*oconv)(0, ESC);
                        (*oconv)(0, '(');
                        SEND;
                    }
                }
                else if (c1 == '.') {
                    /* ESC . x */
                    if ((c1 = (*i_getc)(f)) == EOF) {
                        LAST;
                    }
                    else if (c1 == 'A') {
                        /* ESC . A : remember ISO-8859-1 in g2 */
                        g2 = ISO_8859_1;
                        SKIP;
                    }
                    else {
                        (*oconv)(0, ESC);
                        (*oconv)(0, '.');
                        SEND;
                    }
                }
                else if (c1 == 'N') {
                    /* ESC N x : when g2 is ISO_8859_1, the next byte is sent with that tag */
                    c1 = (*i_getc)(f);
                    if (g2 == ISO_8859_1) {
                        c2 = ISO_8859_1;
                        SEND;
                    }else{
                        (*i_ungetc)(c1, f);
                        /* g2 not set: emit the ESC by itself */
                        (*oconv)(0, ESC);
                        SEND;
                    }
                }
                else {
                    /* unrecognised escape: emit ESC, then the byte via the normal path */
                    (*oconv)(0, ESC);
                    SEND;
                }
            } else if (c1 == ESC && iconv == s_iconv) {
                /* ESC under s_iconv when the branch above did not apply */
                if ((c1 = (*i_getc)(f)) == EOF) {
                    (*oconv)(0, ESC);
                    LAST;
                } else if (c1 == '$') {
                    /* ESC $ x */
                    if ((c1 = (*i_getc)(f)) == EOF) {
                        LAST;
                    } else if (('E' <= c1 && c1 <= 'G') ||
                               ('O' <= c1 && c1 <= 'Q')) {
                        /*
                         * ESC $ followed by E..G or O..Q: the letter selects a
                         * base value from the table below (indexed by c1 % 7),
                         * which is added to each following byte in the range
                         * SP..'z' to form the emitted code point.
                         */
                        static const nkf_char jphone_emoji_first_table[7] =
                        {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
                        c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
                        if ((c1 = (*i_getc)(f)) == EOF) LAST;
                        while (SP <= c1 && c1 <= 'z') {
                            (*oconv)(0, c1 + c3);
                            /* NOTE: LAST here only leaves this inner while loop */
                            if ((c1 = (*i_getc)(f)) == EOF) LAST;
                        }
                        SKIP;
                    }
                    else {
                        (*oconv)(0, ESC);
                        (*oconv)(0, '$');
                        SEND;
                    }
                }
                else {
                    /* other escape: emit ESC, then the byte via the normal path */
                    (*oconv)(0, ESC);
                    SEND;
                }
            } else if (c1 == LF || c1 == CR) {
                if (broken_f&4) {
                    /* broken_f bit 2: every line end resets to ASCII and clears the converter choice */
                    input_mode = ASCII; set_iconv(FALSE, 0);
                    SEND;
                } else if (mime_decode_f && !mime_decode_mode){
                    /* A line end followed by SP is swallowed, leaving just the SP
                     * (presumably unfolding of continued header lines). */
                    if (c1 == LF) {
                        if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
                            i_ungetc(SP,f);
                            continue;
                        } else {
                            i_ungetc(c1,f);
                        }
                        c1 = LF;
                        SEND;
                    } else { /* c1 == CR */
                        if ((c1=(*i_getc)(f))!=EOF) {
                            if (c1==SP) {
                                i_ungetc(SP,f);
                                continue;
                            } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
                                i_ungetc(SP,f);
                                continue;
                            } else {
                                i_ungetc(c1,f);
                            }
                            i_ungetc(LF,f);
                        } else {
                            i_ungetc(c1,f);
                        }
                        c1 = CR;
                        SEND;
                    }
                }
            } else
                SEND;
        }
        /* send: dispatch (c2, c1) according to the current input mode */
        switch(input_mode){
        case ASCII:
            switch ((*iconv)(c2, c1, 0)) {  /* negative results request more input */
            case -2:
                /* two more bytes wanted: packed into the third argument as c3<<8 | c4 */
                if ((c3 = (*i_getc)(f)) != EOF) {
                    code_status(c3);
                    c3 <<= 8;
                    if ((c4 = (*i_getc)(f)) != EOF) {
                        code_status(c4);
                        (*iconv)(c2, c1, c3|c4);
                    }
                }
                break;
            case -3:
                /* two-byte character that may combine with the following two bytes */
                if ((c3 = (*i_getc)(f)) != EOF) {
                    if ((c4 = (*i_getc)(f)) != EOF) {
                        if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
                            (*i_ungetc)(c4, f);
                            (*i_ungetc)(c3, f);
                            w_iconv_nocombine(c2, c1, 0);
                        }
                    } else {
                        (*i_ungetc)(c3, f);
                        w_iconv_nocombine(c2, c1, 0);
                    }
                } else {
                    w_iconv_nocombine(c2, c1, 0);
                }
                break;
            case -1:
                /* one more byte wanted */
                if ((c3 = (*i_getc)(f)) != EOF) {
                    code_status(c3);
                    if ((*iconv)(c2, c1, c3) == -3) {
                        /* three-byte character that may combine with the following three bytes */
                        nkf_char c5, c6;
                        if ((c4 = (*i_getc)(f)) != EOF) {
                            if ((c5 = (*i_getc)(f)) != EOF) {
                                if ((c6 = (*i_getc)(f)) != EOF) {
                                    if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
                                        (*i_ungetc)(c6, f);
                                        (*i_ungetc)(c5, f);
                                        (*i_ungetc)(c4, f);
                                        w_iconv_nocombine(c2, c1, c3);
                                    }
                                } else {
                                    (*i_ungetc)(c5, f);
                                    (*i_ungetc)(c4, f);
                                    w_iconv_nocombine(c2, c1, c3);
                                }
                            } else {
                                (*i_ungetc)(c4, f);
                                w_iconv_nocombine(c2, c1, c3);
                            }
                        } else {
                            w_iconv_nocombine(c2, c1, c3);
                        }
                    }
                }
                break;
            }
            break;
        case JIS_X_0208:
        case JIS_X_0213_1:
            if (ms_ucs_map_f &&
                0x7F <= c2 && c2 <= 0x92 &&
                0x21 <= c1 && c1 <= 0x7E) {
                /* lead byte 0x7F..0x92: map onto code points starting at U+E000, 94 per row */
                c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
                c2 = 0;
            }
            (*oconv)(c2, c1);
            break;
#ifdef X0212_ENABLE
        case JIS_X_0212:
            (*oconv)(PREFIX_EUCG3 | c2, c1);
            break;
#endif /* X0212_ENABLE */
        case JIS_X_0213_2:
            (*oconv)(PREFIX_EUCG3 | c2, c1);
            break;
        default:
            /* any other mode: the mode value itself is passed as the first argument */
            (*oconv)(input_mode, c1);
        }

        c2 = 0;
        c3 = 0;
        continue;
        /* goto next_word */
    }

finished:
    /* epilogue: tell the input converter that the stream has ended */
    (*iconv)(EOF, 0, 0);
    if (!input_codename)
    {
        if (is_8bit) {
            /* No code name was recorded: pick the candidate with the lowest score. */
            struct input_code *p = input_code_list;
            struct input_code *result = p;
            while (p->name){
                if (p->score < result->score) result = p;
                ++p;
            }
            set_input_codename(result->name);
#ifdef CHECK_OPTION
            debug(result->name);
#endif
        }
    }
    return 0;
}
06346
06347
06348
06349
06350
06351
06352
06353
06354 static int
06355 options(unsigned char *cp)
06356 {
06357 nkf_char i, j;
06358 unsigned char *p;
06359 unsigned char *cp_back = NULL;
06360 nkf_encoding *enc;
06361
06362 if (option_mode==1)
06363 return 0;
06364 while(*cp && *cp++!='-');
06365 while (*cp || cp_back) {
06366 if(!*cp){
06367 cp = cp_back;
06368 cp_back = NULL;
06369 continue;
06370 }
06371 p = 0;
06372 switch (*cp++) {
06373 case '-':
06374 if (!*cp || *cp == SP) {
06375 option_mode = 1;
06376 return 0;
06377 }
06378 for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
06379 p = (unsigned char *)long_option[i].name;
06380 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
06381 if (*p == cp[j] || cp[j] == SP){
06382 p = &cp[j] + 1;
06383 break;
06384 }
06385 p = 0;
06386 }
06387 if (p == 0) {
06388 #if !defined(PERL_XS) && !defined(WIN32DLL)
06389 fprintf(stderr, "unknown long option: --%s\n", cp);
06390 #endif
06391 return -1;
06392 }
06393 while(*cp && *cp != SP && cp++);
06394 if (long_option[i].alias[0]){
06395 cp_back = cp;
06396 cp = (unsigned char *)long_option[i].alias;
06397 }else{
06398 #ifndef PERL_XS
06399 if (strcmp(long_option[i].name, "help") == 0){
06400 usage();
06401 exit(EXIT_SUCCESS);
06402 }
06403 #endif
06404 if (strcmp(long_option[i].name, "ic=") == 0){
06405 enc = nkf_enc_find((char *)p);
06406 if (!enc) continue;
06407 input_encoding = enc;
06408 continue;
06409 }
06410 if (strcmp(long_option[i].name, "oc=") == 0){
06411 enc = nkf_enc_find((char *)p);
06412
06413 if (!enc) continue;
06414 output_encoding = enc;
06415 continue;
06416 }
06417 if (strcmp(long_option[i].name, "guess=") == 0){
06418 if (p[0] == '0' || p[0] == '1') {
06419 guess_f = 1;
06420 } else {
06421 guess_f = 2;
06422 }
06423 continue;
06424 }
06425 #ifdef OVERWRITE
06426 if (strcmp(long_option[i].name, "overwrite") == 0){
06427 file_out_f = TRUE;
06428 overwrite_f = TRUE;
06429 preserve_time_f = TRUE;
06430 continue;
06431 }
06432 if (strcmp(long_option[i].name, "overwrite=") == 0){
06433 file_out_f = TRUE;
06434 overwrite_f = TRUE;
06435 preserve_time_f = TRUE;
06436 backup_f = TRUE;
06437 backup_suffix = (char *)p;
06438 continue;
06439 }
06440 if (strcmp(long_option[i].name, "in-place") == 0){
06441 file_out_f = TRUE;
06442 overwrite_f = TRUE;
06443 preserve_time_f = FALSE;
06444 continue;
06445 }
06446 if (strcmp(long_option[i].name, "in-place=") == 0){
06447 file_out_f = TRUE;
06448 overwrite_f = TRUE;
06449 preserve_time_f = FALSE;
06450 backup_f = TRUE;
06451 backup_suffix = (char *)p;
06452 continue;
06453 }
06454 #endif
06455 #ifdef INPUT_OPTION
06456 if (strcmp(long_option[i].name, "cap-input") == 0){
06457 cap_f = TRUE;
06458 continue;
06459 }
06460 if (strcmp(long_option[i].name, "url-input") == 0){
06461 url_f = TRUE;
06462 continue;
06463 }
06464 #endif
06465 #ifdef NUMCHAR_OPTION
06466 if (strcmp(long_option[i].name, "numchar-input") == 0){
06467 numchar_f = TRUE;
06468 continue;
06469 }
06470 #endif
06471 #ifdef CHECK_OPTION
06472 if (strcmp(long_option[i].name, "no-output") == 0){
06473 noout_f = TRUE;
06474 continue;
06475 }
06476 if (strcmp(long_option[i].name, "debug") == 0){
06477 debug_f = TRUE;
06478 continue;
06479 }
06480 #endif
06481 if (strcmp(long_option[i].name, "cp932") == 0){
06482 #ifdef SHIFTJIS_CP932
06483 cp51932_f = TRUE;
06484 cp932inv_f = -TRUE;
06485 #endif
06486 #ifdef UTF8_OUTPUT_ENABLE
06487 ms_ucs_map_f = UCS_MAP_CP932;
06488 #endif
06489 continue;
06490 }
06491 if (strcmp(long_option[i].name, "no-cp932") == 0){
06492 #ifdef SHIFTJIS_CP932
06493 cp51932_f = FALSE;
06494 cp932inv_f = FALSE;
06495 #endif
06496 #ifdef UTF8_OUTPUT_ENABLE
06497 ms_ucs_map_f = UCS_MAP_ASCII;
06498 #endif
06499 continue;
06500 }
06501 #ifdef SHIFTJIS_CP932
06502 if (strcmp(long_option[i].name, "cp932inv") == 0){
06503 cp932inv_f = -TRUE;
06504 continue;
06505 }
06506 #endif
06507
06508 #ifdef X0212_ENABLE
06509 if (strcmp(long_option[i].name, "x0212") == 0){
06510 x0212_f = TRUE;
06511 continue;
06512 }
06513 #endif
06514
06515 #ifdef EXEC_IO
06516 if (strcmp(long_option[i].name, "exec-in") == 0){
06517 exec_f = 1;
06518 return 0;
06519 }
06520 if (strcmp(long_option[i].name, "exec-out") == 0){
06521 exec_f = -1;
06522 return 0;
06523 }
06524 #endif
06525 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
06526 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
06527 no_cp932ext_f = TRUE;
06528 continue;
06529 }
06530 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
06531 no_best_fit_chars_f = TRUE;
06532 continue;
06533 }
06534 if (strcmp(long_option[i].name, "fb-skip") == 0){
06535 encode_fallback = NULL;
06536 continue;
06537 }
06538 if (strcmp(long_option[i].name, "fb-html") == 0){
06539 encode_fallback = encode_fallback_html;
06540 continue;
06541 }
06542 if (strcmp(long_option[i].name, "fb-xml") == 0){
06543 encode_fallback = encode_fallback_xml;
06544 continue;
06545 }
06546 if (strcmp(long_option[i].name, "fb-java") == 0){
06547 encode_fallback = encode_fallback_java;
06548 continue;
06549 }
06550 if (strcmp(long_option[i].name, "fb-perl") == 0){
06551 encode_fallback = encode_fallback_perl;
06552 continue;
06553 }
06554 if (strcmp(long_option[i].name, "fb-subchar") == 0){
06555 encode_fallback = encode_fallback_subchar;
06556 continue;
06557 }
06558 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
06559 encode_fallback = encode_fallback_subchar;
06560 unicode_subchar = 0;
06561 if (p[0] != '0'){
06562
06563 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
06564 unicode_subchar *= 10;
06565 unicode_subchar += hex2bin(p[i]);
06566 }
06567 }else if(p[1] == 'x' || p[1] == 'X'){
06568
06569 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
06570 unicode_subchar <<= 4;
06571 unicode_subchar |= hex2bin(p[i]);
06572 }
06573 }else{
06574
06575 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
06576 unicode_subchar *= 8;
06577 unicode_subchar += hex2bin(p[i]);
06578 }
06579 }
06580 w16e_conv(unicode_subchar, &i, &j);
06581 unicode_subchar = i<<8 | j;
06582 continue;
06583 }
06584 #endif
06585 #ifdef UTF8_OUTPUT_ENABLE
06586 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
06587 ms_ucs_map_f = UCS_MAP_MS;
06588 continue;
06589 }
06590 #endif
06591 #ifdef UNICODE_NORMALIZATION
06592 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
06593 nfc_f = TRUE;
06594 continue;
06595 }
06596 #endif
06597 if (strcmp(long_option[i].name, "prefix=") == 0){
06598 if (nkf_isgraph(p[0])){
06599 for (i = 1; nkf_isgraph(p[i]); i++){
06600 prefix_table[p[i]] = p[0];
06601 }
06602 }
06603 continue;
06604 }
06605 #if !defined(PERL_XS) && !defined(WIN32DLL)
06606 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
06607 #endif
06608 return -1;
06609 }
06610 continue;
06611 case 'b':
06612 unbuf_f = FALSE;
06613 continue;
06614 case 'u':
06615 unbuf_f = TRUE;
06616 continue;
06617 case 't':
06618 if (*cp=='1') {
06619
06620 cp++;
06621 nop_f = TRUE;
06622 } else if (*cp=='2') {
06623
06624
06625
06626
06627
06628
06629 cp++;
06630 nop_f = 2;
06631 } else
06632 nop_f = TRUE;
06633 continue;
06634 case 'j':
06635 case 'n':
06636 output_encoding = nkf_enc_from_index(ISO_2022_JP);
06637 continue;
06638 case 'e':
06639 output_encoding = nkf_enc_from_index(EUCJP_NKF);
06640 continue;
06641 case 's':
06642 output_encoding = nkf_enc_from_index(SHIFT_JIS);
06643 continue;
06644 case 'l':
06645 iso8859_f = TRUE;
06646 input_encoding = nkf_enc_from_index(ISO_8859_1);
06647 continue;
06648 case 'i':
06649 if (*cp=='@'||*cp=='B')
06650 kanji_intro = *cp++;
06651 continue;
06652 case 'o':
06653
06654 if (*cp=='J'||*cp=='B'||*cp=='H')
06655 ascii_intro = *cp++;
06656 continue;
06657 case 'h':
06658
06659
06660
06661
06662 if ('9'>= *cp && *cp>='0')
06663 hira_f |= (*cp++ -'0');
06664 else
06665 hira_f |= 1;
06666 continue;
06667 case 'r':
06668 rot_f = TRUE;
06669 continue;
06670 #if defined(MSDOS) || defined(__OS2__)
06671 case 'T':
06672 binmode_f = FALSE;
06673 continue;
06674 #endif
06675 #ifndef PERL_XS
06676 case 'V':
06677 show_configuration();
06678 exit(EXIT_SUCCESS);
06679 break;
06680 case 'v':
06681 version();
06682 exit(EXIT_SUCCESS);
06683 break;
06684 #endif
06685 #ifdef UTF8_OUTPUT_ENABLE
06686 case 'w':
06687 if (cp[0] == '8') {
06688 cp++;
06689 if (cp[0] == '0'){
06690 cp++;
06691 output_encoding = nkf_enc_from_index(UTF_8N);
06692 } else {
06693 output_bom_f = TRUE;
06694 output_encoding = nkf_enc_from_index(UTF_8_BOM);
06695 }
06696 } else {
06697 int enc_idx;
06698 if ('1'== cp[0] && '6'==cp[1]) {
06699 cp += 2;
06700 enc_idx = UTF_16;
06701 } else if ('3'== cp[0] && '2'==cp[1]) {
06702 cp += 2;
06703 enc_idx = UTF_32;
06704 } else {
06705 output_encoding = nkf_enc_from_index(UTF_8);
06706 continue;
06707 }
06708 if (cp[0]=='L') {
06709 cp++;
06710 output_endian = ENDIAN_LITTLE;
06711 output_bom_f = TRUE;
06712 } else if (cp[0] == 'B') {
06713 cp++;
06714 output_bom_f = TRUE;
06715 }
06716 if (cp[0] == '0'){
06717 output_bom_f = FALSE;
06718 cp++;
06719 enc_idx = enc_idx == UTF_16
06720 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06721 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
06722 } else {
06723 enc_idx = enc_idx == UTF_16
06724 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
06725 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
06726 }
06727 output_encoding = nkf_enc_from_index(enc_idx);
06728 }
06729 continue;
06730 #endif
06731 #ifdef UTF8_INPUT_ENABLE
06732 case 'W':
06733 if (cp[0] == '8') {
06734 cp++;
06735 input_encoding = nkf_enc_from_index(UTF_8);
06736 }else{
06737 int enc_idx;
06738 if ('1'== cp[0] && '6'==cp[1]) {
06739 cp += 2;
06740 input_endian = ENDIAN_BIG;
06741 enc_idx = UTF_16;
06742 } else if ('3'== cp[0] && '2'==cp[1]) {
06743 cp += 2;
06744 input_endian = ENDIAN_BIG;
06745 enc_idx = UTF_32;
06746 } else {
06747 input_encoding = nkf_enc_from_index(UTF_8);
06748 continue;
06749 }
06750 if (cp[0]=='L') {
06751 cp++;
06752 input_endian = ENDIAN_LITTLE;
06753 } else if (cp[0] == 'B') {
06754 cp++;
06755 input_endian = ENDIAN_BIG;
06756 }
06757 enc_idx = (enc_idx == UTF_16
06758 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06759 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
06760 input_encoding = nkf_enc_from_index(enc_idx);
06761 }
06762 continue;
06763 #endif
06764
06765 case 'J':
06766 input_encoding = nkf_enc_from_index(ISO_2022_JP);
06767 continue;
06768 case 'E':
06769 input_encoding = nkf_enc_from_index(EUCJP_NKF);
06770 continue;
06771 case 'S':
06772 input_encoding = nkf_enc_from_index(SHIFT_JIS);
06773 continue;
06774 case 'Z':
06775
06776
06777
06778
06779
06780
06781
06782 while ('0'<= *cp && *cp <='4') {
06783 alpha_f |= 1 << (*cp++ - '0');
06784 }
06785 alpha_f |= 1;
06786 continue;
06787 case 'x':
06788 x0201_f = FALSE;
06789
06790
06791
06792
06793
06794
06795
06796
06797
06798
06799 continue;
06800 case 'X':
06801 x0201_f = TRUE;
06802 continue;
06803 case 'F':
06804 fold_preserve_f = TRUE;
06805 case 'f':
06806 fold_f = TRUE;
06807 fold_len = 0;
06808 while('0'<= *cp && *cp <='9') {
06809 fold_len *= 10;
06810 fold_len += *cp++ - '0';
06811 }
06812 if (!(0<fold_len && fold_len<BUFSIZ))
06813 fold_len = DEFAULT_FOLD;
06814 if (*cp=='-') {
06815 fold_margin = 0;
06816 cp++;
06817 while('0'<= *cp && *cp <='9') {
06818 fold_margin *= 10;
06819 fold_margin += *cp++ - '0';
06820 }
06821 }
06822 continue;
06823 case 'm':
06824
06825 if (*cp=='B'||*cp=='Q') {
06826 mime_decode_mode = *cp++;
06827 mimebuf_f = FIXED_MIME;
06828 } else if (*cp=='N') {
06829 mime_f = TRUE; cp++;
06830 } else if (*cp=='S') {
06831 mime_f = STRICT_MIME; cp++;
06832 } else if (*cp=='0') {
06833 mime_decode_f = FALSE;
06834 mime_f = FALSE; cp++;
06835 } else {
06836 mime_f = STRICT_MIME;
06837 }
06838 continue;
06839 case 'M':
06840 if (*cp=='B') {
06841 mimeout_mode = 'B';
06842 mimeout_f = FIXED_MIME; cp++;
06843 } else if (*cp=='Q') {
06844 mimeout_mode = 'Q';
06845 mimeout_f = FIXED_MIME; cp++;
06846 } else {
06847 mimeout_f = TRUE;
06848 }
06849 continue;
06850 case 'B':
06851
06852
06853
06854
06855 if ('9'>= *cp && *cp>='0')
06856 broken_f |= 1<<(*cp++ -'0');
06857 else
06858 broken_f |= TRUE;
06859 continue;
06860 #ifndef PERL_XS
06861 case 'O':
06862 file_out_f = TRUE;
06863 continue;
06864 #endif
06865 case 'c':
06866 eolmode_f = CRLF;
06867 continue;
06868 case 'd':
06869 eolmode_f = LF;
06870 continue;
06871 case 'I':
06872 iso2022jp_f = TRUE;
06873 continue;
06874 case 'L':
06875 if (*cp=='u') {
06876 eolmode_f = LF; cp++;
06877 } else if (*cp=='m') {
06878 eolmode_f = CR; cp++;
06879 } else if (*cp=='w') {
06880 eolmode_f = CRLF; cp++;
06881 } else if (*cp=='0') {
06882 eolmode_f = 0; cp++;
06883 }
06884 continue;
06885 #ifndef PERL_XS
06886 case 'g':
06887 if ('2' <= *cp && *cp <= '9') {
06888 guess_f = 2;
06889 cp++;
06890 } else if (*cp == '0' || *cp == '1') {
06891 guess_f = 1;
06892 cp++;
06893 } else {
06894 guess_f = 1;
06895 }
06896 continue;
06897 #endif
06898 case SP:
06899
06900 while(*cp && *cp++!='-');
06901 continue;
06902 default:
06903 #if !defined(PERL_XS) && !defined(WIN32DLL)
06904 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
06905 #endif
06906
06907 return -1;
06908 }
06909 }
06910 return 0;
06911 }
06912
06913 #ifdef WIN32DLL
06914 #include "nkf32dll.c"
06915 #elif defined(PERL_XS)
06916 #else
06917 int
06918 main(int argc, char **argv)
06919 {
06920 FILE *fin;
06921 unsigned char *cp;
06922
06923 char *outfname = NULL;
06924 char *origfname;
06925
06926 #ifdef EASYWIN
06927 _BufferSize.y = 400;
06928 #endif
06929 #ifdef DEFAULT_CODE_LOCALE
06930 setlocale(LC_CTYPE, "");
06931 #endif
06932 nkf_state_init();
06933
06934 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
06935 cp = (unsigned char *)*argv;
06936 options(cp);
06937 #ifdef EXEC_IO
06938 if (exec_f){
06939 int fds[2], pid;
06940 if (pipe(fds) < 0 || (pid = fork()) < 0){
06941 abort();
06942 }
06943 if (pid == 0){
06944 if (exec_f > 0){
06945 close(fds[0]);
06946 dup2(fds[1], 1);
06947 }else{
06948 close(fds[1]);
06949 dup2(fds[0], 0);
06950 }
06951 execvp(argv[1], &argv[1]);
06952 }
06953 if (exec_f > 0){
06954 close(fds[1]);
06955 dup2(fds[0], 0);
06956 }else{
06957 close(fds[0]);
06958 dup2(fds[1], 1);
06959 }
06960 argc = 0;
06961 break;
06962 }
06963 #endif
06964 }
06965
06966 if (guess_f) {
06967 #ifdef CHECK_OPTION
06968 int debug_f_back = debug_f;
06969 #endif
06970 #ifdef EXEC_IO
06971 int exec_f_back = exec_f;
06972 #endif
06973 #ifdef X0212_ENABLE
06974 int x0212_f_back = x0212_f;
06975 #endif
06976 int x0213_f_back = x0213_f;
06977 int guess_f_back = guess_f;
06978 reinit();
06979 guess_f = guess_f_back;
06980 mime_f = FALSE;
06981 #ifdef CHECK_OPTION
06982 debug_f = debug_f_back;
06983 #endif
06984 #ifdef EXEC_IO
06985 exec_f = exec_f_back;
06986 #endif
06987 x0212_f = x0212_f_back;
06988 x0213_f = x0213_f_back;
06989 }
06990
06991 if (binmode_f == TRUE)
06992 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06993 if (freopen("","wb",stdout) == NULL)
06994 return (-1);
06995 #else
06996 setbinmode(stdout);
06997 #endif
06998
06999 if (unbuf_f)
07000 setbuf(stdout, (char *) NULL);
07001 else
07002 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
07003
07004 if (argc == 0) {
07005 if (binmode_f == TRUE)
07006 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
07007 if (freopen("","rb",stdin) == NULL) return (-1);
07008 #else
07009 setbinmode(stdin);
07010 #endif
07011 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
07012 if (nop_f)
07013 noconvert(stdin);
07014 else {
07015 kanji_convert(stdin);
07016 if (guess_f) print_guessed_code(NULL);
07017 }
07018 } else {
07019 int nfiles = argc;
07020 int is_argument_error = FALSE;
07021 while (argc--) {
07022 input_codename = NULL;
07023 input_eol = 0;
07024 #ifdef CHECK_OPTION
07025 iconv_for_check = 0;
07026 #endif
07027 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
07028 perror(*(argv-1));
07029 is_argument_error = TRUE;
07030 continue;
07031 } else {
07032 #ifdef OVERWRITE
07033 int fd = 0;
07034 int fd_backup = 0;
07035 #endif
07036
07037
07038 if (file_out_f == TRUE) {
07039 #ifdef OVERWRITE
07040 if (overwrite_f){
07041 outfname = nkf_xmalloc(strlen(origfname)
07042 + strlen(".nkftmpXXXXXX")
07043 + 1);
07044 strcpy(outfname, origfname);
07045 #ifdef MSDOS
07046 {
07047 int i;
07048 for (i = strlen(outfname); i; --i){
07049 if (outfname[i - 1] == '/'
07050 || outfname[i - 1] == '\\'){
07051 break;
07052 }
07053 }
07054 outfname[i] = '\0';
07055 }
07056 strcat(outfname, "ntXXXXXX");
07057 mktemp(outfname);
07058 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
07059 S_IREAD | S_IWRITE);
07060 #else
07061 strcat(outfname, ".nkftmpXXXXXX");
07062 fd = mkstemp(outfname);
07063 #endif
07064 if (fd < 0
07065 || (fd_backup = dup(fileno(stdout))) < 0
07066 || dup2(fd, fileno(stdout)) < 0
07067 ){
07068 perror(origfname);
07069 return -1;
07070 }
07071 }else
07072 #endif
07073 if(argc == 1) {
07074 outfname = *argv++;
07075 argc--;
07076 } else {
07077 outfname = "nkf.out";
07078 }
07079
07080 if(freopen(outfname, "w", stdout) == NULL) {
07081 perror (outfname);
07082 return (-1);
07083 }
07084 if (binmode_f == TRUE) {
07085 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
07086 if (freopen("","wb",stdout) == NULL)
07087 return (-1);
07088 #else
07089 setbinmode(stdout);
07090 #endif
07091 }
07092 }
07093 if (binmode_f == TRUE)
07094 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
07095 if (freopen("","rb",fin) == NULL)
07096 return (-1);
07097 #else
07098 setbinmode(fin);
07099 #endif
07100 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
07101 if (nop_f)
07102 noconvert(fin);
07103 else {
07104 char *filename = NULL;
07105 kanji_convert(fin);
07106 if (nfiles > 1) filename = origfname;
07107 if (guess_f) print_guessed_code(filename);
07108 }
07109 fclose(fin);
07110 #ifdef OVERWRITE
07111 if (overwrite_f) {
07112 struct stat sb;
07113 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
07114 time_t tb[2];
07115 #else
07116 struct utimbuf tb;
07117 #endif
07118
07119 fflush(stdout);
07120 close(fd);
07121 if (dup2(fd_backup, fileno(stdout)) < 0){
07122 perror("dup2");
07123 }
07124 if (stat(origfname, &sb)) {
07125 fprintf(stderr, "Can't stat %s\n", origfname);
07126 }
07127
07128 if (chmod(outfname, sb.st_mode)) {
07129 fprintf(stderr, "Can't set permission %s\n", outfname);
07130 }
07131
07132
07133 if(preserve_time_f){
07134 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
07135 tb[0] = tb[1] = sb.st_mtime;
07136 if (utime(outfname, tb)) {
07137 fprintf(stderr, "Can't set timestamp %s\n", outfname);
07138 }
07139 #else
07140 tb.actime = sb.st_atime;
07141 tb.modtime = sb.st_mtime;
07142 if (utime(outfname, &tb)) {
07143 fprintf(stderr, "Can't set timestamp %s\n", outfname);
07144 }
07145 #endif
07146 }
07147 if(backup_f){
07148 char *backup_filename = get_backup_filename(backup_suffix, origfname);
07149 #ifdef MSDOS
07150 unlink(backup_filename);
07151 #endif
07152 if (rename(origfname, backup_filename)) {
07153 perror(backup_filename);
07154 fprintf(stderr, "Can't rename %s to %s\n",
07155 origfname, backup_filename);
07156 }
07157 nkf_xfree(backup_filename);
07158 }else{
07159 #ifdef MSDOS
07160 if (unlink(origfname)){
07161 perror(origfname);
07162 }
07163 #endif
07164 }
07165 if (rename(outfname, origfname)) {
07166 perror(origfname);
07167 fprintf(stderr, "Can't rename %s to %s\n",
07168 outfname, origfname);
07169 }
07170 nkf_xfree(outfname);
07171 }
07172 #endif
07173 }
07174 }
07175 if (is_argument_error)
07176 return(-1);
07177 }
07178 #ifdef EASYWIN
07179 if (file_out_f == FALSE)
07180 scanf("%d",&end_check);
07181 else
07182 fclose(stdout);
07183 #else
07184 if (file_out_f == TRUE)
07185 fclose(stdout);
07186 #endif
07187 return (0);
07188 }
07189 #endif
07190