00001 #include "ruby/ruby.h"
00002 #include "ruby/encoding.h"
00003 #include "ruby/thread.h"
00004 #include "internal.h"
00005 #include <winbase.h>
00006 #include <wchar.h>
00007 #include <shlwapi.h>
00008
/*
 * Fallback definition for SDK headers that do not provide
 * INVALID_FILE_ATTRIBUTES.  rb_file_load_ok() compares the result of
 * GetFileAttributesW() against this value to detect failure.
 */
#ifndef INVALID_FILE_ATTRIBUTES
# define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
#endif
00012
00013
/*
 * Table mapping an encoding index to a Windows code page number.
 * It is filled by code_page_i() (driven from Init_w32_codepage()) and
 * read by code_page().
 */
static struct code_page_table {
    USHORT *table;      /* code page per encoding index; unused slots hold INVALID_CODE_PAGE */
    unsigned int count; /* number of slots currently allocated in table */
} rb_code_page;
00018
/*
 * True when the wide character c is a directory separator ('\\' or '/').
 * The argument is parenthesized so that expressions such as `a ? b : c`
 * expand correctly; note that it is still evaluated twice.
 */
#define IS_DIR_SEPARATOR_P(c) ((c) == L'\\' || (c) == L'/')
/*
 * True when the wide string c starts with two directory separators
 * (the prefix of a UNC path).  The caller must guarantee that c[0] and
 * c[1] are readable.
 */
#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P((c)[0]) && IS_DIR_SEPARATOR_P((c)[1]))

/*
 * Marker stored in unused rb_code_page.table slots and returned by
 * code_page() for encodings without a known Windows code page.
 */
#define INVALID_CODE_PAGE 51932
/* Number of wchar_t elements in the on-stack full-path buffer. */
#define PATH_BUFFER_SIZE (MAX_PATH * 2)

/* True when obj must be treated as insecure at the given safe level. */
#define insecure_obj_p(obj, level) ((level) >= 4 || ((level) > 0 && OBJ_TAINTED(obj)))
00027
/*
 * Replace, in place, every occurrence of the character `find` with
 * `replace` in the NUL-terminated wide string s.
 */
static inline void
replace_wchar(wchar_t *s, int find, int replace)
{
    wchar_t *cursor;

    for (cursor = s; *cursor != 0; cursor++) {
        if (*cursor == find) {
            *cursor = replace;
        }
    }
}
00037
00038
00039 static inline void
00040 convert_mb_to_wchar(const char *str, wchar_t **wstr, size_t *wstr_len, UINT code_page)
00041 {
00042 size_t len;
00043
00044 len = MultiByteToWideChar(code_page, 0, str, -1, NULL, 0) + 1;
00045 *wstr = (wchar_t *)xmalloc(len * sizeof(wchar_t));
00046
00047 MultiByteToWideChar(code_page, 0, str, -1, *wstr, len);
00048 *wstr_len = len - 2;
00049 }
00050
00051 static inline void
00052 convert_wchar_to_mb(const wchar_t *wstr, char **str, size_t *str_len, UINT code_page)
00053 {
00054 size_t len;
00055
00056 len = WideCharToMultiByte(code_page, 0, wstr, -1, NULL, 0, NULL, NULL);
00057 *str = (char *)xmalloc(len * sizeof(char));
00058 WideCharToMultiByte(code_page, 0, wstr, -1, *str, len, NULL, NULL);
00059
00060
00061 *str_len = len - 1;
00062 }
00063
00064
00065
00066
00067
00068
00069
00070
00071
/*
 * Build the user's home directory from the environment.
 *
 * Sources are tried in this order: HOME, then HOMEDRIVE followed by
 * HOMEPATH, then USERPROFILE.  When HOMEDRIVE is set but HOMEPATH is not,
 * no further source is consulted and NULL is returned.
 *
 * Returns an xmalloc'ed wide string with every '\\' replaced by '/'
 * (the caller releases it with xfree()), or NULL when no source applies.
 */
static wchar_t *
home_dir(void)
{
    enum { FROM_NONE, FROM_HOME, FROM_DRIVE_AND_PATH, FROM_USERPROFILE };
    wchar_t *home = NULL;
    size_t capacity = 0, count = 0;
    int source = FROM_NONE;

    /* First pass: find out which source is usable and how much room it needs. */
    if ((count = GetEnvironmentVariableW(L"HOME", NULL, 0)) != 0) {
        capacity = count;
        source = FROM_HOME;
    }
    else if ((count = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0)) != 0) {
        size_t drive_count = count;

        count = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0);
        if (count != 0) {
            capacity = drive_count + count;
            source = FROM_DRIVE_AND_PATH;
        }
    }
    else if ((count = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0)) != 0) {
        capacity = count;
        source = FROM_USERPROFILE;
    }

    if (source == FROM_NONE)
        return NULL;

    home = (wchar_t *)xmalloc(capacity * sizeof(wchar_t));

    /* Second pass: fetch the value(s) into the buffer. */
    if (source == FROM_HOME) {
        GetEnvironmentVariableW(L"HOME", home, capacity);
    }
    else if (source == FROM_DRIVE_AND_PATH) {
        /* HOMEPATH is appended right after the HOMEDRIVE characters. */
        count = GetEnvironmentVariableW(L"HOMEDRIVE", home, capacity);
        GetEnvironmentVariableW(L"HOMEPATH", home + count, capacity - count);
    }
    else {
        GetEnvironmentVariableW(L"USERPROFILE", home, capacity);
    }

    /* Normalize separators to forward slashes. */
    replace_wchar(home, L'\\', L'/');

    return home;
}
00135
00136
/*
 * Strip a trailing ":$DATA" stream suffix (matched case-insensitively)
 * from wfullpath, whose length in wide characters is size.
 *
 *   - "name::$DATA"        -> "name"         (both colons and suffix removed)
 *   - "name:stream:$DATA"  -> "name:stream"  (only ":$DATA" removed)
 *
 * In the second form the suffix is removed only when another ':' is found
 * while scanning backwards before reaching a directory separator or the
 * start of the string.  Returns the resulting length; the string is
 * re-terminated in place when it is shortened.
 */
static inline size_t
remove_invalid_alternative_data(wchar_t *wfullpath, size_t size)
{
    static const wchar_t stream_suffix[] = L":$DATA";
    enum { suffix_len = (sizeof(stream_suffix) / sizeof(wchar_t)) - 1 };
    wchar_t *cursor;

    /* Too short to carry the suffix plus at least one more character. */
    if (size <= suffix_len)
        return size;

    if (_wcsnicmp(wfullpath + size - suffix_len, stream_suffix, suffix_len) != 0)
        return size;

    /* Character immediately before ":$DATA". */
    cursor = wfullpath + size - (suffix_len + 1);

    if (*cursor == ':') {
        /* "::$DATA": drop the extra colon together with the suffix. */
        size -= suffix_len + 1;
        wfullpath[size] = L'\0';
        return size;
    }

    /* Look for a stream name separator within the last path component. */
    for (; !IS_DIR_SEPARATOR_P(*cursor) && cursor != wfullpath; cursor--) {
        if (*cursor == L':') {
            size -= suffix_len;
            wfullpath[size] = L'\0';
            break;
        }
    }

    return size;
}
00167
00168
00169 static inline UINT
00170 system_code_page(void)
00171 {
00172 return AreFileApisANSI() ? CP_ACP : CP_OEMCP;
00173 }
00174
00175 void rb_enc_foreach_name(int (*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg);
00176
00177 static int
00178 code_page_i(st_data_t name, st_data_t idx, st_data_t arg)
00179 {
00180 const char *n = (const char *)name;
00181 if (strncmp("CP", n, 2) == 0) {
00182 int code_page = atoi(n + 2);
00183 if (code_page != 0) {
00184 struct code_page_table *cp = (struct code_page_table *)arg;
00185 unsigned int count = cp->count;
00186 USHORT *table = cp->table;
00187 if (count <= idx) {
00188 unsigned int i = count;
00189 cp->count = count = ((idx + 4) & ~31 | 28);
00190 cp->table = table = realloc(table, count * sizeof(*table));
00191 while (i < count) table[i++] = INVALID_CODE_PAGE;
00192 }
00193 table[idx] = (USHORT)code_page;
00194 }
00195 }
00196 return ST_CONTINUE;
00197 }
00198
00199
00200
00201
00202
00203
00204 static UINT
00205 code_page(rb_encoding *enc)
00206 {
00207 int enc_idx;
00208
00209 if (!enc)
00210 return system_code_page();
00211
00212 enc_idx = rb_enc_to_index(enc);
00213
00214
00215 if (enc_idx == rb_usascii_encindex() || enc_idx == rb_ascii8bit_encindex()) {
00216 return 1252;
00217 }
00218
00219 if (0 <= enc_idx && (unsigned int)enc_idx < rb_code_page.count)
00220 return rb_code_page.table[enc_idx];
00221
00222 return INVALID_CODE_PAGE;
00223 }
00224
/*
 * Convert str to UTF-8 by calling rb_str_conv_enc() with `encoding` as the
 * source encoding.  Used when the path's encoding has no usable code page.
 */
#define fix_string_encoding(str, encoding) rb_str_conv_enc((str), (encoding), rb_utf8_encoding())
00226
00227
00228
00229
00230
00231 static inline size_t
00232 replace_to_long_name(wchar_t **wfullpath, size_t size, int heap)
00233 {
00234 WIN32_FIND_DATAW find_data;
00235 HANDLE find_handle;
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 size_t const max_short_name_size = 8 + 1 + 3;
00247 size_t const max_extension_size = 3;
00248 size_t path_len = 1, extension_len = 0;
00249 wchar_t *pos = *wfullpath;
00250
00251 if (size == 3 && pos[1] == L':' && pos[2] == L'\\' && pos[3] == L'\0') {
00252
00253 return size;
00254 }
00255
00256
00257 if (wcspbrk(pos, L"*?")) {
00258 return size;
00259 }
00260
00261 pos = *wfullpath + size - 1;
00262 while (!IS_DIR_SEPARATOR_P(*pos) && pos != *wfullpath) {
00263 if (!extension_len && *pos == L'.') {
00264 extension_len = path_len - 1;
00265 }
00266 if (path_len > max_short_name_size || extension_len > max_extension_size) {
00267 return size;
00268 }
00269 path_len++;
00270 pos--;
00271 }
00272
00273 find_handle = FindFirstFileW(*wfullpath, &find_data);
00274 if (find_handle != INVALID_HANDLE_VALUE) {
00275 size_t trail_pos = wcslen(*wfullpath);
00276 size_t file_len = wcslen(find_data.cFileName);
00277
00278 FindClose(find_handle);
00279 while (trail_pos > 0) {
00280 if (IS_DIR_SEPARATOR_P((*wfullpath)[trail_pos]))
00281 break;
00282 trail_pos--;
00283 }
00284 size = trail_pos + 1 + file_len;
00285 if ((size + 1) > sizeof(*wfullpath) / sizeof((*wfullpath)[0])) {
00286 wchar_t *buf = (wchar_t *)xmalloc((size + 1) * sizeof(wchar_t));
00287 wcsncpy(buf, *wfullpath, trail_pos + 1);
00288 if (heap)
00289 xfree(*wfullpath);
00290 *wfullpath = buf;
00291 }
00292 wcsncpy(*wfullpath + trail_pos + 1, find_data.cFileName, file_len + 1);
00293 }
00294 return size;
00295 }
00296
00297 static inline VALUE
00298 get_user_from_path(wchar_t **wpath, int offset, UINT cp, UINT path_cp, rb_encoding *path_encoding)
00299 {
00300 VALUE result, tmp;
00301 wchar_t *wuser = *wpath + offset;
00302 wchar_t *pos = wuser;
00303 char *user;
00304 size_t size;
00305
00306 while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
00307 pos++;
00308
00309 *pos = '\0';
00310 convert_wchar_to_mb(wuser, &user, &size, cp);
00311
00312
00313 if (path_cp == INVALID_CODE_PAGE) {
00314 tmp = rb_enc_str_new(user, size, rb_utf8_encoding());
00315 result = rb_str_encode(tmp, rb_enc_from_encoding(path_encoding), 0, Qnil);
00316 rb_str_resize(tmp, 0);
00317 }
00318 else {
00319 result = rb_enc_str_new(user, size, path_encoding);
00320 }
00321
00322 if (user)
00323 xfree(user);
00324
00325 return result;
00326 }
00327
00328 VALUE
00329 rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
00330 {
00331 size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
00332 size_t buffer_len = 0;
00333 char *fullpath = NULL;
00334 wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
00335 wchar_t *wdir = NULL, *wdir_pos = NULL;
00336 wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
00337 UINT path_cp, cp;
00338 VALUE path = fname, dir = dname;
00339 wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
00340 wchar_t path_drive = L'\0', dir_drive = L'\0';
00341 int ignore_dir = 0;
00342 rb_encoding *path_encoding;
00343 int tainted = 0;
00344
00345
00346 tainted = OBJ_TAINTED(path);
00347
00348
00349 if (NIL_P(dir)) {
00350 path_encoding = rb_enc_get(path);
00351 }
00352 else {
00353 path_encoding = rb_enc_check(path, dir);
00354 }
00355
00356 cp = path_cp = code_page(path_encoding);
00357
00358
00359 if (path_cp == INVALID_CODE_PAGE) {
00360 cp = CP_UTF8;
00361 if (!NIL_P(path)) {
00362 path = fix_string_encoding(path, path_encoding);
00363 }
00364 }
00365
00366
00367 if (!NIL_P(path)) {
00368 convert_mb_to_wchar(RSTRING_PTR(path), &wpath, &wpath_len, cp);
00369 wpath_pos = wpath;
00370 }
00371
00372
00373
00374 if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
00375 (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
00376
00377 tainted = 1;
00378
00379 whome = home_dir();
00380 if (whome == NULL) {
00381 xfree(wpath);
00382 rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
00383 }
00384 whome_len = wcslen(whome);
00385
00386 if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
00387 xfree(wpath);
00388 xfree(whome);
00389 rb_raise(rb_eArgError, "non-absolute home");
00390 }
00391
00392 if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) {
00393
00394 path_encoding = rb_filesystem_encoding();
00395 cp = path_cp = system_code_page();
00396 }
00397
00398
00399 ignore_dir = 1;
00400
00401
00402 wpath_pos++;
00403 wpath_len--;
00404
00405
00406 if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
00407 wpath_pos++;
00408 wpath_len--;
00409 }
00410 }
00411 else if (wpath_len >= 2 && wpath_pos[1] == L':') {
00412 if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
00413
00414 ignore_dir = 1;
00415 }
00416 else {
00417
00418 path_drive = wpath_pos[0];
00419 wpath_pos += 2;
00420 wpath_len -= 2;
00421 }
00422 }
00423 else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
00424 result = get_user_from_path(&wpath_pos, 1, cp, path_cp, path_encoding);
00425
00426 if (wpath)
00427 xfree(wpath);
00428
00429 rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
00430 }
00431
00432
00433 if (!ignore_dir && !NIL_P(dir)) {
00434
00435 if (path_cp == INVALID_CODE_PAGE) {
00436 dir = fix_string_encoding(dir, path_encoding);
00437 }
00438
00439
00440 if (!NIL_P(dir)) {
00441 convert_mb_to_wchar(RSTRING_PTR(dir), &wdir, &wdir_len, cp);
00442 wdir_pos = wdir;
00443 }
00444
00445 if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
00446 (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
00447
00448 tainted = 1;
00449
00450 whome = home_dir();
00451 if (whome == NULL) {
00452 xfree(wpath);
00453 xfree(wdir);
00454 rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
00455 }
00456 whome_len = wcslen(whome);
00457
00458 if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
00459 xfree(wpath);
00460 xfree(wdir);
00461 xfree(whome);
00462 rb_raise(rb_eArgError, "non-absolute home");
00463 }
00464
00465
00466 wdir_pos++;
00467 wdir_len--;
00468
00469
00470 if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
00471 wdir_pos++;
00472 wdir_len--;
00473 }
00474 }
00475 else if (wdir_len >= 2 && wdir[1] == L':') {
00476 dir_drive = wdir[0];
00477 if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
00478 wdir_len = 2;
00479 }
00480 }
00481 else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
00482
00483 if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
00484
00485 size_t separators = 0;
00486 size_t pos = 2;
00487 while (pos < wdir_len && separators < 2) {
00488 if (IS_DIR_SEPARATOR_P(wdir[pos])) {
00489 separators++;
00490 }
00491 pos++;
00492 }
00493 if (separators == 2)
00494 wdir_len = pos - 1;
00495 }
00496 }
00497 else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
00498 result = get_user_from_path(&wdir_pos, 1, cp, path_cp, path_encoding);
00499 if (wpath)
00500 xfree(wpath);
00501
00502 if (wdir)
00503 xfree(wdir);
00504
00505 rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
00506 }
00507 }
00508
00509
00510 if (!ignore_dir && path_drive && dir_drive) {
00511 if (towupper(path_drive) != towupper(dir_drive)) {
00512
00513 ignore_dir = 1;
00514 wdir_len = 0;
00515 dir_drive = 0;
00516 }
00517 }
00518
00519 if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
00520
00521 ignore_dir = 1;
00522 wdir_len = 0;
00523 }
00524 else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
00525 !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
00526
00527 ignore_dir = 1;
00528 wdir_len = 0;
00529 }
00530
00531 buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;
00532
00533 buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));
00534
00535
00536 if (whome_len) {
00537 wcsncpy(buffer_pos, whome, whome_len);
00538 buffer_pos += whome_len;
00539 }
00540
00541
00542 if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
00543 buffer_pos[0] = L'\\';
00544 buffer_pos++;
00545 }
00546 else if (!dir_drive && path_drive) {
00547 *buffer_pos++ = path_drive;
00548 *buffer_pos++ = L':';
00549 }
00550
00551 if (wdir_len) {
00552
00553 if (!tainted && OBJ_TAINTED(dir))
00554 tainted = 1;
00555
00556 wcsncpy(buffer_pos, wdir_pos, wdir_len);
00557 buffer_pos += wdir_len;
00558 }
00559
00560
00561 if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
00562 buffer_pos[0] = L'\\';
00563 buffer_pos++;
00564 }
00565
00566
00567 if (wpath_len) {
00568 wcsncpy(buffer_pos, wpath_pos, wpath_len);
00569 buffer_pos += wpath_len;
00570 }
00571
00572
00573 if (wpath_len == 0) {
00574 buffer_pos[0] = L'.';
00575 buffer_pos++;
00576 }
00577
00578
00579 buffer_pos[0] = L'\0';
00580
00581
00582 if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
00583 tainted = 1;
00584
00585
00586
00587 size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
00588 if (size > PATH_BUFFER_SIZE) {
00589
00590 wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
00591 size = GetFullPathNameW(buffer, size, wfullpath, NULL);
00592 }
00593 else {
00594 wfullpath = wfullpath_buffer;
00595 }
00596
00597
00598 if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
00599 wfullpath[size - 2] != L':' &&
00600 !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
00601 size -= 1;
00602 wfullpath[size] = L'\0';
00603 }
00604
00605
00606 if (wfullpath[size - 1] == L'.') {
00607 size -= 1;
00608 wfullpath[size] = L'\0';
00609 }
00610
00611
00612 size = remove_invalid_alternative_data(wfullpath, size);
00613
00614
00615 if (long_name)
00616 size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));
00617
00618
00619 replace_wchar(wfullpath, L'\\', L'/');
00620
00621
00622 size = WideCharToMultiByte(cp, 0, wfullpath, size, NULL, 0, NULL, NULL);
00623 if (size > (size_t)RSTRING_LEN(result)) {
00624 rb_str_modify(result);
00625 rb_str_resize(result, size);
00626 }
00627
00628 WideCharToMultiByte(cp, 0, wfullpath, size, RSTRING_PTR(result), size, NULL, NULL);
00629 rb_str_set_len(result, size);
00630
00631
00632 if (path_cp == INVALID_CODE_PAGE) {
00633 VALUE tmp;
00634 size_t len;
00635
00636 rb_enc_associate(result, rb_utf8_encoding());
00637 ENC_CODERANGE_CLEAR(result);
00638 tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
00639 len = RSTRING_LEN(tmp);
00640 rb_str_modify(result);
00641 rb_str_resize(result, len);
00642 memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
00643 rb_str_resize(tmp, 0);
00644 }
00645 rb_enc_associate(result, path_encoding);
00646 ENC_CODERANGE_CLEAR(result);
00647
00648
00649 if (tainted)
00650 OBJ_TAINT(result);
00651
00652
00653 if (buffer)
00654 xfree(buffer);
00655
00656 if (wpath)
00657 xfree(wpath);
00658
00659 if (wdir)
00660 xfree(wdir);
00661
00662 if (whome)
00663 xfree(whome);
00664
00665 if (wfullpath && wfullpath != wfullpath_buffer)
00666 xfree(wfullpath);
00667
00668 if (fullpath)
00669 xfree(fullpath);
00670
00671 return result;
00672 }
00673
00674 static void *
00675 loadopen_func(void *wpath)
00676 {
00677 return (void *)CreateFileW(wpath, GENERIC_READ,
00678 FILE_SHARE_READ | FILE_SHARE_WRITE,
00679 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
00680 }
00681
00682 int
00683 rb_file_load_ok(const char *path)
00684 {
00685 DWORD attr;
00686 int ret = 1;
00687 size_t len;
00688 wchar_t* wpath;
00689
00690 convert_mb_to_wchar(path, &wpath, &len, CP_UTF8);
00691
00692 attr = GetFileAttributesW(wpath);
00693 if (attr == INVALID_FILE_ATTRIBUTES ||
00694 (attr & FILE_ATTRIBUTE_DIRECTORY)) {
00695 ret = 0;
00696 }
00697 else {
00698 HANDLE h = (HANDLE)rb_thread_call_without_gvl(loadopen_func, (void *)wpath,
00699 RUBY_UBF_IO, 0);
00700 if (h != INVALID_HANDLE_VALUE) {
00701 CloseHandle(h);
00702 }
00703 else {
00704 ret = 0;
00705 }
00706 }
00707 xfree(wpath);
00708 return ret;
00709 }
00710
00711 void
00712 Init_w32_codepage(void)
00713 {
00714 if (rb_code_page.count) return;
00715 rb_enc_foreach_name(code_page_i, (st_data_t)&rb_code_page);
00716 }
00717