00001 #include "ruby/ruby.h"
00002 #include "ruby/encoding.h"
00003 #include <winbase.h>
00004 #include <wchar.h>
00005 #include <shlwapi.h>
00006
00007 #ifndef INVALID_FILE_ATTRIBUTES
00008 # define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
00009 #endif
00010
00011
/* Hash used by code_page() to cache the code page number looked up for each
 * encoding name; created and registered with the GC in rb_w32_init_file(). */
static VALUE rb_code_page;
00013
/*
 * Path character helpers.
 *
 * IS_DIR_SEPARATOR_P(c): true when the wide character c is '\' or '/'.
 * IS_DIR_UNC_P(c):       true when the wide string c starts with two
 *                        directory separators.
 *
 * Arguments are fully parenthesized so that expressions such as `p + 1`
 * or `ch | flag` expand correctly. Both macros still evaluate their
 * argument more than once, so do not pass expressions with side effects.
 */
#define IS_DIR_SEPARATOR_P(c) ((c) == L'\\' || (c) == L'/')
#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P((c)[0]) && IS_DIR_SEPARATOR_P((c)[1]))

/* Sentinel returned by code_page() when no "CPnnn" alias is found for an
 * encoding; callers then go through UTF-8 instead. */
#define INVALID_CODE_PAGE 51932
/* Number of wide characters in the on-stack full path buffer. */
#define PATH_BUFFER_SIZE (MAX_PATH * 2)

#define insecure_obj_p(obj, level) ((level) >= 4 || ((level) > 0 && OBJ_TAINTED(obj)))
00022
/*
 * Replaces, in place, every occurrence of the character `find` in the
 * NUL-terminated wide string `s` with `replace`.
 */
static inline void
replace_wchar(wchar_t *s, int find, int replace)
{
    wchar_t *cursor;

    for (cursor = s; *cursor != 0; cursor++) {
        if (*cursor == find) {
            *cursor = replace;
        }
    }
}
00032
00033
00034 static inline void
00035 convert_mb_to_wchar(VALUE str, wchar_t **wstr, wchar_t **wstr_pos, size_t *wstr_len, UINT code_page)
00036 {
00037 size_t len;
00038
00039 if (NIL_P(str))
00040 return;
00041
00042 len = MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, NULL, 0) + 1;
00043 *wstr = (wchar_t *)xmalloc(len * sizeof(wchar_t));
00044 if (wstr_pos)
00045 *wstr_pos = *wstr;
00046
00047 MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, *wstr, len);
00048 *wstr_len = len - 2;
00049 }
00050
00051 static inline void
00052 convert_wchar_to_mb(const wchar_t *wstr, char **str, size_t *str_len, UINT code_page)
00053 {
00054 size_t len;
00055
00056 len = WideCharToMultiByte(code_page, 0, wstr, -1, NULL, 0, NULL, NULL);
00057 *str = (char *)xmalloc(len * sizeof(char));
00058 WideCharToMultiByte(code_page, 0, wstr, -1, *str, len, NULL, NULL);
00059
00060
00061 *str_len = len - 1;
00062 }
00063
00064
00065
00066
00067
00068
00069
00070
00071
/*
 * Determines the user's home directory from the environment, trying in
 * order: HOME, HOMEDRIVE + HOMEPATH, USERPROFILE.
 *
 * Returns a newly xmalloc'ed wide string with every '\' replaced by '/'
 * (caller must xfree it), or NULL when no source is available.
 *
 * Differences from the previous implementation:
 *  - USERPROFILE is also tried when HOMEDRIVE is set but HOMEPATH is not
 *    (previously that combination returned NULL without looking further).
 *  - The result of each fetch is checked; if a variable grew between the
 *    size query and the fetch, NULL is returned rather than scanning a
 *    buffer with undefined contents.
 */
static wchar_t *
home_dir(void)
{
    enum { HOME_NONE, HOME_ENV, HOME_DRIVE_PATH, HOME_USERPROFILE } home_env = HOME_NONE;
    wchar_t *buffer = NULL;
    size_t buffer_len = 0, len = 0, fetched = 0;

    /* first pass: find which source exists and how large a buffer it needs
     * (the size query returns the length including the terminating NUL) */
    if ((len = GetEnvironmentVariableW(L"HOME", NULL, 0)) != 0) {
        buffer_len = len;
        home_env = HOME_ENV;
    }
    else if ((len = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0)) != 0) {
        buffer_len = len;
        if ((len = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0)) != 0) {
            buffer_len += len;
            home_env = HOME_DRIVE_PATH;
        }
    }

    if (home_env == HOME_NONE &&
        (len = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0)) != 0) {
        buffer_len = len;
        home_env = HOME_USERPROFILE;
    }

    if (home_env == HOME_NONE)
        return NULL;

    buffer = (wchar_t *)xmalloc(buffer_len * sizeof(wchar_t));
    /* keep the buffer a valid string even if a fetch stores nothing */
    buffer[0] = L'\0';

    /* second pass: fetch the value(s); a return value >= the buffer size
     * means the value did not fit */
    switch (home_env) {
      case HOME_ENV:
        fetched = GetEnvironmentVariableW(L"HOME", buffer, buffer_len);
        if (fetched >= buffer_len)
            goto failed;
        break;
      case HOME_DRIVE_PATH:
        len = GetEnvironmentVariableW(L"HOMEDRIVE", buffer, buffer_len);
        if (len >= buffer_len)
            goto failed;
        /* HOMEPATH is appended right after the drive */
        fetched = GetEnvironmentVariableW(L"HOMEPATH", buffer + len, buffer_len - len);
        if (fetched >= buffer_len - len)
            goto failed;
        break;
      case HOME_USERPROFILE:
        fetched = GetEnvironmentVariableW(L"USERPROFILE", buffer, buffer_len);
        if (fetched >= buffer_len)
            goto failed;
        break;
      default:
        break;
    }

    /* normalize separators to forward slashes */
    replace_wchar(buffer, L'\\', L'/');
    return buffer;

  failed:
    xfree(buffer);
    return NULL;
}
00135
00136
/*
 * Strips a trailing ":$DATA" stream suffix (compared case-insensitively)
 * from `wfullpath`, whose current length is `size` wide characters.
 *
 *  - "name::$DATA"      -> the whole "::$DATA" is removed
 *  - "name:xxx:$DATA"   -> only ":$DATA" is removed, provided another ':'
 *                          is found in the last path component
 *  - anything else      -> unchanged
 *
 * The string is truncated in place and the new length is returned.
 */
static inline size_t
remove_invalid_alternative_data(wchar_t *wfullpath, size_t size)
{
    static const wchar_t suffix[] = L":$DATA";
    enum { suffix_len = (sizeof(suffix) / sizeof(wchar_t)) - 1 };
    wchar_t *before_suffix, *scan;

    /* too short to carry the suffix at all */
    if (size <= suffix_len)
        return size;

    /* does not end with the suffix */
    if (_wcsnicmp(wfullpath + size - suffix_len, suffix, suffix_len) != 0)
        return size;

    before_suffix = wfullpath + size - (suffix_len + 1);

    if (*before_suffix == L':') {
        /* "::$DATA": drop the extra colon together with the suffix */
        size -= suffix_len + 1;
        wfullpath[size] = L'\0';
        return size;
    }

    /* walk back through the last path component looking for another ':' */
    for (scan = before_suffix; scan != wfullpath && !IS_DIR_SEPARATOR_P(*scan); scan--) {
        if (*scan == L':') {
            size -= suffix_len;
            wfullpath[size] = L'\0';
            break;
        }
    }

    return size;
}
00167
00168
00169 static inline UINT
00170 system_code_page(void)
00171 {
00172 return AreFileApisANSI() ? CP_ACP : CP_OEMCP;
00173 }
00174
00175
00176
00177
00178
00179
00180 static UINT
00181 code_page(rb_encoding *enc)
00182 {
00183 VALUE code_page_value, name_key;
00184 VALUE encoding, names_ary = Qundef, name;
00185 char *enc_name;
00186 struct RString fake_str;
00187 ID names;
00188 long i;
00189
00190 if (!enc)
00191 return system_code_page();
00192
00193 enc_name = (char *)rb_enc_name(enc);
00194
00195 fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
00196 fake_str.basic.klass = rb_cString;
00197 fake_str.as.heap.len = strlen(enc_name);
00198 fake_str.as.heap.ptr = enc_name;
00199 fake_str.as.heap.aux.capa = fake_str.as.heap.len;
00200 name_key = (VALUE)&fake_str;
00201 ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
00202
00203 code_page_value = rb_hash_lookup(rb_code_page, name_key);
00204 if (code_page_value != Qnil)
00205 return (UINT)FIX2INT(code_page_value);
00206
00207 name_key = rb_usascii_str_new2(enc_name);
00208
00209 encoding = rb_enc_from_encoding(enc);
00210 if (!NIL_P(encoding)) {
00211 CONST_ID(names, "names");
00212 names_ary = rb_funcall(encoding, names, 0);
00213 }
00214
00215
00216 if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) {
00217 UINT code_page = 1252;
00218 rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
00219 return code_page;
00220 }
00221
00222 if (names_ary != Qundef) {
00223 for (i = 0; i < RARRAY_LEN(names_ary); i++) {
00224 name = RARRAY_PTR(names_ary)[i];
00225 if (strncmp("CP", RSTRING_PTR(name), 2) == 0) {
00226 int code_page = atoi(RSTRING_PTR(name) + 2);
00227 if (code_page != 0) {
00228 rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
00229 return (UINT)code_page;
00230 }
00231 }
00232 }
00233 }
00234
00235 rb_hash_aset(rb_code_page, name_key, INT2FIX(INVALID_CODE_PAGE));
00236 return INVALID_CODE_PAGE;
00237 }
00238
00239 static inline VALUE
00240 fix_string_encoding(VALUE str, rb_encoding *encoding)
00241 {
00242 VALUE result, tmp;
00243
00244 tmp = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), encoding);
00245 result = rb_str_encode(tmp, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
00246
00247 return result;
00248 }
00249
00250
00251
00252
00253
00254 static inline size_t
00255 replace_to_long_name(wchar_t **wfullpath, size_t size, int heap)
00256 {
00257 WIN32_FIND_DATAW find_data;
00258 HANDLE find_handle;
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269 size_t const max_short_name_size = 8 + 1 + 3;
00270 size_t const max_extension_size = 3;
00271 size_t path_len = 1, extension_len = 0;
00272 wchar_t *pos = *wfullpath;
00273
00274 if (size == 3 && pos[1] == L':' && pos[2] == L'\\' && pos[3] == L'\0') {
00275
00276 return size;
00277 }
00278
00279
00280 if (wcspbrk(pos, L"*?")) {
00281 return size;
00282 }
00283
00284 pos = *wfullpath + size - 1;
00285 while (!IS_DIR_SEPARATOR_P(*pos) && pos != *wfullpath) {
00286 if (!extension_len && *pos == L'.') {
00287 extension_len = path_len - 1;
00288 }
00289 if (path_len > max_short_name_size || extension_len > max_extension_size) {
00290 return size;
00291 }
00292 path_len++;
00293 pos--;
00294 }
00295
00296 find_handle = FindFirstFileW(*wfullpath, &find_data);
00297 if (find_handle != INVALID_HANDLE_VALUE) {
00298 size_t trail_pos = wcslen(*wfullpath);
00299 size_t file_len = wcslen(find_data.cFileName);
00300
00301 FindClose(find_handle);
00302 while (trail_pos > 0) {
00303 if (IS_DIR_SEPARATOR_P((*wfullpath)[trail_pos]))
00304 break;
00305 trail_pos--;
00306 }
00307 size = trail_pos + 1 + file_len;
00308 if ((size + 1) > sizeof(*wfullpath) / sizeof((*wfullpath)[0])) {
00309 wchar_t *buf = (wchar_t *)xmalloc((size + 1) * sizeof(wchar_t));
00310 wcsncpy(buf, *wfullpath, trail_pos + 1);
00311 if (heap)
00312 xfree(*wfullpath);
00313 *wfullpath = buf;
00314 }
00315 wcsncpy(*wfullpath + trail_pos + 1, find_data.cFileName, file_len + 1);
00316 }
00317 return size;
00318 }
00319
00320 static inline VALUE
00321 get_user_from_path(wchar_t **wpath, int offset, UINT cp, UINT path_cp, rb_encoding *path_encoding)
00322 {
00323 VALUE result, tmp;
00324 wchar_t *wuser = *wpath + offset;
00325 wchar_t *pos = wuser;
00326 char *user;
00327 size_t size;
00328
00329 while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
00330 pos++;
00331
00332 *pos = '\0';
00333 convert_wchar_to_mb(wuser, &user, &size, cp);
00334
00335
00336 if (path_cp == INVALID_CODE_PAGE) {
00337 tmp = rb_enc_str_new(user, size, rb_utf8_encoding());
00338 result = rb_str_encode(tmp, rb_enc_from_encoding(path_encoding), 0, Qnil);
00339 rb_str_resize(tmp, 0);
00340 }
00341 else {
00342 result = rb_enc_str_new(user, size, path_encoding);
00343 }
00344
00345 if (user)
00346 xfree(user);
00347
00348 return result;
00349 }
00350
00351 VALUE
00352 rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
00353 {
00354 size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
00355 size_t buffer_len = 0;
00356 char *fullpath = NULL;
00357 wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
00358 wchar_t *wdir = NULL, *wdir_pos = NULL;
00359 wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
00360 UINT path_cp, cp;
00361 VALUE path = fname, dir = dname;
00362 wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
00363 wchar_t path_drive = L'\0', dir_drive = L'\0';
00364 int ignore_dir = 0;
00365 rb_encoding *path_encoding;
00366 int tainted = 0;
00367
00368
00369 tainted = OBJ_TAINTED(path);
00370
00371
00372 if (NIL_P(dir)) {
00373 path_encoding = rb_enc_get(path);
00374 }
00375 else {
00376 path_encoding = rb_enc_check(path, dir);
00377 }
00378
00379 cp = path_cp = code_page(path_encoding);
00380
00381
00382 if (path_cp == INVALID_CODE_PAGE) {
00383 cp = CP_UTF8;
00384 if (!NIL_P(path)) {
00385 path = fix_string_encoding(path, path_encoding);
00386 }
00387 }
00388
00389
00390 convert_mb_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);
00391
00392
00393
00394 if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
00395 (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
00396
00397 tainted = 1;
00398
00399 whome = home_dir();
00400 if (whome == NULL) {
00401 xfree(wpath);
00402 rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
00403 }
00404 whome_len = wcslen(whome);
00405
00406 if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
00407 xfree(wpath);
00408 xfree(whome);
00409 rb_raise(rb_eArgError, "non-absolute home");
00410 }
00411
00412
00413 path_encoding = rb_filesystem_encoding();
00414 cp = path_cp = system_code_page();
00415
00416
00417 ignore_dir = 1;
00418
00419
00420 wpath_pos++;
00421 wpath_len--;
00422
00423
00424 if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
00425 wpath_pos++;
00426 wpath_len--;
00427 }
00428 }
00429 else if (wpath_len >= 2 && wpath_pos[1] == L':') {
00430 if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
00431
00432 ignore_dir = 1;
00433 }
00434 else {
00435
00436 path_drive = wpath_pos[0];
00437 }
00438 }
00439 else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
00440 result = get_user_from_path(&wpath_pos, 1, cp, path_cp, path_encoding);
00441
00442 if (wpath)
00443 xfree(wpath);
00444
00445 rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
00446 }
00447
00448
00449 if (!ignore_dir && !NIL_P(dir)) {
00450
00451 if (path_cp == INVALID_CODE_PAGE) {
00452 dir = fix_string_encoding(dir, path_encoding);
00453 }
00454
00455
00456 convert_mb_to_wchar(dir, &wdir, &wdir_pos, &wdir_len, cp);
00457
00458 if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
00459 (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
00460
00461 tainted = 1;
00462
00463 whome = home_dir();
00464 if (whome == NULL) {
00465 xfree(wpath);
00466 xfree(wdir);
00467 rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
00468 }
00469 whome_len = wcslen(whome);
00470
00471 if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
00472 xfree(wpath);
00473 xfree(wdir);
00474 xfree(whome);
00475 rb_raise(rb_eArgError, "non-absolute home");
00476 }
00477
00478
00479 wdir_pos++;
00480 wdir_len--;
00481
00482
00483 if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
00484 wdir_pos++;
00485 wdir_len--;
00486 }
00487 }
00488 else if (wdir_len >= 2 && wdir[1] == L':') {
00489 dir_drive = wdir[0];
00490 if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
00491 wdir_len = 2;
00492 }
00493 }
00494 else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
00495
00496 if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
00497
00498 size_t separators = 0;
00499 size_t pos = 2;
00500 while (pos < wdir_len && separators < 2) {
00501 if (IS_DIR_SEPARATOR_P(wdir[pos])) {
00502 separators++;
00503 }
00504 pos++;
00505 }
00506 if (separators == 2)
00507 wdir_len = pos - 1;
00508 }
00509 }
00510 else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
00511 result = get_user_from_path(&wdir_pos, 1, cp, path_cp, path_encoding);
00512 if (wpath)
00513 xfree(wpath);
00514
00515 if (wdir)
00516 xfree(wdir);
00517
00518 rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
00519 }
00520 }
00521
00522
00523 if (!ignore_dir && path_drive && dir_drive) {
00524 if (towupper(path_drive) == towupper(dir_drive)) {
00525
00526 wpath_pos += 2;
00527 wpath_len -= 2;
00528 }
00529 else {
00530
00531 ignore_dir = 1;
00532 wdir_len = 0;
00533 }
00534 }
00535
00536 if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
00537
00538 ignore_dir = 1;
00539 wdir_len = 0;
00540 }
00541 else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
00542 !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
00543
00544 ignore_dir = 1;
00545 wdir_len = 0;
00546 }
00547
00548 buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;
00549
00550 buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));
00551
00552
00553 if (whome_len) {
00554 wcsncpy(buffer_pos, whome, whome_len);
00555 buffer_pos += whome_len;
00556 }
00557
00558
00559 if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
00560 buffer_pos[0] = L'\\';
00561 buffer_pos++;
00562 }
00563
00564 if (wdir_len) {
00565
00566 if (!tainted && OBJ_TAINTED(dir))
00567 tainted = 1;
00568
00569 wcsncpy(buffer_pos, wdir_pos, wdir_len);
00570 buffer_pos += wdir_len;
00571 }
00572
00573
00574 if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
00575 buffer_pos[0] = L'\\';
00576 buffer_pos++;
00577 }
00578
00579
00580 if (wpath_len) {
00581 wcsncpy(buffer_pos, wpath_pos, wpath_len);
00582 buffer_pos += wpath_len;
00583 }
00584
00585
00586 if (wpath_len == 0) {
00587 buffer_pos[0] = L'.';
00588 buffer_pos++;
00589 }
00590
00591
00592 buffer_pos[0] = L'\0';
00593
00594
00595 if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
00596 tainted = 1;
00597
00598
00599
00600 size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
00601 if (size > PATH_BUFFER_SIZE) {
00602
00603 wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
00604 size = GetFullPathNameW(buffer, size, wfullpath, NULL);
00605 }
00606 else {
00607 wfullpath = wfullpath_buffer;
00608 }
00609
00610
00611 if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
00612 wfullpath[size - 2] != L':' &&
00613 !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
00614 size -= 1;
00615 wfullpath[size] = L'\0';
00616 }
00617
00618
00619 if (wfullpath[size - 1] == L'.') {
00620 size -= 1;
00621 wfullpath[size] = L'\0';
00622 }
00623
00624
00625 size = remove_invalid_alternative_data(wfullpath, size);
00626
00627
00628 if (long_name)
00629 size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));
00630
00631
00632 replace_wchar(wfullpath, L'\\', L'/');
00633
00634
00635 size = WideCharToMultiByte(cp, 0, wfullpath, size, NULL, 0, NULL, NULL);
00636 if (size > (size_t)RSTRING_LEN(result)) {
00637 rb_str_modify(result);
00638 rb_str_resize(result, size);
00639 }
00640
00641 WideCharToMultiByte(cp, 0, wfullpath, size, RSTRING_PTR(result), size, NULL, NULL);
00642 rb_str_set_len(result, size);
00643
00644
00645 if (path_cp == INVALID_CODE_PAGE) {
00646 VALUE tmp;
00647 size_t len;
00648
00649 rb_enc_associate(result, rb_utf8_encoding());
00650 ENC_CODERANGE_CLEAR(result);
00651 tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
00652 len = RSTRING_LEN(tmp);
00653 rb_str_modify(result);
00654 rb_str_resize(result, len);
00655 memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
00656 rb_str_resize(tmp, 0);
00657 }
00658 rb_enc_associate(result, path_encoding);
00659 ENC_CODERANGE_CLEAR(result);
00660
00661
00662 if (tainted)
00663 OBJ_TAINT(result);
00664
00665
00666 if (buffer)
00667 xfree(buffer);
00668
00669 if (wpath)
00670 xfree(wpath);
00671
00672 if (wdir)
00673 xfree(wdir);
00674
00675 if (whome)
00676 xfree(whome);
00677
00678 if (wfullpath && wfullpath != wfullpath_buffer)
00679 xfree(wfullpath);
00680
00681 if (fullpath)
00682 xfree(fullpath);
00683
00684 return result;
00685 }
00686
00687 int
00688 rb_file_load_ok(const char *path)
00689 {
00690 int ret = 1;
00691 DWORD attr = GetFileAttributes(path);
00692 if (attr == INVALID_FILE_ATTRIBUTES ||
00693 attr & FILE_ATTRIBUTE_DIRECTORY) {
00694 ret = 0;
00695 }
00696 else {
00697 HANDLE h = CreateFile(path, GENERIC_READ,
00698 FILE_SHARE_READ | FILE_SHARE_WRITE,
00699 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
00700 if (h != INVALID_HANDLE_VALUE) {
00701 CloseHandle(h);
00702 }
00703 else {
00704 ret = 0;
00705 }
00706 }
00707 return ret;
00708 }
00709
00710 void
00711 rb_w32_init_file(void)
00712 {
00713 rb_code_page = rb_hash_new();
00714
00715
00716 rb_gc_register_mark_object(rb_code_page);
00717 }
00718