Skip to content

Commit cee6e50

Browse files
committed
Add zip unicode test
1 parent ffa43ae commit cee6e50

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed

Diff for: libarchive/test/test_zip_filename_encoding.c

+95
Original file line numberDiff line numberDiff line change
@@ -527,3 +527,98 @@ DEFINE_TEST(test_zip_filename_encoding_CP932)
527527
assertEqualInt(0, buff[7]);
528528
assertEqualMem(buff + 30, "abcABC", 6);
529529
}
530+
531+
DEFINE_TEST(test_zip_filename_encoding_UTF16_win)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	/*
	 * Windows-only check of the wide-character ('_w') entry setters.
	 *
	 * Three single-entry ZIP archives are written to memory with
	 * 'hdrcharset' set to UTF-8: a regular file, a directory and a
	 * symlink.  After each archive is finished, the raw bytes at the
	 * start of the image are compared with the expected UTF-8 text.
	 */
	struct archive_entry *ae;
	struct archive *zip;
	/* Required by archive_write_open_memory(); not inspected here. */
	size_t written;
	char image[4096];

	/*
	 * setlocale() is deliberately not called: the point is that the
	 * '_w' functions work as expected when 'hdrcharset' is UTF-8.
	 */

	/* Part 1: regular file whose name is given as a wide string. */
	zip = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(zip));
	if (ARCHIVE_OK != archive_write_set_options(zip, "hdrcharset=UTF-8")) {
		/* Without the charset option nothing below is meaningful. */
		skipping("This system cannot convert character-set"
		    " from UTF-16 to UTF-8.");
		archive_write_free(zip);
		return;
	}
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(zip, image, sizeof(image), &written));

	ae = archive_entry_new2(zip);
	archive_entry_copy_pathname_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFREG);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(zip, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(zip));

	/*
	 * Bit 11 of the general purpose flag must be set; it marks the
	 * filename charset as UTF-8.  Bit 11 of a 16-bit value is 0x08
	 * in its high byte, which is what byte 7 is compared against.
	 */
	assertEqualInt(0x08, image[7]);
	/* The name must appear as UTF-8 at offset 30. */
	assertEqualMem(image + 30, "\xE8\xA1\xA8.txt", 7);

	/* Part 2: directory whose name is given as a wide string. */
	zip = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(zip));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_set_options(zip, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(zip, image, sizeof(image), &written));

	ae = archive_entry_new2(zip);
	/*
	 * The trailing slash is left off on purpose so that the code
	 * path which has to add it is the one being exercised.
	 */
	archive_entry_copy_pathname_w(ae, L"\u8868");
	archive_entry_set_filetype(ae, AE_IFDIR);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(zip, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(zip));

	/* Same UTF-8 flag expectation as for the regular file. */
	assertEqualInt(0x08, image[7]);
	/* The stored name is the UTF-8 text followed by '/'. */
	assertEqualMem(image + 30, "\xE8\xA1\xA8/", 4);

	/* Part 3: ASCII-named symlink whose target is a wide string. */
	zip = archive_write_new();
	assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(zip));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_set_options(zip, "hdrcharset=UTF-8"));
	assertEqualInt(ARCHIVE_OK,
	    archive_write_open_memory(zip, image, sizeof(image), &written));

	ae = archive_entry_new2(zip);
	archive_entry_set_pathname(ae, "link.txt");
	archive_entry_copy_symlink_w(ae, L"\u8868.txt");
	archive_entry_set_filetype(ae, AE_IFLNK);
	archive_entry_set_symlink_type(ae, AE_SYMLINK_TYPE_FILE);
	archive_entry_set_size(ae, 0);
	assertEqualInt(ARCHIVE_OK, archive_write_header(zip, ae));
	archive_entry_free(ae);
	assertEqualInt(ARCHIVE_OK, archive_write_free(zip));

	/*
	 * Bit 11 of the general purpose flag must be clear here,
	 * because the entry name itself is plain ASCII.
	 */
	assertEqualInt(0, image[7]);
	/*
	 * The UTF-8 form of the target is expected at offset 38, i.e.
	 * 8 bytes (the length of "link.txt") past the offset 30 used
	 * for the name checks above.
	 */
	assertEqualMem(image + 38, "\xE8\xA1\xA8.txt", 7);

	/* NOTE: ZIP does not support hardlinks, so none are tested. */
#else
	/* Not native Windows (or Cygwin): nothing to verify. */
	skipping("This test is meant to verify unicode string handling"
	    " on Windows with UTF-16 names");
#endif
}

0 commit comments

Comments
 (0)