Skip to content

Commit 26fdb4c

Browse files
authored
Merge branch 'libarchive:master' into rar-encryption
2 parents 38050d6 + 898dc83 commit 26fdb4c

13 files changed

+390
-15
lines changed

Diff for: Makefile.am

+1
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,7 @@ libarchive_test_EXTRA_DIST=\
790790
libarchive/test/test_read_format_7zip_encryption.7z.uu \
791791
libarchive/test/test_read_format_7zip_encryption_header.7z.uu \
792792
libarchive/test/test_read_format_7zip_encryption_partially.7z.uu \
793+
libarchive/test/test_read_format_7zip_extract_second.7z.uu \
793794
libarchive/test/test_read_format_7zip_lzma1.7z.uu \
794795
libarchive/test/test_read_format_7zip_lzma1_2.7z.uu \
795796
libarchive/test/test_read_format_7zip_lzma1_lzma2.7z.uu \

Diff for: libarchive/archive_entry.c

+6
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,12 @@ archive_entry_hardlink_w(struct archive_entry *entry)
526526
return (NULL);
527527
}
528528

529+
/*
 * Report whether the AE_SET_HARDLINK bit is set in entry->ae_set.
 * Returns 1 when the bit is set and 0 otherwise; the entry is only read.
 */
int
530+
archive_entry_hardlink_is_set(struct archive_entry *entry)
531+
{
532+
return (entry->ae_set & AE_SET_HARDLINK) != 0;
533+
}
534+
529535
int
530536
_archive_entry_hardlink_l(struct archive_entry *entry,
531537
const char **p, size_t *len, struct archive_string_conv *sc)

Diff for: libarchive/archive_entry.h

+1
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ __LA_DECL void archive_entry_set_link_to_hardlink(struct archive_entry *);
263263
__LA_DECL const char *archive_entry_hardlink(struct archive_entry *);
264264
__LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *);
265265
__LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
266+
__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *);
266267
__LA_DECL la_int64_t archive_entry_ino(struct archive_entry *);
267268
__LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *);
268269
__LA_DECL int archive_entry_ino_is_set(struct archive_entry *);

Diff for: libarchive/archive_read_support_format_7zip.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,7 @@ ppmd_read(void *p)
10631063
ssize_t bytes_avail = 0;
10641064
const uint8_t* data = __archive_read_ahead(a,
10651065
(size_t)zip->ppstream.stream_in+1, &bytes_avail);
1066-
if(bytes_avail < zip->ppstream.stream_in+1) {
1066+
if(data == NULL || bytes_avail < zip->ppstream.stream_in+1) {
10671067
archive_set_error(&a->archive,
10681068
ARCHIVE_ERRNO_FILE_FORMAT,
10691069
"Truncated 7z file data");
@@ -3462,7 +3462,7 @@ read_stream(struct archive_read *a, const void **buff, size_t size,
34623462
/*
34633463
* Skip the bytes we have already skipped in skip_stream().
34643464
*/
3465-
while (skip_bytes) {
3465+
while (1) {
34663466
ssize_t skipped;
34673467

34683468
if (zip->uncompressed_buffer_bytes_remaining == 0) {
@@ -3482,6 +3482,10 @@ read_stream(struct archive_read *a, const void **buff, size_t size,
34823482
return (ARCHIVE_FATAL);
34833483
}
34843484
}
3485+
3486+
if (!skip_bytes)
3487+
break;
3488+
34853489
skipped = get_uncompressed_data(
34863490
a, buff, (size_t)skip_bytes, 0);
34873491
if (skipped < 0)

Diff for: libarchive/archive_string.c

+73-11
Original file line numberDiff line numberDiff line change
@@ -3874,6 +3874,30 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes,
38743874
}
38753875

38763876
*p = NULL;
3877+
#if defined(_WIN32) && !defined(__CYGWIN__)
3878+
/*
3879+
* On Windows, first try converting from WCS because (1) there's no
3880+
* guarantee that the conversion to MBS will succeed, e.g. when using
3881+
* CP_ACP, and (2) that's more efficient than converting to MBS, just to
3882+
* convert back to WCS again before finally converting to UTF-8
3883+
*/
3884+
if ((aes->aes_set & AES_SET_WCS) != 0) {
3885+
sc = archive_string_conversion_to_charset(a, "UTF-8", 1);
3886+
if (sc == NULL)
3887+
return (-1);/* Couldn't allocate memory for sc. */
3888+
archive_string_empty(&(aes->aes_utf8));
3889+
r = archive_string_append_from_wcs_in_codepage(&(aes->aes_utf8),
3890+
aes->aes_wcs.s, aes->aes_wcs.length, sc);
3891+
if (a == NULL)
3892+
free_sconv_object(sc);
3893+
if (r == 0) {
3894+
aes->aes_set |= AES_SET_UTF8;
3895+
*p = aes->aes_utf8.s;
3896+
return (0);/* success. */
3897+
} else
3898+
return (-1);/* failure. */
3899+
}
3900+
#endif
38773901
/* Try converting WCS to MBS first if MBS does not exist yet. */
38783902
if ((aes->aes_set & AES_SET_MBS) == 0) {
38793903
const char *pm; /* unused */
@@ -3958,6 +3982,32 @@ archive_mstring_get_wcs(struct archive *a, struct archive_mstring *aes,
39583982
}
39593983

39603984
*wp = NULL;
3985+
#if defined(_WIN32) && !defined(__CYGWIN__)
3986+
/*
3987+
* On Windows, prefer converting from UTF-8 directly to WCS because:
3988+
* (1) there's no guarantee that the string can be represented in MBS (e.g.
3989+
* with CP_ACP), and (2) in order to convert from UTF-8 to MBS, we're going
3990+
* to need to convert from UTF-8 to WCS anyway and it's wasteful to throw
3991+
* away that intermediate result
3992+
*/
3993+
if (aes->aes_set & AES_SET_UTF8) {
3994+
struct archive_string_conv *sc;
3995+
3996+
sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
3997+
if (sc != NULL) {
3998+
archive_wstring_empty((&aes->aes_wcs));
3999+
r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
4000+
aes->aes_utf8.s, aes->aes_utf8.length, sc);
4001+
if (a == NULL)
4002+
free_sconv_object(sc);
4003+
if (r == 0) {
4004+
aes->aes_set |= AES_SET_WCS;
4005+
*wp = aes->aes_wcs.s;
4006+
return (0);
4007+
}
4008+
}
4009+
}
4010+
#endif
39614011
/* Try converting UTF8 to MBS first if MBS does not exist yet. */
39624012
if ((aes->aes_set & AES_SET_MBS) == 0) {
39634013
const char *p; /* unused */
@@ -4211,21 +4261,31 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
42114261

42124262
aes->aes_set = AES_SET_UTF8; /* Only UTF8 is set now. */
42134263

4214-
/* Try converting UTF-8 to MBS, return false on failure. */
42154264
sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
42164265
if (sc == NULL)
42174266
return (-1);/* Couldn't allocate memory for sc. */
4218-
r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);
42194267

42204268
#if defined(_WIN32) && !defined(__CYGWIN__)
4221-
/* On failure, make an effort to convert UTF8 to WCS as the active code page
4222-
* may not be able to represent all characters in the string */
4223-
if (r != 0) {
4224-
if (archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
4225-
aes->aes_utf8.s, aes->aes_utf8.length, sc) == 0)
4226-
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS;
4227-
}
4228-
#endif
4269+
/* On Windows, there's no good way to convert from UTF8 -> MBS directly, so
4270+
* prefer to first convert to WCS as (1) it's wasteful to throw away the
4271+
* intermediate result, and (2) WCS will still be set even if we fail to
4272+
* convert to MBS (e.g. with ACP that can't represent the characters) */
4273+
r = archive_wstring_append_from_mbs_in_codepage(&(aes->aes_wcs),
4274+
aes->aes_utf8.s, aes->aes_utf8.length, sc);
4275+
4276+
if (a == NULL)
4277+
free_sconv_object(sc);
4278+
if (r != 0)
4279+
return (-1); /* This will guarantee we can't convert to MBS */
4280+
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */
4281+
4282+
/* Try converting WCS to MBS, return false on failure. */
4283+
if (archive_string_append_from_wcs(&(aes->aes_mbs), aes->aes_wcs.s,
4284+
aes->aes_wcs.length))
4285+
return (-1);
4286+
#else
4287+
/* Try converting UTF-8 to MBS, return false on failure. */
4288+
r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);
42294289

42304290
if (a == NULL)
42314291
free_sconv_object(sc);
@@ -4237,8 +4297,10 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
42374297
if (archive_wstring_append_from_mbs(&(aes->aes_wcs), aes->aes_mbs.s,
42384298
aes->aes_mbs.length))
42394299
return (-1);
4240-
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
4300+
#endif
42414301

42424302
/* All conversions succeeded. */
4303+
aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
4304+
42434305
return (0);
42444306
}

Diff for: libarchive/archive_windows.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -292,12 +292,17 @@ typedef int mbstate_t;
292292
size_t wcrtomb(char *, wchar_t, mbstate_t *);
293293
#endif
294294

295-
#if defined(_MSC_VER) && _MSC_VER < 1300
295+
#if !WINAPI_FAMILY_PARTITION (WINAPI_PARTITION_DESKTOP) && NTDDI_VERSION < NTDDI_WIN10_VB
296+
// not supported in UWP SDK before 20H1
297+
#define GetVolumePathNameW(f, v, c) (0)
298+
#elif defined(_MSC_VER) && _MSC_VER < 1300
296299
WINBASEAPI BOOL WINAPI GetVolumePathNameW(
297300
LPCWSTR lpszFileName,
298301
LPWSTR lpszVolumePathName,
299302
DWORD cchBufferLength
300303
);
304+
#endif
305+
#if defined(_MSC_VER) && _MSC_VER < 1300
301306
# if _WIN32_WINNT < 0x0500 /* windows.h not providing 0x500 API */
302307
typedef struct _FILE_ALLOCATED_RANGE_BUFFER {
303308
LARGE_INTEGER FileOffset;

Diff for: libarchive/archive_write_set_format_cpio_binary.c

+3
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,9 @@ archive_write_binary_close(struct archive_write *a)
577577
struct archive_entry *trailer;
578578

579579
trailer = archive_entry_new2(NULL);
580+
if (trailer == NULL) {
581+
return ARCHIVE_FATAL;
582+
}
580583
/* nlink = 1 here for GNU cpio compat. */
581584
archive_entry_set_nlink(trailer, 1);
582585
archive_entry_set_size(trailer, 0);

Diff for: libarchive/archive_write_set_format_cpio_odc.c

+3
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,9 @@ archive_write_odc_close(struct archive_write *a)
467467
struct archive_entry *trailer;
468468

469469
trailer = archive_entry_new2(NULL);
470+
if (trailer == NULL) {
471+
return ARCHIVE_FATAL;
472+
}
470473
/* nlink = 1 here for GNU cpio compat. */
471474
archive_entry_set_nlink(trailer, 1);
472475
archive_entry_set_size(trailer, 0);

Diff for: libarchive/archive_write_set_format_gnutar.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ archive_write_gnutar_header(struct archive_write *a,
523523
goto exit_write_header;
524524
}
525525

526-
if (archive_entry_hardlink(entry) != NULL) {
526+
if (archive_entry_hardlink_is_set(entry)) {
527527
tartype = '1';
528528
} else
529529
switch (archive_entry_filetype(entry)) {

Diff for: libarchive/test/test_archive_string_conversion.c

+135
Original file line numberDiff line numberDiff line change
@@ -882,3 +882,138 @@ DEFINE_TEST(test_archive_string_conversion)
882882
test_archive_string_canonicalization();
883883
test_archive_string_set_get();
884884
}
885+
886+
/*
 * Windows-only test (skipped elsewhere, including Cygwin): stores a wide
 * string of three code points (U+043F U+0440 U+0438) in an
 * archive_mstring and checks that archive_mstring_get_utf8() returns
 * ARCHIVE_OK and yields the matching UTF-8 byte sequence.
 */
DEFINE_TEST(test_archive_string_conversion_utf16_utf8)
887+
{
888+
#if !defined(_WIN32) || defined(__CYGWIN__)
889+
skipping("This test is meant to verify unicode string handling on Windows");
890+
#else
891+
struct archive_mstring mstr;
892+
const char* utf8_string;
893+
894+
memset(&mstr, 0, sizeof(mstr));
895+
896+
assertEqualInt(ARCHIVE_OK,
897+
archive_mstring_copy_wcs(&mstr, L"\U0000043f\U00000440\U00000438"));
898+
899+
/* Conversion from WCS to UTF-8 should always succeed */
900+
assertEqualInt(ARCHIVE_OK,
901+
archive_mstring_get_utf8(NULL, &mstr, &utf8_string));
902+
assertEqualString("\xD0\xBF\xD1\x80\xD0\xB8", utf8_string);
903+
904+
archive_mstring_clean(&mstr);
905+
#endif
906+
}
907+
908+
/*
 * Windows-only test (skipped elsewhere, including Cygwin): stores a
 * six-byte UTF-8 string in an archive_mstring and checks that
 * archive_mstring_get_wcs() returns ARCHIVE_OK and yields the matching
 * wide string (U+043F U+0440 U+0438).
 */
DEFINE_TEST(test_archive_string_conversion_utf8_utf16)
909+
{
910+
#if !defined(_WIN32) || defined(__CYGWIN__)
911+
skipping("This test is meant to verify unicode string handling on Windows");
912+
#else
913+
struct archive_mstring mstr;
914+
const wchar_t* wcs_string;
915+
916+
memset(&mstr, 0, sizeof(mstr));
917+
918+
/* The copy call is expected to return 6 here, the byte length of the input. */
assertEqualInt(6,
919+
archive_mstring_copy_utf8(&mstr, "\xD0\xBF\xD1\x80\xD0\xB8"));
920+
921+
/* Conversion from UTF-8 to WCS should always succeed */
922+
assertEqualInt(ARCHIVE_OK,
923+
archive_mstring_get_wcs(NULL, &mstr, &wcs_string));
924+
assertEqualWString(L"\U0000043f\U00000440\U00000438", wcs_string);
925+
926+
archive_mstring_clean(&mstr);
927+
#endif
928+
}
929+
930+
/*
 * Windows-only test (skipped elsewhere, including Cygwin): calls
 * archive_mstring_update_utf8() with a non-ASCII UTF-8 string and expects
 * the call to return -1 with AES_SET_MBS left clear, while the UTF-8 and
 * WCS forms are both flagged as set and hold the expected contents.
 *
 * NOTE(review): this block never calls setlocale(); the expected failure
 * depends on the C locale already being active when the test runs --
 * confirm that the test harness guarantees this.
 */
DEFINE_TEST(test_archive_string_update_utf8_win)
931+
{
932+
#if !defined(_WIN32) || defined(__CYGWIN__)
933+
skipping("This test is meant to verify unicode string handling on Windows"
934+
" with the C locale");
935+
#else
936+
static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
937+
static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
938+
struct archive_mstring mstr;
939+
int r;
940+
941+
memset(&mstr, 0, sizeof(mstr));
942+
943+
r = archive_mstring_update_utf8(NULL, &mstr, utf8_string);
944+
945+
/* On Windows, this should reliably fail with the C locale */
946+
assertEqualInt(-1, r);
947+
assertEqualInt(0, mstr.aes_set & AES_SET_MBS);
948+
949+
/* NOTE: We access the internals to validate that they were set by the
950+
* 'archive_mstring_update_utf8' function */
951+
/* UTF-8 should always be set */
952+
assertEqualInt(AES_SET_UTF8, mstr.aes_set & AES_SET_UTF8);
953+
assertEqualString(utf8_string, mstr.aes_utf8.s);
954+
/* WCS should always be set as well */
955+
assertEqualInt(AES_SET_WCS, mstr.aes_set & AES_SET_WCS);
956+
assertEqualWString(wcs_string, mstr.aes_wcs.s);
957+
958+
archive_mstring_clean(&mstr);
959+
#endif
960+
}
961+
962+
/*
 * Under the "en_US.UTF-8" locale (the test is skipped when setlocale()
 * rejects it), archive_mstring_update_utf8() is expected to return 0 and
 * leave the MBS, WCS and UTF-8 forms all set, with the MBS bytes equal to
 * the UTF-8 input.
 *
 * NOTE(review): the locale changed by setlocale() is not restored within
 * this block.
 */
DEFINE_TEST(test_archive_string_update_utf8_utf8)
963+
{
964+
static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
965+
static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
966+
struct archive_mstring mstr;
967+
int r;
968+
969+
memset(&mstr, 0, sizeof(mstr));
970+
971+
if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) {
972+
skipping("UTF-8 not supported on this system.");
973+
return;
974+
}
975+
976+
r = archive_mstring_update_utf8(NULL, &mstr, utf8_string);
977+
978+
/* All conversions should have succeeded */
979+
assertEqualInt(0, r);
980+
/* Exact match: no flags other than these three may be set. */
assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set);
981+
assertEqualString(utf8_string, mstr.aes_utf8.s);
982+
assertEqualString(utf8_string, mstr.aes_mbs.s);
983+
assertEqualWString(wcs_string, mstr.aes_wcs.s);
984+
985+
archive_mstring_clean(&mstr);
986+
}
987+
988+
/*
 * Under the "ru_RU.KOI8-R" locale (the test is skipped when setlocale()
 * rejects it), archive_mstring_update_utf8() is expected to return 0 and
 * leave the MBS, WCS and UTF-8 forms all set, with the MBS form holding
 * the three-byte string "\xD0\xD2\xC9". The WCS contents are only
 * checked on native Windows builds.
 *
 * NOTE(review): the locale changed by setlocale() is not restored within
 * this block.
 */
DEFINE_TEST(test_archive_string_update_utf8_koi8)
989+
{
990+
static const char utf8_string[] = "\xD0\xBF\xD1\x80\xD0\xB8";
991+
static const char koi8_string[] = "\xD0\xD2\xC9";
992+
static const wchar_t wcs_string[] = L"\U0000043f\U00000440\U00000438";
993+
struct archive_mstring mstr;
994+
int r;
995+
996+
memset(&mstr, 0, sizeof(mstr));
997+
998+
if (setlocale(LC_ALL, "ru_RU.KOI8-R") == NULL) {
999+
skipping("KOI8-R locale not available on this system.");
1000+
return;
1001+
}
1002+
1003+
r = archive_mstring_update_utf8(NULL, &mstr, utf8_string);
1004+
1005+
/* All conversions should have succeeded */
1006+
assertEqualInt(0, r);
1007+
assertEqualInt(AES_SET_MBS | AES_SET_WCS | AES_SET_UTF8, mstr.aes_set);
1008+
assertEqualString(utf8_string, mstr.aes_utf8.s);
1009+
assertEqualString(koi8_string, mstr.aes_mbs.s);
1010+
#if defined(_WIN32) && !defined(__CYGWIN__)
1011+
assertEqualWString(wcs_string, mstr.aes_wcs.s);
1012+
#else
1013+
/* No guarantee of how WCS strings behave, however this test is
1014+
* primarily meant for Windows */
1015+
(void)wcs_string;
1016+
#endif
1017+
1018+
archive_mstring_clean(&mstr);
1019+
}

0 commit comments

Comments
 (0)