Skip to content

Commit c0b58e6

Browse files
committed
libarchive: merge bugfixes from vendor branch
CTSRD-CHERI#2147 archive_string: clean up strncat_from_utf8_to_utf8 (36047967a)
CTSRD-CHERI#2153 archive_match: check archive_read_support_format_raw() return value (0ce1b4c38)
CTSRD-CHERI#2154 archive_match: turn counter into flag (287e05d53)
CTSRD-CHERI#2155 lha: Do not allow negative file sizes (93b11caed)
CTSRD-CHERI#2156 tests: setenv LANG to en_US.UTF-8 in bsdunzip test_I.c (83e8b0ea8)

MFC after: 3 days
2 parents b91f295 + ed3e988 commit c0b58e6

File tree

4 files changed

+61
-63
lines changed

4 files changed

+61
-63
lines changed

contrib/libarchive/libarchive/archive_match.c

+9-9
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646

4747
struct match {
4848
struct match *next;
49-
int matches;
49+
int matched;
5050
struct archive_mstring pattern;
5151
};
5252

@@ -605,7 +605,8 @@ add_pattern_from_file(struct archive_match *a, struct match_list *mlist,
605605
return (ARCHIVE_FATAL);
606606
}
607607
r = archive_read_support_format_raw(ar);
608-
r = archive_read_support_format_empty(ar);
608+
if (r == ARCHIVE_OK)
609+
r = archive_read_support_format_empty(ar);
609610
if (r != ARCHIVE_OK) {
610611
archive_copy_error(&(a->archive), ar);
611612
archive_read_free(ar);
@@ -724,12 +725,12 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname)
724725
matched = NULL;
725726
for (match = a->inclusions.first; match != NULL;
726727
match = match->next){
727-
if (match->matches == 0 &&
728+
if (!match->matched &&
728729
(r = match_path_inclusion(a, match, mbs, pathname)) != 0) {
729730
if (r < 0)
730731
return (r);
731732
a->inclusions.unmatched_count--;
732-
match->matches++;
733+
match->matched = 1;
733734
matched = match;
734735
}
735736
}
@@ -752,11 +753,10 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname)
752753
for (match = a->inclusions.first; match != NULL;
753754
match = match->next){
754755
/* We looked at previously-unmatched inclusions already. */
755-
if (match->matches > 0 &&
756+
if (match->matched &&
756757
(r = match_path_inclusion(a, match, mbs, pathname)) != 0) {
757758
if (r < 0)
758759
return (r);
759-
match->matches++;
760760
return (0);
761761
}
762762
}
@@ -879,7 +879,7 @@ match_list_unmatched_inclusions_next(struct archive_match *a,
879879
for (m = list->unmatched_next; m != NULL; m = m->next) {
880880
int r;
881881

882-
if (m->matches)
882+
if (m->matched)
883883
continue;
884884
if (mbs) {
885885
const char *p;
@@ -1793,7 +1793,7 @@ match_owner_name_mbs(struct archive_match *a, struct match_list *list,
17931793
< 0 && errno == ENOMEM)
17941794
return (error_nomem(a));
17951795
if (p != NULL && strcmp(p, name) == 0) {
1796-
m->matches++;
1796+
m->matched = 1;
17971797
return (1);
17981798
}
17991799
}
@@ -1814,7 +1814,7 @@ match_owner_name_wcs(struct archive_match *a, struct match_list *list,
18141814
< 0 && errno == ENOMEM)
18151815
return (error_nomem(a));
18161816
if (p != NULL && wcscmp(p, name) == 0) {
1817-
m->matches++;
1817+
m->matched = 1;
18181818
return (1);
18191819
}
18201820
}

contrib/libarchive/libarchive/archive_read_support_format_lha.c

+2
Original file line numberDiff line numberDiff line change
@@ -1347,6 +1347,8 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
13471347
lha->compsize = archive_le64dec(extdheader);
13481348
extdheader += sizeof(uint64_t);
13491349
lha->origsize = archive_le64dec(extdheader);
1350+
if (lha->compsize < 0 || lha->origsize < 0)
1351+
goto invalid;
13501352
}
13511353
break;
13521354
case EXT_CODEPAGE:

contrib/libarchive/libarchive/archive_string.c

+42-54
Original file line numberDiff line numberDiff line change
@@ -2640,81 +2640,69 @@ unicode_to_utf16le(char *p, size_t remaining, uint32_t uc)
26402640
}
26412641

26422642
/*
2643-
* Copy UTF-8 string in checking surrogate pair.
2644-
* If any surrogate pair are found, it would be canonicalized.
2643+
* Append new UTF-8 string to existing UTF-8 string.
2644+
* Existing string is assumed to already be in proper form;
2645+
* the new string will have invalid sequences replaced and
2646+
* surrogate pairs canonicalized.
26452647
*/
26462648
static int
2647-
strncat_from_utf8_to_utf8(struct archive_string *as, const void *_p,
2649+
strncat_from_utf8_to_utf8(struct archive_string *as, const void *_src,
26482650
size_t len, struct archive_string_conv *sc)
26492651
{
2650-
const char *s;
2651-
char *p, *endp;
2652-
int n, ret = 0;
2653-
2652+
int ret = 0;
2653+
const char *src = _src;
26542654
(void)sc; /* UNUSED */
26552655

2656+
/* Pre-extend the destination */
26562657
if (archive_string_ensure(as, as->length + len + 1) == NULL)
26572658
return (-1);
26582659

2659-
s = (const char *)_p;
2660-
p = as->s + as->length;
2661-
endp = as->s + as->buffer_length -1;
2662-
do {
2660+
/* Invariant: src points to the first UTF8 byte that hasn't
2661+
* been copied to the destination `as`. */
2662+
for (;;) {
2663+
int n;
26632664
uint32_t uc;
2664-
const char *ss = s;
2665-
size_t w;
2665+
const char *e = src;
26662666

2667-
/*
2668-
* Forward byte sequence until a conversion of that is needed.
2669-
*/
2670-
while ((n = utf8_to_unicode(&uc, s, len)) > 0) {
2671-
s += n;
2667+
/* Skip UTF-8 sequences until we reach end-of-string or
2668+
* a code point that needs conversion. */
2669+
while ((n = utf8_to_unicode(&uc, e, len)) > 0) {
2670+
e += n;
26722671
len -= n;
26732672
}
2674-
if (ss < s) {
2675-
if (p + (s - ss) > endp) {
2676-
as->length = p - as->s;
2677-
if (archive_string_ensure(as,
2678-
as->buffer_length + len + 1) == NULL)
2679-
return (-1);
2680-
p = as->s + as->length;
2681-
endp = as->s + as->buffer_length -1;
2682-
}
2683-
2684-
memcpy(p, ss, s - ss);
2685-
p += s - ss;
2673+
/* Copy the part that doesn't need conversion */
2674+
if (e > src) {
2675+
if (archive_string_append(as, src, e - src) == NULL)
2676+
return (-1);
2677+
src = e;
26862678
}
26872679

2688-
/*
2689-
* If n is negative, current byte sequence needs a replacement.
2690-
*/
2691-
if (n < 0) {
2680+
if (n == 0) {
2681+
/* We reached end-of-string */
2682+
return (ret);
2683+
} else {
2684+
/* Next code point needs conversion */
2685+
char t[4];
2686+
size_t w;
2687+
2688+
/* Try decoding a surrogate pair */
26922689
if (n == -3 && IS_SURROGATE_PAIR_LA(uc)) {
2693-
/* Current byte sequence may be CESU-8. */
2694-
n = cesu8_to_unicode(&uc, s, len);
2690+
n = cesu8_to_unicode(&uc, src, len);
26952691
}
2692+
/* Not a (valid) surrogate, so use a replacement char */
26962693
if (n < 0) {
2697-
ret = -1;
2698-
n *= -1;/* Use a replaced unicode character. */
2699-
}
2700-
2701-
/* Rebuild UTF-8 byte sequence. */
2702-
while ((w = unicode_to_utf8(p, endp - p, uc)) == 0) {
2703-
as->length = p - as->s;
2704-
if (archive_string_ensure(as,
2705-
as->buffer_length + len + 1) == NULL)
2706-
return (-1);
2707-
p = as->s + as->length;
2708-
endp = as->s + as->buffer_length -1;
2694+
ret = -1; /* Return -1 if we used any replacement */
2695+
n *= -1;
27092696
}
2710-
p += w;
2711-
s += n;
2697+
/* Consume converted code point */
2698+
src += n;
27122699
len -= n;
2700+
/* Convert and append new UTF-8 sequence. */
2701+
w = unicode_to_utf8(t, sizeof(t), uc);
2702+
if (archive_string_append(as, t, w) == NULL)
2703+
return (-1);
27132704
}
2714-
} while (n > 0);
2715-
as->length = p - as->s;
2716-
as->s[as->length] = '\0';
2717-
return (ret);
2705+
}
27182706
}
27192707

27202708
static int

contrib/libarchive/unzip/test/test_I.c

+8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
DEFINE_TEST(test_I)
3434
{
3535
const char *reffile = "test_I.zip";
36+
const char *lang;
3637
int r;
3738

3839
#if HAVE_SETLOCALE
@@ -44,11 +45,18 @@ DEFINE_TEST(test_I)
4445
skipping("setlocale() not available on this system.");
4546
#endif
4647

48+
lang = getenv("LANG");
49+
setenv("LANG", "en_US.UTF-8", 1);
4750
extract_reference_file(reffile);
4851
r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile);
4952
assertEqualInt(0, r);
5053
assertNonEmptyFile("test.out");
5154
assertEmptyFile("test.err");
5255

5356
assertTextFileContents("Hello, World!\n", "Γειά σου Κόσμε.txt");
57+
58+
if (lang == NULL)
59+
unsetenv("LANG");
60+
else
61+
setenv("LANG", lang, 1);
5462
}

0 commit comments

Comments
 (0)