Skip to content

Commit 2f752ce

Browse files
committed
Set bit 11 of the zip header flags in minizip if UTF-8.
The bit is set if the file name contains valid UTF-8 and there is at least one code of two or more bytes. If there is a comment, then the comment has to pass the same test for bit 11 to be set.
1 parent 8236296 commit 2f752ce

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

contrib/minizip/zip.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,46 @@ local int Write_LocalFileHeader(zip64_internal* zi, const char* filename, uInt s
12471247
return err;
12481248
}
12491249

1250+
// Return the length of the UTF-8 code at str[0..len-1] in [1..4], or negative
// if there is no valid UTF-8 code there. If negative, it is minus the number
// of bytes examined in order to determine it was bad. Or if minus the return
// code is one more than len, then at least one more byte than provided would
// be needed to complete the code. Callers that only need a yes/no answer
// should treat every negative return as "not valid UTF-8".
//
// Rejected as invalid: a continuation byte in first position, a missing or
// malformed continuation byte, overlong encodings, the UTF-16 surrogate range
// U+D800..U+DFFF (ED A0..BF xx), and codes above U+10FFFF. str is not
// dereferenced when len is zero.
local inline int utf8len(unsigned char const *str, size_t len) {
    if (len == 0)
        return -1;                          // empty input
    if (str[0] < 0x80)
        return 1;                           // good one-byte
    if (str[0] < 0xc0)
        return -1;                          // bad first byte

    if (len < 2 || (str[1] >> 6) != 2)
        return -2;                          // missing or bad second byte
    if (str[0] < 0xc2)
        return -2;                          // overlong code
    if (str[0] < 0xe0)
        return 2;                           // good two-byte

    if (len < 3 || (str[2] >> 6) != 2)
        return -3;                          // missing or bad third byte
    if (str[0] == 0xe0 && str[1] < 0xa0)
        return -3;                          // overlong code
    if (str[0] == 0xed && str[1] >= 0xa0)
        return -3;                          // surrogate U+D800..U+DFFF
    if (str[0] < 0xf0)
        return 3;                           // good three-byte

    if (len < 4 || (str[3] >> 6) != 2)
        return -4;                          // missing or bad fourth byte
    if (str[0] == 0xf0 && str[1] < 0x90)
        return -4;                          // overlong code
    if (str[0] < 0xf4 || (str[0] == 0xf4 && str[1] < 0x90))
        return 4;                           // good four-byte
    return -4;                              // code > 0x10ffff
}
1272+
1273+
// Report whether str[0..len-1] is made up entirely of codes that utf8len()
// accepts *and* at least one of those codes occupies two or more bytes. An
// empty or pure one-byte (ASCII) input therefore yields 0. The result decides
// whether bit 11 of the zip header flags gets set.
local int isutf8(char const *str, size_t len) {
    unsigned char const *next = (unsigned char const *)str;
    size_t left = len;
    int multibyte = 0;

    while (left != 0) {
        int n = utf8len(next, left);
        if (n < 0)
            return 0;                   // invalid or truncated code
        if (n > 1)
            multibyte = 1;              // saw a code of two or more bytes
        next += n;
        left -= n;
    }
    return multibyte;
}
1289+
12501290
/*
12511291
NOTE.
12521292
When writing RAW the ZIP64 extended information in extrafield_local and extrafield_global needs to be stripped
@@ -1333,6 +1373,9 @@ extern int ZEXPORT zipOpenNewFileInZip4_64(zipFile file, const char* filename, c
13331373
zi->ci.flag |= 6;
13341374
if (password != NULL)
13351375
zi->ci.flag |= 1;
1376+
if (isutf8(filename, size_filename) &&
1377+
(size_comment == 0 || isutf8(comment, size_comment)))
1378+
zi->ci.flag |= (1 << 11);
13361379

13371380
zi->ci.crc32 = 0;
13381381
zi->ci.method = method;

0 commit comments

Comments
 (0)