Skip to content

Commit 9a23b75

Browse files
miss-islington, sethmlarson, e-nomem
authored
[3.11] gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling (#145815)
gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling
(cherry picked from commit 42d754e)
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Eashwar Ranganathan <eashwar@eashwar.com>
1 parent 69ddd9b commit 9a23b75

4 files changed

Lines changed: 47 additions & 4 deletions

File tree

Lib/tarfile.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding):
12451245
@classmethod
12461246
def frombuf(cls, buf, encoding, errors):
12471247
"""Construct a TarInfo object from a 512 byte bytes object.
1248+
1249+
To support the old v7 tar format AREGTYPE headers are
1250+
transformed to DIRTYPE headers if their name ends in '/'.
1251+
"""
1252+
return cls._frombuf(buf, encoding, errors)
1253+
1254+
@classmethod
1255+
def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
1256+
"""Construct a TarInfo object from a 512 byte bytes object.
1257+
1258+
If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
1259+
be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
1260+
``dircheck`` must be set to ``False`` if this function is called
1261+
on the header that follows an extended header such as ``GNUTYPE_LONGNAME``.
12481262
"""
12491263
if len(buf) == 0:
12501264
raise EmptyHeaderError("empty header")
@@ -1275,7 +1289,7 @@ def frombuf(cls, buf, encoding, errors):
12751289

12761290
# Old V7 tar format represents a directory as a regular
12771291
# file with a trailing slash.
1278-
if obj.type == AREGTYPE and obj.name.endswith("/"):
1292+
if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
12791293
obj.type = DIRTYPE
12801294

12811295
# The old GNU sparse format occupies some of the unused
@@ -1310,8 +1324,15 @@ def fromtarfile(cls, tarfile):
13101324
"""Return the next TarInfo object from TarFile object
13111325
tarfile.
13121326
"""
1327+
return cls._fromtarfile(tarfile)
1328+
1329+
@classmethod
1330+
def _fromtarfile(cls, tarfile, *, dircheck=True):
1331+
"""
1332+
See dircheck documentation in _frombuf().
1333+
"""
13131334
buf = tarfile.fileobj.read(BLOCKSIZE)
1314-
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1335+
obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
13151336
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
13161337
return obj._proc_member(tarfile)
13171338

@@ -1369,7 +1390,7 @@ def _proc_gnulong(self, tarfile):
13691390

13701391
# Fetch the next header and process it.
13711392
try:
1372-
next = self.fromtarfile(tarfile)
1393+
next = self._fromtarfile(tarfile, dircheck=False)
13731394
except HeaderError as e:
13741395
raise SubsequentHeaderError(str(e)) from None
13751396

@@ -1504,7 +1525,7 @@ def _proc_pax(self, tarfile):
15041525

15051526
# Fetch the next header.
15061527
try:
1507-
next = self.fromtarfile(tarfile)
1528+
next = self._fromtarfile(tarfile, dircheck=False)
15081529
except HeaderError as e:
15091530
raise SubsequentHeaderError(str(e)) from None
15101531

Lib/test/test_tarfile.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,25 @@ def test_longname_directory(self):
11051105
self.assertIsNotNone(tar.getmember(longdir))
11061106
self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))
11071107

1108+
def test_longname_file_not_directory(self):
1109+
# Test reading a longname file and ensure it is not handled as a directory
1110+
# Issue #141707
1111+
buf = io.BytesIO()
1112+
with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
1113+
ti = tarfile.TarInfo()
1114+
ti.type = tarfile.AREGTYPE
1115+
ti.name = ('a' * 99) + '/' + ('b' * 3)
1116+
tar.addfile(ti)
1117+
1118+
expected = {t.name: t.type for t in tar.getmembers()}
1119+
1120+
buf.seek(0)
1121+
with tarfile.open(mode='r', fileobj=buf) as tar:
1122+
actual = {t.name: t.type for t in tar.getmembers()}
1123+
1124+
self.assertEqual(expected, actual)
1125+
1126+
11081127
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
11091128

11101129
subdir = "gnu"

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,6 +1462,7 @@ Dhushyanth Ramasamy
14621462
Ashwin Ramaswami
14631463
Jeff Ramnani
14641464
Bayard Randel
1465+
Eashwar Ranganathan
14651466
Varpu Rantala
14661467
Brodie Rao
14671468
Rémi Rampin
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
2+
GNU long name or link headers.

0 commit comments

Comments
 (0)