Skip to content

Commit 80b8591

Browse files
gdh1995gongdahan
authored and
gongdahan
committed
Update _create_gnu_long_header to align with GNU Tar
1 parent de9deb7 commit 80b8591

File tree

5 files changed

+83
-16
lines changed

5 files changed

+83
-16
lines changed

Doc/whatsnew/3.14.rst

+9
Original file line number | Diff line number | Diff line change
@@ -1175,6 +1175,15 @@ sysconfig
11751175
(Contributed by Xuehai Pan in :gh:`131799`.)
11761176

11771177

1178+
tarfile
1179+
-------
1180+
1181+
* Emit ``mode``, ``uname`` and ``gname`` fields for long paths in
1182+
:mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU
1183+
``tar(1)``.
1184+
(Contributed by Dahan Gong in :gh:`130820`.)
1185+
1186+
11781187
threading
11791188
---------
11801189

Lib/tarfile.py

+36-16
Original file line number | Diff line number | Diff line change
@@ -258,6 +258,29 @@ def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
258258
dst.write(buf)
259259
return
260260

261+
def _get_user_group_names(uid, gid, unames_cache, gnames_cache):
262+
# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To speed
263+
# things up, cache the resolved usernames and group names.
264+
if pwd:
265+
if uid not in unames_cache:
266+
try:
267+
unames_cache[uid] = pwd.getpwuid(uid)[0]
268+
except KeyError:
269+
unames_cache[uid] = ''
270+
uname = unames_cache[uid]
271+
else:
272+
uname = None
273+
if grp:
274+
if gid not in gnames_cache:
275+
try:
276+
gnames_cache[gid] = grp.getgrgid(gid)[0]
277+
except KeyError:
278+
gnames_cache[gid] = ''
279+
gname = gnames_cache[gid]
280+
else:
281+
gname = None
282+
return uname, gname
283+
261284
def _safe_print(s):
262285
encoding = getattr(sys.stdout, 'encoding', None)
263286
if encoding is not None:
@@ -883,6 +906,9 @@ class TarInfo(object):
883906
_link_target = None,
884907
)
885908

909+
_unames = {} # Cached mappings of uid=0 -> uname
910+
_gnames = {} # Cached mappings of gid=0 -> gname
911+
886912
def __init__(self, name=""):
887913
"""Construct a TarInfo object. name is the optional name
888914
of the member.
@@ -1190,6 +1216,10 @@ def _create_gnu_long_header(cls, name, type, encoding, errors):
11901216
info["type"] = type
11911217
info["size"] = len(name)
11921218
info["magic"] = GNU_MAGIC
1219+
info["mode"] = 0o100644
1220+
uname, gname = _get_user_group_names(0, 0, cls._unames, cls._gnames)
1221+
info["uname"] = uname or ""
1222+
info["gname"] = gname or ""
11931223

11941224
# create extended header + name blocks.
11951225
return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
@@ -2141,22 +2171,12 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None):
21412171
tarinfo.type = type
21422172
tarinfo.linkname = linkname
21432173

2144-
# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To
2145-
# speed things up, cache the resolved usernames and group names.
2146-
if pwd:
2147-
if tarinfo.uid not in self._unames:
2148-
try:
2149-
self._unames[tarinfo.uid] = pwd.getpwuid(tarinfo.uid)[0]
2150-
except KeyError:
2151-
self._unames[tarinfo.uid] = ''
2152-
tarinfo.uname = self._unames[tarinfo.uid]
2153-
if grp:
2154-
if tarinfo.gid not in self._gnames:
2155-
try:
2156-
self._gnames[tarinfo.gid] = grp.getgrgid(tarinfo.gid)[0]
2157-
except KeyError:
2158-
self._gnames[tarinfo.gid] = ''
2159-
tarinfo.gname = self._gnames[tarinfo.gid]
2174+
uname, gname = _get_user_group_names(tarinfo.uid, tarinfo.gid,
2175+
self._unames, self._gnames)
2176+
if uname != None:
2177+
tarinfo.uname = uname
2178+
if gname != None:
2179+
tarinfo.gname = gname
21602180

21612181
if type in (CHRTYPE, BLKTYPE):
21622182
if hasattr(os, "major") and hasattr(os, "minor"):

Lib/test/test_tarfile.py

+34
Original file line number | Diff line number | Diff line change
@@ -1878,6 +1878,40 @@ def test_longnamelink_1025(self):
18781878
self._test(("longnam/" * 127) + "longname_",
18791879
("longlnk/" * 127) + "longlink_")
18801880

1881+
def test_hidden_header_for_longname(self):
    """Check the mode/uname/gname stored in GNU long-name helper headers.

    Writes a single directory entry with a 120-character name in GNU
    format, then re-reads the archive with a TarInfo subclass that
    inspects every GNUTYPE_LONGNAME / GNUTYPE_LONGLINK header before it is
    processed.
    """
    # Regression test for gh-130819.
    tester = self

    class RawTarInfo(tarfile.TarInfo):

        def _proc_member(self, tar_file):
            if self.type in (tarfile.GNUTYPE_LONGNAME,
                             tarfile.GNUTYPE_LONGLINK):
                tester.assertEqual(self.mode, 0o644)
                unames_cache = RawTarInfo._unames
                gnames_cache = RawTarInfo._gnames
                # A non-empty cache must hold an entry for id 0.
                if unames_cache:
                    tester.assertIn(0, unames_cache)
                if gnames_cache:
                    tester.assertIn(0, gnames_cache)
                # With no cached entry the header fields are expected
                # to be empty strings.
                tester.assertEqual(self.uname, unames_cache.get(0, ""))
                tester.assertEqual(self.gname, gnames_cache.get(0, ""))
            return super()._proc_member(tar_file)

    # Context managers close both archives and the buffer even when an
    # assertion or a tarfile call raises part-way through.
    with io.BytesIO() as memory_file:
        with tarfile.open(mode="w", fileobj=memory_file,
                          format=tarfile.GNU_FORMAT) as tar:
            tar_info = tarfile.TarInfo("abcdef" * 20)
            tar_info.type = tarfile.DIRTYPE
            tar.addfile(tar_info, None)

        memory_file.seek(0)
        with tarfile.open(fileobj=memory_file, mode="r",
                          tarinfo=RawTarInfo) as tar:
            members = tar.getmembers()
            self.assertEqual(len(members), 1)
1914+
18811915

18821916
class DeviceHeaderTest(WriteTestBase, unittest.TestCase):
18831917

Misc/ACKS

+1
Original file line number | Diff line number | Diff line change
@@ -665,6 +665,7 @@ Mikhail Golubev
665665
Marta Gómez Macías
666666
Guilherme Gonçalves
667667
Tiago Gonçalves
668+
Dahan Gong
668669
Chris Gonnerman
669670
Shelley Gooch
670671
David Goodger
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Emit ``mode``, ``uname`` and ``gname`` fields for long paths in
2+
:mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU
3+
``tar(1)``.

0 commit comments

Comments
 (0)