Skip to content

gh-130819: Update tarfile.py#_create_gnu_long_header to align with GNU Tar #130820

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,15 @@ sysconfig
(Contributed by Xuehai Pan in :gh:`131799`.)


tarfile
-------

* Emit the ``mode``, ``uname`` and ``gname`` fields in the GNU long-name and
long-link headers that :mod:`tarfile` writes for long paths, providing better
bit-for-bit compatibility with GNU ``tar(1)``.
(Contributed by Dahan Gong in :gh:`130820`.)


threading
---------

Expand Down
52 changes: 36 additions & 16 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,29 @@ def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
dst.write(buf)
return

def _get_user_group_names(uid, gid, unames_cache, gnames_cache):
# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To speed
# things up, cache the resolved usernames and group names.
if pwd:
if uid not in unames_cache:
try:
unames_cache[uid] = pwd.getpwuid(uid)[0]
except KeyError:
unames_cache[uid] = ''
uname = unames_cache[uid]
else:
uname = None
if grp:
if gid not in gnames_cache:
try:
gnames_cache[gid] = grp.getgrgid(gid)[0]
except KeyError:
gnames_cache[gid] = ''
gname = gnames_cache[gid]
else:
gname = None
return uname, gname

def _safe_print(s):
encoding = getattr(sys.stdout, 'encoding', None)
if encoding is not None:
Expand Down Expand Up @@ -883,6 +906,9 @@ class TarInfo(object):
_link_target = None,
)

_unames = {} # Cached mappings of uid=0 -> uname
_gnames = {} # Cached mappings of gid=0 -> gname

def __init__(self, name=""):
"""Construct a TarInfo object. name is the optional name
of the member.
Expand Down Expand Up @@ -1190,6 +1216,10 @@ def _create_gnu_long_header(cls, name, type, encoding, errors):
info["type"] = type
info["size"] = len(name)
info["magic"] = GNU_MAGIC
info["mode"] = 0o100644
uname, gname = _get_user_group_names(0, 0, cls._unames, cls._gnames)
info["uname"] = uname or ""
info["gname"] = gname or ""

# create extended header + name blocks.
return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Expand Down Expand Up @@ -2141,22 +2171,12 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None):
tarinfo.type = type
tarinfo.linkname = linkname

# Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To
# speed things up, cache the resolved usernames and group names.
if pwd:
if tarinfo.uid not in self._unames:
try:
self._unames[tarinfo.uid] = pwd.getpwuid(tarinfo.uid)[0]
except KeyError:
self._unames[tarinfo.uid] = ''
tarinfo.uname = self._unames[tarinfo.uid]
if grp:
if tarinfo.gid not in self._gnames:
try:
self._gnames[tarinfo.gid] = grp.getgrgid(tarinfo.gid)[0]
except KeyError:
self._gnames[tarinfo.gid] = ''
tarinfo.gname = self._gnames[tarinfo.gid]
uname, gname = _get_user_group_names(tarinfo.uid, tarinfo.gid,
self._unames, self._gnames)
if uname != None:
tarinfo.uname = uname
if gname != None:
tarinfo.gname = gname

if type in (CHRTYPE, BLKTYPE):
if hasattr(os, "major") and hasattr(os, "minor"):
Expand Down
34 changes: 34 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1878,6 +1878,40 @@ def test_longnamelink_1025(self):
self._test(("longnam/" * 127) + "longname_",
("longlnk/" * 127) + "longlink_")

def test_hidden_header_for_longname(self):
    # Regression test for gh-130819: the hidden GNU long-name header that
    # precedes a member with a long path must carry mode 0o644 and the
    # user/group names cached for uid/gid 0.
    test_case = self
    # Types of the long-name/long-link headers actually inspected while
    # reading.  Without this the test would pass vacuously if no such
    # header were ever written or parsed.
    seen_long_headers = []

    class RawTarInfo(tarfile.TarInfo):

        def _proc_member(self, tar_file):
            # Called on the raw header, before the long name is merged
            # into the following member, so its own fields are visible.
            if self.type in (tarfile.GNUTYPE_LONGNAME,
                             tarfile.GNUTYPE_LONGLINK):
                seen_long_headers.append(self.type)
                test_case.assertEqual(self.mode, 0o644)
                unames_cache = RawTarInfo._unames
                gnames_cache = RawTarInfo._gnames
                # The caches stay empty when pwd/grp are unavailable; if
                # they were filled, it must have been for id 0.
                if unames_cache:
                    test_case.assertIn(0, unames_cache)
                if gnames_cache:
                    test_case.assertIn(0, gnames_cache)
                test_case.assertEqual(self.uname, unames_cache.get(0, ""))
                test_case.assertEqual(self.gname, gnames_cache.get(0, ""))
            return super()._proc_member(tar_file)  # type: ignore

    with io.BytesIO() as memory_file:
        # Write a single directory member whose 120-character name is
        # intended to be long enough to need a GNU long-name header.
        with tarfile.open(mode="w", fileobj=memory_file,
                          format=tarfile.GNU_FORMAT) as tar:
            tar_info = tarfile.TarInfo("abcdef" * 20)
            tar_info.type = tarfile.DIRTYPE
            tar.addfile(tar_info, None)

        memory_file.seek(0)
        with tarfile.open(fileobj=memory_file, mode="r",
                          tarinfo=RawTarInfo) as tar:
            members = tar.getmembers()
        self.assertEqual(len(members), 1)

    # Exactly one hidden header (for the name; there is no link target)
    # must have gone through the checks above.
    self.assertEqual(seen_long_headers, [tarfile.GNUTYPE_LONGNAME])


class DeviceHeaderTest(WriteTestBase, unittest.TestCase):

Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ Mikhail Golubev
Marta Gómez Macías
Guilherme Gonçalves
Tiago Gonçalves
Dahan Gong
Chris Gonnerman
Shelley Gooch
David Goodger
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Emit the ``mode``, ``uname`` and ``gname`` fields in the GNU long-name and
long-link headers that :mod:`tarfile` writes for long paths, providing better
bit-for-bit compatibility with GNU ``tar(1)``.
Loading