Skip to content

Commit 57a9c5b

Browse files
author
Joost Jansen
committed
Also inline 'addfile' function of tarfile
1 parent c23418a commit 57a9c5b

File tree

2 files changed

+50
-41
lines changed

2 files changed

+50
-41
lines changed

acquire/outputs/tar.py

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from __future__ import annotations
22

3+
import copy
34
import io
45
import shutil
56
import tarfile
67
from typing import TYPE_CHECKING, BinaryIO
7-
from unittest.mock import patch
88

99
from acquire.crypt import EncryptedStream
1010
from acquire.outputs.base import Output
@@ -17,43 +17,6 @@
1717
TAR_COMPRESSION_METHODS = {"gzip": "gz", "bzip2": "bz2", "xz": "xz"}
1818

1919

20-
def copyfileobj(
21-
src: BinaryIO, dst: BinaryIO, length: int | None = None, _: Exception = OSError, bufsize: int | None = None
22-
) -> None:
23-
"""
24-
Patched version of the copyfileobj function from the Python stdlib (tarfile.py),
25-
to handle cases where the source file is actually shorter than expected.
26-
By patching the missing bytes with zeroes, we avoid raising an exception
27-
and potentially corrupting the tar file.
28-
"""
29-
bufsize = bufsize or 16 * 1024
30-
if length == 0:
31-
return
32-
if length is None:
33-
shutil.copyfileobj(src, dst, bufsize)
34-
return
35-
36-
blocks, remainder = divmod(length, bufsize)
37-
for _ in range(blocks):
38-
# Already prevents "long reads" because it reads at max bufsize bytes at a time
39-
buf = src.read(bufsize)
40-
if len(buf) < bufsize:
41-
# raise exception("unexpected end of data")
42-
# PATCH; instead of raising an exception, pad the data to the desired length
43-
buf += b"\x00" * (bufsize - len(buf))
44-
dst.write(buf)
45-
46-
if remainder != 0:
47-
# Already prevents "long reads" because it reads at max bufsize bytes at a time
48-
buf = src.read(remainder)
49-
if len(buf) < remainder:
50-
# raise exception("unexpected end of data")
51-
# PATCH; instead of raising an exception, pad the data to the desired length
52-
buf += b"\x00" * (remainder - len(buf))
53-
dst.write(buf)
54-
return
55-
56-
5720
class TarOutput(Output):
5821
"""Tar archive acquire output format. Output can be compressed and/or encrypted.
5922
@@ -139,8 +102,54 @@ def write(
139102
if stat:
140103
info.mtime = stat.st_mtime
141104

142-
with patch("tarfile.copyfileobj", copyfileobj):
143-
self.tar.addfile(info, fh)
105+
# Inline version of Python stdlib's tarfile.addfile & tarfile.copyfileobj,
106+
# to allow for padding and more control over the tar file writing.
107+
self.tar._check("awx")
108+
109+
if fh is None and info.isreg() and info.size != 0:
110+
raise ValueError("fileobj not provided for non zero-size regular file")
111+
112+
info = copy.copy(info)
113+
114+
buf = info.tobuf(self.tar.format, self.tar.encoding, self.tar.errors)
115+
self.tar.fileobj.write(buf)
116+
self.tar.offset += len(buf)
117+
bufsize = self.tar.copybufsize
118+
if fh is not None:
119+
bufsize = bufsize or 16 * 1024
120+
121+
if info.size == 0:
122+
return
123+
if info.size is None:
124+
shutil.copyfileobj(fh, self.tar.fileobj, bufsize)
125+
return
126+
127+
blocks, remainder = divmod(info.size, bufsize)
128+
for _ in range(blocks):
129+
# Prevents "long reads" because it reads at max bufsize bytes at a time
130+
buf = fh.read(bufsize)
131+
if len(buf) < bufsize:
132+
# raise exception("unexpected end of data")
133+
# PATCH; instead of raising an exception, pad the data to the desired length
134+
buf += tarfile.NUL * (bufsize - len(buf))
135+
self.tar.fileobj.write(buf)
136+
137+
if remainder != 0:
138+
# Prevents "long reads" because it reads at max bufsize bytes at a time
139+
buf = fh.read(remainder)
140+
if len(buf) < remainder:
141+
# raise exception("unexpected end of data")
142+
# PATCH; instead of raising an exception, pad the data to the desired length
143+
buf += tarfile.NUL * (remainder - len(buf))
144+
self.tar.fileobj.write(buf)
145+
146+
blocks, remainder = divmod(info.size, tarfile.BLOCKSIZE)
147+
if remainder > 0:
148+
self.tar.fileobj.write(tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
149+
blocks += 1
150+
self.tar.offset += blocks * tarfile.BLOCKSIZE
151+
152+
self.tar.members.append(info)
144153

145154
def close(self) -> None:
146155
"""Closes the tar file."""

tests/test_outputs_tar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def read(self, size: int) -> bytes:
8282

8383
content = b"some text"
8484

85-
content_padded = content[:-5] + b"\x00" * 5
85+
content_padded = content[:-5] + tarfile.NUL * 5
8686
file = ShrinkingFile(content)
8787

8888
tar_output = TarOutput(tmp_path / "race.tar", encrypt=True, public_key=public_key)

0 commit comments

Comments
 (0)