|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
| 3 | +import copy |
3 | 4 | import io |
4 | 5 | import shutil |
5 | 6 | import tarfile |
6 | 7 | from typing import TYPE_CHECKING, BinaryIO |
7 | | -from unittest.mock import patch |
8 | 8 |
|
9 | 9 | from acquire.crypt import EncryptedStream |
10 | 10 | from acquire.outputs.base import Output |
|
17 | 17 | TAR_COMPRESSION_METHODS = {"gzip": "gz", "bzip2": "bz2", "xz": "xz"} |
18 | 18 |
|
19 | 19 |
|
def copyfileobj(
    src: BinaryIO, dst: BinaryIO, length: int | None = None, _: Exception = OSError, bufsize: int | None = None
) -> None:
    """Copy ``length`` bytes from ``src`` to ``dst``, zero-padding when ``src`` ends early.

    Patched version of the ``copyfileobj`` function from the Python stdlib (``tarfile.py``),
    to handle cases where the source file is actually shorter than expected.
    By patching the missing bytes with zeroes, we avoid raising an exception
    and potentially corrupting the tar file.

    A read that returns fewer bytes than requested is not treated as end of data:
    the source is read again until the chunk is complete or an empty read signals
    that it is exhausted. Padding is only ever appended after the last real byte,
    so sources that legitimately return partial reads are copied intact.

    Args:
        src: Readable binary file-like object.
        dst: Writable binary file-like object.
        length: Number of bytes to write to ``dst``. ``None`` copies until ``src`` is
            exhausted (no padding); ``0`` copies nothing.
        _: Unused. Kept so the signature stays compatible with the stdlib function,
            which takes an exception type in this position.
        bufsize: Maximum number of bytes handled per chunk. Defaults to 16 KiB when
            ``None`` or ``0``.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    remaining = length
    exhausted = False
    while remaining > 0:
        # Never ask for more than bufsize bytes at a time, which also prevents "long reads".
        want = min(bufsize, remaining)
        parts = []
        got = 0

        # Keep reading until the chunk is full; only an empty read means the source is done.
        while not exhausted and got < want:
            data = src.read(want - got)
            if not data:
                exhausted = True
                break
            parts.append(data)
            got += len(data)

        if got < want:
            # PATCH; instead of raising an exception, pad the data to the desired length
            parts.append(b"\x00" * (want - got))

        dst.write(b"".join(parts))
        remaining -= want
55 | | - |
56 | | - |
57 | 20 | class TarOutput(Output): |
58 | 21 | """Tar archive acquire output format. Output can be compressed and/or encrypted. |
59 | 22 |
|
@@ -139,8 +102,54 @@ def write( |
139 | 102 | if stat: |
140 | 103 | info.mtime = stat.st_mtime |
141 | 104 |
|
142 | | - with patch("tarfile.copyfileobj", copyfileobj): |
143 | | - self.tar.addfile(info, fh) |
| 105 | + # Inline version of Python stdlib's tarfile.addfile & tarfile.copyfileobj, |
| 106 | + # to allow for padding and more control over the tar file writing. |
| 107 | + self.tar._check("awx") |
| 108 | + |
| 109 | + if fh is None and info.isreg() and info.size != 0: |
| 110 | + raise ValueError("fileobj not provided for non zero-size regular file") |
| 111 | + |
| 112 | + info = copy.copy(info) |
| 113 | + |
| 114 | + buf = info.tobuf(self.tar.format, self.tar.encoding, self.tar.errors) |
| 115 | + self.tar.fileobj.write(buf) |
| 116 | + self.tar.offset += len(buf) |
| 117 | + bufsize = self.tar.copybufsize |
| 118 | + if fh is not None: |
| 119 | + bufsize = bufsize or 16 * 1024 |
| 120 | + |
| 121 | + if info.size == 0: |
| 122 | + return |
| 123 | + if info.size is None: |
| 124 | + shutil.copyfileobj(fh, self.tar.fileobj, bufsize) |
| 125 | + return |
| 126 | + |
| 127 | + blocks, remainder = divmod(info.size, bufsize) |
| 128 | + for _ in range(blocks): |
| 129 | + # Prevents "long reads" because it reads at max bufsize bytes at a time |
| 130 | + buf = fh.read(bufsize) |
| 131 | + if len(buf) < bufsize: |
| 132 | + # raise exception("unexpected end of data") |
| 133 | + # PATCH; instead of raising an exception, pad the data to the desired length |
| 134 | + buf += tarfile.NUL * (bufsize - len(buf)) |
| 135 | + self.tar.fileobj.write(buf) |
| 136 | + |
| 137 | + if remainder != 0: |
| 138 | + # Prevents "long reads" because it reads at max bufsize bytes at a time |
| 139 | + buf = fh.read(remainder) |
| 140 | + if len(buf) < remainder: |
| 141 | + # raise exception("unexpected end of data") |
| 142 | + # PATCH; instead of raising an exception, pad the data to the desired length |
| 143 | + buf += tarfile.NUL * (remainder - len(buf)) |
| 144 | + self.tar.fileobj.write(buf) |
| 145 | + |
| 146 | + blocks, remainder = divmod(info.size, tarfile.BLOCKSIZE) |
| 147 | + if remainder > 0: |
| 148 | + self.tar.fileobj.write(tarfile.NUL * (tarfile.BLOCKSIZE - remainder)) |
| 149 | + blocks += 1 |
| 150 | + self.tar.offset += blocks * tarfile.BLOCKSIZE |
| 151 | + |
| 152 | + self.tar.members.append(info) |
144 | 153 |
|
145 | 154 | def close(self) -> None: |
146 | 155 | """Closes the tar file.""" |
|
0 commit comments