Skip to content

Commit f113c50

Browse files
authored
Merge pull request #71 from emontnemery/filter_full_match
Modify atomic_contents_add to accept a filter function
2 parents c5c02e3 + 198af66 commit f113c50

File tree

2 files changed: +55 -51 lines changed

securetar/__init__.py

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from __future__ import annotations
44

5-
from collections.abc import Generator
5+
from collections.abc import Callable, Generator
66
import hashlib
77
import logging
88
import os
@@ -381,41 +381,45 @@ def secure_path(tar: tarfile.TarFile) -> Generator[tarfile.TarInfo, None, None]:
381381
yield member
382382

383383

384-
def _is_excluded_by_filter(path: PurePath, exclude_list: list[str]) -> bool:
385-
"""Filter to filter excludes."""
386-
387-
for exclude in exclude_list:
388-
if not path.match(exclude):
389-
continue
390-
_LOGGER.debug("Ignoring %s because of %s", path, exclude)
391-
return True
392-
393-
return False
394-
395-
396384
def atomic_contents_add(
397385
tar_file: tarfile.TarFile,
398386
origin_path: Path,
399-
excludes: list[str],
387+
file_filter: Callable[[PurePath], bool],
400388
arcname: str = ".",
401389
) -> None:
402-
"""Append directories and/or files to the TarFile if excludes wont filter."""
390+
"""Append directories and/or files to the TarFile if file_filter returns False.
391+
392+
:param file_filter: A filter function, should return True if the item should
393+
be excluded from the archive. The function should take a single argument, a
394+
pathlib.PurePath object representing the relative path of the item to be archived.
395+
"""
403396

404-
if _is_excluded_by_filter(origin_path, excludes):
397+
if file_filter(PurePath(arcname)):
405398
return None
399+
return _atomic_contents_add(tar_file, origin_path, file_filter, arcname)
400+
401+
402+
def _atomic_contents_add(
403+
tar_file: tarfile.TarFile,
404+
origin_path: Path,
405+
file_filter: Callable[[PurePath], bool],
406+
arcname: str,
407+
) -> None:
408+
"""Append directories and/or files to the TarFile if file_filter returns False."""
406409

407410
# Add directory only (recursive=False) to ensure we also archive empty directories
408411
tar_file.add(origin_path.as_posix(), arcname=arcname, recursive=False)
409412

410413
for directory_item in origin_path.iterdir():
411-
if _is_excluded_by_filter(directory_item, excludes):
414+
item_arcpath = PurePath(arcname, directory_item.name)
415+
if file_filter(PurePath(item_arcpath)):
412416
continue
413417

414-
arcpath = PurePath(arcname, directory_item.name).as_posix()
418+
item_arcname = item_arcpath.as_posix()
415419
if directory_item.is_dir() and not directory_item.is_symlink():
416-
atomic_contents_add(tar_file, directory_item, excludes, arcpath)
420+
_atomic_contents_add(tar_file, directory_item, file_filter, item_arcname)
417421
continue
418422

419-
tar_file.add(directory_item.as_posix(), arcname=arcpath, recursive=False)
423+
tar_file.add(directory_item.as_posix(), arcname=item_arcname, recursive=False)
420424

421425
return None

tests/test_tar.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@
88
import time
99
from dataclasses import dataclass
1010
from pathlib import Path, PurePath
11-
from unittest.mock import patch
11+
from unittest.mock import Mock, patch
1212
import pytest
1313

1414
from securetar import (
1515
SecureTarFile,
16-
_is_excluded_by_filter,
1716
_add_stream,
1817
atomic_contents_add,
1918
secure_path,
@@ -48,32 +47,33 @@ def test_not_secure_path() -> None:
4847
assert [] == list(secure_path(test_list))
4948

5049

51-
def test_is_excluded_by_filter_good() -> None:
50+
def test_file_filter(tmp_path: Path) -> None:
5251
"""Test exclude filter."""
53-
filter_list = ["not/match", "/dev/xy"]
54-
test_list = [
55-
PurePath("test.txt"),
56-
PurePath("data/xy.blob"),
57-
PurePath("bla/blu/ble"),
58-
PurePath("data/../xy.blob"),
59-
]
60-
61-
for path_object in test_list:
62-
assert _is_excluded_by_filter(path_object, filter_list) is False
63-
64-
65-
def test_is_exclude_by_filter_bad() -> None:
66-
"""Test exclude filter."""
67-
filter_list = ["*.txt", "data/*", "bla/blu/ble"]
68-
test_list = [
69-
PurePath("test.txt"),
70-
PurePath("data/xy.blob"),
71-
PurePath("bla/blu/ble"),
72-
PurePath("data/test_files/kk.txt"),
73-
]
52+
file_filter = Mock(return_value=False)
53+
# Prepare test folder
54+
temp_orig = tmp_path.joinpath("orig")
55+
fixture_data = Path(__file__).parent.joinpath("fixtures/tar_data")
56+
shutil.copytree(fixture_data, temp_orig, symlinks=True)
7457

75-
for path_object in test_list:
76-
assert _is_excluded_by_filter(path_object, filter_list) is True
58+
# Create Tarfile
59+
temp_tar = tmp_path.joinpath("backup.tar")
60+
with SecureTarFile(temp_tar, "w") as tar_file:
61+
atomic_contents_add(
62+
tar_file,
63+
temp_orig,
64+
file_filter=file_filter,
65+
arcname=".",
66+
)
67+
paths = [call[1][0] for call in file_filter.mock_calls]
68+
expected_paths = {
69+
PurePath("."),
70+
PurePath("README.md"),
71+
PurePath("test_symlink"),
72+
PurePath("test1"),
73+
PurePath("test1/script.sh"),
74+
}
75+
assert len(paths) == len(expected_paths)
76+
assert set(paths) == expected_paths
7777

7878

7979
@pytest.mark.parametrize("bufsize", [10240, 4 * 2**20])
@@ -90,7 +90,7 @@ def test_create_pure_tar(tmp_path: Path, bufsize: int) -> None:
9090
atomic_contents_add(
9191
tar_file,
9292
temp_orig,
93-
excludes=[],
93+
file_filter=lambda _: False,
9494
arcname=".",
9595
)
9696

@@ -134,7 +134,7 @@ def test_create_encrypted_tar(tmp_path: Path, bufsize: int) -> None:
134134
atomic_contents_add(
135135
tar_file,
136136
temp_orig,
137-
excludes=[],
137+
file_filter=lambda _: False,
138138
arcname=".",
139139
)
140140

@@ -193,7 +193,7 @@ def test_gzipped_tar_inside_tar(tmp_path: Path) -> None:
193193
atomic_contents_add(
194194
inner_tar_file,
195195
temp_orig,
196-
excludes=[],
196+
file_filter=lambda _: False,
197197
arcname=".",
198198
)
199199

@@ -271,7 +271,7 @@ def test_gzipped_tar_inside_tar_failure(tmp_path: Path) -> None:
271271
atomic_contents_add(
272272
inner_tar_file,
273273
temp_orig,
274-
excludes=[],
274+
file_filter=lambda _: False,
275275
arcname=".",
276276
)
277277
raise ValueError("Test")
@@ -336,7 +336,7 @@ def test_encrypted_gzipped_tar_inside_tar(tmp_path: Path, bufsize: int) -> None:
336336
atomic_contents_add(
337337
inner_tar_file,
338338
temp_orig,
339-
excludes=[],
339+
file_filter=lambda _: False,
340340
arcname=".",
341341
)
342342

0 commit comments

Comments
 (0)