Skip to content

Commit 8ab25e6

Browse files
Adding tarfile member sanitization to extractall()
1 parent abf10d7 commit 8ab25e6

File tree

2 files changed

+43
-2
lines changed

2 files changed

+43
-2
lines changed

test/test_crawl.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,26 @@ def test_browser_profile_coverage(default_params, task_manager_creator):
6666
# Extract crawl profile
6767
ff_db_tar = browser_params[0].profile_archive_dir / "profile.tar.gz"
6868
with tarfile.open(ff_db_tar) as tar:
69-
tar.extractall(browser_params[0].profile_archive_dir)
69+
def is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (which collapses ``..`` segments but does not resolve symlinks)
    before they are compared.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    # Compare whole path components. os.path.commonprefix works character
    # by character, so it would accept "/data/profile_evil" as being
    # inside "/data/profile".
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share an ancestor (for example
        # different drives on Windows), so target is outside directory.
        return False

    return common == abs_directory
77+
78+
def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting members that would escape it.

    Every member of the archive is checked, even when *members* selects a
    subset. A member is rejected if its own name, or the target of a
    symbolic or hard link it describes, resolves to a location outside
    *path*.

    Args:
        tar: An open ``tarfile.TarFile``.
        path: Destination directory; defaults to the current directory.
        members: Optional subset of members, passed through to
            ``TarFile.extractall``.
        numeric_owner: Passed through to ``TarFile.extractall``.

    Raises:
        Exception: If any member or link target falls outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link whose target is outside the destination lets a later
        # member be written through it, so link targets are checked too.
        if member.issym():
            # Symlink targets are relative to the link's own directory.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are named relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue
        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)
86+
87+
88+
# Unpack the archived profile into the directory that holds the tarball.
safe_extract(tar, browser_params[0].profile_archive_dir)
7089

7190
# Output databases
7291
ff_db = browser_params[0].profile_archive_dir / "places.sqlite"

test/test_profile.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,29 @@ def undo_monkeypatch():
261261
# Check that a consistent profile is used for stateful crawls but
262262
# not for stateless crawls
263263
with tarfile.open(tar_path) as tar:
264-
tar.extractall(tar_directory)
264+
265+
import os
266+
267+
def is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (which collapses ``..`` segments but does not resolve symlinks)
    before they are compared.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    # Compare whole path components. os.path.commonprefix works character
    # by character, so it would accept "/data/profile_evil" as being
    # inside "/data/profile".
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share an ancestor (for example
        # different drives on Windows), so target is outside directory.
        return False

    return common == abs_directory
275+
276+
def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting members that would escape it.

    Every member of the archive is checked, even when *members* selects a
    subset. A member is rejected if its own name, or the target of a
    symbolic or hard link it describes, resolves to a location outside
    *path*.

    Args:
        tar: An open ``tarfile.TarFile``.
        path: Destination directory; defaults to the current directory.
        members: Optional subset of members, passed through to
            ``TarFile.extractall``.
        numeric_owner: Passed through to ``TarFile.extractall``.

    Raises:
        Exception: If any member or link target falls outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link whose target is outside the destination lets a later
        # member be written through it, so link targets are checked too.
        if member.issym():
            # Symlink targets are relative to the link's own directory.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are named relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue
        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)
284+
285+
286+
safe_extract(tar, tar_directory)
265287
ff_db = tar_directory / "places.sqlite"
266288
rows = db_utils.query_db(ff_db, "SELECT url FROM moz_places")
267289
places = [url for (url,) in rows]

0 commit comments

Comments
 (0)