diff --git a/test/test_crawl.py b/test/test_crawl.py
index c4d9806be..70ccec352 100644
--- a/test/test_crawl.py
+++ b/test/test_crawl.py
@@ -66,7 +66,26 @@ def test_browser_profile_coverage(default_params, task_manager_creator):
     # Extract crawl profile
     ff_db_tar = browser_params[0].profile_archive_dir / "profile.tar.gz"
     with tarfile.open(ff_db_tar) as tar:
-        tar.extractall(browser_params[0].profile_archive_dir)
+        def is_within_directory(directory, target):
+
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+
+            prefix = os.path.commonprefix([abs_directory, abs_target])
+
+            return prefix == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+        safe_extract(tar, browser_params[0].profile_archive_dir)
 
     # Output databases
     ff_db = browser_params[0].profile_archive_dir / "places.sqlite"
diff --git a/test/test_profile.py b/test/test_profile.py
index bfab82bf9..ba8a19a59 100644
--- a/test/test_profile.py
+++ b/test/test_profile.py
@@ -262,7 +262,29 @@ def undo_monkeypatch():
     # Check that a consistent profile is used for stateful crawls but
     # not for stateless crawls
     with tarfile.open(tar_path) as tar:
-        tar.extractall(tar_directory)
+
+        import os
+
+        def is_within_directory(directory, target):
+
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+
+            prefix = os.path.commonprefix([abs_directory, abs_target])
+
+            return prefix == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+        safe_extract(tar, tar_directory)
     ff_db = tar_directory / "places.sqlite"
     rows = db_utils.query_db(ff_db, "SELECT url FROM moz_places")
     places = [url for (url,) in rows]
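
Note: this patch is the standard hand-rolled mitigation for the tarfile path-traversal issue (CVE-2007-4559). On Python 3.12+ (and the security backports to 3.8-3.11), the stdlib's extraction filters from PEP 706 address the same issue without a custom safe_extract. A minimal sketch, reusing the names from the first hunk above:

    import tarfile

    # Sketch only: the "data" filter refuses absolute paths, members that
    # would land outside the destination directory, and links escaping it,
    # raising a tarfile.FilterError subclass instead of extracting them.
    with tarfile.open(ff_db_tar) as tar:
        tar.extractall(browser_params[0].profile_archive_dir, filter="data")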