Harden tar extraction in kamal/vision/datasets against path traversal.

Every `tar.extractall(...)` call is replaced by a local `safe_extract`
helper that checks each member's destination path before extracting.
The containment test uses os.path.commonpath, which compares whole path
components.  os.path.commonprefix compares character by character, so a
sibling directory such as "/data/root_evil" would be accepted as being
inside "/data/root".

Only member names are checked; the targets of symlink or hardlink members
are not inspected by this patch.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and blank context lines are reconstructed, and the index lines
still carry the hashes of the original patch.  Verify against the target
files, or apply with `git apply --ignore-whitespace` / `patch -l`.

diff --git a/kamal/vision/datasets/caltech.py b/kamal/vision/datasets/caltech.py
index 885f1c8..6324d4a 100644
--- a/kamal/vision/datasets/caltech.py
+++ b/kamal/vision/datasets/caltech.py
@@ -132,10 +132,48 @@ def download(self):
 
         # extract file
         with tarfile.open(os.path.join(self.root, "101_ObjectCategories.tar.gz"), "r:gz") as tar:
-            tar.extractall(path=self.root)
+            def is_within_directory(directory, target):
+                """Return True if abspath(target) is directory or lies beneath it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # commonpath matches whole components; commonprefix is char-wise
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path, refusing archives with members outside it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=self.root)
 
         with tarfile.open(os.path.join(self.root, "101_Annotations.tar"), "r:") as tar:
-            tar.extractall(path=self.root)
+            def is_within_directory(directory, target):
+                """Return True if abspath(target) is directory or lies beneath it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # commonpath matches whole components; commonprefix is char-wise
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path, refusing archives with members outside it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=self.root)
 
     def extra_repr(self):
         return "Target type: {target_type}".format(**self.__dict__)
@@ -223,4 +261,23 @@ def download(self):
 
         # extract file
         with tarfile.open(os.path.join(self.root, "256_ObjectCategories.tar"), "r:") as tar:
-            tar.extractall(path=self.root)
+            def is_within_directory(directory, target):
+                """Return True if abspath(target) is directory or lies beneath it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # commonpath matches whole components; commonprefix is char-wise
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path, refusing archives with members outside it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=self.root)
diff --git a/kamal/vision/datasets/cub200.py b/kamal/vision/datasets/cub200.py
index 56d00df..540300f 100644
--- a/kamal/vision/datasets/cub200.py
+++ b/kamal/vision/datasets/cub200.py
@@ -51,7 +51,26 @@ def download(self):
         download_url(self.url, self.root, self.filename)
         print("Extracting %s..." % self.filename)
         with tarfile.open(os.path.join(self.root, self.filename), "r:gz") as tar:
-            tar.extractall(path=self.root)
+            def is_within_directory(directory, target):
+                """Return True if abspath(target) is directory or lies beneath it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # commonpath matches whole components; commonprefix is char-wise
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path, refusing archives with members outside it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=self.root)
 
     def __len__(self):
         return len(self.data)
diff --git a/kamal/vision/datasets/fgvc_aircraft.py b/kamal/vision/datasets/fgvc_aircraft.py
index 591d0ee..13194ef 100644
--- a/kamal/vision/datasets/fgvc_aircraft.py
+++ b/kamal/vision/datasets/fgvc_aircraft.py
@@ -140,4 +140,23 @@ def download(self):
         download_url(self.url, self.root, 'fgvc-aircraft-2013b.tar.gz')
         print("Extracting fgvc-aircraft-2013b.tar.gz...")
         with tarfile.open(fpath, "r:gz") as tar:
-            tar.extractall(path=self.root)
+            def is_within_directory(directory, target):
+                """Return True if abspath(target) is directory or lies beneath it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # commonpath matches whole components; commonprefix is char-wise
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path, refusing archives with members outside it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=self.root)
diff --git a/kamal/vision/datasets/stanford_cars.py b/kamal/vision/datasets/stanford_cars.py
index 4707ee1..f707cfa 100644
--- a/kamal/vision/datasets/stanford_cars.py
+++ b/kamal/vision/datasets/stanford_cars.py
@@ -74,7 +74,26 @@ def download(self):
             if fname.endswith('tgz'):
                 print("Extracting %s..." % fname)
                 with tarfile.open(os.path.join(self.root, fname), "r:gz") as tar:
-                    tar.extractall(path=self.root)
+                    def is_within_directory(directory, target):
+                        """Return True if abspath(target) is directory or lies beneath it."""
+                        abs_directory = os.path.abspath(directory)
+                        abs_target = os.path.abspath(target)
+                        # commonpath matches whole components; commonprefix is char-wise
+                        prefix = os.path.commonpath([abs_directory, abs_target])
+
+                        return prefix == abs_directory
+
+                    def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                        """Extract tar into path, refusing archives with members outside it."""
+                        for member in tar.getmembers():
+                            member_path = os.path.join(path, member.name)
+                            if not is_within_directory(path, member_path):
+                                raise Exception("Attempted Path Traversal in Tar File")
+
+                        tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+                    safe_extract(tar, path=self.root)
 
         copyfile(os.path.join(self.root, 'cars_test_annos_withlabels.mat'),
                  os.path.join(self.root, 'devkit', 'cars_test_annos_withlabels.mat'))
diff --git a/kamal/vision/datasets/stanford_dogs.py b/kamal/vision/datasets/stanford_dogs.py
index 644ace9..845c104 100644
--- a/kamal/vision/datasets/stanford_dogs.py
+++ b/kamal/vision/datasets/stanford_dogs.py
@@ -55,4 +55,23 @@ def download(self):
             # extract file
             print("Extracting %s..." % fname)
             with tarfile.open(os.path.join(self.root, fname), "r") as tar:
-                tar.extractall(path=self.root)
+                def is_within_directory(directory, target):
+                    """Return True if abspath(target) is directory or lies beneath it."""
+                    abs_directory = os.path.abspath(directory)
+                    abs_target = os.path.abspath(target)
+                    # commonpath matches whole components; commonprefix is char-wise
+                    prefix = os.path.commonpath([abs_directory, abs_target])
+
+                    return prefix == abs_directory
+
+                def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                    """Extract tar into path, refusing archives with members outside it."""
+                    for member in tar.getmembers():
+                        member_path = os.path.join(path, member.name)
+                        if not is_within_directory(path, member_path):
+                            raise Exception("Attempted Path Traversal in Tar File")
+
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+                safe_extract(tar, path=self.root)
diff --git a/kamal/vision/datasets/voc.py b/kamal/vision/datasets/voc.py
index 688b7cc..91ee938 100644
--- a/kamal/vision/datasets/voc.py
+++ b/kamal/vision/datasets/voc.py
@@ -149,7 +149,26 @@ def decode_fn(cls, mask):
 def download_extract(url, root, filename, md5):
     download_url(url, root, filename, md5)
     with tarfile.open(os.path.join(root, filename), "r") as tar:
-        tar.extractall(path=root)
+        def is_within_directory(directory, target):
+            """Return True if abspath(target) is directory or lies beneath it."""
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            # commonpath matches whole components; commonprefix is char-wise
+            prefix = os.path.commonpath([abs_directory, abs_target])
+
+            return prefix == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            """Extract tar into path, refusing archives with members outside it."""
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+        safe_extract(tar, path=root)
 
 
 CLASSES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']