Skip to content

Commit bba381d

Browse files
authored
Merge pull request #225 from djarecka/enh/DirectoryType
adding hash_dir for Directory type (closes #209)
2 parents 638957c + 6b2a7dd commit bba381d

File tree

4 files changed

+65
-5
lines changed

4 files changed

+65
-5
lines changed

pydra/engine/helpers.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
from hashlib import sha256
1010
import subprocess as sp
1111

12-
from .specs import Runtime, File, attr_fields
13-
from .helpers_file import hash_file, copyfile, is_existing_file
12+
from .specs import Runtime, File, Directory, attr_fields
13+
from .helpers_file import hash_file, hash_dir, copyfile, is_existing_file
1414

1515

1616
def ensure_list(obj, tuple2list=False):
@@ -459,6 +459,12 @@ def hash_value(value, tp=None, metadata=None):
459459
and "container_path" not in metadata
460460
):
461461
return hash_file(value)
462+
elif (
463+
(tp is File or "pydra.engine.specs.Directory" in str(tp))
464+
and is_existing_file(value)
465+
and "container_path" not in metadata
466+
):
467+
return hash_dir(value)
462468
else:
463469
return value
464470

pydra/engine/helpers_file.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def hash_file(afile, chunk_len=8192, crypto=sha256, raise_notfound=True):
113113

114114
if afile is None or isinstance(afile, LazyField) or isinstance(afile, list):
115115
return None
116-
if not os.path.isfile(afile):
116+
if not Path(afile).is_file():
117117
if raise_notfound:
118118
raise RuntimeError('File "%s" not found.' % afile)
119119
return None
@@ -128,6 +128,29 @@ def hash_file(afile, chunk_len=8192, crypto=sha256, raise_notfound=True):
128128
return crypto_obj.hexdigest()
129129

130130

131+
def hash_dir(dirpath, raise_notfound=True):
    """Compute a nested list of file hashes describing a directory tree.

    Parameters
    ----------
    dirpath : path-like
        Directory to hash. ``None``, ``LazyField`` instances and lists are
        not hashed; ``None`` is returned for them.
    raise_notfound : bool
        When true (default), raise if ``dirpath`` is not an existing
        directory; when false, return ``None`` instead.

    Returns
    -------
    list or None
        One element per directory entry, in sorted path order: the result of
        ``hash_file`` for a regular file, and a nested list (built the same
        way) for any other entry.

    Raises
    ------
    RuntimeError
        If ``dirpath`` is not a directory and ``raise_notfound`` is true.
    """
    from .specs import LazyField

    if dirpath is None or isinstance(dirpath, (LazyField, list)):
        return None
    if not Path(dirpath).is_dir():
        if raise_notfound:
            raise RuntimeError(f"Directory {dirpath} not found.")
        return None

    def search_dir(path):
        # iterdir() yields entries in arbitrary order, so sort them to make
        # the resulting hash list reproducible across runs and filesystems.
        # Each entry already contains `path` as its prefix; recurse on the
        # entry itself (joining it onto `path` again would duplicate the
        # prefix whenever `path` is relative).
        return [
            hash_file(entry) if entry.is_file() else search_dir(entry)
            for entry in sorted(Path(path).iterdir())
        ]

    return search_dir(dirpath)
152+
153+
131154
def _parse_mount_table(exit_code, output):
132155
"""
133156
Parse the output of ``mount`` to produce (path, fs_type) pairs.

pydra/engine/tests/test_helpers.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .utils import multiply
77
from ..helpers import hash_value, hash_function, save, create_pyscript
88
from .. import helpers_file
9-
from ..specs import File
9+
from ..specs import File, Directory
1010

1111

1212
def test_save(tmpdir):
@@ -110,6 +110,7 @@ def test_hash_value_files(tmpdir):
110110
assert hash_value(file_1, tp=File) == hash_value(file_2, tp=File)
111111
assert hash_value(file_1, tp=str) != hash_value(file_2, tp=str)
112112
assert hash_value(file_1) != hash_value(file_2)
113+
assert hash_value(file_1, tp=File) == helpers_file.hash_file(file_1)
113114

114115

115116
def test_hash_value_files_list(tmpdir):
@@ -124,3 +125,33 @@ def test_hash_value_files_list(tmpdir):
124125
hash_value(file_1, tp=File),
125126
hash_value(file_2, tp=File),
126127
]
128+
129+
130+
def test_hash_value_dir(tmpdir):
    """Check hash_value on a flat directory holding two text files.

    The Directory hash must equal both the File hash of the list of the two
    files and the direct result of ``helpers_file.hash_dir``.
    """
    file_1 = tmpdir.join("file_1.txt")
    file_2 = tmpdir.join("file_2.txt")
    # Create the two files with their respective contents.
    for target, text in ((file_1, "hello"), (file_2, "hi")):
        with open(target, "w") as handle:
            handle.write(text)

    files_as_list = [file_1, file_2]
    assert hash_value(tmpdir, tp=Directory) == hash_value(files_as_list, tp=File)
    assert hash_value(tmpdir, tp=Directory) == helpers_file.hash_dir(tmpdir)
140+
141+
142+
def test_hash_value_nested(tmpdir):
    """Check hash_value on a directory that contains a sub-directory.

    The Directory hash must equal the File hash of the correspondingly
    nested list of files and the direct result of ``helpers_file.hash_dir``.
    """
    nested = tmpdir.mkdir("nested")
    file_1 = tmpdir.join("file_1.txt")
    file_2 = nested.join("file_2.txt")
    file_3 = nested.join("file_3.txt")
    # One file at the top level, two inside the "nested" sub-directory.
    contents = ((file_1, "hello"), (file_2, "hi"), (file_3, "hola"))
    for target, text in contents:
        with open(target, "w") as handle:
            handle.write(text)

    nested_layout = [file_1, [file_2, file_3]]
    assert hash_value(tmpdir, tp=Directory) == hash_value(nested_layout, tp=File)
    assert hash_value(tmpdir, tp=Directory) == helpers_file.hash_dir(tmpdir)

pydra/engine/tests/test_helpers_file.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def test_copyfiles(_temp_analyze_files, _temp_analyze_files_prime):
107107

108108

109109
def test_linkchain(_temp_analyze_files):
110-
if os.name is not "posix":
110+
if os.name != "posix":
111111
return
112112
orig_img, orig_hdr = _temp_analyze_files
113113
pth, fname = os.path.split(orig_img)

0 commit comments

Comments
 (0)