Skip to content

Commit 8881e2f

Browse files
committed
refac: replaced os.path with pathlib across the application, with most of the changes in the registries module.
fix: from_git now correctly locates the repository content in the desired path set in model_config. Increased the timeout for Zenodo requests. ft: get_source methods are now different for TimeDependent and TimeIndependent. Expanded the git and Zenodo accessors to handle edge cases and to adapt to the type of model structure (e.g., src or files). tests: updated the corresponding unit tests, and expanded the integration tests to catch more edge cases when querying a model from a repository.
1 parent 52df516 commit 8881e2f

21 files changed

+628
-357
lines changed

floatcsep/experiment.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,7 @@ def __init__(
116116

117117
workdir = Path(kwargs.get("path", os.getcwd())).resolve()
118118
if kwargs.get("timestamp", False):
119-
rundir = Path(
120-
rundir, f"run_{datetime.datetime.utcnow().date().isoformat()}"
121-
)
119+
rundir = Path(rundir, f"run_{datetime.datetime.utcnow().date().isoformat()}")
122120
os.makedirs(Path(workdir, rundir), exist_ok=True)
123121

124122
self.name = name if name else "floatingExp"
@@ -360,7 +358,6 @@ def set_input_cat(self, tstring: str, model: Model) -> None:
360358
model (:class:`~floatcsep.model.Model`): Model to give the input
361359
catalog
362360
"""
363-
364361
self.catalog_repo.set_input_cat(tstring, model)
365362

366363
def set_tasks(self) -> None:
@@ -582,11 +579,14 @@ def make_repr(self) -> None:
582579
if not exists(target_cat):
583580
shutil.copy2(self.registry.abs(self.catalog_repo.cat_path), target_cat)
584581

585-
relative_path = os.path.relpath(
586-
self.registry.workdir, os.path.join(self.registry.workdir, self.registry.run_dir)
582+
# relative_path = self.registry.rel(self.registry.run_dir)
583+
# print(self.registry.workdir.__class__, self.registry.run_dir.__class__)
584+
relative_path = Path(
585+
os.path.relpath(self.registry.workdir.as_posix(), self.registry.run_dir.as_posix())
587586
)
588587
self.registry.workdir = relative_path
589-
self.to_yml(repr_config, extended=True)
588+
589+
self.to_yml(repr_config.as_posix(), extended=True)
590590

591591
def as_dict(self, extra: Sequence = (), extended=False) -> dict:
592592
"""
@@ -604,8 +604,8 @@ def as_dict(self, extra: Sequence = (), extended=False) -> dict:
604604
dict_walk = {
605605
"name": self.name,
606606
"config_file": self.config_file,
607-
"path": self.registry.workdir.resolve().as_posix(),
608-
"run_dir": self.registry.run_dir.resolve().as_posix(),
607+
"path": self.registry.workdir.as_posix(),
608+
"run_dir": self.registry.rel(self.registry.run_dir).as_posix(),
609609
"time_config": {
610610
i: j
611611
for i, j in self.time_config.items()
@@ -616,11 +616,10 @@ def as_dict(self, extra: Sequence = (), extended=False) -> dict:
616616
for i, j in self.region_config.items()
617617
if (i not in ("magnitudes", "depths") or extended)
618618
},
619-
"catalog": self.catalog_repo.cat_path.resolve().as_posix(),
619+
"catalog": self.registry.rel(self.catalog_repo.cat_path).as_posix(),
620620
"models": [i.as_dict() for i in self.models],
621621
"tests": [i.as_dict() for i in self.tests],
622622
}
623-
print(dict_walk)
624623
dict_walk.update(extra)
625624
return parse_nested_dicts(dict_walk)
626625

@@ -817,7 +816,9 @@ def get_filecomp(self):
817816
results[test.name] = dict.fromkeys(models_orig)
818817
for model in models_orig:
819818
orig_path = self.original.registry.get_result_key(win_orig[-1], test, model)
820-
repr_path = self.reproduced.registry.get_result_key(win_orig[-1], test, model)
819+
repr_path = self.reproduced.registry.get_result_key(
820+
win_orig[-1], test, model
821+
)
821822
results[test.name][model] = {
822823
"hash": (self.get_hash(orig_path) == self.get_hash(repr_path)),
823824
"byte2byte": filecmp.cmp(orig_path, repr_path),

floatcsep/infrastructure/engine.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,9 @@ def add(self, task: Task):
147147
self.tasks[task] = []
148148
self.ntasks += 1
149149

150-
def add_dependency(self, task, dep_inst: Union[object, str] = None, dep_meth: str = None,
151-
dkw: Any = None):
150+
def add_dependency(
151+
self, task, dep_inst: Union[object, str] = None, dep_meth: str = None, dkw: Any = None
152+
):
152153
"""
153154
Adds a dependency to a task already within the graph.
154155

floatcsep/infrastructure/environments.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ def create_environment(self, force: bool = False) -> None:
429429
rm=True,
430430
decode=True,
431431
buildargs=build_args,
432-
nocache=False # todo: create model arg for --no-cache
432+
nocache=False, # todo: create model arg for --no-cache
433433
)
434434

435435
# Stream each chunk
@@ -459,15 +459,15 @@ def env_exists(self) -> bool:
459459
except ImageNotFound:
460460
return False
461461

462-
def run_command(self, command=None) -> None:
462+
def run_command(self, command=None) -> None:
463463
"""
464464
Runs the model’s Docker container with input/ and forecasts/ mounted.
465465
Streams logs and checks for non-zero exit codes.
466466
"""
467467
model_root = os.path.abspath(self.model_directory)
468468
mounts = {
469-
os.path.join(model_root, "input"): {'bind': '/app/input', 'mode': 'rw'},
470-
os.path.join(model_root, "forecasts"): {'bind': '/app/forecasts', 'mode': 'rw'},
469+
os.path.join(model_root, "input"): {"bind": "/app/input", "mode": "rw"},
470+
os.path.join(model_root, "forecasts"): {"bind": "/app/forecasts", "mode": "rw"},
471471
}
472472

473473
uid, gid = os.getuid(), os.getgid()

floatcsep/infrastructure/logger.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ def set_console_log_level(log_level):
6262
handler.setLevel(log_level)
6363

6464

65-
66-
6765
def log_models_tree(log, experiment_registry, time_windows):
6866
"""
6967
Logs the forecasts for all models managed by this ExperimentFileRegistry.
@@ -112,7 +110,9 @@ def log_results_tree(log, experiment_registry):
112110

113111
for model_name, result_path in models.items():
114112
total_results += 1
115-
result_full_path = experiment_registry.get_result_key(timewindow, test_name, model_name)
113+
result_full_path = experiment_registry.get_result_key(
114+
timewindow, test_name, model_name
115+
)
116116
if os.path.exists(result_full_path):
117117
results_exist_count += 1
118118
else:
@@ -130,4 +130,4 @@ def log_results_tree(log, experiment_registry):
130130
log.debug(f"Total Results: {total_results}")
131131
log.debug(f"Results that Exist: {results_exist_count}")
132132
log.debug(f"Results that Do Not Exist: {results_not_exist_count}")
133-
log.debug("===================")
133+
log.debug("===================")

floatcsep/infrastructure/registries.py

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414

1515
log = logging.getLogger("floatLogger")
1616

17+
1718
class FilepathMixin:
1819
"""
1920
Small mixin to provide filepath management functionality to Registries that uses files to
2021
store objects
2122
"""
23+
2224
workdir: Path
2325
path: Path
2426

@@ -32,7 +34,7 @@ def dir(self) -> Path:
3234
return self.path.parents[0]
3335

3436
@staticmethod
35-
def _parse_arg(arg) -> Union[str , list[str]]:
37+
def _parse_arg(arg) -> Union[str, list[str]]:
3638
if isinstance(arg, (list, tuple)):
3739
return timewindow2str(arg)
3840
elif isinstance(arg, str):
@@ -91,7 +93,7 @@ def abs_dir(self, *paths: Sequence[str | Path]) -> Path:
9193
_dir = _path.parents[0]
9294
return _dir
9395

94-
def rel(self, *paths: Union[Path | str | Sequence[str | Path]] ) -> Path:
96+
def rel(self, *paths: Union[Path | str | Sequence[str | Path]]) -> Path:
9597
"""
9698
Gets the relative path of an item, relative to the Registry workdir
9799
@@ -133,6 +135,7 @@ def file_exists(self, *args: Sequence[str | Path]):
133135
file_abspath = self.get_attr(*args)
134136
return exists(file_abspath)
135137

138+
136139
class ModelRegistry(ABC):
137140
@abstractmethod
138141
def get_input_catalog_key(self, tstring: str) -> str:
@@ -147,18 +150,21 @@ def get_args_key(self, tstring: str) -> str:
147150
pass
148151

149152
@classmethod
150-
def factory(cls, registry_type: str = 'file', **kwargs) -> Union["ModelFileRegistry", "ModelHDF5Registry"]:
153+
def factory(
154+
cls, registry_type: str = "file", **kwargs
155+
) -> Union["ModelFileRegistry", "ModelHDF5Registry"]:
151156
"""Factory method. Instantiate first on any explicit option provided in the model
152157
configuration.
153158
"""
154-
if registry_type == 'file':
159+
if registry_type == "file":
155160
return ModelFileRegistry(**kwargs)
156161

157-
elif registry_type == 'hdf5':
162+
elif registry_type == "hdf5":
158163
return ModelHDF5Registry(**kwargs)
159164
else:
160165
raise Exception("No valid model management schema was selected")
161166

167+
162168
class ModelFileRegistry(ModelRegistry, FilepathMixin):
163169
def __init__(
164170
self,
@@ -181,18 +187,16 @@ def __init__(
181187

182188
self.model_name = model_name
183189
self.workdir = Path(workdir)
184-
self.path = Path(path)
185-
186-
self.args_file = self.rel(self.path, args_file) if args_file else None
187-
self.input_cat = self.rel(self.path, input_cat) if input_cat else None
190+
self.path = self.abs(Path(path))
188191

192+
self.args_file = args_file if args_file else None
193+
self.input_cat = input_cat if input_cat else None
189194
self.forecasts = {}
190195
self.input_args = {}
191196
self.input_cats = {}
192197
self.input_store = None
193198
self._fmt = fmt
194199

195-
196200
@property
197201
def fmt(self) -> str:
198202
"""
@@ -265,8 +269,8 @@ def build_tree(
265269
time_windows: Sequence[Sequence[datetime]] = None,
266270
model_class: str = "TimeIndependentModel",
267271
prefix: str = None,
268-
run_mode: str = 'sequential',
269-
run_dir: Optional[str] = None
272+
run_mode: str = "sequential",
273+
run_dir: Optional[str] = None,
270274
) -> None:
271275
"""
272276
Creates the run directory, and reads the file structure inside.
@@ -295,20 +299,20 @@ def build_tree(
295299
for _, folder_ in dirtree.items():
296300
os.makedirs(folder_, exist_ok=True)
297301

298-
if run_mode == 'sequential':
302+
if run_mode == "sequential":
299303
self.input_args = {
300-
win: Path(self.path, 'input', self.args_file) for win in windows
304+
win: Path(self.path, "input", self.args_file) for win in windows
301305
}
302306
self.input_cats = {
303-
win: Path(self.path, 'input', self.input_cat) for win in windows
307+
win: Path(self.path, "input", self.input_cat) for win in windows
304308
}
305-
elif run_mode == 'parallel':
309+
elif run_mode == "parallel":
306310
self.input_args = {
307-
win: Path(run_dir, win, 'input', self.model_name, self.args_file)
311+
win: Path(run_dir, win, "input", self.model_name, self.args_file)
308312
for win in windows
309313
}
310314
self.input_cats = {
311-
win: Path(run_dir, win, 'input', self.model_name, self.input_cat)
315+
win: Path(run_dir, win, "input", self.model_name, self.input_cat)
312316
for win in windows
313317
}
314318

@@ -330,16 +334,21 @@ def as_dict(self) -> dict:
330334
"forecasts": self.forecasts,
331335
}
332336

337+
333338
class ModelHDF5Registry(ModelRegistry):
334339

335340
def __init__(self, workdir: str, path: str):
336341
pass
342+
337343
def get_input_catalog_key(self, tstring: str) -> str:
338-
return ''
344+
return ""
345+
339346
def get_forecast_key(self, tstring: str) -> str:
340-
return ''
347+
return ""
348+
341349
def get_args_key(self, tstring: str) -> str:
342-
return ''
350+
return ""
351+
343352

344353
class ExperimentRegistry(ABC):
345354
@abstractmethod
@@ -372,16 +381,19 @@ def build_tree(
372381
pass
373382

374383
@classmethod
375-
def factory(cls, registry_type: str = 'file', **kwargs) -> Optional["ExperimentFileRegistry"]:
384+
def factory(
385+
cls, registry_type: str = "file", **kwargs
386+
) -> Optional["ExperimentFileRegistry"]:
376387
"""Factory method. Instantiate first on any explicit option provided in the experiment
377388
configuration.
378389
"""
379390

380-
if registry_type == 'file':
391+
if registry_type == "file":
381392
return ExperimentFileRegistry(**kwargs)
382393
else:
383394
return None
384395

396+
385397
class ExperimentFileRegistry(ExperimentRegistry, FilepathMixin):
386398
"""
387399
The class has the responsibility of managing the keys (based on models, timewindow and
@@ -398,7 +410,7 @@ def __init__(self, workdir: str, run_dir: str = "results") -> None:
398410
run_dir: The directory in which the results will be stored.
399411
"""
400412
self.workdir = Path(workdir)
401-
self.run_dir = Path(run_dir)
413+
self.run_dir = self.abs(Path(run_dir))
402414
self.results = {}
403415
self.test_catalogs = {}
404416
self.figures = {}
@@ -510,7 +522,7 @@ def build_tree(
510522
time_windows: Sequence[Sequence[datetime]],
511523
models: Sequence["Model"],
512524
tests: Sequence["Evaluation"],
513-
run_mode: str = 'sequential'
525+
run_mode: str = "sequential",
514526
) -> None:
515527
"""
516528
Creates the run directory and reads the file structure inside.
@@ -529,8 +541,8 @@ def build_tree(
529541

530542
run_folder = self.run_dir
531543
subfolders = ["catalog", "evaluations", "figures"]
532-
if run_mode == 'parallel':
533-
subfolders.append('input')
544+
if run_mode == "parallel":
545+
subfolders.append("input")
534546
dirtree = {
535547
win: {folder: self.abs(run_folder, win, folder) for folder in subfolders}
536548
for win in windows
@@ -540,8 +552,7 @@ def build_tree(
540552
for tw, tw_folder in dirtree.items():
541553
for _, folder_ in tw_folder.items():
542554
os.makedirs(folder_, exist_ok=True)
543-
if run_mode == 'parallel' and folder_.endswith('input'):
544-
print('a')
555+
if run_mode == "parallel" and folder_.name == "input":
545556
for model in models:
546557
os.makedirs(join(folder_, model), exist_ok=True)
547558
results = {
@@ -578,4 +589,3 @@ def build_tree(
578589
def as_dict(self) -> Path:
579590

580591
return self.workdir
581-

0 commit comments

Comments
 (0)