Update management of input files #57

Open · wants to merge 5 commits into base: main
floatcsep/evaluation.py (6 changes: 3 additions & 3 deletions)
@@ -287,7 +287,7 @@ def plot_results(
         # Regular consistency/comparative test plots (e.g., many models)
         try:
             for time_str in timewindow:
-                fig_path = registry.get_figure(time_str, self.name)
+                fig_path = registry.get_figure_key(time_str, self.name)
                 results = self.read_results(time_str, models)
                 ax = func(results, plot_args=fargs, **fkwargs)
                 if "code" in fargs:
@@ -307,7 +307,7 @@ def plot_results(
                 registry.figures[time_str][fig_name] = os.path.join(
                     time_str, "figures", fig_name
                 )
-                fig_path = registry.get_figure(time_str, fig_name)
+                fig_path = registry.get_figure_key(time_str, fig_name)
                 ax = func(result, plot_args=fargs, **fkwargs, show=False)
                 if "code" in fargs:
                     exec(fargs["code"])
@@ -318,7 +318,7 @@ def plot_results(
                 pyplot.show()

         elif self.type in ["sequential", "sequential_comparative", "batch"]:
-            fig_path = registry.get_figure(timewindow[-1], self.name)
+            fig_path = registry.get_figure_key(timewindow[-1], self.name)
             results = self.read_results(timewindow[-1], models)
             ax = func(results, plot_args=fargs, **fkwargs)
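The call sites above change by pure rename: `registry.get_figure` becomes `registry.get_figure_key`. A minimal sketch of the assumed accessor, based only on the `registry.figures[time_str][fig_name]` assignment visible in the second hunk; the class name and the `workdir` resolution are hypothetical, not floatcsep code:

```python
import os


class RegistrySketch:
    """Illustrative stand-in for the experiment registry; not the actual floatcsep class."""

    def __init__(self, workdir: str):
        self.workdir = workdir
        # Nested mapping as populated in the hunk above:
        # {time_window_str: {figure_name: relative_figure_path}}
        self.figures = {}

    def get_figure_key(self, timewindow: str, figure_name: str) -> str:
        # Resolve the stored relative path against the working directory.
        return os.path.join(self.workdir, self.figures[timewindow][figure_name])
```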
floatcsep/experiment.py (47 changes: 24 additions & 23 deletions)
@@ -25,6 +25,7 @@
     parse_nested_dicts,
 )
 from floatcsep.infrastructure.engine import Task, TaskGraph
+from floatcsep.infrastructure.logger import log_models_tree, log_results_tree

 log = logging.getLogger("floatLogger")

@@ -52,8 +53,8 @@ class Experiment:
         - growth (:class:`str`): `incremental` or `cumulative`
         - offset (:class:`float`): recurrence of forecast creation.

-        For further details, see :func:`~floatcsep.utils.timewindows_ti`
-        and :func:`~floatcsep.utils.timewindows_td`
+        For further details, see :func:`~floatcsep.utils.time_windows_ti`
+        and :func:`~floatcsep.utils.time_windows_td`

         region_config (dict): Contains all the spatial and magnitude
             specifications. It must contain the following keys:
@@ -118,7 +119,7 @@ def __init__(
         os.makedirs(os.path.join(workdir, rundir), exist_ok=True)

         self.name = name if name else "floatingExp"
-        self.registry = ExperimentRegistry(workdir, rundir)
+        self.registry = ExperimentRegistry.factory(workdir=workdir, run_dir=rundir)
         self.results_repo = ResultsRepository(self.registry)
         self.catalog_repo = CatalogRepository(self.registry)
@@ -143,7 +144,7 @@ def __init__(
         log.info(f"Setting up experiment {self.name}:")
         log.info(f"\tStart: {self.start_date}")
         log.info(f"\tEnd: {self.end_date}")
-        log.info(f"\tTime windows: {len(self.timewindows)}")
+        log.info(f"\tTime windows: {len(self.time_windows)}")
         log.info(f"\tRegion: {self.region.name if self.region else None}")
         log.info(
             f"\tMagnitude range: [{numpy.min(self.magnitudes)},"
@@ -175,7 +176,7 @@ def __getattr__(self, item: str) -> object:
         Override built-in method to return the experiment attributes by also using the command
         ``experiment.{attr}``. Adds also to the experiment scope the keys of
         :attr:`region_config` or :attr:`time_config`. These are: ``start_date``, ``end_date``,
-        ``timewindows``, ``horizon``, ``offset``, ``region``, ``magnitudes``, ``mag_min``,
+        ``time_windows``, ``horizon``, ``offset``, ``region``, ``magnitudes``, ``mag_min``,
         ``mag_max``, ``mag_bin``, ``depth_min``, ``depth_max``.
         """

@@ -295,8 +296,8 @@ def stage_models(self) -> None:
         """
         log.info("Staging models")
         for i in self.models:
-            i.stage(self.timewindows)
-            self.registry.add_forecast_registry(i)
+            i.stage(self.time_windows)
+            self.registry.add_model_registry(i)

     def set_tests(self, test_config: Union[str, Dict, List]) -> list:
         """
@@ -376,17 +377,17 @@ def set_tasks(self) -> None:
         """

         # Set the file path structure
-        self.registry.build_tree(self.timewindows, self.models, self.tests)
+        self.registry.build_tree(self.time_windows, self.models, self.tests)

         log.debug("Pre-run forecast summary")
-        self.registry.log_forecast_trees(self.timewindows)
+        log_models_tree(log, self.registry, self.time_windows)
         log.debug("Pre-run result summary")
-        self.registry.log_results_tree()
+        log_results_tree(log, self.registry)

         log.info("Setting up experiment's tasks")

         # Get the time windows strings
-        tw_strings = timewindow2str(self.timewindows)
+        tw_strings = timewindow2str(self.time_windows)

         # Prepare the testing catalogs
         task_graph = TaskGraph()
@@ -481,7 +482,7 @@ def set_tasks(self) -> None:
                 )
             # Set up the Sequential_Comparative Scores
             elif test_k.type == "sequential_comparative":
-                tw_strs = timewindow2str(self.timewindows)
+                tw_strs = timewindow2str(self.time_windows)
                 for model_j in self.models:
                     task_k = Task(
                         instance=test_k,
@@ -504,7 +505,7 @@ def set_tasks(self) -> None:
                 )
             # Set up the Batch comparative Scores
             elif test_k.type == "batch":
-                time_str = timewindow2str(self.timewindows[-1])
+                time_str = timewindow2str(self.time_windows[-1])
                 for model_j in self.models:
                     task_k = Task(
                         instance=test_k,
@@ -540,9 +541,9 @@ def run(self) -> None:
         self.task_graph.run()
         log.info("Calculation completed")
         log.debug("Post-run forecast registry")
-        self.registry.log_forecast_trees(self.timewindows)
+        log_models_tree(log, self.registry, self.time_windows)
         log.debug("Post-run result summary")
-        self.registry.log_results_tree()
+        log_results_tree(log, self.registry)

     def read_results(self, test: Evaluation, window: str) -> List:
         """
@@ -559,7 +560,7 @@ def make_repr(self) -> None:

         """
         log.info("Creating reproducibility config file")
-        repr_config = self.registry.get("repr_config")
+        repr_config = self.registry.get_attr("repr_config")

         # Dropping region to results folder if it is a file
         region_path = self.region_config.get("path", False)
@@ -604,7 +605,7 @@ def as_dict(self, extra: Sequence = (), extended=False) -> dict:
             "time_config": {
                 i: j
                 for i, j in self.time_config.items()
-                if (i not in ("timewindows",) or extended)
+                if (i not in ("time_windows",) or extended)
             },
             "region_config": {
                 i: j
@@ -731,7 +732,7 @@ def test_stat(test_orig, test_repr):

     def get_results(self):

-        win_orig = timewindow2str(self.original.timewindows)
+        win_orig = timewindow2str(self.original.time_windows)

         tests_orig = self.original.tests

@@ -787,7 +788,7 @@ def get_hash(filename):

     def get_filecomp(self):

-        win_orig = timewindow2str(self.original.timewindows)
+        win_orig = timewindow2str(self.original.time_windows)

         tests_orig = self.original.tests

@@ -801,8 +802,8 @@ def get_filecomp(self):
                 for tw in win_orig:
                     results[test.name][tw] = dict.fromkeys(models_orig)
                     for model in models_orig:
-                        orig_path = self.original.registry.get_result(tw, test, model)
-                        repr_path = self.reproduced.registry.get_result(tw, test, model)
+                        orig_path = self.original.registry.get_result_key(tw, test, model)
+                        repr_path = self.reproduced.registry.get_result_key(tw, test, model)

                         results[test.name][tw][model] = {
                             "hash": (self.get_hash(orig_path) == self.get_hash(repr_path)),
@@ -811,8 +812,8 @@ def get_filecomp(self):
             else:
                 results[test.name] = dict.fromkeys(models_orig)
                 for model in models_orig:
-                    orig_path = self.original.registry.get_result(win_orig[-1], test, model)
-                    repr_path = self.reproduced.registry.get_result(win_orig[-1], test, model)
+                    orig_path = self.original.registry.get_result_key(win_orig[-1], test, model)
+                    repr_path = self.reproduced.registry.get_result_key(win_orig[-1], test, model)
                     results[test.name][model] = {
                         "hash": (self.get_hash(orig_path) == self.get_hash(repr_path)),
                         "byte2byte": filecmp.cmp(orig_path, repr_path),
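The bulk of this file is the `timewindows` to `time_windows` rename; the structural changes are registry construction moving behind `ExperimentRegistry.factory(...)` and tree logging moving out of the registry into the `log_models_tree`/`log_results_tree` helpers imported at the top. A sketch of the factory-classmethod pattern; only the signature and keyword call style come from the diff, the body shown here is hypothetical:

```python
import os


class ExperimentRegistrySketch:
    """Illustrative factory-classmethod pattern; not the actual floatcsep class."""

    def __init__(self, workdir: str, run_dir: str):
        self.workdir = workdir
        self.run_dir = run_dir

    @classmethod
    def factory(cls, workdir: str = ".", run_dir: str = "results") -> "ExperimentRegistrySketch":
        # A factory keeps call sites keyword-explicit and leaves room to
        # validate inputs or return specialized registries later.
        if not os.path.isdir(workdir):
            raise NotADirectoryError(workdir)
        return cls(workdir=workdir, run_dir=run_dir)


# Call style matching the diff:
registry = ExperimentRegistrySketch.factory(workdir=".", run_dir="results")
```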
floatcsep/infrastructure/logger.py (71 changes: 71 additions & 0 deletions)
@@ -60,3 +60,74 @@ def set_console_log_level(log_level):
     for handler in logger.handlers:
         if isinstance(handler, logging.StreamHandler):
             handler.setLevel(log_level)
+
+
+def log_models_tree(log, experiment_registry, time_windows):
+    """
+    Logs the forecast status of all models managed by the given experiment registry.
+    """
+    log.debug("===================")
+    log.debug(f" Total Time Windows: {len(time_windows)}")
+    for model_name, registry in experiment_registry.model_registries.items():
+        log.debug(f" Model: {model_name}")
+        exists_group = []
+        not_exist_group = []
+
+        for timewindow, filepath in registry.forecasts.items():
+            if registry.forecast_exists(timewindow):
+                exists_group.append(timewindow)
+            else:
+                not_exist_group.append(timewindow)
+
+        log.debug(f"  Existing forecasts: {len(exists_group)}")
+        log.debug(f"  Missing forecasts: {len(not_exist_group)}")
+        for timewindow in not_exist_group:
+            log.debug(f"   Time Window: {timewindow}")
+    log.debug("===================")
+
+
+def log_results_tree(log, experiment_registry):
+    """
+    Logs a summary of the results dictionary, sorted by test. For each test and
+    time window, it logs whether all models have results or, if some are
+    missing, which models are missing.
+    """
+    log.debug("===================")
+
+    total_results = results_exist_count = results_not_exist_count = 0
+
+    # Get all unique test names and sort them
+    all_tests = sorted(
+        {test_name for tests in experiment_registry.results.values() for test_name in tests}
+    )
+
+    for test_name in all_tests:
+        log.debug(f"Test: {test_name}")
+        for timewindow, tests in experiment_registry.results.items():
+            if test_name in tests:
+                models = tests[test_name]
+                missing_models = []
+
+                for model_name, result_path in models.items():
+                    total_results += 1
+                    result_full_path = experiment_registry.get_result_key(
+                        timewindow, test_name, model_name
+                    )
+                    if os.path.exists(result_full_path):
+                        results_exist_count += 1
+                    else:
+                        results_not_exist_count += 1
+                        missing_models.append(model_name)
+
+                if not missing_models:
+                    log.debug(f" Time Window: {timewindow} - All models evaluated.")
+                else:
+                    log.debug(
+                        f" Time Window: {timewindow} - Missing results for models: "
+                        f"{', '.join(missing_models)}"
+                    )
+
+    log.debug(f"Total Results: {total_results}")
+    log.debug(f"Results that Exist: {results_exist_count}")
+    log.debug(f"Results that Do Not Exist: {results_not_exist_count}")
+    log.debug("===================")