Skip to content

Commit cfedc54

Browse files
authored
Merge pull request #58 from cyber-physical-systems-group/experiment/model-composer
Experiment/model composer
2 parents d18947e + 6277e83 commit cfedc54

File tree

14 files changed

+427
-40
lines changed

14 files changed

+427
-40
lines changed
Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,30 @@ To run the experiment, use the following command (assuming W&B is installed and
99
sources section):
1010

1111
```bash
12-
python -m examples.prediction.run
12+
python -m examples.core.prediction
1313
```
1414

15+
# Simulation
16+
17+
This example uses `pydentification` to run training with a simulation experiment on an example benchmark.
18+
Only a single experiment is run and registered to W&B.
19+
20+
### Running
21+
22+
To run the experiment, use the following command (assuming W&B is installed and logged in, for more details go to
23+
sources section):
24+
25+
```bash
26+
python -m examples.core.simulation
27+
```
28+
29+
### Experiment
30+
31+
This is an example of fully reproducible sweeps, storing a snapshot of the code used to run the experiment in a ZIP archive,
32+
alongside a stand-alone function to re-create the model, its init parameters in JSON, and the model weights in safetensors format.
33+
34+
It is based on prediction example.
35+
1536
### Sources
1637

1738
* [https://docs.wandb.ai/guides/sweeps](https://docs.wandb.ai/guides/sweeps)

examples/core/experiment.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# isort: skip_file
2+
import os
3+
4+
import lightning.pytorch as pl
5+
import pandas as pd
6+
import torch
7+
8+
import wandb
9+
10+
from pydentification.experiment.storage.wrapper import dump
11+
from pydentification.experiment.storage.models import save_lightning
12+
from pydentification.experiment.storage.code import save_code_snapshot
13+
from pydentification.experiment.storage.sync import save_to_wandb
14+
from pydentification.training.lightning.prediction import LightningPredictionTrainingModule
15+
from pydentification.data.datamodules.prediction import PredictionDataModule
16+
17+
18+
def input_fn(parameters: dict):
19+
data = pd.read_csv("data/lorenz.csv") # assume dataset exists and has ~100 000 samples with 3 columns: x, y, z
20+
return PredictionDataModule(
21+
data[["x", "y", "z"]].values,
22+
test_size=30_000, # 30% assuming 100 000 sample
23+
batch_size=32,
24+
validation_size=0.1, # 10% of the training set, which is 70% of the whole dataset
25+
n_backward_time_steps=parameters["n_input_time_steps"], # sweep parameter, which can be changed between runs
26+
n_forward_time_steps=parameters["n_output_time_steps"],
27+
n_workers=4,
28+
)
29+
30+
31+
# pass parameterless lambda function dynamically returning path to the decorator, after W&B run is initialized
@dump(path=lambda: f"outputs/{wandb.run.id}", param_store="both")  # noqa
def model_fn(hidden_dim: int = 64):
    """Build a small MLP mapping 3 state variables to 3 predicted state variables."""
    layers = [
        torch.nn.Linear(3, hidden_dim),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_dim, hidden_dim),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_dim, 3),
    ]
    return torch.nn.Sequential(*layers)
41+
42+
43+
def trainer_fn(model, parameters: dict):
    """
    Wrap the raw torch module in a lightning training module and build its trainer.

    :param model: torch module returned by `model_fn`
    :param parameters: sweep configuration; currently unused here, kept for interface symmetry
    :return: tuple of (lightning training module, lightning trainer)
    """
    adamw = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
    training_module = LightningPredictionTrainingModule(module=model, optimizer=adamw)
    # hard stop after 4 hours of training, checked once per epoch
    time_limit = pl.callbacks.Timer(duration="00:04:00:00", interval="epoch")

    trainer = pl.Trainer(
        max_epochs=3,  # just an example
        precision=64,
        accelerator="cpu",
        devices=1,
        callbacks=[time_limit],
    )

    return training_module, trainer
57+
58+
59+
def train_fn(model, trainer, dm):
    """Fit `model` with the lightning `trainer` on datamodule `dm`, returning both unchanged."""
    trainer.fit(model, datamodule=dm)
    return model, trainer
63+
64+
65+
def run_single_experiment():
    """
    Run one sweep trial: build data, model and trainer from the W&B config, train, and store
    the trained weights, hyperparameters and a code snapshot under `outputs/{run.id}`.

    :raises ValueError: wrapping any failure, so the agent reports errors even though W&B
        uses multiprocessing internally (which can lose exception information).
    """
    with wandb.init(reinit=True):
        # prepare directories and library code snapshot
        os.makedirs(f"outputs/{wandb.run.id}/models", exist_ok=True)
        os.makedirs(f"outputs/{wandb.run.id}/code", exist_ok=True)
        save_code_snapshot(name="code", source_dir="pydentification", target_dir=f"outputs/{wandb.run.id}/code")

        parameters = dict(wandb.config)  # cast to dict is needed to serialize the parameters
        try:
            print(f"Starting experiment with {wandb.run.id}")
            dm = input_fn(parameters)
            # forward sweep-controlled model hyperparameters; previously `model_fn()` was always
            # called with defaults, so sweeping over hidden_dim had no effect on the model
            model_kwargs = {key: parameters[key] for key in ("hidden_dim",) if key in parameters}
            model = model_fn(**model_kwargs)
            model, trainer = trainer_fn(model, parameters)
            model, trainer = train_fn(model, trainer, dm)

            # store trained model and send it to W&B
            save_lightning(f"outputs/{wandb.run.id}/models", model=model, method="safetensors", save_hparams=True)
            save_to_wandb(f"outputs/{wandb.run.id}")  # save all files in the directory to W&B
        except Exception as e:
            print(e)  # print traceback, since W&B uses multiprocessing, which can lose information about exception
            raise ValueError("Experiment failed.") from e
86+
87+
88+
if __name__ == "__main__":
    # W&B sweep configs must follow the schema {"method": ..., "parameters": {name: {"values": [...]}}};
    # the previous bare {"hidden_di": [32, 64, 128]} mapping was not a valid sweep config and the
    # parameter key had a typo ("hidden_di" instead of "hidden_dim" used by model_fn)
    sweep_config = {
        "method": "grid",
        "parameters": {"hidden_dim": {"values": [32, 64, 128]}},
    }
    sweep_id = wandb.sweep(sweep_config, project="test")
    wandb.agent(sweep_id, function=run_single_experiment, count=3, project="test")
Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# isort: skip_file
2-
import os
32
from datetime import timedelta
43

54
import lightning.pytorch as pl
@@ -9,6 +8,7 @@
98

109
from pydentification.data.datamodules.prediction import PredictionDataModule
1110
from pydentification.experiment.reporters import report_metrics, report_prediction_plot, report_trainable_parameters
11+
from pydentification.experiment.storage.models import save_lightning
1212
from pydentification.metrics import regression_metrics
1313
from pydentification.models.networks.transformer import (
1414
CausalDelayLineFeedforward,
@@ -19,10 +19,9 @@
1919

2020

2121
def input_fn(parameters: dict):
22-
df = pd.read_csv("dataset.csv") # assume dataset exists and has ~100 000 samples with 3 columns: x, y, z
23-
22+
data = pd.read_csv("data/lorenz.csv") # assume dataset exists and has ~100 000 samples with 3 columns: x, y, z
2423
return PredictionDataModule(
25-
df[["x", "y", "z"]],
24+
data[["x", "y", "z"]].values,
2625
test_size=30_000, # 30% assuming 100 000 sample
2726
batch_size=32,
2827
validation_size=0.1, # 10% of the training set, which is 70% of the whole dataset
@@ -83,10 +82,10 @@ def trainer_fn(model, parameters: dict):
8382
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=20, verbose=True)
8483
# callbacks for stopping the training early, with 4 hour timeout and patience of 50 epochs (with 20 for reducing LR)
8584
timer = pl.callbacks.Timer(duration="00:04:00:00", interval="epoch")
86-
stopping = pl.callbacks.EarlyStopping(monitor="training/validation_loss", patience=50, mode="min", verbose=True)
85+
stopping = pl.callbacks.EarlyStopping(monitor="trainer/validation_loss", patience=50, mode="min", verbose=True)
8786
# checkpointing the model every 100 epochs and every hour to single directory
88-
path = f"models/{wandb.run.id}"
89-
epoch_checkpoint = pl.callbacks.ModelCheckpoint(dirpath=path, monitor="validation/loss", every_n_epochs=100)
87+
path = f"outputs/models/{wandb.run.id}"
88+
epoch_checkpoint = pl.callbacks.ModelCheckpoint(dirpath=path, monitor="trainer/validation_loss", every_n_epochs=100)
9089
time_checkpoint = pl.callbacks.ModelCheckpoint(dirpath=path, train_time_interval=timedelta(hours=1))
9190

9291
# wrap model in training class with auto-regression training defined
@@ -146,10 +145,8 @@ def run_single_experiment():
146145
model, trainer = train_fn(model, trainer, dm)
147146
report_fn(model, dm, auto_regression_scales=[16, 32, 128]) # sample of regression scales
148147
# store trained model and send it to W&B
149-
os.makedirs(f"models/{wandb.run.id}", exist_ok=True)
150-
path = f"models/{wandb.run.id}/trained-model.pt"
151-
torch.save(model, path)
152-
wandb.save(path)
148+
save_lightning(name=wandb.run.id, model=model, method="safetensors", save_hparams=True)
149+
wandb.save(wandb.run.id)
153150
except Exception as e:
154151
print(e) # print traceback, since W&B uses multiprocessing, which can lose information about exception
155152
raise ValueError("Experiment failed.") from e
@@ -201,4 +198,4 @@ def run_single_experiment():
201198

202199
if __name__ == "__main__":
203200
sweep_id = wandb.sweep(SWEEP_CONFIG, project="test") # change project name
204-
wandb.agent(sweep_id, function=run_single_experiment, count=10, project="test")
201+
wandb.agent(sweep_id, function=run_single_experiment, count=1, project="test")
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ def model_fn():
6161
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=50, verbose=True)
6262

6363
timer = pl.callbacks.Timer(duration="00:04:00:00", interval="epoch") # 4 hours
64-
stopping = pl.callbacks.EarlyStopping(monitor="training/validation_loss", patience=50, mode="min", verbose=True)
64+
stopping = pl.callbacks.EarlyStopping(monitor="trainer/validation_loss", patience=50, mode="min", verbose=True)
6565

6666
path = f"models/{wandb.run.id}"
67-
epoch_checkpoint = pl.callbacks.ModelCheckpoint(dirpath=path, monitor="training/validation_loss", every_n_epochs=10)
67+
epoch_checkpoint = pl.callbacks.ModelCheckpoint(dirpath=path, monitor="trainer/validation_loss", every_n_epochs=10)
6868

6969
model = LightningSimulationTrainingModule(transformer, optimizer, lr_scheduler, loss=torch.nn.MSELoss())
7070

pydentification/data/datamodules/prediction.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ def __init__(
8484
"""
8585
super().__init__()
8686

87+
if not isinstance(states, np.ndarray):
88+
raise TypeError(f"States must be numpy given as numpy array, got {type(states)}!")
89+
8790
self.states = states
8891

8992
self.test_size = test_size

pydentification/data/datamodules/simulation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def __init__(
5353
batch_size: int = 32,
5454
dtype: torch.dtype = torch.float32,
5555
):
56+
if not isinstance(inputs, np.ndarray) or not isinstance(outputs, np.ndarray):
57+
raise TypeError(f"Inputs and outputs must be numpy arrays! Got {type(inputs)} and {type(outputs)}!")
58+
5659
self.inputs = inputs
5760
self.outputs = outputs
5861

File renamed without changes.

pydentification/experiment/dumper/code.py renamed to pydentification/experiment/storage/code.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
import uuid
44
from pathlib import Path
55

6-
PYTHON_EXTENSIONS = frozenset({".py", ".json", ".txt", ".md", ".yaml", ".yml", ".toml", ".ini"})
7-
DEFAULT_FORBIDDEN_PREFIX = frozenset({"venv", ".ipynb_checkpoints", "__pycache__", ".git", ".pytest_cache"})
8-
96

107
def _load_gitignore() -> set[str]:
118
"""Load .gitignore from default name and root directory as set"""
@@ -21,7 +18,7 @@ def not_comment(line: str) -> bool:
2118
return set(filter(not_comment, f.read().splitlines()))
2219

2320

24-
def _skip_subdir(current: Path, archive_path: Path, forbidden_paths: frozenset[str]) -> bool:
21+
def _skip_subdir(current: Path, archive_path: Path, forbidden_paths: set[str]) -> bool:
2522
# prevent copying the temp directory, where the archive with source code is build
2623
if str(archive_path.absolute()) == current:
2724
return True
@@ -34,38 +31,57 @@ def _skip_subdir(current: Path, archive_path: Path, forbidden_paths: frozenset[s
3431
return False
3532

3633

37-
def save_code_snapshot(
    name: str,
    source_dir: str | Path,
    target_dir: str | Path,
    filter_prefix: set[str] = frozenset({"venv", ".ipynb_checkpoints", "__pycache__", ".git", ".pytest_cache"}),
    accept_suffix: set[str] = frozenset({".py", ".json", ".txt", ".md", ".yaml", ".yml", ".toml", ".ini"}),
    use_gitignore: bool = True,
):
    """
    Save only text-based files in a ZIP archive, excluding binary data files.

    :param name: name of the archive file
    :param source_dir: path to the directory with source code
    :param target_dir: path to the directory where the archive will be saved
    :param filter_prefix: set of prefixes to exclude from the archive
    :param accept_suffix: set of suffixes to include in the archive
    :param use_gitignore: whether to use .gitignore file in the source directory for filter_prefix
    """
    if isinstance(source_dir, str):
        source_dir = Path(source_dir)

    if isinstance(target_dir, str):
        target_dir = Path(target_dir)

    source_dir = Path(source_dir).resolve()  # ensure absolute path
    snapshot_path = target_dir / name
    temp_dir = target_dir / str(uuid.uuid4())  # create temp dir with unique name for copying files

    if temp_dir.exists():
        shutil.rmtree(temp_dir)

    temp_dir.mkdir(parents=True, exist_ok=True)

    if use_gitignore:
        # union rebinds the local name, so the frozenset defaults are never mutated
        filter_prefix |= _load_gitignore()  # union with .gitignore, if present

    for root, dirs, files in os.walk(source_dir):
        root_path = Path(root)

        if _skip_subdir(root_path, temp_dir, filter_prefix):
            dirs.clear()  # prevent descending into this directory
            continue  # skip to the next directory

        for file in files:
            source_path = root_path / file
            if source_path.suffix in accept_suffix:
                # keep the source directory name as the top-level folder inside the archive,
                # but stay independent of the working directory: `relative_to(os.getcwd())`
                # raised ValueError whenever source_dir was not located below the CWD
                relative_path = Path(source_dir.name) / source_path.relative_to(source_dir)
                dest_path = temp_dir / relative_path

                dest_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(source_path, dest_path)  # copy from the absolute path, not CWD-relative

    shutil.make_archive(str(snapshot_path), format="zip", root_dir=temp_dir)  # archive the directory
    shutil.rmtree(temp_dir)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import importlib.util
2+
import json
3+
import os
4+
import shutil
5+
import sys
6+
import zipfile
7+
from pathlib import Path
8+
from typing import Any, Callable
9+
10+
11+
class ReplaceSourceCode:
    """
    ContextManager over-writing imports with given `path` to ZIP with source code created by
    `pydentification.experiment.storage.code.save_code_snapshot`.

    Code is extracted to a temporary directory and added to the `sys.path` for the duration of the context and removed
    afterward on exit. The source code needs to be unique directory to avoid conflicts with other imports.
    """

    def __init__(self, path: Path):
        # `source_path` is the extraction target: the archive path with its suffix stripped
        self.path = path
        self.source_path = path.with_suffix("")

    def __enter__(self):
        # refuse to extract over an existing directory, so imports never resolve against stale files
        if self.source_path.exists():
            raise FileExistsError(f"Can't overwrite {self.source_path.stem}!")

        # named `archive` to avoid shadowing the builtin `zip`
        with zipfile.ZipFile(self.path, "r") as archive:
            archive.extractall(str(self.source_path))

        sys.path.append(str(self.source_path))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # always undo the sys.path change and remove the extracted files, even on error
        sys.path.remove(str(self.source_path))
        shutil.rmtree(self.source_path)
37+
38+
39+
def _import_function_from_path(module_path: str, function_name: str) -> Callable:
40+
"""Dynamically imports a function from a Python file given the file path and function name."""
41+
module_name = os.path.basename(module_path).replace(".py", "")
42+
spec = importlib.util.spec_from_file_location(module_name, module_path)
43+
44+
module = importlib.util.module_from_spec(spec)
45+
sys.modules[module_name] = module
46+
47+
spec.loader.exec_module(module)
48+
function = getattr(module, function_name)
49+
50+
return function
51+
52+
53+
def _load_model_and_parameters(path: str | Path, name: str, parameters: dict[str, Any]) -> Any:
    """Import the model-building function `name` from the file at `path` and call it with `parameters`."""
    builder = _import_function_from_path(path, name)
    return builder(**parameters)
56+
57+
58+
def compose_model(
    path: str | Path,
    name: str = "model_fn",
    parameters: str | Path | None = None,
    source: str | Path | None = None,
):
    """
    Compose model from dump, which will contain model generating function, JSON with its parameters and source code
    for module definitions (ZIP of entire `pydentification`).

    :param path: filesystem Path to the model generating function, which will be imported by `import_function_from_path`
    :param name: name of the function to be imported, default is `model_fn`
    :param parameters: filesystem Path to the JSON file with parameters, if None, empty dictionary will be used
    :param source: filesystem Path to the ZIP file with source code
                   if None imports are attempted from the current working directory.
    """
    if isinstance(source, str):
        source = Path(source)

    # load init parameters from JSON, defaulting to no parameters at all
    if parameters is None:
        init_parameters = {}
    else:
        with open(parameters, "r") as f:
            init_parameters = json.load(f)

    if source is None:
        # import against whatever is already importable (current working directory)
        return _load_model_and_parameters(path, name, init_parameters)

    # temporarily swap imports to the archived source code for the duration of model creation
    with ReplaceSourceCode(source):
        return _load_model_and_parameters(path, name, init_parameters)

0 commit comments

Comments
 (0)