
Commit f68033d

Authored by clemens-fricke, pre-commit-ci[bot], and Copilot
Version 0.2.3 (#57)
* ci(precommit): ruff update v0.11.12 -> v0.12.2
  updates:
  - [github.com/astral-sh/ruff-pre-commit: v0.11.12 → v0.12.2](astral-sh/ruff-pre-commit@v0.11.12...v0.12.2)
  Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
* Ft(geom)!: Apply random initial position for incremental approach (#49)
* feat(geom)!: Add random start for incremental approach
  BREAKING CHANGE: Discrete values are no longer clipped. If a step would exceed the range, the step is not taken. It is therefore possible that the extreme values of the given ranges are not reachable.
* test(random_init): Fix the tests that were not working after the initial commit.
* Feat: Add util function for caching of results (pyspor) (#47)
* feat(pyspor): Add util function for caching of results
* Doc: add more documentation for the cache
* docs(caching): Added docs that explain how to use the caching and added a generate-key function for easier use
* chore: use new ruff dual call in pre-commit and add conventional commits (#50)
* chore(pre-commit): update ruff call to the new check and format calls and add conventional commits
* style: adapt to the new format rules
* fix(cli): the version and help CLI commands work again
* chore: version bump
* fix: harden against SQL injection (suggestion from @Copilot)
  Co-authored-by: Copilot <[email protected]>
* fix: improve input parameter checking (suggestion from @Copilot)
  Co-authored-by: Copilot <[email protected]>
* chore: format kerfuffle
* fix: Episode numbers in step-log were off, now fixed (#52)
* fix: Episode numbers in step-log were off, now fixed
* chore(version): version bump for new release
* fix(visu, step-log): Change episode selection in step log visualization (#55)
* fix(visu, step-log): Change episode selection in step log visualization
* Update version to 0.2.2.dev1
* Feat: episode log validation (#56)
* feat(visu, episode-log): Added visualization of validation values and a toggle to show them; moved slow imports in the main function into the functions they are used in, for a visualization speed-up
* CI(pre-commit): changed the branch the PRs are opened on
* feat(visu, episode-log): Add boxplot to visualize validation distribution
* feat(visu, episode-log): Boxplot width dependent on validation frequency
* chore: Deleted soon
* chore: Deleted
* Feat(visu, episode-log): Changed validation value visualization to scatter-based error bars
* Chore(visu, episode-log): Clean up unused code
* Chore: Bump version to 0.2.2.dev2
* [pre-commit.ci] pre-commit autoupdate (#54)
* mini version (#53)
* fix: Episode numbers in step-log were off, now fixed (#52)
* chore(version): version bump for new release
* [pre-commit.ci] pre-commit autoupdate
  updates:
  - [github.com/pre-commit/pre-commit-hooks: v5.0.0 → v6.0.0](pre-commit/pre-commit-hooks@v5.0.0...v6.0.0)
  - [github.com/astral-sh/ruff-pre-commit: v0.11.13 → v0.12.11](astral-sh/ruff-pre-commit@v0.11.13...v0.12.11)
  ---------
  Co-authored-by: clemens.fricke <[email protected]>
  Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
* Chore(ruff): fix format error
* Fix index error (#58)

---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <[email protected]>
1 parent 60ddde7 commit f68033d

File tree

4 files changed: +115 −10 lines


.pre-commit-config.yaml

Lines changed: 3 additions & 2 deletions
@@ -2,6 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 ci:
   autofix_prs: false
+  autoupdate_branch: 'develop'
   autoupdate_schedule: monthly
 
 
@@ -12,7 +13,7 @@ default_language_version:
   python: python3.11
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v6.0.0
     hooks:
       - id: end-of-file-fixer
       - id: check-yaml
@@ -29,7 +30,7 @@ repos:
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.13
+    rev: v0.12.11
     hooks:
       # Run the linter.
       - id: ruff-check

releso/__main__.py

Lines changed: 12 additions & 3 deletions
@@ -15,11 +15,8 @@
 import gymnasium
 import hjson
 import numpy as np
-import stable_baselines3
-import torch
 
 from releso.__version__ import __version__
-from releso.base_parser import BaseParser
 from releso.util.module_import_raiser import ModuleImportRaiser
 
 try:
@@ -126,6 +123,11 @@ def main(args) -> pathlib.Path:
     Raises:
         ValueError: Thrown if the json file could not be found.
     """
+    import stable_baselines3
+    import torch
+
+    from releso.base_parser import BaseParser
+
     ###########################
     #                         #
     #   Loading and parsing   #
@@ -322,6 +324,12 @@ def entry():
             "timesteps will be included."
         ),
     )
+    parser_visualize_episodelog.add_argument(
+        "-v",
+        "--show-validation",
+        action="store_true",
+        help=("Whether to show validation results. Defaults to False."),
+    )
     parser_visualize_steplog = sub_parser_visualize.add_parser(
         "step-log",
         parents=[visualize_shared_args],
@@ -417,6 +425,7 @@ def entry():
             args.window,
             window_size=figure_size,
             cut_off_point=args.cut_off_point,
+            show_validation=args.show_validation,
         )
         export_figure(fig, args.export_path, "episode_log.html")
     # Visualize contents of step_log.jsonl
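Editor's note: the new -v/--show-validation flag is passed straight through to plot_episode_log. As a rough sketch (not part of this commit), the same option can also be exercised from Python. Only the parameters from window onward are visible in this diff, so the leading result_folders argument (a mapping of experiment names to result directories, inferred from n_env = list(result_folders.keys()) in the visualization module) and the example path are assumptions:

import pathlib

from releso.util.visualization import plot_episode_log

# Hypothetical experiment folder; validation markers only appear if it
# contains eval/log/evaluations.npz.
result_folders = {"experiment_a": pathlib.Path("results/experiment_a")}

fig = plot_episode_log(
    result_folders,
    window=5,
    window_size="auto",
    show_validation=True,  # new in 0.2.3: overlay evaluation results
)
fig.write_html("episode_log.html")  # plotly Figure, so standard export applies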

releso/__version__.py

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 Current version.
 """
 
-__version__ = "0.2.2"
+__version__ = "0.2.3"

releso/util/visualization.py

Lines changed: 99 additions & 4 deletions
@@ -68,6 +68,7 @@ def plot_episode_log(
     window: int = 5,
     window_size: Union[tuple[int, int], Literal["auto"]] = "auto",
     cut_off_point: int = np.iinfo(int).max,
+    show_validation: bool = False,
 ) -> Figure:
     """Plot one or multiple episodes to check out the training progress.
 
@@ -109,13 +110,16 @@
             or container size. Defaults to "auto".
         cut_off_point (int, optional): Plot the episode only to a certain
             number of time steps. Defaults to max int.
+        show_validation (bool, optional): Whether to show validation results.
+            Defaults to False.
 
     Returns:
         fig (plotly.graph_objects.Figure): Plotly figure object with the
             requested plots for further customization or export.
     """
     end_episode = []
     df: list[pd.DataFrame] = []
+    df_val: list[pd.DataFrame] = []
     # make a separate list for just the names of the experiments
     n_env: list[str] = list(result_folders.keys())
     # set of all episode end reasons
@@ -139,14 +143,45 @@
             temp_df["episode_reward"] / temp_df["steps_in_episode"]
         )
         unique_values.update(temp_df["episode_end_reason"].unique())
+
+        # Validation
+        if (
+            show_validation
+            and (val_file := folder / "eval/log/evaluations.npz").exists()
+        ):
+            val_data = np.load(val_file)
+            data = dict()
+            data["total_timesteps"] = val_data["timesteps"][
+                val_data["timesteps"] <= cut_off_point
+            ]
+            cut_off = len(data["total_timesteps"])
+            data["val_reward_mean"] = val_data["results"][:cut_off].mean(
+                axis=1
+            )
+            data["val_reward_min"] = val_data["results"][:cut_off].min(axis=1)
+            data["val_reward_max"] = val_data["results"][:cut_off].max(axis=1)
+            data["val_length_mean"] = val_data["ep_lengths"][:cut_off].mean(
+                axis=1
+            )
+            data["val_length_min"] = val_data["ep_lengths"][:cut_off].min(
+                axis=1
+            )
+            data["val_length_max"] = val_data["ep_lengths"][:cut_off].max(
+                axis=1
+            )
+
+            temp_val = pd.DataFrame(data, index=data["total_timesteps"])
+        else:
+            temp_val = None
+        df_val.append(temp_val)
         df.append(temp_df)
 
     fig = make_subplots(
         rows=6, cols=1, shared_xaxes=True, vertical_spacing=0.01
     )
     # plot each experiment into each sub plot, the idx is used to determine the
     # episode end and the episode name
-    for idx, dataframe in enumerate(df):
+    for idx, (dataframe, val_dataframe) in enumerate(zip(df, df_val)):
         # first subplot
         fig.add_trace(
             go.Scatter(
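Editor's note: the evaluations.npz file read in this hunk follows the key layout (timesteps, results, ep_lengths) that stable-baselines3's EvalCallback writes when given a log_path; each row of results and ep_lengths holds one evaluation round, which is why the code takes mean/min/max along axis=1. How releso itself sets up that callback is not shown in this commit, so the following is only a hedged sketch of how such a file typically comes to exist, with CartPole-v1 and the output path chosen purely for illustration:

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback

# Evaluate every eval_freq steps and append the results to
# <log_path>/evaluations.npz (keys: timesteps, results, ep_lengths).
eval_callback = EvalCallback(
    gym.make("CartPole-v1"),
    n_eval_episodes=5,
    eval_freq=1_000,
    log_path="results/experiment_a/eval/log",
)

model = PPO("MlpPolicy", gym.make("CartPole-v1"))
model.learn(total_timesteps=10_000, callback=eval_callback)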
@@ -166,6 +201,30 @@
             row=1,
             col=1,
         )
+        if val_dataframe is not None:
+            fig.add_trace(
+                go.Scatter(
+                    x=val_dataframe["total_timesteps"],
+                    y=val_dataframe["val_reward_mean"],
+                    legendgroup=f"{n_env[idx]}",
+                    name=f"Validation {n_env[idx]}",
+                    # line_color=plotly_colors[idx],
+                    mode="markers",
+                    showlegend=True,
+                    opacity=0.2,
+                    error_y=dict(
+                        type="data",
+                        symmetric=False,
+                        array=val_dataframe["val_reward_max"]
+                        - val_dataframe["val_reward_mean"],
+                        arrayminus=val_dataframe["val_reward_mean"]
+                        - val_dataframe["val_reward_min"],
+                    ),
+                    marker=dict(color=plotly_colors[idx], size=8),
+                ),
+                row=1,
+                col=1,
+            )
         # second subplot
         fig.add_trace(
             go.Scatter(
@@ -183,6 +242,30 @@
             row=2,
             col=1,
         )
+
+        if val_dataframe is not None:
+            fig.add_trace(
+                go.Scatter(
+                    x=val_dataframe["total_timesteps"],
+                    y=val_dataframe["val_length_mean"],
+                    legendgroup=f"{n_env[idx]}",
+                    name=f"Validation {n_env[idx]}",
+                    mode="markers",
+                    showlegend=False,
+                    opacity=0.2,
+                    error_y=dict(
+                        type="data",
+                        symmetric=False,
+                        array=val_dataframe["val_length_max"]
+                        - val_dataframe["val_length_mean"],
+                        arrayminus=val_dataframe["val_length_mean"]
+                        - val_dataframe["val_length_min"],
+                    ),
+                    marker=dict(color=plotly_colors[idx], size=8),
+                ),
+                row=2,
+                col=1,
+            )
         # third subplot
         fig.add_trace(
             go.Scatter(
@@ -488,7 +571,10 @@ def plot_step_log(
     df = pd.concat(
         [df_raw["episodes"], df_raw["reward"], objectives, design_vars], axis=1
     )
-
+    if df.empty:
+        raise ValueError(
+            f"The provided step log file {step_log_file} is empty or does not follow the current format."
+        )
     # Filter only the selected episodes
     max_idx = df["episodes"].max()
     if episode_end is None or episode_end > max_idx:
@@ -497,8 +583,17 @@
     # filter does not directly filter for episode number but filters the whole
     # list of episodes, which can have missing episodes, due to episodes
     # generated outside the environment id chosen.
-    selected_episodes = df["episodes"].unique()[
-        episode_start : (episode_end + 1) : episode_step
+    try:
+        idx_start = df[df["episodes"] >= episode_start].index[0]
+        idx_end = df[df["episodes"] <= episode_end].index[-1]
+    except IndexError as err:
+        raise IndexError(
+            f"Could not find any episode in the range {episode_start} to {episode_end}. "
+            f"The available episodes range from {df['episodes'].unique().min()} "
+            f"to {df['episodes'].unique().max()}"
+        ) from err
+    selected_episodes = df.iloc[idx_start : idx_end + 1]["episodes"].unique()[
+        ::episode_step
     ]
     df = df[df["episodes"].isin(selected_episodes)]
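Editor's note: the step-log fix replaces positional slicing of the unique episode numbers with a lookup of the row range that actually covers the requested episodes. A toy illustration (invented data, not from the repository) of why the two approaches diverge when episodes are missing from the log:

import pandas as pd

# Invented step log: only episodes 3, 5 and 8 were recorded.
df = pd.DataFrame({"episodes": [3, 3, 5, 5, 8, 8], "reward": range(6)})

# Old behaviour: episode_start/episode_end acted as positions into the array
# of unique episode numbers, so asking for episodes 5..8 slices unique()[5:9],
# which is empty here.
print(df["episodes"].unique()[5:9])  # -> []

# New behaviour: translate episode numbers into row positions first.
idx_start = df[df["episodes"] >= 5].index[0]  # first row of episode 5
idx_end = df[df["episodes"] <= 8].index[-1]   # last row of episode 8
print(df.iloc[idx_start : idx_end + 1]["episodes"].unique())  # -> [5 8]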

0 commit comments
