
Commit c1080b1

Refactor hyperparameter optimization (#485)
* Start refactoring hyperparam optim
* Remove hardcoded env for success recording
* Refactor all algos
* Bug fixes
* Update doc and add test
1 parent 06ab062 commit c1080b1

File tree

8 files changed: +410 -361 lines changed


CHANGELOG.md

Lines changed: 18 additions & 1 deletion
@@ -1,7 +1,23 @@
-## Release 2.6.0a2 (WIP)
+## Release 2.6.0a3 (WIP)

 ### Breaking Changes
 - Upgraded to SB3 >= 2.6.0
+- Refactored hyperparameter optimization. The Optuna [Journal storage backend](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.storages.JournalStorage.html) is now supported (recommended default) and you can easily load tuned hyperparameters via the new `--trial-id` argument of `train.py`.
+
+For example, optimize using the journal storage:
+```bash
+python train.py --algo ppo --env Pendulum-v1 -n 40000 --study-name demo --storage logs/demo.log --sampler tpe --n-evaluations 2 --optimize --no-optim-plots
+```
+Visualize live using [optuna-dashboard](https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html):
+```bash
+optuna-dashboard logs/demo.log
+```
+
+Load hyperparameters from trial number 21 and train an agent with it:
+```bash
+python train.py --algo ppo --env Pendulum-v1 --study-name demo --storage logs/demo.log --trial-id 21
+```
+

 ### New Features
 - Save the exact command line used to launch a training
@@ -15,6 +31,7 @@
 ### Documentation

 ### Other
+- `scripts/parse_study.py` is now deprecated because of the new hyperparameter optimization scripts

 ## Release 2.5.0 (2025-01-27)

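For reference, the journal file written by `--storage logs/demo.log` is a regular Optuna storage, so the study can also be inspected directly from Python. A minimal sketch using only the Optuna calls shown in this commit (the study name and file path match the example commands above):

```python
import optuna

# Open the journal file created by `train.py --storage logs/demo.log`
storage = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("logs/demo.log")
)
study = optuna.load_study(study_name="demo", storage=storage)

# Show the best trial and its sampled hyperparameters
print(f"Best trial: {study.best_trial.number} (value={study.best_trial.value})")
for name, value in study.best_trial.params.items():
    print(f"  {name}: {value}")
```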

docs/guide/tuning.rst

Lines changed: 20 additions & 11 deletions
@@ -4,8 +4,12 @@
 Hyperparameter Tuning
 =====================

-Hyperparameter Tuning
----------------------
+Automated hyperparameter optimization
+-------------------------------------
+
+Blog post: `Automatic Hyperparameter Tuning - A Visual Guide <https://araffin.github.io/post/hyperparam-tuning/>`_
+
+Video: https://www.youtube.com/watch?v=AidFTOdGNFQ

 We use `Optuna <https://optuna.org/>`__ for optimizing the
 hyperparameters. Not all hyperparameters are tuned, and tuning enforces
@@ -35,20 +39,29 @@ documentation <https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/

 ::

-  python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db
+  python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage logs/demo.log

-Print and save best hyperparameters of an Optuna study:

-::

-  python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10
+Visualize live using `optuna-dashboard <https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html>`__
+
+.. code:: bash
+
+  optuna-dashboard logs/demo.log
+
+Load hyperparameters from trial number 21 and train an agent with it:
+
+.. code:: bash
+
+  python train.py --algo ppo --env MountainCar-v0 --study-name test --storage logs/demo.log --trial-id 21
+

 The default budget for hyperparameter tuning is 500 trials and there is
 one intermediate evaluation for pruning/early stopping per 100k time
 steps.

 Hyperparameters search space
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------

 Note that the default hyperparameters used in the zoo when tuning are
 not always the same as the defaults provided in
@@ -65,7 +78,3 @@ example:
 - Non-episodic rollout in TD3 and DDPG assumes
   ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to
   reduce the search space.
-
-When working with continuous actions, we recommend to enable
-`gSDE <https://arxiv.org/abs/2005.05719>`__ by uncommenting lines in
-`rl_zoo3/hyperparams_opt.py <https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py>`__.
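Since `scripts/parse_study.py` is deprecated by this commit, ranking the best trials of a study can be done with a few lines of plain Optuna. A sketch, assuming a single-objective study named `test` stored in `logs/demo.log` as in the docs above:

```python
import optuna

storage = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("logs/demo.log")
)
study = optuna.load_study(study_name="test", storage=storage)

# Keep completed trials only and sort by objective value (maximized in the zoo)
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
for trial in sorted(completed, key=lambda t: t.value, reverse=True)[:10]:
    print(f"Trial {trial.number}: value={trial.value:.2f} params={trial.params}")
```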

requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,8 @@ box2d-py==2.3.8
 pybullet_envs_gymnasium>=0.6.0
 # minigrid
 cloudpickle>=2.2.1
+# Optuna auto
+optunahub>=0.2.0
 # optuna plots:
 plotly
 # need to upgrade to gymnasium:

rl_zoo3/exp_manager.py

Lines changed: 49 additions & 14 deletions
@@ -49,7 +49,7 @@
 # Register custom envs
 import rl_zoo3.import_envs  # noqa: F401
 from rl_zoo3.callbacks import SaveVecNormalizeCallback, TrialEvalCallback
-from rl_zoo3.hyperparams_opt import HYPERPARAMS_SAMPLER
+from rl_zoo3.hyperparams_opt import HYPERPARAMS_CONVERTER, HYPERPARAMS_SAMPLER
 from rl_zoo3.utils import ALGOS, get_callback_list, get_class_by_name, get_latest_run_id, get_wrapper_class, linear_schedule


@@ -102,6 +102,7 @@ def __init__(
         device: Union[th.device, str] = "auto",
         config: Optional[str] = None,
         show_progress: bool = False,
+        trial_id: Optional[int] = None,
     ):
         super().__init__()
         self.algo = algo
@@ -160,6 +161,8 @@ def __init__(
         self.storage = storage
         self.study_name = study_name
         self.no_optim_plots = no_optim_plots
+        # For loading hyperparams from a study
+        self.trial_id = trial_id
         # maximum number of trials for finding the best hyperparams
         self.n_trials = n_trials
         self.max_total_trials = max_total_trials
@@ -334,6 +337,11 @@ def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
         else:
             raise ValueError(f"Hyperparameters not found for {self.algo}-{self.env_name.gym_id} in {self.config}")

+        if self.storage and self.study_name and self.trial_id:
+            print("Loading from Optuna study...")
+            study_hyperparams = self.load_trial(self.storage, self.study_name, self.trial_id)
+            hyperparams.update(study_hyperparams)
+
         if self.custom_hyperparams is not None:
             # Overwrite hyperparams if needed
             hyperparams.update(self.custom_hyperparams)
@@ -346,6 +354,24 @@ def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:

         return hyperparams, saved_hyperparams

+    def load_trial(
+        self, storage: str, study_name: str, trial_id: Optional[int] = None, convert: bool = True
+    ) -> dict[str, Any]:
+
+        if storage.endswith(".log"):
+            optuna_storage = optuna.storages.JournalStorage(optuna.storages.journal.JournalFileBackend(storage))
+        else:
+            optuna_storage = storage  # type: ignore[assignment]
+        study = optuna.load_study(storage=optuna_storage, study_name=study_name)
+        if trial_id is not None:
+            params = study.trials[trial_id].params
+        else:
+            params = study.best_trial.params
+
+        if convert:
+            return HYPERPARAMS_CONVERTER[self.algo](params)
+        return params
+
     @staticmethod
     def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
         # Create schedules
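`load_trial` is what backs the new `--trial-id` flow: with no `trial_id` it falls back to the study's best trial, and `convert=True` passes the raw sampled params through `HYPERPARAMS_CONVERTER` so they match the keyword arguments expected by the algorithm. A standalone equivalent for use outside the manager (a sketch; the helper name `params_from_study` is ours, the Optuna calls are the ones from the hunk above):

```python
from typing import Any, Optional

import optuna


def params_from_study(storage_path: str, study_name: str, trial_id: Optional[int] = None) -> dict[str, Any]:
    # Same storage resolution as ExperimentManager.load_trial:
    # ".log" files are journal storage, anything else is a storage URL
    if storage_path.endswith(".log"):
        storage = optuna.storages.JournalStorage(
            optuna.storages.journal.JournalFileBackend(storage_path)
        )
    else:
        storage = storage_path
    study = optuna.load_study(storage=storage, study_name=study_name)
    # A specific trial number, or the study's best trial by default
    trial = study.trials[trial_id] if trial_id is not None else study.best_trial
    return trial.params
```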
@@ -470,6 +496,10 @@ def _preprocess_hyperparams(  # noqa: C901
     def _preprocess_action_noise(
         self, hyperparams: dict[str, Any], saved_hyperparams: dict[str, Any], env: VecEnv
     ) -> dict[str, Any]:
+        # Compute n_actions for hyperparameter optim
+        if isinstance(env.action_space, spaces.Box):
+            self.n_actions = env.action_space.shape[0]
+
         # Parse noise string
         # Note: only off-policy algorithms are supported
         if hyperparams.get("noise_type") is not None:
@@ -480,7 +510,6 @@ def _preprocess_action_noise(
             assert isinstance(
                 env.action_space, spaces.Box
             ), f"Action noise can only be used with Box action space, not {env.action_space}"
-            self.n_actions = env.action_space.shape[0]

             if "normal" in noise_type:
                 hyperparams["action_noise"] = NormalActionNoise(
619648
log_dir = None if eval_env or no_log else self.save_path
620649

621650
# Special case for GoalEnvs: log success rate too
622-
if (
623-
"Neck" in self.env_name.gym_id
624-
or self.is_robotics_env(self.env_name.gym_id)
625-
or ("parking-v0" in self.env_name.gym_id and len(self.monitor_kwargs) == 0) # do not overwrite custom kwargs
626-
):
651+
if self.is_robotics_env(self.env_name.gym_id) or (
652+
"parking-v0" in self.env_name.gym_id and len(self.monitor_kwargs) == 0
653+
): # do not overwrite custom kwargs
627654
self.monitor_kwargs = dict(info_keywords=("is_success",))
628655

629656
spec = gym.spec(self.env_name.gym_id)
@@ -722,13 +749,10 @@ def _create_sampler(self, sampler_method: str) -> BaseSampler:
             sampler: BaseSampler = RandomSampler(seed=self.seed)
         elif sampler_method == "tpe":
             sampler = TPESampler(n_startup_trials=self.n_startup_trials, seed=self.seed, multivariate=True)
-        elif sampler_method == "skopt":
-            from optuna.integration.skopt import SkoptSampler
+        elif sampler_method == "auto":
+            import optunahub

-            # cf https://scikit-optimize.github.io/#skopt.Optimizer
-            # GP: gaussian process
-            # Gradient boosted regression: GBRT
-            sampler = SkoptSampler(skopt_kwargs={"base_estimator": "GP", "acq_func": "gp_hedge"})
+            sampler = optunahub.load_module("samplers/auto_sampler").AutoSampler(seed=self.seed)
         else:
             raise ValueError(f"Unknown sampler: {sampler_method}")
         return sampler
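The new `auto` sampler replaces the removed `skopt` option and delegates the sampler choice to `AutoSampler` from optunahub (hence the new `optunahub>=0.2.0` requirement above). A minimal standalone sketch of the same call, assuming optunahub and its optional sampler dependencies are installed:

```python
import optuna
import optunahub

# AutoSampler automatically picks a suitable sampler (e.g. TPE, GP, CMA-ES)
# based on the search space and the trials collected so far
sampler = optunahub.load_module("samplers/auto_sampler").AutoSampler(seed=42)
study = optuna.create_study(sampler=sampler, direction="maximize")
# Toy objective, just to exercise the sampler
study.optimize(lambda trial: -(trial.suggest_float("x", -10, 10) ** 2), n_trials=20)
print(study.best_trial.params)
```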
@@ -854,14 +878,22 @@ def hyperparameters_optimization(self) -> None:
         # TODO: eval each hyperparams several times to account for noisy evaluation
         sampler = self._create_sampler(self.sampler)
         pruner = self._create_pruner(self.pruner)
+        # Log file storage
+        storage = self.storage
+        if storage is not None and storage.endswith(".log"):
+            # Create folder if it doesn't exist
+            Path(storage).parent.mkdir(parents=True, exist_ok=True)
+            storage = optuna.storages.JournalStorage(  # type: ignore[assignment]
+                optuna.storages.journal.JournalFileBackend(storage),
+            )

         if self.verbose > 0:
             print(f"Sampler: {self.sampler} - Pruner: {self.pruner}")

         study = optuna.create_study(
             sampler=sampler,
             pruner=pruner,
-            storage=self.storage,
+            storage=storage,
             study_name=self.study_name,
             load_if_exists=True,
             direction="maximize",
@@ -903,6 +935,9 @@ def hyperparameters_optimization(self) -> None:
             print("Params: ")
             for key, value in trial.params.items():
                 print(f"    {key}: {value}")
+            print("User Attributes: ")
+            for key, value in trial.user_attrs.items():
+                print(f"    {key}: {value}")

             report_name = (
                 f"report_{self.env_name}_{self.n_trials}-trials-{self.n_timesteps}"
