Add Python 3.13 support, drop Python 3.9 (#501)

araffin · web-flow · commit d29756c456ca · 2025-12-05T18:43:21.000+01:00
* Drop Python 3.9 support

* Ignore unused variables

* Autofixes for Python 3.10

* Use zip strict

* Reformat and fix mypy issues
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -20,19 +20,19 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
         include:
           # Default version
           - gymnasium-version: "1.0.0"
           # Add a new config to test gym<1.0
           - python-version: "3.10"
             gymnasium-version: "0.29.1"
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
@@ -42,7 +42,8 @@ jobs:
           pip install uv
           # cpu version of pytorch
           # See https://github.com/astral-sh/uv/issues/1497
-          uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
+          # Need PyTorch 2.9+ for Python 3.13
+          uv pip install --system torch==2.9.1+cpu --index https://download.pytorch.org/whl/cpu
           # Install full requirements (for additional envs and test tools)
           uv pip install --system -r requirements.txt
           # Use headless version
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
@@ -20,19 +20,19 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
         include:
           # Default version
           - gymnasium-version: "1.0.0"
           # Add a new config to test gym<1.0
           - python-version: "3.10"
             gymnasium-version: "0.29.1"
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
@@ -43,7 +43,8 @@ jobs:
           pip install uv
           # cpu version of pytorch
           # See https://github.com/astral-sh/uv/issues/1497
-          uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
+          # Need PyTorch 2.9+ for Python 3.13
+          uv pip install --system torch==2.9.1+cpu --index https://download.pytorch.org/whl/cpu
           # Install full requirements (for additional envs and test tools)
           uv pip install --system -r requirements.txt
           # Use headless version
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,20 @@
+## Release 2.8.0a0 (WIP)
+
+### Breaking Changes
+- Upgraded to SB3 >= 2.8.0
+- Removed support for Python 3.9, please upgrade to Python >= 3.10
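+- Made the ``strict`` argument explicit for every call to ``zip(...)``: ``strict=True`` where the iterables must have the same length, ``strict=False`` otherwise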
+
+### New Features
+- Added official support for Python 3.13
+
+### Bug fixes
+
+### Documentation
+
+### Other
+
+
 ## Release 2.7.0 (2025-07-25)
 
 ### Breaking Changes
@@ -14,10 +31,6 @@
 - Use `ConstantSchedule`, and `SimpleLinearSchedule` instead of `constant_fn` and `linear_schedule`
 - Fixed `CarRacing-v3` hyperparameters for newer Gymnasium version
 
-### Documentation
-
-### Other
-
 ## Release 2.6.0 (2025-03-24)
 
 ### Breaking Changes
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,8 +1,8 @@
 [tool.ruff]
 # Same as Black.
 line-length = 127
-# Assume Python 3.9
-target-version = "py39"
+# Assume Python 3.10
+target-version = "py310"
 
 [tool.ruff.lint]
 # See https://beta.ruff.rs/docs/rules/
diff --git a/rl_zoo3/callbacks.py b/rl_zoo3/callbacks.py
@@ -4,7 +4,6 @@
 from copy import deepcopy
 from functools import wraps
 from threading import Thread
-from typing import Optional, Union
 
 import optuna
 from sb3_contrib import TQC
@@ -27,8 +26,8 @@ def __init__(
         eval_freq: int = 10000,
         deterministic: bool = True,
         verbose: int = 0,
-        best_model_save_path: Optional[str] = None,
-        log_path: Optional[str] = None,
+        best_model_save_path: str | None = None,
+        log_path: str | None = None,
     ) -> None:
         super().__init__(
             eval_env=eval_env,
@@ -67,7 +66,7 @@ class SaveVecNormalizeCallback(BaseCallback):
         only one file will be kept.
     """
 
-    def __init__(self, save_freq: int, save_path: str, name_prefix: Optional[str] = None, verbose: int = 0):
+    def __init__(self, save_freq: int, save_path: str, name_prefix: str | None = None, verbose: int = 0):
         super().__init__(verbose)
         self.save_freq = save_freq
         self.save_path = save_path
@@ -116,10 +115,10 @@ def __init__(self, gradient_steps: int = 100, verbose: int = 0, sleep_time: floa
         super().__init__(verbose)
         self.batch_size = 0
         self._model_ready = True
-        self._model: Union[SAC, TQC]
+        self._model: SAC | TQC
         self.gradient_steps = gradient_steps
         self.process: Thread
-        self.model_class: Union[type[SAC], type[TQC]]
+        self.model_class: type[SAC] | type[TQC]
         self.sleep_time = sleep_time
 
     def _init_callback(self) -> None:
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
@@ -6,9 +6,10 @@
 import time
 import warnings
 from collections import OrderedDict
+from collections.abc import Callable
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Callable, Optional, Union
+from typing import Any
 
 import gymnasium as gym
 import numpy as np
@@ -69,7 +70,7 @@ class ExperimentManager:
     """
 
     # For special VecEnv like Brax, IsaacLab, ...
-    default_vec_env_cls: Optional[type[VecEnv]] = None
+    default_vec_env_cls: type[VecEnv] | None = None
 
     def __init__(
         self,
@@ -82,19 +83,19 @@ def __init__(
         eval_freq: int = 10000,
         n_eval_episodes: int = 5,
         save_freq: int = -1,
-        hyperparams: Optional[dict[str, Any]] = None,
-        env_kwargs: Optional[dict[str, Any]] = None,
-        eval_env_kwargs: Optional[dict[str, Any]] = None,
+        hyperparams: dict[str, Any] | None = None,
+        env_kwargs: dict[str, Any] | None = None,
+        eval_env_kwargs: dict[str, Any] | None = None,
         trained_agent: str = "",
         optimize_hyperparameters: bool = False,
-        storage: Optional[str] = None,
-        study_name: Optional[str] = None,
+        storage: str | None = None,
+        study_name: str | None = None,
         n_trials: int = 1,
-        max_total_trials: Optional[int] = None,
+        max_total_trials: int | None = None,
         n_jobs: int = 1,
         sampler: str = "tpe",
         pruner: str = "median",
-        optimization_log_path: Optional[str] = None,
+        optimization_log_path: str | None = None,
         n_startup_trials: int = 0,
         n_evaluations: int = 1,
         truncate_last_trajectory: bool = False,
@@ -106,10 +107,10 @@ def __init__(
         vec_env_type: str = "dummy",
         n_eval_envs: int = 1,
         no_optim_plots: bool = False,
-        device: Union[th.device, str] = "auto",
-        config: Optional[str] = None,
+        device: th.device | str = "auto",
+        config: str | None = None,
         show_progress: bool = False,
-        trial_id: Optional[int] = None,
+        trial_id: int | None = None,
     ):
         super().__init__()
         self.algo = algo
@@ -128,7 +129,7 @@ def __init__(
         self.n_timesteps = n_timesteps
         self.normalize = False
         self.normalize_kwargs: dict[str, Any] = {}
-        self.env_wrapper: Optional[Callable] = None
+        self.env_wrapper: Callable | None = None
         self.frame_stack = None
         self.seed = seed
         self.optimization_log_path = optimization_log_path
@@ -138,7 +139,7 @@ def __init__(
         if self.default_vec_env_cls is not None:
             self.vec_env_class = self.default_vec_env_cls
 
-        self.vec_env_wrapper: Optional[Callable] = None
+        self.vec_env_wrapper: Callable | None = None
 
         self.vec_env_kwargs: dict[str, Any] = {}
         # self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}
@@ -197,7 +198,7 @@ def __init__(
         )
         self.params_path = f"{self.save_path}/{self.env_name}"
 
-    def setup_experiment(self) -> Optional[tuple[BaseAlgorithm, dict[str, Any]]]:
+    def setup_experiment(self) -> tuple[BaseAlgorithm, dict[str, Any]] | None:
         """
         Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
         create the environment and possibly the model.
@@ -361,12 +362,10 @@ def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
 
         return hyperparams, saved_hyperparams
 
-    def load_trial(
-        self, storage: str, study_name: str, trial_id: Optional[int] = None, convert: bool = True
-    ) -> dict[str, Any]:
+    def load_trial(self, storage: str, study_name: str, trial_id: int | None = None, convert: bool = True) -> dict[str, Any]:
 
         if storage.endswith(".log"):
-            optuna_storage = optuna.storages.JournalStorage(optuna.storages.journal.JournalFileBackend(storage))
+            optuna_storage = optuna.storages.JournalStorage(optuna.storages.journal.JournalFileBackend(storage))  # type: ignore[attr-defined]
         else:
             optuna_storage = storage  # type: ignore[assignment]
         study = optuna.load_study(storage=optuna_storage, study_name=study_name)
@@ -386,7 +385,7 @@ def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
             if key not in hyperparams:
                 continue
             if isinstance(hyperparams[key], str):
-                schedule, initial_value = hyperparams[key].split("_")
+                _schedule, initial_value = hyperparams[key].split("_")
                 initial_value = float(initial_value)
                 hyperparams[key] = SimpleLinearSchedule(initial_value)
             elif isinstance(hyperparams[key], (float, int)):
@@ -424,7 +423,7 @@ def _preprocess_normalization(self, hyperparams: dict[str, Any]) -> dict[str, An
 
     def _preprocess_hyperparams(  # noqa: C901
         self, hyperparams: dict[str, Any]
-    ) -> tuple[dict[str, Any], Optional[Callable], list[BaseCallback], Optional[Callable]]:
+    ) -> tuple[dict[str, Any], Callable | None, list[BaseCallback], Callable | None]:
         self.n_envs = hyperparams.get("n_envs", 1)
 
         if self.verbose > 0:
@@ -891,7 +890,7 @@ def hyperparameters_optimization(self) -> None:
             # Create folder if it doesn't exist
             Path(storage).parent.mkdir(parents=True, exist_ok=True)
             storage = optuna.storages.JournalStorage(  # type: ignore[assignment]
-                optuna.storages.journal.JournalFileBackend(storage),
+                optuna.storages.journal.JournalFileBackend(storage),  # type: ignore[attr-defined]
             )
 
         if self.verbose > 0:
diff --git a/rl_zoo3/import_envs.py b/rl_zoo3/import_envs.py
@@ -1,4 +1,5 @@
-from typing import Callable, Optional
+from collections.abc import Callable
+from typing import Optional
 
 import gymnasium as gym
 from gymnasium.envs.registration import register, register_envs
@@ -55,8 +56,8 @@
 
 
 # Register no vel envs
-def create_no_vel_env(env_id: str) -> Callable[[Optional[str]], gym.Env]:
-    def make_env(render_mode: Optional[str] = None) -> gym.Env:
+def create_no_vel_env(env_id: str) -> Callable[[str | None], gym.Env]:
+    def make_env(render_mode: str | None = None) -> gym.Env:
         env = gym.make(env_id, render_mode=render_mode)
         env = MaskVelocityWrapper(env)
         return env
diff --git a/rl_zoo3/load_from_hub.py b/rl_zoo3/load_from_hub.py
@@ -3,7 +3,6 @@
 import shutil
 import zipfile
 from pathlib import Path
-from typing import Optional
 
 from huggingface_sb3 import EnvironmentName, ModelName, ModelRepoId, load_from_hub
 from requests.exceptions import HTTPError
@@ -17,7 +16,7 @@ def download_from_hub(
     exp_id: int,
     folder: str,
     organization: str,
-    repo_name: Optional[str] = None,
+    repo_name: str | None = None,
     force: bool = False,
 ) -> None:
     """
diff --git a/rl_zoo3/plots/plot_from_file.py b/rl_zoo3/plots/plot_from_file.py
@@ -109,7 +109,7 @@ def plot_from_file():  # noqa: C901
 
     labels = {key: key for key in keys}
     if args.labels is not None:
-        for key, label in zip(keys, args.labels):
+        for key, label in zip(keys, args.labels, strict=True):
             labels[key] = label
 
     if not args.skip_timesteps:
@@ -234,7 +234,7 @@ def plot_from_file():  # noqa: C901
             confidence_interval_size=args.ci_size,  # Coverage of confidence interval. Defaults to 95%.
         )
 
-        fig, axes = plot_utils.plot_interval_estimates(
+        fig, _axes = plot_utils.plot_interval_estimates(
             aggregate_scores,
             aggregate_interval_estimates,
             metric_names=["Median", "IQM", "Mean", "Optimality Gap"],
@@ -266,7 +266,7 @@ def plot_from_file():  # noqa: C901
             score_distributions,
             normalized_score_thresholds,
             performance_profile_cis=score_distributions_cis,
-            colors=dict(zip(algorithms, seaborn.color_palette("colorblind"))),
+            colors=dict(zip(algorithms, seaborn.color_palette("colorblind"), strict=False)),
             xlabel=r"Normalized Score $(\tau)$",
             ax=ax,
         )
@@ -350,7 +350,7 @@ def plot_from_file():  # noqa: C901
 
     ax = seaborn.barplot(x="Environment", y="Score", hue="Method", data=data_frame)
     # Custom legend title
-    handles, labels_legend = ax.get_legend_handles_labels()
+    _handles, _labels_legend = ax.get_legend_handles_labels()
     # ax.legend(handles=handles, labels=labels_legend, title=r"$log \sigma$", loc=args.legend_loc)
     # ax.legend(handles=handles, labels=labels_legend, title="Network Architecture", loc=args.legend_loc)
     # ax.legend(handles=handles, labels=labels_legend, title="Interval", loc=args.legend_loc)
diff --git a/rl_zoo3/push_to_hub.py b/rl_zoo3/push_to_hub.py
@@ -6,7 +6,7 @@
 from copy import deepcopy
 from pathlib import Path
 from pprint import pformat
-from typing import Any, Optional
+from typing import Any
 
 import torch as th
 import yaml
@@ -139,7 +139,7 @@ def package_to_hub(
     commit_message: str,
     is_deterministic: bool = True,
     n_eval_episodes=10,
-    token: Optional[str] = None,
+    token: str | None = None,
     local_repo_path="hub",
     video_length=1000,
     generate_video: bool = False,
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
diff --git a/setup.py b/setup.py