
Commit 633954f

Drop python 3.8, add python 3.12 support (#477)
* Drop python 3.8, add python 3.12 support
* Update trained agent CI too
* Add missing condition in CI
* Downgrade numpy for pybullet and add a notice
1 parent b8ff1a6 · commit 633954f

18 files changed: +106 additions, −88 deletions

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions

@@ -19,7 +19,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
       include:
         # Default version
         - gymnasium-version: "1.0.0"
@@ -51,7 +51,9 @@ jobs:
       - name: Install specific version of gym
         run: |
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
         # Only run for python 3.10, downgrade gym to 0.29.1
+        if: matrix.gymnasium-version != '1.0.0'

       - name: Lint with ruff
         run: |
@@ -65,8 +67,6 @@ jobs:
       - name: Type check
         run: |
           make type
-        # Do not run for python 3.8 (mypy internal error)
-        if: matrix.python-version != '3.8'
       - name: Test with pytest
         run: |
           make pytest

.github/workflows/trained_agents.yml

Lines changed: 6 additions & 2 deletions

@@ -20,7 +20,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
       include:
         # Default version
         - gymnasium-version: "1.0.0"
@@ -45,16 +45,20 @@ jobs:
           # See https://github.com/astral-sh/uv/issues/1497
           uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
           # Install full requirements (for additional envs and test tools)
-          # Install full requirements (for additional envs and test tools)
           uv pip install --system -r requirements.txt
           # Use headless version
           uv pip install --system opencv-python-headless
           uv pip install --system -e .[plots,tests]
+          # Downgrade numpy to run pybullet agents
+          # See https://github.com/bulletphysics/bullet3/issues/4649
+          uv pip install --system "numpy<2"

       - name: Install specific version of gym
         run: |
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
         # Only run for python 3.10, downgrade gym to 0.29.1
+        if: matrix.gymnasium-version != '1.0.0'

       - name: Check trained agents
         run: |

CHANGELOG.md

Lines changed: 16 additions & 0 deletions

@@ -1,3 +1,19 @@
+## Release 2.5.0a0 (WIP)
+
+### Breaking Changes
+- Upgraded to Pytorch >= 2.3.0
+- Upgraded to SB3 >= 2.5.0
+
+### New Features
+- Added support for Numpy v2
+
+### Bug fixes
+
+### Documentation
+
+### Other
+
+
 ## Release 2.4.0 (2024-11-18)

 **New algorithm: CrossQ, Gymnasium v1.0 support, and better defaults for SAC/TQC on Swimmer-v4 env**

README.md

Lines changed: 2 additions & 0 deletions

@@ -27,6 +27,8 @@ Goals of this repository:

 This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo).

+Note: although SB3 and the RL Zoo are compatible with Numpy>=2.0, you will need Numpy<2 to run agents on pybullet envs (see [issue](https://github.com/bulletphysics/bullet3/issues/4649)).
+
 ## Documentation

 Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io)
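
The pybullet constraint above is user-facing, so a launch script may want to surface it early instead of crashing inside pybullet's C extension. A minimal sketch, not part of the commit, assuming the third-party `packaging` library is installed:

    import numpy as np
    from packaging.version import Version

    # pybullet wheels are built against the NumPy 1.x ABI
    # (see https://github.com/bulletphysics/bullet3/issues/4649),
    # so fail early with a clear message instead of a C-level crash.
    if Version(np.__version__) >= Version("2"):
        raise RuntimeError(
            f"NumPy {np.__version__} found; install 'numpy<2' to run pybullet envs."
        )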

docs/conf.py

Lines changed: 1 addition & 2 deletions

@@ -14,7 +14,6 @@
 import datetime
 import os
 import sys
-from typing import Dict

 # We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
 # PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):

 # -- Options for LaTeX output ------------------------------------------------

-latex_elements: Dict[str, str] = {
+latex_elements: dict[str, str] = {
     # The paper size ('letterpaper' or 'a4paper').
     #
     # 'papersize': 'letterpaper',
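
The `Dict` → `dict` substitution here (and in the `rl_zoo3` modules below) is what dropping Python 3.8 buys: since Python 3.9 (PEP 585), builtin containers can be subscripted directly, so the `typing.Dict`/`List`/`Tuple`/`Type` aliases become unnecessary. A minimal illustration, not from the commit:

    # PEP 585 (Python 3.9+): builtin containers are generic,
    # so no `from typing import Dict, List, Tuple` is needed.
    latex_elements: dict[str, str] = {"papersize": "a4paper"}

    def first_key(elements: dict[str, str]) -> tuple[str, list[str]]:
        # Return the first key plus the full key list, using builtin generics.
        keys = list(elements)
        return keys[0], keys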

pyproject.toml

Lines changed: 2 additions & 2 deletions

@@ -1,8 +1,8 @@
 [tool.ruff]
 # Same as Black.
 line-length = 127
-# Assume Python 3.8
-target-version = "py38"
+# Assume Python 3.9
+target-version = "py39"

 [tool.ruff.lint]
 # See https://beta.ruff.rs/docs/rules/

requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 gym==0.26.2
-stable-baselines3[extra,tests,docs]>=2.4.0,<3.0
+stable-baselines3[extra,tests,docs]>=2.5.0a0,<3.0
 box2d-py==2.3.8
 pybullet_envs_gymnasium>=0.5.0
 # minigrid
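
The `2.5.0a0` lower bound also affects resolution: under PEP 440, pip considers pre-releases for a requirement only when the specifier itself names one (or `--pre` is passed), so this pin lets the zoo track the in-development SB3 alpha. A quick check with the third-party `packaging` library, illustrative only:

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    # A specifier that names a pre-release (2.5.0a0) admits pre-releases.
    spec = SpecifierSet(">=2.5.0a0,<3.0")
    print(Version("2.5.0a0") in spec)  # True: the alpha satisfies the range
    print(Version("2.4.0") in spec)    # False: below the new lower bound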

rl_zoo3/benchmark.py

Lines changed: 1 addition & 2 deletions

@@ -3,7 +3,6 @@
 import os
 import shutil
 import subprocess
-from typing import Dict, List

 import numpy as np
 import pandas as pd
@@ -33,7 +32,7 @@
 trained_models.update(get_hf_trained_models())

 n_experiments = len(trained_models)
-results: Dict[str, List] = {
+results: dict[str, list] = {
     "algo": [],
     "env_id": [],
     "mean_reward": [],

rl_zoo3/callbacks.py

Lines changed: 2 additions & 2 deletions

@@ -4,7 +4,7 @@
 from copy import deepcopy
 from functools import wraps
 from threading import Thread
-from typing import Optional, Type, Union
+from typing import Optional, Union

 import optuna
 from sb3_contrib import TQC
@@ -119,7 +119,7 @@ def __init__(self, gradient_steps: int = 100, verbose: int = 0, sleep_time: float
         self._model: Union[SAC, TQC]
         self.gradient_steps = gradient_steps
         self.process: Thread
-        self.model_class: Union[Type[SAC], Type[TQC]]
+        self.model_class: Union[type[SAC], type[TQC]]
         self.sleep_time = sleep_time

     def _init_callback(self) -> None:
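
Note that `Type` becomes the builtin `type` (PEP 585) while `Optional` and `Union` keep their `typing` imports: the `X | Y` union syntax is PEP 604 and requires Python 3.10 at runtime, which the zoo cannot assume while it still supports 3.9. A short sketch of the resulting style, illustrative and not from the commit:

    from typing import Optional, Union  # still needed on Python 3.9

    # Builtin generics (PEP 585) work on 3.9+ ...
    model_class: type[int] = int
    # ... but `int | None` (PEP 604) would fail at runtime before 3.10,
    # so Optional/Union stay until 3.9 support is dropped.
    maybe_seed: Optional[int] = None
    step_count: Union[int, float] = 0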

rl_zoo3/exp_manager.py

Lines changed: 24 additions & 24 deletions

@@ -7,7 +7,7 @@
 from collections import OrderedDict
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union

 import gymnasium as gym
 import numpy as np
@@ -71,9 +71,9 @@ def __init__(
         eval_freq: int = 10000,
         n_eval_episodes: int = 5,
         save_freq: int = -1,
-        hyperparams: Optional[Dict[str, Any]] = None,
-        env_kwargs: Optional[Dict[str, Any]] = None,
-        eval_env_kwargs: Optional[Dict[str, Any]] = None,
+        hyperparams: Optional[dict[str, Any]] = None,
+        env_kwargs: Optional[dict[str, Any]] = None,
+        eval_env_kwargs: Optional[dict[str, Any]] = None,
         trained_agent: str = "",
         optimize_hyperparameters: bool = False,
         storage: Optional[str] = None,
@@ -112,10 +112,10 @@ def __init__(
         default_path = Path(__file__).parent.parent

         self.config = config or str(default_path / f"hyperparams/{self.algo}.yml")
-        self.env_kwargs: Dict[str, Any] = env_kwargs or {}
+        self.env_kwargs: dict[str, Any] = env_kwargs or {}
         self.n_timesteps = n_timesteps
         self.normalize = False
-        self.normalize_kwargs: Dict[str, Any] = {}
+        self.normalize_kwargs: dict[str, Any] = {}
         self.env_wrapper: Optional[Callable] = None
         self.frame_stack = None
         self.seed = seed
@@ -124,23 +124,23 @@ def __init__(
         self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[vec_env_type]
         self.vec_env_wrapper: Optional[Callable] = None

-        self.vec_env_kwargs: Dict[str, Any] = {}
+        self.vec_env_kwargs: dict[str, Any] = {}
         # self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}

         # Callbacks
-        self.specified_callbacks: List = []
-        self.callbacks: List[BaseCallback] = []
+        self.specified_callbacks: list = []
+        self.callbacks: list[BaseCallback] = []
         # Use env-kwargs if eval_env_kwargs was not specified
-        self.eval_env_kwargs: Dict[str, Any] = eval_env_kwargs or self.env_kwargs
+        self.eval_env_kwargs: dict[str, Any] = eval_env_kwargs or self.env_kwargs
         self.save_freq = save_freq
         self.eval_freq = eval_freq
         self.n_eval_episodes = n_eval_episodes
         self.n_eval_envs = n_eval_envs

         self.n_envs = 1  # it will be updated when reading hyperparams
         self.n_actions = 0  # For DDPG/TD3 action noise objects
-        self._hyperparams: Dict[str, Any] = {}
-        self.monitor_kwargs: Dict[str, Any] = {}
+        self._hyperparams: dict[str, Any] = {}
+        self.monitor_kwargs: dict[str, Any] = {}

         self.trained_agent = trained_agent
         self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(trained_agent)
@@ -179,7 +179,7 @@ def __init__(
         )
         self.params_path = f"{self.save_path}/{self.env_name}"

-    def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
+    def setup_experiment(self) -> Optional[tuple[BaseAlgorithm, dict[str, Any]]]:
         """
         Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
         create the environment and possibly the model.
@@ -223,7 +223,7 @@ def learn(self, model: BaseAlgorithm) -> None:
         """
         :param model: an initialized RL model
         """
-        kwargs: Dict[str, Any] = {}
+        kwargs: dict[str, Any] = {}
         if self.log_interval > -1:
             kwargs = {"log_interval": self.log_interval}

@@ -272,7 +272,7 @@ def save_trained_model(self, model: BaseAlgorithm) -> None:
         assert vec_normalize is not None
         vec_normalize.save(os.path.join(self.params_path, "vecnormalize.pkl"))

-    def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
+    def _save_config(self, saved_hyperparams: dict[str, Any]) -> None:
         """
         Save unprocessed hyperparameters, this can be use later
         to reproduce an experiment.
@@ -290,15 +290,15 @@ def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:

         print(f"Log path: {self.save_path}")

-    def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
         print(f"Loading hyperparameters from: {self.config}")

         if self.config.endswith(".yml") or self.config.endswith(".yaml"):
             # Load hyperparameters from yaml file
             with open(self.config) as f:
                 hyperparams_dict = yaml.safe_load(f)
         elif self.config.endswith(".py"):
-            global_variables: Dict = {}
+            global_variables: dict = {}
             # Load hyperparameters from python file
             exec(Path(self.config).read_text(), global_variables)
             hyperparams_dict = global_variables["hyperparams"]
@@ -327,7 +327,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         return hyperparams, saved_hyperparams

     @staticmethod
-    def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
+    def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
         # Create schedules
         for key in ["learning_rate", "clip_range", "clip_range_vf", "delta_std"]:
             if key not in hyperparams:
@@ -345,7 +345,7 @@ def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
             raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
         return hyperparams

-    def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
+    def _preprocess_normalization(self, hyperparams: dict[str, Any]) -> dict[str, Any]:
         if "normalize" in hyperparams.keys():
             self.normalize = hyperparams["normalize"]

@@ -370,8 +370,8 @@ def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
         return hyperparams

     def _preprocess_hyperparams(  # noqa: C901
-        self, hyperparams: Dict[str, Any]
-    ) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback], Optional[Callable]]:
+        self, hyperparams: dict[str, Any]
+    ) -> tuple[dict[str, Any], Optional[Callable], list[BaseCallback], Optional[Callable]]:
         self.n_envs = hyperparams.get("n_envs", 1)

         if self.verbose > 0:
@@ -448,8 +448,8 @@ def _preprocess_hyperparams(  # noqa: C901
         return hyperparams, env_wrapper, callbacks, vec_env_wrapper

     def _preprocess_action_noise(
-        self, hyperparams: Dict[str, Any], saved_hyperparams: Dict[str, Any], env: VecEnv
-    ) -> Dict[str, Any]:
+        self, hyperparams: dict[str, Any], saved_hyperparams: dict[str, Any], env: VecEnv
+    ) -> dict[str, Any]:
         # Parse noise string
         # Note: only off-policy algorithms are supported
         if hyperparams.get("noise_type") is not None:
@@ -667,7 +667,7 @@ def make_env(**kwargs) -> gym.Env:

         return env

-    def _load_pretrained_agent(self, hyperparams: Dict[str, Any], env: VecEnv) -> BaseAlgorithm:
+    def _load_pretrained_agent(self, hyperparams: dict[str, Any], env: VecEnv) -> BaseAlgorithm:
         # Continue training
         print("Loading pretrained agent")
         # Policy should not be changed
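
As the `read_hyperparameters` hunk shows, the manager accepts either a YAML config or a Python file that is `exec`'d and must define a top-level `hyperparams` dict. A hypothetical minimal `.py` config, keyed by env id like the zoo's YAML files, with all values illustrative:

    # my_hyperparams.py -- hypothetical config consumed via
    # exec(Path(self.config).read_text(), global_variables);
    # read_hyperparameters() then reads global_variables["hyperparams"].
    hyperparams = {
        "CartPole-v1": {
            "n_envs": 8,
            "n_timesteps": 100_000,
            "policy": "MlpPolicy",
        },
    }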
