Fixes for CarRacing-v3 (#496)

araffin · web-flow · commit cd701a094ac5 · 2025-06-16T14:08:53.000+02:00
* Fixes for CarRacing-v3 and Gymnasium v1.0

* Update to constant schedule class

* Add score normalization for bipedal walker and lunar lander

* Update CarRacing hyperparams

* Update SB3
diff --git a/.gitignore b/.gitignore
@@ -19,6 +19,7 @@ hub
 *.mp4
 *.json
 _build/
+run_crossq_bipedal.sh
 
 tests/dummy_env/build/
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,15 +1,17 @@
-## Release 2.6.1 (WIP)
+## Release 2.7.0a0 (WIP)
 
 ### Breaking Changes
-- Upgraded to SB3 >= 2.6.1
+- Upgraded to SB3 >= 2.7.0
 - `linear_schedule` now returns a `SimpleLinearSchedule` object for better portability
 - Renamed `LunarLander-v2` to `LunarLander-v3` in hyperparameters
+- Renamed `CarRacing-v2` to `CarRacing-v3` in hyperparameters
 
 ### New Features
 
 ### Bug fixes
 - Docker GPU images are now working again
 - Use `ConstantSchedule`, and `SimpleLinearSchedule` instead of `constant_fn` and `linear_schedule`
+- Fixed `CarRacing-v3` hyperparameters for newer Gymnasium versions
 
 ### Documentation
 
diff --git a/hyperparams/ppo.yml b/hyperparams/ppo.yml
@@ -347,13 +347,13 @@ MiniGrid-ObstructedMaze-2Dlh-v0:
   n_timesteps: !!float 1e7 # Unsolved
 
 
-CarRacing-v2:
+CarRacing-v3:
   env_wrapper:
     - rl_zoo3.wrappers.FrameSkip:
         skip: 2
-    - gymnasium.wrappers.resize_observation.ResizeObservation:
-        shape: 64
-    - gymnasium.wrappers.gray_scale_observation.GrayScaleObservation:
+    - rl_zoo3.wrappers.YAMLCompatResizeObservation:
+        shape: [64, 64]
+    - gymnasium.wrappers.transform_observation.GrayscaleObservation:
         keep_dim: true
   frame_stack: 2
   normalize: "{'norm_obs': False, 'norm_reward': True}"
diff --git a/hyperparams/ppo_lstm.yml b/hyperparams/ppo_lstm.yml
@@ -283,13 +283,13 @@ InvertedPendulumSwingupBulletEnv-v0:
   clip_range: 0.2
 
 
-CarRacing-v2:
+CarRacing-v3:
   env_wrapper:
     # - rl_zoo3.wrappers.FrameSkip:
     #     skip: 2
-    - gymnasium.wrappers.resize_observation.ResizeObservation:
-        shape: 64
-    - gymnasium.wrappers.gray_scale_observation.GrayScaleObservation:
+    - rl_zoo3.wrappers.YAMLCompatResizeObservation:
+        shape: [64, 64]
+    - gymnasium.wrappers.transform_observation.GrayscaleObservation:
         keep_dim: true
   frame_stack: 2
   normalize: "{'norm_obs': False, 'norm_reward': True}"
diff --git a/hyperparams/sac.yml b/hyperparams/sac.yml
@@ -161,7 +161,7 @@ MinitaurBulletDuckEnv-v0:
   learning_starts: 10000
 
 # To be tuned
-CarRacing-v2:
+CarRacing-v3:
   env_wrapper:
     - rl_zoo3.wrappers.FrameSkip:
         skip: 2
diff --git a/rl_zoo3/plots/plot_from_file.py b/rl_zoo3/plots/plot_from_file.py
@@ -156,6 +156,8 @@ def plot_from_file():  # noqa: C901
         "Ant": "AntBulletEnv-v0",
         "Hopper": "HopperBulletEnv-v0",
         "Walker": "Walker2DBulletEnv-v0",
+        "LunarLanderContinuous": "LunarLanderContinuous-v3",
+        "BipedalWalker": "BipedalWalker-v3",
     }
     # Backward compat
     skip_all_algos_dict = False
diff --git a/rl_zoo3/plots/score_normalization.py b/rl_zoo3/plots/score_normalization.py
@@ -22,6 +22,8 @@ class ReferenceScore(NamedTuple):
     ReferenceScore("AntBulletEnv-v0", 300, 3500),
     ReferenceScore("HopperBulletEnv-v0", 20, 2500),
     ReferenceScore("Walker2DBulletEnv-v0", 200, 2500),
+    ReferenceScore("LunarLanderContinuous-v3", -200, 250),
+    ReferenceScore("BipedalWalker-v3", -100, 300),
 ]
 
 # Alternative scaling
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
@@ -1 +1 @@
-2.6.1a1
+2.7.0a0
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
@@ -4,10 +4,17 @@
 import numpy as np
 from gymnasium import spaces
 from gymnasium.core import ObsType
+from gymnasium.wrappers import ResizeObservation
 from sb3_contrib.common.wrappers import TimeFeatureWrapper  # noqa: F401 (backward compatibility)
 from stable_baselines3.common.type_aliases import GymResetReturn, GymStepReturn
 
 
+# YAML configs pass `shape` as a list (e.g. [64, 64]); convert it to a tuple before calling ResizeObservation
+class YAMLCompatResizeObservation(ResizeObservation):
+    def __init__(self, env: gym.Env, shape: list[int]):
+        super().__init__(env, (shape[0], shape[1]))
+
+
 class TruncatedOnSuccessWrapper(gym.Wrapper):
     """
     Reset on success and offsets the reward.
diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
 See https://github.com/DLR-RM/rl-baselines3-zoo
 """
 install_requires = [
-    "sb3_contrib>=2.6.1a1,<3.0",
+    "sb3_contrib>=2.7.0a0,<3.0",
     "gymnasium>=0.29.1,<1.2.0",
     "huggingface_sb3>=3.0,<4.0",
     "tqdm",

Original file line number	Diff line number	Diff line change
`@@ -156,6 +156,8 @@ def plot_from_file(): # noqa: C901`
`156`	`156`	`"Ant": "AntBulletEnv-v0",`
`157`	`157`	`"Hopper": "HopperBulletEnv-v0",`
`158`	`158`	`"Walker": "Walker2DBulletEnv-v0",`
	`159`	`+ "LunarLanderContinuous": "LunarLanderContinuous-v3",`
	`160`	`+ "BipedalWalker": "BipedalWalker-v3",`
`159`	`161`	`}`
`160`	`162`	`# Backward compat`
`161`	`163`	`skip_all_algos_dict = False`
Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,8 @@ class ReferenceScore(NamedTuple):`
`22`	`22`	`ReferenceScore("AntBulletEnv-v0", 300, 3500),`
`23`	`23`	`ReferenceScore("HopperBulletEnv-v0", 20, 2500),`
`24`	`24`	`ReferenceScore("Walker2DBulletEnv-v0", 200, 2500),`
	`25`	`+ ReferenceScore("LunarLanderContinuous-v3", -200, 250),`
	`26`	`+ ReferenceScore("BipedalWalker-v3", -100, 300),`
`25`	`27`	`]`
`26`	`28`
`27`	`29`	`# Alternative scaling`