Doc fixes following Copilot suggestions

araffin · araffin · commit b63c5d62a367 · 2026-02-19T23:26:37.000+01:00
diff --git a/docs/conf.py b/docs/conf.py
@@ -86,8 +86,8 @@
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-# source_suffix = ['.rst', '.md']
-source_suffix = ".rst"
+source_suffix = [".rst", ".md"]
+# source_suffix = ".rst"
 
 # The master toctree document.
 master_doc = "index"
@@ -207,9 +207,9 @@ def setup(app):
     # "amsmath",
     "attrs_inline",
     "colon_fence",
-    # "deflist",
+    "deflist",
     "dollarmath",
-    # "fieldlist",
+    "fieldlist",
     # "html_admonition",
     "html_image",
     # "linkify",
diff --git a/docs/guide/algos.md b/docs/guide/algos.md
@@ -12,7 +12,7 @@ along with some useful characteristics: support for discrete/continuous actions,
 | DQN                | ❌    | ✔️         | ❌              | ❌            | ✔️               |
 | HER                | ✔️    | ✔️         | ❌              | ❌            | ✔️               |
 | PPO                | ✔️    | ✔️         | ✔️              | ✔️            | ✔️               |
-| QR-DQN [^f1]       | ❌    | ️ ✔️        | ❌              | ❌            | ✔️               |
+| QR-DQN [^f1]       | ❌    | ✔️         | ❌              | ❌            | ✔️               |
 | RecurrentPPO [^f1] | ✔️    | ✔️         | ✔️              | ✔️            | ✔️               |
 | SAC                | ✔️    | ❌         | ❌              | ❌            | ✔️               |
 | TD3                | ✔️    | ❌         | ❌              | ❌            | ✔️               |
diff --git a/docs/guide/custom_env.md b/docs/guide/custom_env.md
@@ -27,14 +27,14 @@ SB3 doesn't support `Discrete` and `MultiDiscrete` spaces with `start!=0`. Howev
 import gymnasium as gym
 
 class ShiftWrapper(gym.Wrapper):
-"""Allow to use Discrete() action spaces with start!=0"""
-def __init__(self, env: gym.Env) -> None:
-    super().__init__(env)
-    assert isinstance(env.action_space, gym.spaces.Discrete)
-    self.action_space = gym.spaces.Discrete(env.action_space.n, start=0)
-
-def step(self, action: int):
-    return self.env.step(action + self.env.action_space.start)
+    """Allow to use Discrete() action spaces with start!=0"""
+    def __init__(self, env: gym.Env) -> None:
+        super().__init__(env)
+        assert isinstance(env.action_space, gym.spaces.Discrete)
+        self.action_space = gym.spaces.Discrete(env.action_space.n, start=0)
+
+    def step(self, action: int):
+        return self.env.step(action + self.env.action_space.start)
 ```
 :::
 
@@ -46,15 +46,15 @@ import numpy as np
 import gymnasium as gym
 
 class ReshapeWrapper(gym.Wrapper):
-"""Allow to use MultiDiscrete() action spaces with len(nvec.shape) > 1:"""
-def __init__(self, env: gym.Env) -> None:
-    super().__init__(env)
-    assert isinstance(env.action_space, gym.spaces.MultiDiscrete)
-    self.original_shape = env.action_space.nvec.shape
-    self.action_space = gym.spaces.MultiDiscrete(env.action_space.nvec.flatten())
-
-def step(self, action: np.ndarray):
-    return self.env.step(action.reshape(self.original_shape))
+    """Allow to use MultiDiscrete() action spaces with len(nvec.shape) > 1:"""
+    def __init__(self, env: gym.Env) -> None:
+        super().__init__(env)
+        assert isinstance(env.action_space, gym.spaces.MultiDiscrete)
+        self.original_shape = env.action_space.nvec.shape
+        self.action_space = gym.spaces.MultiDiscrete(env.action_space.nvec.flatten())
+
+    def step(self, action: np.ndarray):
+        return self.env.step(action.reshape(self.original_shape))
 ```
 :::
 
diff --git a/docs/guide/imitation.md b/docs/guide/imitation.md
@@ -5,11 +5,11 @@
 The [imitation](https://github.com/HumanCompatibleAI/imitation) library implements
 imitation learning algorithms on top of Stable-Baselines3, including:
 
-> - Behavioral Cloning
-> - [DAgger](https://arxiv.org/abs/1011.0686) with synthetic examples
-> - [Adversarial Inverse Reinforcement Learning](https://arxiv.org/abs/1710.11248) (AIRL)
-> - [Generative Adversarial Imitation Learning](https://arxiv.org/abs/1606.03476) (GAIL)
-> - [Deep RL from Human Preferences](https://arxiv.org/abs/1706.03741) (DRLHP)
+- Behavioral Cloning
+- [DAgger](https://arxiv.org/abs/1011.0686) with synthetic examples
+- [Adversarial Inverse Reinforcement Learning](https://arxiv.org/abs/1710.11248) (AIRL)
+- [Generative Adversarial Imitation Learning](https://arxiv.org/abs/1606.03476) (GAIL)
+- [Deep RL from Human Preferences](https://arxiv.org/abs/1706.03741) (DRLHP)
 
 You can install imitation with `pip install imitation`. The [imitation
 documentation](https://imitation.readthedocs.io/en/latest/) has more details
diff --git a/docs/guide/migration.md b/docs/guide/migration.md
@@ -36,7 +36,7 @@ and [issue #90](https://github.com/DLR-RM/stable-baselines3/issues/90).
 - SB3 requires python 3.7+ (instead of python 3.5+ for SB2)
 - Dropped MPI support
 - Dropped layer normalized policies (`MlpLnLstmPolicy`, `CnnLnLstmPolicy`)
-- LSTM policies (`` `MlpLstmPolicy` ``, `` `CnnLstmPolicy` ``) are not supported for the time being
+- LSTM policies (`MlpLstmPolicy`, `CnnLstmPolicy`) are not supported for the time being
   (see [PR #53](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib/pull/53) for a recurrent PPO implementation)
 - Dropped parameter noise for DDPG and DQN
 - PPO is now closer to the original implementation (no clipping of the value function by default), cf PPO section below
diff --git a/docs/guide/plotting.md b/docs/guide/plotting.md
@@ -163,7 +163,7 @@ if len(x) >= 50:  # Only smooth if we have enough data
     x_smooth, y_smooth = window_func(x, y, 50, np.mean)
     plt.plot(x_smooth, y_smooth, linewidth=2)
     plt.xlabel("Timesteps")
-    plt.ylabel("Average Episode Reward (50-episode window)"")
+    plt.ylabel("Average Episode Reward (50-episode window)")
     plt.title("Smoothed Episode Rewards")
 
 plt.tight_layout()
diff --git a/docs/guide/vec_envs.md b/docs/guide/vec_envs.md
@@ -138,7 +138,7 @@ vec_env = make_vec_env(MyMultiTaskEnv)
 # Note: you should use vec_env.env_method("get_wrapper_attr", "mu") in Gymnasium v1.0
 print(vec_env.env_method("get_wrapper_attr", "mu"))
 # Change `mu` attribute via the setter
-vec_env.env_method("set_mu", "mu", 0.1)
+vec_env.env_method("set_mu", 0.1)
 # If the variable exists, you can also use `set_wrapper_attr` to set it
 assert vec_env.has_attr("mu")
 vec_env.env_method("set_wrapper_attr", "mu", 0.1)
@@ -157,7 +157,7 @@ class ChangeMuCallback(BaseCallback):
   The environment is implemented so that the impact of changing
   the value of mu mid-episode is visible only after the episode is over
   and the reset method has been called.
-  """"
+  """
   def __init__(self):
     super().__init__()
     # An iterator that contains the different of the friction coefficient