v1: disable buffer hasnull checks by default

MischaPanch · MischaPanch · commit 5c1e4b2c60dd · 2025-06-21T15:09:40.000+02:00
Control validation enabling with global flag
diff --git a/tianshou/config.py b/tianshou/config.py
@@ -0,0 +1,2 @@
+ENABLE_VALIDATION = False
+"""Validation can help catch bugs and issues, but it slows down training and collection. Enable it only if needed."""
diff --git a/tianshou/data/collector.py b/tianshou/data/collector.py
@@ -12,6 +12,7 @@
 from overrides import override
 from torch.distributions import Categorical, Distribution
 
+from tianshou.config import ENABLE_VALIDATION
 from tianshou.data import (
     Batch,
     CachedReplayBuffer,
@@ -318,8 +319,32 @@ def __init__(
         exploration_noise: bool = False,
         # The typing is correct, there's a bug in mypy, see https://github.com/python/mypy/issues/3737
         collect_stats_class: type[TCollectStats] = CollectStats,  # type: ignore[assignment]
-        raise_on_nan_in_buffer: bool = True,
+        raise_on_nan_in_buffer: bool = ENABLE_VALIDATION,
     ) -> None:
+        """
+        :param policy: a tianshou policy, each :class:`BasePolicy` is capable of computing a batch
+            of actions from a batch of observations.
+        :param env: a ``gymnasium.Env`` environment or a vectorized instance of the
+            :class:`~tianshou.env.BaseVectorEnv` class. The latter is strongly recommended, as with
+            a gymnasium env the collection will not happen in parallel (a `DummyVectorEnv`
+            will be constructed internally from the passed env)
+        :param buffer: an instance of the :class:`~tianshou.data.ReplayBuffer` class.
+            If set to None, will instantiate a :class:`~tianshou.data.VectorReplayBuffer`
+            of size :data:`DEFAULT_BUFFER_MAXSIZE` * (number of envs)
+            as the default buffer.
+        :param exploration_noise: determine whether the action needs to be modified
+            with the corresponding policy's exploration noise. If so, "policy.
+            exploration_noise(act, batch)" will be called automatically to add the
+            exploration noise into action.
+            the rollout batch with this hook also modifies the data that is collected to the buffer!
+        :param raise_on_nan_in_buffer: whether to raise a `RuntimeError` if NaNs are found in the buffer after
+            a collection step. Especially useful when episode-level hooks are passed for making
+            sure that nothing is broken during the collection. The default is taken from
+            ``tianshou.config.ENABLE_VALIDATION`` (False), since the NaN-check slows down collection.
+        :param collect_stats_class: the class to use for collecting statistics. Allows customizing
+            the stats collection logic by passing a subclass of :class:`CollectStats`. Changing
+            this is rarely necessary and is mainly done by "power users".
+        """
         if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
             warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
             # Unfortunately, mypy seems to ignore the isinstance in lambda, maybe a bug in mypy
@@ -557,7 +582,7 @@ def __init__(
         exploration_noise: bool = False,
         on_episode_done_hook: Optional["EpisodeRolloutHookProtocol"] = None,
         on_step_hook: Optional["StepHookProtocol"] = None,
-        raise_on_nan_in_buffer: bool = True,
+        raise_on_nan_in_buffer: bool = ENABLE_VALIDATION,
         collect_stats_class: type[TCollectStats] = CollectStats,  # type: ignore[assignment]
     ) -> None:
         """
@@ -574,7 +599,7 @@ def __init__(
         :param exploration_noise: determine whether the action needs to be modified
             with the corresponding policy's exploration noise. If so, "policy.
             exploration_noise(act, batch)" will be called automatically to add the
-            exploration noise into action..
+            exploration noise into action.
         :param on_episode_done_hook: if passed will be executed when an episode is done.
             The input to the hook will be a `RolloutBatch` that contains the entire episode (and nothing else).
             If a dict is returned by the hook it will be used to add new entries to the buffer
@@ -1045,7 +1070,7 @@ def _collect(  # noqa: C901
                 break
 
         # Check if we screwed up somewhere
-        if self.buffer.hasnull():
+        if self.raise_on_nan_in_buffer and self.buffer.hasnull():
             nan_batch = self.buffer.isnull().apply_values_transform(np.sum)
 
             raise MalformedBufferError(
diff --git a/tianshou/highlevel/env.py b/tianshou/highlevel/env.py
@@ -389,7 +389,9 @@ def _next_seed(rng: np.random.Generator) -> int:
         :param rng: the random number generator
         :return: the sampled random seed
         """
-        return int(rng.integers(-2**31, 2**31, dtype=np.int32)) # int32 is needed for envpool compatibility
+        return int(
+            rng.integers(-(2**31), 2**31, dtype=np.int32)
+        )  # int32 is needed for envpool compatibility
 
     @abstractmethod
     def _create_env(self, mode: EnvMode) -> Env:
diff --git a/tianshou/trainer/base.py b/tianshou/trainer/base.py
@@ -543,7 +543,7 @@ def _collect_training_data(self) -> CollectStats:
             lambda: f"Collected {collect_stats.n_collected_steps} steps, {collect_stats.n_collected_episodes} episodes",
         )
 
-        if self.train_collector.buffer.hasnull():
+        if self.train_collector.raise_on_nan_in_buffer and self.train_collector.buffer.hasnull():
             from tianshou.data.collector import EpisodeRolloutHook
             from tianshou.env import DummyVectorEnv
 

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+ENABLE_VALIDATION = False`
	`2`	`+"""Validation can help catch bugs and issues, but it slows down training and collection. Enable it only if needed."""`
Original file line number	Diff line number	Diff line change
`@@ -543,7 +543,7 @@ def _collect_training_data(self) -> CollectStats:`
`543`	`543`	`lambda: f"Collected {collect_stats.n_collected_steps} steps, {collect_stats.n_collected_episodes} episodes",`
`544`	`544`	`)`
`545`	`545`
`546`		`- if self.train_collector.buffer.hasnull():`
	`546`	`+ if self.train_collector.raise_on_nan_in_buffer and self.train_collector.buffer.hasnull():`
`547`	`547`	`from tianshou.data.collector import EpisodeRolloutHook`
`548`	`548`	`from tianshou.env import DummyVectorEnv`
`549`	`549`