v1: Allow collection of empty episodes (done on reset)

MischaPanch · MischaPanch · commit 683a17d3c473 · 2025-05-19T18:41:37.000+02:00
Slightly enhanced docstrings in collector
diff --git a/tianshou/data/buffer/buffer_base.py b/tianshou/data/buffer/buffer_base.py
@@ -145,8 +145,7 @@ def _get_start_stop_tuples_for_edge_crossing_interval(
         if stop >= start:
             raise ValueError(
                 f"Expected stop < start, but got {start=}, {stop=}. "
-                f"For stop larger than start this method should never be called, "
-                f"and stop=start should never occur. This can occur either due to an implementation error, "
+                f"For stop larger-equal than start this method should never be called. This can occur either due to an implementation error, "
                 f"or due a bad configuration of the buffer that resulted in a single episode being so long that "
                 f"it completely filled a subbuffer (of size len(buffer)/degree_of_vectorization). "
                 f"Consider either shortening the episode, increasing the size of the buffer, or decreasing the "
@@ -213,7 +212,7 @@ def get_buffer_indices(self, start: int, stop: int) -> np.ndarray:
                 f"Start and stop indices must be within the same subbuffer. "
                 f"Got {start=} in subbuffer edge {start_left_edge} and {stop=} in subbuffer edge {stop_left_edge}.",
             )
-        if stop > start:
+        if stop >= start:
             return np.arange(start, stop, dtype=int)
         else:
             (start, upper_edge), (
diff --git a/tianshou/data/collector.py b/tianshou/data/collector.py
@@ -42,6 +42,8 @@
 
 _TArrLike = TypeVar("_TArrLike", bound="np.ndarray | torch.Tensor | Batch | None")
 
+TScalarArrayShape = TypeVar("TScalarArrayShape")
+
 
 class CollectActionBatchProtocol(Protocol):
     """A protocol for results of computing actions from a batch of observations within a single collect step.
@@ -777,10 +779,13 @@ def _collect(  # noqa: C901
         # TODO: can't do it init since AsyncCollector is currently a subclass of Collector
         if self.env.is_async:
             raise ValueError(
-                f"Please use {AsyncCollector.__name__} for asynchronous environments. "
+                f"Please use AsyncCollector for asynchronous environments. "
                 f"Env class: {self.env.__class__.__name__}.",
             )
 
+        ready_env_ids_R: np.ndarray[Any, np.dtype[np.signedinteger]]
+        """Provides a mapping from local indices (indexing within `0, ..., R-1` where `R` is the number of ready envs)
+         to global ones (indexing within `0, ..., num_envs-1`). So the entry i in this array is the global index of the i-th ready env."""
         if n_step is not None:
             ready_env_ids_R = np.arange(self.env_num)
         elif n_episode is not None:
@@ -914,6 +919,8 @@ def _collect(  # noqa: C901
                 # local_idx - see block comment on class level
                 # Step 7
                 env_done_local_idx_D = np.where(done_R)[0]
+                """Indexes which episodes are done within the ready envs, so it can be used for selecting from `..._R` arrays.
+                Stands in contrast to the "global" index, which counts within all envs and is unsuitable for selecting from `..._R` arrays."""
                 episode_lens_D = ep_len_R[env_done_local_idx_D]
                 episode_returns_D = ep_return_R[env_done_local_idx_D]
                 episode_start_indices_D = ep_start_idx_R[env_done_local_idx_D]
@@ -932,6 +939,10 @@ def _collect(  # noqa: C901
                 # 0,...,R and this global index is maintained by the ready_env_ids_R array.
                 # See the class block comment for more details
                 env_done_global_idx_D = ready_env_ids_R[env_done_local_idx_D]
+                """Indexes which episodes are done within all envs, i.e., within the index `0, ..., num_envs-1`. It can be
+                used to communicate with the vector env, where env ids are selected from this "global" index.
+                Is not suited for selecting from the ready envs (`..._R` arrays), use the local counterpart instead.
+                """
                 obs_reset_DO, info_reset_D = self.env.reset(
                     env_id=env_done_global_idx_D,
                     **gym_reset_kwargs,