Commit 8818338

Removes unused attributes from VecEnv
1 parent 9a62552

1 file changed: +37 -35 lines changed


Diff for: rsl_rl/env/vec_env.py

@@ -16,53 +16,53 @@ class VecEnv(ABC):
     the same action is applied to all environments and the same observation is returned from all environments.
 
     All extra observations must be provided as a dictionary to "extras" in the step() method. Based on the
-    configuration, the extra observations are used for different purposes. The following keys are reserved
-    in the "observations" dictionary (if they are present):
+    configuration, the extra observations are used for different purposes. The following keys are used by the
+    environment:
 
-    - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
-    - "rnd_state": The observation is used as input to the RND network. Useful for random network distillation.
-    """
+    - "observations" (dict[str, dict[str, torch.Tensor]]):
+      Additional observations that are not used by the actor networks. The keys are the names of the observations
+      and the values are the observations themselves. The following are reserved keys for the observations:
 
-    num_envs: int
-    """Number of environments."""
+      - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
+      - "rnd_state": The observation is used as input to the RND network. Useful for random network distillation.
 
-    num_obs: int
-    """Number of observations."""
+    - "time_outs" (torch.Tensor): Timeouts for the environments. These correspond to terminations that happen due to time limits and
+      not due to the environment reaching a terminal state. This is useful for environments that have a fixed
+      episode length.
 
-    num_privileged_obs: int
-    """Number of privileged observations."""
+    - "log" (dict[str, float | torch.Tensor]): Additional information for logging and debugging purposes.
+      The key should be a string and start with "/" for namespacing. The value can be a scalar or a tensor.
+      If it is a tensor, the mean of the tensor is used for logging.
 
-    num_actions: int
-    """Number of actions."""
+      .. deprecated:: 2.0.0
 
-    max_episode_length: int
-    """Maximum episode length."""
+          Use "log" in the extra information dictionary instead of the "episode" key.
 
-    privileged_obs_buf: torch.Tensor
-    """Buffer for privileged observations."""
+    """
 
-    obs_buf: torch.Tensor
-    """Buffer for observations."""
+    num_envs: int
+    """Number of environments."""
 
-    rew_buf: torch.Tensor
-    """Buffer for rewards."""
+    num_actions: int
+    """Number of actions."""
+
+    max_episode_length: int | torch.Tensor
+    """Maximum episode length.
 
-    reset_buf: torch.Tensor
-    """Buffer for resets."""
+    The maximum episode length can be a scalar or a tensor. If it is a scalar, it is the same for all environments.
+    If it is a tensor, it is the maximum episode length for each environment. This is useful for dynamic episode
+    lengths.
+    """
 
     episode_length_buf: torch.Tensor
     """Buffer for current episode lengths."""
 
-    extras: dict
-    """Extra information (metrics).
-
-    Extra information is stored in a dictionary. This includes metrics such as the episode reward, episode length,
-    etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
-    """
-
     device: torch.device
     """Device to use."""
 
+    cfg: dict | object
+    """Configuration object."""
+
     """
     Operations.
     """
@@ -72,7 +72,7 @@ def get_observations(self) -> tuple[torch.Tensor, dict]:
         """Return the current observations.
 
         Returns:
-            Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
+            Tuple containing the observations and extras.
         """
         raise NotImplementedError

@@ -81,19 +81,21 @@ def reset(self) -> tuple[torch.Tensor, dict]:
         """Reset all environment instances.
 
         Returns:
-            Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
+            Tuple containing the observations and extras.
         """
         raise NotImplementedError
 
     @abstractmethod
     def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
         """Apply input action on the environment.
 
+        The extra information is a dictionary. It includes metrics such as the episode reward, episode length,
+        etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
+
         Args:
-            actions (torch.Tensor): Input actions to apply. Shape: (num_envs, num_actions)
+            actions: Input actions to apply. Shape: (num_envs, num_actions)
 
         Returns:
-            Tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
-                A tuple containing the observations, rewards, dones and extra information (metrics).
+            A tuple containing the observations, rewards, dones and extra information (metrics).
         """
         raise NotImplementedError
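
For illustration, a minimal concrete subclass satisfying the revised interface might look like the sketch below, assuming get_observations(), reset(), and step() are the only abstract methods, as the diff suggests. Everything other than the VecEnv members shown above (num_envs, num_actions, max_episode_length, episode_length_buf, device, cfg, and the three methods) is a hypothetical placeholder:

import torch

from rsl_rl.env.vec_env import VecEnv


class ZeroObsEnv(VecEnv):
    """Hypothetical do-nothing environment used only to illustrate the interface."""

    def __init__(self, num_envs: int = 8, num_actions: int = 2, device: str = "cpu"):
        self.num_envs = num_envs
        self.num_actions = num_actions
        self.max_episode_length = 100
        self.device = torch.device(device)
        self.cfg = {}
        self.episode_length_buf = torch.zeros(num_envs, dtype=torch.long, device=self.device)
        self._obs = torch.zeros(num_envs, 3, device=self.device)  # 3 is an arbitrary observation size

    def get_observations(self) -> tuple[torch.Tensor, dict]:
        return self._obs, {"observations": {}}

    def reset(self) -> tuple[torch.Tensor, dict]:
        self.episode_length_buf.zero_()
        return self._obs, {"observations": {}}

    def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
        self.episode_length_buf += 1
        rewards = torch.zeros(self.num_envs, device=self.device)
        # episodes here end only by timeout, so dones and time_outs coincide
        time_outs = self.episode_length_buf >= self.max_episode_length
        dones = time_outs.clone()
        self.episode_length_buf[dones] = 0
        extras = {
            "observations": {},
            "time_outs": time_outs,
            "log": {"/episode/mean_reward": rewards.mean()},
        }
        return self._obs, rewards, dones, extras

Because step() reports "time_outs" separately from dones, a learning algorithm can bootstrap the value target on time-limit terminations while treating true terminal states as absorbing.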
