
Commit 9edaf15

1 parent a80a900

10 files changed: +25 -25 lines changed
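The updated doctest expectations below appear to follow the scalar repr format introduced in NumPy 2.0 (NEP 51), where boolean and floating-point scalars print with an explicit type prefix. A minimal check of that repr, assuming NumPy >= 2.0 is installed:

>>> import numpy as np
>>> np.bool_(True)
np.True_
>>> np.float64(0.5)
np.float64(0.5)
>>> np.float32(0.5)
np.float32(0.5)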

gymnasium/wrappers/stateful_observation.py

+2 -2

@@ -557,9 +557,9 @@ class MaxAndSkipObservation(
 >>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
 >>> wrapped_obs1, *_ = wrapped_env.step(1)
 >>> np.all(obs0 == wrapped_obs0)
-True
+np.True_
 >>> np.all(wrapped_obs1 == skip_and_max_obs)
-True
+np.True_

 Change logs:
 * v1.0.0 - Initially add
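Only the printed repr of the np.all(...) result changes here; the value is still a NumPy boolean that behaves as before in Python code. A minimal check, assuming NumPy >= 2.0:

>>> import numpy as np
>>> result = np.all(np.array([1, 2]) == np.array([1, 2]))
>>> result
np.True_
>>> bool(result)
True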

gymnasium/wrappers/stateful_reward.py

+2 -2

@@ -58,7 +58,7 @@ class NormalizeReward(
 ...
 >>> env.close()
 >>> np.var(episode_rewards)
-0.0008876301247721108
+np.float64(0.0008876301247721108)

 Example with the normalize reward wrapper:
 >>> import numpy as np
@@ -76,7 +76,7 @@ class NormalizeReward(
 >>> env.close()
 >>> # will approach 0.99 with more episodes
 >>> np.var(episode_rewards)
-0.010162116476634746
+np.float64(0.010162116476634746)

 Change logs:
 * v0.21.0 - Initially added

gymnasium/wrappers/transform_action.py

+1 -1

@@ -146,7 +146,7 @@ class RescaleAction(
 >>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
 >>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
 >>> np.all(obs == wrapped_env_obs)
-True
+np.True_

 Change logs:
 * v0.15.4 - Initially added

gymnasium/wrappers/transform_observation.py

+4 -4

@@ -594,11 +594,11 @@ class AddRenderObservation(
 >>> obs, _ = env.reset(seed=123)
 >>> image = env.render()
 >>> np.all(obs == image)
-True
+np.True_
 >>> obs, *_ = env.step(env.action_space.sample())
 >>> image = env.render()
 >>> np.all(obs == image)
-True
+np.True_

 Example - Add the rendered image to the original observation as a dictionary item:
 >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -611,11 +611,11 @@ class AddRenderObservation(
 >>> obs["state"]
 array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
 >>> np.all(obs["pixels"] == env.render())
-True
+np.True_
 >>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
 >>> image = env.render()
 >>> np.all(obs["pixels"] == image)
-True
+np.True_

 Change logs:
 * v0.15.0 - Initially added as ``PixelObservationWrapper``

gymnasium/wrappers/transform_reward.py

+1 -1

@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorA
 >>> _ = env.reset()
 >>> _, rew, _, _, _ = env.step(1)
 >>> rew
-0.5
+np.float64(0.5)

 Change logs:
 * v1.0.0 - Initially added

gymnasium/wrappers/vector/dict_info_to_list.py

+2 -2

@@ -54,13 +54,13 @@ class DictInfoToList(VectorWrapper):
 >>> _ = envs.action_space.seed(123)
 >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
 >>> infos
-{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423]), '_reward_ctrl': array([ True, True])}
+{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
 >>> envs = DictInfoToList(envs)
 >>> _ = envs.reset(seed=123)
 >>> _ = envs.action_space.seed(123)
 >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
 >>> infos
-[{'x_position': 0.0333221090036294, 'x_velocity': -0.06296527291998574, 'reward_run': -0.06296527291998574, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}]
+[{'x_position': np.float64(0.0333221090036294), 'x_velocity': np.float64(-0.06296527291998574), 'reward_run': np.float64(-0.06296527291998574), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]

 Change logs:
 * v0.24.0 - Initially added as ``VectorListInfo``
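The per-environment info values themselves are numerically unchanged; the new expectations only make the NumPy scalar types explicit in the repr. A minimal check, assuming NumPy >= 2.0:

>>> import numpy as np
>>> np.float64(0.0333221090036294) == 0.0333221090036294
np.True_
>>> float(np.float64(0.0333221090036294))
0.0333221090036294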

gymnasium/wrappers/vector/stateful_observation.py

+4 -4

@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
 >>> for _ in range(100):
 ... obs, *_ = envs.step(envs.action_space.sample())
 >>> np.mean(obs)
-0.024251968
+np.float32(0.024251968)
 >>> np.std(obs)
-0.62259156
+np.float32(0.62259156)
 >>> envs.close()

 Example with the normalize reward wrapper:
@@ -49,9 +49,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
 >>> for _ in range(100):
 ... obs, *_ = envs.step(envs.action_space.sample())
 >>> np.mean(obs)
--0.2359734
+np.float32(-0.2359734)
 >>> np.std(obs)
-1.1938739
+np.float32(1.1938739)
 >>> envs.close()
 """

gymnasium/wrappers/vector/stateful_reward.py

+4 -4

@@ -50,9 +50,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
 ...
 >>> envs.close()
 >>> np.mean(episode_rewards)
--0.03359492141887935
+np.float64(-0.03359492141887935)
 >>> np.std(episode_rewards)
-0.029028230434438706
+np.float64(0.029028230434438706)

 Example with the normalize reward wrapper:
 >>> import gymnasium as gym
@@ -68,9 +68,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
 ...
 >>> envs.close()
 >>> np.mean(episode_rewards)
--0.1598639586606745
+np.float64(-0.1598639586606745)
 >>> np.std(episode_rewards)
-0.27800309628058434
+np.float64(0.27800309628058434)
 """

 def __init__(

gymnasium/wrappers/vector/vectorize_action.py

+1 -1

@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
 >>> obs
 array([[-0.46553135, -0.00142543],
 [-0.498371 , -0.00715587],
-[-0.4651575 , -0.00624371]], dtype=float32)
+[-0.46515748, -0.00624371]], dtype=float32)

 Example - With action transformation:
 >>> import gymnasium as gym

gymnasium/wrappers/vector/vectorize_observation.py

+4 -4

@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
 >>> envs = gym.make_vec("MountainCar-v0", num_envs=3, vectorization_mode="sync")
 >>> obs, info = envs.reset(seed=123)
 >>> obs.min()
--0.46352962
+np.float32(-0.46352962)
 >>> obs.max()
-0.0
+np.float32(0.0)
 >>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
 >>> obs, info = envs.reset(seed=123)
 >>> obs.min()
--0.90849805
+np.float32(-0.90849805)
 >>> obs.max()
-0.0
+np.float32(0.0)
 >>> envs.close()
 """
