Skip to content

Commit 2336a7d

Browse files
fixed typo in rainbow DQN paper reference (#569)
* fixed typo in rainbow DQN paper ref * fix gym==0.23 ci failure Co-authored-by: Jiayi Weng <trinkle23897@gmail.com>
1 parent 39f8391 commit 2336a7d

File tree

8 files changed

+15
-14
lines changed

8 files changed

+15
-14
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2020 Tianshou contributors
3+
Copyright (c) 2022 Tianshou contributors
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

docs/index.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ Welcome to Tianshou!
1313
* :class:`~tianshou.policy.DQNPolicy` `Double DQN <https://arxiv.org/pdf/1509.06461.pdf>`_
1414
* :class:`~tianshou.policy.DQNPolicy` `Dueling DQN <https://arxiv.org/pdf/1511.06581.pdf>`_
1515
* :class:`~tianshou.policy.C51Policy` `Categorical DQN <https://arxiv.org/pdf/1707.06887.pdf>`_
16-
* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1707.02298.pdf>`_
16+
* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1710.02298.pdf>`_
1717
* :class:`~tianshou.policy.QRDQNPolicy` `Quantile Regression DQN <https://arxiv.org/pdf/1710.10044.pdf>`_
1818
* :class:`~tianshou.policy.IQNPolicy` `Implicit Quantile Network <https://arxiv.org/pdf/1806.06923.pdf>`_
1919
* :class:`~tianshou.policy.FQFPolicy` `Fully-parameterized Quantile Function <https://arxiv.org/pdf/1911.02140.pdf>`_

tianshou/data/collector.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,14 +64,15 @@ def __init__(
6464
super().__init__()
6565
if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
6666
warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
67-
env = DummyVectorEnv([lambda: env])
68-
self.env = env
69-
self.env_num = len(env)
67+
self.env = DummyVectorEnv([lambda: env]) # type: ignore
68+
else:
69+
self.env = env # type: ignore
70+
self.env_num = len(self.env)
7071
self.exploration_noise = exploration_noise
7172
self._assign_buffer(buffer)
7273
self.policy = policy
7374
self.preprocess_fn = preprocess_fn
74-
self._action_space = env.action_space
75+
self._action_space = self.env.action_space
7576
# avoid creating attribute outside __init__
7677
self.reset(False)
7778

tianshou/env/pettingzoo_env.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from pettingzoo.utils.wrappers import BaseWrapper
77

88

9-
class PettingZooEnv(AECEnv, gym.Env, ABC):
9+
class PettingZooEnv(AECEnv, ABC):
1010
"""The interface for petting zoo environments.
1111
1212
Multi-agent environments must be wrapped as

tianshou/env/venvs.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
from tianshou.utils import RunningMeanStd
1313

1414

15-
class BaseVectorEnv(gym.Env):
15+
class BaseVectorEnv(object):
1616
"""Base class for vectorized environments wrapper.
1717
1818
Usage:
@@ -196,6 +196,7 @@ def _assert_id(self, id: Union[List[int], np.ndarray]) -> None:
196196
assert i in self.ready_id, \
197197
f"Can only interact with ready environments {self.ready_id}."
198198

199+
# TODO: compatible issue with reset -> (obs, info)
199200
def reset(
200201
self, id: Optional[Union[int, List[int], np.ndarray]] = None
201202
) -> np.ndarray:

tianshou/env/worker/dummy.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,9 @@ def wait( # type: ignore
3131

3232
def send(self, action: Optional[np.ndarray]) -> None:
3333
if action is None:
34-
self.result = self.env.reset()
34+
self.result = self.env.reset() # type: ignore
3535
else:
36-
self.result = self.env.step(action)
36+
self.result = self.env.step(action) # type: ignore
3737

3838
def seed(self, seed: Optional[int] = None) -> List[int]:
3939
super().seed(seed)

tianshou/env/worker/subproc.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
5353
assert isinstance(space.spaces, tuple)
5454
return tuple([_setup_buf(t) for t in space.spaces])
5555
else:
56-
return ShArray(space.dtype, space.shape)
56+
return ShArray(space.dtype, space.shape) # type: ignore
5757

5858

5959
def _worker(

tianshou/policy/modelfree/sac.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,9 +122,8 @@ def forward( # type: ignore
122122
# You can check out the original SAC paper (arXiv 1801.01290): Eq 21.
123123
# in appendix C to get some understanding of this equation.
124124
if self.action_scaling and self.action_space is not None:
125-
action_scale = to_torch_as(
126-
(self.action_space.high - self.action_space.low) / 2.0, act
127-
)
125+
low, high = self.action_space.low, self.action_space.high # type: ignore
126+
action_scale = to_torch_as((high - low) / 2.0, act)
128127
else:
129128
action_scale = 1.0 # type: ignore
130129
squashed_action = torch.tanh(act)

0 commit comments

Comments (0)