Skip to content

Commit 2336a7d

Browse files
fixed typo in rainbow DQN paper reference (#569)
* fixed typo in rainbow DQN paper ref * fix gym==0.23 ci failure Co-authored-by: Jiayi Weng <trinkle23897@gmail.com>
1 parent 39f8391 commit 2336a7d

File tree

8 files changed

+15
-14
lines changed

8 files changed

+15
-14
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2020 Tianshou contributors
3+
Copyright (c) 2022 Tianshou contributors
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

docs/index.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ Welcome to Tianshou!
1313
* :class:`~tianshou.policy.DQNPolicy` `Double DQN <https://arxiv.org/pdf/1509.06461.pdf>`_
1414
* :class:`~tianshou.policy.DQNPolicy` `Dueling DQN <https://arxiv.org/pdf/1511.06581.pdf>`_
1515
* :class:`~tianshou.policy.C51Policy` `Categorical DQN <https://arxiv.org/pdf/1707.06887.pdf>`_
16-
* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1707.02298.pdf>`_
16+
* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1710.02298.pdf>`_
1717
* :class:`~tianshou.policy.QRDQNPolicy` `Quantile Regression DQN <https://arxiv.org/pdf/1710.10044.pdf>`_
1818
* :class:`~tianshou.policy.IQNPolicy` `Implicit Quantile Network <https://arxiv.org/pdf/1806.06923.pdf>`_
1919
* :class:`~tianshou.policy.FQFPolicy` `Fully-parameterized Quantile Function <https://arxiv.org/pdf/1911.02140.pdf>`_

tianshou/data/collector.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,14 +64,15 @@ def __init__(
6464
super().__init__()
6565
if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
6666
warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
67-
env = DummyVectorEnv([lambda: env])
68-
self.env = env
69-
self.env_num = len(env)
67+
self.env = DummyVectorEnv([lambda: env]) # type: ignore
68+
else:
69+
self.env = env # type: ignore
70+
self.env_num = len(self.env)
7071
self.exploration_noise = exploration_noise
7172
self._assign_buffer(buffer)
7273
self.policy = policy
7374
self.preprocess_fn = preprocess_fn
74-
self._action_space = env.action_space
75+
self._action_space = self.env.action_space
7576
# avoid creating attribute outside __init__
7677
self.reset(False)
7778

tianshou/env/pettingzoo_env.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from pettingzoo.utils.wrappers import BaseWrapper
77

88

9-
class PettingZooEnv(AECEnv, gym.Env, ABC):
9+
class PettingZooEnv(AECEnv, ABC):
1010
"""The interface for petting zoo environments.
1111
1212
Multi-agent environments must be wrapped as

tianshou/env/venvs.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
from tianshou.utils import RunningMeanStd
1313

1414

15-
class BaseVectorEnv(gym.Env):
15+
class BaseVectorEnv(object):
1616
"""Base class for vectorized environments wrapper.
1717
1818
Usage:
@@ -196,6 +196,7 @@ def _assert_id(self, id: Union[List[int], np.ndarray]) -> None:
196196
assert i in self.ready_id, \
197197
f"Can only interact with ready environments {self.ready_id}."
198198

199+
# TODO: compatible issue with reset -> (obs, info)
199200
def reset(
200201
self, id: Optional[Union[int, List[int], np.ndarray]] = None
201202
) -> np.ndarray:

tianshou/env/worker/dummy.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,9 @@ def wait( # type: ignore
3131

3232
def send(self, action: Optional[np.ndarray]) -> None:
3333
if action is None:
34-
self.result = self.env.reset()
34+
self.result = self.env.reset() # type: ignore
3535
else:
36-
self.result = self.env.step(action)
36+
self.result = self.env.step(action) # type: ignore
3737

3838
def seed(self, seed: Optional[int] = None) -> List[int]:
3939
super().seed(seed)

tianshou/env/worker/subproc.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
5353
assert isinstance(space.spaces, tuple)
5454
return tuple([_setup_buf(t) for t in space.spaces])
5555
else:
56-
return ShArray(space.dtype, space.shape)
56+
return ShArray(space.dtype, space.shape) # type: ignore
5757

5858

5959
def _worker(

tianshou/policy/modelfree/sac.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,9 +122,8 @@ def forward( # type: ignore
122122
# You can check out the original SAC paper (arXiv 1801.01290): Eq 21.
123123
# in appendix C to get some understanding of this equation.
124124
if self.action_scaling and self.action_space is not None:
125-
action_scale = to_torch_as(
126-
(self.action_space.high - self.action_space.low) / 2.0, act
127-
)
125+
low, high = self.action_space.low, self.action_space.high # type: ignore
126+
action_scale = to_torch_as((high - low) / 2.0, act)
128127
else:
129128
action_scale = 1.0 # type: ignore
130129
squashed_action = torch.tanh(act)

0 commit comments

Comments (0)