Merge branch 'dev-v1' into dev-v2

opcode81 · opcode81 · commit 17449c45d7a2 · 2025-05-15T13:08:19.000+02:00
Conflicts:
	tianshou/trainer/base.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -205,13 +205,13 @@ Developers:
       (via adaptation if necessary).
 
 
-## Unreleased
+## Upcoming Release 1.2.0
 
 ### Changes/Improvements
 
-- trainer:
+- `trainer`:
     - Custom scoring now supported for selecting the best model. #1202
-- highlevel:
+- `highlevel`:
     - `DiscreteSACExperimentBuilder`: Expose method `with_actor_factory_default` #1248 #1250
     - `ActorFactoryDefault`: Fix parameters for hidden sizes and activation not being 
       passed on in the discrete case (affects `with_actor_factory_default` method of experiment builders)
@@ -224,22 +224,29 @@ Developers:
     - `NPGAgentFactory`, `TRPOAgentFactory`: Fix optimizer instantiation including the actor parameters
       (which was misleadingly suggested in the docstring in the respective policy classes; docstrings were fixed),
       as the actor parameters are intended to be handled via natural gradients internally
-
+- Tests:
+    - We have introduced extensive **determinism tests**, which make it possible to validate whether
+      training processes deterministically compute the same results across different development branches.
+      This is an important step towards ensuring reproducibility and consistency, which will be
+      instrumental in supporting Tianshou developers in their work, especially in the context of
+      algorithm development and evaluation.
+  
 ### Breaking Changes
 
-- trainer:
+- `trainer`:
     - `BaseTrainer.run` and `__iter__`: Resetting was never optional prior to running the trainer,
       yet the recently introduced parameter `reset_prior_to_run` of `run` suggested that it _was_ optional.
       Yet the parameter was ultimately not respected, because `__iter__` would always call `reset(reset_collectors=True, reset_buffer=False)`
       regardless. The parameter was removed; instead, the parameters of `run` now mirror the parameters of `reset`,
       and the implicit `reset` call in `__iter__` was removed.     
       This aligns with upcoming changes in Tianshou v2.0.0.  
-      NOTE: If you have been using a trainer without calling `run` but by directly iterating over it, you
-      will need to call `reset` on the trainer explicitly before iterating over the trainer.
-- data:
-    - stats:
-        - `InfoStats` has a new non-optional field `best_score` which is used
-          for selecting the best model. #1202
+        * NOTE: If you have been using a trainer without calling `run` but by directly iterating over it, you
+          will need to call `reset` on the trainer explicitly before iterating over the trainer.
+        * Using a trainer as an iterator is considered deprecated, and support for it will be removed in Tianshou v2.0.0.
+- `data`:
+    - `InfoStats` has a new non-optional field `best_score` which is used
+      for selecting the best model. #1202
+
 
 ## Release 1.1.0
 
diff --git a/test/continuous/test_ddpg.py b/test/continuous/test_ddpg.py
@@ -145,6 +145,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_ddpg_determinism():
+def test_ddpg_determinism() -> None:
     main_fn = lambda args: test_ddpg(args, enable_assertions=False)
     AlgorithmDeterminismTest("continuous_ddpg", main_fn, get_args()).run()
diff --git a/test/continuous/test_npg.py b/test/continuous/test_npg.py
@@ -163,6 +163,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_npg_determinism():
+def test_npg_determinism() -> None:
     main_fn = lambda args: test_npg(args, enable_assertions=False)
     AlgorithmDeterminismTest("continuous_npg", main_fn, get_args()).run()
diff --git a/test/continuous/test_ppo.py b/test/continuous/test_ppo.py
@@ -196,6 +196,6 @@ def test_ppo_resume(args: argparse.Namespace = get_args()) -> None:
     test_ppo(args)
 
 
-def test_ppo_determinism():
+def test_ppo_determinism() -> None:
     main_fn = lambda args: test_ppo(args, enable_assertions=False)
     AlgorithmDeterminismTest("continuous_ppo", main_fn, get_args()).run()
diff --git a/test/continuous/test_redq.py b/test/continuous/test_redq.py
@@ -174,6 +174,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_redq_determinism():
+def test_redq_determinism() -> None:
     main_fn = lambda args: test_redq(args, enable_assertions=False)
     AlgorithmDeterminismTest("continuous_redq", main_fn, get_args()).run()
diff --git a/test/continuous/test_sac_with_il.py b/test/continuous/test_sac_with_il.py
@@ -232,6 +232,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_sac_determinism():
+def test_sac_determinism() -> None:
     main_fn = lambda args: test_sac_with_il(args, enable_assertions=False, skip_il=True)
     AlgorithmDeterminismTest("continuous_sac", main_fn, get_args()).run()
diff --git a/test/continuous/test_td3.py b/test/continuous/test_td3.py
@@ -161,6 +161,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_td3_determinism():
+def test_td3_determinism() -> None:
     main_fn = lambda args: test_td3(args, enable_assertions=False)
     AlgorithmDeterminismTest("continuous_td3", main_fn, get_args()).run()
diff --git a/test/continuous/test_trpo.py b/test/continuous/test_trpo.py
@@ -165,6 +165,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_trpo_determinism():
+def test_trpo_determinism() -> None:
     main_fn = lambda args: test_trpo(args, enable_assertions=False)
     AlgorithmDeterminismTest("continuous_trpo", main_fn, get_args()).run()
diff --git a/test/determinism_test.py b/test/determinism_test.py
@@ -10,18 +10,16 @@
 
 
 class TorchDeterministicModeContext:
-    def __init__(self, mode="default"):
+    def __init__(self, mode: str | int = "default") -> None:
         self.new_mode = mode
-        self.original_mode = None
+        self.original_mode: str | int | None = None
 
-    def __enter__(self):
+    def __enter__(self) -> None:
         self.original_mode = torch.get_deterministic_debug_mode()
         torch.set_deterministic_debug_mode(self.new_mode)
 
-    def __exit__(self, exc_type, exc_value, traceback):
-        assert (
-            self.original_mode is not None
-        ), "original_mode should not be None, did you enter the context?"
+    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore
+        assert self.original_mode is not None
         torch.set_deterministic_debug_mode(self.original_mode)
 
 
diff --git a/test/discrete/test_c51.py b/test/discrete/test_c51.py
@@ -224,6 +224,6 @@ def test_pc51(args: argparse.Namespace = get_args()) -> None:
     test_c51(args)
 
 
-def test_c51_determinism():
+def test_c51_determinism() -> None:
     main_fn = lambda args: test_c51(args, enable_assertions=False)
     AlgorithmDeterminismTest("discrete_c51", main_fn, get_args()).run()
diff --git a/test/discrete/test_qrdqn.py b/test/discrete/test_qrdqn.py
@@ -181,6 +181,6 @@ def test_pqrdqn(args: argparse.Namespace = get_args()) -> None:
     test_qrdqn(args)
 
 
-def test_qrdqn_determinism():
+def test_qrdqn_determinism() -> None:
     main_fn = lambda args: test_qrdqn(args, enable_assertions=False)
     AlgorithmDeterminismTest("discrete_qrdqn", main_fn, get_args()).run()
diff --git a/test/discrete/test_rainbow.py b/test/discrete/test_rainbow.py
@@ -243,6 +243,6 @@ def test_prainbow(args: argparse.Namespace = get_args()) -> None:
     test_rainbow(args)
 
 
-def test_rainbow_determinism():
+def test_rainbow_determinism() -> None:
     main_fn = lambda args: test_rainbow(args, enable_assertions=False)
     AlgorithmDeterminismTest("discrete_rainbow", main_fn, get_args()).run()
diff --git a/test/offline/test_cql.py b/test/offline/test_cql.py
@@ -200,6 +200,6 @@ def stop_fn(mean_rewards: float) -> bool:
         assert stop_fn(result.best_reward)
 
 
-def test_cql_determinism():
+def test_cql_determinism() -> None:
     main_fn = lambda args: test_cql(args, enable_assertions=False)
     AlgorithmDeterminismTest("offline_cql", main_fn, get_args(), is_offline=True).run()