Skip to content

Commit 17449c4

Browse files
committed
Merge branch 'dev-v1' into dev-v2
Conflicts: tianshou/trainer/base.py
2 parents 87c7fb5 + cd57fa7 commit 17449c4

File tree

13 files changed

+34
-29
lines changed

13 files changed

+34
-29
lines changed

CHANGELOG.md

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,13 @@ Developers:
205205
(via adaptation if necessary).
206206

207207

208-
## Unreleased
208+
## Upcoming Release 1.2.0
209209

210210
### Changes/Improvements
211211

212-
- trainer:
212+
- `trainer`:
213213
- Custom scoring now supported for selecting the best model. #1202
214-
- highlevel:
214+
- `highlevel`:
215215
- `DiscreteSACExperimentBuilder`: Expose method `with_actor_factory_default` #1248 #1250
216216
- `ActorFactoryDefault`: Fix parameters for hidden sizes and activation not being
217217
passed on in the discrete case (affects `with_actor_factory_default` method of experiment builders)
@@ -224,22 +224,29 @@ Developers:
224224
- `NPGAgentFactory`, `TRPOAgentFactory`: Fix optimizer instantiation including the actor parameters
225225
(which was misleadingly suggested in the docstring in the respective policy classes; docstrings were fixed),
226226
as the actor parameters are intended to be handled via natural gradients internally
227-
227+
- Tests:
228+
- We have introduced extensive **determinism tests**, which make it possible to validate whether
229+
training processes deterministically compute the same results across different development branches.
230+
This is an important step towards ensuring reproducibility and consistency, which will be
231+
instrumental in supporting Tianshou developers in their work, especially in the context of
232+
algorithm development and evaluation.
233+
228234
### Breaking Changes
229235

230-
- trainer:
236+
- `trainer`:
231237
- `BaseTrainer.run` and `__iter__`: Resetting was never optional prior to running the trainer,
232238
yet the recently introduced parameter `reset_prior_to_run` of `run` suggested that it _was_ optional.
233239
However, the parameter was ultimately not respected, because `__iter__` would always call `reset(reset_collectors=True, reset_buffer=False)`
234240
regardless. The parameter was removed; instead, the parameters of `run` now mirror the parameters of `reset`,
235241
and the implicit `reset` call in `__iter__` was removed.
236242
This aligns with upcoming changes in Tianshou v2.0.0.
237-
NOTE: If you have been using a trainer without calling `run` but by directly iterating over it, you
238-
will need to call `reset` on the trainer explicitly before iterating over the trainer.
239-
- data:
240-
- stats:
241-
- `InfoStats` has a new non-optional field `best_score` which is used
242-
for selecting the best model. #1202
243+
* NOTE: If you have been using a trainer without calling `run` but by directly iterating over it, you
244+
will need to call `reset` on the trainer explicitly before iterating over the trainer.
245+
* Using a trainer as an iterator is considered deprecated, and support for this will be removed in Tianshou v2.0.0.
246+
- `data`:
247+
- `InfoStats` has a new non-optional field `best_score` which is used
248+
for selecting the best model. #1202
249+
243250

244251
## Release 1.1.0
245252

test/continuous/test_ddpg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,6 @@ def stop_fn(mean_rewards: float) -> bool:
145145
assert stop_fn(result.best_reward)
146146

147147

148-
def test_ddpg_determinism():
148+
def test_ddpg_determinism() -> None:
149149
main_fn = lambda args: test_ddpg(args, enable_assertions=False)
150150
AlgorithmDeterminismTest("continuous_ddpg", main_fn, get_args()).run()

test/continuous/test_npg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,6 @@ def stop_fn(mean_rewards: float) -> bool:
163163
assert stop_fn(result.best_reward)
164164

165165

166-
def test_npg_determinism():
166+
def test_npg_determinism() -> None:
167167
main_fn = lambda args: test_npg(args, enable_assertions=False)
168168
AlgorithmDeterminismTest("continuous_npg", main_fn, get_args()).run()

test/continuous/test_ppo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,6 @@ def test_ppo_resume(args: argparse.Namespace = get_args()) -> None:
196196
test_ppo(args)
197197

198198

199-
def test_ppo_determinism():
199+
def test_ppo_determinism() -> None:
200200
main_fn = lambda args: test_ppo(args, enable_assertions=False)
201201
AlgorithmDeterminismTest("continuous_ppo", main_fn, get_args()).run()

test/continuous/test_redq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,6 @@ def stop_fn(mean_rewards: float) -> bool:
174174
assert stop_fn(result.best_reward)
175175

176176

177-
def test_redq_determinism():
177+
def test_redq_determinism() -> None:
178178
main_fn = lambda args: test_redq(args, enable_assertions=False)
179179
AlgorithmDeterminismTest("continuous_redq", main_fn, get_args()).run()

test/continuous/test_sac_with_il.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,6 @@ def stop_fn(mean_rewards: float) -> bool:
232232
assert stop_fn(result.best_reward)
233233

234234

235-
def test_sac_determinism():
235+
def test_sac_determinism() -> None:
236236
main_fn = lambda args: test_sac_with_il(args, enable_assertions=False, skip_il=True)
237237
AlgorithmDeterminismTest("continuous_sac", main_fn, get_args()).run()

test/continuous/test_td3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,6 @@ def stop_fn(mean_rewards: float) -> bool:
161161
assert stop_fn(result.best_reward)
162162

163163

164-
def test_td3_determinism():
164+
def test_td3_determinism() -> None:
165165
main_fn = lambda args: test_td3(args, enable_assertions=False)
166166
AlgorithmDeterminismTest("continuous_td3", main_fn, get_args()).run()

test/continuous/test_trpo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,6 @@ def stop_fn(mean_rewards: float) -> bool:
165165
assert stop_fn(result.best_reward)
166166

167167

168-
def test_trpo_determinism():
168+
def test_trpo_determinism() -> None:
169169
main_fn = lambda args: test_trpo(args, enable_assertions=False)
170170
AlgorithmDeterminismTest("continuous_trpo", main_fn, get_args()).run()

test/determinism_test.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,16 @@
1010

1111

1212
class TorchDeterministicModeContext:
13-
def __init__(self, mode="default"):
13+
def __init__(self, mode: str | int = "default") -> None:
1414
self.new_mode = mode
15-
self.original_mode = None
15+
self.original_mode: str | int | None = None
1616

17-
def __enter__(self):
17+
def __enter__(self) -> None:
1818
self.original_mode = torch.get_deterministic_debug_mode()
1919
torch.set_deterministic_debug_mode(self.new_mode)
2020

21-
def __exit__(self, exc_type, exc_value, traceback):
22-
assert (
23-
self.original_mode is not None
24-
), "original_mode should not be None, did you enter the context?"
21+
def __exit__(self, exc_type, exc_value, traceback): # type: ignore
22+
assert self.original_mode is not None
2523
torch.set_deterministic_debug_mode(self.original_mode)
2624

2725

test/discrete/test_c51.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,6 @@ def test_pc51(args: argparse.Namespace = get_args()) -> None:
224224
test_c51(args)
225225

226226

227-
def test_c51_determinism():
227+
def test_c51_determinism() -> None:
228228
main_fn = lambda args: test_c51(args, enable_assertions=False)
229229
AlgorithmDeterminismTest("discrete_c51", main_fn, get_args()).run()

0 commit comments

Comments
 (0)