diff --git a/tianshou/algorithm/modelfree/trpo.py b/tianshou/algorithm/modelfree/trpo.py index 450fdde54..6ba0a2259 100644 --- a/tianshou/algorithm/modelfree/trpo.py +++ b/tianshou/algorithm/modelfree/trpo.py @@ -183,7 +183,7 @@ def _update_with_batch( # type: ignore[override] if i < self.max_backtracks - 1: step_size = step_size * self.backtrack_coeff else: - self._set_from_flat_params(self.policy.actor, new_flat_params) + self._set_from_flat_params(self.policy.actor, flat_params) step_size = torch.tensor([0.0]) warnings.warn( "Line search failed! It seems hyperparamters"