We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e6fc189, commit 5a6b217 (copy full SHA for 5a6b217)
intermediate_source/reinforcement_ppo.py
@@ -639,7 +639,7 @@
639
# number of steps (1000, which is our ``env`` horizon).
640
# The ``rollout`` method of the ``env`` can take a policy as argument:
641
# it will then execute this policy at each step.
642
- with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
+ with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
643
# execute a rollout with the trained policy
644
eval_rollout = env.rollout(1000, policy_module)
645
logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())
0 commit comments