fix atari examples (#206)

Trinkle23897 · web-flow · commit 380e9e911daa · 2020-09-06T23:05:33.000+08:00
diff --git a/examples/atari/atari_dqn.py b/examples/atari/atari_dqn.py
@@ -84,7 +84,7 @@ def test_dqn(args=get_args()):
     # replay buffer: `save_last_obs` and `stack_num` can be removed together
     # when you have enough RAM
     buffer = ReplayBuffer(args.buffer_size, ignore_obs_next=True,
-                          save_last_obs=True, stack_num=args.frames_stack)
+                          save_only_last_obs=True, stack_num=args.frames_stack)
     # collector
     train_collector = Collector(policy, train_envs, buffer)
     test_collector = Collector(policy, test_envs)
@@ -100,17 +100,19 @@ def stop_fn(x):
             return x >= env.spec.reward_threshold
         elif 'Pong' in args.task:
             return x >= 20
+        else:
+            return False
 
     def train_fn(x):
         # nature DQN setting, linear decay in the first 1M steps
         now = x * args.collect_per_step * args.step_per_epoch
         if now <= 1e6:
             eps = args.eps_train - now / 1e6 * \
                 (args.eps_train - args.eps_train_final)
-            policy.set_eps(eps)
         else:
-            policy.set_eps(args.eps_train_final)
-        print("set eps =", policy.eps)
+            eps = args.eps_train_final
+        policy.set_eps(eps)
+        writer.add_scalar('train/eps', eps, global_step=now)
 
     def test_fn(x):
         policy.set_eps(args.eps_test)