We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1d548e4 commit cd5b69b (Copy full SHA for cd5b69b)
examples/rl/actor_critic_cartpole.py
@@ -45,7 +45,7 @@
45
import os
46
47
os.environ["KERAS_BACKEND"] = "tensorflow"
48
-import gym
+import gymnasium as gym
49
import numpy as np
50
import keras
51
from keras import ops
@@ -98,13 +98,13 @@
98
episode_count = 0
99
100
while True: # Run until solved
101
- state = env.reset()[0]
+ obs, _ = env.reset()
102
episode_reward = 0
103
with tf.GradientTape() as tape:
104
for timestep in range(1, max_steps_per_episode):
105
106
- state = ops.convert_to_tensor(state)
107
- state = ops.expand_dims(state, 0)
+ state = tf.convert_to_tensor(state)
+ state = tf.expand_dims(state, 0)
108
109
# Predict action probabilities and estimated future rewards
110
# from environment state
0 commit comments