Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions examples/rl/actor_critic_cartpole.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@

### References

- [CartPole](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)
- [Environment documentation](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)
- [CartPole paper](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)
- [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)
"""
"""
Expand All @@ -56,6 +57,7 @@
gamma = 0.99 # Discount factor for past rewards
max_steps_per_episode = 10000
env = gym.make("CartPole-v0") # Create the environment
# Passing `render_mode='human'` to `gym.make` will show the attempts of the agent
env.seed(seed)
eps = np.finfo(np.float32).eps.item() # Smallest number such that 1.0 + eps != 1.0

Expand Down Expand Up @@ -96,12 +98,10 @@
episode_count = 0

while True: # Run until solved
state = env.reset()
state = env.reset()[0]
episode_reward = 0
with tf.GradientTape() as tape:
for timestep in range(1, max_steps_per_episode):
# env.render(); Adding this line would show the attempts
# of the agent in a pop up window.

state = ops.convert_to_tensor(state)
state = ops.expand_dims(state, 0)
Expand All @@ -116,7 +116,7 @@
action_probs_history.append(ops.log(action_probs[0, action]))

# Apply the sampled action in our environment
state, reward, done, _ = env.step(action)
state, reward, done, _, _ = env.step(action)
rewards_history.append(reward)
episode_reward += reward

Expand Down
12 changes: 6 additions & 6 deletions examples/rl/ipynb/actor_critic_cartpole.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
"\n",
"### References\n",
"\n",
"- [CartPole](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)\n",
"- [Environment documentation](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)\n",
"- [CartPole paper](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)\n",
"- [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)\n"
]
},
Expand Down Expand Up @@ -80,6 +81,7 @@
"gamma = 0.99 # Discount factor for past rewards\n",
"max_steps_per_episode = 10000\n",
"env = gym.make(\"CartPole-v0\") # Create the environment\n",
"# Adding `render_mode='human'` will show the attempts of the agent\n",
"env.seed(seed)\n",
"eps = np.finfo(np.float32).eps.item() # Smallest number such that 1.0 + eps != 1.0\n"
]
Expand Down Expand Up @@ -148,13 +150,11 @@
"episode_count = 0\n",
"\n",
"while True: # Run until solved\n",
" state = env.reset()\n",
" state = env.reset()[0]\n",
" episode_reward = 0\n",
" with tf.GradientTape() as tape:\n",
" for timestep in range(1, max_steps_per_episode):\n",
" # env.render(); Adding this line would show the attempts\n",
" # of the agent in a pop up window.\n",
"\n",
" \n",
" state = ops.convert_to_tensor(state)\n",
" state = ops.expand_dims(state, 0)\n",
"\n",
Expand All @@ -168,7 +168,7 @@
" action_probs_history.append(ops.log(action_probs[0, action]))\n",
"\n",
" # Apply the sampled action in our environment\n",
" state, reward, done, _ = env.step(action)\n",
" state, reward, done, _, _ = env.step(action)\n",
" rewards_history.append(reward)\n",
" episode_reward += reward\n",
"\n",
Expand Down
10 changes: 5 additions & 5 deletions examples/rl/md/actor_critic_cartpole.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ remains upright. The agent, therefore, must learn to keep the pole from falling

### References

- [CartPole](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)
- [Environment documentation](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)
- [CartPole paper](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)
- [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)


Expand All @@ -60,6 +61,7 @@ seed = 42
gamma = 0.99 # Discount factor for past rewards
max_steps_per_episode = 10000
env = gym.make("CartPole-v0") # Create the environment
# Passing `render_mode='human'` to `gym.make` will show the attempts of the agent
env.seed(seed)
eps = np.finfo(np.float32).eps.item() # Smallest number such that 1.0 + eps != 1.0

Expand Down Expand Up @@ -108,12 +110,10 @@ running_reward = 0
episode_count = 0

while True: # Run until solved
state = env.reset()
state = env.reset()[0]
episode_reward = 0
with tf.GradientTape() as tape:
for timestep in range(1, max_steps_per_episode):
# env.render(); Adding this line would show the attempts
# of the agent in a pop up window.

state = ops.convert_to_tensor(state)
state = ops.expand_dims(state, 0)
Expand All @@ -128,7 +128,7 @@ while True: # Run until solved
action_probs_history.append(ops.log(action_probs[0, action]))

# Apply the sampled action in our environment
state, reward, done, _ = env.step(action)
state, reward, done, _, _ = env.step(action)
rewards_history.append(reward)
episode_reward += reward

Expand Down