From 53c9da8b25672aafe41c383cfdd09aa308e4dc51 Mon Sep 17 00:00:00 2001
From: chr0nikler <jchahal@diffzero.com>
Date: Mon, 23 Dec 2024 15:01:07 -0800
Subject: [PATCH 1/3] Fixup for 'Training An Agent' page

---
 docs/introduction/train_agent.md | 53 ++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/docs/introduction/train_agent.md b/docs/introduction/train_agent.md
index 4c0f2542ea..c3e6adb12f 100644
--- a/docs/introduction/train_agent.md
+++ b/docs/introduction/train_agent.md
@@ -155,37 +155,44 @@ You can use `matplotlib` to visualize the training reward and length.
 
 ```python
 from matplotlib import pyplot as plt
-# visualize the episode rewards, episode length and training error in one figure
-fig, axs = plt.subplots(1, 3, figsize=(20, 8))
 
-# np.convolve will compute the rolling mean for 100 episodes
-
-axs[0].plot(np.convolve(env.return_queue, np.ones(100)/100))
-axs[0].set_title("Episode Rewards")
-axs[0].set_xlabel("Episode")
-axs[0].set_ylabel("Reward")
-
-axs[1].plot(np.convolve(env.length_queue, np.ones(100)/100))
-axs[1].set_title("Episode Lengths")
-axs[1].set_xlabel("Episode")
-axs[1].set_ylabel("Length")
-
-axs[2].plot(np.convolve(agent.training_error, np.ones(100)/100))
+def get_moving_avgs(arr, window, convolution_mode):
+    return np.convolve(
+        np.array(arr).flatten(),
+        np.ones(window),
+        mode=convolution_mode
+    ) / window
+
+# Smooth over a 500 episode window 
+rolling_length = 500
+fig, axs = plt.subplots(ncols=3, figsize=(12, 5))
+axs[0].set_title("Episode rewards")
+# compute and assign a rolling average of the data to provide a smoother graph
+reward_moving_average = (
+    get_moving_avgs(env.return_queue, rolling_length, "valid")
+    / rolling_length
+)
+axs[0].plot(range(len(reward_moving_average)), reward_moving_average)
+axs[1].set_title("Episode lengths")
+length_moving_average = (
+    get_moving_avgs(env.length_queue, rolling_length, "valid")
+    / rolling_length
+)
+axs[1].plot(range(len(length_moving_average)), length_moving_average)
 axs[2].set_title("Training Error")
-axs[2].set_xlabel("Episode")
-axs[2].set_ylabel("Temporal Difference")
-
+training_error_moving_average = (
+    get_moving_avgs(agent.training_error, rolling_length, "same")
+    / rolling_length
+)
+axs[2].plot(range(len(training_error_moving_average)), training_error_moving_average)
 plt.tight_layout()
 plt.show()
-```
 
-![](../_static/img/tutorials/blackjack_training_plots.png "Training Plot")
 
-## Visualising the policy
 
-![](../_static/img/tutorials/blackjack_with_usable_ace.png "With a usable ace")
+```
 
-![](../_static/img/tutorials/blackjack_without_usable_ace.png "Without a usable ace")
+![](../_static/img/tutorials/blackjack_training_plots.png "Training Plot")
 
 Hopefully this tutorial helped you get a grip of how to interact with Gymnasium environments and sets you on a journey to solve many more RL challenges.
 

From 43b41e3cac413e7d3eaafcdc345b4d19f85a164e Mon Sep 17 00:00:00 2001
From: chr0nikler <jchahal@diffzero.com>
Date: Mon, 23 Dec 2024 15:04:58 -0800
Subject: [PATCH 2/3] Strip trailing whitespace flagged by pre-commit

---
 docs/introduction/train_agent.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/introduction/train_agent.md b/docs/introduction/train_agent.md
index c3e6adb12f..c5f7276013 100644
--- a/docs/introduction/train_agent.md
+++ b/docs/introduction/train_agent.md
@@ -163,7 +163,7 @@ def get_moving_avgs(arr, window, convolution_mode):
         mode=convolution_mode
     ) / window
 
-# Smooth over a 500 episode window 
+# Smooth over a 500 episode window
 rolling_length = 500
 fig, axs = plt.subplots(ncols=3, figsize=(12, 5))
 axs[0].set_title("Episode rewards")

From 8801e61963a89fa42439ff850ff26f9b360be889 Mon Sep 17 00:00:00 2001
From: chr0nikler <jchahal@diffzero.com>
Date: Mon, 23 Dec 2024 16:23:15 -0800
Subject: [PATCH 3/3] Remove duplicate division by rolling_length in moving averages

---
 docs/introduction/train_agent.md | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/docs/introduction/train_agent.md b/docs/introduction/train_agent.md
index c5f7276013..461a0d9b16 100644
--- a/docs/introduction/train_agent.md
+++ b/docs/introduction/train_agent.md
@@ -166,30 +166,34 @@ def get_moving_avgs(arr, window, convolution_mode):
 # Smooth over a 500 episode window
 rolling_length = 500
 fig, axs = plt.subplots(ncols=3, figsize=(12, 5))
+
 axs[0].set_title("Episode rewards")
-# compute and assign a rolling average of the data to provide a smoother graph
-reward_moving_average = (
-    get_moving_avgs(env.return_queue, rolling_length, "valid")
-    / rolling_length
+reward_moving_average = get_moving_avgs(
+    env.return_queue,
+    rolling_length,
+    "valid"
 )
 axs[0].plot(range(len(reward_moving_average)), reward_moving_average)
+
 axs[1].set_title("Episode lengths")
-length_moving_average = (
-    get_moving_avgs(env.length_queue, rolling_length, "valid")
-    / rolling_length
+length_moving_average = get_moving_avgs(
+    env.length_queue,
+    rolling_length,
+    "valid"
 )
 axs[1].plot(range(len(length_moving_average)), length_moving_average)
+
 axs[2].set_title("Training Error")
-training_error_moving_average = (
-    get_moving_avgs(agent.training_error, rolling_length, "same")
-    / rolling_length
+training_error_moving_average = get_moving_avgs(
+    agent.training_error,
+    rolling_length,
+    "same"
 )
 axs[2].plot(range(len(training_error_moving_average)), training_error_moving_average)
 plt.tight_layout()
 plt.show()
 
 
-
 ```
 
 ![](../_static/img/tutorials/blackjack_training_plots.png "Training Plot")