From 53c9da8b25672aafe41c383cfdd09aa308e4dc51 Mon Sep 17 00:00:00 2001 From: chr0nikler Date: Mon, 23 Dec 2024 15:01:07 -0800 Subject: [PATCH 1/3] Fixup for 'Training An Agent' page --- docs/introduction/train_agent.md | 53 ++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/docs/introduction/train_agent.md b/docs/introduction/train_agent.md index 4c0f2542ea..c3e6adb12f 100644 --- a/docs/introduction/train_agent.md +++ b/docs/introduction/train_agent.md @@ -155,37 +155,44 @@ You can use `matplotlib` to visualize the training reward and length. ```python from matplotlib import pyplot as plt -# visualize the episode rewards, episode length and training error in one figure -fig, axs = plt.subplots(1, 3, figsize=(20, 8)) -# np.convolve will compute the rolling mean for 100 episodes - -axs[0].plot(np.convolve(env.return_queue, np.ones(100)/100)) -axs[0].set_title("Episode Rewards") -axs[0].set_xlabel("Episode") -axs[0].set_ylabel("Reward") - -axs[1].plot(np.convolve(env.length_queue, np.ones(100)/100)) -axs[1].set_title("Episode Lengths") -axs[1].set_xlabel("Episode") -axs[1].set_ylabel("Length") - -axs[2].plot(np.convolve(agent.training_error, np.ones(100)/100)) +def get_moving_avgs(arr, window, convolution_mode): + return np.convolve( + np.array(arr).flatten(), + np.ones(window), + mode=convolution_mode + ) / window + +# Smooth over a 500 episode window +rolling_length = 500 +fig, axs = plt.subplots(ncols=3, figsize=(12, 5)) +axs[0].set_title("Episode rewards") +# compute and assign a rolling average of the data to provide a smoother graph +reward_moving_average = ( + get_moving_avgs(env.return_queue, rolling_length, "valid") + / rolling_length +) +axs[0].plot(range(len(reward_moving_average)), reward_moving_average) +axs[1].set_title("Episode lengths") +length_moving_average = ( + get_moving_avgs(env.length_queue, rolling_length, "valid") + / rolling_length +) +axs[1].plot(range(len(length_moving_average)), length_moving_average) axs[2].set_title("Training Error") -axs[2].set_xlabel("Episode") -axs[2].set_ylabel("Temporal Difference") - +training_error_moving_average = ( + get_moving_avgs(agent.training_error, rolling_length, "same") + / rolling_length +) +axs[2].plot(range(len(training_error_moving_average)), training_error_moving_average) plt.tight_layout() plt.show() -``` -![](../_static/img/tutorials/blackjack_training_plots.png "Training Plot") -## Visualising the policy -![](../_static/img/tutorials/blackjack_with_usable_ace.png "With a usable ace") +``` -![](../_static/img/tutorials/blackjack_without_usable_ace.png "Without a usable ace") +![](../_static/img/tutorials/blackjack_training_plots.png "Training Plot") Hopefully this tutorial helped you get a grip of how to interact with Gymnasium environments and sets you on a journey to solve many more RL challenges. From 43b41e3cac413e7d3eaafcdc345b4d19f85a164e Mon Sep 17 00:00:00 2001 From: chr0nikler Date: Mon, 23 Dec 2024 15:04:58 -0800 Subject: [PATCH 2/3] precommit --- docs/introduction/train_agent.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/introduction/train_agent.md b/docs/introduction/train_agent.md index c3e6adb12f..c5f7276013 100644 --- a/docs/introduction/train_agent.md +++ b/docs/introduction/train_agent.md @@ -163,7 +163,7 @@ def get_moving_avgs(arr, window, convolution_mode): mode=convolution_mode ) / window -# Smooth over a 500 episode window +# Smooth over a 500 episode window rolling_length = 500 fig, axs = plt.subplots(ncols=3, figsize=(12, 5)) axs[0].set_title("Episode rewards") From 8801e61963a89fa42439ff850ff26f9b360be889 Mon Sep 17 00:00:00 2001 From: chr0nikler Date: Mon, 23 Dec 2024 16:23:15 -0800 Subject: [PATCH 3/3] cleanup --- docs/introduction/train_agent.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/introduction/train_agent.md b/docs/introduction/train_agent.md index c5f7276013..461a0d9b16 100644 --- a/docs/introduction/train_agent.md +++ b/docs/introduction/train_agent.md @@ -166,30 +166,34 @@ def get_moving_avgs(arr, window, convolution_mode): # Smooth over a 500 episode window rolling_length = 500 fig, axs = plt.subplots(ncols=3, figsize=(12, 5)) + axs[0].set_title("Episode rewards") -# compute and assign a rolling average of the data to provide a smoother graph -reward_moving_average = ( - get_moving_avgs(env.return_queue, rolling_length, "valid") - / rolling_length +reward_moving_average = get_moving_avgs( + env.return_queue, + rolling_length, + "valid" ) axs[0].plot(range(len(reward_moving_average)), reward_moving_average) + axs[1].set_title("Episode lengths") -length_moving_average = ( - get_moving_avgs(env.length_queue, rolling_length, "valid") - / rolling_length +length_moving_average = get_moving_avgs( + env.length_queue, + rolling_length, + "valid" ) axs[1].plot(range(len(length_moving_average)), length_moving_average) + axs[2].set_title("Training Error") -training_error_moving_average = ( - get_moving_avgs(agent.training_error, rolling_length, "same") - / rolling_length +training_error_moving_average = get_moving_avgs( + agent.training_error, + rolling_length, + "same" ) axs[2].plot(range(len(training_error_moving_average)), training_error_moving_average) plt.tight_layout() plt.show() - ``` ![](../_static/img/tutorials/blackjack_training_plots.png "Training Plot")