Farama-Foundation · pseudo-rnd-thoughts · Mar 31, 2025 · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/.github/workflows/run-tutorial.yml b/.github/workflows/run-tutorial.yml
@@ -9,7 +9,7 @@ on:
   pull_request:
     paths:
       - 'docs/tutorials/**/*.py'
-      - '.github/workflows/run-tutorials.yml'
+      - '.github/workflows/run-tutorial.yml'  # this workflow file itself, so edits to it re-run the tests
 
   # Allow manual trigger
   workflow_dispatch:
@@ -18,7 +18,7 @@ jobs:
   test-tutorials:
     runs-on: ubuntu-latest
     strategy:
-      fail-fast: false
+      fail-fast: false # This ensures all matrix combinations run even if one fails
       matrix:
         python-version: ["3.9"]
         tutorial-group:
@@ -34,23 +34,21 @@ jobs:
       uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
-        cache: 'pip'
 
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        # Install Gymnasium and its dependencies
-        pip install -e .
-        # Install additional dependencies for tutorials
-        pip install torch torchvision tqdm matplotlib seaborn pandas pygame
-        # Install MuJoCo dependencies if needed
         sudo apt-get update
-        sudo apt-get install -y patchelf libosmesa6-dev libgl1-mesa-glx libglfw3 libglew-dev
+        sudo apt-get install -y libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb unzip patchelf ffmpeg cmake swig
 
-    - name: Install MuJoCo (for MuJoCo tutorials)
+    - name: Install Gymnasium basics tutorial requirements
+      if: matrix.tutorial-group == 'gymnasium_basics'
+      run: |
+        pip install ".[mujoco, box2d]"
+
+    - name: Install Training Agents tutorial requirements
       if: matrix.tutorial-group == 'training_agents'
       run: |
-        pip install mujoco gymnasium[mujoco]
+        pip install ".[mujoco, toy_text, box2d]" torch seaborn matplotlib pandas tqdm
 
     - name: Get changed files
       id: changed-files
@@ -59,6 +57,34 @@ jobs:
         files: docs/tutorials/**/*.py
       if: github.event_name == 'pull_request'
 
+    - name: Patch tutorials
+      run: |
+        # Patch load_quadruped_model.py to use the built-in ant.xml instead of the missing mujoco_menagerie file
+        if [ -f "docs/tutorials/gymnasium_basics/load_quadruped_model.py" ]; then
+          sed -i 's|"./mujoco_menagerie/unitree_go1/scene.xml"|"ant.xml"|g' docs/tutorials/gymnasium_basics/load_quadruped_model.py
+        fi
+
+        # Patch mujoco_reinforce.py to reduce the number of episodes for CI
+        if [ -f "docs/tutorials/training_agents/mujoco_reinforce.py" ]; then
+          sed -i 's/total_num_episodes = int(5e3)/total_num_episodes = int(1e2)/g' docs/tutorials/training_agents/mujoco_reinforce.py
+        fi
+
+        # Patch frozenlake_q_learning.py to reduce the number of episodes and runs
+        if [ -f "docs/tutorials/training_agents/frozenlake_q_learning.py" ]; then
+          sed -i 's/total_episodes=2000/total_episodes=200/g' docs/tutorials/training_agents/frozenlake_q_learning.py
+          sed -i 's/n_runs=20/n_runs=3/g' docs/tutorials/training_agents/frozenlake_q_learning.py
+          sed -i 's/map_sizes = \[4, 7, 9, 11\]/map_sizes = \[4, 7\]/g' docs/tutorials/training_agents/frozenlake_q_learning.py
+        fi
+
+        # Patch vector_a2c.py to reduce the number of updates and environments
+        if [ -f "docs/tutorials/training_agents/vector_a2c.py" ]; then
+          sed -i 's/n_envs = 10/n_envs = 4/g' docs/tutorials/training_agents/vector_a2c.py
+          sed -i 's/n_updates = 1000/n_updates = 100/g' docs/tutorials/training_agents/vector_a2c.py
+        fi
+
+        # Make sure we use 'rgb_array' render mode instead of 'human' everywhere to avoid display issues
+        find docs/tutorials -name "*.py" -type f -exec sed -i 's/render_mode="human"/render_mode="rgb_array"/g' {} \;
+
     - name: Test tutorials (${{ matrix.tutorial-group }})
       id: run-tutorials
       run: |
@@ -67,7 +93,6 @@ jobs:
 
         # Determine which tutorials to test
         if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-          echo "PR detected - testing only modified tutorials"
           # Get the list of modified tutorial files in this group
           for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
             if [[ $file == docs/tutorials/${{ matrix.tutorial-group }}/* && $file == *.py ]]; then
@@ -77,14 +102,10 @@ jobs:
 
           # If no tutorials in this group were modified, skip this job
           if [ ! -f tutorial_files.txt ] || [ ! -s tutorial_files.txt ]; then
-            echo "No tutorials modified in ${{ matrix.tutorial-group }} - skipping tests"
-            echo "total=0" >> $GITHUB_OUTPUT
-            echo "passed=0" >> $GITHUB_OUTPUT
-            echo "failed=0" >> $GITHUB_OUTPUT
+            echo "No tutorials modified in ${{ matrix.tutorial-group }} - skipping"
             exit 0
           fi
         else
-          echo "Main branch or manual run - testing all tutorials"
           # Find all Python files in the tutorial group
           find docs/tutorials/${{ matrix.tutorial-group }} -name "*.py" -type f | sort > tutorial_files.txt
         fi
@@ -94,81 +115,64 @@ jobs:
         passed=0
         failed=0
 
-        # Run each tutorial with timeout
+        # Run each tutorial with timeout - continue even if one fails
         while IFS= read -r tutorial; do
+          # Clear separator for better readability
+          echo ""
+          echo "========================================================"
           echo "Running tutorial: $tutorial"
+          echo "========================================================"
           total=$((total+1))
 
           # Set max time based on complexity (can be adjusted)
           max_time=300  # 5 minutes default
 
-          # Create a marker to skip rendering for headless environment
-          sed -i 's/render_mode="human"/render_mode="rgb_array"/g' "$tutorial" || true
+          # Create log file path
+          log_file="test-results/$(basename "$tutorial").log"
 
           # Run the tutorial with timeout and record results
           start_time=$(date +%s)
-          timeout $max_time python "$tutorial" > "test-results/$(basename "$tutorial").log" 2>&1
+          # Use set +e so the script continues even if the command fails
+          set +e
+          timeout $max_time python "$tutorial" > "$log_file" 2>&1
           exit_code=$?
+          set -e
           end_time=$(date +%s)
           execution_time=$((end_time-start_time))
 
+          # Output results to console immediately
           if [ $exit_code -eq 0 ]; then
-            echo "✅ Passed: $tutorial (${execution_time}s)"
+            echo "✅ PASSED: $tutorial (${execution_time}s)"
             passed=$((passed+1))
             echo "$tutorial,pass,$execution_time" >> test-results/summary.csv
           elif [ $exit_code -eq 124 ]; then
-            echo "⚠️ Timeout: $tutorial (exceeded ${max_time}s)"
+            echo "⚠️ TIMEOUT: $tutorial (exceeded ${max_time}s)"
             failed=$((failed+1))
             echo "$tutorial,timeout,$max_time" >> test-results/summary.csv
+            # Show the last output before timeout
+            echo "Last output before timeout:"
+            echo "----------------------------------------"
+            tail -n 20 "$log_file"
+            echo "----------------------------------------"
           else
-            echo "❌ Failed: $tutorial (${execution_time}s)"
+            echo "❌ FAILED: $tutorial (${execution_time}s)"
             failed=$((failed+1))
             echo "$tutorial,fail,$execution_time" >> test-results/summary.csv
+            # Show the error details
+            echo "Error details:"
+            echo "----------------------------------------"
+            cat "$log_file"
+            echo "----------------------------------------"
           fi
 
-          echo "----------------------------------------"
         done < tutorial_files.txt
 
-        echo "::endgroup::"
-
-        # Set output variables
+        # Export the counters as outputs for later steps
         echo "total=$total" >> $GITHUB_OUTPUT
         echo "passed=$passed" >> $GITHUB_OUTPUT
         echo "failed=$failed" >> $GITHUB_OUTPUT
 
-        # Generate summary
-        echo "### Tutorial Test Results for ${{ matrix.tutorial-group }} 📊" >> $GITHUB_STEP_SUMMARY
-        echo "" >> $GITHUB_STEP_SUMMARY
-
-        if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-          echo "**Mode:** Testing only modified tutorials in PR #${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY
-        else
-          echo "**Mode:** Testing all tutorials (main branch or manual run)" >> $GITHUB_STEP_SUMMARY
-        fi
-        echo "" >> $GITHUB_STEP_SUMMARY
-
-        echo "| Metric | Count |" >> $GITHUB_STEP_SUMMARY
-        echo "| ------ | ----- |" >> $GITHUB_STEP_SUMMARY
-        echo "| ✅ Passed | $passed |" >> $GITHUB_STEP_SUMMARY
-        echo "| ❌ Failed | $failed |" >> $GITHUB_STEP_SUMMARY
-        echo "| 📚 Total | $total |" >> $GITHUB_STEP_SUMMARY
-
-        # List all tested tutorials
-        if [ $total -gt 0 ]; then
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "### Tested Tutorials 📝" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-
-          while IFS=, read -r file status time; do
-            if [ "$status" == "pass" ]; then
-              echo "- ✅ $file (${time}s)" >> $GITHUB_STEP_SUMMARY
-            elif [ "$status" == "timeout" ]; then
-              echo "- ⚠️ $file (timeout after ${time}s)" >> $GITHUB_STEP_SUMMARY
-            else
-              echo "- ❌ $file (failed after ${time}s)" >> $GITHUB_STEP_SUMMARY
-            fi
-          done < test-results/summary.csv
-        fi
+        echo "::endgroup::"
 
     - name: Upload test results
       if: always()
@@ -184,8 +188,11 @@ jobs:
         if [ "${{ steps.run-tutorials.outputs.total }}" -eq 0 ]; then
           echo "::notice::No tutorials were tested in this group."
         elif [ "${{ steps.run-tutorials.outputs.failed }}" -gt 0 ]; then
-          echo "::error::${{ steps.run-tutorials.outputs.failed }} out of ${{ steps.run-tutorials.outputs.total }} tutorials failed."
-          exit 1
+          echo "::warning::${{ steps.run-tutorials.outputs.failed }} out of ${{ steps.run-tutorials.outputs.total }} tutorials failed. Read in Test tutorials and click \`Running tutorials...\` to see the error messages."
         else
           echo "::notice::All ${{ steps.run-tutorials.outputs.total }} tutorials passed."
         fi
+
+        # Fail this job if any tutorial failed; fail-fast is off, so other matrix jobs keep running.
+        # The "|| 0" default covers the early "skipping" exit of the test step, which writes no outputs.
+        [ "${{ steps.run-tutorials.outputs.failed || 0 }}" -eq 0 ]
diff --git a/...orials/reinforce_invpend_gym_v26_fig1.gif → ...c/img/tutorials/mujoco_reinforce_fig1.gif b/...orials/reinforce_invpend_gym_v26_fig1.gif → ...c/img/tutorials/mujoco_reinforce_fig1.gif
diff --git a/...orials/reinforce_invpend_gym_v26_fig2.png → ...c/img/tutorials/mujoco_reinforce_fig2.png b/...orials/reinforce_invpend_gym_v26_fig2.png → ...c/img/tutorials/mujoco_reinforce_fig2.png
diff --git a/...rials/reinforce_invpend_gym_v26_fig3.jpeg → .../img/tutorials/mujoco_reinforce_fig3.jpeg b/...rials/reinforce_invpend_gym_v26_fig3.jpeg → .../img/tutorials/mujoco_reinforce_fig3.jpeg
diff --git a/...orials/reinforce_invpend_gym_v26_fig4.png → ...c/img/tutorials/mujoco_reinforce_fig4.png b/...orials/reinforce_invpend_gym_v26_fig4.png → ...c/img/tutorials/mujoco_reinforce_fig4.png
diff --git a/docs/tutorials/README.rst b/docs/tutorials/README.rst
@@ -1,7 +1,9 @@
 Tutorials
 =========
-In this section, we cover some of the most well-known benchmarks of RL including the Frozen Lake, Black Jack, and Training using REINFORCE for Mujoco.
 
-Additionally, we provide a guide on how to load custom quadruped robot environments, implementing custom wrappers, creating custom environments, handling time limits, and training A2C with Vector Envs and Domain Randomization.
+We provide two sets of tutorials: basics and training.
 
-Lastly, there is a guide on third-party integrations with Gymnasium.
+* The basics tutorials aim to showcase the fundamental API of Gymnasium to help users work with it.
+* The most common application of Gymnasium is training RL agents; the training tutorials show a range of example implementations for different environments.
+
+Additionally, we link to third-party tutorials from external projects that use Gymnasium and that could help users.
diff --git a/docs/tutorials/gymnasium_basics/README.rst b/docs/tutorials/gymnasium_basics/README.rst
@@ -1,16 +1,10 @@
-Gymnasium Basics Documentation Links
+Gymnasium Basics
 ----------------
-Load custom quadruped robot environments link: https://gymnasium.farama.org/tutorials/gymnasium_basics/load_quadruped_model/
-
-Implementing Custom Wrappers link: https://gymnasium.farama.org/tutorials/gymnasium_basics/implementing_custom_wrappers/
-
-Make your own custom environment(environment_creation.py): https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/
-
-Handling Time Limits: https://gymnasium.farama.org/tutorials/gymnasium_basics/handling_time_limits/
-
-Training A2C with Vector Envs and Domain Randomization: https://gymnasium.farama.org/tutorials/gymnasium_basics/vector_envs_tutorial/
 
 .. toctree::
-   :hidden:
+    :hidden:
 
-   /tutorials/gymnasium_basics/load_quadruped_model.md
+    environment_creation
+    implementing_custom_wrappers
+    handling_time_limits
+    load_quadruped_model
diff --git a/docs/tutorials/gymnasium_basics/environment_creation.py b/docs/tutorials/gymnasium_basics/environment_creation.py
@@ -58,16 +58,16 @@
 
     .
     ├── gymnasium_env
-    │   ├── envs
-    │   │   ├── grid_world.py
-    │   │   └── __init__.py
-    │   ├── __init__.py
-    │   └── wrappers
-    │       ├── clip_reward.py
-    │       ├── discrete_actions.py
-    │       ├── __init__.py
-    │       ├── reacher_weighted_reward.py
-    │       └── relative_position.py
+    │        ├── envs
+    │        │       ├── grid_world.py
+    │        │       └── __init__.py
+    │        ├── __init__.py
+    │        └── wrappers
+    │            ├── clip_reward.py
+    │            ├── discrete_actions.py
+    │            ├── __init__.py
+    │            ├── reacher_weighted_reward.py
+    │            └── relative_position.py
     ├── LICENSE
     ├── pyproject.toml
     └── README.md

diff --git a/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
@@ -32,9 +32,9 @@
 # observation wrapper like this:
 
 import numpy as np
-from gym import ActionWrapper, ObservationWrapper, RewardWrapper, Wrapper
 
 import gymnasium as gym
+from gymnasium import ActionWrapper, ObservationWrapper, RewardWrapper, Wrapper
 from gymnasium.spaces import Box, Discrete
 
 
@@ -69,12 +69,12 @@ def action(self, act):
         return self.disc_to_cont[act]
 
 
-if __name__ == "__main__":
-    env = gym.make("LunarLanderContinuous-v2")
-    wrapped_env = DiscreteActions(
-        env, [np.array([1, 0]), np.array([-1, 0]), np.array([0, 1]), np.array([0, -1])]
-    )
-    print(wrapped_env.action_space)  # Discrete(4)
+env = gym.make("LunarLanderContinuous-v3")
+# print(env.action_space)  # Box(-1.0, 1.0, (2,), float32)
+wrapped_env = DiscreteActions(
+    env, [np.array([1, 0]), np.array([-1, 0]), np.array([0, 1]), np.array([0, -1])]
+)
+# print(wrapped_env.action_space)  # Discrete(4)
 
 
 # %%