diff --git a/examples/rl/actor_critic_cartpole.py b/examples/rl/actor_critic_cartpole.py
index c44552446c..a4e1496ba0 100644
--- a/examples/rl/actor_critic_cartpole.py
+++ b/examples/rl/actor_critic_cartpole.py
@@ -2,16 +2,16 @@
 Title: Actor Critic Method
 Author: [Apoorv Nandan](https://twitter.com/NandanApoorv)
 Date created: 2020/05/13
-Last modified: 2024/02/22
+Last modified: 2025/01/07
 Description: Implement Actor Critic Method in CartPole environment.
-Accelerator: NONE
+Accelerator: None
 Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
 """
 
 """
 ## Introduction
 
-This script shows an implementation of Actor Critic method on CartPole-V0 environment.
+This script shows an implementation of the Actor Critic method on the CartPole-V1 environment.
 
 ### Actor Critic Method
 
@@ -26,7 +26,7 @@
 Agent and Critic learn to perform their tasks, such that the recommended actions
 from the actor maximize the rewards.
 
-### CartPole-V0
+### CartPole-V1
 
 A pole is attached to a cart placed on a frictionless track. The agent has to apply
 force to move the cart. It is rewarded for every time step the pole
@@ -45,7 +45,7 @@
 import os
 
 os.environ["KERAS_BACKEND"] = "tensorflow"
-import gym
+import gymnasium as gym
 import numpy as np
 import keras
 from keras import ops
@@ -57,7 +57,7 @@
 gamma = 0.99  # Discount factor for past rewards
 max_steps_per_episode = 10000
 # Adding `render_mode='human'` will show the attempts of the agent
-env = gym.make("CartPole-v0")  # Create the environment
+env = gym.make("CartPole-v1")  # Create the environment
 env.reset(seed=seed)
 eps = np.finfo(np.float32).eps.item()  # Smallest number such that 1.0 + eps != 1.0
 
@@ -98,12 +98,12 @@
 episode_count = 0
 
 while True:  # Run until solved
-    state = env.reset()[0]
+    obs, _ = env.reset()
     episode_reward = 0
     with tf.GradientTape() as tape:
         for timestep in range(1, max_steps_per_episode):
 
-            state = ops.convert_to_tensor(state)
+            state = ops.convert_to_tensor(obs)
             state = ops.expand_dims(state, 0)
 
             # Predict action probabilities and estimated future rewards
@@ -116,10 +116,11 @@
             action_probs_history.append(ops.log(action_probs[0, action]))
 
             # Apply the sampled action in our environment
-            state, reward, done, *_ = env.step(action)
+            obs, reward, terminated, truncated, _ = env.step(action)
             rewards_history.append(reward)
             episode_reward += reward
 
+            done = terminated or truncated
             if done:
                 break
 
diff --git a/examples/rl/ipynb/actor_critic_cartpole.ipynb b/examples/rl/ipynb/actor_critic_cartpole.ipynb
index 47d03a83fc..df7711cfb1 100644
--- a/examples/rl/ipynb/actor_critic_cartpole.ipynb
+++ b/examples/rl/ipynb/actor_critic_cartpole.ipynb
@@ -10,7 +10,7 @@
     "\n",
     "**Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>\n",
     "**Date created:** 2020/05/13<br>\n",
-    "**Last modified:** 2024/02/22<br>\n",
+    "**Last modified:** 2025/01/07<br>\n",
     "**Description:** Implement Actor Critic Method in CartPole environment."
    ]
   },
@@ -22,7 +22,7 @@
    "source": [
     "## Introduction\n",
     "\n",
-    "This script shows an implementation of Actor Critic method on CartPole-V0 environment.\n",
+    "This script shows an implementation of the Actor Critic method on the CartPole-V1 environment.\n",
     "\n",
     "### Actor Critic Method\n",
     "\n",
@@ -37,7 +37,7 @@
     "Agent and Critic learn to perform their tasks, such that the recommended actions\n",
     "from the actor maximize the rewards.\n",
     "\n",
-    "### CartPole-V0\n",
+    "### CartPole-V1\n",
     "\n",
     "A pole is attached to a cart placed on a frictionless track. The agent has to apply\n",
     "force to move the cart. It is rewarded for every time step the pole\n",
@@ -47,7 +47,7 @@
     "\n",
     "- [Environment documentation](https://gymnasium.farama.org/environments/classic_control/cart_pole/)\n",
     "- [CartPole paper](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)\n",
-    "- [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)\n"
+    "- [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)"
    ]
   },
   {
@@ -56,12 +56,12 @@
     "colab_type": "text"
    },
    "source": [
-    "## Setup\n"
+    "## Setup"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 0,
    "metadata": {
     "colab_type": "code"
    },
@@ -70,7 +70,7 @@
     "import os\n",
     "\n",
     "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
-    "import gym\n",
+    "import gymnasium as gym\n",
     "import numpy as np\n",
     "import keras\n",
     "from keras import ops\n",
@@ -82,7 +82,7 @@
     "gamma = 0.99  # Discount factor for past rewards\n",
     "max_steps_per_episode = 10000\n",
     "# Adding `render_mode='human'` will show the attempts of the agent\n",
-    "env = gym.make(\"CartPole-v0\")  # Create the environment\n",
+    "env = gym.make(\"CartPole-v1\")  # Create the environment\n",
     "env.reset(seed=seed)\n",
     "eps = np.finfo(np.float32).eps.item()  # Smallest number such that 1.0 + eps != 1.0"
    ]
@@ -102,12 +102,12 @@
     "2. Critic: This takes as input the state of our environment and returns\n",
     "an estimate of total rewards in the future.\n",
     "\n",
-    "In our implementation, they share the initial layer.\n"
+    "In our implementation, they share the initial layer."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 0,
    "metadata": {
     "colab_type": "code"
    },
@@ -131,12 +131,12 @@
     "colab_type": "text"
    },
    "source": [
-    "## Train\n"
+    "## Train"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 0,
    "metadata": {
     "colab_type": "code"
    },
@@ -151,12 +151,12 @@
     "episode_count = 0\n",
     "\n",
     "while True:  # Run until solved\n",
-    "    state = env.reset()[0]\n",
+    "    obs, _ = env.reset()\n",
     "    episode_reward = 0\n",
     "    with tf.GradientTape() as tape:\n",
     "        for timestep in range(1, max_steps_per_episode):\n",
     "\n",
-    "            state = ops.convert_to_tensor(state)\n",
+    "            state = ops.convert_to_tensor(obs)\n",
     "            state = ops.expand_dims(state, 0)\n",
     "\n",
     "            # Predict action probabilities and estimated future rewards\n",
@@ -169,10 +169,11 @@
     "            action_probs_history.append(ops.log(action_probs[0, action]))\n",
     "\n",
     "            # Apply the sampled action in our environment\n",
-    "            state, reward, done, *_ = env.step(action)\n",
+    "            obs, reward, terminated, truncated, _ = env.step(action)\n",
     "            rewards_history.append(reward)\n",
     "            episode_reward += reward\n",
     "\n",
+    "            done = terminated or truncated\n",
     "            if done:\n",
     "                break\n",
     "\n",
@@ -245,12 +246,12 @@
     "![Imgur](https://i.imgur.com/5gCs5kH.gif)\n",
     "\n",
     "In later stages of training:\n",
-    "![Imgur](https://i.imgur.com/5ziiZUD.gif)\n"
+    "![Imgur](https://i.imgur.com/5ziiZUD.gif)"
    ]
   }
  ],
  "metadata": {
-  "accelerator": "GPU",
+  "accelerator": "None",
   "colab": {
    "collapsed_sections": [],
    "name": "actor_critic_cartpole",
@@ -273,9 +274,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.8"
+   "version": "3.7.0"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}
\ No newline at end of file
diff --git a/examples/rl/md/actor_critic_cartpole.md b/examples/rl/md/actor_critic_cartpole.md
index 5dbef18efe..c8383cc241 100644
--- a/examples/rl/md/actor_critic_cartpole.md
+++ b/examples/rl/md/actor_critic_cartpole.md
@@ -2,7 +2,7 @@
 
 **Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>
 **Date created:** 2020/05/13<br>
-**Last modified:** 2024/02/22<br>
+**Last modified:** 2025/01/07<br>
 **Description:** Implement Actor Critic Method in CartPole environment.
 
 
@@ -13,7 +13,7 @@
 ---
 ## Introduction
 
-This script shows an implementation of Actor Critic method on CartPole-V0 environment.
+This script shows an implementation of the Actor Critic method on the CartPole-V1 environment.
 
 ### Actor Critic Method
 
@@ -28,7 +28,7 @@ the observed state of the environment to two possible outputs:
 Agent and Critic learn to perform their tasks, such that the recommended actions
 from the actor maximize the rewards.
 
-### CartPole-V0
+### CartPole-V1
 
 A pole is attached to a cart placed on a frictionless track. The agent has to apply
 force to move the cart. It is rewarded for every time step the pole
@@ -40,16 +40,15 @@ remains upright. The agent, therefore, must learn to keep the pole from falling
 - [CartPole paper](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf)
 - [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)
 
-
 ---
 ## Setup
 
 
-
 ```python
 import os
+
 os.environ["KERAS_BACKEND"] = "tensorflow"
-import gym
+import gymnasium as gym
 import numpy as np
 import keras
 from keras import ops
@@ -61,12 +60,10 @@ seed = 42
 gamma = 0.99  # Discount factor for past rewards
 max_steps_per_episode = 10000
 # Adding `render_mode='human'` will show the attempts of the agent
-env = gym.make("CartPole-v0")  # Create the environment
+env = gym.make("CartPole-v1")  # Create the environment
 env.reset(seed=seed)
 eps = np.finfo(np.float32).eps.item()  # Smallest number such that 1.0 + eps != 1.0
-
 ```
-
 ---
 ## Implement Actor Critic network
 
@@ -80,7 +77,6 @@ an estimate of total rewards in the future.
 In our implementation, they share the initial layer.
 
 
-
 ```python
 num_inputs = 4
 num_actions = 2
@@ -92,14 +88,12 @@ action = layers.Dense(num_actions, activation="softmax")(common)
 critic = layers.Dense(1)(common)
 
 model = keras.Model(inputs=inputs, outputs=[action, critic])
-
 ```
 
 ---
 ## Train
 
 
-
 ```python
 optimizer = keras.optimizers.Adam(learning_rate=0.01)
 huber_loss = keras.losses.Huber()
@@ -110,12 +104,12 @@ running_reward = 0
 episode_count = 0
 
 while True:  # Run until solved
-    state = env.reset()[0]
+    obs, _ = env.reset()
     episode_reward = 0
     with tf.GradientTape() as tape:
         for timestep in range(1, max_steps_per_episode):
 
-            state = ops.convert_to_tensor(state)
+            state = ops.convert_to_tensor(obs)
             state = ops.expand_dims(state, 0)
 
             # Predict action probabilities and estimated future rewards
@@ -128,10 +122,11 @@ while True:  # Run until solved
             action_probs_history.append(ops.log(action_probs[0, action]))
 
             # Apply the sampled action in our environment
-            state, reward, done, *_ = env.step(action)
+            obs, reward, terminated, truncated, _ = env.step(action)
             rewards_history.append(reward)
             episode_reward += reward
 
+            done = terminated or truncated
             if done:
                 break
 
@@ -191,47 +186,31 @@ while True:  # Run until solved
     if running_reward > 195:  # Condition to consider the task solved
         print("Solved at episode {}!".format(episode_count))
         break
-
 ```
 
 <div class="k-default-codeblock">
 ```
-running reward: 8.82 at episode 10
-running reward: 23.04 at episode 20
-running reward: 28.41 at episode 30
-running reward: 53.59 at episode 40
-running reward: 53.71 at episode 50
-running reward: 77.35 at episode 60
-running reward: 74.76 at episode 70
-running reward: 57.89 at episode 80
-running reward: 46.59 at episode 90
-running reward: 43.48 at episode 100
-running reward: 63.77 at episode 110
-running reward: 111.13 at episode 120
-running reward: 142.77 at episode 130
-running reward: 127.96 at episode 140
-running reward: 113.92 at episode 150
-running reward: 128.57 at episode 160
-running reward: 139.95 at episode 170
-running reward: 154.95 at episode 180
-running reward: 171.45 at episode 190
-running reward: 171.33 at episode 200
-running reward: 177.74 at episode 210
-running reward: 184.76 at episode 220
-running reward: 190.88 at episode 230
-running reward: 154.78 at episode 240
-running reward: 114.38 at episode 250
-running reward: 107.51 at episode 260
-running reward: 128.99 at episode 270
-running reward: 157.48 at episode 280
-running reward: 174.54 at episode 290
-running reward: 184.76 at episode 300
-running reward: 190.87 at episode 310
-running reward: 194.54 at episode 320
-Solved at episode 322!
+running reward: 13.73 at episode 10
+running reward: 22.93 at episode 20
+running reward: 20.96 at episode 30
+running reward: 18.73 at episode 40
+running reward: 28.80 at episode 50
+running reward: 27.52 at episode 60
+running reward: 29.73 at episode 70
+running reward: 45.53 at episode 80
+running reward: 60.19 at episode 90
+running reward: 78.66 at episode 100
+running reward: 112.70 at episode 110
+running reward: 91.89 at episode 120
+running reward: 91.08 at episode 130
+running reward: 77.85 at episode 140
+running reward: 121.86 at episode 150
+running reward: 173.82 at episode 160
+Solved at episode 163!
 
 ```
 </div>
+
 ---
 ## Visualizations
 In early stages of training:
@@ -239,4 +218,3 @@ In early stages of training:
 
 In later stages of training:
 ![Imgur](https://i.imgur.com/5ziiZUD.gif)
-