|
138 | 138 | "```" |
139 | 139 | ] |
140 | 140 | }, |
| 141 | + { |
| 142 | + "cell_type": "code", |
| 143 | + "execution_count": null, |
| 144 | + "metadata": { |
| 145 | + "slideshow": { |
| 146 | + "slide_type": "slide" |
| 147 | + } |
| 148 | + }, |
| 149 | + "outputs": [], |
| 150 | + "source": [ |
| 151 | + "# Importing the required packages\n", |
| 152 | + "from time import sleep\n", |
| 153 | + "\n", |
| 154 | + "import matplotlib.pyplot as plt\n", |
| 155 | + "import names\n", |
| 156 | + "import numpy as np\n", |
| 157 | + "from gymnasium.wrappers import RescaleAction\n", |
| 158 | + "from IPython.display import clear_output, display\n", |
| 159 | + "from stable_baselines3 import PPO\n", |
| 160 | + "\n", |
| 161 | + "from utils.helpers import (\n", |
| 162 | + " evaluate_ares_ea_agent,\n", |
| 163 | + " plot_ares_ea_training_history,\n", |
| 164 | + " show_video,\n", |
| 165 | + ")\n", |
| 166 | + "from utils.train import ARESEACheetah, make_env, read_from_yaml\n", |
| 167 | + "from utils.train import train as train_ares_ea\n", |
| 168 | + "from utils.utils import NotVecNormalize" |
| 169 | + ] |
| 170 | + }, |
141 | 171 | { |
142 | 172 | "cell_type": "markdown", |
143 | 173 | "metadata": { |
|
159 | 189 | } |
160 | 190 | }, |
161 | 191 | "source": [ |
162 | | - "<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n", |
163 | | - "<h3>Refresher from the lecture</h3>\n", |
164 | | - "\n", |
165 | | - "We need to define:\n", |
166 | | - "\n", |
167 | | - "- Actions\n", |
168 | | - "- Observations\n", |
169 | | - "- Reward\n", |
170 | | - "- Environment\n", |
171 | | - "- Agent\n", |
172 | | - " <img src=\"img/rl_problem_2.png\" style=\"width:70%; margin:auto;\"/>\n" |
| 192 | + "<img src=\"img/the_rl_problem.png\" style=\"width:70%; margin:auto;\"/>\n" |
173 | 193 | ] |
174 | 194 | }, |
175 | 195 | { |
|
181 | 201 | }, |
182 | 202 | "source": [ |
183 | 203 | "<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n", |
184 | | - "<h3>Refresher from the lecture</h3>\n", |
185 | 204 | "\n", |
186 | 205 | "We need to define:\n", |
187 | 206 | "\n", |
|
201 | 220 | } |
202 | 221 | }, |
203 | 222 | "source": [ |
204 | | - "<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n", |
205 | | - "<h3>Refresher from the lecture</h3>\n", |
206 | | - "\n", |
207 | | - "We need to define:\n", |
208 | | - "\n", |
209 | | - "- Actions\n", |
210 | | - "- Observations\n", |
211 | | - "- Reward\n", |
212 | | - "- Environment\n", |
213 | | - "- Agent\n", |
214 | | - " <img src=\"img/rl_problem_2.png\" style=\"width:70%; margin:auto;\"/>\n" |
| 223 | + "<img src=\"img/cumulate_reward.png\" style=\"width:70%; margin:auto;\"/>\n" |
215 | 224 | ] |
216 | 225 | }, |
217 | 226 | { |
|
222 | 231 | } |
223 | 232 | }, |
224 | 233 | "source": [ |
225 | | - "<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n", |
226 | | - "<h3>Refresher from the lecture</h3>\n", |
227 | | - "\n", |
228 | | - "We need to define:\n", |
229 | | - "\n", |
230 | | - "- Actions\n", |
231 | | - "- Observations\n", |
232 | | - "- Reward\n", |
233 | | - "- Environment\n", |
234 | | - "- Agent\n", |
235 | | - " <img src=\"img/rl_problem_2.png\" style=\"width:70%; margin:auto;\"/>\n" |
| 234 | + "<img src=\"img/the_agent.png\" style=\"width:70%; margin:auto;\"/>\n" |
| 235 | + ] |
| 236 | + }, |
| 237 | + { |
| 238 | + "cell_type": "markdown", |
| 239 | + "metadata": { |
| 240 | + "slideshow": { |
| 241 | + "slide_type": "slide" |
| 242 | + } |
| 243 | + }, |
| 244 | + "source": [ |
| 245 | + "<img src=\"img/markov.png\" style=\"width:70%; margin:auto;\"/>\n" |
236 | 246 | ] |
237 | 247 | }, |
238 | 248 | { |
|
284 | 294 | "\n", |
285 | 295 | "<h3 style=\"color:#038aa1;\">Discussion</h3>\n", |
286 | 296 | "<p style=\"color:#038aa1;\"> $\\implies$ Is the action space continuous or discrete? </p>\n", |
287 | | - "<p style=\"color:#038aa1;\"> $\\implies$ Is the problem deterministic or stochastic?</p>\n" |
| 297 | + "<p style=\"color:#038aa1;\"> $\\implies$ Is the problem fully observable or partially observable?</p>\n" |
288 | 298 | ] |
289 | 299 | }, |
290 | 300 | { |
|
578 | 588 | "You can find more information in the [paper](https://arxiv.org/abs/2401.05815) and the [code repository](https://github.com/desy-ml/cheetah).\n" |
579 | 589 | ] |
580 | 590 | }, |
581 | | - { |
582 | | - "cell_type": "code", |
583 | | - "execution_count": null, |
584 | | - "metadata": { |
585 | | - "slideshow": { |
586 | | - "slide_type": "slide" |
587 | | - } |
588 | | - }, |
589 | | - "outputs": [], |
590 | | - "source": [ |
591 | | - "# Importing the required packages\n", |
592 | | - "from time import sleep\n", |
593 | | - "\n", |
594 | | - "import matplotlib.pyplot as plt\n", |
595 | | - "import names\n", |
596 | | - "import numpy as np\n", |
597 | | - "from gymnasium.wrappers import RescaleAction\n", |
598 | | - "from IPython.display import clear_output, display\n", |
599 | | - "from stable_baselines3 import PPO\n", |
600 | | - "\n", |
601 | | - "from utils.helpers import (\n", |
602 | | - " evaluate_ares_ea_agent,\n", |
603 | | - " plot_ares_ea_training_history,\n", |
604 | | - " show_video,\n", |
605 | | - ")\n", |
606 | | - "from utils.train import ARESEACheetah, make_env, read_from_yaml\n", |
607 | | - "from utils.train import train as train_ares_ea\n", |
608 | | - "from utils.utils import NotVecNormalize" |
609 | | - ] |
610 | | - }, |
611 | 591 | { |
612 | 592 | "cell_type": "markdown", |
613 | 593 | "metadata": { |
|
0 commit comments