Skip to content

Commit 2ad1f88

Browse files
committed
Last notebook changes
1 parent 0b5837a commit 2ad1f88

File tree

3 files changed

+45
-65
lines changed

3 files changed

+45
-65
lines changed

img/markov.png

307 KB
Loading

img/the_agent.png

205 KB
Loading

tutorial.ipynb

Lines changed: 45 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,36 @@
138138
"```"
139139
]
140140
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": null,
144+
"metadata": {
145+
"slideshow": {
146+
"slide_type": "slide"
147+
}
148+
},
149+
"outputs": [],
150+
"source": [
151+
"# Importing the required packages\n",
152+
"from time import sleep\n",
153+
"\n",
154+
"import matplotlib.pyplot as plt\n",
155+
"import names\n",
156+
"import numpy as np\n",
157+
"from gymnasium.wrappers import RescaleAction\n",
158+
"from IPython.display import clear_output, display\n",
159+
"from stable_baselines3 import PPO\n",
160+
"\n",
161+
"from utils.helpers import (\n",
162+
" evaluate_ares_ea_agent,\n",
163+
" plot_ares_ea_training_history,\n",
164+
" show_video,\n",
165+
")\n",
166+
"from utils.train import ARESEACheetah, make_env, read_from_yaml\n",
167+
"from utils.train import train as train_ares_ea\n",
168+
"from utils.utils import NotVecNormalize"
169+
]
170+
},
141171
{
142172
"cell_type": "markdown",
143173
"metadata": {
@@ -159,17 +189,7 @@
159189
}
160190
},
161191
"source": [
162-
"<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n",
163-
"<h3>Refresher from the lecture</h3>\n",
164-
"\n",
165-
"We need to define:\n",
166-
"\n",
167-
"- Actions\n",
168-
"- Observations\n",
169-
"- Reward\n",
170-
"- Environment\n",
171-
"- Agent\n",
172-
" <img src=\"img/rl_problem_2.png\" style=\"width:70%; margin:auto;\"/>\n"
192+
"<img src=\"img/the_rl_problem.png\" alt=\"The RL problem\" style=\"width:70%; margin:auto;\"/>\n"
173193
]
174194
},
175195
{
@@ -181,7 +201,6 @@
181201
},
182202
"source": [
183203
"<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n",
184-
"<h3>Refresher from the lecture</h3>\n",
185204
"\n",
186205
"We need to define:\n",
187206
"\n",
@@ -201,17 +220,7 @@
201220
}
202221
},
203222
"source": [
204-
"<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n",
205-
"<h3>Refresher from the lecture</h3>\n",
206-
"\n",
207-
"We need to define:\n",
208-
"\n",
209-
"- Actions\n",
210-
"- Observations\n",
211-
"- Reward\n",
212-
"- Environment\n",
213-
"- Agent\n",
214-
" <img src=\"img/rl_problem_2.png\" style=\"width:70%; margin:auto;\"/>\n"
223+
"<img src=\"img/cumulate_reward.png\" alt=\"Cumulative reward\" style=\"width:70%; margin:auto;\"/>\n"
215224
]
216225
},
217226
{
@@ -222,17 +231,18 @@
222231
}
223232
},
224233
"source": [
225-
"<h2 style=\"color: #b51f2a\">Formulating the RL problem</h2>\n",
226-
"<h3>Refresher from the lecture</h3>\n",
227-
"\n",
228-
"We need to define:\n",
229-
"\n",
230-
"- Actions\n",
231-
"- Observations\n",
232-
"- Reward\n",
233-
"- Environment\n",
234-
"- Agent\n",
235-
" <img src=\"img/rl_problem_2.png\" style=\"width:70%; margin:auto;\"/>\n"
234+
"<img src=\"img/the_agent.png\" alt=\"The agent\" style=\"width:70%; margin:auto;\"/>\n"
235+
]
236+
},
237+
{
238+
"cell_type": "markdown",
239+
"metadata": {
240+
"slideshow": {
241+
"slide_type": "slide"
242+
}
243+
},
244+
"source": [
245+
"<img src=\"img/markov.png\" style=\"width:70%; margin:auto;\"/>\n"
236246
]
237247
},
238248
{
@@ -284,7 +294,7 @@
284294
"\n",
285295
"<h3 style=\"color:#038aa1;\">Discussion</h3>\n",
286296
"<p style=\"color:#038aa1;\"> $\\implies$ Is the action space continuous or discrete? </p>\n",
287-
"<p style=\"color:#038aa1;\"> $\\implies$ Is the problem deterministic or stochastic?</p>\n"
297+
"<p style=\"color:#038aa1;\"> $\\implies$ Is the problem fully observable or partially observable?</p>\n"
288298
]
289299
},
290300
{
@@ -578,36 +588,6 @@
578588
"You can find more information in the [paper](https://arxiv.org/abs/2401.05815) and the [code repository](https://github.com/desy-ml/cheetah).\n"
579589
]
580590
},
581-
{
582-
"cell_type": "code",
583-
"execution_count": null,
584-
"metadata": {
585-
"slideshow": {
586-
"slide_type": "slide"
587-
}
588-
},
589-
"outputs": [],
590-
"source": [
591-
"# Importing the required packages\n",
592-
"from time import sleep\n",
593-
"\n",
594-
"import matplotlib.pyplot as plt\n",
595-
"import names\n",
596-
"import numpy as np\n",
597-
"from gymnasium.wrappers import RescaleAction\n",
598-
"from IPython.display import clear_output, display\n",
599-
"from stable_baselines3 import PPO\n",
600-
"\n",
601-
"from utils.helpers import (\n",
602-
" evaluate_ares_ea_agent,\n",
603-
" plot_ares_ea_training_history,\n",
604-
" show_video,\n",
605-
")\n",
606-
"from utils.train import ARESEACheetah, make_env, read_from_yaml\n",
607-
"from utils.train import train as train_ares_ea\n",
608-
"from utils.utils import NotVecNormalize"
609-
]
610-
},
611591
{
612592
"cell_type": "markdown",
613593
"metadata": {

0 commit comments

Comments
 (0)