Skip to content

Commit ee89308

Browse files
committed
Add description and image for each env
1 parent 5a7457c commit ee89308

File tree

6 files changed

+26
-6
lines changed

6 files changed

+26
-6
lines changed

figure/bandit_env.png

2.79 KB
Loading

figure/nim_env.png

8.88 KB
Loading

figure/ocean_env.jpeg

5.2 KB
Loading

playground_app/mappings.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,33 @@
2121

2222
}
2323

24-
map_name_to_env = { "Ocean": { "Env" : OceanEnv,
24+
map_name_to_env = { "Ocean Env": { "Env" : OceanEnv,
2525
"model" : (transition_probability_ocean, reward_probability_ocean),
2626
"is_state_done" : lambda state : state == 0,
27-
"range_values" : [-20, 5]
27+
"range_values" : [-20, 5],
28+
"image_path" : "figure/ocean_env.jpeg",
29+
"description" : "In this environment you need to reach the beach as fast as possible. \
30+
You start in the ocean and you can only move in the 2 directions. \
31+
The state consists of the distance to the beach and is represented by an integer between 0 and 10 \
32+
(you can't go farther than 10). The reward is -1 at each step and 0 when you reach the beach. \
33+
The episode ends when you reach the beach. \
34+
",
2835
},
2936

30-
"Nim" : { "Env" : NimEnv,
37+
"Nim's Game" : { "Env" : NimEnv,
3138
"model" : (transition_probability_nim, reward_probability_nim),
3239
"is_state_done" : lambda state : state <= 0,
33-
"range_values" : [-2, 2]
40+
"range_values" : [-2, 2],
41+
"image_path" : "figure/nim_env.png",
42+
"description" : "In this game you start with 10 matches and you can remove 1, 2 or 3 matches at each step (those are your actions). The player that removes the last match loses. You play against a random agent. The state consist of the number of matches left and is represented by an integer between 0 and n_matches=25. The reward is 1 if you win, -1 if you lose and 0 if the game is not finished. The episode ends when the game is finished."
3443
},
3544

3645
"n-Bandit Contextual" : { "Env" : ContextualBanditEnv,
3746
"model" : (transition_probability_CB, reward_probability_CB),
3847
"is_state_done" : lambda state : state == -1,
39-
"range_values" : [-1, 4]
48+
"range_values" : [-1, 4],
49+
"image_path" : "figure/bandit_env.png",
50+
"description" : "In this famous environment, which is a foundation problem of theoretical RL, you have a slot machine with 4 arms. Each arm ill give you a reward following a random law that you don't now. This is contextual because which arm is better depends on the state. In particular here, the expected reward is r(s,a) = (s-a-1)%4 so the optimal action for each state is pi*(s)=s.",
4051
},
4152

4253
}

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
gym
22
numpy
33
streamlit
4-
plotly==5.9.0
4+
plotly==5.9.0
5+
altair==4.0.0

streamlit_app.py

+8
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,25 @@
1313
# Input 1 : env and problem type
1414
st.sidebar.header("Problem")
1515
env_name = st.sidebar.selectbox("Environment", map_name_to_env.keys())
16+
st.sidebar.caption(map_name_to_env[env_name]["description"])
1617
problem = st.sidebar.selectbox("Problem", ["Prediction Problem", "Control Problem"])
1718

1819
env_dict = map_name_to_env[env_name]
1920
Pssa, Rsa = env_dict["model"]
2021
env = env_dict["Env"]()
22+
env_description = env_dict["description"]
23+
env_image_path = env_dict["image_path"]
2124
config["env"] = env
2225
config["transition_probability"] = Pssa
2326
config["reward_probability"] = Rsa
2427
config["range_values"] = env_dict["range_values"]
2528
config["problem"] = problem
2629

30+
st.header(f"Environment : {env_name}")
31+
st.caption(env_description)
32+
st.image(env_image_path)
33+
34+
2735
if problem == "Prediction Problem":
2836
# Input 2 : policy to evaluate, value type and algo
2937
st.header("Algorithm used")

0 commit comments

Comments
 (0)