 }
 
-map_name_to_env = { "Ocean": { "Env" : OceanEnv,
+map_name_to_env = { "Ocean Env": { "Env" : OceanEnv,
                     "model" : (transition_probability_ocean, reward_probability_ocean),
                     "is_state_done" : lambda state : state == 0,
-                    "range_values" : [-20, 5]
+                    "range_values" : [-20, 5],
+                    "image_path" : "figure/ocean_env.jpeg",
+                    "description" : "In this environment you need to reach the beach as fast as possible. \
+                    You start in the ocean and can only move in 2 directions. \
+                    The state consists of the distance to the beach and is represented by an integer between 0 and 10 \
+                    (you can't go farther than 10). The reward is -1 at each step and 0 when you reach the beach. \
+                    The episode ends when you reach the beach. \
+                    ",
                     },
 
-                    "Nim" : { "Env" : NimEnv,
+                    "Nim's Game" : { "Env" : NimEnv,
                     "model" : (transition_probability_nim, reward_probability_nim),
                     "is_state_done" : lambda state : state <= 0,
-                    "range_values" : [-2, 2]
+                    "range_values" : [-2, 2],
+                    "image_path" : "figure/nim_env.png",
+                    "description" : "In this game you start with 10 matches and can remove 1, 2 or 3 matches at each step (these are your actions). The player who removes the last match loses. You play against a random agent. The state consists of the number of matches left and is represented by an integer between 0 and n_matches=25. The reward is 1 if you win, -1 if you lose, and 0 while the game is not finished. The episode ends when the game is finished."
                     },
 
                     "n-Bandit Contextual" : { "Env" : ContextualBanditEnv,
                     "model" : (transition_probability_CB, reward_probability_CB),
                     "is_state_done" : lambda state : state == -1,
-                    "range_values" : [-1, 4]
+                    "range_values" : [-1, 4],
+                    "image_path" : "figure/bandit_env.png",
+                    "description" : "In this famous environment, a foundational problem of theoretical RL, you have a slot machine with 4 arms. Each arm will give you a reward drawn from a distribution that you don't know. It is contextual because which arm is best depends on the state. Here the expected reward is r(s,a) = (s-a-1)%4, so the optimal action in each state is pi*(s) = s.",
                     },
 
 }
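
A quick sanity check of the claim in the bandit description: with 4 arms and expected reward r(s,a) = (s-a-1) % 4, the arm that maximizes the expected reward in state s is indeed a = s. A minimal standalone check (not part of the diff):

# Verify that argmax_a (s - a - 1) % 4 == s for every state s in {0, 1, 2, 3}.
for s in range(4):
    best_action = max(range(4), key=lambda a: (s - a - 1) % 4)
    assert best_action == s
    print(f"state {s}: optimal action = {best_action}")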
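For context, a rough sketch of how one of these registry entries might be consumed downstream. DummyEnv and its reset()/step() interface are assumptions for illustration; the real OceanEnv/NimEnv/ContextualBanditEnv classes are imported elsewhere in the repo:

# DummyEnv is a hypothetical stand-in; the real Env classes and their exact
# interface are defined elsewhere in the repo.
class DummyEnv:
    def reset(self):
        self.state = 10                            # start 10 steps from the beach
        return self.state

    def step(self, action):
        self.state = max(0, self.state - 1)        # drift one step toward the beach
        reward = 0 if self.state == 0 else -1      # -1 per step, 0 on arrival
        return self.state, reward

entry = {                                          # shaped like a map_name_to_env entry
    "Env": DummyEnv,
    "is_state_done": lambda state: state == 0,
    "range_values": [-20, 5],
    "image_path": "figure/ocean_env.jpeg",
    "description": "Reach the beach as fast as possible.",
}

env = entry["Env"]()                               # instantiate the registered class
state, total_reward = env.reset(), 0
while not entry["is_state_done"](state):           # roll out until the terminal state
    state, reward = env.step(action=0)
    total_reward += reward
print(entry["description"], "| return:", total_reward)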