automl
diff --git a/‎docs/_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.rst‎
Lines changed: 1 addition & 1 deletion b/‎docs/_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/_build/html/_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/_build/html/_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html‎
Lines changed: 4 additions & 4 deletions b/‎docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/_build/html/_sources/_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.rst.txt‎
Lines changed: 1 addition & 1 deletion b/‎docs/_build/html/_sources/_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.rst.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/_build/html/genindex.html‎
Lines changed: 1 addition & 1 deletion b/‎docs/_build/html/genindex.html‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/_build/html/searchindex.js‎
Lines changed: 1 addition & 1 deletion b/‎docs/_build/html/searchindex.js‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎example.py‎
Lines changed: 13 additions & 13 deletions b/‎example.py‎
Lines changed: 13 additions & 13 deletions
@@ -18,7 +18,7 @@ mdp\_playground.envs.rl\_toy\_env.RLToyEnv
 
       ~RLToyEnv.__init__
       ~RLToyEnv.close
-      ~RLToyEnv.get_augmented_state
+      ~RLToyEnv.get_markov_state
       ~RLToyEnv.init_init_state_dist
       ~RLToyEnv.init_reward_function
       ~RLToyEnv.init_terminal_states
 
@@ -712,8 +712,8 @@ <h1>mdp_playground.envs.rl_toy_env.RLToyEnv<a class="headerlink" href="#mdp-play
 </dd></dl>
 
 <dl class="py method">
-<dt id="mdp_playground.envs.rl_toy_env.RLToyEnv.get_augmented_state">
-<code class="sig-name descname"><span class="pre">get_augmented_state</span></code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/mdp_playground/envs/rl_toy_env.html#RLToyEnv.get_augmented_state"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mdp_playground.envs.rl_toy_env.RLToyEnv.get_augmented_state" title="Permalink to this definition">¶</a></dt>
+<dt id="mdp_playground.envs.rl_toy_env.RLToyEnv.get_markov_state">
+<code class="sig-name descname"><span class="pre">get_markov_state</span></code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/mdp_playground/envs/rl_toy_env.html#RLToyEnv.get_markov_state"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mdp_playground.envs.rl_toy_env.RLToyEnv.get_markov_state" title="Permalink to this definition">¶</a></dt>
 <dd><p>gets underlying Markovian state of the MDP</p>
 </dd></dl>
 
@@ -765,7 +765,7 @@ <h1>mdp_playground.envs.rl_toy_env.RLToyEnv<a class="headerlink" href="#mdp-play
 <tr class="row-even"><td><p><a class="reference internal" href="#mdp_playground.envs.rl_toy_env.RLToyEnv.close" title="mdp_playground.envs.rl_toy_env.RLToyEnv.close"><code class="xref py py-obj docutils literal notranslate"><span class="pre">close</span></code></a>()</p></td>
 <td><p>Override close in your subclass to perform any necessary cleanup.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#id1" title="mdp_playground.envs.rl_toy_env.RLToyEnv.get_augmented_state"><code class="xref py py-obj docutils literal notranslate"><span class="pre">get_augmented_state</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#id1" title="mdp_playground.envs.rl_toy_env.RLToyEnv.get_markov_state"><code class="xref py py-obj docutils literal notranslate"><span class="pre">get_markov_state</span></code></a>()</p></td>
 <td><p>Intended to return the full augmented state which would be Markovian.</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#id2" title="mdp_playground.envs.rl_toy_env.RLToyEnv.init_init_state_dist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">init_init_state_dist</span></code></a>()</p></td>
@@ -837,7 +837,7 @@ <h1>mdp_playground.envs.rl_toy_env.RLToyEnv<a class="headerlink" href="#mdp-play
 
 <dl class="py method">
 <dt id="id1">
-<code class="sig-name descname"><span class="pre">get_augmented_state</span></code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/mdp_playground/envs/rl_toy_env.html#RLToyEnv.get_augmented_state"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#id1" title="Permalink to this definition">¶</a></dt>
+<code class="sig-name descname"><span class="pre">get_markov_state</span></code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/mdp_playground/envs/rl_toy_env.html#RLToyEnv.get_markov_state"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#id1" title="Permalink to this definition">¶</a></dt>
 <dd><p>Intended to return the full augmented state which would be Markovian. (However, it’s not Markovian wrt the noise in P and R because we’re not returning the underlying RNG.) Currently, returns the augmented state which is the sequence of length “delay + sequence_length + 1” of past states for both discrete and continuous environments. Additionally, the current state derivatives are also returned for continuous environments.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Returns</dt>
 
@@ -629,7 +629,7 @@ <h1>Source code for mdp_playground.envs.rl_toy_env</h1><div class="highlight"><p
 <span class="sd">        the reward function of the MDP, R</span>
 <span class="sd">    R(state, action)</span>
 <span class="sd">        defined as a lambda function in the call to init_reward_function() and is equivalent to calling reward_function()</span>
-<span class="sd">    get_augmented_state()</span>
+<span class="sd">    get_markov_state()</span>
 <span class="sd">        gets underlying Markovian state of the MDP</span>
 <span class="sd">    reset()</span>
 <span class="sd">        Resets environment state</span>
@@ -1834,9 +1834,9 @@ <h1>Source code for mdp_playground.envs.rl_toy_env</h1><div class="highlight"><p
             <span class="bp">self</span><span class="o">.</span><span class="n">reward</span> <span class="o">+=</span> <span class="bp">self</span><span class="o">.</span><span class="n">term_state_reward</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">reward_scale</span> <span class="c1"># Scale before or after?</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;sas</span><span class="se">\&#39;</span><span class="s1">r:   &#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">augmented_state</span><span class="p">[</span><span class="o">-</span><span class="mi">2</span><span class="p">])</span> <span class="o">+</span> <span class="s1">&#39;   &#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">action</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;   &#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">augmented_state</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="o">+</span> <span class="s1">&#39;   &#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reward</span><span class="p">))</span>
 
-        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">curr_obs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reward</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">done</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_augmented_state</span><span class="p">()</span></div>
+        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">curr_obs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reward</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">done</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_markov_state</span><span class="p">()</span></div>
 
-<div class="viewcode-block" id="RLToyEnv.get_augmented_state"><a class="viewcode-back" href="../../../_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html#mdp_playground.envs.rl_toy_env.RLToyEnv.get_augmented_state">[docs]</a>    <span class="k">def</span> <span class="nf">get_augmented_state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+<div class="viewcode-block" id="RLToyEnv.get_markov_state"><a class="viewcode-back" href="../../../_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html#mdp_playground.envs.rl_toy_env.RLToyEnv.get_markov_state">[docs]</a>    <span class="k">def</span> <span class="nf">get_markov_state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
         <span class="sd">&#39;&#39;&#39;Intended to return the full augmented state which would be Markovian. (However, it&#39;s not Markovian wrt the noise in P and R because we&#39;re not returning the underlying RNG.) Currently, returns the augmented state which is the sequence of length &quot;delay + sequence_length + 1&quot; of past states for both discrete and continuous environments. Additionally, the current state derivatives are also returned for continuous environments.</span>
 
 <span class="sd">        Returns</span>
@@ -2042,7 +2042,7 @@ <h1>Source code for mdp_playground.envs.rl_toy_env</h1><div class="highlight"><p
 
     <span class="n">config</span><span class="p">[</span><span class="s2">&quot;generate_random_mdp&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># This supersedes previous settings and generates a random transition function, a random reward function (for random specific sequences)</span>
     <span class="n">env</span> <span class="o">=</span> <span class="n">RLToyEnv</span><span class="p">(</span><span class="o">**</span><span class="n">config</span><span class="p">)</span>
-    <span class="n">state</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">env</span><span class="o">.</span><span class="n">get_augmented_state</span><span class="p">()[</span><span class="s1">&#39;curr_state&#39;</span><span class="p">])</span>
+    <span class="n">state</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">env</span><span class="o">.</span><span class="n">get_markov_state</span><span class="p">()[</span><span class="s1">&#39;curr_state&#39;</span><span class="p">])</span>
     <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">20</span><span class="p">):</span>
         <span class="c1"># env.render() # For GUI</span>
         <span class="n">action</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="c1"># take a #random action</span>
 
@@ -18,7 +18,7 @@ mdp\_playground.envs.rl\_toy\_env.RLToyEnv
 
       ~RLToyEnv.__init__
       ~RLToyEnv.close
-      ~RLToyEnv.get_augmented_state
+      ~RLToyEnv.get_markov_state
       ~RLToyEnv.init_init_state_dist
       ~RLToyEnv.init_reward_function
       ~RLToyEnv.init_terminal_states
 
@@ -820,7 +820,7 @@ <h2 id="G">G</h2>
   <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="_autosummary/mdp_playground.spaces.image_continuous.ImageContinuous.html#mdp_playground.spaces.image_continuous.ImageContinuous.generate_image">generate_image() (mdp_playground.spaces.image_continuous.ImageContinuous method)</a>
 </li>
-      <li><a href="_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html#id1">get_augmented_state() (mdp_playground.envs.rl_toy_env.RLToyEnv method)</a>, <a href="_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html#mdp_playground.envs.rl_toy_env.RLToyEnv.get_augmented_state">[1]</a>
+      <li><a href="_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html#id1">get_markov_state() (mdp_playground.envs.rl_toy_env.RLToyEnv method)</a>, <a href="_autosummary/mdp_playground.envs.rl_toy_env.RLToyEnv.html#mdp_playground.envs.rl_toy_env.RLToyEnv.get_markov_state">[1]</a>
 </li>
       <li><a href="_autosummary/mdp_playground.spaces.image_continuous.ImageContinuous.html#id0">get_image_representation() (mdp_playground.spaces.image_continuous.ImageContinuous method)</a>, <a href="_autosummary/mdp_playground.spaces.image_continuous.ImageContinuous.html#mdp_playground.spaces.image_continuous.ImageContinuous.get_image_representation">[1]</a>
 
 
@@ -72,7 +72,7 @@ def discrete_environment_example():
     # The environment maintains an augmented state which contains the underlying
     # state used by the MDP to perform transitions and hand out rewards. We can
     # fetch a dict containing the augmented state and current state like this:
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     state = augmented_state_dict["curr_state"]
 
     print(
@@ -113,7 +113,7 @@ def discrete_environment_image_representations_example():
     # The environment maintains an augmented state which contains the underlying
     # state used by the MDP to perform transitions and hand out rewards. We can
     # fetch a dict containing the augmented state and current state like this:
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     state = augmented_state_dict["curr_state"]
 
     print(
@@ -122,7 +122,7 @@ def discrete_environment_image_representations_example():
     )
     action = env.action_space.sample()
     next_state_image, reward, done, trunc, info = env.step(action)
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
     print("sars', done, image shape =", state, action, reward, next_state, done, next_state_image.shape)
@@ -161,7 +161,7 @@ def discrete_environment_diameter_image_representations_example():
     # The environment maintains an augmented state which contains the underlying
     # state used by the MDP to perform transitions and hand out rewards. We can
     # fetch a dict containing the augmented state and current state like this:
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     state = augmented_state_dict["curr_state"]
 
     print(
@@ -170,7 +170,7 @@ def discrete_environment_diameter_image_representations_example():
     )
     action = env.action_space.sample()
     next_state_image, reward, done, trunc, info = env.step(action)
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
     print("sars', done, shape =", state, action, reward, next_state, done, next_state_image.shape)
@@ -247,7 +247,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
 
     env = RLToyEnv(**config)
     state = env.reset()[0]
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
 
@@ -257,7 +257,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
     )
     action = env.action_space.sample()
     next_state_image, reward, done, trunc, info = env.step(action)
-    augmented_state_dict = env.get_augmented_state()
+    augmented_state_dict = env.get_markov_state()
     next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
     print("sars', done, image shape =", state, action, reward, next_state, done, next_state_image.shape)
@@ -319,13 +319,13 @@ def grid_environment_example():
 
     env = RLToyEnv(**config)
 
-    state = env.get_augmented_state()["augmented_state"][-1]
+    state = env.get_markov_state()["augmented_state"][-1]
     actions = [[0, 1], [-1, 0], [-1, 0], [1, 0], [0.5, -0.5], [1, 2], [1, 1], [0, 1]]
 
     for i in range(len(actions)):
         action = actions[i]
         next_obs, reward, done, trunc, info = env.step(action)
-        next_state = env.get_augmented_state()["augmented_state"][-1]
+        next_state = env.get_markov_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state
 
@@ -348,13 +348,13 @@ def grid_environment_example_reward_every_n_steps():
 
     env = RLToyEnv(**config)
 
-    state = env.get_augmented_state()["augmented_state"][-1]
+    state = env.get_markov_state()["augmented_state"][-1]
     actions = [[0, 1], [-1, 0], [-1, 0], [1, 0], [0.5, -0.5], [1, 2], [1, 1], [0, 1]]
 
     for i in range(len(actions)):
         action = actions[i]
         next_obs, reward, done, trunc, info = env.step(action)
-        next_state = env.get_augmented_state()["augmented_state"][-1]
+        next_state = env.get_markov_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state
 
@@ -379,13 +379,13 @@ def grid_environment_image_representations_example():
     config["terminal_states"] = [[5, 5], [2, 3], [2, 4], [3, 3], [3, 4]]
     env = RLToyEnv(**config)
 
-    state = env.get_augmented_state()["augmented_state"][-1]
+    state = env.get_markov_state()["augmented_state"][-1]
     actions = [[0, 1], [-1, 0], [-1, 0], [1, 0], [0.5, -0.5], [1, 2]]
 
     for i in range(len(actions)):
         action = actions[i]
         next_obs, reward, done, trunc, info = env.step(action)
-        next_state = env.get_augmented_state()["augmented_state"][-1]
+        next_state = env.get_markov_state()["augmented_state"][-1]
         print("sars', done, image shape =", state, action, reward, next_state, done, next_obs.shape)
         state = next_state