adaptive-intelligent-robotics
diff --git a/examples/aurora.ipynb b/examples/aurora.ipynb
Lines changed: 9 additions & 1 deletion
diff --git a/examples/cmaes.ipynb b/examples/cmaes.ipynb
Lines changed: 6 additions & 3 deletions
diff --git a/examples/mapelites_asktell.ipynb b/examples/mapelites_asktell.ipynb
Lines changed: 5 additions & 2 deletions
diff --git a/examples/mees.ipynb b/examples/mees.ipynb
Lines changed: 4 additions & 7 deletions
diff --git a/examples/nsga2_spea2.ipynb b/examples/nsga2_spea2.ipynb
Lines changed: 3 additions & 3 deletions
diff --git a/examples/pga_aurora.ipynb b/examples/pga_aurora.ipynb
Lines changed: 9 additions & 1 deletion
diff --git a/qdax/baselines/cmaes.py b/qdax/baselines/cmaes.py
Lines changed: 0 additions & 7 deletions
diff --git a/qdax/baselines/dads.py b/qdax/baselines/dads.py
Lines changed: 4 additions & 13 deletions
diff --git a/qdax/baselines/dads_smerl.py b/qdax/baselines/dads_smerl.py
Lines changed: 1 addition & 4 deletions
@@ -122,6 +122,11 @@
     "prior_descriptor_dim = 2 #@param {type:\"integer\"}\n",
     "\n",
     "log_freq = 5 #@param {type:\"integer\"}\n",
+    "\n",
+    "# Custom observations key that will be used to store the observations in the\n",
+    "# extra_scores of the repertoire\n",
+    "aurora_observations_key = \"observations\"\n",
+    "\n",
     "#@markdown ---"
    ]
   },
@@ -258,6 +263,7 @@
     "aurora_scoring_fn = get_aurora_scoring_fn(\n",
     "    scoring_fn=scoring_fn,\n",
     "    observation_extractor_fn=observation_extractor_fn,\n",
+    "    observations_key=aurora_observations_key,\n",
     ")\n",
     "\n",
     "# Get minimum reward value to make sure qd_score are positive\n",
@@ -389,6 +395,7 @@
     "    metrics_function=metrics_fn,\n",
     "    encoder_function=encoder_fn,\n",
     "    training_function=train_fn,\n",
+    "    observations_key=aurora_observations_key,\n",
     ")\n",
     "\n",
     "# define arbitrary observation's mean/std\n",
@@ -444,6 +451,7 @@
     "n_target = 1024\n",
     "\n",
     "previous_error = jnp.sum(repertoire.fitnesses != -jnp.inf) - n_target\n",
+    "container_size_control_fn = jax.jit(aurora.container_size_control)\n",
     "\n",
     "iteration = 0\n",
     "while iteration < max_iterations:\n",
@@ -472,7 +480,7 @@
     "        )\n",
     "\n",
     "    elif iteration % 2 == 0:\n",
-    "        repertoire, previous_error = aurora.container_size_control(\n",
+    "        repertoire, previous_error = container_size_control_fn(\n",
     "            repertoire,\n",
     "            target_size=n_target,\n",
     "            previous_error=previous_error,\n",
 
@@ -208,18 +208,21 @@
     "covs = [(state.sigma**2) * state.cov_matrix]\n",
     "\n",
     "iteration_count = 0\n",
+    "sample_fn = jax.jit(cmaes.sample)\n",
+    "update_fn = jax.jit(cmaes.update)\n",
+    "stop_condition_fn = jax.jit(cmaes.stop_condition)\n",
     "for _ in range(num_iterations):\n",
     "    iteration_count += 1\n",
     "\n",
     "    # sample\n",
     "    key, subkey = jax.random.split(key)\n",
-    "    samples = cmaes.sample(state, subkey)\n",
+    "    samples = sample_fn(state, subkey)\n",
     "\n",
     "    # update\n",
-    "    state = cmaes.update(state, samples)\n",
+    "    state = update_fn(state, samples)\n",
     "\n",
     "    # check stop condition\n",
-    "    stop_condition = cmaes.stop_condition(state)\n",
+    "    stop_condition = stop_condition_fn(state)\n",
     "\n",
     "    if stop_condition:\n",
     "        break\n",
 
@@ -326,20 +326,23 @@
     "except ImportError:\n",
     "    bar = range(num_iterations)\n",
     "\n",
+    "ask_fn = jax.jit(map_elites.ask)\n",
+    "tell_fn = jax.jit(map_elites.tell)\n",
+    "\n",
     "# Main loop\n",
     "for i in bar:\n",
     "    start_time = time.time()\n",
     "    key, subkey = jax.random.split(key)\n",
     "    # Generate solutions\n",
-    "    genotypes, extra_info = map_elites.ask(repertoire, emitter_state, subkey)\n",
+    "    genotypes, extra_info = ask_fn(repertoire, emitter_state, subkey)\n",
     "\n",
     "    # Evaluate solutions: get fitness, descriptor and extra scores.\n",
     "    # This is where custom evaluations on CPU or GPU can be added.\n",
     "    key, subkey = jax.random.split(key)\n",
     "    fitnesses, descriptors, extra_scores = scoring_fn(genotypes, subkey)\n",
     "\n",
     "    # Update MAP-Elites\n",
-    "    repertoire, emitter_state, current_metrics = map_elites.tell(\n",
+    "    repertoire, emitter_state, current_metrics = tell_fn(\n",
     "        genotypes=genotypes,\n",
     "        fitnesses=fitnesses,\n",
     "        descriptors=descriptors,\n",
 
@@ -248,11 +248,11 @@
     "\n",
     "# Prepare the scoring functions for the offspring generated following\n",
     "# the approximated gradient (each of them is evaluated 30 times)\n",
-    "sampling_fn = functools.partial(\n",
+    "sampling_fn = jax.jit(functools.partial(\n",
     "    sampling,\n",
     "    scoring_fn=scoring_fn,\n",
     "    num_samples=30,\n",
-    ")\n",
+    "))\n",
     "\n",
     "# Get minimum reward value to make sure qd_score are positive\n",
     "reward_offset = environments.reward_offset[env_name]\n",
@@ -448,11 +448,8 @@
    "provenance": []
   },
   "gpuClass": "standard",
-  "interpreter": {
-   "hash": "9ae46cf6a59eb5e192bc4f27fbb5c33d8a30eb9acb43edbb510eeaf7c819ab64"
-  },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },
@@ -466,7 +463,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,
 
@@ -98,7 +98,6 @@
     "proportion_mutation = 0.80 #@param {type:\"number\"}\n",
     "minval = -5.12 #@param {type:\"number\"}\n",
     "maxval = 5.12 #@param {type:\"number\"}\n",
-    "batch_size = 100 #@param {type:\"integer\"}\n",
     "genotype_dim = 6 #@param {type:\"integer\"}\n",
     "lag = 2.2 #@param {type:\"number\"}\n",
     "base_lag = 0 #@param {type:\"number\"}\n",
@@ -184,7 +183,7 @@
     "key = jax.random.key(0)\n",
     "key, subkey = jax.random.split(key)\n",
     "genotypes = jax.random.uniform(\n",
-    "    subkey, (batch_size, genotype_dim), minval=minval, maxval=maxval, dtype=jnp.float32\n",
+    "    subkey, (population_size, genotype_dim), minval=minval, maxval=maxval, dtype=jnp.float32\n",
     ")\n",
     "\n",
     "# Mutation & Crossover\n",
@@ -202,11 +201,12 @@
     ")\n",
     "\n",
     "# Define the emitter\n",
+    "# NSGA-II and SPEA2 use batch size = population size\n",
     "mixing_emitter = MixingEmitter(\n",
     "    mutation_fn=mutation_function, \n",
     "    variation_fn=crossover_function, \n",
     "    variation_percentage=1-proportion_mutation, \n",
-    "    batch_size=batch_size\n",
+    "    batch_size=population_size, \n",
     ")"
    ]
   },
 
@@ -140,6 +140,11 @@
     "policy_delay = 2 #@param {type:\"number\"}\n",
     "\n",
     "log_freq = 5 #@param {type:\"integer\"}\n",
+    "\n",
+    "# Custom observations key that will be used to store the observations in the\n",
+    "# extra_scores of the repertoire\n",
+    "aurora_observations_key = \"observations\"\n",
+    "\n",
     "#@markdown ---"
    ]
   },
@@ -276,6 +281,7 @@
     "aurora_scoring_fn = get_aurora_scoring_fn(\n",
     "    scoring_fn=scoring_fn,\n",
     "    observation_extractor_fn=observation_extractor_fn,\n",
+    "    observations_key=aurora_observations_key,\n",
     ")\n",
     "\n",
     "# Get minimum reward value to make sure qd_score are positive\n",
@@ -441,6 +447,7 @@
     "    metrics_function=metrics_fn,\n",
     "    encoder_function=encoder_fn,\n",
     "    training_function=train_fn,\n",
+    "    observations_key=aurora_observations_key,\n",
     ")\n",
     "\n",
     "# init the model params\n",
@@ -502,6 +509,7 @@
     "n_target = 1024\n",
     "\n",
     "previous_error = jnp.sum(repertoire.fitnesses != -jnp.inf) - n_target\n",
+    "container_size_control_fn = jax.jit(aurora.container_size_control)\n",
     "\n",
     "iteration = 0\n",
     "while iteration < max_iterations:\n",
@@ -530,7 +538,7 @@
     "        )\n",
     "\n",
     "    elif iteration % 2 == 0:\n",
-    "        repertoire, previous_error = aurora.container_size_control(\n",
+    "        repertoire, previous_error = container_size_control_fn(\n",
     "            repertoire,\n",
     "            target_size=n_target,\n",
     "            previous_error=previous_error,\n",
 
@@ -3,7 +3,6 @@
 a CMA optimization script. Link to the paper: https://arxiv.org/abs/1604.00772
 """
 
-from functools import partial
 from typing import Callable, Optional, Tuple
 
 import flax
@@ -165,7 +164,6 @@ def init(self) -> CMAESState:
             invsqrt_cov=invsqrt_cov,
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def sample(self, cmaes_state: CMAESState, key: RNGKey) -> Genotype:
         """
         Sample a population.
@@ -186,7 +184,6 @@ def sample(self, cmaes_state: CMAESState, key: RNGKey) -> Genotype:
         )
         return samples
 
-    @partial(jax.jit, static_argnames=("self",))
     def update_state(
         self,
         cmaes_state: CMAESState,
@@ -198,7 +195,6 @@ def update_state(
             weights=self._weights,
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def update_state_with_mask(
         self, cmaes_state: CMAESState, sorted_candidates: Genotype, mask: Mask
     ) -> CMAESState:
@@ -217,7 +213,6 @@ def update_state_with_mask(
             weights=weights,
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def _update_state(
         self,
         cmaes_state: CMAESState,
@@ -332,7 +327,6 @@ def update_eigen(
 
         return cmaes_state
 
-    @partial(jax.jit, static_argnames=("self",))
     def update(self, cmaes_state: CMAESState, samples: Genotype) -> CMAESState:
         """Updates the distribution.
 
@@ -352,7 +346,6 @@ def update(self, cmaes_state: CMAESState, samples: Genotype) -> CMAESState:
 
         return new_state  # type: ignore
 
-    @partial(jax.jit, static_argnames=("self",))
     def stop_condition(self, cmaes_state: CMAESState) -> bool:
         """Determines if the current optimization path must be stopped.
 
 
@@ -4,7 +4,6 @@
 """
 
 from dataclasses import dataclass
-from functools import partial
 from typing import Callable, Tuple
 
 import jax
@@ -191,7 +190,6 @@ def init(  # type: ignore
             steps=jnp.array(0),
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def _compute_diversity_reward(
         self, transition: QDTransition, training_state: DadsTrainingState
     ) -> Reward:
@@ -244,8 +242,7 @@ def _compute_diversity_reward(
 
         return reward
 
-    @partial(jax.jit, static_argnames=("self", "env", "deterministic", "evaluation"))
-    def play_step_fn(
+    def play_step_fn(  # type: ignore
         self,
         env_state: EnvState,
         training_state: DadsTrainingState,
@@ -339,14 +336,13 @@ def play_step_fn(
 
         return next_env_state, training_state, transition
 
-    @partial(jax.jit, static_argnames=("self", "play_step_fn", "env_batch_size"))
-    def eval_policy_fn(
+    def eval_policy_fn(  # type: ignore
         self,
         training_state: DadsTrainingState,
         eval_env_first_state: EnvState,
         play_step_fn: Callable[
-            [EnvState, Params, RNGKey],
-            Tuple[EnvState, Params, RNGKey, QDTransition],
+            [EnvState, Params],
+            Tuple[EnvState, Params, QDTransition],
         ],
         env_batch_size: int,
     ) -> Tuple[Reward, Reward, Reward, StateDescriptor]:
@@ -400,7 +396,6 @@ def eval_policy_fn(
 
         return true_return, true_returns, diversity_returns, transitions.state_desc
 
-    @partial(jax.jit, static_argnames=("self",))
     def _compute_reward(
         self, transition: QDTransition, training_state: DadsTrainingState
     ) -> Reward:
@@ -417,7 +412,6 @@ def _compute_reward(
             transition=transition, training_state=training_state
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def _update_dynamics(
         self, operand: Tuple[DadsTrainingState, QDTransition]
     ) -> Tuple[Params, float, optax.OptState]:
@@ -448,7 +442,6 @@ def _update_dynamics(
             dynamics_optimizer_state,
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def _not_update_dynamics(
         self, operand: Tuple[DadsTrainingState, QDTransition]
     ) -> Tuple[Params, float, optax.OptState]:
@@ -464,7 +457,6 @@ def _not_update_dynamics(
             training_state.dynamics_optimizer_state,
         )
 
-    @partial(jax.jit, static_argnames=("self",))
     def _update_networks(
         self,
         training_state: DadsTrainingState,
@@ -566,7 +558,6 @@ def _update_networks(
 
         return new_training_state, metrics
 
-    @partial(jax.jit, static_argnames=("self",))
     def update(
         self,
         training_state: DadsTrainingState,
 
@@ -5,7 +5,6 @@
 """
 
 from dataclasses import dataclass
-from functools import partial
 from typing import Optional, Tuple
 
 import jax
@@ -40,8 +39,7 @@ def __init__(self, config: DadsSmerlConfig, action_size: int, descriptor_size: i
         super(DADSSMERL, self).__init__(config, action_size, descriptor_size)
         self._config: DadsSmerlConfig = config
 
-    @partial(jax.jit, static_argnames=("self",))
-    def _compute_reward(
+    def _compute_reward(  # type: ignore
         self,
         transition: QDTransition,
         training_state: DadsTrainingState,
@@ -74,7 +72,6 @@ def _compute_reward(
 
         return rewards
 
-    @partial(jax.jit, static_argnames=("self",))
     def update(
         self,
         training_state: DadsTrainingState,