Commit a4b2db0

Merge branch 'main' into patch-2

2 parents: 5875c00 + 3423421
10 files changed: +908, -28 lines

examples/aloha_real/Dockerfile

Lines changed: 1 addition & 1 deletion

```diff
@@ -6,7 +6,7 @@
 # Run the container:
 # docker run --rm -it --network=host -v /dev:/dev -v .:/app --privileged aloha_real /bin/bash
 
-FROM ros:noetic-robot@sha256:0e12e4db836e78c74c4b04c6d16f185d9a18d2b13cf5580747efa075eb6dc6e0
+FROM ros:noetic-robot@sha256:7cf0b9f6546abeba308ea42cb7ad3453f3e520e1af57cdf179fe915c939674bc
 
 SHELL ["/bin/bash", "-c"]
 
 ENV DEBIAN_FRONTEND=noninteractive
```

examples/aloha_real/real_env.py

Lines changed: 8 additions & 3 deletions

```diff
@@ -112,12 +112,17 @@ def _reset_joints(self):
         )
 
     def _reset_gripper(self):
-        """Set to position mode and do position resets: first open then close. Then change back to PWM mode"""
+        """Set to position mode and do position resets: first close then open. Then change back to PWM mode.
+
+        NOTE: This diverges from the original Aloha code, which first opens then closes the gripper. Pi internal
+        Aloha data was collected with the gripper starting in the open position. Leaving the grippers fully closed
+        was also found to increase the frequency of motor faults.
+        """
         robot_utils.move_grippers(
-            [self.puppet_bot_left, self.puppet_bot_right], [constants.PUPPET_GRIPPER_JOINT_OPEN] * 2, move_time=0.5
+            [self.puppet_bot_left, self.puppet_bot_right], [constants.PUPPET_GRIPPER_JOINT_CLOSE] * 2, move_time=1
         )
         robot_utils.move_grippers(
-            [self.puppet_bot_left, self.puppet_bot_right], [constants.PUPPET_GRIPPER_JOINT_CLOSE] * 2, move_time=1
+            [self.puppet_bot_left, self.puppet_bot_right], [constants.PUPPET_GRIPPER_JOINT_OPEN] * 2, move_time=0.5
         )
 
     def get_observation(self):
```

examples/droid/README.md

Lines changed: 24 additions & 0 deletions

````diff
@@ -44,3 +44,27 @@ The script will ask you to enter a free-form language instruction for the robot
 | Cannot find cameras | Make sure the camera IDs are correct and that the cameras are connected to the DROID laptop. Sometimes replugging the cameras can help. You can check all connected cameras by running `ZED_Explore` in the command line. |
 | Policy inference is slow / inconsistent | Try using a wired internet connection for the DROID laptop to reduce latency (0.5 - 1 sec latency per chunk is normal). |
 | Policy does not perform the task well | In our experiments, the policy could perform simple table top manipulation tasks (pick-and-place) across a wide range of environments, camera positions, and lighting conditions. If the policy does not perform the task well, you can try modifying the scene or object placement to make the task easier. Also make sure that the camera view you are passing to the policy can see all relevant objects in the scene (the policy is only conditioned on a single external camera + wrist camera, make sure you are feeding the desired camera to the policy). Use `ZED_Explore` to check that the camera view you are passing to the policy can see all relevant objects in the scene. Finally, the policy is far from perfect and will fail on more complex manipulation tasks, but it usually makes a decent effort. :) |
+
+
+# Running RoboArena Baseline Policies
+
+We provide configs for running the baseline DROID policies from the [RoboArena](https://robo-arena.github.io/) paper. Simply run the commands below to start inference servers for the respective policies. Then follow the instructions above to run evaluation on the DROID robot.
+
+```
+# Trained from PaliGemma, using RT-2 / OpenVLA style binning tokenizer.
+uv run scripts/serve_policy.py policy:checkpoint --policy.config=paligemma_binning_droid --policy.dir=gs://openpi-assets/checkpoints/roboarena/paligemma_binning_droid
+
+# Trained from PaliGemma, using FAST tokenizer (using universal FAST+ tokenizer).
+uv run scripts/serve_policy.py policy:checkpoint --policy.config=paligemma_fast_droid --policy.dir=gs://openpi-assets/checkpoints/roboarena/paligemma_fast_droid
+
+# Trained from PaliGemma, using FAST tokenizer (tokenizer trained on DROID dataset).
+uv run scripts/serve_policy.py policy:checkpoint --policy.config=paligemma_fast_specialist_droid --policy.dir=gs://openpi-assets/checkpoints/roboarena/paligemma_fast_specialist_droid
+
+# Trained from PaliGemma, using FSQ tokenizer.
+uv run scripts/serve_policy.py policy:checkpoint --policy.config=paligemma_vq_droid --policy.dir=gs://openpi-assets/checkpoints/roboarena/paligemma_vq_droid
+
+# pi0-style diffusion / flow VLA, trained on DROID from PaliGemma.
+uv run scripts/serve_policy.py policy:checkpoint --policy.config=paligemma_diffusion_droid --policy.dir=gs://openpi-assets/checkpoints/roboarena/paligemma_diffusion_droid
+```
+
+You can find the inference configs in [roboarena_config.py](../../src/openpi/training/misc/roboarena_config.py).
````
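Once one of these servers is up, a robot-side client can query it for action chunks. Below is a minimal sketch, assuming the `openpi_client` package and its `WebsocketClientPolicy` wrapper; the observation keys shown follow the DROID convention but are illustrative, so check the DROID example client for the exact schema each policy expects.

```python
# Minimal client sketch (assumes openpi_client's WebsocketClientPolicy;
# observation keys are illustrative -- see the DROID example for the real schema).
import numpy as np
from openpi_client import websocket_client_policy

policy = websocket_client_policy.WebsocketClientPolicy(host="localhost", port=8000)
dummy_obs = {
    "observation/exterior_image_1_left": np.zeros((224, 224, 3), dtype=np.uint8),
    "observation/wrist_image_left": np.zeros((224, 224, 3), dtype=np.uint8),
    "observation/joint_position": np.zeros(7, dtype=np.float32),
    "observation/gripper_position": np.zeros(1, dtype=np.float32),
    "prompt": "pick up the marker",
}
action_chunk = policy.infer(dummy_obs)["actions"]  # one chunk of robot actions
```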

examples/libero/convert_libero_data_to_lerobot.py

Lines changed: 5 additions & 7 deletions

```diff
@@ -14,13 +14,13 @@
 `uv pip install tensorflow tensorflow_datasets`
 
 You can download the raw Libero datasets from https://huggingface.co/datasets/openvla/modified_libero_rlds
-The resulting dataset will get saved to the $LEROBOT_HOME directory.
+The resulting dataset will get saved to the $HF_LEROBOT_HOME directory.
 Running this conversion script will take approximately 30 minutes.
 """
 
 import shutil
 
-from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
+from lerobot.common.datasets.lerobot_dataset import HF_LEROBOT_HOME
 from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 import tensorflow_datasets as tfds
 import tyro
@@ -36,7 +36,7 @@
 
 def main(data_dir: str, *, push_to_hub: bool = False):
     # Clean up any existing dataset in the output directory
-    output_path = LEROBOT_HOME / REPO_NAME
+    output_path = HF_LEROBOT_HOME / REPO_NAME
     if output_path.exists():
         shutil.rmtree(output_path)
 
@@ -85,12 +85,10 @@ def main(data_dir: str, *, push_to_hub: bool = False):
                 "wrist_image": step["observation"]["wrist_image"],
                 "state": step["observation"]["state"],
                 "actions": step["action"],
+                "task": step["language_instruction"].decode(),
             }
         )
-        dataset.save_episode(task=step["language_instruction"].decode())
-
-        # Consolidate the dataset, skip computing stats since we will do that later
-        dataset.consolidate(run_compute_stats=False)
+        dataset.save_episode()
 
     # Optionally push to the Hugging Face Hub
     if push_to_hub:
```
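The last hunk tracks a LeRobot dataset API change: the language instruction now travels with every frame under a `task` key instead of being passed to `save_episode`, and the separate `consolidate` pass is gone. A sketch of the resulting episode loop, with the surrounding script elided (`episode` is an illustrative stand-in for the RLDS episode iterator):

```python
# Sketch of the post-change episode loop; `episode` is an illustrative
# stand-in for the RLDS episode iterator used by the conversion script.
for step in episode:
    dataset.add_frame(
        {
            "wrist_image": step["observation"]["wrist_image"],
            "state": step["observation"]["state"],
            "actions": step["action"],
            # The task string is now a per-frame field...
            "task": step["language_instruction"].decode(),
        }
    )
# ...so save_episode() takes no arguments, and no consolidate() call is needed.
dataset.save_episode()
```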

src/openpi/models/pi0_fast.py

Lines changed: 20 additions & 9 deletions

```diff
@@ -1,5 +1,6 @@
 import dataclasses
 import logging
+from typing import Any
 
 import einops
 import flax.nnx as nnx
@@ -82,6 +83,11 @@ class Pi0FASTConfig(_model.BaseModelConfig):
     action_horizon: int = 32
     max_token_len: int = 250
 
+    # Tokenizer for the fast model.
+    fast_model_tokenizer: Any | None = None
+    # Keyword arguments for the fast model tokenizer.
+    fast_model_tokenizer_kwargs: dict[str, Any] | None = None
+
     @property
     @override
     def model_type(self) -> _model.ModelType:
@@ -265,14 +271,17 @@ def sample_actions(
         output_tokens = jnp.zeros((last_logit.shape[0], max_decoding_steps))
 
         def step(carry):
-            last_logit, output_tokens, cache, _, step = carry
+            rng, last_logit, output_tokens, cache, _, step = carry
 
             # Sample token from last logit
-            if temperature > 0.0:
-                last_logit = last_logit / temperature
-                token = jax.random.categorical(rng, last_logit, axis=-1)
-            else:
-                token = jnp.argmax(last_logit, axis=-1)
+            # Split RNG for this step
+            rng, rng_step = jax.random.split(rng)
+            token = jax.lax.cond(
+                temperature > 0.0,
+                lambda _: jax.random.categorical(rng_step, last_logit / temperature, axis=-1),
+                lambda _: jnp.argmax(last_logit, axis=-1),
+                operand=None,
+            )
             output_tokens = put_along_last_axis(output_tokens, jnp.broadcast_to(step, (token.shape[0], 1)), token)
 
             # Check for early stopping --> stop if all batch elements have EOS token
@@ -291,12 +300,14 @@ def step(carry):
                 embedded_prefix=token_embedding, mask=mask, positions=positions, decode=True, kv_cache=cache
             )
 
-            return last_logit, output_tokens, kv_cache, all_eos, step + 1
+            return rng, last_logit, output_tokens, kv_cache, all_eos, step + 1
 
         def cond(carry):
-            _, _, _, all_eos, step = carry
+            _, _, _, _, all_eos, step = carry
             return (~all_eos) & (step < max_decoding_steps)
 
         # Use lax.while_loop so we can jit the full decoding loop.
-        _, output_tokens, _, _, _ = jax.lax.while_loop(cond, step, (last_logit, output_tokens, kv_cache, False, 0))
+        _, _, output_tokens, _, _, _ = jax.lax.while_loop(
+            cond, step, (rng, last_logit, output_tokens, kv_cache, False, 0)
+        )
         return output_tokens
```
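Two things change in this decoding loop: the PRNG key is now threaded through the `lax.while_loop` carry and split once per step (the old code closed over a single `rng`, so every step reused the same key), and the Python-level `if temperature > 0.0` becomes a `jax.lax.cond`, which keeps the branch inside the traced graph. A self-contained sketch of the same pattern, with illustrative names (`next_logits_fn` stands in for the model's single-step decode call):

```python
# Self-contained sketch of the pattern above; next_logits_fn is an
# illustrative stand-in for the model's single-step decode call.
import jax
import jax.numpy as jnp

def greedy_or_sampled_decode(rng, init_logit, next_logits_fn, max_steps, temperature):
    def step(carry):
        rng, logit, toks, i = carry
        rng, rng_step = jax.random.split(rng)  # fresh subkey every iteration
        token = jax.lax.cond(
            temperature > 0.0,
            lambda _: jax.random.categorical(rng_step, logit / temperature, axis=-1),
            lambda _: jnp.argmax(logit, axis=-1),
            operand=None,
        )
        toks = toks.at[:, i].set(token)  # record token at this decoding step
        return rng, next_logits_fn(token), toks, i + 1

    def cond(carry):
        *_, i = carry
        return i < max_steps

    toks = jnp.zeros((init_logit.shape[0], max_steps), dtype=jnp.int32)
    _, _, toks, _ = jax.lax.while_loop(cond, step, (rng, init_logit, toks, 0))
    return toks
```

Unlike a Python `if`, `lax.cond` traces both branches, so the loop stays jit-compatible even when `temperature` is an abstract traced value rather than a Python float.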
