RobotFlow-Labs
diff --git a/.github/workflows/mlx-macos.yml b/.github/workflows/mlx-macos.yml
Lines changed: 27 additions & 4 deletions
diff --git a/PORT_TO_MLX_TODO.md b/PORT_TO_MLX_TODO.md
Lines changed: 6 additions & 1 deletion
@@ -278,6 +278,7 @@ jobs:
           source/isaaclab/test/backends/test_mac_camera_capture.py \
           source/isaaclab/test/backends/test_mac_sensor_raycast.py \
           source/isaaclab/test/backends/test_mac_stereo_depth.py \
+          source/isaaclab/test/backends/test_mac_runtime_diagnostics.py \
           source/isaaclab/test/backends/test_mlx_task_cli.py \
           source/isaaclab/test/backends/test_planner_compat.py \
           source/isaaclab/test/backends/test_ros2_bridge.py \
@@ -300,6 +301,7 @@ jobs:
           source/isaaclab/test/backends/test_mac_franka_stack.py \
           source/isaaclab/test/backends/test_mac_franka_stack_rgb.py \
           source/isaaclab/test/backends/test_mac_franka_cabinet.py \
+          source/isaaclab/test/backends/test_mac_franka_open_drawer.py \
           source/isaaclab/test/backends/test_mac_h1.py -q
 
     - name: Run MLX cart-double-pendulum smoke script
@@ -481,6 +483,7 @@ jobs:
         assert "franka-stack" in list_mlx_tasks()
         assert "franka-stack-rgb" in list_mlx_tasks()
         assert "franka-cabinet" in list_mlx_tasks()
+        assert "franka-open-drawer" in list_mlx_tasks()
         train_payload = train_mlx_task(
             "franka-stack-rgb",
             num_envs=8,
@@ -538,6 +541,8 @@ jobs:
           logs/hardware/synthetic_depth \
           --max-disparity 4 \
           --window-size 1 \
+          --fx 700.0 \
+          --baseline-mm 120.0 \
           --summary-out logs/hardware/synthetic_depth/summary.json
         test -f logs/hardware/synthetic_depth/left_rgb.png
         test -f logs/hardware/synthetic_depth/right_rgb.png
@@ -562,6 +567,18 @@ jobs:
         test -f logs/hardware/ros2-bridge-smoke.jsonl
         test -f logs/hardware/ros2-bridge-smoke-summary.json
 
+    - name: Run runtime diagnostics smoke
+      run: |
+        mkdir -p logs/runtime
+        PYTHONPATH=.:source/isaaclab .venv/bin/python \
+          scripts/tools/mac_runtime_diagnostics.py \
+          logs/runtime/mac-runtime-diagnostics.json
+        test -f logs/runtime/mac-runtime-diagnostics.json
+
+    - name: Run uv bootstrap dry-run smoke
+      run: |
+        uv run scripts/bootstrap_uv_mlx.py --dry-run --venv .venv-bootstrap-smoke
+
     - name: Run MLX benchmark smoke
       run: |
         PYTHONPATH=.:source/isaaclab .venv/bin/python \
@@ -625,22 +642,28 @@ jobs:
         assert len(payload["benchmarks"]) >= 5
         print(payload["tasks"])
         dashboard = json.loads(Path("logs/benchmarks/mlx/smoke-dashboard.json").read_text(encoding="utf-8"))
-        assert dashboard["summary"]["rollout_task_count"] == 12
+        assert dashboard["summary"]["rollout_task_count"] == 13
         assert dashboard["summary"]["training_task_count"] == 0
         trend = json.loads(Path("logs/benchmarks/mlx/smoke-trend.json").read_text(encoding="utf-8"))
-        assert trend["summary"]["task_count"] == 12
+        assert trend["summary"]["task_count"] == 13
         planner_payload = json.loads(Path("logs/planner/mac-planner-smoke.json").read_text(encoding="utf-8"))
         assert planner_payload["planner"]["implementation"] == "joint-space-linear-interpolation"
         assert planner_payload["plan"]["waypoint_count"] == 6
         ros_payload = json.loads(Path("logs/hardware/ros2-bridge-smoke-summary.json").read_text(encoding="utf-8"))
-        assert ros_payload["message_count"] == 4
+        assert ros_payload["message_count"] == 6
         assert ros_payload["pub_command"][:3] == ["ros2", "topic", "pub"]
         assert ros_payload["planner_roundtrip_ok"] is True
         assert ros_payload["trajectory_roundtrip_ok"] is True
+        assert ros_payload["planner_batch_size"] == 2
+        assert ros_payload["trajectory_batch_size"] == 2
         stereo_payload = json.loads(Path("logs/hardware/synthetic_depth/summary.json").read_text(encoding="utf-8"))
         assert stereo_payload["validated_capture"] is True
         assert stereo_payload["disparity_shape"] == [4, 4]
+        assert stereo_payload["depth_mm_mean"] is not None
         assert Path(stereo_payload["left_rgb_path"]).is_file()
+        runtime_payload = json.loads(Path("logs/runtime/mac-runtime-diagnostics.json").read_text(encoding="utf-8"))
+        assert runtime_payload["runtime"]["supported_tasks"]["public_task_count"] >= 15
+        assert runtime_payload["sim"]["supported_tasks"]["current_mac_native_count"] >= 13
         sensor_payload = json.loads(Path("logs/benchmarks/mlx/sensor-smoke.json").read_text(encoding="utf-8"))
         assert sensor_payload["task_group"] == "sensor-mac-native"
         assert sensor_payload["cpu_fallback_detected"] is False
@@ -670,7 +693,7 @@ jobs:
         full_payload = json.loads(Path("logs/benchmarks/mlx/full-smoke.json").read_text(encoding="utf-8"))
         assert full_payload["task_group"] == "full"
         full_dashboard = json.loads(Path("logs/benchmarks/mlx/full-smoke-dashboard.json").read_text(encoding="utf-8"))
-        assert full_dashboard["summary"]["rollout_task_count"] == 16
+        assert full_dashboard["summary"]["rollout_task_count"] == 17
         assert full_dashboard["summary"]["training_task_count"] == 1
         full_trend = json.loads(Path("logs/benchmarks/mlx/full-smoke-trend.json").read_text(encoding="utf-8"))
         assert any(entry["kind"] == "training" for entry in full_trend["tasks"])
 
@@ -73,6 +73,7 @@ without pausing for replanning after every small success.
 - `DONE` Third trainable manipulation slice landed for `Isaac-Stack-Cube-Franka-v0` with compiled stack hotpath helpers, shared PPO/checkpoint contracts, public MLX wrapper training, benchmark coverage, semantic baseline refresh, and CI smoke coverage
 - `DONE` Fourth trainable stack variant landed for `Isaac-Stack-Cube-RedGreenBlue-Franka-IK-Rel-v0` with a reduced three-cube sequential stack backend, staged terminal benchmark metrics, shared PPO/checkpoint contracts, public MLX wrapper training, direct thin CLI wrappers, semantic baseline refresh, and CI smoke coverage
 - `DONE` Fifth trainable manipulation slice landed for `Isaac-Franka-Cabinet-Direct-v0` with a reduced drawer workflow, compiled cabinet hotpath helper, shared PPO/checkpoint contracts, public MLX wrapper training, benchmark coverage, semantic baseline refresh, and CI smoke coverage
+- `DONE` Sixth trainable Franka manipulation slice landed for `Isaac-Open-Drawer-Franka-v0` with a reduced analytic drawer substrate, public MLX wrapper/CLI exposure, benchmark coverage, refreshed semantic baseline, and focused backend tests
 - `DONE` First raycast-driven mac-native task landed for `Isaac-Velocity-Rough-Anymal-C-Direct-v0` with procedural wave terrain, analytic terrain raycasts, benchmark coverage, and deterministic replay tests
 - `DONE` Rough locomotion slices for ANYmal-C and H1 now expose full MLX PPO train/replay surfaces with rough-task checkpoint metadata, wrapper coverage, and CI smoke coverage
 - `DONE` Synthetic cartpole RGB/depth camera slices landed as eval-only mac-native tasks with deterministic analytic `100x100` observations, public MLX wrapper exposure, sensor benchmark coverage, and CI smoke coverage
@@ -86,7 +87,9 @@ without pausing for replanning after every small success.
 - `DONE` Planner and ROS compatibility now carry richer planner world-state obstacles plus ROS-friendly world-state / timed joint-trajectory envelopes without requiring ROS Python bindings
 - `DONE` CI now proves a release-style MLX install path without `dev` extras or `PYTHONPATH`, then parses rough locomotion/manipulation configs and exercises the public wrapper
 - `DONE` Generic `mac-sensors` capability metadata is now honest about the public runtime surface: analytic raycasts plus synthetic camera task slices and backend-local external stereo capture, not generic Isaac Sim camera parity
+- `DONE` Supported public MLX/mac tasks now come from a shared typed manifest with a runtime diagnostics CLI so kernel inventory, wrapper task lists, benchmark groups, and runtime capability reporting cannot drift independently
 - `DONE` ROS/planner software smokes now exercise the real `mac-planners` backend and verify typed round-trip reconstruction of planner world-state and joint trajectories
+- `DONE` Planner/ROS batch helpers now restore batches by `batch_index` and report actual batch envelope counts instead of inferring from message order or `max(index) + 1`
 - `DONE` Stereo/depth smoke now validates raw capture artifacts before processing and writes a machine-checkable JSON summary artifact
 - `DONE` `uv run scripts/bootstrap_uv_mlx.py` now bootstraps the public MLX/mac editable environment in one command
 
@@ -740,7 +743,7 @@ without pausing for replanning after every small success.
 This queue exists so work can continue without waiting for a new plan. The documented v1 board above is now closed for the current public MLX/mac slice, so the next queue is follow-on parity work:
 
 - Hardware validation is now done for the backend-local stereo path against live ZED 2i capture through a camera-authorized Terminal host plus `zed-sdk-mlx`; retained host-local probe artifacts include `/tmp/isaaclab-zed-probe-live-final.json` and `/tmp/isaaclab-zed-probe-live-final.yuv`.
-- Port the next manipulation milestone beyond the current five trainable Franka slices, likely a richer cabinet/drawer variant or the next multi-object manipulation workflow.
+- Port the next manipulation milestone beyond the current six trainable Franka slices, likely a richer cabinet/drawer variant or the next multi-object manipulation workflow.
 - Replace the next remaining locomotion or contact/support `mx.compile` helper with a true custom Metal kernel only after the root-step tranche proves benchmark-positive and semantically stable.
 - Grow the planner/ROS prototypes carefully: richer process/message interoperability layers around the new world-state and joint-trajectory envelopes while still avoiding CUDA/NITROS assumptions.
 - Keep the generic runtime metadata honest: only advertise generic sensor/runtime capabilities that are actually exposed through backend-neutral APIs, and push task-specific or tooling-only support into explicit diagnostic fields instead of broad parity flags.
@@ -758,6 +761,7 @@ PYTHONPATH=.:source/isaaclab:source/isaaclab_rl .venv/bin/pytest \
   source/isaaclab/test/backends/test_kernel_inventory.py \
   source/isaaclab/test/backends/test_kernel_compat.py \
   source/isaaclab/test/backends/test_mac_hotpath.py \
+  source/isaaclab/test/backends/test_mac_runtime_diagnostics.py \
   source/isaaclab/test/backends/test_planner_compat.py \
   source/isaaclab/test/backends/test_ros2_bridge.py \
   source/isaaclab/test/backends/test_mac_benchmark_suite.py \
@@ -775,6 +779,7 @@ PYTHONPATH=.:source/isaaclab:source/isaaclab_rl .venv/bin/pytest \
   source/isaaclab/test/backends/test_mac_franka_reach.py \
   source/isaaclab/test/backends/test_mac_franka_lift.py \
   source/isaaclab/test/backends/test_mac_franka_stack.py \
+  source/isaaclab/test/backends/test_mac_franka_open_drawer.py \
   source/isaaclab/test/backends/test_mac_h1.py -q
 ```