Skip to content

Commit f615f36

Browse files
committed
Add open-drawer slice and runtime diagnostics
1 parent a490dd9 commit f615f36

33 files changed

Lines changed: 1361 additions & 101 deletions

.github/workflows/mlx-macos.yml

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ jobs:
278278
source/isaaclab/test/backends/test_mac_camera_capture.py \
279279
source/isaaclab/test/backends/test_mac_sensor_raycast.py \
280280
source/isaaclab/test/backends/test_mac_stereo_depth.py \
281+
source/isaaclab/test/backends/test_mac_runtime_diagnostics.py \
281282
source/isaaclab/test/backends/test_mlx_task_cli.py \
282283
source/isaaclab/test/backends/test_planner_compat.py \
283284
source/isaaclab/test/backends/test_ros2_bridge.py \
@@ -300,6 +301,7 @@ jobs:
300301
source/isaaclab/test/backends/test_mac_franka_stack.py \
301302
source/isaaclab/test/backends/test_mac_franka_stack_rgb.py \
302303
source/isaaclab/test/backends/test_mac_franka_cabinet.py \
304+
source/isaaclab/test/backends/test_mac_franka_open_drawer.py \
303305
source/isaaclab/test/backends/test_mac_h1.py -q
304306
305307
- name: Run MLX cart-double-pendulum smoke script
@@ -481,6 +483,7 @@ jobs:
481483
assert "franka-stack" in list_mlx_tasks()
482484
assert "franka-stack-rgb" in list_mlx_tasks()
483485
assert "franka-cabinet" in list_mlx_tasks()
486+
assert "franka-open-drawer" in list_mlx_tasks()
484487
train_payload = train_mlx_task(
485488
"franka-stack-rgb",
486489
num_envs=8,
@@ -538,6 +541,8 @@ jobs:
538541
logs/hardware/synthetic_depth \
539542
--max-disparity 4 \
540543
--window-size 1 \
544+
--fx 700.0 \
545+
--baseline-mm 120.0 \
541546
--summary-out logs/hardware/synthetic_depth/summary.json
542547
test -f logs/hardware/synthetic_depth/left_rgb.png
543548
test -f logs/hardware/synthetic_depth/right_rgb.png
@@ -562,6 +567,18 @@ jobs:
562567
test -f logs/hardware/ros2-bridge-smoke.jsonl
563568
test -f logs/hardware/ros2-bridge-smoke-summary.json
564569
570+
- name: Run runtime diagnostics smoke
571+
run: |
572+
mkdir -p logs/runtime
573+
PYTHONPATH=.:source/isaaclab .venv/bin/python \
574+
scripts/tools/mac_runtime_diagnostics.py \
575+
logs/runtime/mac-runtime-diagnostics.json
576+
test -f logs/runtime/mac-runtime-diagnostics.json
577+
578+
- name: Run uv bootstrap dry-run smoke
579+
run: |
580+
uv run scripts/bootstrap_uv_mlx.py --dry-run --venv .venv-bootstrap-smoke
581+
565582
- name: Run MLX benchmark smoke
566583
run: |
567584
PYTHONPATH=.:source/isaaclab .venv/bin/python \
@@ -625,22 +642,28 @@ jobs:
625642
assert len(payload["benchmarks"]) >= 5
626643
print(payload["tasks"])
627644
dashboard = json.loads(Path("logs/benchmarks/mlx/smoke-dashboard.json").read_text(encoding="utf-8"))
628-
assert dashboard["summary"]["rollout_task_count"] == 12
645+
assert dashboard["summary"]["rollout_task_count"] == 13
629646
assert dashboard["summary"]["training_task_count"] == 0
630647
trend = json.loads(Path("logs/benchmarks/mlx/smoke-trend.json").read_text(encoding="utf-8"))
631-
assert trend["summary"]["task_count"] == 12
648+
assert trend["summary"]["task_count"] == 13
632649
planner_payload = json.loads(Path("logs/planner/mac-planner-smoke.json").read_text(encoding="utf-8"))
633650
assert planner_payload["planner"]["implementation"] == "joint-space-linear-interpolation"
634651
assert planner_payload["plan"]["waypoint_count"] == 6
635652
ros_payload = json.loads(Path("logs/hardware/ros2-bridge-smoke-summary.json").read_text(encoding="utf-8"))
636-
assert ros_payload["message_count"] == 4
653+
assert ros_payload["message_count"] == 6
637654
assert ros_payload["pub_command"][:3] == ["ros2", "topic", "pub"]
638655
assert ros_payload["planner_roundtrip_ok"] is True
639656
assert ros_payload["trajectory_roundtrip_ok"] is True
657+
assert ros_payload["planner_batch_size"] == 2
658+
assert ros_payload["trajectory_batch_size"] == 2
640659
stereo_payload = json.loads(Path("logs/hardware/synthetic_depth/summary.json").read_text(encoding="utf-8"))
641660
assert stereo_payload["validated_capture"] is True
642661
assert stereo_payload["disparity_shape"] == [4, 4]
662+
assert stereo_payload["depth_mm_mean"] is not None
643663
assert Path(stereo_payload["left_rgb_path"]).is_file()
664+
runtime_payload = json.loads(Path("logs/runtime/mac-runtime-diagnostics.json").read_text(encoding="utf-8"))
665+
assert runtime_payload["runtime"]["supported_tasks"]["public_task_count"] >= 15
666+
assert runtime_payload["sim"]["supported_tasks"]["current_mac_native_count"] >= 13
644667
sensor_payload = json.loads(Path("logs/benchmarks/mlx/sensor-smoke.json").read_text(encoding="utf-8"))
645668
assert sensor_payload["task_group"] == "sensor-mac-native"
646669
assert sensor_payload["cpu_fallback_detected"] is False
@@ -670,7 +693,7 @@ jobs:
670693
full_payload = json.loads(Path("logs/benchmarks/mlx/full-smoke.json").read_text(encoding="utf-8"))
671694
assert full_payload["task_group"] == "full"
672695
full_dashboard = json.loads(Path("logs/benchmarks/mlx/full-smoke-dashboard.json").read_text(encoding="utf-8"))
673-
assert full_dashboard["summary"]["rollout_task_count"] == 16
696+
assert full_dashboard["summary"]["rollout_task_count"] == 17
674697
assert full_dashboard["summary"]["training_task_count"] == 1
675698
full_trend = json.loads(Path("logs/benchmarks/mlx/full-smoke-trend.json").read_text(encoding="utf-8"))
676699
assert any(entry["kind"] == "training" for entry in full_trend["tasks"])

PORT_TO_MLX_TODO.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ without pausing for replanning after every small success.
7373
- `DONE` Third trainable manipulation slice landed for `Isaac-Stack-Cube-Franka-v0` with compiled stack hotpath helpers, shared PPO/checkpoint contracts, public MLX wrapper training, benchmark coverage, semantic baseline refresh, and CI smoke coverage
7474
- `DONE` Fourth trainable stack variant landed for `Isaac-Stack-Cube-RedGreenBlue-Franka-IK-Rel-v0` with a reduced three-cube sequential stack backend, staged terminal benchmark metrics, shared PPO/checkpoint contracts, public MLX wrapper training, direct thin CLI wrappers, semantic baseline refresh, and CI smoke coverage
7575
- `DONE` Fifth trainable manipulation slice landed for `Isaac-Franka-Cabinet-Direct-v0` with a reduced drawer workflow, compiled cabinet hotpath helper, shared PPO/checkpoint contracts, public MLX wrapper training, benchmark coverage, semantic baseline refresh, and CI smoke coverage
76+
- `DONE` Sixth trainable Franka manipulation slice landed for `Isaac-Open-Drawer-Franka-v0` with a reduced analytic drawer substrate, public MLX wrapper/CLI exposure, benchmark coverage, refreshed semantic baseline, and focused backend tests
7677
- `DONE` First raycast-driven mac-native task landed for `Isaac-Velocity-Rough-Anymal-C-Direct-v0` with procedural wave terrain, analytic terrain raycasts, benchmark coverage, and deterministic replay tests
7778
- `DONE` Rough locomotion slices for ANYmal-C and H1 now expose full MLX PPO train/replay surfaces with rough-task checkpoint metadata, wrapper coverage, and CI smoke coverage
7879
- `DONE` Synthetic cartpole RGB/depth camera slices landed as eval-only mac-native tasks with deterministic analytic `100x100` observations, public MLX wrapper exposure, sensor benchmark coverage, and CI smoke coverage
@@ -86,7 +87,9 @@ without pausing for replanning after every small success.
8687
- `DONE` Planner and ROS compatibility now carry richer planner world-state obstacles plus ROS-friendly world-state / timed joint-trajectory envelopes without requiring ROS Python bindings
8788
- `DONE` CI now proves a release-style MLX install path without `dev` extras or `PYTHONPATH`, then parses rough locomotion/manipulation configs and exercises the public wrapper
8889
- `DONE` Generic `mac-sensors` capability metadata is now honest about the public runtime surface: analytic raycasts plus synthetic camera task slices and backend-local external stereo capture, not generic Isaac Sim camera parity
90+
- `DONE` Supported public MLX/mac tasks now come from a shared typed manifest with a runtime diagnostics CLI so kernel inventory, wrapper task lists, benchmark groups, and runtime capability reporting cannot drift independently
8991
- `DONE` ROS/planner software smokes now exercise the real `mac-planners` backend and verify typed round-trip reconstruction of planner world-state and joint trajectories
92+
- `DONE` Planner/ROS batch helpers now restore batches by `batch_index` and report actual batch envelope counts instead of inferring from message order or `max(index) + 1`
9093
- `DONE` Stereo/depth smoke now validates raw capture artifacts before processing and writes a machine-checkable JSON summary artifact
9194
- `DONE` `uv run scripts/bootstrap_uv_mlx.py` now bootstraps the public MLX/mac editable environment in one command
9295

@@ -740,7 +743,7 @@ without pausing for replanning after every small success.
740743
This queue exists so work can continue without waiting for a new plan. The documented v1 board above is now closed for the current public MLX/mac slice, so the next queue is follow-on parity work:
741744

742745
- Hardware validation is now done for the backend-local stereo path against live ZED 2i capture through a camera-authorized Terminal host plus `zed-sdk-mlx`; retained host-local probe artifacts include `/tmp/isaaclab-zed-probe-live-final.json` and `/tmp/isaaclab-zed-probe-live-final.yuv`.
743-
- Port the next manipulation milestone beyond the current five trainable Franka slices, likely a richer cabinet/drawer variant or the next multi-object manipulation workflow.
746+
- Port the next manipulation milestone beyond the current six trainable Franka slices, likely a richer cabinet/drawer variant or the next multi-object manipulation workflow.
744747
- Replace the next remaining locomotion or contact/support `mx.compile` helper with a true custom Metal kernel only after the root-step tranche proves benchmark-positive and semantically stable.
745748
- Grow the planner/ROS prototypes carefully: richer process/message interoperability layers around the new world-state and joint-trajectory envelopes while still avoiding CUDA/NITROS assumptions.
746749
- Keep the generic runtime metadata honest: only advertise generic sensor/runtime capabilities that are actually exposed through backend-neutral APIs, and push task-specific or tooling-only support into explicit diagnostic fields instead of broad parity flags.
@@ -758,6 +761,7 @@ PYTHONPATH=.:source/isaaclab:source/isaaclab_rl .venv/bin/pytest \
758761
source/isaaclab/test/backends/test_kernel_inventory.py \
759762
source/isaaclab/test/backends/test_kernel_compat.py \
760763
source/isaaclab/test/backends/test_mac_hotpath.py \
764+
source/isaaclab/test/backends/test_mac_runtime_diagnostics.py \
761765
source/isaaclab/test/backends/test_planner_compat.py \
762766
source/isaaclab/test/backends/test_ros2_bridge.py \
763767
source/isaaclab/test/backends/test_mac_benchmark_suite.py \
@@ -775,6 +779,7 @@ PYTHONPATH=.:source/isaaclab:source/isaaclab_rl .venv/bin/pytest \
775779
source/isaaclab/test/backends/test_mac_franka_reach.py \
776780
source/isaaclab/test/backends/test_mac_franka_lift.py \
777781
source/isaaclab/test/backends/test_mac_franka_stack.py \
782+
source/isaaclab/test/backends/test_mac_franka_open_drawer.py \
778783
source/isaaclab/test/backends/test_mac_h1.py -q
779784
```
780785

0 commit comments

Comments
 (0)