gke-labs
diff --git a/examples/autoresearch/README.md b/examples/autoresearch/README.md
Lines changed: 29 additions & 15 deletions
diff --git a/examples/autoresearch/cleanup_research_session.sh b/examples/autoresearch/cleanup_research_session.sh
Lines changed: 4 additions & 3 deletions
@@ -41,6 +41,10 @@ The command can be any runnable benchmark or training loop. It just needs to:
 - exit nonzero on failure
 - log the configured metric to `run_dir/metrics.jsonl`
 
+Useful command placeholders are `{run_dir}` for the attempt artifact directory,
+`{attempt_name}` for numeric attempt ids like `001`, and `{run_root}` for the
+assembled `LOG_ROOT/RUN_NAME` directory.
+
 ```python
 ml_logger.log_metrics({"accuracy": 0.73}, step=1)
 ```
@@ -67,27 +71,27 @@ recipe-specific settings.
 The easiest Kubernetes path is the small CLI. From `examples/autoresearch`:
 
 ```bash
-uv run --project .. python -m harness.cli run recipes/text_sql name=text-sql-v1
+uv run --project .. python -m harness.cli run recipes/text_sql run_name=text-sql-v1
 ```
 
 That command creates an ignored generated overlay under `.runs/text-sql-v1`,
 copies the flat recipe directory into a ConfigMap, mounts it into the stable
-researcher image, sets `RECIPE`, `LOG_ROOT`, and `SPEC_HASH`, and runs
-`kubectl apply -k`.
+researcher image, sets `RECIPE`, `LOG_ROOT`, `RUN_NAME`, and `SPEC_HASH`, and
+runs `kubectl apply -k`.
 
 Preview without applying:
 
 ```bash
 uv run --project .. python -m harness.cli run recipes/text_sql \
-  name=text-sql-v1 \
+  run_name=text-sql-v1 \
   apply=False
 ```
 
 Pass common recipe env directly:
 
 ```bash
 uv run --project .. python -m harness.cli run recipes/my_recipe \
-  name=my-recipe-v1 \
+  run_name=my-recipe-v1 \
   tinker_base_url=http://open-rl-gateway-service:8000 \
   base_model=google/gemma-4-e2b
 ```
@@ -107,6 +111,14 @@ and calls the shared OpenRL/Tinker services.
 
 ## Cluster Run
 
+These manifests require the official Agent Sandbox CRD. The researcher resource
+kind is `agents.x-k8s.io/v1alpha1/Sandbox`; there is no plain Kubernetes `Job`
+fallback in this demo. Verify the CRD before applying a recipe:
+
+```bash
+kubectl api-resources | grep -i sandbox
+```
+
 Create the API secret for agent-backed researcher pods:
 
 ```bash
@@ -156,7 +168,7 @@ agent starts only after those endpoints are reachable.
 ```text
 harness/cli.py         # creates/applies a generated overlay for a recipe dir
 harness/agent.py       # prepares git, records baseline, launches Gemini
-harness/attempt.py     # runs one measured attempt and writes attempt.json
+harness/attempt.py     # runs one measured attempt and writes metadata.json
 harness/serve.py       # read-only UI server over researcher/attempt manifests
 harness/utils.py       # shared JSON, git, hashing, process helpers
 k8s/base/              # reusable Sandbox/UI resources
@@ -171,13 +183,14 @@ workspace at `RECIPE`'s parent and committed as the run baseline. That lets the
 image stay stable while recipe files come from shared storage.
 
 `harness.attempt` runs recipe code and writes artifacts. The UI reads
-`LOG_ROOT/researchers/*/researcher.json`,
-`LOG_ROOT/researchers/*/attempts/*/attempt.json`, and fixed artifact filenames
-next to those manifests. Clearing `LOG_ROOT` resets the visible run.
-
-The launcher records the unmodified default config as `000-baseline`, then
-passes the recipe-adjacent `program.md` to Gemini as the prompt. That program
-tells the agent to edit only the declared target, commit the attempt, run
+`LOG_ROOT/RUN_NAME/researchers/*/metadata.json`,
+`LOG_ROOT/RUN_NAME/researchers/*/attempts/*/metadata.json`, and fixed artifact
+filenames next to those manifests. Clearing `LOG_ROOT/RUN_NAME` resets the
+visible run.
+
+The launcher records the unmodified default config as attempt `000`, then passes
+the recipe-adjacent `program.md` to Gemini as the prompt. That program tells the
+agent to edit only the declared target, commit the attempt, run
 `eval "${RUN_ATTEMPT_COMMAND}"`, record the metric, and reset if the metric did
 not improve.
 
@@ -189,7 +202,7 @@ Copy one existing recipe directory and update:
 - `autoresearch.toml`
 - the command target, if you keep one
 - the editable target
-- `kustomization.yaml` settings: `RECIPE`, `LOG_ROOT`, and
+- `kustomization.yaml` settings: `RECIPE`, `LOG_ROOT`, `RUN_NAME`, and
   `ATTEMPT_TIMEOUT_MINUTES`
 - optionally `RECIPE_DIR`, if Kubernetes should use a recipe uploaded to shared
   storage instead of the recipe already in the image
@@ -222,7 +235,8 @@ To also clear shared run data:
 
 ```bash
 DELETE_ARTIFACTS=1 \
-LOG_ROOT=/mnt/shared/open-rl/autoresearch/text_sql \
+LOG_ROOT=/mnt/shared/open-rl/autoresearch \
+RUN_NAME=text-sql \
 OVERLAY=examples/autoresearch/recipes/text_sql \
   examples/autoresearch/cleanup_research_session.sh
 ```
@@ -5,13 +5,14 @@ OVERLAY="${OVERLAY:-examples/autoresearch/recipes/text_sql}"
 NAMESPACE="${NAMESPACE:-default}"
 DELETE_ARTIFACTS="${DELETE_ARTIFACTS:-0}"
 LOG_ROOT="${LOG_ROOT:-}"
+RUN_NAME="${RUN_NAME:-}"  # run subdirectory under LOG_ROOT; only required when DELETE_ARTIFACTS=1
 
 kubectl -n "${NAMESPACE}" delete -k "${OVERLAY}" --ignore-not-found=true
 
 if [ "${DELETE_ARTIFACTS}" = "1" ]; then
-  if [ -z "${LOG_ROOT}" ]; then
-    echo "DELETE_ARTIFACTS=1 requires LOG_ROOT" >&2
+  if [ -z "${LOG_ROOT}" ] || [ -z "${RUN_NAME}" ]; then  # an empty value would widen the rm target below
+    echo "DELETE_ARTIFACTS=1 requires LOG_ROOT and RUN_NAME" >&2
     exit 2
   fi
-  rm -rf "${LOG_ROOT}"
+  rm -rf -- "${LOG_ROOT%/}/${RUN_NAME:?}"  # "--" stops a leading "-" being read as an rm option; ":?" aborts if RUN_NAME is empty
 fi