feat(slime): make Sample.metadata the agent payload verbatim

Teq2412 · claude · Teq2412 · commit dbbbded82989 · 2026-05-05T20:07:41.000Z
Collapses _sample_to_payload to return a shallow copy of Sample.metadata. Previously it synthesized a hybrid payload shape (sample.prompt -> payload["prompt"], sample.label -> payload["answer"], sample.metadata nested under payload["metadata"], plus a fall-through copy of Sample fields), which locked the slime backend into the math agent's shape and forced other agents (appworld, migration, officebench) into workarounds. After this change, the JSONL row's metadata dict is the agent payload exactly, so each agent declares whatever payload shape it wants by choosing what keys to put in metadata. The JSONL top-level prompt field still drives slime's tokenizer and length filter. Breaking change for existing math JSONLs: rows using {prompt, label} carry no metadata dict and therefore now produce an empty payload. Regenerate with the updated SETUP.md data-prep snippet, which emits {prompt, metadata: {prompt, answer}}. Also drops --label-key from train.sh (nothing reads sample.label under the new rule). Verified end-to-end on Qwen2.5-3B-Instruct + GSM8K with NUM_ROLLOUT=10: raw_reward climbed 0.27 -> 0.63, train/loss and grad_norm moved as expected, and there were no rollout failures. Plan: docs/roadmap/committed/slime-data-contract.md (committed on docs/core-api-rename-roadmap in PR #59). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/src/agentcore_rl_toolkit/backends/slime/SETUP.md b/src/agentcore_rl_toolkit/backends/slime/SETUP.md
@@ -112,11 +112,22 @@ ds = load_dataset('openai/gsm8k', 'main', split='train')
 with open('/path/to/gsm8k_tiny.jsonl', 'w') as f:
     for i, row in enumerate(ds):
         if i >= 64: break
+        question = row['question']
         answer = row['answer'].split('####')[-1].strip()
-        f.write(json.dumps({'prompt': row['question'], 'label': answer}) + '\n')
+        # Top-level 'prompt' is read by slime (tokenization, length filter).
+        # 'metadata' is the agent payload verbatim — shape it however the agent expects.
+        f.write(json.dumps({
+            'prompt': question,
+            'metadata': {'prompt': question, 'answer': answer},
+        }) + '\n')
 "
 ```
 
+The agent-visible payload is exactly the contents of ``metadata``, so
+different agents can use different payload shapes (e.g. ``{'task_id': ...}``
+for AppWorld, ``{'repo_uri': ..., 'metadata_uri': ..., ...}`` for
+migration) without any slime-side changes.
+
 ### 3.3 Configure deployment settings
 
 ```bash
diff --git a/src/agentcore_rl_toolkit/backends/slime/examples/math_agent/train.sh b/src/agentcore_rl_toolkit/backends/slime/examples/math_agent/train.sh
@@ -80,7 +80,6 @@ ray job submit --address="http://127.0.0.1:8265" \
   --tensor-model-parallel-size ${TP_SIZE} \
   --rollout-num-gpus-per-engine ${ROLLOUT_GPUS_PER_ENGINE} \
   --input-key prompt \
-  --label-key label \
   --rollout-batch-size 32 \
   --n-samples-per-prompt 8 \
   --rollout-max-response-len 1024 \
diff --git a/src/agentcore_rl_toolkit/backends/slime/integration/rollout.py b/src/agentcore_rl_toolkit/backends/slime/integration/rollout.py
@@ -209,52 +209,20 @@ def _ensure_initialized(args: Namespace):
 
 
 def _sample_to_payload(sample) -> dict:
-    """Convert a slime Sample to an ART invocation payload.
-
-    Extracts all non-None public fields from the Sample into the payload.
-    The agent's @rollout_entrypoint receives this dict as `payload`.
+    """The agent payload is the JSONL row's ``metadata`` dict, verbatim.
+
+    slime's Dataset reads the JSONL row's ``metadata`` field into
+    ``Sample.metadata``; we hand that dict to the agent unchanged. The JSONL's
+    top-level ``prompt`` field is for slime (tokenization, length filtering);
+    the agent's payload shape is entirely defined by whatever the data author
+    put in ``metadata``. A shallow copy isolates the agent's view from
+    downstream mutations to ``Sample.metadata`` (e.g. ``task_metadata``
+    injection in ``_process_one_episode``).
     """
-    payload = {}
-
-    if hasattr(sample, "prompt") and sample.prompt:
-        payload["prompt"] = sample.prompt
-    if hasattr(sample, "label") and sample.label is not None:
-        payload["answer"] = sample.label
-    if hasattr(sample, "metadata") and sample.metadata:
-        payload["metadata"] = sample.metadata
-
-    if hasattr(sample, "to_dict"):
-        for key, value in sample.to_dict().items():
-            if (
-                key not in payload
-                and value is not None
-                and key
-                not in (
-                    "tokens",
-                    "rollout_log_probs",
-                    "loss_mask",
-                    "teacher_log_probs",
-                    "rollout_routed_experts",
-                    "multimodal_inputs",
-                    "multimodal_train_inputs",
-                    "group_index",
-                    "index",
-                    "status",
-                    "session_id",
-                    "spec_info",
-                    "prefix_cache_info",
-                    "response_length",
-                    "response",
-                    "weight_versions",
-                    "remove_sample",
-                    "non_generation_time",
-                    "generate_function_path",
-                    "train_metadata",
-                )
-            ):
-                payload[key] = value
-
-    return payload
+    metadata = getattr(sample, "metadata", None)
+    if isinstance(metadata, dict):
+        return dict(metadata)
+    return {}
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_slime_rollout_payload.py b/tests/test_slime_rollout_payload.py
@@ -0,0 +1,66 @@
+"""Tests for the slime backend's agent-payload conversion.
+
+``_sample_to_payload`` is the contract between slime's ``Sample`` object
+and the agent's ``@rollout_entrypoint`` payload. Since the data-contract
+change, the rule is: the agent receives ``Sample.metadata`` verbatim
+(shallow-copied). These tests pin that rule so the contract can't
+regress silently.
+"""
+
+from types import SimpleNamespace
+
+from agentcore_rl_toolkit.backends.slime.integration.rollout import _sample_to_payload
+
+
+def test_metadata_is_returned_verbatim():
+    sample = SimpleNamespace(metadata={"task_id": "t1", "prompt": "hi"})
+    assert _sample_to_payload(sample) == {"task_id": "t1", "prompt": "hi"}
+
+
+def test_empty_metadata_returns_empty_dict():
+    sample = SimpleNamespace(metadata={})
+    assert _sample_to_payload(sample) == {}
+
+
+def test_missing_metadata_attr_returns_empty_dict():
+    sample = SimpleNamespace()
+    assert _sample_to_payload(sample) == {}
+
+
+def test_none_metadata_returns_empty_dict():
+    sample = SimpleNamespace(metadata=None)
+    assert _sample_to_payload(sample) == {}
+
+
+def test_non_dict_metadata_returns_empty_dict():
+    sample = SimpleNamespace(metadata="not a dict")
+    assert _sample_to_payload(sample) == {}
+
+
+def test_returned_dict_is_a_shallow_copy():
+    """Mutations to the returned payload must not leak into Sample.metadata.
+
+    Motivation (reverse direction, not asserted here): _process_one_episode later
+    injects e.g. ``task_metadata`` into ``Sample.metadata``; the agent's view must stay stable.
+    """
+    metadata = {"prompt": "hi", "answer": "42"}
+    sample = SimpleNamespace(metadata=metadata)
+
+    payload = _sample_to_payload(sample)
+    payload["injected"] = True
+
+    assert "injected" not in metadata
+
+
+def test_prompt_and_label_on_sample_are_ignored():
+    """Top-level sample.prompt / sample.label are no longer part of the payload.
+
+    They exist on ``Sample`` for slime's own use (tokenization, logging).
+    Since the data-contract change, only ``metadata`` reaches the agent.
+    """
+    sample = SimpleNamespace(
+        prompt="slime-side prompt",
+        label="slime-side label",
+        metadata={"foo": "bar"},
+    )
+    assert _sample_to_payload(sample) == {"foo": "bar"}
diff --git a/uv.lock b/uv.lock