fix(examples): update strands_sglang example to strands-sglang v0.2.x API (#1593)

Lawhy · yitianlian · web-flow · commit eafa233b991c · 2026-02-20T10:48:29.000+08:00
Co-authored-by: Chengxing Xie &lt;91449279+yitianlian@users.noreply.github.com&gt;
diff --git a/examples/README.md b/examples/README.md
@@ -14,7 +14,7 @@ These examples provide concrete examples to leverage slime in your own RL workfl
 - **[reproducibility](./reproducibility)**: Guides on achieving bitwise experiment reproduction using deterministic modes.
 - **[retool](./retool)**: Demonstrates the retool functionality for tool-enabled language model generation.
 - **[search-r1](./search-r1)**: A minimal reproduction of Search-R1, featuring multi-turn conversation and tool-calling.
-- **[strands-agents](./strands-agents)**: Integration example with the Strands-Agents scaffolding framework.
+- **[strands_sglang](./strands_sglang)**: Integration example with the Strands-Agents scaffolding framework.
 - **[tau-bench](./tau-bench)**: Training in an agentic multi-turn tool use environment (Tau-bench).
 - **[train_infer_mismatch_helper](./train_infer_mismatch_helper)**: Algorithmic methods for rollout correction (e.g., TIS, MIS).
 - **[true_on_policy](./true_on_policy)**: Ensures strictly equal log probabilities between inference (SGLang) and training engines.
diff --git a/examples/strands_sglang/README.md b/examples/strands_sglang/README.md
@@ -14,7 +14,7 @@ This example connects `slime` with [`strands-sglang`](https://github.com/horizon
 
 - Captures exact token IDs during generation (no retokenization drift)
 - Automatically tracks `loss_mask` via `token_manager`
-- Provides `ToolIterationLimiter` for clean trajectory truncation
+- Provides `ToolLimiter` for clean trajectory truncation
 
 ## Install Dependencies
 
diff --git a/examples/strands_sglang/generate_with_strands.py b/examples/strands_sglang/generate_with_strands.py
@@ -2,8 +2,8 @@
 
 from camel.interpreters import SubprocessInterpreter
 from strands import Agent, tool
-from strands_sglang import SGLangClient, SGLangModel
-from strands_sglang.tool_limiter import ToolIterationLimiter
+from strands_sglang import SGLangClient, SGLangModel, ToolLimiter
+from strands_sglang.tool_parsers import HermesToolParser
 
 from slime.rollout.rm_hub.math_dapo_utils import compute_score as math_dapo_compute_score
 from slime.rollout.sglang_rollout import GenerateState
@@ -20,16 +20,17 @@
 - After completing your reasoning, present the final result enclosed in \\boxed{}.
 """.strip()
 
-MAX_TOOL_ITERATIONS = 5
+MAX_TOOL_ITERS = 5
+MAX_TOOL_CALLS = None  # No limit
 
 _client_cache: dict[str, SGLangClient] = {}
 
 
 def get_client(args) -> SGLangClient:
-    """Get shared client for connection pooling (like SLIME)."""
+    """Get shared client for connection pooling."""
     base_url = f"http://{args.sglang_router_ip}:{args.sglang_router_port}"
     if base_url not in _client_cache:
-        _client_cache[base_url] = SGLangClient.from_slime_args(args)
+        _client_cache[base_url] = SGLangClient.from_slime_args(args, timeout=300.0)
     return _client_cache[base_url]
 
 
@@ -55,15 +56,15 @@ async def generate(args, sample: Sample, sampling_params) -> Sample:
     model = SGLangModel(
         tokenizer=state.tokenizer,
         client=get_client(args),
-        model_id=args.hf_checkpoint.split("/")[-1],
-        params={k: sampling_params[k] for k in ["max_new_tokens", "temperature", "top_p"]},
+        tool_parser=HermesToolParser(),  # tool parsing for wrapped JSON tool calls
+        sampling_params=sampling_params,
     )
 
-    limiter = ToolIterationLimiter(max_iterations=MAX_TOOL_ITERATIONS)
+    tool_limiter = ToolLimiter(max_tool_iters=MAX_TOOL_ITERS, max_tool_calls=MAX_TOOL_CALLS)
     agent = Agent(
         model=model,
         tools=[execute_python_code],
-        hooks=[limiter],
+        hooks=[tool_limiter],
         callback_handler=None,
         system_prompt=SYSTEM_PROMPT,
     )
@@ -74,12 +75,12 @@ async def generate(args, sample: Sample, sampling_params) -> Sample:
         await agent.invoke_async(prompt)
         sample.status = Sample.Status.COMPLETED
     except Exception as e:
-        # Always use TRUNCATED instead of ABORTED because Slime doesn't properly
+        # Always use TRUNCATED instead of ABORTED because slime doesn't properly
         # handle ABORTED samples in reward processing. See: https://github.com/THUDM/slime/issues/200
         sample.status = Sample.Status.TRUNCATED
         logger.warning(f"TRUNCATED: {type(e).__name__}: {e}")
 
-    # TITO: extract trajectory from token_manager
+    # Extract token trajectory from token_manager
     tm = model.token_manager
     prompt_len = len(tm.segments[0])  # system + user are first segment
     sample.tokens = tm.token_ids
@@ -88,9 +89,8 @@ async def generate(args, sample: Sample, sampling_params) -> Sample:
     sample.response_length = len(sample.tokens) - prompt_len
     sample.response = model.tokenizer.decode(sample.tokens[prompt_len:], skip_special_tokens=False)
     # Tool iteration and tool call count are different because multiple parallel tool calls count as 1 iteration
-    sample.tool_iterations = limiter.iteration_count
-    trajectory = model.format_request_messages(agent.messages, None)
-    sample.tool_call_count = [message["role"] == "tool" for message in trajectory].count(True)
+    sample.tool_iters = tool_limiter.tool_iter_count
+    sample.tool_calls = tool_limiter.tool_call_count
 
     model.reset()
     agent.cleanup()
@@ -100,18 +100,19 @@ async def generate(args, sample: Sample, sampling_params) -> Sample:
 async def reward_func(args, sample: Sample, **kwargs):
     """Reward function using math_dapo scoring."""
     ground_truth = sample.label or ""
-    tool_iterations = getattr(sample, "tool_iterations", 0)
+    tool_iters = getattr(sample, "tool_iters", 0)
+    tool_calls = getattr(sample, "tool_calls", 0)
 
     result = math_dapo_compute_score(sample.response, ground_truth, strict_box_verify=False)
     if result["pred"] == "[INVALID]":
         result = math_dapo_compute_score(sample.response, ground_truth, strict_box_verify=True)
 
     # Encourage tool use on failures
     if result["score"] < 0:
-        result["score"] = min(-0.6, result["score"] + (tool_iterations - 2) / 2 * 0.1)
+        result["score"] = min(-0.6, result["score"] + (tool_iters - 2) / 2 * 0.1)
 
     result["pred"] = result["pred"] or ""
     logger.info(
-        f"reward={result['score']:.2f} | status={sample.status.name} | tool_iters={tool_iterations} | tool_calls={getattr(sample, 'tool_call_count', 0)} | tokens={len(sample.tokens)} | resp_len={sample.response_length} | "
+        f"reward={result['score']:.2f} | status={sample.status.name} | tool_iters={tool_iters} | tool_calls={tool_calls} | tokens={len(sample.tokens)} | resp_len={sample.response_length} | "
     )
     return result["score"]
diff --git a/examples/strands_sglang/requirements.txt b/examples/strands_sglang/requirements.txt
@@ -1,4 +1,2 @@
 camel-ai
-strands-agents
-strands-agents-tools
 strands-sglang