chore: add CLAUDE.md, auto-detect model ID and health check for integration tests

Lawhy · claude · Lawhy · commit 777a985edeaa · 2026-02-03T00:57:23.000-08:00
Remove --sglang-model-id CLI option; model ID is now auto-detected
from the server via /get_model_info. Integration tests skip
automatically if the SGLang server is unreachable (/health check).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,73 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Strands-env is an RL environment abstraction for Strands agents — step, observe, reward. It provides a base `Environment` class that wraps a Strands `Agent` with token-level observation tracking (TITO), reward computation, and termination handling. Supports SGLang, Bedrock, and OpenAI model backends.
+
+## Commands
+
+### Setup
+```bash
+pip install -e ".[dev]"
+```
+
+### Linting
+```bash
+ruff check src/
+ruff format --check src/
+```
+
+### Testing
+```bash
+# Unit tests (no server needed)
+pytest tests/unit/ -v
+
+# Single test
+pytest tests/unit/test_environment.py::TestStep::test_successful_step -v
+
+# Unit tests with coverage
+pytest tests/unit/ -v --cov=src/strands_env --cov-report=html
+
+# Integration tests (requires running SGLang server; model ID auto-detected via /get_model_info)
+# Tests skip automatically if server is unreachable (/health check)
+pytest tests/integration/ -v --sglang-base-url=http://localhost:30000
+# Or via env var: SGLANG_BASE_URL=http://localhost:30000 pytest tests/integration/
+```
+
+### Integration Tests with Remote GPU Server
+
+```bash
+# 1. Launch SGLang on the remote server in docker
+ssh <remote-host> "sudo docker run -d --gpus '\"device=0\"' --name sglang-test -p 30000:30000 --ipc=host lmsysorg/sglang:<tag> python3 -m sglang.launch_server --model-path <model-id> --host 0.0.0.0 --port 30000 --tp <num_gpus> --mem-fraction-static 0.7"
+# 2. Tunnel the port locally
+ssh -L 30000:localhost:30000 -N -f <remote-host>
+# 3. Run tests locally (the default base URL, http://localhost:30000, matches the tunnelled port)
+pytest tests/integration/ -v
+```
+
+## Architecture
+
+The package lives in `src/strands_env/core/` with three modules:
+
+**types.py** — All data types. `Action` carries a user message + `TaskContext` (ground truth, conversation history, arbitrary metadata via `extra="allow"`). `Observation` holds messages, metrics, and optional `TokenObservation` for TITO training. `TerminationReason` maps agent exceptions to enum values via `from_error()` which walks exception cause chains. `StepResult` bundles observation + reward + termination reason.
+
+**models.py** — `ModelFactory = Callable[[], Model]` type and three factory functions (`sglang_model_factory`, `bedrock_model_factory`, `openai_model_factory`). Each returns a zero-arg lambda that creates a fresh Model instance per `step()` call for concurrent isolation. Bedrock and OpenAI remap `max_new_tokens` → `max_tokens` with a shallow dict copy to avoid mutating defaults.
+
+**environment.py** — Base `Environment` class. `step(action)` creates a fresh model via factory, attaches a `TokenManager`, builds an `Agent` with tools/hooks (always includes `ToolIterationLimiter`), runs `invoke_async`, then collects metrics and optional reward. Subclasses override `get_tools()` and `get_hooks()` to customize. Messages are sliced so only new messages from the current step appear in the observation.
+
+### Key Design Decisions
+
+- **Factory pattern**: The `*_model_factory` functions return zero-arg `ModelFactory` callables (not Model instances), so each `step()` call gets a fresh model with clean token tracking state.
+- **TITO token tracking**: `TokenManager` on SGLang models captures exact token IDs and logprobs during generation. `TokenObservation.from_token_manager()` extracts the prompt/rollout split. Non-SGLang models get an empty `TokenManager`, for which `from_token_manager()` returns `None`.
+- **`list()` copies**: Tools, hooks, and messages are copied via `list()` before passing to Agent to prevent cross-step mutation.
+- **ToolIterationLimiter**: Always prepended to the hooks list. It raises `MaxToolIterationsReachedError`, which `TerminationReason.from_error()` maps to `MAX_TOOL_ITERATIONS_REACHED`.
+
+## Code Style
+
+- Ruff for linting and formatting (line-length 120, rules: E, F, I, N, W)
+- Conventional commits (feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert)
+- Python 3.10+ required
+- asyncio_mode = "auto" for pytest-asyncio
+- Async-first: all Environment methods that interact with Agent are async
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -10,7 +10,6 @@
 
 Configuration:
     pytest tests/integration/ --sglang-base-url=http://localhost:30000
-    pytest tests/integration/ --sglang-model-id=Qwen/Qwen3-4B-Instruct-2507
 
     Or via environment variables:
     SGLANG_BASE_URL=http://localhost:30000 pytest tests/integration/
@@ -27,12 +26,6 @@ def pytest_addoption(parser):
         default=os.environ.get("SGLANG_BASE_URL", "http://localhost:30000"),
         help="SGLang server URL (default: http://localhost:30000 or SGLANG_BASE_URL env var)",
     )
-    parser.addoption(
-        "--sglang-model-id",
-        action="store",
-        default=os.environ.get("SGLANG_MODEL_ID", "Qwen/Qwen3-4B-Instruct-2507"),
-        help="Model ID (default: Qwen/Qwen3-4B-Instruct-2507 or SGLANG_MODEL_ID env var)",
-    )
 
 
 def pytest_configure(config):
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -1,12 +1,15 @@
 """Shared fixtures for integration tests.
 
 All tests in this directory require a running SGLang server.
+The model ID is auto-detected from the server via /get_model_info.
+Tests are skipped automatically if the server is not reachable.
 
 Configuration (priority: CLI > env var > default):
-    pytest --sglang-base-url=http://localhost:30000 --sglang-model-id=Qwen/Qwen3-4B-Instruct-2507
-    SGLANG_BASE_URL=http://... SGLANG_MODEL_ID=... pytest tests/integration/
+    pytest --sglang-base-url=http://localhost:30000
+    SGLANG_BASE_URL=http://... pytest tests/integration/
 """
 
+import httpx
 import pytest
 from strands_sglang import SGLangClient
 from transformers import AutoTokenizer
@@ -24,21 +27,30 @@ def sglang_base_url(request):
 
 
 @pytest.fixture(scope="session")
-def sglang_model_id(request):
-    """Get model ID from pytest config."""
-    return request.config.getoption("--sglang-model-id")
+def sglang_client(sglang_base_url):
+    """Shared SGLang client for connection pooling. Skips all tests if server is unreachable."""
+    try:
+        response = httpx.get(f"{sglang_base_url}/health", timeout=5)
+        healthy = response.status_code == 200
+    except httpx.HTTPError:
+        healthy = False
+    if not healthy:
+        pytest.skip(f"SGLang server not reachable at {sglang_base_url}")
+    return SGLangClient(sglang_base_url)
 
 
 @pytest.fixture(scope="session")
-def tokenizer(sglang_model_id):
-    """Load tokenizer for the configured model."""
-    return AutoTokenizer.from_pretrained(sglang_model_id)
+def sglang_model_id(sglang_base_url, sglang_client):
+    """Auto-detect model ID from the running SGLang server."""
+    response = httpx.get(f"{sglang_base_url}/get_model_info", timeout=5)
+    response.raise_for_status()
+    return response.json()["model_path"]
 
 
 @pytest.fixture(scope="session")
-def sglang_client(sglang_base_url):
-    """Shared SGLang client for connection pooling."""
-    return SGLangClient(sglang_base_url)
+def tokenizer(sglang_model_id):
+    """Load tokenizer for the detected model."""
+    return AutoTokenizer.from_pretrained(sglang_model_id)
 
 
 @pytest.fixture