fix(util): fall back to sync SGLang utils

Lawhy · Lawhy · commit 39e915e9c792 · 2026-02-09T23:53:45.000-08:00
diff --git a/src/strands_env/cli/utils.py b/src/strands_env/cli/utils.py
@@ -244,22 +244,24 @@ def build_model_factory(config: ModelConfig, max_concurrency: int) -> ModelFacto
 
 def _build_sglang_model_factory(config: ModelConfig, max_concurrency: int, sampling: dict) -> ModelFactory:
     """Build SGLang model factory."""
-    import asyncio
-
-    from strands_env.utils.sglang import get_cached_client, get_cached_tokenizer
-
-    client = get_cached_client(config.base_url, max_concurrency)
+    from strands_env.utils.sglang import check_server_health, get_cached_client, get_cached_tokenizer, get_model_id
 
     # Check server health before proceeding
     try:
-        if not asyncio.run(client.health()):
-            raise ConnectionError(f"SGLang server at {config.base_url} is not healthy")
-    except Exception as e:
-        raise click.ClickException(f"SGLang server at {config.base_url} is not reachable: {e}")
+        check_server_health(config.base_url)
+    except ConnectionError as e:
+        raise click.ClickException(str(e)) from e
+
+    client = get_cached_client(config.base_url, max_concurrency)
 
     # Resolve and backfill model_id/tokenizer_path for reproducibility
     if not config.model_id:
-        config.model_id = asyncio.run(client.get_model_info())["model_path"]
+        # The lookup is a second request that can fail after the health check passed;
+        # at this CLI boundary any failure is reported as a usage error, not a traceback.
+        try:
+            config.model_id = get_model_id(config.base_url)
+        except Exception as e:
+            raise click.ClickException(f"Failed to resolve model ID from {config.base_url}: {e}") from e
     if not config.tokenizer_path:
         config.tokenizer_path = config.model_id
 
diff --git a/src/strands_env/utils/sglang.py b/src/strands_env/utils/sglang.py
@@ -19,6 +19,7 @@
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any
 
+import httpx
 from strands_sglang import SGLangClient
 
 if TYPE_CHECKING:
@@ -68,3 +69,56 @@ def clear_clients() -> None:
 def clear_tokenizers() -> None:
     """Clear all cached tokenizer instances."""
     get_cached_tokenizer.cache_clear()
+
+
+def check_server_health(base_url: str, timeout: float = 5.0) -> None:
+    """Check if the SGLang server is reachable.
+
+    Sends a blocking ``GET {base_url}/health``; a transport failure, an invalid
+    URL, or a 4xx/5xx status all count as unreachable.
+
+    Args:
+        base_url: Base URL of the SGLang server; a trailing slash is tolerated.
+        timeout: Request timeout in seconds.
+
+    Raises:
+        ConnectionError: If the server is not reachable or unhealthy.
+    """
+    url = f"{base_url.rstrip('/')}/health"
+    try:
+        response = httpx.get(url, timeout=timeout)
+        response.raise_for_status()
+    except (httpx.HTTPError, httpx.InvalidURL) as e:
+        # httpx.InvalidURL does not derive from httpx.HTTPError, so name it explicitly.
+        raise ConnectionError(f"SGLang server at {base_url} is not reachable: {e}") from e
+
+
+def get_model_id(base_url: str, timeout: float = 5.0) -> str:
+    """Get the model ID from the SGLang server.
+
+    Sends a blocking ``GET {base_url}/get_model_info`` and returns the
+    ``model_path`` field of the JSON body.
+
+    Args:
+        base_url: Base URL of the SGLang server; a trailing slash is tolerated.
+        timeout: Request timeout in seconds.
+
+    Returns:
+        The model path/ID from the server.
+
+    Raises:
+        ConnectionError: If the request fails, the server answers with an error
+            status, or the response does not contain ``model_path``.
+    """
+    url = f"{base_url.rstrip('/')}/get_model_info"
+    try:
+        response = httpx.get(url, timeout=timeout)
+        response.raise_for_status()
+    except (httpx.HTTPError, httpx.InvalidURL) as e:
+        raise ConnectionError(f"SGLang server at {base_url} is not reachable: {e}") from e
+
+    try:
+        return response.json()["model_path"]
+    except (ValueError, KeyError, TypeError) as e:
+        # ValueError: body is not JSON; KeyError/TypeError: payload has an unexpected shape.
+        raise ConnectionError(f"SGLang server at {base_url} returned unexpected model info: {e!r}") from e
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -9,13 +9,12 @@
     SGLANG_BASE_URL=http://... pytest tests/integration/
 """
 
-import asyncio
-
 import pytest
 from strands_sglang import SGLangClient
 from transformers import AutoTokenizer
 
 from strands_env.core.models import DEFAULT_SAMPLING_PARAMS, sglang_model_factory
+from strands_env.utils.sglang import check_server_health, get_model_id
 
 # Mark all tests in this directory as integration tests
 pytestmark = pytest.mark.integration
@@ -30,19 +29,17 @@ def sglang_base_url(request):
 @pytest.fixture(scope="session")
 def sglang_client(sglang_base_url):
     """Shared SGLang client for connection pooling. Skips all tests if server is unreachable."""
-    client = SGLangClient(sglang_base_url)
     try:
-        if not asyncio.run(client.health()):
-            pytest.skip(f"SGLang server at {sglang_base_url} is not healthy")
-    except Exception:
+        check_server_health(sglang_base_url)
+    except ConnectionError:
         pytest.skip(f"SGLang server not reachable at {sglang_base_url}")
-    return client
+    return SGLangClient(sglang_base_url)
 
 
 @pytest.fixture(scope="session")
-def sglang_model_id(sglang_client):
-    """Auto-detect model ID from the running SGLang server."""
-    return asyncio.run(sglang_client.get_model_info())["model_path"]
+def sglang_model_id(sglang_client, sglang_base_url):
+    """Auto-detect model ID from the running SGLang server (skipped via sglang_client if it is unreachable)."""
+    return get_model_id(sglang_base_url)
 
 
 @pytest.fixture(scope="session")