test: add SGLangClient unit tests, simplify existing tests, default port 30000

Lawhy · Lawhy · commit caa6f25029a1 · 2026-01-05T14:55:30.000-08:00
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -24,11 +24,11 @@
     pytest tests/                         # All tests
 
 Configuration:
-    pytest tests/integration/ --sglang-base-url=http://localhost:8000
+    pytest tests/integration/ --sglang-base-url=http://localhost:30000
     pytest tests/integration/ --sglang-model-id=Qwen/Qwen3-4B-Instruct-2507
 
     Or via environment variables:
-    SGLANG_BASE_URL=http://localhost:8000 pytest tests/integration/
+    SGLANG_BASE_URL=http://localhost:30000 pytest tests/integration/
 """
 
 import os
@@ -39,8 +39,8 @@ def pytest_addoption(parser):
     parser.addoption(
         "--sglang-base-url",
         action="store",
-        default=os.environ.get("SGLANG_BASE_URL", "http://localhost:8000"),
-        help="SGLang server URL (default: http://localhost:8000 or SGLANG_BASE_URL env var)",
+        default=os.environ.get("SGLANG_BASE_URL", "http://localhost:30000"),
+        help="SGLang server URL (default: http://localhost:30000 or SGLANG_BASE_URL env var)",
     )
     parser.addoption(
         "--sglang-model-id",
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -18,7 +18,7 @@
 and require a running SGLang server.
 
 Configuration (priority: CLI > env var > default):
-    pytest --sglang-base-url=http://localhost:8000 --sglang-model-id=Qwen/Qwen3-4B-Instruct-2507
+    pytest --sglang-base-url=http://localhost:30000 --sglang-model-id=Qwen/Qwen3-4B-Instruct-2507
     SGLANG_BASE_URL=http://... SGLANG_MODEL_ID=... pytest tests/integration/
 """
 
diff --git a/tests/integration/test_sglang_integration.py b/tests/integration/test_sglang_integration.py
@@ -190,21 +190,22 @@ async def test_incremental_tokenization(self, model):
 
 
 class TestSSEParsing:
-    """Tests for SSE event parsing."""
+    """Tests for SSE event parsing via SGLangClient."""
 
-    async def test_iter_sse_events(self, model):
-        """_iter_sse_events correctly parses SSE stream."""
+    async def test_client_generate_parses_sse(self, model):
+        """SGLangClient.generate() correctly parses SSE stream."""
         messages = [{"role": "user", "content": [{"text": "Say 'test'"}]}]
 
-        # Manually call the internal stream to test SSE parsing
+        # Tokenize and call client.generate() directly
         input_ids = model.tokenize_prompt_messages(messages, system_prompt=None)
-        payload = model.build_sglang_payload(input_ids=input_ids)
+        client = model._get_client()
 
-        async with model.client.stream("POST", "/generate", json=payload) as response:
-            events = []
-            async for event in model._iter_sse_events(response):
-                events.append(event)
+        events = []
+        async for event in client.generate(input_ids=input_ids):
+            events.append(event)
 
-        # Should have parsed JSON events
+        # Should have parsed JSON events with expected fields
         assert len(events) > 0
         assert all(isinstance(e, dict) for e in events)
+        # Final event should carry output_ids (or at least text)
+        assert "output_ids" in events[-1] or "text" in events[-1]
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
@@ -0,0 +1,227 @@
+# Copyright 2025 Horizon RL Contributors
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for SGLangClient (mocked, no server required)."""
+
+from unittest.mock import MagicMock, patch
+
+import httpx
+import pytest
+
+from strands_sglang.client import RETRYABLE_STATUS_CODES, SGLangClient
+
+
+class TestSGLangClientInit:
+    """Tests for SGLangClient initialization."""
+
+    def test_default_config(self):
+        """Default configuration values."""
+        client = SGLangClient("http://localhost:30000")
+
+        assert client.base_url == "http://localhost:30000"
+        assert client.max_retries == 60
+        assert client.retry_delay == 1.0
+
+    def test_base_url_strips_trailing_slash(self):
+        """Base URL trailing slash is stripped."""
+        client = SGLangClient("http://localhost:30000/")
+        assert client.base_url == "http://localhost:30000"
+
+    def test_custom_config(self):
+        """Custom configuration is applied."""
+        client = SGLangClient(
+            "http://custom:9000",
+            max_connections=500,
+            timeout=120.0,
+            max_retries=10,
+            retry_delay=2.0,
+        )
+
+        assert client.base_url == "http://custom:9000"
+        assert client.max_retries == 10
+        assert client.retry_delay == 2.0
+
+
+class TestRetryableErrors:
+    """Tests for _is_retryable_error method."""
+
+    @pytest.fixture
+    def client(self):
+        return SGLangClient("http://localhost:30000")
+
+    def test_connect_error_is_retryable(self, client):
+        """ConnectError is retryable."""
+        error = httpx.ConnectError("Connection refused")
+        assert client._is_retryable_error(error) is True
+
+    def test_read_timeout_is_retryable(self, client):
+        """ReadTimeout is retryable."""
+        error = httpx.ReadTimeout("Read timed out")
+        assert client._is_retryable_error(error) is True
+
+    def test_pool_timeout_is_retryable(self, client):
+        """PoolTimeout is retryable."""
+        error = httpx.PoolTimeout("Pool exhausted")
+        assert client._is_retryable_error(error) is True
+
+    @pytest.mark.parametrize("status_code", RETRYABLE_STATUS_CODES)
+    def test_5xx_errors_are_retryable(self, client, status_code):
+        """Every status code in RETRYABLE_STATUS_CODES is retryable."""
+        response = MagicMock()
+        response.status_code = status_code
+        error = httpx.HTTPStatusError("Server error", request=MagicMock(), response=response)
+        assert client._is_retryable_error(error) is True
+
+    def test_400_is_not_retryable(self, client):
+        """HTTP 400 is not retryable."""
+        response = MagicMock()
+        response.status_code = 400
+        error = httpx.HTTPStatusError("Bad request", request=MagicMock(), response=response)
+        assert client._is_retryable_error(error) is False
+
+    def test_429_is_not_retryable(self, client):
+        """HTTP 429 is not retryable (rate limit is handled by caller)."""
+        response = MagicMock()
+        response.status_code = 429
+        error = httpx.HTTPStatusError("Rate limited", request=MagicMock(), response=response)
+        assert client._is_retryable_error(error) is False
+
+    def test_generic_exception_is_not_retryable(self, client):
+        """Generic exceptions are not retryable."""
+        error = ValueError("Something wrong")
+        assert client._is_retryable_error(error) is False
+
+
+class TestSSEParsing:
+    """Tests for _iter_sse_events method."""
+
+    @pytest.fixture
+    def client(self):
+        return SGLangClient("http://localhost:30000")
+
+    @pytest.mark.asyncio
+    async def test_parse_valid_sse_events(self, client):
+        """Parse valid SSE events."""
+        response = MagicMock()
+        response.aiter_lines.return_value = AsyncIteratorMock(
+            [
+                'data: {"text": "Hello"}',
+                'data: {"text": "Hello world"}',
+                "data: [DONE]",
+            ]
+        )
+
+        events = [e async for e in client._iter_sse_events(response)]
+
+        assert len(events) == 2
+        assert events[0] == {"text": "Hello"}
+        assert events[1] == {"text": "Hello world"}
+
+    @pytest.mark.asyncio
+    async def test_skip_empty_lines(self, client):
+        """Empty lines are skipped."""
+        response = MagicMock()
+        response.aiter_lines.return_value = AsyncIteratorMock(
+            [
+                "",
+                'data: {"text": "test"}',
+                "",
+                "data: [DONE]",
+            ]
+        )
+
+        events = [e async for e in client._iter_sse_events(response)]
+
+        assert len(events) == 1
+        assert events[0] == {"text": "test"}
+
+    @pytest.mark.asyncio
+    async def test_skip_non_data_lines(self, client):
+        """Non-data lines are skipped."""
+        response = MagicMock()
+        response.aiter_lines.return_value = AsyncIteratorMock(
+            [
+                "event: message",
+                'data: {"text": "test"}',
+                ": comment",
+                "data: [DONE]",
+            ]
+        )
+
+        events = [e async for e in client._iter_sse_events(response)]
+
+        assert len(events) == 1
+
+    @pytest.mark.asyncio
+    async def test_skip_malformed_json(self, client):
+        """Malformed JSON is skipped."""
+        response = MagicMock()
+        response.aiter_lines.return_value = AsyncIteratorMock(
+            [
+                "data: not json",
+                'data: {"text": "valid"}',
+                "data: [DONE]",
+            ]
+        )
+
+        events = [e async for e in client._iter_sse_events(response)]
+
+        assert len(events) == 1
+        assert events[0] == {"text": "valid"}
+
+
+class TestHealth:
+    """Tests for health method."""
+
+    @pytest.mark.asyncio
+    async def test_health_returns_true_on_200(self):
+        """Health returns True on 200 response."""
+        with patch.object(httpx.AsyncClient, "get") as mock_get:
+            mock_response = MagicMock()
+            mock_response.status_code = 200
+            mock_get.return_value = mock_response
+
+            client = SGLangClient("http://localhost:30000")
+            result = await client.health()
+
+            assert result is True
+
+    @pytest.mark.asyncio
+    async def test_health_returns_false_on_error(self):
+        """Health returns False on HTTP error."""
+        with patch.object(httpx.AsyncClient, "get") as mock_get:
+            mock_get.side_effect = httpx.ConnectError("Connection refused")
+
+            client = SGLangClient("http://localhost:30000")
+            result = await client.health()
+
+            assert result is False
+
+
+class AsyncIteratorMock:
+    """Async iterator over a fixed list of items; stands in for response.aiter_lines()."""
+
+    def __init__(self, items):
+        self.items = items
+        self.index = 0
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        if self.index >= len(self.items):
+            raise StopAsyncIteration
+        item = self.items[self.index]
+        self.index += 1
+        return item
diff --git a/tests/unit/test_sglang.py b/tests/unit/test_sglang.py
@@ -119,46 +119,6 @@ def test_format_prompt_with_tools(self, model, mock_tokenizer):
         assert call_kwargs["tokenize"] is False
 
 
-class TestBuildSglangPayload:
-    """Tests for build_sglang_payload method."""
-
-    def test_minimal_payload(self, model):
-        """Build payload with minimal parameters."""
-        payload = model.build_sglang_payload(input_ids=[1, 2, 3])
-
-        assert payload["input_ids"] == [1, 2, 3]
-        assert payload["stream"] is True
-        assert payload["return_logprob"] is True
-        assert payload["logprob_start_len"] == 0
-
-    def test_payload_with_sampling_params(self, model):
-        """Build payload with sampling parameters."""
-        sampling = {"temperature": 0.7, "max_tokens": 100}
-        payload = model.build_sglang_payload(input_ids=[1, 2, 3], sampling_params=sampling)
-
-        assert payload["sampling_params"] == sampling
-
-    def test_payload_without_logprobs(self, model):
-        """Build payload without logprobs."""
-        payload = model.build_sglang_payload(input_ids=[1, 2, 3], return_logprob=False)
-
-        assert "return_logprob" not in payload
-        assert "logprob_start_len" not in payload
-
-    def test_payload_without_streaming(self, model):
-        """Build payload without streaming."""
-        payload = model.build_sglang_payload(input_ids=[1, 2, 3], stream=False)
-
-        assert payload["stream"] is False
-
-    def test_payload_with_model_id(self, mock_tokenizer):
-        """Build payload with model ID from config."""
-        model = SGLangModel(tokenizer=mock_tokenizer, model_id="qwen/qwen3-4b")
-        payload = model.build_sglang_payload(input_ids=[1, 2, 3])
-
-        assert payload["model"] == "qwen/qwen3-4b"
-
-
 class TestTokenizePromptMessages:
     """Tests for tokenize_prompt_messages method."""
 
@@ -336,7 +296,7 @@ def test_default_config(self, mock_tokenizer):
         model = SGLangModel(tokenizer=mock_tokenizer)
         config = model.get_config()
 
-        assert config["base_url"] == "http://localhost:8000"
+        assert config["base_url"] == "http://localhost:30000"
 
     def test_custom_base_url(self, mock_tokenizer):
         """Custom base URL is stored correctly."""
@@ -354,15 +314,11 @@ def test_update_config(self, model):
         assert config["model_id"] == "new-model"
 
     def test_config_with_timeout_float(self, mock_tokenizer):
-        """Configuration with timeout float (connect is always 5.0 like OpenAI)."""
+        """Configuration with custom timeout."""
         model = SGLangModel(tokenizer=mock_tokenizer, timeout=300.0)
-        timeout = model._client_config["timeout"]
-        assert timeout.connect == 5.0  # Fixed like OpenAI
-        assert timeout.read == 300.0
+        assert model._timeout == 300.0
 
     def test_config_with_default_timeout(self, mock_tokenizer):
-        """Configuration with default timeout (600s like OpenAI)."""
+        """Configuration with default timeout (None = infinite, like SLIME)."""
         model = SGLangModel(tokenizer=mock_tokenizer)
-        timeout = model._client_config["timeout"]
-        assert timeout.connect == 5.0
-        assert timeout.read == 600.0  # Default 10min like OpenAI
+        assert model._timeout is None  # Infinite timeout by default