zhaochenyang20
diff --git a/‎.github/workflows/cpu-test-api.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/cpu-test-api.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎development.md‎
Lines changed: 6 additions & 2 deletions b/‎development.md‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 5 additions & 1 deletion b/‎pyproject.toml‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎src/sglang_diffusion_routing/cli/main.py‎
Lines changed: 0 additions & 9 deletions b/‎src/sglang_diffusion_routing/cli/main.py‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎src/sglang_diffusion_routing/launcher/utils.py‎
Lines changed: 12 additions & 3 deletions b/‎src/sglang_diffusion_routing/launcher/utils.py‎
Lines changed: 12 additions & 3 deletions
diff --git a/‎src/sglang_diffusion_routing/router/diffusion_router.py‎
Lines changed: 10 additions & 0 deletions b/‎src/sglang_diffusion_routing/router/diffusion_router.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎tests/conftest.py‎
Lines changed: 11 additions & 0 deletions b/‎tests/conftest.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎tests/e2e/__init__.py‎ b/‎tests/e2e/__init__.py‎
@@ -24,4 +24,4 @@ jobs:
           python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
 
       - name: Run CPU unit tests
-        run: pytest tests/unit -v
+        run: pytest tests/unit tests/integration -v
@@ -10,13 +10,17 @@ Run CPU only tests:
 
 ```bash
 pip install pytest
-pytest tests/unit -v
+# CPU-only tests (unit + integration)
+pytest tests/unit tests/integration -v
+
+# Real E2E tests (GPU required, longer runtime)
+pytest tests/e2e/test_e2e_sglang.py -v -s
 ```
 
 ## Benchmark Scripts
 
 Benchmark scripts are available under `tests/benchmarks/diffusion_router/` and are intended for manual runs.
-They are not part of default unit test collection (`pytest tests/unit -v`).
+They are not part of the default test collection (`pytest tests/unit tests/integration -v`).
 
 Single benchmark:
 
 
@@ -40,5 +40,9 @@ package-dir = { "" = "src" }
 where = ["src"]
 
 [tool.pytest.ini_options]
-testpaths = ["tests/unit"]
+testpaths = ["tests/unit", "tests/integration"]
+markers = [
+    "integration: CPU-only integration tests with real processes.",
+    "real_e2e: Real e2e tests requiring sglang and GPU.",
+]
 pythonpath = ["src"]
@@ -67,18 +67,9 @@ def _run_router_server(
         ) from exc
 
     worker_urls = list(args.worker_urls or [])
-    refresh_tasks = []
     for url in worker_urls:
         normalized_url = router.normalize_worker_url(url)
         router.register_worker(normalized_url)
-        refresh_tasks.append(router.refresh_worker_video_support(normalized_url))
-
-    if refresh_tasks:
-
-        async def _refresh_all_worker_video_support() -> None:
-            await asyncio.gather(*refresh_tasks)
-
-        asyncio.run(_refresh_all_worker_video_support())
 
     print(f"{log_prefix} starting router on {args.host}:{args.port}", flush=True)
     print(
 
@@ -10,11 +10,20 @@
 from collections.abc import Iterable
 
 import httpx
-import torch
 
 # TODO (mengyang, shuwen, chenyang): these utils should be cleaned up.
 
 
+def _cuda_device_count() -> int:
+    """Best-effort CUDA device count without hard torch import at module import."""
+    try:
+        import torch
+
+        return int(torch.cuda.device_count())
+    except Exception:
+        return 0
+
+
 def infer_connect_host(host: str) -> str:
     """Normalize bind-all addresses to loopback for client connections."""
     if host in ("0.0.0.0", "::", "localhost"):
@@ -72,7 +81,7 @@ def resolve_gpu_pool(
         if parsed:
             return parsed
 
-    gpu_count = int(torch.cuda.device_count())
+    gpu_count = _cuda_device_count()
     if gpu_count > 0:
         return [str(i) for i in range(gpu_count)]
     return None
@@ -116,7 +125,7 @@ def build_gpu_assignments(
             gpu_pool = parsed
 
     if gpu_pool is None:
-        gpu_count = int(torch.cuda.device_count())
+        gpu_count = _cuda_device_count()
         if gpu_count > 0:
             gpu_pool = [str(i) for i in range(gpu_count)]
 
 
@@ -86,6 +86,16 @@ def _setup_routes(self) -> None:
         )
 
     async def _start_background_health_check(self) -> None:
+        # Probe capability for pre-registered workers in the active server loop.
+        unknown_workers = [
+            url for url, support in self.worker_video_support.items() if support is None
+        ]
+        if unknown_workers:
+            await asyncio.gather(
+                *(self.refresh_worker_video_support(url) for url in unknown_workers),
+                return_exceptions=True,
+            )
+
         if self._health_task is None or self._health_task.done():
             self._health_task = asyncio.create_task(self._health_check_loop())
 
 
@@ -0,0 +1,11 @@
+"""Pytest configuration: force local src import precedence."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+src_str = str(Path(__file__).resolve().parent.parent / "src")
+while src_str in sys.path:
+    sys.path.remove(src_str)
+sys.path.insert(0, src_str)