|
16 | 16 | # Delay import of SGLExecutor to avoid import errors when sglang is not available |
17 | 17 | # This allows test collection to succeed even if sglang is not installed |
18 | 18 | SGLExecutor = None |
19 | | -SGL_EXECUTOR_IMPORT_ERROR = None |
20 | 19 | try: |
21 | 20 | from parallax.server.executor.sglang_executor import SGLExecutor |
22 | | -except ImportError as exc: |
| 21 | +except ImportError: |
23 | 22 | # sglang not available, tests will be skipped |
24 | | - SGL_EXECUTOR_IMPORT_ERROR = exc |
25 | | -except Exception as exc: |
26 | | - if is_cuda_available(): |
27 | | - raise |
28 | | - SGL_EXECUTOR_IMPORT_ERROR = exc |
| 23 | + pass |
29 | 24 |
|
30 | 25 | CUDA_MODEL_REPO = "Qwen/Qwen3-0.6B" |
31 | 26 | TOTAL_LAYERS = 28 |
@@ -69,10 +64,7 @@ def test_cuda_shard_prefill(layers_config: List[Tuple[int, int]], ref_model_and_ |
69 | 64 | but uses parallax's SGLExecutor instead of direct model loading. |
70 | 65 | """ |
71 | 66 | if SGLExecutor is None: |
72 | | - pytest.skip( |
73 | | - "sglang not available or failed to import " |
74 | | - f"({type(SGL_EXECUTOR_IMPORT_ERROR).__name__}: {SGL_EXECUTOR_IMPORT_ERROR})" |
75 | | - ) |
| 67 | + pytest.skip("sglang not available (install with 'pip install -e .[gpu]')") |
76 | 68 | if not is_cuda_available(): |
77 | 69 | pytest.skip("CUDA not available") |
78 | 70 |
|
@@ -179,10 +171,7 @@ def test_cuda_executor_pipeline(ref_model_and_tokenizer): |
179 | 171 | This test creates a 2-stage pipeline and verifies it can process requests. |
180 | 172 | """ |
181 | 173 | if SGLExecutor is None: |
182 | | - pytest.skip( |
183 | | - "sglang not available or failed to import " |
184 | | - f"({type(SGL_EXECUTOR_IMPORT_ERROR).__name__}: {SGL_EXECUTOR_IMPORT_ERROR})" |
185 | | - ) |
| 174 | + pytest.skip("sglang not available (install with 'pip install -e .[gpu]')") |
186 | 175 | if not is_cuda_available(): |
187 | 176 | pytest.skip("CUDA not available") |
188 | 177 |
|
|
0 commit comments