|
3 | 3 | import importlib |
4 | 4 | import json |
5 | 5 | import os |
| 6 | +import subprocess |
6 | 7 | import sys |
7 | 8 | import threading |
8 | 9 | import time |
@@ -159,8 +160,49 @@ def run_case(case: PerfCase | AccuracyCase, model_path: str, port: int) -> None: |
159 | 160 | if isinstance(case, PerfCase): |
160 | 161 | run_perf_case(case, model_path, port) |
161 | 162 | return |
| 163 | + if isinstance(case, AccuracyCase): |
| 164 | + run_accuracy_case(case, port) |
| 165 | + return |
| 166 | + |
| 167 | + raise NotImplementedError(f"Unsupported case type: {type(case).__name__}") |
| 168 | + |
| 169 | + |
def run_accuracy_case(case: AccuracyCase, port: int) -> None:
    """Run one accuracy case by invoking the ``evalscope eval`` CLI.

    The command targets ``http://127.0.0.1:<port>/v1`` with
    ``--eval-type openai_api``. Only the subprocess exit code is checked
    here; no accuracy score is read or compared by this function.

    Args:
        case: Accuracy case to run. This function reads ``name``,
            ``model_id``, ``dataset``, ``eval_batch_size``,
            ``generation_config`` and ``limit`` from it.
        port: Local port used to build the API URL passed to evalscope.

    Raises:
        RuntimeError: If the evalscope process exits with a non-zero code.
    """
    # Local import: only needed to render the command for the log line.
    import shlex

    api_url = f"http://127.0.0.1:{port}/v1"
    cmd = [
        "evalscope",
        "eval",
        "--model",
        case.model_id,
        "--api-url",
        api_url,
        "--api-key",
        "EMPTY",
        "--eval-type",
        "openai_api",
        "--datasets",
        case.dataset,
        "--eval-batch-size",
        str(case.eval_batch_size),
    ]
    # Optional flags are only forwarded when the case sets them.
    if case.generation_config:
        cmd.extend(["--generation-config", json.dumps(case.generation_config)])
    if case.limit is not None:
        cmd.extend(["--limit", str(case.limit)])

    _log(
        "Running accuracy case "
        f"name={case.name}, dataset={case.dataset}, "
        f"eval_batch_size={case.eval_batch_size}, "
        f"generation_config={case.generation_config}, limit={case.limit}"
    )
    # shlex.join quotes each argument, so the JSON generation config (which
    # contains spaces and double quotes) is logged as a single shell token
    # and the line can be pasted into a shell to reproduce the run.
    _log(f"Command: {shlex.join(cmd)}")
    # check=False: the exit code is inspected below so the error can name the case.
    completed = subprocess.run(cmd, check=False)
    if completed.returncode != 0:
        raise RuntimeError(
            f"evalscope exited with code {completed.returncode} for case={case.name}"
        )
    _log(f"Accuracy case {case.name} completed (warn-only mode, accuracy not gated)")
164 | 206 |
|
165 | 207 |
|
166 | 208 | def stop_server_process(server_process) -> None: |
|
0 commit comments