Skip to content

Commit f0b6e9d

Browse files
committed
add aime25 test case
1 parent a5ccc22 commit f0b6e9d

4 files changed

Lines changed: 20 additions & 4 deletions

File tree

.github/workflows/gke-connectivity-smoke.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,6 @@ jobs:
192192
set -euo pipefail
193193
194194
HEADLESS_SERVICE_NAME="${WORKLOAD_NAME}-headless"
195-
JOB_COMPLETE_TIMEOUT="30m"
196195
TPU_PROCESS_ADDRESSES="${WORKLOAD_NAME}-0.${HEADLESS_SERVICE_NAME}:8471,${WORKLOAD_NAME}-1.${HEADLESS_SERVICE_NAME}:8471,${WORKLOAD_NAME}-2.${HEADLESS_SERVICE_NAME}:8471,${WORKLOAD_NAME}-3.${HEADLESS_SERVICE_NAME}:8471"
197196
TPU_WORKER_HOSTNAMES="${WORKLOAD_NAME}-0.${HEADLESS_SERVICE_NAME},${WORKLOAD_NAME}-1.${HEADLESS_SERVICE_NAME},${WORKLOAD_NAME}-2.${HEADLESS_SERVICE_NAME},${WORKLOAD_NAME}-3.${HEADLESS_SERVICE_NAME}"
198197
@@ -346,7 +345,7 @@ jobs:
346345
# Wait for Job to reach a terminal state (Complete or Failed). Plain
347346
# `kubectl wait --for=condition=complete` only matches success and
348347
# would block until --timeout even if the Job has already Failed.
349-
deadline=$(($(date +%s) + 1800))
348+
deadline=$(($(date +%s) + 5400))
350349
while [[ $(date +%s) -lt $deadline ]]; do
351350
conds=$(kubectl get job "${WORKLOAD_NAME}" --namespace "${NAMESPACE}" \
352351
-o jsonpath='{.status.conditions[?(@.type=="Complete")].status}|{.status.conditions[?(@.type=="Failed")].status}')

test/srt/mulit_host/multi_host_suite.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class AccuracyCase:
2626
eval_batch_size: int = 32
2727
generation_config: dict[str, Any] = field(default_factory=dict)
2828
limit: int | None = None
29+
timeout: int | None = None
2930
dry_run_result: Literal["success", "failed"] = "success"
3031

3132

test/srt/mulit_host/run_suite.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,15 @@ def run_accuracy_case(case: AccuracyCase, port: int) -> None:
189189
cmd.extend(["--generation-config", json.dumps(case.generation_config)])
190190
if case.limit is not None:
191191
cmd.extend(["--limit", str(case.limit)])
192+
if case.timeout is not None:
193+
cmd.extend(["--timeout", str(case.timeout)])
192194

193195
_log(
194196
"Running accuracy case "
195197
f"name={case.name}, dataset={case.dataset}, "
196198
f"eval_batch_size={case.eval_batch_size}, "
197-
f"generation_config={case.generation_config}, limit={case.limit}"
199+
f"generation_config={case.generation_config}, limit={case.limit}, "
200+
f"timeout={case.timeout}"
198201
)
199202
_log(f"Command: {' '.join(cmd)}")
200203
completed = subprocess.run(cmd, check=False)

test/srt/mulit_host/test_mimo_flash.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,25 @@ def get_suites() -> list[MultiHostSuite]:
5858
# flush_cache=True,
5959
# ),
6060
AccuracyCase(
61-
name="mimo-flash-accuracy",
61+
name="mimo-flash-gsm8k",
6262
dataset="gsm8k",
6363
model_id="XiaomiMiMo/MiMo-V2-Flash",
6464
eval_batch_size=32,
6565
generation_config={"temperature": 0.8, "top_p": 0.95},
6666
),
67+
AccuracyCase(
68+
name="mimo-flash-aime25",
69+
dataset="aime25",
70+
model_id="XiaomiMiMo/MiMo-V2-Flash",
71+
eval_batch_size=16,
72+
timeout=6000000,
73+
generation_config={
74+
"temperature": 1,
75+
"top_p": 0.95,
76+
"max_tokens": 131072,
77+
"chat_template_kwargs": {"enable_thinking": True},
78+
},
79+
),
6780
],
6881
)
6982
],

0 commit comments

Comments
 (0)