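Re-enable MTBench101/WildBench datasets in chat_sub_fullbench; make arenahard_postprocess return a warning result when no valid battles are parsed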

zhulinJulia24 · zhulinJulia24 · commit b510df47d7d4 · 2026-05-20T20:13:56.000+08:00
diff --git a/autotest/all/chat_sub_fullbench.py b/autotest/all/chat_sub_fullbench.py
@@ -33,10 +33,8 @@
     [],
 )
 
-# MTBench101 / WildBench 的 judge 后处理依赖固定格式（[[score]]、"choice": "A++" 等），
-# mock --type choice 只返回 "A"，无法解析 → 空 references / ZeroDivisionError。
-# datasets += mtbench101_datasets
-# datasets += wildbench_datasets
+datasets += mtbench101_datasets
+datasets += wildbench_datasets
 
 eval = dict(
     partitioner=dict(type=SubjectiveNaivePartitioner,
diff --git a/opencompass/datasets/subjective/arena_hard.py b/opencompass/datasets/subjective/arena_hard.py
@@ -188,6 +188,14 @@ def arenahard_postprocess(
         references,
     )
 
+    if battles.empty or 'model_a' not in battles.columns:
+        return {
+            'warning':
+            'no valid arena-hard judgements (expect [[A>B]] etc. in judge output)',
+            'score': 0,
+            'details': output,
+        }
+
     bootstrap_online_elo = compute_mle_elo(battles)
 
     np.random.seed(42)