Skip to content

Commit b510df4

Browse files
committed
update
1 parent 7f249cb commit b510df4

2 files changed

Lines changed: 10 additions & 4 deletions

File tree

autotest/all/chat_sub_fullbench.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,8 @@
3333
[],
3434
)
3535

36-
# MTBench101 / WildBench 的 judge 后处理依赖固定格式([[score]]、"choice": "A++" 等),
37-
# mock --type choice 只返回 "A",无法解析 → 空 references / ZeroDivisionError。
38-
# datasets += mtbench101_datasets
39-
# datasets += wildbench_datasets
36+
datasets += mtbench101_datasets
37+
datasets += wildbench_datasets
4038

4139
eval = dict(
4240
partitioner=dict(type=SubjectiveNaivePartitioner,

opencompass/datasets/subjective/arena_hard.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,14 @@ def arenahard_postprocess(
188188
references,
189189
)
190190

191+
if battles.empty or 'model_a' not in battles.columns:
192+
return {
193+
'warning':
194+
'no valid arena-hard judgements (expect [[A>B]] etc. in judge output)',
195+
'score': 0,
196+
'details': output,
197+
}
198+
191199
bootstrap_online_elo = compute_mle_elo(battles)
192200

193201
np.random.seed(42)

0 commit comments

Comments
 (0)