Skip to content

Commit c8ccfc5

Browse files
committed
fix(jumpscore): expose map metric
1 parent ac2becf commit c8ccfc5

2 files changed

Lines changed: 17 additions & 11 deletions

File tree

lmms_eval/tasks/jumpscore/jumpscore.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ generation_kwargs:
22 22
process_results: !function utils.jumpscore_process_results
23 23

24 24
metric_list:
25+
- metric: jumpscore_map
26+
aggregation: !function utils.jumpscore_aggregate_results
27+
higher_is_better: true
25 28
- metric: jumpscore_score
26 29
aggregation: !function utils.jumpscore_aggregate_results
27 30
higher_is_better: true

lmms_eval/tasks/jumpscore/utils.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -285,18 +285,21 @@ def jumpscore_process_results(doc: Dict[str, Any], results: List[str]) -> Dict[s
285 285
confidences=None,
286 286
)
287 287

288+
result = {
289+
"question_id": doc["id"],
290+
"map": map_value,
291+
"ap_per_tolerance": map_details["ap_per_tolerance"],
292+
"pred_starts": pred_starts,
293+
"gt_starts": gt_starts,
294+
"num_pred": map_details["num_pred"],
295+
"num_gt": map_details["num_gt"],
296+
"pred_raw": pred_answer_raw[:200] if pred_answer_raw else "",
297+
"gt_raw": gt_answer_raw[:200] if gt_answer_raw else "",
298+
}
299+
288 300
return {
289-
"jumpscore_score": {
290-
"question_id": doc["id"],
291-
"map": map_value,
292-
"ap_per_tolerance": map_details["ap_per_tolerance"],
293-
"pred_starts": pred_starts,
294-
"gt_starts": gt_starts,
295-
"num_pred": map_details["num_pred"],
296-
"num_gt": map_details["num_gt"],
297-
"pred_raw": pred_answer_raw[:200] if pred_answer_raw else "",
298-
"gt_raw": gt_answer_raw[:200] if gt_answer_raw else "",
299-
}
301+
"jumpscore_map": result,
302+
"jumpscore_score": result.copy(),
300 303
}
301 304

302 305

0 commit comments

Comments
 (0)