Skip to content

Commit

Permalink
Include task scores with mmlu results + adjust default api retries
Browse files Browse the repository at this point in the history
Signed-off-by: Dan McPherson <[email protected]>
  • Loading branch information
danmcp committed Jun 28, 2024
1 parent 5dd43e3 commit 29e1b96
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
15 changes: 9 additions & 6 deletions src/instructlab/eval/mmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,15 @@ def run(self) -> tuple:
)
results = mmlu_output["results"]

-for task in self.tasks:
-    mmlu_res = results[task]
-    agg_score += float(mmlu_res["acc,none"])
-    individual_scores[task] = {}
-    individual_scores[task]["score"] = float(mmlu_res["acc,none"])
-    individual_scores[task]["stderr"] = float(mmlu_res["acc_stderr,none"])
+for task, result in results.items():
+    if task in self.tasks:
+        agg_score += float(result["acc,none"])
+    else:
+        individual_scores[task] = {
+            "score": float(result["acc,none"]),
+            "stderr": float(result["acc_stderr,none"]),
+        }

overall_score = float(agg_score / len(self.tasks))

return overall_score, individual_scores
4 changes: 2 additions & 2 deletions src/instructlab/eval/mt_bench_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
import openai

# API setting constants
-API_MAX_RETRY = 16
-API_RETRY_SLEEP = 10
+API_MAX_RETRY = 4
+API_RETRY_SLEEP = 4
API_ERROR_OUTPUT = "$ERROR$"

# Categories that need reference answers
Expand Down

0 comments on commit 29e1b96

Please sign in to comment.