Skip to content

Commit

Permalink
Merge pull request instructlab#24 from nathan-weinberg/mmlu-tasks
Browse files Browse the repository at this point in the history
Add list of default MMLU tasks as a constant
  • Loading branch information
nathan-weinberg authored Jun 27, 2024
2 parents f75a9e2 + 6cf3356 commit f79ce58
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 2 deletions.
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ disable=raw-checker-failed,
abstract-method,
pointless-statement,
wrong-import-order,
line-too-long
line-too-long,
dangerous-default-value

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
66 changes: 65 additions & 1 deletion src/instructlab/eval/mmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,70 @@
# First Party
from instructlab.eval.evaluator import Evaluator

# Default MMLU task names. Each entry is the shared "mmlu_" prefix followed by
# one of the suffixes below; the order of the suffixes is the order of the list.
MMLU_TASKS = [
    f"mmlu_{suffix}"
    for suffix in (
        "abstract_algebra",
        "anatomy",
        "astronomy",
        "business_ethics",
        "clinical_knowledge",
        "college_biology",
        "college_chemistry",
        "college_computer_science",
        "college_mathematics",
        "college_medicine",
        "college_physics",
        "computer_security",
        "conceptual_physics",
        "econometrics",
        "electrical_engineering",
        "elementary_mathematics",
        "formal_logic",
        "global_facts",
        "high_school_biology",
        "high_school_chemistry",
        "high_school_computer_science",
        "high_school_european_history",
        "high_school_geography",
        "high_school_government_and_politics",
        "high_school_macroeconomics",
        "high_school_mathematics",
        "high_school_microeconomics",
        "high_school_physics",
        "high_school_psychology",
        "high_school_statistics",
        "high_school_us_history",
        "high_school_world_history",
        "human_aging",
        "human_sexuality",
        "humanities",
        "international_law",
        "jurisprudence",
        "logical_fallacies",
        "machine_learning",
        "management",
        "marketing",
        "medical_genetics",
        "miscellaneous",
        "moral_disputes",
        "moral_scenarios",
        "nutrition",
        "other",
        "philosophy",
        "prehistory",
        "professional_accounting",
        "professional_law",
        "professional_medicine",
        "professional_psychology",
        "public_relations",
        "security_studies",
        "social_sciences",
        "sociology",
        "stem",
        "us_foreign_policy",
        "virology",
        "world_religions",
    )
]


class MMLUEvaluator(Evaluator):
"""
Expand All @@ -27,7 +91,7 @@ class MMLUEvaluator(Evaluator):
def __init__(
self,
model_path,
tasks: list[str],
tasks: list[str] = MMLU_TASKS,
model_dtype="bfloat16",
few_shots: int = 2,
batch_size: int = 5,
Expand Down

0 comments on commit f79ce58

Please sign in to comment.