Merge pull request instructlab#24 from nathan-weinberg/mmlu-tasks

Add list of default MMLU tasks as a constant
danmcp · Jun 27, 2024 · f79ce58
2 parents: f75a9e2 + 6cf3356
commit f79ce58
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 2 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -446,7 +446,8 @@ disable=raw-checker-failed,
         abstract-method,
         pointless-statement,
         wrong-import-order,
-        line-too-long
+        line-too-long,
+        dangerous-default-value
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option

diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py
@@ -11,6 +11,70 @@
 # First Party
 from instructlab.eval.evaluator import Evaluator
 
+# Default MMLU task names, used as the default value of the `tasks` parameter
+# of MMLUEvaluator.__init__.
+#
+# This list object itself is the default argument, so every evaluator built
+# without an explicit `tasks` shares it. Treat it as read-only: an in-place
+# change (append/remove/sort) would leak into all later default-constructed
+# evaluators. The pylint `dangerous-default-value` check that would flag this
+# is disabled in .pylintrc.
+#
+# NOTE(review): "mmlu_humanities", "mmlu_other", "mmlu_social_sciences" and
+# "mmlu_stem" look like aggregate category names rather than single subjects.
+# Confirm with the evaluation backend that listing them next to the
+# individual subjects does not run or count those subjects twice.
+MMLU_TASKS = [
+    "mmlu_abstract_algebra",
+    "mmlu_anatomy",
+    "mmlu_astronomy",
+    "mmlu_business_ethics",
+    "mmlu_clinical_knowledge",
+    "mmlu_college_biology",
+    "mmlu_college_chemistry",
+    "mmlu_college_computer_science",
+    "mmlu_college_mathematics",
+    "mmlu_college_medicine",
+    "mmlu_college_physics",
+    "mmlu_computer_security",
+    "mmlu_conceptual_physics",
+    "mmlu_econometrics",
+    "mmlu_electrical_engineering",
+    "mmlu_elementary_mathematics",
+    "mmlu_formal_logic",
+    "mmlu_global_facts",
+    "mmlu_high_school_biology",
+    "mmlu_high_school_chemistry",
+    "mmlu_high_school_computer_science",
+    "mmlu_high_school_european_history",
+    "mmlu_high_school_geography",
+    "mmlu_high_school_government_and_politics",
+    "mmlu_high_school_macroeconomics",
+    "mmlu_high_school_mathematics",
+    "mmlu_high_school_microeconomics",
+    "mmlu_high_school_physics",
+    "mmlu_high_school_psychology",
+    "mmlu_high_school_statistics",
+    "mmlu_high_school_us_history",
+    "mmlu_high_school_world_history",
+    "mmlu_human_aging",
+    "mmlu_human_sexuality",
+    "mmlu_humanities",
+    "mmlu_international_law",
+    "mmlu_jurisprudence",
+    "mmlu_logical_fallacies",
+    "mmlu_machine_learning",
+    "mmlu_management",
+    "mmlu_marketing",
+    "mmlu_medical_genetics",
+    "mmlu_miscellaneous",
+    "mmlu_moral_disputes",
+    "mmlu_moral_scenarios",
+    "mmlu_nutrition",
+    "mmlu_other",
+    "mmlu_philosophy",
+    "mmlu_prehistory",
+    "mmlu_professional_accounting",
+    "mmlu_professional_law",
+    "mmlu_professional_medicine",
+    "mmlu_professional_psychology",
+    "mmlu_public_relations",
+    "mmlu_security_studies",
+    "mmlu_social_sciences",
+    "mmlu_sociology",
+    "mmlu_stem",
+    "mmlu_us_foreign_policy",
+    "mmlu_virology",
+    "mmlu_world_religions",
+]
+
 
 class MMLUEvaluator(Evaluator):
     """
@@ -27,7 +91,7 @@ class MMLUEvaluator(Evaluator):
     def __init__(
         self,
         model_path,
-        tasks: list[str],
+        tasks: list[str] = MMLU_TASKS,
         model_dtype="bfloat16",
         few_shots: int = 2,
         batch_size: int = 5,