Skip to content

Commit 394905f

Browse files
committed
feat: remove invalid LMEval tasks
1 parent 89c0fc2 commit 394905f

File tree

1 file changed

+0
-67
lines changed

1 file changed

+0
-67
lines changed

tests/model_explainability/lm_eval/data/new_task_list.csv

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2492,7 +2492,6 @@ kobest,4306,true,Unknown,false,3
24922492
kobest_sentineg,4306,true,skt/kobest_v1,false,3
24932493
kobest_copa,4306,true,skt/kobest_v1,false,3
24942494
realtoxicityprompts,4024,true,allenai/real-toxicity-prompts,false,3
2495-
tinyMMLU,3799,true,tinyBenchmarks/tinyMMLU,false,3
24962495
arabicmmlu_biology_high_school,3548,true,MBZUAI/ArabicMMLU,false,3
24972496
arabicmmlu_other_tasks,3548,false,Unknown,false,3
24982497
arabicmmlu_economics_middle_school,3548,true,MBZUAI/ArabicMMLU,false,3
@@ -3075,70 +3074,11 @@ mmlu_prox_en,2213,false,Unknown,false,3
30753074
gsm_plus_mini,2097,true,qintongli/GSM-Plus,false,3
30763075
gsm_plus,2097,true,qintongli/GSM-Plus,false,3
30773076
afrobench_mmlu_tasks,2022,false,Unknown,false,3
3078-
tmlu_CAP_earth_science,2013,true,miulab/tmlu,false,3
3079-
tmlu_GSAT_chinese,2013,true,miulab/tmlu,false,3
3080-
tmlu_GSAT_civics,2013,true,miulab/tmlu,false,3
3081-
tmlu_basic_traditional_chinese_medicine,2013,true,miulab/tmlu,false,3
3082-
tmlu_AST_biology,2013,true,miulab/tmlu,false,3
3083-
tmlu_nutritionist,2013,true,miulab/tmlu,false,3
3084-
tmlu_CAP_chinese,2013,true,miulab/tmlu,false,3
3085-
tmlu_clinical_traditional_chinese_medicine,2013,true,miulab/tmlu,false,3
3086-
tmlu_humanities_tasks,2013,false,Unknown,false,3
3087-
tmlu_tour_guide,2013,true,miulab/tmlu,false,3
3088-
tmlu_teacher_qualification,2013,true,miulab/tmlu,false,3
3089-
tmlu,2013,true,Unknown,false,3
3090-
tmlu_stem_tasks,2013,false,Unknown,false,3
3091-
tmlu_other_tasks,2013,false,Unknown,false,3
3092-
tmlu_taiwan_specific_tasks,2013,false,Unknown,false,3
3093-
tmlu_social_sciences_tasks,2013,false,Unknown,false,3
3094-
tmlu_AST_history,2013,true,miulab/tmlu,false,3
3095-
tmlu_CAP_biology,2013,true,miulab/tmlu,false,3
3096-
tmlu_clinical_psychologist,2013,true,miulab/tmlu,false,3
3097-
tmlu_AST_geography,2013,true,miulab/tmlu,false,3
3098-
tmlu_CAP_civics,2013,true,miulab/tmlu,false,3
3099-
tmlu_GSAT_earth_science,2013,true,miulab/tmlu,false,3
3100-
tmlu_AST_chinese,2013,true,miulab/tmlu,false,3
3101-
tmlu_taiwan_tourist_resources,2013,true,miulab/tmlu,false,3
3102-
tmlu_CAP_history,2013,true,miulab/tmlu,false,3
3103-
tmlu_CAP_geography,2013,true,miulab/tmlu,false,3
3104-
tmlu_CAP_chemistry,2013,true,miulab/tmlu,false,3
3105-
tmlu_GSAT_biology,2013,true,miulab/tmlu,false,3
3106-
tmlu_AST_civics,2013,true,miulab/tmlu,false,3
3107-
tmlu_lawyer_qualification,2013,true,miulab/tmlu,false,3
3108-
tmlu_GSAT_chemistry,2013,true,miulab/tmlu,false,3
3109-
tmlu_driving_rule,2013,true,miulab/tmlu,false,3
3110-
tmlu_GSAT_history,2013,true,miulab/tmlu,false,3
3111-
tmlu_accountant,2013,true,miulab/tmlu,false,3
3112-
tmlu_tour_leader,2013,true,miulab/tmlu,false,3
3113-
tmlu_AST_chemistry,2013,true,miulab/tmlu,false,3
3114-
tmlu_GSAT_geography,2013,true,miulab/tmlu,false,3
31153077
gsm8k_platinum_cot_self_consistency,2002,false,Unknown,false,3
31163078
gsm8k_platinum_cot_llama,2002,false,Unknown,false,3
31173079
gsm8k_platinum_cot_zeroshot,2002,false,Unknown,false,3
31183080
gsm8k_platinum_cot,2002,false,Unknown,false,3
31193081
gsm8k_platinum,2002,false,Unknown,false,3
3120-
pile_ubuntu-irc,1990,true,EleutherAI/pile,false,3
3121-
pile_philpapers,1990,true,EleutherAI/pile,false,3
3122-
pile_bookcorpus2,1990,true,EleutherAI/pile,false,3
3123-
pile_uspto,1990,true,EleutherAI/pile,false,3
3124-
pile_stackexchange,1990,true,EleutherAI/pile,false,3
3125-
pile_pubmed-abstracts,1990,true,EleutherAI/pile,false,3
3126-
pile_arxiv,1990,true,EleutherAI/pile,false,3
3127-
pile_wikipedia,1990,true,EleutherAI/pile,false,3
3128-
pile_nih-exporter,1990,true,EleutherAI/pile,false,3
3129-
pile_freelaw,1990,true,EleutherAI/pile,false,3
3130-
pile_hackernews,1990,true,EleutherAI/pile,false,3
3131-
pile_pile-cc,1990,true,EleutherAI/pile,false,3
3132-
pile_github,1990,true,EleutherAI/pile,false,3
3133-
pile_books3,1990,true,EleutherAI/pile,false,3
3134-
pile_dm-mathematics,1990,true,EleutherAI/pile,false,3
3135-
pile_youtubesubtitles,1990,true,EleutherAI/pile,false,3
3136-
pile_openwebtext2,1990,true,EleutherAI/pile,false,3
3137-
pile_opensubtitles,1990,true,EleutherAI/pile,false,3
3138-
pile_pubmed-central,1990,true,EleutherAI/pile,false,3
3139-
pile_europarl,1990,true,EleutherAI/pile,false,3
3140-
pile_enron,1990,true,EleutherAI/pile,false,3
3141-
pile_gutenberg,1990,true,EleutherAI/pile,false,3
31423082
darijammlu_natural_science,1912,false,Unknown,false,3
31433083
darijammlu_high_school_psychology,1912,false,Unknown,false,3
31443084
darijammlu_high_school_world_history,1912,false,Unknown,false,3
@@ -3188,8 +3128,6 @@ darijammlu_ar_mmlu,1912,false,Unknown,false,3
31883128
darijammlu,1912,false,Unknown,false,3
31893129
darijammlu_professional_psychology,1912,false,Unknown,false,3
31903130
darijammlu_geography,1912,false,Unknown,false,3
3191-
tinyHellaswag,1871,true,tinyBenchmarks/tinyHellaswag,false,3
3192-
tinyBenchmarks,1739,true,Unknown,false,3
31933131
t0_eval,1693,true,Unknown,false,3
31943132
swag,1691,true,swag,false,3
31953133
qa4mre_2013,1650,true,qa4mre,false,3
@@ -3703,8 +3641,6 @@ masakhanews_lin_prompt_2,1380,false,Unknown,false,3
37033641
masakhanews_yor_prompt_2,1380,false,Unknown,false,3
37043642
masakhanews_tir_prompt_5,1380,false,Unknown,false,3
37053643
masakhanews_fra_prompt_5,1380,false,Unknown,false,3
3706-
tinyArc,1354,true,tinyBenchmarks/tinyAI2_arc,false,3
3707-
tinyGSM8k,1344,true,tinyBenchmarks/tinyGSM8k,false,3
37083644
japanese_leaderboard,1330,true,Unknown,false,3
37093645
arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task,1326,true,OALL/AlGhafa-Arabic-LLM-Benchmark-Native,false,3
37103646
arabic_leaderboard_alghafa_meta_ar_msa,1326,true,OALL/AlGhafa-Arabic-LLM-Benchmark-Native,false,3
@@ -4006,7 +3942,6 @@ sib_kon_prompt_2,1258,false,Unknown,false,3
40063942
sib_som_prompt_2,1258,false,Unknown,false,3
40073943
sib_kab_prompt_2,1258,false,Unknown,false,3
40083944
sib_yor_prompt_2,1258,false,Unknown,false,3
4009-
tinyWinogrande,1244,true,tinyBenchmarks/tinyWinogrande,false,3
40103945
agieval_math,1236,true,hails/agieval-math,false,3
40113946
bbq_generate_disambig,1198,false,Unknown,false,3
40123947
bbq,1198,false,Unknown,false,3
@@ -4509,8 +4444,6 @@ careqa_en,843,false,Unknown,false,3
45094444
careqa_open,843,false,Unknown,false,3
45104445
careqa_open_perplexity,843,false,Unknown,false,3
45114446
careqa_es,843,false,Unknown,false,3
4512-
tinyTruthfulQA_mc1,824,true,tinyBenchmarks/tinyTruthfulQA,false,3
4513-
tinyTruthfulQA,824,true,tinyBenchmarks/tinyTruthfulQA,false,3
45144447
xcopa_eu,823,true,HiTZ/XCOPA-eu,false,3
45154448
AraDiCE_ArabicMMLU_high_social-science_geography_lev,818,true,QCRI/AraDICE-ArabicMMLU-lev,false,3
45164449
AraDiCE_ArabicMMLU_primary_social-science_geography_lev,818,true,QCRI/AraDICE-ArabicMMLU-lev,false,3

0 commit comments

Comments
 (0)