@@ -2492,7 +2492,6 @@ kobest,4306,true,Unknown,false,3
24922492kobest_sentineg,4306,true,skt/kobest_v1,false,3
24932493kobest_copa,4306,true,skt/kobest_v1,false,3
24942494realtoxicityprompts,4024,true,allenai/real-toxicity-prompts,false,3
2495- tinyMMLU,3799,true,tinyBenchmarks/tinyMMLU,false,3
24962495arabicmmlu_biology_high_school,3548,true,MBZUAI/ArabicMMLU,false,3
24972496arabicmmlu_other_tasks,3548,false,Unknown,false,3
24982497arabicmmlu_economics_middle_school,3548,true,MBZUAI/ArabicMMLU,false,3
@@ -3075,70 +3074,11 @@ mmlu_prox_en,2213,false,Unknown,false,3
30753074gsm_plus_mini,2097,true,qintongli/GSM-Plus,false,3
30763075gsm_plus,2097,true,qintongli/GSM-Plus,false,3
30773076afrobench_mmlu_tasks,2022,false,Unknown,false,3
3078- tmlu_CAP_earth_science,2013,true,miulab/tmlu,false,3
3079- tmlu_GSAT_chinese,2013,true,miulab/tmlu,false,3
3080- tmlu_GSAT_civics,2013,true,miulab/tmlu,false,3
3081- tmlu_basic_traditional_chinese_medicine,2013,true,miulab/tmlu,false,3
3082- tmlu_AST_biology,2013,true,miulab/tmlu,false,3
3083- tmlu_nutritionist,2013,true,miulab/tmlu,false,3
3084- tmlu_CAP_chinese,2013,true,miulab/tmlu,false,3
3085- tmlu_clinical_traditional_chinese_medicine,2013,true,miulab/tmlu,false,3
3086- tmlu_humanities_tasks,2013,false,Unknown,false,3
3087- tmlu_tour_guide,2013,true,miulab/tmlu,false,3
3088- tmlu_teacher_qualification,2013,true,miulab/tmlu,false,3
3089- tmlu,2013,true,Unknown,false,3
3090- tmlu_stem_tasks,2013,false,Unknown,false,3
3091- tmlu_other_tasks,2013,false,Unknown,false,3
3092- tmlu_taiwan_specific_tasks,2013,false,Unknown,false,3
3093- tmlu_social_sciences_tasks,2013,false,Unknown,false,3
3094- tmlu_AST_history,2013,true,miulab/tmlu,false,3
3095- tmlu_CAP_biology,2013,true,miulab/tmlu,false,3
3096- tmlu_clinical_psychologist,2013,true,miulab/tmlu,false,3
3097- tmlu_AST_geography,2013,true,miulab/tmlu,false,3
3098- tmlu_CAP_civics,2013,true,miulab/tmlu,false,3
3099- tmlu_GSAT_earth_science,2013,true,miulab/tmlu,false,3
3100- tmlu_AST_chinese,2013,true,miulab/tmlu,false,3
3101- tmlu_taiwan_tourist_resources,2013,true,miulab/tmlu,false,3
3102- tmlu_CAP_history,2013,true,miulab/tmlu,false,3
3103- tmlu_CAP_geography,2013,true,miulab/tmlu,false,3
3104- tmlu_CAP_chemistry,2013,true,miulab/tmlu,false,3
3105- tmlu_GSAT_biology,2013,true,miulab/tmlu,false,3
3106- tmlu_AST_civics,2013,true,miulab/tmlu,false,3
3107- tmlu_lawyer_qualification,2013,true,miulab/tmlu,false,3
3108- tmlu_GSAT_chemistry,2013,true,miulab/tmlu,false,3
3109- tmlu_driving_rule,2013,true,miulab/tmlu,false,3
3110- tmlu_GSAT_history,2013,true,miulab/tmlu,false,3
3111- tmlu_accountant,2013,true,miulab/tmlu,false,3
3112- tmlu_tour_leader,2013,true,miulab/tmlu,false,3
3113- tmlu_AST_chemistry,2013,true,miulab/tmlu,false,3
3114- tmlu_GSAT_geography,2013,true,miulab/tmlu,false,3
31153077gsm8k_platinum_cot_self_consistency,2002,false,Unknown,false,3
31163078gsm8k_platinum_cot_llama,2002,false,Unknown,false,3
31173079gsm8k_platinum_cot_zeroshot,2002,false,Unknown,false,3
31183080gsm8k_platinum_cot,2002,false,Unknown,false,3
31193081gsm8k_platinum,2002,false,Unknown,false,3
3120- pile_ubuntu-irc,1990,true,EleutherAI/pile,false,3
3121- pile_philpapers,1990,true,EleutherAI/pile,false,3
3122- pile_bookcorpus2,1990,true,EleutherAI/pile,false,3
3123- pile_uspto,1990,true,EleutherAI/pile,false,3
3124- pile_stackexchange,1990,true,EleutherAI/pile,false,3
3125- pile_pubmed-abstracts,1990,true,EleutherAI/pile,false,3
3126- pile_arxiv,1990,true,EleutherAI/pile,false,3
3127- pile_wikipedia,1990,true,EleutherAI/pile,false,3
3128- pile_nih-exporter,1990,true,EleutherAI/pile,false,3
3129- pile_freelaw,1990,true,EleutherAI/pile,false,3
3130- pile_hackernews,1990,true,EleutherAI/pile,false,3
3131- pile_pile-cc,1990,true,EleutherAI/pile,false,3
3132- pile_github,1990,true,EleutherAI/pile,false,3
3133- pile_books3,1990,true,EleutherAI/pile,false,3
3134- pile_dm-mathematics,1990,true,EleutherAI/pile,false,3
3135- pile_youtubesubtitles,1990,true,EleutherAI/pile,false,3
3136- pile_openwebtext2,1990,true,EleutherAI/pile,false,3
3137- pile_opensubtitles,1990,true,EleutherAI/pile,false,3
3138- pile_pubmed-central,1990,true,EleutherAI/pile,false,3
3139- pile_europarl,1990,true,EleutherAI/pile,false,3
3140- pile_enron,1990,true,EleutherAI/pile,false,3
3141- pile_gutenberg,1990,true,EleutherAI/pile,false,3
31423082darijammlu_natural_science,1912,false,Unknown,false,3
31433083darijammlu_high_school_psychology,1912,false,Unknown,false,3
31443084darijammlu_high_school_world_history,1912,false,Unknown,false,3
@@ -3188,8 +3128,6 @@ darijammlu_ar_mmlu,1912,false,Unknown,false,3
31883128darijammlu,1912,false,Unknown,false,3
31893129darijammlu_professional_psychology,1912,false,Unknown,false,3
31903130darijammlu_geography,1912,false,Unknown,false,3
3191- tinyHellaswag,1871,true,tinyBenchmarks/tinyHellaswag,false,3
3192- tinyBenchmarks,1739,true,Unknown,false,3
31933131t0_eval,1693,true,Unknown,false,3
31943132swag,1691,true,swag,false,3
31953133qa4mre_2013,1650,true,qa4mre,false,3
@@ -3703,8 +3641,6 @@ masakhanews_lin_prompt_2,1380,false,Unknown,false,3
37033641masakhanews_yor_prompt_2,1380,false,Unknown,false,3
37043642masakhanews_tir_prompt_5,1380,false,Unknown,false,3
37053643masakhanews_fra_prompt_5,1380,false,Unknown,false,3
3706- tinyArc,1354,true,tinyBenchmarks/tinyAI2_arc,false,3
3707- tinyGSM8k,1344,true,tinyBenchmarks/tinyGSM8k,false,3
37083644japanese_leaderboard,1330,true,Unknown,false,3
37093645arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task,1326,true,OALL/AlGhafa-Arabic-LLM-Benchmark-Native,false,3
37103646arabic_leaderboard_alghafa_meta_ar_msa,1326,true,OALL/AlGhafa-Arabic-LLM-Benchmark-Native,false,3
@@ -4006,7 +3942,6 @@ sib_kon_prompt_2,1258,false,Unknown,false,3
40063942sib_som_prompt_2,1258,false,Unknown,false,3
40073943sib_kab_prompt_2,1258,false,Unknown,false,3
40083944sib_yor_prompt_2,1258,false,Unknown,false,3
4009- tinyWinogrande,1244,true,tinyBenchmarks/tinyWinogrande,false,3
40103945agieval_math,1236,true,hails/agieval-math,false,3
40113946bbq_generate_disambig,1198,false,Unknown,false,3
40123947bbq,1198,false,Unknown,false,3
@@ -4509,8 +4444,6 @@ careqa_en,843,false,Unknown,false,3
45094444careqa_open,843,false,Unknown,false,3
45104445careqa_open_perplexity,843,false,Unknown,false,3
45114446careqa_es,843,false,Unknown,false,3
4512- tinyTruthfulQA_mc1,824,true,tinyBenchmarks/tinyTruthfulQA,false,3
4513- tinyTruthfulQA,824,true,tinyBenchmarks/tinyTruthfulQA,false,3
45144447xcopa_eu,823,true,HiTZ/XCOPA-eu,false,3
45154448AraDiCE_ArabicMMLU_high_social-science_geography_lev,818,true,QCRI/AraDICE-ArabicMMLU-lev,false,3
45164449AraDiCE_ArabicMMLU_primary_social-science_geography_lev,818,true,QCRI/AraDICE-ArabicMMLU-lev,false,3
0 commit comments