From 33237fe4ac100a3df5edf6fea9e4c4b95eb2f42d Mon Sep 17 00:00:00 2001 From: shahrzads Date: Tue, 3 Sep 2024 18:36:33 +0000 Subject: [PATCH] running pre-commit --- .../arabic_leaderboard_complete/README.md | 6 ++--- .../arabic_leaderboard_alghafa.yaml | 2 +- ...leaderboard_alghafa_mcq_exams_test_ar.yaml | 2 +- ..._leaderboard_alghafa_meta_ar_dialects.yaml | 2 +- ...rabic_leaderboard_alghafa_meta_ar_msa.yaml | 2 +- ..._choice_facts_truefalse_balanced_task.yaml | 2 +- ..._choice_grounded_statement_soqal_task.yaml | 2 +- ...ce_grounded_statement_xglue_mlqa_task.yaml | 2 +- ...oice_rating_sentiment_no_neutral_task.yaml | 2 +- ...multiple_choice_rating_sentiment_task.yaml | 2 +- ...lghafa_multiple_choice_sentiment_task.yaml | 2 +- .../arabic_leaderboard_alghafa/utils.py | 13 +++++----- .../arabic_exams.yaml | 2 +- .../arabic_leaderboard_arabic_exams.yaml | 2 +- .../arabic_leaderboard_arabic_exams/utils.py | 15 ++++++------ .../arabic_leaderboard_arabic_mmlu.yaml | 2 +- ...derboard_arabic_mmlu_abstract_algebra.yaml | 2 +- ...rabic_leaderboard_arabic_mmlu_anatomy.yaml | 2 +- ...bic_leaderboard_arabic_mmlu_astronomy.yaml | 2 +- ...aderboard_arabic_mmlu_business_ethics.yaml | 2 +- ...rboard_arabic_mmlu_clinical_knowledge.yaml | 2 +- ...aderboard_arabic_mmlu_college_biology.yaml | 2 +- ...erboard_arabic_mmlu_college_chemistry.yaml | 2 +- ..._arabic_mmlu_college_computer_science.yaml | 2 +- ...board_arabic_mmlu_college_mathematics.yaml | 2 +- ...derboard_arabic_mmlu_college_medicine.yaml | 2 +- ...aderboard_arabic_mmlu_college_physics.yaml | 2 +- ...erboard_arabic_mmlu_computer_security.yaml | 2 +- ...rboard_arabic_mmlu_conceptual_physics.yaml | 2 +- ..._leaderboard_arabic_mmlu_econometrics.yaml | 2 +- ...rd_arabic_mmlu_electrical_engineering.yaml | 2 +- ...rd_arabic_mmlu_elementary_mathematics.yaml | 2 +- ..._leaderboard_arabic_mmlu_formal_logic.yaml | 2 +- ..._leaderboard_arabic_mmlu_global_facts.yaml | 2 +- ...board_arabic_mmlu_high_school_biology.yaml | 2 +- ...ard_arabic_mmlu_high_school_chemistry.yaml | 2 +- ...bic_mmlu_high_school_computer_science.yaml | 2 +- ...bic_mmlu_high_school_european_history.yaml | 2 +- ...ard_arabic_mmlu_high_school_geography.yaml | 2 +- ...u_high_school_government_and_politics.yaml | 2 +- ...rabic_mmlu_high_school_macroeconomics.yaml | 2 +- ...d_arabic_mmlu_high_school_mathematics.yaml | 2 +- ...rabic_mmlu_high_school_microeconomics.yaml | 2 +- ...board_arabic_mmlu_high_school_physics.yaml | 2 +- ...rd_arabic_mmlu_high_school_psychology.yaml | 2 +- ...rd_arabic_mmlu_high_school_statistics.yaml | 2 +- ...rd_arabic_mmlu_high_school_us_history.yaml | 2 +- ...arabic_mmlu_high_school_world_history.yaml | 2 +- ...c_leaderboard_arabic_mmlu_human_aging.yaml | 2 +- ...aderboard_arabic_mmlu_human_sexuality.yaml | 2 +- ...erboard_arabic_mmlu_international_law.yaml | 2 +- ...leaderboard_arabic_mmlu_jurisprudence.yaml | 2 +- ...erboard_arabic_mmlu_logical_fallacies.yaml | 2 +- ...derboard_arabic_mmlu_machine_learning.yaml | 2 +- ...ic_leaderboard_arabic_mmlu_management.yaml | 2 +- ...bic_leaderboard_arabic_mmlu_marketing.yaml | 2 +- ...derboard_arabic_mmlu_medical_genetics.yaml | 2 +- ...leaderboard_arabic_mmlu_miscellaneous.yaml | 2 +- ...eaderboard_arabic_mmlu_moral_disputes.yaml | 2 +- ...aderboard_arabic_mmlu_moral_scenarios.yaml | 2 +- ...bic_leaderboard_arabic_mmlu_nutrition.yaml | 2 +- ...ic_leaderboard_arabic_mmlu_philosophy.yaml | 2 +- ...ic_leaderboard_arabic_mmlu_prehistory.yaml | 2 +- ...d_arabic_mmlu_professional_accounting.yaml | 2 +- ...derboard_arabic_mmlu_professional_law.yaml | 2 +- ...ard_arabic_mmlu_professional_medicine.yaml | 2 +- ...d_arabic_mmlu_professional_psychology.yaml | 2 +- ...derboard_arabic_mmlu_public_relations.yaml | 2 +- ...derboard_arabic_mmlu_security_studies.yaml | 2 +- ...bic_leaderboard_arabic_mmlu_sociology.yaml | 2 +- ...erboard_arabic_mmlu_us_foreign_policy.yaml | 2 +- ...abic_leaderboard_arabic_mmlu_virology.yaml | 2 +- ...aderboard_arabic_mmlu_world_religions.yaml | 2 +- .../arabic_leaderboard_arabic_mmlu/utils.py | 17 ++++++------- ...c_leaderboard_arabic_mt_arc_challenge.yaml | 2 +- .../arabic_mt_arc_challenge.yaml | 2 +- .../utils.py | 12 ++++------ ...arabic_leaderboard_arabic_mt_arc_easy.yaml | 2 +- .../arabic_mt_arc_easy.yaml | 2 +- .../utils.py | 12 ++++------ .../arabic_leaderboard_arabic_mt_boolq.yaml | 2 +- .../arabic_mt_boolq.yaml | 2 +- .../utils.py | 6 ++--- .../arabic_leaderboard_arabic_mt_copa.yaml | 2 +- .../arabic_mt_copa.yaml | 2 +- .../utils.py | 11 ++++----- ...rabic_leaderboard_arabic_mt_hellaswag.yaml | 2 +- .../arabic_mt_hellaswag.yaml | 2 +- .../utils.py | 15 ++++++------ .../arabic_leaderboard_arabic_mt_mmlu.yaml | 2 +- .../arabic_mt_mmlu.yaml | 2 +- .../utils.py | 14 ++++------- ...bic_leaderboard_arabic_mt_openbook_qa.yaml | 2 +- .../arabic_mt_openbook_qa.yaml | 2 +- .../utils.py | 11 ++++----- .../arabic_leaderboard_arabic_mt_piqa.yaml | 2 +- .../arabic_mt_piqa.yaml | 2 +- .../utils.py | 12 ++++------ .../arabic_leaderboard_arabic_mt_race.yaml | 2 +- .../arabic_mt_race.yaml | 2 +- .../utils.py | 14 ++++------- .../arabic_leaderboard_arabic_mt_sciq.yaml | 2 +- .../arabic_mt_sciq.yaml | 2 +- .../utils.py | 24 +++++++++++-------- .../arabic_leaderboard_arabic_mt_toxigen.yaml | 2 +- .../arabic_mt_toxigen.yaml | 2 +- .../utils.py | 2 +- .../arabic_leaderboard_acva.yaml | 2 +- .../arabic_leaderboard_acva_Algeria.yaml | 2 +- ...arabic_leaderboard_acva_Ancient_Egypt.yaml | 2 +- .../arabic_leaderboard_acva_Arab_Empire.yaml | 2 +- ..._leaderboard_acva_Arabic_Architecture.yaml | 2 +- .../arabic_leaderboard_acva_Arabic_Art.yaml | 2 +- ...bic_leaderboard_acva_Arabic_Astronomy.yaml | 2 +- ...c_leaderboard_acva_Arabic_Calligraphy.yaml | 2 +- ...abic_leaderboard_acva_Arabic_Ceremony.yaml | 2 +- ...abic_leaderboard_acva_Arabic_Clothing.yaml | 2 +- ...rabic_leaderboard_acva_Arabic_Culture.yaml | 2 +- .../arabic_leaderboard_acva_Arabic_Food.yaml | 2 +- ...rabic_leaderboard_acva_Arabic_Funeral.yaml | 2 +- ...bic_leaderboard_acva_Arabic_Geography.yaml | 2 +- ...rabic_leaderboard_acva_Arabic_History.yaml | 2 +- ...aderboard_acva_Arabic_Language_Origin.yaml | 2 +- ...ic_leaderboard_acva_Arabic_Literature.yaml | 2 +- .../arabic_leaderboard_acva_Arabic_Math.yaml | 2 +- ...abic_leaderboard_acva_Arabic_Medicine.yaml | 2 +- .../arabic_leaderboard_acva_Arabic_Music.yaml | 2 +- ...abic_leaderboard_acva_Arabic_Ornament.yaml | 2 +- ...ic_leaderboard_acva_Arabic_Philosophy.yaml | 2 +- ...ard_acva_Arabic_Physics_and_Chemistry.yaml | 2 +- ...rabic_leaderboard_acva_Arabic_Wedding.yaml | 2 +- .../arabic_leaderboard_acva_Bahrain.yaml | 2 +- .../arabic_leaderboard_acva_Comoros.yaml | 2 +- .../arabic_leaderboard_acva_Egypt_modern.yaml | 2 +- ...rboard_acva_InfluenceFromAncientEgypt.yaml | 2 +- ...aderboard_acva_InfluenceFromByzantium.yaml | 2 +- ...c_leaderboard_acva_InfluenceFromChina.yaml | 2 +- ..._leaderboard_acva_InfluenceFromGreece.yaml | 2 +- ...c_leaderboard_acva_InfluenceFromIslam.yaml | 2 +- ..._leaderboard_acva_InfluenceFromPersia.yaml | 2 +- ...ic_leaderboard_acva_InfluenceFromRome.yaml | 2 +- .../arabic_leaderboard_acva_Iraq.yaml | 2 +- ...abic_leaderboard_acva_Islam_Education.yaml | 2 +- ...board_acva_Islam_branches_and_schools.yaml | 2 +- ...c_leaderboard_acva_Islamic_law_system.yaml | 2 +- .../arabic_leaderboard_acva_Jordan.yaml | 2 +- .../arabic_leaderboard_acva_Kuwait.yaml | 2 +- .../arabic_leaderboard_acva_Lebanon.yaml | 2 +- .../arabic_leaderboard_acva_Libya.yaml | 2 +- .../arabic_leaderboard_acva_Mauritania.yaml | 2 +- ...erboard_acva_Mesopotamia_civilization.yaml | 2 +- .../arabic_leaderboard_acva_Morocco.yaml | 2 +- .../arabic_leaderboard_acva_Oman.yaml | 2 +- .../arabic_leaderboard_acva_Palestine.yaml | 2 +- .../arabic_leaderboard_acva_Qatar.yaml | 2 +- .../arabic_leaderboard_acva_Saudi_Arabia.yaml | 2 +- .../arabic_leaderboard_acva_Somalia.yaml | 2 +- .../arabic_leaderboard_acva_Sudan.yaml | 2 +- .../arabic_leaderboard_acva_Syria.yaml | 2 +- .../arabic_leaderboard_acva_Tunisia.yaml | 2 +- ...leaderboard_acva_United_Arab_Emirates.yaml | 2 +- .../arabic_leaderboard_acva_Yemen.yaml | 2 +- ...arabic_leaderboard_acva_communication.yaml | 2 +- ...c_leaderboard_acva_computer_and_phone.yaml | 2 +- .../arabic_leaderboard_acva_daily_life.yaml | 2 +- ...arabic_leaderboard_acva_entertainment.yaml | 2 +- .../arabic_leaderboard_avca/utils.py | 6 ++--- .../arabic_leaderboard_complete.yaml | 2 -- .../tasks/arabic_leaderboard_light/README.md | 1 - .../arabic_leaderboard_alghafa_light.yaml | 2 +- ...board_alghafa_mcq_exams_test_ar_light.yaml | 2 +- ...rboard_alghafa_meta_ar_dialects_light.yaml | 2 +- ...leaderboard_alghafa_meta_ar_msa_light.yaml | 2 +- ...e_facts_truefalse_balanced_task_light.yaml | 2 +- ...e_grounded_statement_soqal_task_light.yaml | 2 +- ...unded_statement_xglue_mlqa_task_light.yaml | 2 +- ...ating_sentiment_no_neutral_task_light.yaml | 2 +- ...le_choice_rating_sentiment_task_light.yaml | 2 +- ..._multiple_choice_sentiment_task_light.yaml | 2 +- .../arabic_leaderboard_alghafa_light/utils.py | 13 +++++----- .../arabic_exams_light.yaml | 2 +- ...arabic_leaderboard_arabic_exams_light.yaml | 2 +- .../utils.py | 15 ++++++------ ...rd_arabic_mmlu_abstract_algebra_light.yaml | 2 +- ...leaderboard_arabic_mmlu_anatomy_light.yaml | 2 +- ...aderboard_arabic_mmlu_astronomy_light.yaml | 2 +- ...ard_arabic_mmlu_business_ethics_light.yaml | 2 +- ..._arabic_mmlu_clinical_knowledge_light.yaml | 2 +- ...ard_arabic_mmlu_college_biology_light.yaml | 2 +- ...d_arabic_mmlu_college_chemistry_light.yaml | 2 +- ...c_mmlu_college_computer_science_light.yaml | 2 +- ...arabic_mmlu_college_mathematics_light.yaml | 2 +- ...rd_arabic_mmlu_college_medicine_light.yaml | 2 +- ...ard_arabic_mmlu_college_physics_light.yaml | 2 +- ...d_arabic_mmlu_computer_security_light.yaml | 2 +- ..._arabic_mmlu_conceptual_physics_light.yaml | 2 +- ...rboard_arabic_mmlu_econometrics_light.yaml | 2 +- ...bic_mmlu_electrical_engineering_light.yaml | 2 +- ...bic_mmlu_elementary_mathematics_light.yaml | 2 +- ...rboard_arabic_mmlu_formal_logic_light.yaml | 2 +- ...rboard_arabic_mmlu_global_facts_light.yaml | 2 +- ...arabic_mmlu_high_school_biology_light.yaml | 2 +- ...abic_mmlu_high_school_chemistry_light.yaml | 2 +- ...lu_high_school_computer_science_light.yaml | 2 +- ...lu_high_school_european_history_light.yaml | 2 +- ...abic_mmlu_high_school_geography_light.yaml | 2 +- ..._school_government_and_politics_light.yaml | 2 +- ...mmlu_high_school_macroeconomics_light.yaml | 2 +- ...ic_mmlu_high_school_mathematics_light.yaml | 2 +- ...mmlu_high_school_microeconomics_light.yaml | 2 +- ...arabic_mmlu_high_school_physics_light.yaml | 2 +- ...bic_mmlu_high_school_psychology_light.yaml | 2 +- ...bic_mmlu_high_school_statistics_light.yaml | 2 +- ...bic_mmlu_high_school_us_history_light.yaml | 2 +- ..._mmlu_high_school_world_history_light.yaml | 2 +- ...erboard_arabic_mmlu_human_aging_light.yaml | 2 +- ...ard_arabic_mmlu_human_sexuality_light.yaml | 2 +- ...d_arabic_mmlu_international_law_light.yaml | 2 +- ...board_arabic_mmlu_jurisprudence_light.yaml | 2 +- .../arabic_leaderboard_arabic_mmlu_light.yaml | 2 +- ...d_arabic_mmlu_logical_fallacies_light.yaml | 2 +- ...rd_arabic_mmlu_machine_learning_light.yaml | 2 +- ...derboard_arabic_mmlu_management_light.yaml | 2 +- ...aderboard_arabic_mmlu_marketing_light.yaml | 2 +- ...rd_arabic_mmlu_medical_genetics_light.yaml | 2 +- ...board_arabic_mmlu_miscellaneous_light.yaml | 2 +- ...oard_arabic_mmlu_moral_disputes_light.yaml | 2 +- ...ard_arabic_mmlu_moral_scenarios_light.yaml | 2 +- ...aderboard_arabic_mmlu_nutrition_light.yaml | 2 +- ...derboard_arabic_mmlu_philosophy_light.yaml | 2 +- ...derboard_arabic_mmlu_prehistory_light.yaml | 2 +- ...ic_mmlu_professional_accounting_light.yaml | 2 +- ...rd_arabic_mmlu_professional_law_light.yaml | 2 +- ...abic_mmlu_professional_medicine_light.yaml | 2 +- ...ic_mmlu_professional_psychology_light.yaml | 2 +- ...rd_arabic_mmlu_public_relations_light.yaml | 2 +- ...rd_arabic_mmlu_security_studies_light.yaml | 2 +- ...aderboard_arabic_mmlu_sociology_light.yaml | 2 +- ...d_arabic_mmlu_us_foreign_policy_light.yaml | 2 +- ...eaderboard_arabic_mmlu_virology_light.yaml | 2 +- ...ard_arabic_mmlu_world_religions_light.yaml | 2 +- .../utils.py | 17 ++++++------- ...erboard_arabic_mt_arc_challenge_light.yaml | 2 +- .../arabic_mt_arc_challenge_light.yaml | 2 +- .../utils.py | 12 ++++------ ..._leaderboard_arabic_mt_arc_easy_light.yaml | 2 +- .../arabic_mt_arc_easy_light.yaml | 2 +- .../utils.py | 12 ++++------ ...bic_leaderboard_arabic_mt_boolq_light.yaml | 2 +- .../arabic_mt_boolq_light.yaml | 2 +- .../utils.py | 6 ++--- .../arabic_mt_copa_light.yaml | 2 +- ...rbic_leaderboard_arabic_mt_copa_light.yaml | 2 +- .../utils.py | 11 ++++----- ...leaderboard_arabic_mt_hellaswag_light.yaml | 2 +- .../arabic_mt_hellaswag_light.yaml | 2 +- .../utils.py | 15 ++++++------ ...abic_leaderboard_arabic_mt_mmlu_light.yaml | 2 +- .../arabic_mt_mmlu_light.yaml | 2 +- .../utils.py | 14 ++++------- ...aderboard_arabic_mt_openbook_qa_light.yaml | 2 +- .../arabic_mt_openbook_qa_light.yaml | 2 +- .../utils.py | 11 ++++----- ...abic_leaderboard_arabic_mt_piqa_light.yaml | 2 +- .../arabic_mt_piqa_light.yaml | 2 +- .../utils.py | 12 ++++------ ...abic_leaderboard_arabic_mt_race_light.yaml | 2 +- .../arabic_mt_race_light.yaml | 2 +- .../utils.py | 14 ++++------- ...abic_leaderboard_arabic_mt_sciq_light.yaml | 2 +- .../arabic_mt_sciq_light.yaml | 2 +- .../utils.py | 24 +++++++++++-------- ...c_leaderboard_arabic_mt_toxigen_light.yaml | 2 +- .../arabic_mt_toxigen_light.yaml | 2 +- .../utils.py | 2 +- ...arabic_leaderboard_acva_Algeria_light.yaml | 2 +- ..._leaderboard_acva_Ancient_Egypt_light.yaml | 2 +- ...ic_leaderboard_acva_Arab_Empire_light.yaml | 2 +- ...rboard_acva_Arabic_Architecture_light.yaml | 2 +- ...bic_leaderboard_acva_Arabic_Art_light.yaml | 2 +- ...aderboard_acva_Arabic_Astronomy_light.yaml | 2 +- ...erboard_acva_Arabic_Calligraphy_light.yaml | 2 +- ...eaderboard_acva_Arabic_Ceremony_light.yaml | 2 +- ...eaderboard_acva_Arabic_Clothing_light.yaml | 2 +- ...leaderboard_acva_Arabic_Culture_light.yaml | 2 +- ...ic_leaderboard_acva_Arabic_Food_light.yaml | 2 +- ...leaderboard_acva_Arabic_Funeral_light.yaml | 2 +- ...aderboard_acva_Arabic_Geography_light.yaml | 2 +- ...leaderboard_acva_Arabic_History_light.yaml | 2 +- ...ard_acva_Arabic_Language_Origin_light.yaml | 2 +- ...derboard_acva_Arabic_Literature_light.yaml | 2 +- ...ic_leaderboard_acva_Arabic_Math_light.yaml | 2 +- ...eaderboard_acva_Arabic_Medicine_light.yaml | 2 +- ...c_leaderboard_acva_Arabic_Music_light.yaml | 2 +- ...eaderboard_acva_Arabic_Ornament_light.yaml | 2 +- ...derboard_acva_Arabic_Philosophy_light.yaml | 2 +- ...va_Arabic_Physics_and_Chemistry_light.yaml | 2 +- ...leaderboard_acva_Arabic_Wedding_light.yaml | 2 +- ...arabic_leaderboard_acva_Bahrain_light.yaml | 2 +- ...arabic_leaderboard_acva_Comoros_light.yaml | 2 +- ...c_leaderboard_acva_Egypt_modern_light.yaml | 2 +- ..._acva_InfluenceFromAncientEgypt_light.yaml | 2 +- ...ard_acva_InfluenceFromByzantium_light.yaml | 2 +- ...erboard_acva_InfluenceFromChina_light.yaml | 2 +- ...rboard_acva_InfluenceFromGreece_light.yaml | 2 +- ...erboard_acva_InfluenceFromIslam_light.yaml | 2 +- ...rboard_acva_InfluenceFromPersia_light.yaml | 2 +- ...derboard_acva_InfluenceFromRome_light.yaml | 2 +- .../arabic_leaderboard_acva_Iraq_light.yaml | 2 +- ...eaderboard_acva_Islam_Education_light.yaml | 2 +- ...acva_Islam_branches_and_schools_light.yaml | 2 +- ...erboard_acva_Islamic_law_system_light.yaml | 2 +- .../arabic_leaderboard_acva_Jordan_light.yaml | 2 +- .../arabic_leaderboard_acva_Kuwait_light.yaml | 2 +- ...arabic_leaderboard_acva_Lebanon_light.yaml | 2 +- .../arabic_leaderboard_acva_Libya_light.yaml | 2 +- ...bic_leaderboard_acva_Mauritania_light.yaml | 2 +- ...d_acva_Mesopotamia_civilization_light.yaml | 2 +- ...arabic_leaderboard_acva_Morocco_light.yaml | 2 +- .../arabic_leaderboard_acva_Oman_light.yaml | 2 +- ...abic_leaderboard_acva_Palestine_light.yaml | 2 +- .../arabic_leaderboard_acva_Qatar_light.yaml | 2 +- ...c_leaderboard_acva_Saudi_Arabia_light.yaml | 2 +- ...arabic_leaderboard_acva_Somalia_light.yaml | 2 +- .../arabic_leaderboard_acva_Sudan_light.yaml | 2 +- .../arabic_leaderboard_acva_Syria_light.yaml | 2 +- ...arabic_leaderboard_acva_Tunisia_light.yaml | 2 +- ...board_acva_United_Arab_Emirates_light.yaml | 2 +- .../arabic_leaderboard_acva_Yemen_light.yaml | 2 +- ..._leaderboard_acva_communication_light.yaml | 2 +- ...erboard_acva_computer_and_phone_light.yaml | 2 +- ...bic_leaderboard_acva_daily_life_light.yaml | 2 +- ..._leaderboard_acva_entertainment_light.yaml | 2 +- .../arabic_leaderboard_acva_light.yaml | 2 +- .../arabic_leaderboard_avca_light/utils.py | 6 ++--- .../arabic_leaderboard_light.yaml | 2 -- 336 files changed, 474 insertions(+), 509 deletions(-) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/README.md b/lm_eval/tasks/arabic_leaderboard_complete/README.md index d50e549dc5..8052abcbd6 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/README.md +++ b/lm_eval/tasks/arabic_leaderboard_complete/README.md @@ -59,7 +59,7 @@ Homepage: https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard abstract = "Recent advances in the space of Arabic large language models have opened up a wealth of potential practical applications. From optimal training strategies, large scale data acquisition and continuously increasing NLP resources, the Arabic LLM landscape has improved in a very short span of time, despite being plagued by training data scarcity and limited evaluation resources compared to English. In line with contributing towards this ever-growing field, we introduce AlGhafa, a new multiple-choice evaluation benchmark for Arabic LLMs. For showcasing purposes, we train a new suite of models, including a 14 billion parameter model, the largest monolingual Arabic decoder-only model to date. We use a collection of publicly available datasets, as well as a newly introduced HandMade dataset consisting of 8 billion tokens. Finally, we explore the quantitative and qualitative toxicity of several Arabic models, comparing our models to existing public Arabic LLMs.", } @misc{huang2023acegpt, - title={AceGPT, Localizing Large Language Models in Arabic}, + title={AceGPT, Localizing Large Language Models in Arabic}, author={Huang Huang and Fei Yu and Jianqing Zhu and Xuening Sun and Hao Cheng and Dingjie Song and Zhihong Chen and Abdulmohsen Alharthi and Bang An and Ziche Liu and Zhiyi Zhang and Junying Chen and Jianquan Li and Benyou Wang and Lian Zhang and Ruoyu Sun and Xiang Wan and Haizhou Li and Jinchao Xu}, year={2023}, eprint={2309.12053}, @@ -239,7 +239,7 @@ Homepage: https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard - `arabic_leaderboard_acva_computer_and_phone` - `arabic_leaderboard_acva_daily_life` - `arabic_leaderboard_acva_entertainment` - + ### Checklist For adding novel benchmarks/datasets to the library: @@ -252,5 +252,3 @@ If other tasks on this dataset are already supported: * [ ] Is the "Main" variant of this task clearly denoted? * [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? * [ ] Have you noted which, if any, published evaluation setups are matched by this variant? - - diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa.yaml index 422e9ad2ad..6f0014d812 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa.yaml @@ -20,4 +20,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_mcq_exams_test_ar.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_mcq_exams_test_ar.yaml index f3b62c84cc..e436e29574 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_mcq_exams_test_ar.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_mcq_exams_test_ar.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_dialects.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_dialects.yaml index d2a665ccac..f19c2ecefe 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_dialects.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_dialects.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_msa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_msa.yaml index e5862d4870..0d95ec5a06 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_msa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_meta_ar_msa.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task.yaml index b6fab2c1a9..46d2b6abcf 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task.yaml index 71d5e6d5b9..13150c690b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task.yaml index 60e45e81e8..3a17548f8e 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task.yaml index 2a0160dcd8..8e34a45c7a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task.yaml index a0fdcbac05..b31748516a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_sentiment_task.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_sentiment_task.yaml index fc3acdf200..191b26ba0a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_sentiment_task.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/arabic_leaderboard_alghafa_multiple_choice_sentiment_task.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/utils.py index 47237f3e5d..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/utils.py @@ -7,7 +7,9 @@ def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -16,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } - return dataset.map(_process_doc) \ No newline at end of file + return {"query": query, "choices": choices, "gold": answer_index} + + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_exams.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_exams.yaml index f9c8868201..edc20fe4b9 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_exams.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_exams.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_leaderboard_arabic_exams.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_leaderboard_arabic_exams.yaml index c04a998349..2bf77eb361 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_leaderboard_arabic_exams.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/arabic_leaderboard_arabic_exams.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/utils.py index 1df1edadfa..72af1c40fe 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/utils.py @@ -1,6 +1,7 @@ import datasets import numpy as np + # fmt: off LETTER_INDICES_AR = ["أ", "ب", "ج", "د", "هـ", "و", "ز", "ح", "ط", "ي", "ك", "ل", "م", "ن", "س", "ع", "ف", "ص", "ق", "ر", "ش", "ت", "ث", "خ", "ذ", "ض", "ظ", "غ"] # fmt: on @@ -10,12 +11,15 @@ LETTER_INDICES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"] # fmt: on + def process_docs(dataset: datasets.Dataset): def _process_doc(doc): topic = doc["subject"] question = doc["question"] choices = [doc["A"], doc["B"], doc["C"], doc["D"]] - choices_formatted = [f" {LETTER_INDICES_AR[i]}) {choice}\n" for i, choice in enumerate(choices)] + choices_formatted = [ + f" {LETTER_INDICES_AR[i]}) {choice}\n" for i, choice in enumerate(choices) + ] answer = doc["answer"] answer_index = LETTER_INDICES.index(answer) @@ -24,9 +28,6 @@ def _process_doc(doc): query += "\n".join(choices_formatted) query += "\nالإجابة:" - return { - "query": query, - "choices": LETTER_INDICES_AR[:4], - "gold": answer_index - } - return dataset.map(_process_doc) \ No newline at end of file + return {"query": query, "choices": LETTER_INDICES_AR[:4], "gold": answer_index} + + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu.yaml index 363d2c26d3..ad2751bf32 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu.yaml @@ -65,4 +65,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_abstract_algebra.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_abstract_algebra.yaml index f307401139..3d0946be2c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_abstract_algebra.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_abstract_algebra.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_anatomy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_anatomy.yaml index 2867edfb28..24af11dd2f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_anatomy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_anatomy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_astronomy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_astronomy.yaml index 41941148f2..0aa9680906 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_astronomy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_astronomy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_business_ethics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_business_ethics.yaml index 5d797cceb3..18c941e422 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_business_ethics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_business_ethics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_clinical_knowledge.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_clinical_knowledge.yaml index f0ef66fcd9..9460403c98 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_clinical_knowledge.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_clinical_knowledge.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_biology.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_biology.yaml index 2a64fff6e3..2f34d342d6 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_biology.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_biology.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_chemistry.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_chemistry.yaml index 97d73bc02f..17d63b60bb 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_chemistry.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_chemistry.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_computer_science.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_computer_science.yaml index 75ae499b96..a3f5d3e84c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_computer_science.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_computer_science.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_mathematics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_mathematics.yaml index 46d5087a0c..0284093dd9 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_mathematics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_mathematics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_medicine.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_medicine.yaml index 14a0118754..e21246e7be 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_medicine.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_medicine.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_physics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_physics.yaml index 2768ec2443..ab23f490f3 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_physics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_college_physics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_computer_security.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_computer_security.yaml index 51b9ebd982..96624cd02f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_computer_security.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_computer_security.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_conceptual_physics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_conceptual_physics.yaml index 273ce7c8bb..cd605de40a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_conceptual_physics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_conceptual_physics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_econometrics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_econometrics.yaml index 24ed1355d7..60c9f373a3 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_econometrics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_econometrics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_electrical_engineering.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_electrical_engineering.yaml index 6d1b74dbc8..83aa42a620 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_electrical_engineering.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_electrical_engineering.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_elementary_mathematics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_elementary_mathematics.yaml index 76607a2ecb..ac06d9ec7c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_elementary_mathematics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_elementary_mathematics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_formal_logic.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_formal_logic.yaml index c08b4a8c07..5e1d60758b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_formal_logic.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_formal_logic.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_global_facts.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_global_facts.yaml index 90e40b0e31..074248d8fe 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_global_facts.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_global_facts.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_biology.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_biology.yaml index a0d00503d3..09862e1ce6 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_biology.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_biology.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_chemistry.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_chemistry.yaml index 0d64442c50..849ad63ed7 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_chemistry.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_chemistry.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_computer_science.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_computer_science.yaml index 4a5972a5ed..e91bfe7fb9 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_computer_science.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_computer_science.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_european_history.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_european_history.yaml index 89f0b73c5c..912e57bfab 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_european_history.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_european_history.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_geography.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_geography.yaml index c441e0e5e3..33c41db0f1 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_geography.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_geography.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics.yaml index 84cb482aaa..16689f115f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics.yaml index f04c7aa7d7..04ec5d7431 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_mathematics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_mathematics.yaml index bd5b37b27d..fd4ebd5161 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_mathematics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_mathematics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_microeconomics.yaml index 813bde1830..7ba3eea694 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_microeconomics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_microeconomics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_physics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_physics.yaml index fa13b37a71..8d53cca80e 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_physics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_physics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_psychology.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_psychology.yaml index 391a7ff7d4..129733d1dd 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_psychology.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_psychology.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_statistics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_statistics.yaml index 725c9f9b85..b23e1a77e5 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_statistics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_statistics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_us_history.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_us_history.yaml index c7120e200e..cc6ec9a397 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_us_history.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_us_history.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_world_history.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_world_history.yaml index 67bca534c8..b537669fec 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_world_history.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_high_school_world_history.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_aging.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_aging.yaml index 01443156ea..62124769b1 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_aging.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_aging.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_sexuality.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_sexuality.yaml index 4696364f29..bf6c298b8a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_sexuality.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_human_sexuality.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_international_law.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_international_law.yaml index aee72667aa..feec16f59b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_international_law.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_international_law.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_jurisprudence.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_jurisprudence.yaml index 20305fcb2d..fcc1a3ab9c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_jurisprudence.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_jurisprudence.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_logical_fallacies.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_logical_fallacies.yaml index 42d6665478..c6de637bae 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_logical_fallacies.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_logical_fallacies.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_machine_learning.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_machine_learning.yaml index 727c17da00..bf191fc7c8 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_machine_learning.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_machine_learning.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_management.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_management.yaml index 7c3455895e..4bbc800cfe 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_management.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_management.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_marketing.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_marketing.yaml index b8d452d194..59694487eb 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_marketing.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_marketing.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_medical_genetics.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_medical_genetics.yaml index 31e8d74606..88f0de37c3 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_medical_genetics.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_medical_genetics.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_miscellaneous.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_miscellaneous.yaml index 6cafe6f7d0..da333e4536 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_miscellaneous.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_miscellaneous.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_disputes.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_disputes.yaml index 5aab97ffd3..1d0d07945f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_disputes.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_disputes.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_scenarios.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_scenarios.yaml index 3ee48d1374..c0c924650f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_scenarios.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_moral_scenarios.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_nutrition.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_nutrition.yaml index 8cb6f75998..24ad69b90d 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_nutrition.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_nutrition.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_philosophy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_philosophy.yaml index d6fea9c395..a57dcf7ecd 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_philosophy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_philosophy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_prehistory.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_prehistory.yaml index ff4b2838e9..45ba2e5de2 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_prehistory.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_prehistory.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_accounting.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_accounting.yaml index 8205dc9219..d931a00099 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_accounting.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_accounting.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_law.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_law.yaml index 0043604444..e11d0368f5 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_law.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_law.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_medicine.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_medicine.yaml index ae60fd0ddc..7a10d8157f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_medicine.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_medicine.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_psychology.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_psychology.yaml index b38353c9da..bb12274adb 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_psychology.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_professional_psychology.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_public_relations.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_public_relations.yaml index 5d6e58ea0e..3361f775b4 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_public_relations.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_public_relations.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_security_studies.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_security_studies.yaml index 1cf5a8c100..781a6145f0 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_security_studies.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_security_studies.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_sociology.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_sociology.yaml index 8d3503d0b9..2c80872c97 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_sociology.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_sociology.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_us_foreign_policy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_us_foreign_policy.yaml index 2b4ce93574..f767e0a78d 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_us_foreign_policy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_us_foreign_policy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_virology.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_virology.yaml index 87ff6e9293..8103face6c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_virology.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_virology.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_world_religions.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_world_religions.yaml index b6d4c17405..31c563cc53 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_world_religions.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/arabic_leaderboard_arabic_mmlu_world_religions.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/utils.py index 16c3017d56..da927b66fc 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/utils.py @@ -1,6 +1,7 @@ import datasets import numpy as np + # fmt: off LETTER_INDICES_AR = ["أ", "ب", "ج", "د", "هـ", "و", "ز", "ح", "ط", "ي", "ك", "ل", "م", "ن", "س", "ع", "ف", "ص", "ق", "ر", "ش", "ت", "ث", "خ", "ذ", "ض", "ظ", "غ"] # fmt: on @@ -21,14 +22,14 @@ def _process_doc(doc): gold_ix = LETTER_INDICES.index(doc["answer"]) query = f"{instruction}{doc['question']}\n" - query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES_AR[:4], choices)]) + query += "".join( + [ + f"{key}. {choice}\n" + for key, choice in zip(LETTER_INDICES_AR[:4], choices) + ] + ) query += "الإجابة:" - return { - "query": query, - "choices": LETTER_INDICES_AR[:4], - "gold": gold_ix - - } + return {"query": query, "choices": LETTER_INDICES_AR[:4], "gold": gold_ix} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_leaderboard_arabic_mt_arc_challenge.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_leaderboard_arabic_mt_arc_challenge.yaml index be4d0bb316..f49aed0716 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_leaderboard_arabic_mt_arc_challenge.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_leaderboard_arabic_mt_arc_challenge.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_mt_arc_challenge.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_mt_arc_challenge.yaml index 705712d1c4..e0b245aabb 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_mt_arc_challenge.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/arabic_mt_arc_challenge.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/utils.py index 944d31c13f..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_leaderboard_arabic_mt_arc_easy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_leaderboard_arabic_mt_arc_easy.yaml index 359865c9cf..6abd5fa21b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_leaderboard_arabic_mt_arc_easy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_leaderboard_arabic_mt_arc_easy.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_mt_arc_easy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_mt_arc_easy.yaml index 8648f1855b..b629529f06 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_mt_arc_easy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/arabic_mt_arc_easy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/utils.py index 944d31c13f..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_leaderboard_arabic_mt_boolq.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_leaderboard_arabic_mt_boolq.yaml index 6f5e65f6eb..5072f01dd7 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_leaderboard_arabic_mt_boolq.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_leaderboard_arabic_mt_boolq.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_mt_boolq.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_mt_boolq.yaml index 648c8901f1..299570af81 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_mt_boolq.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/arabic_mt_boolq.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/utils.py index ebbda4c4da..dcbc10d92e 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/utils.py @@ -6,7 +6,6 @@ def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["question"] passage = doc["passage"] - answer = "نعم" if doc["answer"] else "لا" instruction = "بناء على المقطع التالي، أجب عن السؤال ب نعم أو لا" query = f"""{instruction} المقطع : @@ -19,6 +18,7 @@ def _process_doc(doc): return { "query": query, "choices": ["نعم", "لا"], - "gold": 0 if doc["answer"] else 1 + "gold": 0 if doc["answer"] else 1, } - return dataset.map(_process_doc) \ No newline at end of file + + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_leaderboard_arabic_mt_copa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_leaderboard_arabic_mt_copa.yaml index 232a70fd2d..3ef88d9c37 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_leaderboard_arabic_mt_copa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_leaderboard_arabic_mt_copa.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_mt_copa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_mt_copa.yaml index fedaf503fa..e9483e1de5 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_mt_copa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/arabic_mt_copa.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/utils.py index e3de70063f..175ebdadc1 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/utils.py @@ -10,11 +10,10 @@ def _process_doc(doc): question = question_map[doc["question"]] answer = doc["label"] - query = "{}، {} :\n0) {}\n1) {}\nالإجابة:".format(premise, question, choices[0], choices[1]) + query = "{}، {} :\n0) {}\n1) {}\nالإجابة:".format( + premise, question, choices[0], choices[1] + ) + + return {"query": query, "choices": choices, "gold": answer} - return { - "query": query, - "choices": choices, - "gold": answer - } return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_leaderboard_arabic_mt_hellaswag.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_leaderboard_arabic_mt_hellaswag.yaml index ebbb4eec96..a70f5ab68d 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_leaderboard_arabic_mt_hellaswag.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_leaderboard_arabic_mt_hellaswag.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_mt_hellaswag.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_mt_hellaswag.yaml index 7c765a20ad..59a4547485 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_mt_hellaswag.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/arabic_mt_hellaswag.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/utils.py index 4dce04ddc3..6b5a9f1f4f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/utils.py @@ -1,7 +1,7 @@ -import datasets -import numpy as np import re +import datasets +import numpy as np def process_docs(dataset: datasets.Dataset): @@ -11,7 +11,9 @@ def _process_doc(doc): re.sub(r"\[.*?\]", "", e) for e in eval(doc["endings"]) ] # endings is a string representation of a list answer_index = doc["label"] - instruction = "بناء على السياق التالي، اختر النهاية الصحيحة من الاقتراحات التالية" + instruction = ( + "بناء على السياق التالي، اختر النهاية الصحيحة من الاقتراحات التالية" + ) query = f"""{instruction} السياق: @@ -23,9 +25,6 @@ def _process_doc(doc): query += f"{i}) {ending}\n" query += "الإجابة:" - return { - "query": query, - "choices": endings, - "gold": answer_index - } + return {"query": query, "choices": endings, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_leaderboard_arabic_mt_mmlu.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_leaderboard_arabic_mt_mmlu.yaml index 30a40fdacd..0188b5ddc4 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_leaderboard_arabic_mt_mmlu.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_leaderboard_arabic_mt_mmlu.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_mt_mmlu.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_mt_mmlu.yaml index 4f8a473c86..4f3cd249c2 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_mt_mmlu.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/arabic_mt_mmlu.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/utils.py index c17598d55e..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,11 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - - } + return {"query": query, "choices": choices, "gold": answer_index} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_leaderboard_arabic_mt_openbook_qa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_leaderboard_arabic_mt_openbook_qa.yaml index aae25581ec..dd3b78f4d0 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_leaderboard_arabic_mt_openbook_qa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_leaderboard_arabic_mt_openbook_qa.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_mt_openbook_qa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_mt_openbook_qa.yaml index e02a119503..b826a18927 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_mt_openbook_qa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/arabic_mt_openbook_qa.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/utils.py index b385956f1b..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/utils.py @@ -7,7 +7,9 @@ def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -16,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_leaderboard_arabic_mt_piqa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_leaderboard_arabic_mt_piqa.yaml index a4732d9009..b75bcc2b1c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_leaderboard_arabic_mt_piqa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_leaderboard_arabic_mt_piqa.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_mt_piqa.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_mt_piqa.yaml index a557cb856f..fa93a937a8 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_mt_piqa.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/arabic_mt_piqa.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/utils.py index 944d31c13f..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_leaderboard_arabic_mt_race.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_leaderboard_arabic_mt_race.yaml index c850b7e970..f3f91c278d 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_leaderboard_arabic_mt_race.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_leaderboard_arabic_mt_race.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_mt_race.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_mt_race.yaml index 78d756516d..ec2aee6898 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_mt_race.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/arabic_mt_race.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/utils.py index c17598d55e..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,11 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - - } + return {"query": query, "choices": choices, "gold": answer_index} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_leaderboard_arabic_mt_sciq.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_leaderboard_arabic_mt_sciq.yaml index 3625aff56b..7768047c4c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_leaderboard_arabic_mt_sciq.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_leaderboard_arabic_mt_sciq.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_mt_sciq.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_mt_sciq.yaml index c2fd248f88..07f96b7574 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_mt_sciq.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/arabic_mt_sciq.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/utils.py index 7783db27c3..ddb42eeb8c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/utils.py @@ -1,9 +1,13 @@ -import random +import random + import datasets import numpy as np + def doc_to_text(doc): - instruction = "بناءً على السياق أدناه، اختر الإجابة الصحيحة للسؤال التالي من قائمة الاقتراحات" + instruction = ( + "بناءً على السياق أدناه، اختر الإجابة الصحيحة للسؤال التالي من قائمة الاقتراحات" + ) support = doc["support"] question = doc["question"] query = f"""{instruction} @@ -20,18 +24,18 @@ def doc_to_text(doc): def process_docs(dataset: datasets.Dataset): def _process_doc(doc): correct_answer = doc["correct_answer"] - choices = [doc["distractor1"], doc["distractor2"], doc["distractor3"], correct_answer] + choices = [ + doc["distractor1"], + doc["distractor2"], + doc["distractor3"], + correct_answer, + ] # Shuffle the choices random.shuffle(choices) answer_index = choices.index(correct_answer) - return { - "query": doc_to_text(doc), - "choices": choices, - "gold": answer_index - - } + return {"query": doc_to_text(doc), "choices": choices, "gold": answer_index} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_leaderboard_arabic_mt_toxigen.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_leaderboard_arabic_mt_toxigen.yaml index 9266b0e8b8..272166206b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_leaderboard_arabic_mt_toxigen.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_leaderboard_arabic_mt_toxigen.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_mt_toxigen.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_mt_toxigen.yaml index 3e958c50d4..8a140793d7 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_mt_toxigen.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/arabic_mt_toxigen.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/utils.py index 24a3f54be7..09d311e5a1 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/utils.py @@ -20,4 +20,4 @@ def _process_doc(doc): } return out_doc - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva.yaml index 636954d19d..8e2dab57b0 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva.yaml @@ -67,4 +67,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Algeria.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Algeria.yaml index 9958f7ec4d..177161edaa 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Algeria.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Algeria.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Ancient_Egypt.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Ancient_Egypt.yaml index 5c049e6e8f..ddb5c35555 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Ancient_Egypt.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Ancient_Egypt.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arab_Empire.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arab_Empire.yaml index 06f40c2197..b510de5ab9 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arab_Empire.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arab_Empire.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Architecture.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Architecture.yaml index e35a023bfb..5dc2c07dee 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Architecture.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Architecture.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Art.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Art.yaml index 872737a478..36f364bc50 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Art.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Art.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Astronomy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Astronomy.yaml index 2b0477e27a..f90b1c9140 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Astronomy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Astronomy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Calligraphy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Calligraphy.yaml index 3114799d9e..dfdf51878b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Calligraphy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Calligraphy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ceremony.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ceremony.yaml index db07161977..c20b4439e2 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ceremony.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ceremony.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Clothing.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Clothing.yaml index b4bfec204b..06118034dc 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Clothing.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Clothing.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Culture.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Culture.yaml index 06076550a2..cea33022b4 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Culture.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Culture.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Food.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Food.yaml index f006fca998..cca516c972 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Food.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Food.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Funeral.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Funeral.yaml index 28c7539834..3dd8fbedd9 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Funeral.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Funeral.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Geography.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Geography.yaml index 203140abb5..89aa7361b3 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Geography.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Geography.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_History.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_History.yaml index 74d89d2b39..776589c07b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_History.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_History.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Language_Origin.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Language_Origin.yaml index 46ceabc492..4f0612acaf 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Language_Origin.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Language_Origin.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Literature.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Literature.yaml index 63abc4dec8..0c9198f446 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Literature.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Literature.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Math.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Math.yaml index 63c07b4489..02a3643024 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Math.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Math.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Medicine.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Medicine.yaml index 2539009def..109aae994a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Medicine.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Medicine.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Music.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Music.yaml index f1581611f0..2559625784 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Music.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Music.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ornament.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ornament.yaml index c4b30d5f8f..00311e107e 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ornament.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Ornament.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Philosophy.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Philosophy.yaml index d2d04795eb..62a570f00c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Philosophy.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Philosophy.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry.yaml index 93829c922b..b1b52096e4 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Wedding.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Wedding.yaml index ef58cd933f..21205cfff8 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Wedding.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Arabic_Wedding.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Bahrain.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Bahrain.yaml index 98b8c16a0f..3b2481bc87 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Bahrain.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Bahrain.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Comoros.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Comoros.yaml index d241a897fa..be4df372c7 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Comoros.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Comoros.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Egypt_modern.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Egypt_modern.yaml index 07135890b8..26ca2f6e08 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Egypt_modern.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Egypt_modern.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromAncientEgypt.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromAncientEgypt.yaml index 64598756ce..be300fc869 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromAncientEgypt.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromAncientEgypt.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromByzantium.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromByzantium.yaml index 52ef299b20..72c86a6247 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromByzantium.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromByzantium.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromChina.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromChina.yaml index fc04e6ed92..b297642cbe 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromChina.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromChina.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromGreece.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromGreece.yaml index af473d9f1a..70458ea2d3 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromGreece.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromGreece.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromIslam.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromIslam.yaml index 153cf2db52..803f33345d 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromIslam.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromIslam.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromPersia.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromPersia.yaml index 5e317368ec..117ca89079 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromPersia.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromPersia.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromRome.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromRome.yaml index 248a388a14..1655522e5a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromRome.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_InfluenceFromRome.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Iraq.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Iraq.yaml index 819afa934a..909c6678c7 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Iraq.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Iraq.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_Education.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_Education.yaml index 21e18ac9e0..13c1fab2a0 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_Education.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_Education.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_branches_and_schools.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_branches_and_schools.yaml index 3accd0d6c8..6985b24a74 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_branches_and_schools.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islam_branches_and_schools.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islamic_law_system.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islamic_law_system.yaml index 20e52014fc..d19a52ba03 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islamic_law_system.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Islamic_law_system.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Jordan.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Jordan.yaml index 41522ab6c5..7bff93a94c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Jordan.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Jordan.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Kuwait.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Kuwait.yaml index 49418133f3..b1ae77aaa5 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Kuwait.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Kuwait.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Lebanon.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Lebanon.yaml index 1eb8fbca05..65974b74dc 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Lebanon.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Lebanon.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Libya.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Libya.yaml index 87c405fcb1..c8b339650c 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Libya.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Libya.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mauritania.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mauritania.yaml index aee859cfb5..1b84074abc 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mauritania.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mauritania.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mesopotamia_civilization.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mesopotamia_civilization.yaml index 169d00f782..4218947702 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mesopotamia_civilization.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Mesopotamia_civilization.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Morocco.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Morocco.yaml index 9c76b26493..4ed1510bb5 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Morocco.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Morocco.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Oman.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Oman.yaml index 817a983c11..b534cfb19f 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Oman.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Oman.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Palestine.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Palestine.yaml index ab3a5711aa..1cb9b56a85 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Palestine.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Palestine.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Qatar.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Qatar.yaml index 8dacaf82a3..5d5775ccd9 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Qatar.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Qatar.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Saudi_Arabia.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Saudi_Arabia.yaml index 28ea8d2a5b..5010723661 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Saudi_Arabia.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Saudi_Arabia.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Somalia.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Somalia.yaml index 3b6d0a6c9d..d40b578221 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Somalia.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Somalia.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Sudan.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Sudan.yaml index 691380bab1..e7c2f41a3b 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Sudan.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Sudan.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Syria.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Syria.yaml index 64389847eb..98ebff9fca 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Syria.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Syria.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Tunisia.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Tunisia.yaml index efc52381e6..d86e428cc3 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Tunisia.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Tunisia.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_United_Arab_Emirates.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_United_Arab_Emirates.yaml index fc6a686e1e..f41b625508 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_United_Arab_Emirates.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_United_Arab_Emirates.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Yemen.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Yemen.yaml index 78ad4735d9..b239dd514a 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Yemen.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_Yemen.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_communication.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_communication.yaml index ed125dcc96..beb954efce 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_communication.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_communication.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_computer_and_phone.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_computer_and_phone.yaml index cae5713fcb..888f82af92 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_computer_and_phone.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_computer_and_phone.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_daily_life.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_daily_life.yaml index 1dd668dc61..0b4748a297 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_daily_life.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_daily_life.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_entertainment.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_entertainment.yaml index 4a150147bb..b2adcfb954 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_entertainment.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/arabic_leaderboard_acva_entertainment.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/utils.py b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/utils.py index 1cf3a4927e..7e91496f59 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/utils.py @@ -2,7 +2,6 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["question"] @@ -11,8 +10,7 @@ def _process_doc(doc): return { "query": f"السؤال: {question}\nالإجابة:", "choices": ["صح", "خطأ"], - "gold": ["صح", "خطأ"].index(answer) - + "gold": ["صح", "خطأ"].index(answer), } - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_complete.yaml b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_complete.yaml index 4cb1ab0090..c26370157d 100644 --- a/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_complete.yaml +++ b/lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_complete.yaml @@ -23,5 +23,3 @@ aggregate_metric_list: weight_by_size: true metadata: version: 1.0 - - diff --git a/lm_eval/tasks/arabic_leaderboard_light/README.md b/lm_eval/tasks/arabic_leaderboard_light/README.md index 5d76028bf6..199aa2c8da 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/README.md +++ b/lm_eval/tasks/arabic_leaderboard_light/README.md @@ -18,4 +18,3 @@ If other tasks on this dataset are already supported: * [ ] Is the "Main" variant of this task clearly denoted? * [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? * [ ] Have you noted which, if any, published evaluation setups are matched by this variant? - diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_light.yaml index 3e0d5083d5..0ee6a568d9 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_light.yaml @@ -20,4 +20,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_mcq_exams_test_ar_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_mcq_exams_test_ar_light.yaml index b64dc33ab6..1fdda36405 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_mcq_exams_test_ar_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_mcq_exams_test_ar_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_dialects_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_dialects_light.yaml index b32b45877f..47af55b86a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_dialects_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_dialects_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_msa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_msa_light.yaml index 868acdc394..9a26a2653f 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_msa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_meta_ar_msa_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task_light.yaml index 84ebb8aa38..b56ddfee19 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task_light.yaml index 809f7939ad..4d85c68491 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task_light.yaml index 8cbdf15c01..e5d8afefea 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task_light.yaml index a3372c87b4..21721d2a2d 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task_light.yaml index 66a1efe3c0..39f72e4d2a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_sentiment_task_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_sentiment_task_light.yaml index 33113f12f2..28b0701561 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_sentiment_task_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/arabic_leaderboard_alghafa_multiple_choice_sentiment_task_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/utils.py index 47237f3e5d..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/utils.py @@ -7,7 +7,9 @@ def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -16,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } - return dataset.map(_process_doc) \ No newline at end of file + return {"query": query, "choices": choices, "gold": answer_index} + + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_exams_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_exams_light.yaml index 8db6903518..2348be4eb3 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_exams_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_exams_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_leaderboard_arabic_exams_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_leaderboard_arabic_exams_light.yaml index 962a0f900c..296a47cbb4 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_leaderboard_arabic_exams_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/arabic_leaderboard_arabic_exams_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/utils.py index 1df1edadfa..72af1c40fe 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/utils.py @@ -1,6 +1,7 @@ import datasets import numpy as np + # fmt: off LETTER_INDICES_AR = ["أ", "ب", "ج", "د", "هـ", "و", "ز", "ح", "ط", "ي", "ك", "ل", "م", "ن", "س", "ع", "ف", "ص", "ق", "ر", "ش", "ت", "ث", "خ", "ذ", "ض", "ظ", "غ"] # fmt: on @@ -10,12 +11,15 @@ LETTER_INDICES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"] # fmt: on + def process_docs(dataset: datasets.Dataset): def _process_doc(doc): topic = doc["subject"] question = doc["question"] choices = [doc["A"], doc["B"], doc["C"], doc["D"]] - choices_formatted = [f" {LETTER_INDICES_AR[i]}) {choice}\n" for i, choice in enumerate(choices)] + choices_formatted = [ + f" {LETTER_INDICES_AR[i]}) {choice}\n" for i, choice in enumerate(choices) + ] answer = doc["answer"] answer_index = LETTER_INDICES.index(answer) @@ -24,9 +28,6 @@ def _process_doc(doc): query += "\n".join(choices_formatted) query += "\nالإجابة:" - return { - "query": query, - "choices": LETTER_INDICES_AR[:4], - "gold": answer_index - } - return dataset.map(_process_doc) \ No newline at end of file + return {"query": query, "choices": LETTER_INDICES_AR[:4], "gold": answer_index} + + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_abstract_algebra_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_abstract_algebra_light.yaml index 143644244e..dcb59fc361 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_abstract_algebra_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_abstract_algebra_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_anatomy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_anatomy_light.yaml index bf98a69323..fc77a66dde 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_anatomy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_anatomy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_astronomy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_astronomy_light.yaml index a3318b048f..db4a9b4360 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_astronomy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_astronomy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_business_ethics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_business_ethics_light.yaml index b6edadcab3..a747dbafaf 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_business_ethics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_business_ethics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_clinical_knowledge_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_clinical_knowledge_light.yaml index b9588fad92..1296b90cbc 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_clinical_knowledge_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_clinical_knowledge_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_biology_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_biology_light.yaml index 97bb6a72fa..cbfc804974 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_biology_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_biology_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_chemistry_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_chemistry_light.yaml index ee619d32b0..ac0970355b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_chemistry_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_chemistry_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_computer_science_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_computer_science_light.yaml index 1ccaff4cec..361274d64a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_computer_science_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_computer_science_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_mathematics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_mathematics_light.yaml index 504904f8c6..20e4d6e627 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_mathematics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_mathematics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_medicine_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_medicine_light.yaml index 8422125744..d854004973 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_medicine_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_medicine_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_physics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_physics_light.yaml index ad377e6507..57e4b55033 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_physics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_college_physics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_computer_security_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_computer_security_light.yaml index ef9258402b..dd8c01dc6c 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_computer_security_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_computer_security_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_conceptual_physics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_conceptual_physics_light.yaml index 4e57610131..cffd7ee42d 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_conceptual_physics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_conceptual_physics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_econometrics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_econometrics_light.yaml index c2199e6448..30413feff0 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_econometrics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_econometrics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_electrical_engineering_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_electrical_engineering_light.yaml index edcd649413..e60787d675 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_electrical_engineering_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_electrical_engineering_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_elementary_mathematics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_elementary_mathematics_light.yaml index d0f3b48a3e..571476620a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_elementary_mathematics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_elementary_mathematics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_formal_logic_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_formal_logic_light.yaml index ece5649613..9b2bebf1e5 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_formal_logic_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_formal_logic_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_global_facts_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_global_facts_light.yaml index 7d48ff9884..15c3b34aac 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_global_facts_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_global_facts_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_biology_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_biology_light.yaml index bab8a14e97..906c33284d 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_biology_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_biology_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_chemistry_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_chemistry_light.yaml index 0f51266f03..199f16b093 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_chemistry_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_chemistry_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_computer_science_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_computer_science_light.yaml index 6ec14ec85a..cb23af53bb 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_computer_science_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_computer_science_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_european_history_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_european_history_light.yaml index 927654d39c..25a9b46695 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_european_history_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_european_history_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_geography_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_geography_light.yaml index a40ede3410..f7f39cd2f2 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_geography_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_geography_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light.yaml index 0a40aa8704..dff09d6717 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light.yaml index bacea7fe76..ae42622353 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_mathematics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_mathematics_light.yaml index 83155313a1..8adc3d7e93 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_mathematics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_mathematics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light.yaml index 77aacb7e4f..6eec39237b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_physics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_physics_light.yaml index 17279d3079..973bd1ffc5 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_physics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_physics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_psychology_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_psychology_light.yaml index 47012025cc..614dd7e89d 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_psychology_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_psychology_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_statistics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_statistics_light.yaml index c7e079456a..2db9f196a3 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_statistics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_statistics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_us_history_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_us_history_light.yaml index be9c261140..5411e8c479 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_us_history_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_us_history_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_world_history_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_world_history_light.yaml index be8020a2a5..319c49b22b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_world_history_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_high_school_world_history_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_aging_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_aging_light.yaml index ec88f24965..afd2eefa29 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_aging_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_aging_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_sexuality_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_sexuality_light.yaml index e7f452eea4..9e245f2687 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_sexuality_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_human_sexuality_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_international_law_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_international_law_light.yaml index 581e26d2df..6e476bb879 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_international_law_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_international_law_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_jurisprudence_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_jurisprudence_light.yaml index c326dd2700..1d848cd173 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_jurisprudence_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_jurisprudence_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_light.yaml index c8925a5aee..130713702c 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_light.yaml @@ -65,4 +65,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_logical_fallacies_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_logical_fallacies_light.yaml index 1ed4da565e..866420ba28 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_logical_fallacies_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_logical_fallacies_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_machine_learning_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_machine_learning_light.yaml index 065139ba13..01ed181e01 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_machine_learning_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_machine_learning_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_management_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_management_light.yaml index ce9a2fe13f..62d7e32ab0 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_management_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_management_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_marketing_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_marketing_light.yaml index 0e7e330b78..c42f7a177b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_marketing_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_marketing_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_medical_genetics_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_medical_genetics_light.yaml index af6ec0904e..40d0d88326 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_medical_genetics_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_medical_genetics_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_miscellaneous_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_miscellaneous_light.yaml index 4e3b384385..06bc6a4715 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_miscellaneous_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_miscellaneous_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_disputes_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_disputes_light.yaml index 9ff3848221..be0c60e631 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_disputes_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_disputes_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_scenarios_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_scenarios_light.yaml index be8e8363b4..08e71366d1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_scenarios_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_moral_scenarios_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_nutrition_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_nutrition_light.yaml index 5e7dbbea60..7987f5f36c 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_nutrition_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_nutrition_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_philosophy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_philosophy_light.yaml index 8c6ade7f8f..85ebdd7a4f 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_philosophy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_philosophy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_prehistory_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_prehistory_light.yaml index 7accf1eb3b..24aa8e22fe 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_prehistory_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_prehistory_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_accounting_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_accounting_light.yaml index 1632db2368..1dc009663c 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_accounting_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_accounting_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_law_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_law_light.yaml index 8de8808d18..6e8c3617db 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_law_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_law_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_medicine_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_medicine_light.yaml index 0cfde74f74..b90cdb38d8 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_medicine_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_medicine_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_psychology_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_psychology_light.yaml index 51c5d37bae..420a536243 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_psychology_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_professional_psychology_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_public_relations_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_public_relations_light.yaml index fd52903ed8..83d267bc08 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_public_relations_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_public_relations_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_security_studies_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_security_studies_light.yaml index 55bc16a3e6..03e05d66e7 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_security_studies_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_security_studies_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_sociology_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_sociology_light.yaml index 0471d009b2..7deb088396 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_sociology_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_sociology_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_us_foreign_policy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_us_foreign_policy_light.yaml index 54c4be9845..6c5f40a55e 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_us_foreign_policy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_us_foreign_policy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_virology_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_virology_light.yaml index 94a359e5a8..5ee4a7c95b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_virology_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_virology_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_world_religions_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_world_religions_light.yaml index 20874b0db9..57b13f05b8 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_world_religions_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/arabic_leaderboard_arabic_mmlu_world_religions_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/utils.py index 16c3017d56..da927b66fc 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/utils.py @@ -1,6 +1,7 @@ import datasets import numpy as np + # fmt: off LETTER_INDICES_AR = ["أ", "ب", "ج", "د", "هـ", "و", "ز", "ح", "ط", "ي", "ك", "ل", "م", "ن", "س", "ع", "ف", "ص", "ق", "ر", "ش", "ت", "ث", "خ", "ذ", "ض", "ظ", "غ"] # fmt: on @@ -21,14 +22,14 @@ def _process_doc(doc): gold_ix = LETTER_INDICES.index(doc["answer"]) query = f"{instruction}{doc['question']}\n" - query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES_AR[:4], choices)]) + query += "".join( + [ + f"{key}. {choice}\n" + for key, choice in zip(LETTER_INDICES_AR[:4], choices) + ] + ) query += "الإجابة:" - return { - "query": query, - "choices": LETTER_INDICES_AR[:4], - "gold": gold_ix - - } + return {"query": query, "choices": LETTER_INDICES_AR[:4], "gold": gold_ix} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_leaderboard_arabic_mt_arc_challenge_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_leaderboard_arabic_mt_arc_challenge_light.yaml index 7018c3ed85..a88bd6bd9e 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_leaderboard_arabic_mt_arc_challenge_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_leaderboard_arabic_mt_arc_challenge_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_mt_arc_challenge_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_mt_arc_challenge_light.yaml index f6d3eab9b3..e6b299e846 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_mt_arc_challenge_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/arabic_mt_arc_challenge_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/utils.py index 944d31c13f..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_leaderboard_arabic_mt_arc_easy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_leaderboard_arabic_mt_arc_easy_light.yaml index ce668ff32c..618b542952 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_leaderboard_arabic_mt_arc_easy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_leaderboard_arabic_mt_arc_easy_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_mt_arc_easy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_mt_arc_easy_light.yaml index 9802d9faf4..90252fb31d 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_mt_arc_easy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/arabic_mt_arc_easy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/utils.py index 944d31c13f..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_leaderboard_arabic_mt_boolq_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_leaderboard_arabic_mt_boolq_light.yaml index ada9d91535..ee02f9cbc9 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_leaderboard_arabic_mt_boolq_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_leaderboard_arabic_mt_boolq_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_mt_boolq_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_mt_boolq_light.yaml index 814d61c9a8..4bdd145ce6 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_mt_boolq_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/arabic_mt_boolq_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/utils.py index ebbda4c4da..dcbc10d92e 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/utils.py @@ -6,7 +6,6 @@ def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["question"] passage = doc["passage"] - answer = "نعم" if doc["answer"] else "لا" instruction = "بناء على المقطع التالي، أجب عن السؤال ب نعم أو لا" query = f"""{instruction} المقطع : @@ -19,6 +18,7 @@ def _process_doc(doc): return { "query": query, "choices": ["نعم", "لا"], - "gold": 0 if doc["answer"] else 1 + "gold": 0 if doc["answer"] else 1, } - return dataset.map(_process_doc) \ No newline at end of file + + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arabic_mt_copa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arabic_mt_copa_light.yaml index aa28b0779c..0ca475e735 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arabic_mt_copa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arabic_mt_copa_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arbic_leaderboard_arabic_mt_copa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arbic_leaderboard_arabic_mt_copa_light.yaml index f99e54623f..f3ea35bc50 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arbic_leaderboard_arabic_mt_copa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/arbic_leaderboard_arabic_mt_copa_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/utils.py index e3de70063f..175ebdadc1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/utils.py @@ -10,11 +10,10 @@ def _process_doc(doc): question = question_map[doc["question"]] answer = doc["label"] - query = "{}، {} :\n0) {}\n1) {}\nالإجابة:".format(premise, question, choices[0], choices[1]) + query = "{}، {} :\n0) {}\n1) {}\nالإجابة:".format( + premise, question, choices[0], choices[1] + ) + + return {"query": query, "choices": choices, "gold": answer} - return { - "query": query, - "choices": choices, - "gold": answer - } return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_leaderboard_arabic_mt_hellaswag_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_leaderboard_arabic_mt_hellaswag_light.yaml index 3b29292ae4..0f44bbbc75 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_leaderboard_arabic_mt_hellaswag_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_leaderboard_arabic_mt_hellaswag_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_mt_hellaswag_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_mt_hellaswag_light.yaml index 47e43c46ea..56ea04f248 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_mt_hellaswag_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/arabic_mt_hellaswag_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/utils.py index 4dce04ddc3..6b5a9f1f4f 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/utils.py @@ -1,7 +1,7 @@ -import datasets -import numpy as np import re +import datasets +import numpy as np def process_docs(dataset: datasets.Dataset): @@ -11,7 +11,9 @@ def _process_doc(doc): re.sub(r"\[.*?\]", "", e) for e in eval(doc["endings"]) ] # endings is a string representation of a list answer_index = doc["label"] - instruction = "بناء على السياق التالي، اختر النهاية الصحيحة من الاقتراحات التالية" + instruction = ( + "بناء على السياق التالي، اختر النهاية الصحيحة من الاقتراحات التالية" + ) query = f"""{instruction} السياق: @@ -23,9 +25,6 @@ def _process_doc(doc): query += f"{i}) {ending}\n" query += "الإجابة:" - return { - "query": query, - "choices": endings, - "gold": answer_index - } + return {"query": query, "choices": endings, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_leaderboard_arabic_mt_mmlu_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_leaderboard_arabic_mt_mmlu_light.yaml index f45f1bb95a..b95ca1b531 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_leaderboard_arabic_mt_mmlu_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_leaderboard_arabic_mt_mmlu_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_mt_mmlu_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_mt_mmlu_light.yaml index c29a77dbba..43084db30b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_mt_mmlu_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/arabic_mt_mmlu_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/utils.py index c17598d55e..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,11 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - - } + return {"query": query, "choices": choices, "gold": answer_index} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_leaderboard_arabic_mt_openbook_qa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_leaderboard_arabic_mt_openbook_qa_light.yaml index 8c369131bc..3737f621fb 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_leaderboard_arabic_mt_openbook_qa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_leaderboard_arabic_mt_openbook_qa_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_mt_openbook_qa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_mt_openbook_qa_light.yaml index 243c0230f4..5e914fbd32 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_mt_openbook_qa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/arabic_mt_openbook_qa_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/utils.py index b385956f1b..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/utils.py @@ -7,7 +7,9 @@ def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -16,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_leaderboard_arabic_mt_piqa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_leaderboard_arabic_mt_piqa_light.yaml index f31c70c038..642b2e0a60 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_leaderboard_arabic_mt_piqa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_leaderboard_arabic_mt_piqa_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_mt_piqa_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_mt_piqa_light.yaml index 152cf79a78..4dd9e005a9 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_mt_piqa_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/arabic_mt_piqa_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/utils.py index 944d31c13f..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,9 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - } + return {"query": query, "choices": choices, "gold": answer_index} + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_leaderboard_arabic_mt_race_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_leaderboard_arabic_mt_race_light.yaml index 4b30d49c09..8f427484d1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_leaderboard_arabic_mt_race_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_leaderboard_arabic_mt_race_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_mt_race_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_mt_race_light.yaml index 8606ee4a8f..fed452cce6 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_mt_race_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/arabic_mt_race_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/utils.py index c17598d55e..62f9874e63 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/utils.py @@ -2,13 +2,14 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["query"] answer_index = int(doc["label"]) # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label' - choices_keys = [key for key in doc.keys() if key not in ["query", "label", "__few_shots"]] + choices_keys = [ + key for key in doc.keys() if key not in ["query", "label", "__few_shots"] + ] choices = [doc[key] for key in choices_keys] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" @@ -17,11 +18,6 @@ def _process_doc(doc): query += f"{index}) {choice}\n" query += "الإجابة:" - return { - "query": query, - "choices": choices, - "gold": answer_index - - } + return {"query": query, "choices": choices, "gold": answer_index} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_leaderboard_arabic_mt_sciq_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_leaderboard_arabic_mt_sciq_light.yaml index be53d6c4c7..13127e9915 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_leaderboard_arabic_mt_sciq_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_leaderboard_arabic_mt_sciq_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_mt_sciq_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_mt_sciq_light.yaml index b22a0c7e81..95976cbb7f 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_mt_sciq_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/arabic_mt_sciq_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/utils.py index 7783db27c3..ddb42eeb8c 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/utils.py @@ -1,9 +1,13 @@ -import random +import random + import datasets import numpy as np + def doc_to_text(doc): - instruction = "بناءً على السياق أدناه، اختر الإجابة الصحيحة للسؤال التالي من قائمة الاقتراحات" + instruction = ( + "بناءً على السياق أدناه، اختر الإجابة الصحيحة للسؤال التالي من قائمة الاقتراحات" + ) support = doc["support"] question = doc["question"] query = f"""{instruction} @@ -20,18 +24,18 @@ def doc_to_text(doc): def process_docs(dataset: datasets.Dataset): def _process_doc(doc): correct_answer = doc["correct_answer"] - choices = [doc["distractor1"], doc["distractor2"], doc["distractor3"], correct_answer] + choices = [ + doc["distractor1"], + doc["distractor2"], + doc["distractor3"], + correct_answer, + ] # Shuffle the choices random.shuffle(choices) answer_index = choices.index(correct_answer) - return { - "query": doc_to_text(doc), - "choices": choices, - "gold": answer_index - - } + return {"query": doc_to_text(doc), "choices": choices, "gold": answer_index} - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_leaderboard_arabic_mt_toxigen_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_leaderboard_arabic_mt_toxigen_light.yaml index 0462e3c7ce..7e305d5496 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_leaderboard_arabic_mt_toxigen_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_leaderboard_arabic_mt_toxigen_light.yaml @@ -10,4 +10,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_mt_toxigen_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_mt_toxigen_light.yaml index defcb571c6..b2bef8abae 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_mt_toxigen_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/arabic_mt_toxigen_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/utils.py index 24a3f54be7..09d311e5a1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/utils.py @@ -20,4 +20,4 @@ def _process_doc(doc): } return out_doc - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Algeria_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Algeria_light.yaml index bf12b9b49f..4ab4634f60 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Algeria_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Algeria_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Ancient_Egypt_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Ancient_Egypt_light.yaml index dddd942564..ab6fffedc1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Ancient_Egypt_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Ancient_Egypt_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arab_Empire_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arab_Empire_light.yaml index 308fde09e1..886574ebf2 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arab_Empire_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arab_Empire_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Architecture_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Architecture_light.yaml index fe649b363e..e57472ad6e 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Architecture_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Architecture_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Art_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Art_light.yaml index acd4ba01d2..e94340e755 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Art_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Art_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Astronomy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Astronomy_light.yaml index 0ac71ebc51..e8ed990d52 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Astronomy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Astronomy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Calligraphy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Calligraphy_light.yaml index 3b92d25b50..cd41bdde6a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Calligraphy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Calligraphy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ceremony_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ceremony_light.yaml index 74310472df..72c6705479 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ceremony_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ceremony_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Clothing_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Clothing_light.yaml index 2e8d314420..9348de07f7 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Clothing_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Clothing_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Culture_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Culture_light.yaml index ef918c2be2..4f211064d6 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Culture_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Culture_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Food_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Food_light.yaml index af6b95bf3d..7ccef6746f 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Food_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Food_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Funeral_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Funeral_light.yaml index 8297f5ca9e..941154787b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Funeral_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Funeral_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Geography_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Geography_light.yaml index 1fbcaff9b6..36221d8899 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Geography_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Geography_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_History_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_History_light.yaml index 1ab5525037..2e12831816 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_History_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_History_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Language_Origin_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Language_Origin_light.yaml index 132edff3eb..8060604355 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Language_Origin_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Language_Origin_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Literature_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Literature_light.yaml index bf720a74bd..3122e39531 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Literature_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Literature_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Math_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Math_light.yaml index 915d273a53..0182aedac7 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Math_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Math_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Medicine_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Medicine_light.yaml index 741dca470b..aec88febf1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Medicine_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Medicine_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Music_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Music_light.yaml index c91b787d65..35a771898a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Music_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Music_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ornament_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ornament_light.yaml index bd40dc0921..6b31186cd6 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ornament_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Ornament_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Philosophy_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Philosophy_light.yaml index 96e58fd29a..f6b5fa71f1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Philosophy_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Philosophy_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry_light.yaml index 7021841b88..559d729c9b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Physics_and_Chemistry_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Wedding_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Wedding_light.yaml index 35bbf4f1a6..9241709c13 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Wedding_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Arabic_Wedding_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Bahrain_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Bahrain_light.yaml index f049120d66..b9c7cef57a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Bahrain_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Bahrain_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Comoros_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Comoros_light.yaml index 9a5c6f6f3e..1f74bd46c5 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Comoros_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Comoros_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Egypt_modern_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Egypt_modern_light.yaml index 3b60028705..e0b19cff58 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Egypt_modern_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Egypt_modern_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromAncientEgypt_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromAncientEgypt_light.yaml index 103ce6981a..6cf755a2a8 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromAncientEgypt_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromAncientEgypt_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromByzantium_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromByzantium_light.yaml index 1ca7c7009d..8fe285eb12 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromByzantium_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromByzantium_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromChina_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromChina_light.yaml index 679001412b..bb028b0892 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromChina_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromChina_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromGreece_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromGreece_light.yaml index a54c15ffe5..25060acc1a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromGreece_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromGreece_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromIslam_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromIslam_light.yaml index 2cba9d419c..0a60a2f3f0 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromIslam_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromIslam_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromPersia_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromPersia_light.yaml index 5275844b28..7081bec227 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromPersia_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromPersia_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromRome_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromRome_light.yaml index dd7f4dd6b7..8c64cf3bbe 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromRome_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_InfluenceFromRome_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Iraq_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Iraq_light.yaml index 7636a65d10..a056a9cf04 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Iraq_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Iraq_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_Education_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_Education_light.yaml index 355d430fc4..e8f6ad45d9 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_Education_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_Education_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_branches_and_schools_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_branches_and_schools_light.yaml index d856b0225f..98137e9a3a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_branches_and_schools_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islam_branches_and_schools_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islamic_law_system_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islamic_law_system_light.yaml index 4f1c168299..d9aff345da 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islamic_law_system_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Islamic_law_system_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Jordan_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Jordan_light.yaml index 07b1789b16..674a998e01 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Jordan_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Jordan_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Kuwait_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Kuwait_light.yaml index 01d0caa884..0c3d372d9e 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Kuwait_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Kuwait_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Lebanon_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Lebanon_light.yaml index 6ce6ba60e5..9c3856d698 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Lebanon_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Lebanon_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Libya_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Libya_light.yaml index 088ed26749..6070ccbfb8 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Libya_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Libya_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mauritania_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mauritania_light.yaml index 7afe2db8f1..0b1deda614 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mauritania_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mauritania_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mesopotamia_civilization_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mesopotamia_civilization_light.yaml index cd1c09d4d0..65474b724b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mesopotamia_civilization_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Mesopotamia_civilization_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Morocco_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Morocco_light.yaml index 799680333f..d752434a5a 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Morocco_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Morocco_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Oman_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Oman_light.yaml index f4a2ecd897..448498f4a1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Oman_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Oman_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Palestine_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Palestine_light.yaml index 96c43e2bd1..a619c460a1 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Palestine_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Palestine_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Qatar_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Qatar_light.yaml index 865a4a29f2..967dbc57ef 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Qatar_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Qatar_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Saudi_Arabia_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Saudi_Arabia_light.yaml index 986deff4a4..d45558b9ff 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Saudi_Arabia_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Saudi_Arabia_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Somalia_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Somalia_light.yaml index 2d7c4490b8..558ea176a3 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Somalia_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Somalia_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Sudan_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Sudan_light.yaml index 9f01cc5b85..ce59973306 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Sudan_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Sudan_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Syria_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Syria_light.yaml index 2a37f429f9..8b0bd7aebc 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Syria_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Syria_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Tunisia_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Tunisia_light.yaml index c614335e48..a53c5e0bf9 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Tunisia_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Tunisia_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_United_Arab_Emirates_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_United_Arab_Emirates_light.yaml index 17eddf36a5..1ce5993a67 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_United_Arab_Emirates_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_United_Arab_Emirates_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Yemen_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Yemen_light.yaml index d62fd84c75..e480b19d60 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Yemen_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_Yemen_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_communication_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_communication_light.yaml index 65aaaa4bd3..2814278ace 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_communication_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_communication_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_computer_and_phone_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_computer_and_phone_light.yaml index 6b9257d120..ddd07e3f50 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_computer_and_phone_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_computer_and_phone_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_daily_life_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_daily_life_light.yaml index e25b0b4a15..2d975e4e85 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_daily_life_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_daily_life_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_entertainment_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_entertainment_light.yaml index 06cc268e9a..721e6cdd3b 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_entertainment_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_entertainment_light.yaml @@ -20,4 +20,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_light.yaml index b900e78554..ea4a89771f 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/arabic_leaderboard_acva_light.yaml @@ -67,4 +67,4 @@ aggregate_metric_list: aggregation: mean weight_by_size: true metadata: - version: 1.0 \ No newline at end of file + version: 1.0 diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/utils.py b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/utils.py index 1cf3a4927e..7e91496f59 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/utils.py +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/utils.py @@ -2,7 +2,6 @@ import numpy as np - def process_docs(dataset: datasets.Dataset): def _process_doc(doc): question = doc["question"] @@ -11,8 +10,7 @@ def _process_doc(doc): return { "query": f"السؤال: {question}\nالإجابة:", "choices": ["صح", "خطأ"], - "gold": ["صح", "خطأ"].index(answer) - + "gold": ["صح", "خطأ"].index(answer), } - return dataset.map(_process_doc) \ No newline at end of file + return dataset.map(_process_doc) diff --git a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_light.yaml b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_light.yaml index 8458e11534..d77ebd1eeb 100644 --- a/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_light.yaml +++ b/lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_light.yaml @@ -23,5 +23,3 @@ aggregate_metric_list: weight_by_size: true metadata: version: 1.0 - -