First commit.

giuliolovisotto · Sep 17, 2024 · ca2dd8b · ca2dd8b
1 parent fb963f0
commit ca2dd8b
Show file tree

Hide file tree

Showing 872 changed files with 7,154 additions and 0 deletions.
diff --git a/lm_eval/tasks/openai_mmlu/_default_template.yaml b/lm_eval/tasks/openai_mmlu/_default_template.yaml
@@ -0,0 +1,15 @@
+# Shared settings that every per-language, per-subject task YAML pulls in via `include`.
+dataset_path: baber/multilingual_mmlu # multilingual MMLU; each task YAML picks one locale split (e.g. AR_XY) via test_split. NOTE(review): fields/splits differ from `cais/mmlu` — confirm provenance
+fewshot_split: null
+fewshot_config: null
+output_type: multiple_choice
+doc_to_text: "{{Question.strip()}}\nA. {{A.strip()}}\nB. {{B.strip()}}\nC. {{C.strip()}}\nD. {{D.strip()}}\nAnswer:"
+doc_to_choice: ["A", "B", "C", "D"]
+doc_to_target: "{{Answer.strip()}}" # presumably one of the letters in doc_to_choice — verify against the dataset
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 1.0.0
+dataset_kwargs:
+  trust_remote_code: true
diff --git a/lm_eval/tasks/openai_mmlu/_generate_configs.py b/lm_eval/tasks/openai_mmlu/_generate_configs.py
@@ -0,0 +1,164 @@
+"""
+Take in a base YAML and write one task YAML per (language, subject) pair that includes it, plus one group YAML per language.
+"""
+
+import argparse
+import logging
+import os
+from pathlib import Path
+
+import yaml
+from tqdm import tqdm
+
+
+eval_logger = logging.getLogger("lm-eval")
+
+
+# Maps each MMLU subject to its category. The subject is written out as the
+# task's `dataset_name` and is part of the task name; the category is part of
+# the task's `tag`. Iteration order is the order in which files are generated.
+SUBJECTS = {
+    "abstract_algebra": "stem",
+    "anatomy": "stem",
+    "astronomy": "stem",
+    "business_ethics": "other",
+    "clinical_knowledge": "other",
+    "college_biology": "stem",
+    "college_chemistry": "stem",
+    "college_computer_science": "stem",
+    "college_mathematics": "stem",
+    "college_medicine": "other",
+    "college_physics": "stem",
+    "computer_security": "stem",
+    "conceptual_physics": "stem",
+    "econometrics": "social_sciences",
+    "electrical_engineering": "stem",
+    "elementary_mathematics": "stem",
+    "formal_logic": "humanities",
+    "global_facts": "other",
+    "high_school_biology": "stem",
+    "high_school_chemistry": "stem",
+    "high_school_computer_science": "stem",
+    "high_school_european_history": "humanities",
+    "high_school_geography": "social_sciences",
+    "high_school_government_and_politics": "social_sciences",
+    "high_school_macroeconomics": "social_sciences",
+    "high_school_mathematics": "stem",
+    "high_school_microeconomics": "social_sciences",
+    "high_school_physics": "stem",
+    "high_school_psychology": "social_sciences",
+    "high_school_statistics": "stem",
+    "high_school_us_history": "humanities",
+    "high_school_world_history": "humanities",
+    "human_aging": "other",
+    "human_sexuality": "social_sciences",
+    "international_law": "humanities",
+    "jurisprudence": "humanities",
+    "logical_fallacies": "humanities",
+    "machine_learning": "stem",
+    "management": "other",
+    "marketing": "other",
+    "medical_genetics": "other",
+    "miscellaneous": "other",
+    "moral_disputes": "humanities",
+    "moral_scenarios": "humanities",
+    "nutrition": "other",
+    "philosophy": "humanities",
+    "prehistory": "humanities",
+    "professional_accounting": "other",
+    "professional_law": "humanities",
+    "professional_medicine": "other",
+    "professional_psychology": "social_sciences",
+    "public_relations": "social_sciences",
+    "security_studies": "social_sciences",
+    "sociology": "social_sciences",
+    "us_foreign_policy": "social_sciences",
+    "virology": "other",
+    "world_religions": "humanities",
+}
+
+# Maps a locale code to the language name inserted into each task description.
+# The code is written verbatim as the task's `test_split`; its lower-cased form
+# names the output folder and appears in the task, tag and group names.
+LANGUAGES = {
+    "AR_XY": "Arabic (Generic)",
+    "BN_BD": "Bengali (Bangladesh)",
+    "DE_DE": "German (Germany)",
+    "ES_LA": "Spanish (Latin America)",
+    "FR_FR": "French (France)",
+    "HI_IN": "Hindi (India)",
+    "ID_ID": "Indonesian (Indonesia)",
+    "IT_IT": "Italian (Italy)",
+    "JA_JP": "Japanese (Japan)",
+    "KO_KR": "Korean (South Korea)",
+    "PT_BR": "Portuguese (Brazil)",
+    "ZH_CN": "Chinese (China)",
+    "SW_KE": "Swahili (Kenya)",
+    "YO_NG": "Yoruba (Nigeria)",
+    "EN_US": "English (United States)",
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--base_yaml_path", required=True)
+    parser.add_argument("--save_prefix_path", default="openai_mmlu")
+    parser.add_argument("--group_prefix", default="")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
+    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
+    with open(args.base_yaml_path, encoding="utf-8") as f:
+        base_yaml = yaml.full_load(f)
+
+    ALL_CATEGORIES = []
+    for langgode, language_full_name in tqdm(LANGUAGES.items()):
+        _langgode = langgode.lower()
+        out_folder = Path(_langgode)
+        out_folder.mkdir(exist_ok=True)
+        for subject, category in SUBJECTS.items():
+            if category not in ALL_CATEGORIES:
+                ALL_CATEGORIES.append(category)
+
+            description = f"The following are multiple choice questions (with answers) about {' '.join(subject.split('_'))} in the {language_full_name} language.\n\n"
+
+            yaml_dict = {
+                "include": f"../{base_yaml_name}",
+                "tag": f"mmlu_{_langgode}_{category}",
+                "task": f"mmlu_{_langgode}_{subject}",
+                "task_alias": f'{_langgode} {subject.replace("_", " ")}',
+                "dataset_name": subject,
+                "test_split": langgode,
+                "description": description,
+            }
+
+            file_save_path = out_folder / (args.save_prefix_path + f"_{subject}.yaml")
+            eval_logger.info(
+                f"Saving yaml for subset {_langgode},{subject} to {file_save_path}"
+            )
+            with open(file_save_path, "w", encoding="utf-8") as yaml_file:
+                yaml.dump(
+                    yaml_dict,
+                    yaml_file,
+                    allow_unicode=True,
+                    default_style='"',
+                )
+
+            # shutil.copy("_default_template.yaml", out_folder/"_default_template.yaml")
+
+        file_save_path = out_folder / (
+            "_" + args.save_prefix_path + f"_{_langgode}.yaml"
+        )
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
+            dct = {
+                "group": f"openai_mmlu_{_langgode}",
+                "group_alias": _langgode,
+                "task": [f"mmlu_{_langgode}_tasks"],
+                "aggregate_metric_list": [{"metric": "acc", "weight_by_size": True}],
+                "metadata": {"version": "1.0.0"},
+            }
+
+            yaml.dump(
+                dct,
+                yaml_file,
+                indent=4,
+                default_flow_style=False,
+            )
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/_openai_mmlu_ar_xy.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/_openai_mmlu_ar_xy.yaml
@@ -0,0 +1,9 @@
+# Group over all ar_xy subject tasks, selected through the per-category tags
+# that the subject YAMLs in this folder declare.
+aggregate_metric_list:
+-   metric: acc
+    weight_by_size: true
+group: openai_mmlu_ar_xy
+group_alias: ar_xy
+metadata:
+    version: 1.0.0
+task:
+- mmlu_ar_xy_stem
+- mmlu_ar_xy_other
+- mmlu_ar_xy_social_sciences
+- mmlu_ar_xy_humanities
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_abstract_algebra.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_abstract_algebra.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "abstract_algebra"
+"description": "The following are multiple choice questions (with answers) about abstract\
+  \ algebra in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_abstract_algebra"
+"task_alias": "ar_xy abstract algebra"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_anatomy.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_anatomy.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "anatomy"
+"description": "The following are multiple choice questions (with answers) about anatomy\
+  \ in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_anatomy"
+"task_alias": "ar_xy anatomy"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_astronomy.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_astronomy.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "astronomy"
+"description": "The following are multiple choice questions (with answers) about astronomy\
+  \ in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_astronomy"
+"task_alias": "ar_xy astronomy"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_business_ethics.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_business_ethics.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "business_ethics"
+"description": "The following are multiple choice questions (with answers) about business\
+  \ ethics in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_other"
+"task": "mmlu_ar_xy_business_ethics"
+"task_alias": "ar_xy business ethics"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_clinical_knowledge.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_clinical_knowledge.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "clinical_knowledge"
+"description": "The following are multiple choice questions (with answers) about clinical\
+  \ knowledge in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_other"
+"task": "mmlu_ar_xy_clinical_knowledge"
+"task_alias": "ar_xy clinical knowledge"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_biology.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_biology.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "college_biology"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ biology in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_college_biology"
+"task_alias": "ar_xy college biology"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_chemistry.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_chemistry.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "college_chemistry"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ chemistry in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_college_chemistry"
+"task_alias": "ar_xy college chemistry"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_computer_science.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_computer_science.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "college_computer_science"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ computer science in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_college_computer_science"
+"task_alias": "ar_xy college computer science"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_mathematics.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_mathematics.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "college_mathematics"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ mathematics in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_college_mathematics"
+"task_alias": "ar_xy college mathematics"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_medicine.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_medicine.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "college_medicine"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ medicine in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_other"
+"task": "mmlu_ar_xy_college_medicine"
+"task_alias": "ar_xy college medicine"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_physics.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_college_physics.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "college_physics"
+"description": "The following are multiple choice questions (with answers) about college\
+  \ physics in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_college_physics"
+"task_alias": "ar_xy college physics"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_computer_security.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_computer_security.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "computer_security"
+"description": "The following are multiple choice questions (with answers) about computer\
+  \ security in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_computer_security"
+"task_alias": "ar_xy computer security"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_conceptual_physics.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_conceptual_physics.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "conceptual_physics"
+"description": "The following are multiple choice questions (with answers) about conceptual\
+  \ physics in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_conceptual_physics"
+"task_alias": "ar_xy conceptual physics"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_econometrics.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_econometrics.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "econometrics"
+"description": "The following are multiple choice questions (with answers) about econometrics\
+  \ in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_social_sciences"
+"task": "mmlu_ar_xy_econometrics"
+"task_alias": "ar_xy econometrics"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_electrical_engineering.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_electrical_engineering.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "electrical_engineering"
+"description": "The following are multiple choice questions (with answers) about electrical\
+  \ engineering in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_electrical_engineering"
+"task_alias": "ar_xy electrical engineering"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_elementary_mathematics.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_elementary_mathematics.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "elementary_mathematics"
+"description": "The following are multiple choice questions (with answers) about elementary\
+  \ mathematics in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_elementary_mathematics"
+"task_alias": "ar_xy elementary mathematics"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_formal_logic.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_formal_logic.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "formal_logic"
+"description": "The following are multiple choice questions (with answers) about formal\
+  \ logic in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_humanities"
+"task": "mmlu_ar_xy_formal_logic"
+"task_alias": "ar_xy formal logic"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_global_facts.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_global_facts.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "global_facts"
+"description": "The following are multiple choice questions (with answers) about global\
+  \ facts in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_other"
+"task": "mmlu_ar_xy_global_facts"
+"task_alias": "ar_xy global facts"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_biology.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_biology.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "high_school_biology"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school biology in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_high_school_biology"
+"task_alias": "ar_xy high school biology"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_chemistry.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_chemistry.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "high_school_chemistry"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school chemistry in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_high_school_chemistry"
+"task_alias": "ar_xy high school chemistry"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_computer_science.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_computer_science.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "high_school_computer_science"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school computer science in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_stem"
+"task": "mmlu_ar_xy_high_school_computer_science"
+"task_alias": "ar_xy high school computer science"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_european_history.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_european_history.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "high_school_european_history"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school european history in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_humanities"
+"task": "mmlu_ar_xy_high_school_european_history"
+"task_alias": "ar_xy high school european history"
+"test_split": "AR_XY"
diff --git a/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_geography.yaml b/lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_high_school_geography.yaml
@@ -0,0 +1,8 @@
+"dataset_name": "high_school_geography"
+"description": "The following are multiple choice questions (with answers) about high\
+  \ school geography in the Arabic (Generic) language.\n\n"
+"include": "../_default_template.yaml"
+"tag": "mmlu_ar_xy_social_sciences"
+"task": "mmlu_ar_xy_high_school_geography"
+"task_alias": "ar_xy high school geography"
+"test_split": "AR_XY"