Skip to content

Commit

Permalink
First commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
giuliolovisotto committed Sep 17, 2024
1 parent fb963f0 commit ca2dd8b
Show file tree
Hide file tree
Showing 872 changed files with 7,154 additions and 0 deletions.
15 changes: 15 additions & 0 deletions lm_eval/tasks/openai_mmlu/_default_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
dataset_path: baber/multilingual_mmlu # a copy of `cais/mmlu` with no auxiliary_train split
fewshot_split: null
fewshot_config: null
output_type: multiple_choice
doc_to_text: "{{Question.strip()}}\nA. {{A.strip()}}\nB. {{B.strip()}}\nC. {{C.strip()}}\nD. {{D.strip()}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: "{{Answer.strip()}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0.0
dataset_kwargs:
  trust_remote_code: true
164 changes: 164 additions & 0 deletions lm_eval/tasks/openai_mmlu/_generate_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""
Take a base YAML template and generate, for every language, one task YAML per
subject that `include`s the template, plus one group YAML per language.
"""

import argparse
import logging
import os
from pathlib import Path

import yaml
from tqdm import tqdm


# Logger obtained under the name "lm-eval"; the script uses it to report each
# task YAML it writes.
eval_logger = logging.getLogger("lm-eval")


# Maps each subject name to its category. The subject is written out as the
# task's `dataset_name` and is part of the task name and output file name; the
# category is embedded in the task's `tag`.
SUBJECTS = {
    "abstract_algebra": "stem",
    "anatomy": "stem",
    "astronomy": "stem",
    "business_ethics": "other",
    "clinical_knowledge": "other",
    "college_biology": "stem",
    "college_chemistry": "stem",
    "college_computer_science": "stem",
    "college_mathematics": "stem",
    "college_medicine": "other",
    "college_physics": "stem",
    "computer_security": "stem",
    "conceptual_physics": "stem",
    "econometrics": "social_sciences",
    "electrical_engineering": "stem",
    "elementary_mathematics": "stem",
    "formal_logic": "humanities",
    "global_facts": "other",
    "high_school_biology": "stem",
    "high_school_chemistry": "stem",
    "high_school_computer_science": "stem",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_macroeconomics": "social_sciences",
    "high_school_mathematics": "stem",
    "high_school_microeconomics": "social_sciences",
    "high_school_physics": "stem",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_us_history": "humanities",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "human_sexuality": "social_sciences",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "machine_learning": "stem",
    "management": "other",
    "marketing": "other",
    "medical_genetics": "other",
    "miscellaneous": "other",
    "moral_disputes": "humanities",
    "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "prehistory": "humanities",
    "professional_accounting": "other",
    "professional_law": "humanities",
    "professional_medicine": "other",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "us_foreign_policy": "social_sciences",
    "virology": "other",
    "world_religions": "humanities",
}

# Maps each language code to a human-readable language name. The code is
# written verbatim as the task's `test_split` and, lower-cased, names the
# output folder and appears in task/group names; the readable name is inserted
# into the task description.
LANGUAGES = {
    "AR_XY": "Arabic (Generic)",
    "BN_BD": "Bengali (Bangladesh)",
    "DE_DE": "German (Germany)",
    "ES_LA": "Spanish (Latin America)",
    "FR_FR": "French (France)",
    "HI_IN": "Hindi (India)",
    "ID_ID": "Indonesian (Indonesia)",
    "IT_IT": "Italian (Italy)",
    "JA_JP": "Japanese (Japan)",
    "KO_KR": "Korean (South Korea)",
    "PT_BR": "Portuguese (Brazil)",
    "ZH_CN": "Chinese (China)",
    "SW_KE": "Swahili (Kenya)",
    "YO_NG": "Yoruba (Nigeria)",
    "EN_US": "English (United States)",
}


def parse_args():
    """Parse the command-line options of the config generator.

    Returns:
        An ``argparse.Namespace`` carrying ``base_yaml_path`` (required),
        ``save_prefix_path`` (defaults to ``"openai_mmlu"``) and
        ``group_prefix`` (defaults to ``""``).
    """
    # Each entry pairs a flag with the keyword arguments it is registered with.
    option_specs = (
        ("--base_yaml_path", {"required": True}),
        ("--save_prefix_path", {"default": "openai_mmlu"}),
        ("--group_prefix", {"default": ""}),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()


def main():
    """Write one task YAML per (language, subject) and one group YAML per language.

    Files are created relative to the current working directory: each language
    gets a folder named after its lower-cased code, containing
    ``<save_prefix_path>_<subject>.yaml`` for every entry of ``SUBJECTS`` and a
    ``_<save_prefix_path>_<lang>.yaml`` group file.

    The group file lists the per-category tags (``mmlu_<lang>_<category>``)
    that the task YAMLs are actually tagged with, so the group can be resolved
    from the generated task files.
    """
    args = parse_args()

    # Only the file name is kept: each generated task YAML sits one directory
    # below the base template and references it as `../<name>`.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
    with open(args.base_yaml_path, encoding="utf-8") as f:
        # The parsed content is not used; loading it makes a missing or
        # malformed base YAML fail before any output is written.
        yaml.full_load(f)

    # Distinct categories in first-seen order; the same for every language, so
    # computed once instead of inside the loops.
    all_categories = list(dict.fromkeys(SUBJECTS.values()))

    for lang_code, language_full_name in tqdm(LANGUAGES.items()):
        lang = lang_code.lower()
        out_folder = Path(lang)
        out_folder.mkdir(exist_ok=True)

        for subject, category in SUBJECTS.items():
            subject_words = subject.replace("_", " ")
            description = (
                "The following are multiple choice questions (with answers) "
                f"about {subject_words} in the {language_full_name} language.\n\n"
            )

            yaml_dict = {
                "include": f"../{base_yaml_name}",
                "tag": f"mmlu_{lang}_{category}",
                "task": f"mmlu_{lang}_{subject}",
                "task_alias": f"{lang} {subject_words}",
                "dataset_name": subject,
                # The split name keeps the original (upper-case) language code.
                "test_split": lang_code,
                "description": description,
            }

            file_save_path = out_folder / f"{args.save_prefix_path}_{subject}.yaml"
            eval_logger.info(
                "Saving yaml for subset %s,%s to %s", lang, subject, file_save_path
            )
            with open(file_save_path, "w", encoding="utf-8") as yaml_file:
                yaml.dump(
                    yaml_dict,
                    yaml_file,
                    allow_unicode=True,
                    default_style='"',
                )

        group_dict = {
            "group": f"openai_mmlu_{lang}",
            "group_alias": lang,
            # Must match the `tag` values written to the task YAMLs above.
            "task": [f"mmlu_{lang}_{category}" for category in all_categories],
            "aggregate_metric_list": [{"metric": "acc", "weight_by_size": True}],
            "metadata": {"version": "1.0.0"},
        }
        group_save_path = out_folder / f"_{args.save_prefix_path}_{lang}.yaml"
        with open(group_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                group_dict,
                yaml_file,
                indent=4,
                default_flow_style=False,
            )


if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions lm_eval/tasks/openai_mmlu/ar_xy/_openai_mmlu_ar_xy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
aggregate_metric_list:
-   metric: acc
    weight_by_size: true
group: openai_mmlu_ar_xy
group_alias: ar_xy
metadata:
    version: 1.0.0
task:
- mmlu_ar_xy_tasks
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract\
\ algebra in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_abstract_algebra"
"task_alias": "ar_xy abstract algebra"
"test_split": "AR_XY"
8 changes: 8 additions & 0 deletions lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_anatomy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy\
\ in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_anatomy"
"task_alias": "ar_xy anatomy"
"test_split": "AR_XY"
8 changes: 8 additions & 0 deletions lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_astronomy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy\
\ in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_astronomy"
"task_alias": "ar_xy astronomy"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "business_ethics"
"description": "The following are multiple choice questions (with answers) about business\
\ ethics in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_other"
"task": "mmlu_ar_xy_business_ethics"
"task_alias": "ar_xy business ethics"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical\
\ knowledge in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_other"
"task": "mmlu_ar_xy_clinical_knowledge"
"task_alias": "ar_xy clinical knowledge"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_biology"
"description": "The following are multiple choice questions (with answers) about college\
\ biology in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_college_biology"
"task_alias": "ar_xy college biology"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_chemistry"
"description": "The following are multiple choice questions (with answers) about college\
\ chemistry in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_college_chemistry"
"task_alias": "ar_xy college chemistry"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_computer_science"
"description": "The following are multiple choice questions (with answers) about college\
\ computer science in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_college_computer_science"
"task_alias": "ar_xy college computer science"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_mathematics"
"description": "The following are multiple choice questions (with answers) about college\
\ mathematics in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_college_mathematics"
"task_alias": "ar_xy college mathematics"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_medicine"
"description": "The following are multiple choice questions (with answers) about college\
\ medicine in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_other"
"task": "mmlu_ar_xy_college_medicine"
"task_alias": "ar_xy college medicine"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_physics"
"description": "The following are multiple choice questions (with answers) about college\
\ physics in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_college_physics"
"task_alias": "ar_xy college physics"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "computer_security"
"description": "The following are multiple choice questions (with answers) about computer\
\ security in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_computer_security"
"task_alias": "ar_xy computer security"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual\
\ physics in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_conceptual_physics"
"task_alias": "ar_xy conceptual physics"
"test_split": "AR_XY"
8 changes: 8 additions & 0 deletions lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_econometrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics\
\ in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_social_sciences"
"task": "mmlu_ar_xy_econometrics"
"task_alias": "ar_xy econometrics"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "electrical_engineering"
"description": "The following are multiple choice questions (with answers) about electrical\
\ engineering in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_electrical_engineering"
"task_alias": "ar_xy electrical engineering"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "elementary_mathematics"
"description": "The following are multiple choice questions (with answers) about elementary\
\ mathematics in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_elementary_mathematics"
"task_alias": "ar_xy elementary mathematics"
"test_split": "AR_XY"
8 changes: 8 additions & 0 deletions lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_formal_logic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "formal_logic"
"description": "The following are multiple choice questions (with answers) about formal\
\ logic in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_humanities"
"task": "mmlu_ar_xy_formal_logic"
"task_alias": "ar_xy formal logic"
"test_split": "AR_XY"
8 changes: 8 additions & 0 deletions lm_eval/tasks/openai_mmlu/ar_xy/openai_mmlu_global_facts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "global_facts"
"description": "The following are multiple choice questions (with answers) about global\
\ facts in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_other"
"task": "mmlu_ar_xy_global_facts"
"task_alias": "ar_xy global facts"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_biology"
"description": "The following are multiple choice questions (with answers) about high\
\ school biology in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_high_school_biology"
"task_alias": "ar_xy high school biology"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_chemistry"
"description": "The following are multiple choice questions (with answers) about high\
\ school chemistry in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_high_school_chemistry"
"task_alias": "ar_xy high school chemistry"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_computer_science"
"description": "The following are multiple choice questions (with answers) about high\
\ school computer science in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_stem"
"task": "mmlu_ar_xy_high_school_computer_science"
"task_alias": "ar_xy high school computer science"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_european_history"
"description": "The following are multiple choice questions (with answers) about high\
\ school european history in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_humanities"
"task": "mmlu_ar_xy_high_school_european_history"
"task_alias": "ar_xy high school european history"
"test_split": "AR_XY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_geography"
"description": "The following are multiple choice questions (with answers) about high\
\ school geography in the Arabic (Generic) language.\n\n"
"include": "../_default_template.yaml"
"tag": "mmlu_ar_xy_social_sciences"
"task": "mmlu_ar_xy_high_school_geography"
"task_alias": "ar_xy high school geography"
"test_split": "AR_XY"
Loading

0 comments on commit ca2dd8b

Please sign in to comment.