Skip to content

Commit

Permalink
[Feature] Mmlu-pro auto-download (#1464)
Browse files (browse the repository at this point in the history)
* update

* update

* update

* update

* update
  • Loading branch information
MaiziXiao committed Aug 30, 2024
1 parent f342097 commit 9693be4
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 2 deletions.
1 change: 1 addition & 0 deletions configs/datasets/mmlu_pro/mmlu_pro_0shot_cot_gen_08c1de.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
dict(
abbr=f'mmlu_pro_{category.replace(" ", "_")}',
type=MMLUProDataset,
path='opencompass/mmlu_pro',
category=category,
reader_cfg=mmlu_pro_reader_cfg,
infer_cfg=mmlu_pro_infer_cfg,
Expand Down
1 change: 1 addition & 0 deletions configs/datasets/mmlu_pro/mmlu_pro_gen_cdbebf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
dict(
abbr=f'mmlu_pro_{category.replace(" ", "_")}',
type=MMLUProDataset,
path='opencompass/mmlu_pro',
category=category,
reader_cfg=mmlu_pro_reader_cfg,
infer_cfg=mmlu_pro_infer_cfg,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
dict(
abbr=f'mmlu_pro_{category.replace(" ", "_")}',
type=MMLUProDataset,
path='opencompass/mmlu_pro',
category=category,
reader_cfg=mmlu_pro_reader_cfg,
infer_cfg=mmlu_pro_infer_cfg,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
dict(
abbr=f'mmlu_pro_{category.replace(" ", "_")}',
type=MMLUProDataset,
path='opencompass/mmlu_pro',
category=category,
reader_cfg=mmlu_pro_reader_cfg,
infer_cfg=mmlu_pro_infer_cfg,
Expand Down
6 changes: 4 additions & 2 deletions opencompass/datasets/mmlu_pro.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datasets import load_dataset

from opencompass.registry import LOAD_DATASET
from opencompass.utils import get_data_path

from .base import BaseDataset

Expand All @@ -24,8 +25,9 @@ def _parse(item):
class MMLUProDataset(BaseDataset):

@staticmethod
def load(category: str):
mmlu_pro = load_dataset('TIGER-Lab/MMLU-Pro')
def load(path: str, category: str):
path = get_data_path(path)
mmlu_pro = load_dataset(path)
mmlu_pro = mmlu_pro.filter(lambda x: x['category'] == category)
mmlu_pro = mmlu_pro.map(_parse)
return mmlu_pro
10 changes: 10 additions & 0 deletions opencompass/utils/datasets_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@
"hf_id": "opencompass/mmlu",
"local": "./data/mmlu/",
},
# MMLU_PRO
"opencompass/mmlu_pro": {
"ms_id": "",
"hf_id": "",
"local": "./data/mmlu_pro",
},
# NQ
"opencompass/natural_question": {
"ms_id": "opencompass/natural_question",
Expand Down Expand Up @@ -386,4 +392,8 @@
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/SQuAD2.0.zip",
"md5": "1321cbf9349e1102a57d31d1b2bfdd7e",
},
"mmlu_pro": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu_pro.zip",
"md5": "e3200c7380f4cea5f13c768f2815fabb",
},
}

0 comments on commit 9693be4

Please sign in to comment.