Skip to content

Commit

Permalink
Update mmmlu_lite data loading to download from OSS
Browse files Browse the repository at this point in the history
  • Loading branch information
liushz committed Nov 1, 2024
1 parent 47186b9 commit 3930fc7
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 12 deletions.
5 changes: 1 addition & 4 deletions opencompass/configs/datasets/mmmlu_lite/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,8 @@ MMMLU contains the MMLU test set translated into the following locales:


## How to Use
Download the dataset from [link](https://hf-mirror.com/datasets/openai/MMMLU)

```python
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "default")
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "by_language")
ds = load_dataset("opencompass/mmmlu_lite", "AR_XY")
```
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@
dict(
abbr=f'openai_m{_name}',
type=MMMLULiteDataset,
# path='opencompass/mmmlu_lite',
path = './data/mmmlu_lite',
path='opencompass/mmmlu_lite',
name=f'openai_m{_name}',
reader_cfg=mmmlu_lite_reader_cfg,
infer_cfg=mmmlu_lite_infer_cfg,
Expand Down
14 changes: 8 additions & 6 deletions opencompass/datasets/mmmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# yapf: disable

import json
import os
import os.path as osp

from datasets import Dataset, DatasetDict, load_dataset

Expand Down Expand Up @@ -43,10 +43,12 @@ class MMMLULiteDataset(BaseDataset):

@staticmethod
def load(path: str, name: str):
path = get_data_path(path, local_mode=False)
dataset = DatasetDict()
path = os.path.join(path, name + '.jsonl')
dataset_list = []
with open(path, 'r') as f:
dataset_list = [json.loads(line) for line in f.readlines()]
dataset['test'] = Dataset.from_list(dataset_list)
name = name.split('_')[-1]
raw_data = []
filename = osp.join(path, name, 'test.jsonl')
with open(filename, encoding='utf-8') as f:
raw_data = [json.loads(line) for line in f.readlines()]
dataset['test'] = Dataset.from_list(raw_data)
return dataset
9 changes: 9 additions & 0 deletions opencompass/utils/datasets_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,11 @@
"ms_id": "",
"hf_id": "",
"local": "./data/cmo.jsonl",
},
"opencompass/mmmlu_lite": {
"ms_id": "",
"hf_id": "",
"local": "./data/mmmlu_lite",
}
}

Expand All @@ -309,6 +314,10 @@
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
"md5": "761310671509a239e41c4b717f7fab9c",
},
"/mmmlu_lite": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmmlu_lite.zip",
"md5": "a776af1220e1826fd0608eda1bc4425e",
},
"/gpqa/": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/gpqa.zip",
"md5": "2e9657959030a765916f1f2aca29140d",
Expand Down

0 comments on commit 3930fc7

Please sign in to comment.