Skip to content

Commit

Permalink
Merge branch 'main' into fix_bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Leymore authored May 14, 2024
2 parents ec6b394 + aa2dd2b commit a182ca2
Show file tree
Hide file tree
Showing 805 changed files with 6,828 additions and 8,006 deletions.
5 changes: 2 additions & 3 deletions .pre-commit-config-zh-cn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,18 @@ repos:
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: check-yaml
- id: end-of-file-fixer
exclude: |
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: requirements-txt-fixer
- id: double-quote-string-fixer
exclude: configs/
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
Expand Down
5 changes: 2 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,18 @@ repos:
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: check-yaml
- id: end-of-file-fixer
exclude: |
(?x)^(
dicts/|
projects/.*?/dicts/|
configs/
configs/.*?/.*\.txt
)
- id: requirements-txt-fixer
- id: double-quote-string-fixer
exclude: configs/
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
Expand Down
11 changes: 1 addition & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,20 +162,11 @@ python tools/list_configs.py llama mmlu
You can also evaluate other HuggingFace models via the command line. Taking LLaMA-7b as an example:

```bash
python run.py --datasets ceval_ppl mmlu_ppl \
--hf-path huggyllama/llama-7b \ # HuggingFace model path
--model-kwargs device_map='auto' \ # Arguments for model construction
--tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # Arguments for tokenizer construction
--max-out-len 100 \ # Maximum number of tokens generated
--max-seq-len 2048 \ # Maximum sequence length the model can accept
--batch-size 8 \ # Batch size
--no-batch-padding \ # Don't enable batch padding, infer through for loop to avoid performance loss
--num-gpus 1 # Number of minimum required GPUs
python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b
```

> \[!TIP\]
>
> To run the command above, you will need to remove the comments starting from `# ` first.
> Configurations with the `_ppl` suffix are typically designed for base models.
> Configurations with the `_gen` suffix can be used for both base models and chat models.
Expand Down
13 changes: 1 addition & 12 deletions README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,20 +163,9 @@ python tools/list_configs.py llama mmlu
你也可以通过命令行去评测其它 HuggingFace 模型。同样以 LLaMA-7b 为例:

```bash
python run.py --datasets ceval_ppl mmlu_ppl \
--hf-path huggyllama/llama-7b \ # HuggingFace 模型地址
--model-kwargs device_map='auto' \ # 构造 model 的参数
--tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # 构造 tokenizer 的参数
--max-out-len 100 \ # 最长生成 token 数
--max-seq-len 2048 \ # 模型能接受的最大序列长度
--batch-size 8 \ # 批次大小
--no-batch-padding \ # 不打开 batch padding,通过 for loop 推理,避免精度损失
--num-gpus 1 # 运行该模型所需的最少 gpu 数
python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b
```

> **注意**<br />
> 若需要运行上述命令,你需要删除所有从 `# ` 开始的注释。
通过命令行或配置文件,OpenCompass 还支持评测 API 或自定义模型,以及更多样化的评测策略。请阅读[快速开始](https://opencompass.readthedocs.io/zh_CN/latest/get_started/quick_start.html)了解如何运行一个评测任务。

更多教程请查看我们的[文档](https://opencompass.readthedocs.io/zh_CN/latest/index.html)
Expand Down
4 changes: 2 additions & 2 deletions configs/api_examples/eval_api_360.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
abbr='360GPT_S2_V9',
type=AI360GPT,
path='360GPT_S2_V9',
key="xxxxxxxxxxxx",
key='xxxxxxxxxxxx',
generation_kwargs={
'temperature': 0.9,
'max_tokens': 2048,
Expand All @@ -40,4 +40,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir ="./output/api_360GPT_S2_V9"
work_dir ='./output/api_360GPT_S2_V9'
6 changes: 3 additions & 3 deletions configs/api_examples/eval_api_baichuan.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
type=BaiChuan,
path='Baichuan2-53B',
api_key='xxxxxx',
secret_key="xxxxx",
url="xxxxx",
secret_key='xxxxx',
url='xxxxx',
generation_kwargs={
'temperature': 0.3,
'top_p': 0.85,
Expand All @@ -41,4 +41,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_baichuan53b/"
work_dir = 'outputs/api_baichuan53b/'
2 changes: 1 addition & 1 deletion configs/api_examples/eval_api_baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_erniebot/"
work_dir = 'outputs/api_erniebot/'
6 changes: 3 additions & 3 deletions configs/api_examples/eval_api_bytedance.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
abbr='skylark-pro-public',
type=ByteDance,
path='skylark-pro-public',
accesskey="xxxxxxx",
secretkey="xxxxxxx",
accesskey='xxxxxxx',
secretkey='xxxxxxx',
url='xxxxxx',
generation_kwargs={
'temperature': 0.7,
Expand All @@ -41,4 +41,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_bytedance/"
work_dir = 'outputs/api_bytedance/'
2 changes: 1 addition & 1 deletion configs/api_examples/eval_api_minimax.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_minimax/"
work_dir = 'outputs/api_minimax/'
2 changes: 1 addition & 1 deletion configs/api_examples/eval_api_moonshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_moonshot/"
work_dir = 'outputs/api_moonshot/'
4 changes: 2 additions & 2 deletions configs/api_examples/eval_api_nanbeige.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
abbr='nanbeige-plus',
type=Nanbeige,
path='nanbeige-plus',
key="xxxxxx",
key='xxxxxx',
query_per_second=1,
max_out_len=2048,
batch_size=8),
Expand All @@ -33,4 +33,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir ="./output/nanbeige-plus"
work_dir ='./output/nanbeige-plus'
12 changes: 6 additions & 6 deletions configs/api_examples/eval_api_pangu.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
abbr='pangu',
type=PanGu,
path='pangu',
access_key="xxxxxx",
secret_key="xxxxxx",
url = "xxxxxx",
access_key='xxxxxx',
secret_key='xxxxxx',
url = 'xxxxxx',
# URL of the token server, used to generate the token, e.g. "https://xxxxxx.myhuaweicloud.com/v3/auth/tokens"
token_url = "xxxxxx",
token_url = 'xxxxxx',
# scope-project-name, used to generate the token
project_name = "xxxxxx",
project_name = 'xxxxxx',
query_per_second=1,
max_out_len=2048,
max_seq_len=2048,
Expand All @@ -39,4 +39,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_pangu/"
work_dir = 'outputs/api_pangu/'
2 changes: 1 addition & 1 deletion configs/api_examples/eval_api_qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_qwen/"
work_dir = 'outputs/api_qwen/'
24 changes: 12 additions & 12 deletions configs/api_examples/eval_api_sensetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@
max_seq_len=2048,
batch_size=8,
parameters={
"temperature": 0.8,
"top_p": 0.7,
"max_new_tokens": 1024,
"repetition_penalty": 1.05,
"know_ids": [],
"stream": True,
"user": "#*#***TestUser***#*#",
"knowledge_config": {
"control_level": "normal",
"knowledge_base_result": False,
"online_search_result": False
'temperature': 0.8,
'top_p': 0.7,
'max_new_tokens': 1024,
'repetition_penalty': 1.05,
'know_ids': [],
'stream': True,
'user': '#*#***TestUser***#*#',
'knowledge_config': {
'control_level': 'normal',
'knowledge_base_result': False,
'online_search_result': False
}
}
)
Expand All @@ -49,4 +49,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_sensetime/"
work_dir = 'outputs/api_sensetime/'
14 changes: 7 additions & 7 deletions configs/api_examples/eval_api_xunfei.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,22 @@
dict(
abbr='Spark-v1-1',
type=XunFei,
appid="xxxx",
appid='xxxx',
path='ws://spark-api.xf-yun.com/v1.1/chat',
api_secret = "xxxxxxx",
api_key = "xxxxxxx",
api_secret = 'xxxxxxx',
api_key = 'xxxxxxx',
query_per_second=1,
max_out_len=2048,
max_seq_len=2048,
batch_size=8),
dict(
abbr='Spark-v3-1',
type=XunFei,
appid="xxxx",
appid='xxxx',
domain='generalv3',
path='ws://spark-api.xf-yun.com/v3.1/chat',
api_secret = "xxxxxxxx",
api_key = "xxxxxxxxx",
api_secret = 'xxxxxxxx',
api_key = 'xxxxxxxxx',
query_per_second=1,
max_out_len=2048,
max_seq_len=2048,
Expand All @@ -48,4 +48,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_xunfei/"
work_dir = 'outputs/api_xunfei/'
4 changes: 2 additions & 2 deletions configs/api_examples/eval_api_zhipu.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
abbr='chatglm_pro',
type=ZhiPuAI,
path='chatglm_pro',
key='xxxxxxxxxxxx',
key='xxxxxxxxxxxx',
query_per_second=1,
max_out_len=2048,
max_seq_len=2048,
Expand All @@ -45,4 +45,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_zhipu/"
work_dir = 'outputs/api_zhipu/'
2 changes: 1 addition & 1 deletion configs/api_examples/eval_api_zhipu_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,4 @@
task=dict(type=OpenICLInferTask)),
)

work_dir = "outputs/api_zhipu_v2/"
work_dir = 'outputs/api_zhipu_v2/'
22 changes: 22 additions & 0 deletions configs/dataset_collections/chat_OC15.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from mmengine.config import read_base

with read_base():
from ..datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets
from ..datasets.cmmlu.cmmlu_gen_c13365 import cmmlu_datasets
from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
from ..datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets
from ..datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import triviaqa_datasets
from ..datasets.nq.nq_open_1shot_gen_01cf41 import nq_datasets
from ..datasets.race.race_gen_69ee4f import race_datasets
from ..datasets.winogrande.winogrande_5shot_gen_b36770 import winogrande_datasets
from ..datasets.hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets
from ..datasets.bbh.bbh_gen_2879b0 import bbh_datasets
from ..datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
from ..datasets.math.math_0shot_gen_393424 import math_datasets
from ..datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import TheoremQA_datasets
from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..datasets.mbpp.sanitized_mbpp_gen_830460 import sanitized_mbpp_datasets
from ..datasets.gpqa.gpqa_gen_4baadb import gpqa_datasets
from ..datasets.IFEval.IFEval_gen_3321a3 import ifeval_datasets

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
24 changes: 12 additions & 12 deletions configs/datasets/ARC_c/ARC_c_clean_ppl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,29 @@
prompt_template=dict(
type=PromptTemplate,
template={
"A":
'A':
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textA}")
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textA}')
], ),
"B":
'B':
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textB}")
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textB}')
], ),
"C":
'C':
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textC}")
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textC}')
], ),
"D":
'D':
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textD}")
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textD}')
], ),
}),
retriever=dict(type=ZeroRetriever),
Expand Down
Loading

0 comments on commit a182ca2

Please sign in to comment.