Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add dingo test #1529

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions configs/datasets/dingo/dingo_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DingoDataset, DingoEvaluator


# Each CSV file listed here becomes its own dataset entry below.
dingo_paths = [
    './data/dingo/en_192.csv',
    './data/dingo/zh_170.csv',
]

dingo_datasets = []
for path in dingo_paths:
    # Only an 'input' column is read; no output column is configured.
    dingo_reader_cfg = dict(input_columns='input', output_column=None)

    # Zero-shot generation: the raw input is sent as a single HUMAN turn.
    dingo_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[dict(role='HUMAN', prompt='{input}')]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    dingo_eval_cfg = dict(
        evaluator=dict(type=DingoEvaluator),
        pred_role='BOT',
    )

    # abbr is 'dingo_' plus the file name without its '.csv' suffix,
    # e.g. 'dingo_en_192'.
    dingo_datasets.append(dict(
        abbr=f"dingo_{path.rsplit('/', 1)[-1].split('.csv')[0]}",
        type=DingoDataset,
        path=path,
        reader_cfg=dingo_reader_cfg,
        infer_cfg=dingo_infer_cfg,
        eval_cfg=dingo_eval_cfg,
    ))

datasets = dingo_datasets
7 changes: 7 additions & 0 deletions configs/eval_dingo.py
shijinpjlab marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from mmengine.config import read_base
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

补充一下新增PR功能说明,和测试记录吧

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

title也修改成[feature] Add xxx 这样的格式吧


# Pull the model list and the dingo dataset list from their base config
# modules; the imports must stay inside the read_base() context.
with read_base():
    from .models.hf_internlm.hf_internlm_7b import models
    from .datasets.dingo.dingo_gen import datasets

# Output directory for this evaluation (presumably consumed by the
# OpenCompass runner -- confirm against the runner's config handling).
work_dir = './outputs/eval_dingo'
34 changes: 34 additions & 0 deletions opencompass/configs/datasets/dingo/dingo_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import DingoDataset, DingoEvaluator


# Each CSV file listed here becomes its own dataset entry below.
dingo_paths = [
    './data/dingo/en_192.csv',
    './data/dingo/zh_170.csv',
]

dingo_datasets = []
for path in dingo_paths:
    # Only an 'input' column is read; no output column is configured.
    dingo_reader_cfg = dict(input_columns='input', output_column=None)

    # Zero-shot generation: the raw input is sent as a single HUMAN turn.
    dingo_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[dict(role='HUMAN', prompt='{input}')]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    dingo_eval_cfg = dict(
        evaluator=dict(type=DingoEvaluator),
        pred_role='BOT',
    )

    # abbr is 'dingo_' plus the file name without its '.csv' suffix,
    # e.g. 'dingo_en_192'.
    dingo_datasets.append(dict(
        abbr=f"dingo_{path.rsplit('/', 1)[-1].split('.csv')[0]}",
        type=DingoDataset,
        path=path,
        reader_cfg=dingo_reader_cfg,
        infer_cfg=dingo_infer_cfg,
        eval_cfg=dingo_eval_cfg,
    ))

datasets = dingo_datasets
1 change: 1 addition & 0 deletions opencompass/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .csl import * # noqa: F401, F403
from .custom import * # noqa: F401, F403
from .cvalues import * # noqa: F401, F403
from .dingo import * # noqa: F401, F403
from .drcd import * # noqa: F401, F403
from .drop import * # noqa: F401, F403
from .drop_simple_eval import * # noqa: F401, F403
Expand Down
79 changes: 79 additions & 0 deletions opencompass/datasets/dingo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# flake8: noqa
# yapf: disable
import os
import csv
import json
import time
from typing import List
from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET

from .base import BaseDataset

@LOAD_DATASET.register_module()
class DingoDataset(BaseDataset):
    """Prompt dataset read from a semicolon-delimited CSV file.

    Only the first field of each row is used; it is exposed under the
    ``input`` key.
    """

    @staticmethod
    def load(path: str):
        """Load prompts from the CSV file at ``path``.

        Args:
            path: Location of a UTF-8 encoded CSV file that uses ``;`` as
                its field delimiter.

        Returns:
            A ``Dataset`` holding one ``{"input": <first field>}`` record
            per CSV row. A row without any field yields an empty string.
        """
        # newline="" hands line-ending handling to the csv module so that
        # quoted fields containing line breaks are parsed correctly.
        with open(path, encoding="utf-8", newline="") as f:
            reader = csv.reader(f, delimiter=';')
            raw_data = [{"input": row[0] if row else ""} for row in reader]
        return Dataset.from_list(raw_data)


@LOAD_DATASET.register_module()
class DingoLongDataset(BaseDataset):
    """Prompt dataset read from a JSON Lines file.

    Each non-blank line is parsed as a JSON object and its ``input`` value
    is exposed under the ``input`` key.
    """

    @staticmethod
    def load(path: str):
        """Load prompts from the JSONL file at ``path``.

        Args:
            path: Location of a UTF-8 encoded file with one JSON object
                per line.

        Returns:
            A ``Dataset`` holding one ``{"input": ...}`` record per
            non-blank line; the value is ``None`` when the object has no
            ``input`` key.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        with open(path, "r", encoding="utf-8") as f:
            # Blank lines (e.g. a trailing empty line) are not records and
            # would otherwise make json.loads fail.
            raw_data = [
                {"input": json.loads(line).get("input")}
                for line in f
                if line.strip()
            ]
        return Dataset.from_list(raw_data)


@ICL_EVALUATORS.register_module()
class DingoEvaluator(BaseEvaluator):
    """Evaluator that hands prompt/prediction pairs to the ``dingo`` package."""

    def score(self, origin_prompt: List, predictions: List) -> dict:
        """Score predictions with dingo's local executor.

        The prompt/prediction pairs are written to a temporary JSONL file,
        which is passed to dingo as its input and removed afterwards, even
        when the evaluation fails.

        Args:
            origin_prompt: Prompts, paired positionally with ``predictions``.
            predictions: Model outputs. Pairing uses ``zip``, so surplus
                items in the longer list are ignored.

        Returns:
            The dict form of the first result returned by the executor.

        Raises:
            ModuleNotFoundError: If the dingo modules cannot be imported.
        """
        import tempfile

        try:
            from dingo.io import InputArgs
            from dingo.exec import Executor
        except Exception as err:
            # Chain the original error so the real import failure stays
            # visible in the traceback.
            raise ModuleNotFoundError(
                "=========== dingo register fail. please try: pip install dingo-python. ==========="
            ) from err

        current_time = time.strftime("%Y%m%d_%H%M%S", time.localtime())
        # A timestamp alone has one-second resolution, so evaluations
        # running in parallel could end up sharing (and deleting) the same
        # file. mkstemp guarantees a unique name per call.
        fd, file_name = tempfile.mkstemp(
            prefix="dingo_file_" + current_time + "_",
            suffix=".jsonl",
            dir=".",
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                for pmt, prd in zip(origin_prompt, predictions):
                    json.dump({"prompt": pmt, "prediction": prd}, f, ensure_ascii=False)
                    f.write('\n')

            input_data = {
                "eval_models": ["llm_base"],
                "input_path": file_name,
                # NOTE(review): the output directory is hard-coded; a reviewer
                # asked whether a global setting / environment variable
                # should be used here instead.
                "output_path": "./outputs/dingo/",
                "dataset": "local",
                "datasource": "local",  # If not fill in this item, it will be the same as "dataset"
                "data_format": "jsonl",
                "column_prompt": ["prompt"],
                "column_content": ["prediction"],
            }
            input_args = InputArgs(**input_data)
            executor = Executor.exec_map["local"](input_args)
            result = executor.execute()
            return result[0].to_dict()
        finally:
            # Always clean up the temporary input file, including when
            # dingo raises.
            os.remove(file_name)
1 change: 1 addition & 0 deletions requirements/extra.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Alpaca-eval
alpaca-eval==0.6
cn2an
dingo-python
# Icl topk retriever
faiss_gpu==1.7.2
# Humaneval, Humaneval X
Expand Down