forked from open-compass/opencompass
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
liuhongwei
committed
Sep 8, 2023
1 parent
93c3c3b
commit 86220f8
Showing
4 changed files
with
231 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .Kaoshi_gen_6666 import Kaoshi_datasets # noqa: F401, F403 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import KaoshiDataset | ||
|
||
# Instruction prefix for each question type; the question text is appended
# directly after the prefix when the prompt template is built.
# Each prompt is written as adjacent string literals split at "\n" so that the
# individual prompt lines are readable; the resulting strings are unchanged.
# NOTE(review): "奇怪" in the 多选题 prompt looks like a stray word (possibly a
# typo); it is kept verbatim so the model-facing text does not change —
# confirm with the dataset authors.
prompts = {
    "单选题": (
        "请你做一道单项选择题\n"
        "请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n"
        "例如:【答案】: A <eoa>\n"
        "完整的题目回答的格式如下:\n"
        "【解析】 ... <eoe>\n"
        "【答案】 ... <eoa>\n"
        "请你严格按照上述格式作答。\n"
        "题目如下:"
    ),
    "多选题": (
        "请你做一道多项选择题\n"
        "请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,答案可能是一个到多个选项,奇怪将其写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n"
        "例如:【答案】: A D <eoa>\n"
        "完整的题目回答的格式如下:\n"
        "【解析】 ... <eoe>\n"
        "【答案】 ... <eoa>\n"
        "请你严格按照上述格式作答。\n"
        "题目如下:"
    ),
    # Disabled prompt for open-ended questions, kept for reference:
    # "解答题" : "请解答下面的解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:" ,
    "填空题": (
        "请解答下面的填空题\n"
        "仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n"
        "完整的题目回答格式如下:\n"
        "【解析】 ...<eoe>\n"
        "【答案】...<eoa>\n"
        "请你严格按照上述格式作答。\n"
        "题目如下:"
    ),
}
|
||
|
||
# Flat list of split names, kept for reference (not used below):
# splits = ['考研-经济', '职业-公务员', '考研-法学', '职业-高项', '职业-出版', '职业-测绘', '考研-数学', '考研-管理类综合', '职业-基金', '职业-银行', '职业-会计', '职业-建筑', '职业-消防', '职业-教师资格', '职业-期货', '考研-英语', '职业-房地产估价', '考研-临床医学', '考研-政治', '职业-安全工程', '职业-证券']

# For every question type, the splits that are listed as having that type.
splits_with_type = {
    '单选题': [
        '职业-消防',
        '职业-测绘',
        '考研-经济',
        '职业-安全工程',
        '考研-政治',
        '职业-建筑',
        '考研-英语',
        '职业-教师资格',
        '职业-证券',
        '职业-会计',
        '职业-公务员',
        '考研-数学',
        '考研-法学',
        '职业-高项',
        '考研-临床医学',
        '职业-银行',
        '考研-管理类综合',
        '职业-基金',
    ],
    '多选题': [
        '职业-消防',
        '职业-测绘',
        '考研-政治',
        '职业-建筑',
        '职业-证券',
        '职业-会计',
        '考研-法学',
        '考研-临床医学',
        '职业-银行',
    ],
    '完形填空': ['考研-英语'],
    '判断题': ['职业-证券'],
    '填空题': ['考研-数学'],
}

# Filled with one dataset config dict per (question type, split) pair.
Kaoshi_datasets = []
|
||
# for _folder, _prompts in [ | ||
# ("Multiple-choice_Questions", _MCQ_prompts), | ||
# ("Fill-in-the-blank_Questions", _FBQ_prompts), | ||
# ("Open-ended_Questions", _OEQ_prompts), | ||
# ]: | ||
|
||
# Evaluators are registered in the dataset module under the names
# "KaoshiEvaluator_<question_type>" with English question types, so the
# Chinese labels used for prompts and file names must be translated before
# they are used as an evaluator type. Concatenating the Chinese label directly
# (e.g. "KaoshiEvaluator_单选题") refers to a name that is never registered.
_question_type_names = {
    '单选题': 'single_choice',
    '多选题': 'multi_choice',
    # NOTE(review): 'cloze' is assumed to be the fill-in-the-blank type;
    # confirm against the evaluator's list of valid question types.
    '填空题': 'cloze',
}

# Build one dataset config per (question type, split) pair and collect them
# in Kaoshi_datasets.
for _type in ['单选题', '多选题', '填空题']:
    for _split in splits_with_type[_type]:
        # Splits whose name mentions law or real estate are skipped.
        if "法学" in _split or "房地产" in _split:
            continue
        # Strips a trailing "-<type>" from the split name; none of the names
        # in splits_with_type carries such a suffix, so this currently leaves
        # the split name unchanged.
        _folder = _split.replace('-' + _type, '')
        _p = prompts[_type]
        _reader_cfg = {
            "input_columns": ['question'],
            "output_column": 'answer',
        }
        _infer_cfg = {
            "ice_template": {
                "type": PromptTemplate,
                "template": {
                    "round": [{
                        "role": "HUMAN",
                        # Type-specific instructions followed by the question.
                        "prompt": _p + '{question}'
                    }]
                },
                "ice_token": "</E>"
            },
            "retriever": {
                "type": ZeroRetriever
            },
            "inferencer": {
                "type": GenInferencer,
                "max_out_len": 1024,
            }
        }
        _eval_cfg = {
            "evaluator": {
                # Must match a name registered by the dataset module.
                "type": "KaoshiEvaluator" + "_" + _question_type_names[_type],
            },
            "pred_role": "BOT",
        }
        _base_path = './data/Kaoshi'
        _dataset = {
            "type": KaoshiDataset,
            "abbr": "Kaoshi" + _split,
            # Data layout: <base>/<split folder>/<question type>.jsonl
            "path": _base_path + '/' + _folder + '/' + _type + ".jsonl",
            "reader_cfg": _reader_cfg,
            "infer_cfg": _infer_cfg,
            "eval_cfg": _eval_cfg,
        }

        Kaoshi_datasets.append(_dataset)
|
||
# Remove every module-level name that starts with an underscore so that only
# the public names remain in this config module's namespace. The names are
# snapshotted first because the namespace is mutated while they are removed.
_temporary_variables = list(
    filter(lambda key: key.startswith('_'), globals()))
for _t in _temporary_variables:
    globals().pop(_t)
# The two helper names were bound after the snapshot was taken, so they have
# to be removed explicitly.
del _temporary_variables, _t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
import json | ||
import re | ||
|
||
from datasets import Dataset | ||
|
||
from opencompass.openicl.icl_evaluator import BaseEvaluator | ||
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET | ||
|
||
from .base import BaseDataset | ||
|
||
|
||
def get_number(options):
    """Format answer options as lettered lines.

    Each option becomes one line of the form ``"<letter>. <option>\\n"``,
    with letters assigned in order starting at ``'A'``.

    Args:
        options: Iterable of option texts.

    Returns:
        The concatenated lettered lines; an empty string for no options.
    """
    first_letter = ord('A')  # letters are derived from consecutive code points
    return ''.join(f'{chr(first_letter + offset)}. {option}\n'
                   for offset, option in enumerate(options))
|
||
|
||
@LOAD_DATASET.register_module()
class KaoshiDataset(BaseDataset):
    """Loader for Kaoshi question files stored as JSON Lines."""

    @staticmethod
    def load(path: str):
        """Read a ``.jsonl`` file and return its records as a ``Dataset``.

        The question type is taken from the file name (the last
        ``/``-separated path component without ``.jsonl``). For the
        single-choice and multiple-choice files, the lettered options are
        appended to each question text.

        Args:
            path: Path of the ``.jsonl`` file, one JSON object per line.

        Returns:
            A ``Dataset`` built from the parsed records.
        """
        question_kind = path.rsplit('/', 1)[-1].replace('.jsonl', '')
        append_options = question_kind in ('单选题', '多选题')

        records = []
        with open(path, encoding='utf-8') as source:
            for raw_line in source:
                record = json.loads(raw_line)
                if append_options:
                    # Question text, a newline, then the "A. ..." option lines.
                    record['question'] = '\n'.join(
                        (record['question'].strip(),
                         get_number(record['options'])))
                records.append(record)
        return Dataset.from_list(records)
|
||
|
||
# Question types accepted by the Kaoshi evaluator; one evaluator name is
# registered for each entry.
valid_kaoshi__question_types = [
    'single_choice',
    'multi_choice',
    'multi_question_choice',
    'five_out_of_seven',
    'cloze',
    'subjective',
    'correction',
]
|
||
|
||
class KaoshiEvaluator(BaseEvaluator):
    """Rule-based scorer for Kaoshi choice-style questions.

    Option letters are extracted from the raw model output with regular
    expressions and compared with the reference answers. Only the choice
    types listed in ``_SCORABLE_TYPES`` are graded; every other valid
    question type always scores 0.

    Args:
        question_type: One of ``valid_kaoshi__question_types``.
    """

    # Question types that ``score`` actually grades.
    _SCORABLE_TYPES = ('single_choice', 'multi_choice',
                       'multi_question_choice', 'five_out_of_seven')

    def __init__(self, question_type) -> None:
        super().__init__()
        assert question_type in valid_kaoshi__question_types, (
            f'unsupported question_type: {question_type!r}')
        self.question_type = question_type

    def do_predictions_postprocess(self, model_output, answer_lenth=None):
        """Extract the predicted answers from raw model output.

        Args:
            model_output: Text generated by the model.
            answer_lenth: Expected number of answers; only used for
                'multi_question_choice'. ``None`` means "take every answer
                found". (The parameter name is kept as-is for compatibility.)

        Returns:
            A list of answer strings. It is empty when nothing could be
            extracted or when the question type is not a choice type.
        """
        # Always bound, so non-choice types return [] instead of raising
        # UnboundLocalError.
        model_answer = []

        if self.question_type == 'single_choice':
            # Searching the reversed text makes the first hit the LAST
            # option letter of the output.
            letters = re.findall(r'[A-D]', model_output[::-1])
            if letters:
                model_answer.append(letters[0])

        elif self.question_type == 'multi_question_choice':
            # Accept both the ASCII and the full-width colon after the
            # answer marker.
            tagged = re.findall(r'【答案】\s*[::]*\s*[A-Z]', model_output)
            if tagged and answer_lenth in (None, len(tagged)):
                # Each match ends with its option letter.
                model_answer.extend(match[-1] for match in tagged)
            else:
                # Fall back to the first upper-case letters of the output.
                letters = re.findall(r'[A-Z]', model_output)
                limit = len(letters) if answer_lenth is None else answer_lenth
                model_answer.extend(letters[:limit])

        elif self.question_type == 'multi_choice':
            content = re.sub(r'\s+', '', model_output)
            answer_index = content.find('【答案】')
            # find() returns -1 when the marker is missing and 0 when the
            # output starts with it, so 0 must count as "found".
            if answer_index >= 0:
                answer_part = content[answer_index:]
            else:
                # No marker: only look at the tail of the output.
                answer_part = content[-10:]
            answer = ''.join(re.findall(r'[A-D]', answer_part))
            if answer:
                model_answer.append(answer)

        elif self.question_type == 'five_out_of_seven':
            # Keep at most the first five option letters.
            model_answer.extend(re.findall(r'[A-G]', model_output)[:5])

        return model_answer

    def ensure_same_length(self, pred, refr):
        """Return ``pred`` if it is as long as ``refr``, else placeholders.

        On a length mismatch a list of ``'Z'`` entries of the reference
        length is returned, so every position is compared against ``'Z'``.
        """
        if len(pred) == len(refr):
            return pred
        return ['Z'] * len(refr)

    def score(self, predictions, references):
        """Grade predictions against references.

        For 'multi_choice' each answer is worth 2 points: 2 for an exact
        match, 1 when every predicted letter is contained in the reference.
        For the other choice types each answer is worth 1 point for an exact
        match.

        Args:
            predictions: Raw model outputs, one per sample.
            references: Reference answers, one sequence per sample.

        Returns:
            ``{'score': <percentage of points obtained>}``; the score is 0
            for question types that are not graded and when there is nothing
            to grade.
        """
        if self.question_type not in self._SCORABLE_TYPES:
            return {'score': 0}

        is_multi_choice = self.question_type == 'multi_choice'
        correct_score, total_score = 0, 0
        for pred, refr in zip(predictions, references):
            if self.question_type == 'multi_question_choice':
                pred = self.do_predictions_postprocess(pred, len(refr))
            else:
                pred = self.do_predictions_postprocess(pred)
            pred = self.ensure_same_length(pred, refr)
            for p, r in zip(pred, refr):
                if is_multi_choice:
                    if p == r:
                        correct_score += 2
                    elif all(letter in r for letter in p):
                        # Partial credit: nothing wrong was selected.
                        correct_score += 1
                    total_score += 2
                else:
                    if p == r:
                        correct_score += 1
                    total_score += 1

        if total_score == 0:
            # Nothing was graded; avoid dividing by zero.
            return {'score': 0}
        return {'score': correct_score / total_score * 100}
|
||
|
||
def _kaoshi_register(question_type):
    """Register a ``KaoshiEvaluator`` factory bound to ``question_type``.

    The factory is registered under ``'KaoshiEvaluator_<question_type>'``.
    Doing this inside a function gives each factory its own
    ``question_type`` binding instead of sharing the loop variable.
    """
    registered_name = 'KaoshiEvaluator' + '_' + question_type
    ICL_EVALUATORS.register_module(
        name=registered_name,
        module=lambda *args, **kwargs: KaoshiEvaluator(
            *args, question_type=question_type, **kwargs))


# One registered evaluator name per valid question type.
for question_type in valid_kaoshi__question_types:
    _kaoshi_register(question_type)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters