Add Kaoshi
liuhongwei committed Sep 8, 2023
1 parent 93c3c3b commit 86220f8
Showing 4 changed files with 231 additions and 0 deletions.
4 changes: 4 additions & 0 deletions configs/datasets/Kaoshi/Kaoshi_gen.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
from .Kaoshi_gen_6666 import Kaoshi_datasets # noqa: F401, F403
79 changes: 79 additions & 0 deletions configs/datasets/Kaoshi/Kaoshi_gen_6666.py
@@ -0,0 +1,79 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import KaoshiDataset

prompts = {

"单选题" : "请你做一道单项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"多选题" : "请你做一道多项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,答案可能是一个到多个选项,奇怪将其写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n例如:【答案】: A D <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
# "解答题" : "请解答下面的解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:" ,
"填空题":"请解答下面的填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案应只包含最终结果,不要添加额外词语。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
}


# splits = ['考研-经济', '职业-公务员', '考研-法学', '职业-高项', '职业-出版', '职业-测绘', '考研-数学', '考研-管理类综合', '职业-基金', '职业-银行', '职业-会计', '职业-建筑', '职业-消防', '职业-教师资格', '职业-期货', '考研-英语', '职业-房地产估价', '考研-临床医学', '考研-政治', '职业-安全工程', '职业-证券']
splits_with_type = {
    '单选题': ['职业-消防', '职业-测绘', '考研-经济', '职业-安全工程', '考研-政治', '职业-建筑', '考研-英语', '职业-教师资格', '职业-证券', '职业-会计', '职业-公务员', '考研-数学', '考研-法学', '职业-高项', '考研-临床医学', '职业-银行', '考研-管理类综合', '职业-基金'],
    '多选题': ['职业-消防', '职业-测绘', '考研-政治', '职业-建筑', '职业-证券', '职业-会计', '考研-法学', '考研-临床医学', '职业-银行'],
    '完形填空': ['考研-英语'],
    '判断题': ['职业-证券'],
    '填空题': ['考研-数学'],
}
Kaoshi_datasets = []

# for _folder, _prompts in [
# ("Multiple-choice_Questions", _MCQ_prompts),
# ("Fill-in-the-blank_Questions", _FBQ_prompts),
# ("Open-ended_Questions", _OEQ_prompts),
# ]:

# Map the Chinese question types to the names registered by KaoshiEvaluator in
# opencompass/datasets/Kaoshi.py, so the evaluator lookup below resolves to a
# registered module ('填空题' is mapped to the closest registered type, 'cloze').
_type_to_question_type = {
    '单选题': 'single_choice',
    '多选题': 'multi_choice',
    '填空题': 'cloze',
}

for _type in ['单选题', '多选题', '填空题']:
    for _split in splits_with_type[_type]:
        if "法学" in _split or "房地产" in _split:
            continue
        _folder = _split.replace('-' + _type, '')
        _p = prompts[_type]
        _reader_cfg = {
            "input_columns": ['question'],
            "output_column": 'answer',
        }
        _infer_cfg = {
            "ice_template": {
                "type": PromptTemplate,
                "template": {
                    "round": [{
                        "role": "HUMAN",
                        "prompt": _p + '{question}'
                    }]
                },
                "ice_token": "</E>"
            },
            "retriever": {
                "type": ZeroRetriever
            },
            "inferencer": {
                "type": GenInferencer,
                "max_out_len": 1024,
            }
        }
        _eval_cfg = {
            "evaluator": {
                "type": "KaoshiEvaluator_" + _type_to_question_type[_type],
            },
            "pred_role": "BOT",
        }
        _base_path = './data/Kaoshi'
        _dataset = {
            "type": KaoshiDataset,
            # Include the question type in the abbr so the same split used for
            # both 单选题 and 多选题 does not produce duplicate dataset names.
            "abbr": "Kaoshi" + _split + '-' + _type,
            "path": _base_path + '/' + _folder + '/' + _type + ".jsonl",
            "reader_cfg": _reader_cfg,
            "infer_cfg": _infer_cfg,
            "eval_cfg": _eval_cfg,
        }

        Kaoshi_datasets.append(_dataset)

_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
    del globals()[_t]
del _temporary_variables, _t
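A usage sketch, not part of this commit: an OpenCompass run config would typically pull these datasets in through read_base, the same mechanism Kaoshi_gen.py uses above (the relative import path is an assumption):

from mmengine.config import read_base

with read_base():
    from .datasets.Kaoshi.Kaoshi_gen import Kaoshi_datasets

datasets = [*Kaoshi_datasets]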
147 changes: 147 additions & 0 deletions opencompass/datasets/Kaoshi.py
@@ -0,0 +1,147 @@
import json
import re

from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET

from .base import BaseDataset


def get_number(options):
    result_string = ''
    # Label the options with consecutive ASCII codes, starting from uppercase 'A'.
    for i, option in enumerate(options, start=65):
        result_string += f'{chr(i)}. {option}\n'
    return result_string
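# Illustrative example (not part of the original code): for a four-option
# question, get_number(['选项一', '选项二', '选项三', '选项四']) returns
# 'A. 选项一\nB. 选项二\nC. 选项三\nD. 选项四\n'.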


@LOAD_DATASET.register_module()
class KaoshiDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        data_list = []
        # The question type is encoded in the file name, e.g. .../单选题.jsonl.
        _type = path.split('/')[-1].replace('.jsonl', '')
        with open(path, encoding='utf-8') as f:
            for line in f:
                data = json.loads(line)
                # For choice questions, append the lettered option list to the
                # question text.
                if _type in ['单选题', '多选题']:
                    data['question'] = data['question'].strip() + '\n' + \
                        get_number(data['options'])
                data_list.append(data)
        return Dataset.from_list(data_list)
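# Assumed (illustrative) layout of one line in the .jsonl files, inferred from
# load() above and from the config's reader_cfg (input column 'question',
# output column 'answer'); the exact field contents are not shown in this commit:
# {"question": "...", "options": ["...", "...", "...", "..."], "answer": ["A"]}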


valid_kaoshi_question_types = [
    'single_choice', 'multi_choice', 'multi_question_choice',
    'five_out_of_seven', 'cloze', 'subjective', 'correction'
]


class KaoshiEvaluator(BaseEvaluator):

    def __init__(self, question_type) -> None:
        super().__init__()
        assert question_type in valid_kaoshi_question_types
        self.question_type = question_type

    def do_predictions_postprocess(self, model_output, answer_length=None):
        if self.question_type == 'single_choice':
            model_answer = []
            # Take the last A-D letter in the output as the chosen option.
            temp = re.findall(r'[A-D]', model_output[::-1])
            if len(temp) != 0:
                model_answer.append(temp[0])

        elif self.question_type == 'multi_question_choice':
            model_answer = []
            temp = re.findall(r'【答案】\s*[::]*\s*[A-Z]', model_output)

            if len(temp) == answer_length:
                for t in temp:
                    model_answer.append(re.findall(r'[A-Z]', t)[0])
            else:
                temp = re.findall(r'[A-Z]', model_output)
                if len(temp) > 0:
                    for k in range(min(len(temp), answer_length)):
                        model_answer.append(temp[k])

        elif self.question_type == 'multi_choice':
            model_answer = []
            answer = ''
            content = re.sub(r'\s+', '', model_output)
            answer_index = content.find('【答案】')
            if answer_index > 0:
                temp = content[answer_index:]
                if len(re.findall(r'[A-D]', temp)) > 0:
                    for t in re.findall(r'[A-D]', temp):
                        answer += t
            else:
                temp = content[-10:]
                if len(re.findall(r'[A-D]', temp)) > 0:
                    for t in re.findall(r'[A-D]', temp):
                        answer += t
            if len(answer) != 0:
                model_answer.append(answer)

        elif self.question_type == 'five_out_of_seven':
            model_answer = []
            temp = re.findall(r'[A-G]', model_output)
            if len(temp) > 0:
                for k in range(min(5, len(temp))):
                    model_answer.append(temp[k])

        return model_answer

    def ensure_same_length(self, pred, refr):
        if len(pred) == len(refr):
            return pred
        # Length mismatch: pad with a placeholder that can never match.
        return ['Z'] * len(refr)

    def score(self, predictions, references):
        if self.question_type not in [
                'single_choice', 'multi_choice', 'multi_question_choice',
                'five_out_of_seven'
        ]:
            return {'score': 0}
        elif self.question_type == 'multi_choice':
            # Multi-choice items are worth 2 points: full credit for an exact
            # match, 1 point if all predicted options appear in the reference
            # answer, 0 if any predicted option is wrong.
            correct_score, total_score = 0, 0
            for pred, refr in zip(predictions, references):
                pred = self.do_predictions_postprocess(pred)
                pred = self.ensure_same_length(pred, refr)
                for p, r in zip(pred, refr):
                    if p == r:
                        correct_score += 2
                    else:
                        for i in p:
                            if i not in r:
                                break
                        else:
                            correct_score += 1
                    total_score += 2
            return {'score': correct_score / total_score * 100}
        else:
            correct_score, total_score = 0, 0
            for pred, refr in zip(predictions, references):
                if self.question_type == 'multi_question_choice':
                    pred = self.do_predictions_postprocess(pred, len(refr))
                else:
                    pred = self.do_predictions_postprocess(pred)
                pred = self.ensure_same_length(pred, refr)
                for p, r in zip(pred, refr):
                    if p == r:
                        correct_score += 1
                    total_score += 1
            return {'score': correct_score / total_score * 100}
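# Worked example for the multi_choice partial-credit rule above (illustrative;
# it assumes references are given as one string of option letters per question):
#     KaoshiEvaluator('multi_choice').score(
#         predictions=['【解析】略<eoe>【答案】AB<eoa>'], references=[['ABD']])
# Post-processing extracts ['AB']; 'AB' != 'ABD', but every predicted letter is
# in the reference, so the item earns 1 of 2 points -> {'score': 50.0}.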


for question_type in valid_kaoshi_question_types:

    # Register one evaluator per question type. The helper function binds the
    # current value of question_type, avoiding the classic late-binding closure
    # problem a bare lambda in the loop would have.
    def _kaoshi_register(question_type):
        ICL_EVALUATORS.register_module(
            name='KaoshiEvaluator' + '_' + question_type,
            module=lambda *args, **kwargs: KaoshiEvaluator(
                question_type=question_type, *args, **kwargs))

    _kaoshi_register(question_type)
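A minimal sanity-check sketch, not part of this commit: the loop above registers names such as 'KaoshiEvaluator_single_choice' in ICL_EVALUATORS, which is what the config's eval_cfg refers to, and the evaluator can also be exercised directly (the reference format below is an assumption):

from opencompass.datasets.Kaoshi import KaoshiEvaluator

evaluator = KaoshiEvaluator(question_type='single_choice')
# The single_choice post-processor keeps the last A-D letter found in the output.
print(evaluator.score(['【解析】略<eoe>【答案】A<eoa>'], [['A']]))  # {'score': 100.0}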
1 change: 1 addition & 0 deletions opencompass/datasets/__init__.py
@@ -35,6 +35,7 @@
from .humanevalx import * # noqa: F401, F403
from .iwslt2017 import * # noqa: F401, F403
from .jigsawmultilingual import * # noqa: F401, F403
from .Kaoshi import * # noqa: F401, F403
from .lambada import * # noqa: F401, F403
from .lcsts import * # noqa: F401, F403
from .LEval_coursera import * # noqa: F401, F403
