Add Kaoshi

liushz · Sep 8, 2023 · 86220f8 · 86220f8
1 parent 93c3c3b
commit 86220f8
Show file tree

Hide file tree

Showing 4 changed files with 231 additions and 0 deletions.
diff --git a/configs/datasets/Kaoshi/Kaoshi_gen.py b/configs/datasets/Kaoshi/Kaoshi_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .Kaoshi_gen_6666 import Kaoshi_datasets  # noqa: F401, F403
diff --git a/configs/datasets/Kaoshi/Kaoshi_gen_6666.py b/configs/datasets/Kaoshi/Kaoshi_gen_6666.py
@@ -0,0 +1,79 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import KaoshiDataset
+
+# Instruction prefix for each question type, keyed by the Chinese type label.
+# The dataset loop below concatenates the selected prefix with '{question}'.
+prompts = {
+    "单选题": "请你做一道单项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A，B，C，D中选出正确的答案，并写在【答案】和<eoa>之间，答案应只包含最终结果，不要添加额外词语。\n例如：【答案】: A <eoa>\n完整的题目回答的格式如下：\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下：",
+    # The stray word "奇怪" that preceded "将其写在" was removed: it made the
+    # instruction ungrammatical.
+    "多选题": "请你做一道多项选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A，B，C，D中选出正确的答案，答案可能是一个到多个选项，并将其写在【答案】和<eoa>之间，答案应只包含最终结果，不要添加额外词语。\n例如：【答案】: A D <eoa>\n完整的题目回答的格式如下：\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下：",
+    # Disabled prompt for the "解答题" question type, kept for reference.
+    # "解答题" : "请解答下面的解答题\n仔细阅读题目并充分结合你已有的知识，解答其中的问题，请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下：\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答，如果不止一道题，请分别作答。\n题目如下：" ,
+    "填空题": "请解答下面的填空题\n仔细阅读题目，解答其中的问题，请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间，答案应只包含最终结果，不要添加额外词语。\n完整的题目回答格式如下：\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
+}
+
+
+# splits = ['考研-经济', '职业-公务员', '考研-法学', '职业-高项', '职业-出版', '职业-测绘', '考研-数学', '考研-管理类综合', '职业-基金', '职业-银行', '职业-会计', '职业-建筑', '职业-消防', '职业-教师资格', '职业-期货', '考研-英语', '职业-房地产估价', '考研-临床医学', '考研-政治', '职业-安全工程', '职业-证券']
+# For every question type (Chinese label), the exam splits that provide
+# questions of that type.
+splits_with_type = {
+    '单选题': [
+        '职业-消防', '职业-测绘', '考研-经济', '职业-安全工程', '考研-政治',
+        '职业-建筑', '考研-英语', '职业-教师资格', '职业-证券', '职业-会计',
+        '职业-公务员', '考研-数学', '考研-法学', '职业-高项', '考研-临床医学',
+        '职业-银行', '考研-管理类综合', '职业-基金',
+    ],
+    '多选题': [
+        '职业-消防', '职业-测绘', '考研-政治', '职业-建筑', '职业-证券',
+        '职业-会计', '考研-法学', '考研-临床医学', '职业-银行',
+    ],
+    '完形填空': ['考研-英语'],
+    '判断题': ['职业-证券'],
+    '填空题': ['考研-数学'],
+}
+
+# Filled in by the loop below with one dataset config per (type, split).
+Kaoshi_datasets = []
+
+# for _folder, _prompts in [
+#     ("Multiple-choice_Questions", _MCQ_prompts),
+#     ("Fill-in-the-blank_Questions", _FBQ_prompts),
+#     ("Open-ended_Questions", _OEQ_prompts),
+# ]:
+
+# Chinese question-type label -> suffix of the evaluator name registered in
+# opencompass/datasets/Kaoshi.py ("KaoshiEvaluator_<suffix>").  Evaluators are
+# registered under the English suffixes only, so the Chinese label cannot be
+# used in the evaluator type directly.
+# NOTE(review): '填空题' -> 'cloze' is the closest registered type; confirm.
+_evaluator_suffix = {
+    '单选题': 'single_choice',
+    '多选题': 'multi_choice',
+    '填空题': 'cloze',
+}
+_base_path = './data/Kaoshi'
+
+# Build one dataset config per (question type, split) pair.
+for _type in ['单选题', '多选题', '填空题']:
+    for _split in splits_with_type[_type]:
+        # Splits whose name mentions law ("法学") or real estate ("房地产")
+        # are left out of this config.
+        if "法学" in _split or "房地产" in _split:
+            continue
+        # Drop any "-<type>" part from the split name to get the folder that
+        # holds "<type>.jsonl".
+        _folder = _split.replace('-' + _type, '')
+        _p = prompts[_type]
+        _reader_cfg = {
+            "input_columns": ['question'],
+            "output_column": 'answer',
+        }
+        _infer_cfg = {
+            "ice_template": {
+                "type": PromptTemplate,
+                "template": {
+                    "round": [{
+                        "role": "HUMAN",
+                        # Instruction prefix followed by the question text.
+                        "prompt": _p + '{question}'
+                    }]
+                },
+                "ice_token": "</E>"
+            },
+            "retriever": {
+                "type": ZeroRetriever
+            },
+            "inferencer": {
+                "type": GenInferencer,
+                "max_out_len": 1024,
+            }
+        }
+        _eval_cfg = {
+            "evaluator": {
+                # Must match a name registered in ICL_EVALUATORS.
+                "type": "KaoshiEvaluator" + "_" + _evaluator_suffix[_type],
+            },
+            "pred_role": "BOT",
+        }
+        _dataset = {
+            "type": KaoshiDataset,
+            # The question type is part of the abbreviation because the same
+            # split can appear under several types (e.g. both 单选题 and
+            # 多选题); without it two datasets would share one abbr.
+            "abbr": "Kaoshi" + _split + '-' + _type,
+            "path": _base_path + '/' + _folder + '/' + _type + ".jsonl",
+            "reader_cfg": _reader_cfg,
+            "infer_cfg": _infer_cfg,
+            "eval_cfg": _eval_cfg,
+        }
+
+        Kaoshi_datasets.append(_dataset)
+
+# Delete every module-level name that starts with "_" (the loop temporaries
+# above and any other underscore-prefixed global present at this point),
+# presumably so that only the public names remain in this config module.
+# The names are snapshotted into a list first because globals() must not
+# change size while it is being iterated.
+_temporary_variables = [k for k in globals() if k.startswith('_')]
+for _t in _temporary_variables:
+    del globals()[_t]
+# These two names were bound after the snapshot was taken, so they are
+# removed explicitly.
+del _temporary_variables, _t
diff --git a/opencompass/datasets/Kaoshi.py b/opencompass/datasets/Kaoshi.py
@@ -0,0 +1,147 @@
+import json
+import re
+
+from datasets import Dataset
+
+from opencompass.openicl.icl_evaluator import BaseEvaluator
+from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
+
+from .base import BaseDataset
+
+
+def get_number(options):
+    """Render ``options`` as lettered lines: ``A. <opt>\\n``, ``B. <opt>\\n`` ...
+
+    Letters are produced from consecutive code points starting at 65
+    (uppercase 'A').
+
+    Args:
+        options: Iterable of option values; each one is formatted with
+            ``str.format`` semantics via an f-string.
+
+    Returns:
+        str: One line per option, each terminated by a newline; the empty
+        string when ``options`` is empty.
+    """
+    first_letter = ord('A')
+    return ''.join(f'{chr(first_letter + offset)}. {option}\n'
+                   for offset, option in enumerate(options))
+
+
+@LOAD_DATASET.register_module()
+class KaoshiDataset(BaseDataset):
+    """Loader for Kaoshi question files stored as JSON Lines."""
+
+    @staticmethod
+    def load(path: str):
+        """Read a ``<question type>.jsonl`` file into a ``Dataset``.
+
+        The question type is taken from the file name (without ``.jsonl``).
+        For '单选题' and '多选题' files, the lettered option list built by
+        ``get_number`` from ``data['options']`` is appended to the stripped
+        question text.
+
+        Args:
+            path (str): Path of the ``.jsonl`` file to read.
+
+        Returns:
+            Dataset: One row per non-blank line of the file.
+        """
+        import os  # local import so this block does not rely on file-level edits
+
+        # basename also copes with platform-specific separators.
+        _type = os.path.basename(path).replace('.jsonl', '')
+        has_options = _type in ['单选题', '多选题']
+        data_list = []
+        with open(path, encoding='utf-8') as f:
+            for line in f:
+                # A blank line (e.g. a trailing newline at end of file) is
+                # not a JSON document and would make json.loads raise.
+                if not line.strip():
+                    continue
+                data = json.loads(line)
+                if has_options:
+                    data['question'] = data['question'].strip(
+                    ) + '\n' + get_number(data['options'])
+                data_list.append(data)
+        return Dataset.from_list(data_list)
+
+
+# Question types accepted by KaoshiEvaluator; one evaluator is registered
+# per entry at the bottom of this module.
+valid_kaoshi__question_types = [
+    'single_choice',
+    'multi_choice',
+    'multi_question_choice',
+    'five_out_of_seven',
+    'cloze',
+    'subjective',
+    'correction',
+]
+
+
+class KaoshiEvaluator(BaseEvaluator):
+    """Rule-based scorer for Kaoshi choice-style questions.
+
+    Only ``single_choice``, ``multi_choice``, ``multi_question_choice`` and
+    ``five_out_of_seven`` are actually scored; every other accepted question
+    type gets a score of 0.
+
+    Args:
+        question_type (str): One of ``valid_kaoshi__question_types``.
+    """
+
+    def __init__(self, question_type) -> None:
+        super().__init__()
+        assert question_type in valid_kaoshi__question_types
+        self.question_type = question_type
+
+    def do_predictions_postprocess(self, model_output, answer_lenth=None):
+        """Extract the answer letters from one raw model output.
+
+        Args:
+            model_output (str): Text generated by the model.
+            answer_lenth (int, optional): Number of expected answers; only
+                used for ``multi_question_choice``. When ``None`` the
+                fallback extraction is not truncated.
+
+        Returns:
+            list[str]: Extracted answers. Empty when nothing could be
+            extracted or when the question type has no extraction rule.
+        """
+        # Bound up-front so that question types without an extraction rule
+        # return an empty list instead of raising UnboundLocalError.
+        model_answer = []
+
+        if self.question_type == 'single_choice':
+            # Scan the reversed text so the LAST A-D letter wins.
+            temp = re.findall(r'[A-D]', model_output[::-1])
+            if temp:
+                model_answer.append(temp[0])
+
+        elif self.question_type == 'multi_question_choice':
+            temp = re.findall(r'【答案】\s*[:：]*\s*[A-Z]', model_output)
+            if len(temp) == answer_lenth:
+                # One tagged answer per sub-question: keep its letter.
+                model_answer = [re.findall(r'[A-Z]', t)[0] for t in temp]
+            else:
+                # Fallback: first `answer_lenth` capital letters anywhere.
+                model_answer = re.findall(r'[A-Z]', model_output)[:answer_lenth]
+
+        elif self.question_type == 'multi_choice':
+            content = re.sub(r'\s+', '', model_output)
+            answer_index = content.find('【答案】')
+            # str.find returns -1 when the marker is missing; index 0 is a
+            # genuine match and must not fall through to the fallback.
+            if answer_index >= 0:
+                temp = content[answer_index:]
+            else:
+                # No marker: look at the last 10 characters only.
+                temp = content[-10:]
+            answer = ''.join(re.findall(r'[A-D]', temp))
+            if answer:
+                model_answer.append(answer)
+
+        elif self.question_type == 'five_out_of_seven':
+            # First five A-G letters found in the output.
+            model_answer = re.findall(r'[A-G]', model_output)[:5]
+
+        return model_answer
+
+    def ensure_same_length(self, pred, refr):
+        """Return ``pred`` if it is as long as ``refr``, else all-'Z' filler.
+
+        The filler has ``len(refr)`` entries of 'Z', a value outside every
+        letter range extracted by ``do_predictions_postprocess`` except the
+        ``[A-Z]`` one.
+        """
+        if len(pred) == len(refr):
+            return pred
+        return ['Z'] * len(refr)
+
+    def score(self, predictions, references):
+        """Compute the percentage score over all predictions.
+
+        ``multi_choice`` awards 2 points for an exact match and 1 point when
+        every predicted letter occurs in the reference; other scored types
+        award 1 point per exact match.
+
+        Args:
+            predictions: Raw model outputs, one per sample.
+            references: Reference answers, one sized sequence per sample.
+
+        Returns:
+            dict: ``{'score': <percentage>}``; the score is 0 for unscored
+            question types and when there is nothing to score.
+        """
+        if self.question_type not in [
+                'single_choice', 'multi_choice', 'multi_question_choice',
+                'five_out_of_seven'
+        ]:
+            return {'score': 0}
+
+        is_multi_choice = self.question_type == 'multi_choice'
+        full_mark = 2 if is_multi_choice else 1
+        correct_score, total_score = 0, 0
+        for pred, refr in zip(predictions, references):
+            if self.question_type == 'multi_question_choice':
+                pred = self.do_predictions_postprocess(pred, len(refr))
+            else:
+                pred = self.do_predictions_postprocess(pred)
+            pred = self.ensure_same_length(pred, refr)
+            for p, r in zip(pred, refr):
+                if p == r:
+                    correct_score += full_mark
+                elif is_multi_choice and all(i in r for i in p):
+                    # Partial credit: nothing wrong was selected.
+                    correct_score += 1
+                total_score += full_mark
+
+        # Empty inputs would otherwise divide by zero.
+        if total_score == 0:
+            return {'score': 0}
+        return {'score': correct_score / total_score * 100}
+
+
+def _kaoshi_register(question_type):
+    """Register ``KaoshiEvaluator_<question_type>`` in ``ICL_EVALUATORS``.
+
+    The registered factory is created inside this function so that it
+    captures this call's ``question_type`` rather than the module-level loop
+    variable, which changes on every iteration.
+    """
+    ICL_EVALUATORS.register_module(
+        name=f'KaoshiEvaluator_{question_type}',
+        module=lambda *args, **kwargs: KaoshiEvaluator(
+            question_type=question_type, *args, **kwargs))
+
+
+# One registry entry per accepted question type.
+for question_type in valid_kaoshi__question_types:
+    _kaoshi_register(question_type)
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
@@ -35,6 +35,7 @@
 from .humanevalx import *  # noqa: F401, F403
 from .iwslt2017 import *  # noqa: F401, F403
 from .jigsawmultilingual import *  # noqa: F401, F403
+from .Kaoshi import *  # noqa: F401, F403
 from .lambada import *  # noqa: F401, F403
 from .lcsts import *  # noqa: F401, F403
 from .LEval_coursera import *  # noqa: F401, F403