Skip to content

Commit

Permalink
Mathbench update postprocess (open-compass#600)
Browse files Browse the repository at this point in the history
* Update mathbench

* Update mathbench
  • Loading branch information
liushz authored and BunnyRunnerX committed Nov 20, 2023
1 parent f098781 commit d811584
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 21 deletions.
2 changes: 1 addition & 1 deletion configs/datasets/MathBench/mathbench_gen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
from .mathbench_gen_10da90 import mathbench_datasets # noqa: F401, F403
from .mathbench_gen_ad37c1 import mathbench_datasets # noqa: F401, F403
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, mathbench_postprocess
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.utils.text_postprocessors import first_option_postprocess


single_choice_prompts = {
"single_choice_cn_with_reasoning": "以下是一道关于数学的单项选择题,请你一步一步推理并得到最终的答案选项。回答格式为如下:\n答案选项:A、B、C、D中你认为正确的一个选项\n计算过程:根据题目得到选项答案的一步步过程\n请严格按照上面的格式回答问题,下面是你要回答的题目:\n{question}\n答案选项:",
"single_choice_cn": "以下是一道关于数学的单项选择题,请你给出正确的答案选项\n下面是你要回答的题目:\n{question}\n答案选项:",
"single_choice_cn": "以下是一道关于数学的单项选择题,请你直接回答正确答案的选项序号\n下面是你要回答的题目:\n{question}\n答案选项:",
"single_choice_en_with_reasoning": "Here is a multiple-choice question about mathematics. Please provide the final answer option by step-by-step reasoning. Please answer in the following format:\nAnswer option: A, B, C, or D (the option you believe is correct)\nCalculation process: Step-by-step process to derive the answer option based on the question\nPlease strictly follow the above format to answer the question. Here is the question you need to answer:\n{question}\nAnswer option:",
"single_choice_en": "Here is a multiple-choice question about mathematics. Please provide the correct answer option directly.\nHere is the question you need to answer:\n{question}\nAnswer option:",
}

cloze_prompts={
cloze_prompts = {
"cloze_cn": [
dict(role='HUMAN', prompt='Q: 林中有15棵树。林务工人员今天将在林中种植树木。完成后,将有21棵树。林务工人员今天种植了多少棵树?'),
dict(role='BOT', prompt='A: 我们从15棵树开始。后来有21棵树。差值必定是他们种植的树木数量。所以,他们必须种植了21 - 15 = 6棵树。答案是 6\n'),
Expand Down Expand Up @@ -53,15 +53,13 @@
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
}

]}

mathbench_sets = {
'college': ['single_choice_cn', 'cloze_en'],
'high': ['single_choice_cn', 'single_choice_en'],
'middle': ['single_choice_cn'],
'primary': ['cloze_cn'],
'primary': ['cloze_cn']
}

# Generate reasoning path if set True or just generate the final answer
Expand All @@ -75,26 +73,24 @@
for _split in list(mathbench_sets.keys()):
for _name in mathbench_sets[_split]:
mathbench_infer_cfg = dict(
ice_template=dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=single_choice_prompts[_name + "_with_reasoning"] if with_reasoning else single_choice_prompts[_name],
),
dict(role="BOT", prompt="{answer}")] if 'choice' in _name else cloze_prompts[_name],
),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512,),
inferencer=dict(type=GenInferencer, max_out_len=512),
)

mathbench_eval_cfg = dict(
evaluator=dict(type=CircularEvaluator if 'choice' in _name else AccEvaluator),
pred_postprocessor=dict(type=first_capital_postprocess) if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD') if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))

mathbench_datasets.append(
dict(
Expand All @@ -110,5 +106,3 @@
infer_cfg=mathbench_infer_cfg,
eval_cfg=mathbench_eval_cfg,
))

del _split, _name
18 changes: 18 additions & 0 deletions configs/summarizers/mathbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Summarizer configuration for MathBench results.
# `summarizer` is a dict with two entries: `dataset_abbrs` (an ordered list of
# section headers and dataset/metric pairs) and `summary_groups` (collected
# dynamically from the surrounding namespace, see below).
summarizer = dict(
# Each entry is either a plain string (the ones tagged "# category") or a
# [dataset abbreviation, metric name] pair. The first section lists
# single-choice subsets with 'acc_1' and cloze subsets with 'accuracy'; the
# second section lists only the single-choice subsets, with 'perf_4'.
# NOTE(review): 'acc_1' / 'perf_4' are presumably the metrics emitted by the
# circular evaluator used for single-choice subsets -- confirm.
dataset_abbrs=[
'######## MathBench Accuracy ########', # category
['mathbench-college-single_choice_cn', 'acc_1'],
['mathbench-college-cloze_en', 'accuracy'],
['mathbench-high-single_choice_cn', 'acc_1'],
['mathbench-high-single_choice_en', 'acc_1'],
['mathbench-middle-single_choice_cn', 'acc_1'],
['mathbench-primary-cloze_cn', 'accuracy'],
'######## MathBench CircularEval ########', # category
['mathbench-college-single_choice_cn', 'perf_4'],
['mathbench-high-single_choice_cn', 'perf_4'],
['mathbench-high-single_choice_en', 'perf_4'],
['mathbench-middle-single_choice_cn', 'perf_4'],
],
# Gather every value bound to a name ending in "_summary_groups" that is
# visible through locals() here, and join them with sum(..., []) -- which
# concatenates, so the values are expected to be lists. No such name is
# defined in this block itself, so on its own this evaluates to [].
# NOTE(review): presumably group definitions are meant to be imported into
# this config's namespace before this statement runs -- confirm.
summary_groups=sum(
[v for k, v in locals().items() if k.endswith("_summary_groups")], [])
)
11 changes: 8 additions & 3 deletions opencompass/datasets/mathbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,15 @@ def load(path: str, name: str, with_circular: bool = True):
else:
question = entry['question'].strip(
) + '\n' + get_number(entry['options'])
data.append({
info = {
'question': question,
'answer': entry['answer'].strip()
})
}
# For PPL evaluation
for i in range(4):
info[chr(ord('A') +
i)] = entry['options'][i].strip()
data.append(info)

dataset = Dataset.from_list(data)
return dataset
Expand All @@ -91,7 +96,7 @@ def mathbench_postprocess(text: str, name: str) -> str:
ans = ans_line[1].strip()

output = re.sub(r'(\d),(\d)', r'\1\2', ans)
numbers = re.findall(r'-?\d*\.?\d+|\d+', output)
numbers = re.findall(r'-?\d*\.?/?\d+|\d+', output)
if numbers:
return numbers[-1]

Expand Down
9 changes: 6 additions & 3 deletions opencompass/utils/text_postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ def first_option_postprocess(text: str, options: str) -> str:

patterns = [
f'[Tt]he answer is [{options}]',
f'[Tt]he correct answer is [{options}]',
f'答案是(.*?)[{options}]',
f'答案为(.*?)[{options}]',
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
f'答案(?:选项)?是(.*?)[{options}]',
f'答案(?:选项)?为(.*?)[{options}]',
f'答案(?:选项)?选(.*?)[{options}]',
f'选项[{options}]是?正确',
f'选项[{options}]为?正确',
f'固选(.*?)[{options}]',
f'答案应该是(.*?)[{options}]',
f'(\s|^)[{options}][\s。,,\.$]', # noqa
Expand Down

0 comments on commit d811584

Please sign in to comment.