Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mathbench update postprocess #600

Merged
merged 2 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion configs/datasets/MathBench/mathbench_gen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
from .mathbench_gen_10da90 import mathbench_datasets # noqa: F401, F403
from .mathbench_gen_ad37c1 import mathbench_datasets # noqa: F401, F403
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, mathbench_postprocess
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.utils.text_postprocessors import first_option_postprocess


# Prompt templates for single-choice questions, keyed by
# "single_choice_<lang>" with an optional "_with_reasoning" suffix.
# Each template carries a "{question}" placeholder.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by the last occurrence, so the shadowed "single_choice_cn"
# entry was dead and has been removed.
single_choice_prompts = {
    "single_choice_cn_with_reasoning": "以下是一道关于数学的单项选择题,请你一步一步推理并得到最终的答案选项。回答格式为如下:\n答案选项:A、B、C、D中你认为正确的一个选项\n计算过程:根据题目得到选项答案的一步步过程\n请严格按照上面的格式回答问题,下面是你要回答的题目:\n{question}\n答案选项:",
    "single_choice_cn": "以下是一道关于数学的单项选择题,请你直接回答正确答案的选项序号。\n下面是你要回答的题目:\n{question}\n答案选项:",
    "single_choice_en_with_reasoning": "Here is a multiple-choice question about mathematics. Please provide the final answer option by step-by-step reasoning. Please answer in the following format:\nAnswer option: A, B, C, or D (the option you believe is correct)\nCalculation process: Step-by-step process to derive the answer option based on the question\nPlease strictly follow the above format to answer the question. Here is the question you need to answer:\n{question}\nAnswer option:",
    "single_choice_en": "Here is a multiple-choice question about mathematics. Please provide the correct answer option directly.\nHere is the question you need to answer:\n{question}\nAnswer option:",
}

cloze_prompts={
cloze_prompts = {
"cloze_cn": [
dict(role='HUMAN', prompt='Q: 林中有15棵树。林务工人员今天将在林中种植树木。完成后,将有21棵树。林务工人员今天种植了多少棵树?'),
dict(role='BOT', prompt='A: 我们从15棵树开始。后来有21棵树。差值必定是他们种植的树木数量。所以,他们必须种植了21 - 15 = 6棵树。答案是 6\n'),
Expand Down Expand Up @@ -53,15 +53,13 @@
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
}

]}

# Maps each MathBench split to the list of prompt/task names evaluated on it.
# Each key is listed once; the repeated 'primary' entry carried the same
# value and was redundant.
mathbench_sets = {
    'college': ['single_choice_cn', 'cloze_en'],
    'high': ['single_choice_cn', 'single_choice_en'],
    'middle': ['single_choice_cn'],
    'primary': ['cloze_cn'],
}

# Generate reasoning path if set True or just generate the final answer
Expand All @@ -75,26 +73,24 @@
for _split in list(mathbench_sets.keys()):
for _name in mathbench_sets[_split]:
mathbench_infer_cfg = dict(
ice_template=dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=single_choice_prompts[_name + "_with_reasoning"] if with_reasoning else single_choice_prompts[_name],
),
dict(role="BOT", prompt="{answer}")] if 'choice' in _name else cloze_prompts[_name],
),
ice_token="</E>",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512,),
inferencer=dict(type=GenInferencer, max_out_len=512),
)

mathbench_eval_cfg = dict(
evaluator=dict(type=CircularEvaluator if 'choice' in _name else AccEvaluator),
pred_postprocessor=dict(type=first_capital_postprocess) if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD') if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))

mathbench_datasets.append(
dict(
Expand All @@ -110,5 +106,3 @@
infer_cfg=mathbench_infer_cfg,
eval_cfg=mathbench_eval_cfg,
))

del _split, _name
18 changes: 18 additions & 0 deletions configs/summarizers/mathbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Summarizer configuration for MathBench.
# `dataset_abbrs` lists what to report: plain strings act as category
# headers, and each two-element list pairs a dataset abbreviation with
# (presumably) the metric name to display for it.
summarizer = dict(
    dataset_abbrs=[
        '######## MathBench Accuracy ########',  # category
        ['mathbench-college-single_choice_cn', 'acc_1'],
        ['mathbench-college-cloze_en', 'accuracy'],
        ['mathbench-high-single_choice_cn', 'acc_1'],
        ['mathbench-high-single_choice_en', 'acc_1'],
        ['mathbench-middle-single_choice_cn', 'acc_1'],
        ['mathbench-primary-cloze_cn', 'accuracy'],
        '######## MathBench CircularEval ########',  # category
        ['mathbench-college-single_choice_cn', 'perf_4'],
        ['mathbench-high-single_choice_cn', 'perf_4'],
        ['mathbench-high-single_choice_en', 'perf_4'],
        ['mathbench-middle-single_choice_cn', 'perf_4'],
    ],
    # Concatenate every `*_summary_groups` list visible in this namespace.
    # The namespace is snapshotted with list() first: iterating the live
    # locals() dict directly inside a comprehension can raise
    # "RuntimeError: dictionary changed size during iteration" on
    # Python 3.12+ when run under tracing (e.g. coverage).
    summary_groups=sum(
        [
            groups
            for name, groups in list(locals().items())
            if name.endswith("_summary_groups")
        ],
        [],
    ),
)
11 changes: 8 additions & 3 deletions opencompass/datasets/mathbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,15 @@ def load(path: str, name: str, with_circular: bool = True):
else:
question = entry['question'].strip(
) + '\n' + get_number(entry['options'])
data.append({
info = {
'question': question,
'answer': entry['answer'].strip()
})
}
# For PPL evaluation
for i in range(4):
info[chr(ord('A') +
i)] = entry['options'][i].strip()
data.append(info)

dataset = Dataset.from_list(data)
return dataset
Expand All @@ -91,7 +96,7 @@ def mathbench_postprocess(text: str, name: str) -> str:
ans = ans_line[1].strip()

output = re.sub(r'(\d),(\d)', r'\1\2', ans)
numbers = re.findall(r'-?\d*\.?\d+|\d+', output)
numbers = re.findall(r'-?\d*\.?/?\d+|\d+', output)
if numbers:
return numbers[-1]

Expand Down
9 changes: 6 additions & 3 deletions opencompass/utils/text_postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ def first_option_postprocess(text: str, options: str) -> str:

patterns = [
f'[Tt]he answer is [{options}]',
f'[Tt]he correct answer is [{options}]',
f'答案是(.*?)[{options}]',
f'答案为(.*?)[{options}]',
f'[Tt]he correct answer\s?(?:option)?\s?is [{options}]', # noqa
f'答案(?:选项)?是(.*?)[{options}]',
f'答案(?:选项)?为(.*?)[{options}]',
f'答案(?:选项)?选(.*?)[{options}]',
f'选项[{options}]是?正确',
f'选项[{options}]为?正确',
f'固选(.*?)[{options}]',
f'答案应该是(.*?)[{options}]',
f'(\s|^)[{options}][\s。,,\.$]', # noqa
Expand Down