-
Notifications
You must be signed in to change notification settings - Fork 405
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Update MathBench & Math base model config (#1550)
* Update MathBench & WikiBench for FullBench * Update MathBench & WikiBench for FullBench * Update GPQA & MMLU_Pro * Update MathBench & WikiBench for FullBench * Update MathBench & WikiBench for FullBench * Update MathBench & WikiBench for FullBench * Update MathBench & Math base config --------- Co-authored-by: liushz <[email protected]>
- Loading branch information
Showing
4 changed files
with
222 additions
and
0 deletions.
There are no files selected for viewing
81 changes: 81 additions & 0 deletions
81
configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from mmengine.config import read_base | ||
from copy import deepcopy | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer | ||
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator | ||
from opencompass.datasets import MathBenchDataset, mathbench_postprocess | ||
from opencompass.utils.text_postprocessors import first_option_postprocess | ||
|
||
with read_base(): | ||
from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets | ||
|
||
# Max for this dataset is 4 | ||
num_shot = 4 | ||
# Generate reasoning path or not, only for single choice | ||
with_reasoning = True | ||
# Use circular evaluation or not | ||
with_circular_eval = True | ||
# Use PPL mode in single choice test or not | ||
use_ppl_single_choice = True | ||
|
||
assert 0 <= num_shot <= 4 | ||
if num_shot == 0: | ||
prompts = zero_shot_prompts | ||
else: | ||
prompts = {name: p[- 2 * num_shot - 2:] for name, p in few_shot_prompts.items()} | ||
|
||
mathbench_datasets = [] | ||
for _split in mathbench_sets: | ||
for _name in mathbench_sets[_split]: | ||
if 'single_choice' in _name: | ||
if with_reasoning and not use_ppl_single_choice: | ||
template_round = prompts[_name + '_with_reasoning'] | ||
else: | ||
template_round = prompts[_name] | ||
else: | ||
template_round = prompts[_name] | ||
|
||
if 'single_choice' in _name: | ||
pred_postprocessor = dict(type=first_option_postprocess, options='ABCD') | ||
else: | ||
pred_postprocessor = dict(type=mathbench_postprocess, name=_name) | ||
|
||
if 'single_choice' in _name and with_circular_eval: | ||
evaluator = dict(type=CircularEvaluator) | ||
else: | ||
evaluator = dict(type=AccEvaluator) | ||
|
||
# assemble the final config | ||
mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer') | ||
if use_ppl_single_choice and 'single_choice' in _name: | ||
template = {} | ||
for answer in ['A', 'B', 'C', 'D']: | ||
one_template_round = deepcopy(template_round) | ||
one_template_round[-1]['prompt'] = one_template_round[-1]['prompt'].format(answer=answer) | ||
template[answer] = dict(round=one_template_round) | ||
mathbench_infer_cfg = dict( | ||
prompt_template=dict(type=PromptTemplate, template=template), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=PPLInferencer), | ||
) | ||
else: | ||
mathbench_infer_cfg = dict( | ||
prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Question:']), | ||
) | ||
mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor) | ||
|
||
mathbench_datasets.append( | ||
dict( | ||
abbr='mathbench-' + _split + '-' + _name, | ||
type=MathBenchDataset, | ||
path=f'data/mathbench_v1/{_split}', | ||
name=_name, | ||
with_circular=with_circular_eval, | ||
reader_cfg=mathbench_reader_cfg, | ||
infer_cfg=mathbench_infer_cfg, | ||
eval_cfg=mathbench_eval_cfg, | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from mmengine.config import read_base | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2 | ||
|
||
with read_base(): | ||
from .math_4shot_example_from_google_research import prompt | ||
|
||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution') | ||
|
||
math_infer_cfg = dict( | ||
prompt_template=dict(type=PromptTemplate, template=prompt + '\n\nProblem:\n{problem}\nSolution:'), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Problem', '问题:'])) | ||
|
||
# postprocess v2 | ||
math_eval_cfg = dict( | ||
evaluator=dict(type=MATHEvaluator, version='v2'), | ||
pred_postprocessor=dict(type=math_postprocess_v2)) | ||
|
||
math_datasets = [ | ||
dict( | ||
type=MATHDataset, | ||
abbr='math', | ||
path='opencompass/math', | ||
reader_cfg=math_reader_cfg, | ||
infer_cfg=math_infer_cfg, | ||
eval_cfg=math_eval_cfg) | ||
] |
81 changes: 81 additions & 0 deletions
81
opencompass/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from mmengine.config import read_base | ||
from copy import deepcopy | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer | ||
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator | ||
from opencompass.datasets import MathBenchDataset, mathbench_postprocess | ||
from opencompass.utils.text_postprocessors import first_option_postprocess | ||
|
||
with read_base(): | ||
from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets | ||
|
||
# Max for this dataset is 4 | ||
num_shot = 4 | ||
# Generate reasoning path or not, only for single choice | ||
with_reasoning = True | ||
# Use circular evaluation or not | ||
with_circular_eval = True | ||
# Use PPL mode in single choice test or not | ||
use_ppl_single_choice = True | ||
|
||
assert 0 <= num_shot <= 4 | ||
if num_shot == 0: | ||
prompts = zero_shot_prompts | ||
else: | ||
prompts = {name: p[- 2 * num_shot - 2:] for name, p in few_shot_prompts.items()} | ||
|
||
mathbench_datasets = [] | ||
for _split in mathbench_sets: | ||
for _name in mathbench_sets[_split]: | ||
if 'single_choice' in _name: | ||
if with_reasoning and not use_ppl_single_choice: | ||
template_round = prompts[_name + '_with_reasoning'] | ||
else: | ||
template_round = prompts[_name] | ||
else: | ||
template_round = prompts[_name] | ||
|
||
if 'single_choice' in _name: | ||
pred_postprocessor = dict(type=first_option_postprocess, options='ABCD') | ||
else: | ||
pred_postprocessor = dict(type=mathbench_postprocess, name=_name) | ||
|
||
if 'single_choice' in _name and with_circular_eval: | ||
evaluator = dict(type=CircularEvaluator) | ||
else: | ||
evaluator = dict(type=AccEvaluator) | ||
|
||
# assemble the final config | ||
mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer') | ||
if use_ppl_single_choice and 'single_choice' in _name: | ||
template = {} | ||
for answer in ['A', 'B', 'C', 'D']: | ||
one_template_round = deepcopy(template_round) | ||
one_template_round[-1]['prompt'] = one_template_round[-1]['prompt'].format(answer=answer) | ||
template[answer] = dict(round=one_template_round) | ||
mathbench_infer_cfg = dict( | ||
prompt_template=dict(type=PromptTemplate, template=template), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=PPLInferencer), | ||
) | ||
else: | ||
mathbench_infer_cfg = dict( | ||
prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Question:']), | ||
) | ||
mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor) | ||
|
||
mathbench_datasets.append( | ||
dict( | ||
abbr='mathbench-' + _split + '-' + _name, | ||
type=MathBenchDataset, | ||
path=f'data/mathbench_v1/{_split}', | ||
name=_name, | ||
with_circular=with_circular_eval, | ||
reader_cfg=mathbench_reader_cfg, | ||
infer_cfg=mathbench_infer_cfg, | ||
eval_cfg=mathbench_eval_cfg, | ||
) | ||
) |
30 changes: 30 additions & 0 deletions
30
opencompass/configs/datasets/math/math_4shot_base_gen_43d5b6.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from mmengine.config import read_base | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2 | ||
|
||
with read_base(): | ||
from .math_4shot_example_from_google_research import prompt | ||
|
||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution') | ||
|
||
math_infer_cfg = dict( | ||
prompt_template=dict(type=PromptTemplate, template=prompt + '\n\nProblem:\n{problem}\nSolution:'), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Problem', '问题:'])) | ||
|
||
# postprocess v2 | ||
math_eval_cfg = dict( | ||
evaluator=dict(type=MATHEvaluator, version='v2'), | ||
pred_postprocessor=dict(type=math_postprocess_v2)) | ||
|
||
math_datasets = [ | ||
dict( | ||
type=MATHDataset, | ||
abbr='math', | ||
path='opencompass/math', | ||
reader_cfg=math_reader_cfg, | ||
infer_cfg=math_infer_cfg, | ||
eval_cfg=math_eval_cfg) | ||
] |