Skip to content

Commit

Permalink
[Fix] Update option postprocess & mathbench language summarizer (#1413)
Browse files Browse the repository at this point in the history
* Update option postprocess & mathbench language summarizer

* Update option postprocess & mathbench language summarizer

---------

Co-authored-by: liuhongwei <[email protected]>
  • Loading branch information
liushz and liuhongwei authored Aug 22, 2024
1 parent 0fe9756 commit 9fdbc74
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 50 deletions.
109 changes: 84 additions & 25 deletions configs/summarizers/groups/mathbench_v1_2024_lang.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,66 @@
# Language specific summarizer groups for MathBench

# Summary groups for MathBench, broken down by part (A = application,
# T = theory), school stage, and language (cn / en).
#
# Each entry is a dict with a 'name' and a 'subsets' list. A subset is either
# a plain name (a dataset abbreviation or another group's name) or a
# [dataset_abbr, metric] pair.
#
# NOTE(review): several names ('mathbench-a-cn-average',
# 'mathbench-a-en-average', 'mathbench-a (average)', and their mathbench-t
# counterparts) are defined twice in this list with different subsets.
# Confirm which definition the summarizer is meant to use before removing
# either one.
mathbench_2024_summary_groups = [
    # mathbench-a average with subsets
    {'name': 'college', 'subsets': [['mathbench-college-single_choice_cn', 'perf_4'], ['mathbench-college-single_choice_en', 'perf_4']]},
    {'name': 'high', 'subsets': [['mathbench-high-single_choice_cn', 'perf_4'], ['mathbench-high-single_choice_en', 'perf_4']]},
    {'name': 'middle', 'subsets': [['mathbench-middle-single_choice_cn', 'perf_4'], ['mathbench-middle-single_choice_en', 'perf_4']]},
    {'name': 'primary', 'subsets': [['mathbench-primary-cloze_cn', 'accuracy'], ['mathbench-primary-cloze_en', 'accuracy']]},
    {'name': 'arithmetic', 'subsets': [['mathbench-arithmetic-cloze_en', 'accuracy']]},
    {'name': 'mathbench-a-cn-average', 'subsets': ['mathbench-college-single_choice_cn', 'mathbench-high-single_choice_cn', 'mathbench-middle-single_choice_cn', 'mathbench-primary-cloze_cn']},
    {'name': 'mathbench-a-en-average', 'subsets': ['mathbench-college-single_choice_en', 'mathbench-high-single_choice_en', 'mathbench-middle-single_choice_en', 'mathbench-primary-cloze_en']},
    {'name': 'mathbench-a (average)', 'subsets': ['college', 'high', 'middle', 'primary', 'arithmetic']},

    # mathbench-a language
    {'name': 'mathbench-a-college-cn', 'subsets': [['mathbench-college-single_choice_cn', 'perf_4']]},
    # Key corrected from 'ssubsets' to 'subsets' so this group matches every
    # other entry and the '*-en' averages below can resolve it.
    {'name': 'mathbench-a-college-en', 'subsets': [['mathbench-college-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-a-high-cn', 'subsets': [['mathbench-high-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-a-high-en', 'subsets': [['mathbench-high-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-a-middle-cn', 'subsets': [['mathbench-middle-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-a-middle-en', 'subsets': [['mathbench-middle-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-a-primary-cn', 'subsets': [['mathbench-primary-cloze_cn', 'accuracy']]},
    {'name': 'mathbench-a-primary-en', 'subsets': [['mathbench-primary-cloze_en', 'accuracy']]},
    {'name': 'mathbench-a-arithmetic', 'subsets': [['mathbench-arithmetic-cloze_en', 'accuracy']]},
    {'name': 'mathbench-a-cn-average', 'subsets': ['mathbench-a-college-cn', 'mathbench-a-high-cn', 'mathbench-a-middle-cn', 'mathbench-a-primary-cn']},
    {'name': 'mathbench-a-en-average', 'subsets': ['mathbench-a-college-en', 'mathbench-a-high-en', 'mathbench-a-middle-en', 'mathbench-a-primary-en']},
    # mathbench-a average
    {'name': 'mathbench-a (average)', 'subsets': ['mathbench-a-cn-average', 'mathbench-a-en-average']},

    # mathbench-t average with subsets
    {'name': 'college_knowledge', 'subsets': [['mathbench-college_knowledge-single_choice_cn', 'perf_4'], ['mathbench-college_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'high_knowledge', 'subsets': [['mathbench-high_knowledge-single_choice_cn', 'perf_4'], ['mathbench-high_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'middle_knowledge', 'subsets': [['mathbench-middle_knowledge-single_choice_cn', 'perf_4'], ['mathbench-middle_knowledge-single_choice_en', 'perf_4']]},
    # NOTE(review): the next three entries repeat the three above verbatim;
    # kept as-is here, confirm before deduplicating.
    {'name': 'college_knowledge', 'subsets': [['mathbench-college_knowledge-single_choice_cn', 'perf_4'], ['mathbench-college_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'high_knowledge', 'subsets': [['mathbench-high_knowledge-single_choice_cn', 'perf_4'], ['mathbench-high_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'middle_knowledge', 'subsets': [['mathbench-middle_knowledge-single_choice_cn', 'perf_4'], ['mathbench-middle_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'primary_knowledge', 'subsets': [['mathbench-primary_knowledge-single_choice_cn', 'perf_4'], ['mathbench-primary_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-cn-average', 'subsets': ['mathbench-college_knowledge-single_choice_cn', 'mathbench-high_knowledge-single_choice_cn', 'mathbench-middle_knowledge-single_choice_cn', 'mathbench-primary_knowledge-single_choice_cn']},
    {'name': 'mathbench-t-en-average', 'subsets': ['mathbench-college_knowledge-single_choice_en', 'mathbench-high_knowledge-single_choice_en', 'mathbench-middle_knowledge-single_choice_en', 'mathbench-primary_knowledge-single_choice_en']},
    {'name': 'mathbench-t (average)', 'subsets': ['college_knowledge', 'high_knowledge', 'middle_knowledge', 'primary_knowledge']},
    # mathbench-t language
    {'name': 'mathbench-t-college-cn', 'subsets': [['mathbench-college_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-college-en', 'subsets': [['mathbench-college_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-high-cn', 'subsets': [['mathbench-high_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-high-en', 'subsets': [['mathbench-high_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-middle-cn', 'subsets': [['mathbench-middle_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-middle-en', 'subsets': [['mathbench-middle_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-primary-cn', 'subsets': [['mathbench-primary_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-primary-en', 'subsets': [['mathbench-primary_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-cn-average', 'subsets': ['mathbench-t-college-cn', 'mathbench-t-high-cn', 'mathbench-t-middle-cn', 'mathbench-t-primary-cn']},
    {'name': 'mathbench-t-en-average', 'subsets': ['mathbench-t-college-en', 'mathbench-t-high-en', 'mathbench-t-middle-en', 'mathbench-t-primary-en']},
    # mathbench-t average
    {'name': 'mathbench-t (average)', 'subsets': ['mathbench-t-cn-average', 'mathbench-t-en-average']},

    # overall cn
    {'name': 'college-cn', 'subsets': ['mathbench-a-college-cn', 'mathbench-t-college-cn']},
    {'name': 'high-cn', 'subsets': ['mathbench-a-high-cn', 'mathbench-t-high-cn']},
    {'name': 'middle-cn', 'subsets': ['mathbench-a-middle-cn', 'mathbench-t-middle-cn']},
    {'name': 'primary-cn', 'subsets': ['mathbench-a-primary-cn', 'mathbench-t-primary-cn']},
    # The 'avarage' spelling is kept on purpose: dataset_abbrs refers to this
    # exact name, so renaming it here alone would break the lookup.
    {'name': 'cn-avarage', 'subsets': ['college-cn', 'high-cn', 'middle-cn', 'primary-cn']},

    # overall en
    {'name': 'college-en', 'subsets': ['mathbench-a-college-en', 'mathbench-t-college-en']},
    {'name': 'high-en', 'subsets': ['mathbench-a-high-en', 'mathbench-t-high-en']},
    {'name': 'middle-en', 'subsets': ['mathbench-a-middle-en', 'mathbench-t-middle-en']},
    {'name': 'primary-en', 'subsets': ['mathbench-a-primary-en', 'mathbench-t-primary-en']},
    # Same spelling caveat as 'cn-avarage' above.
    {'name': 'en-avarage', 'subsets': ['college-en', 'high-en', 'middle-en', 'primary-en']},

    # overall
    {'name': 'Overall', 'subsets': ['mathbench-a (average)', 'mathbench-t (average)']},
]

Expand All @@ -25,33 +69,48 @@
dataset_abbrs = [
'########################################################',
'###### MathBench-A-CN: Application Part (Chinese) ######',
'mathbench-college-single_choice_cn',
'mathbench-high-single_choice_cn',
'mathbench-middle-single_choice_cn',
'mathbench-primary-cloze_cn',
'mathbench-a-college-cn',
'mathbench-a-high-cn',
'mathbench-a-middle-cn',
'mathbench-a-primary-cn',
'mathbench-a-cn-average',

'###### MathBench-A-EN: Application Part (English) ######',
'mathbench-college-single_choice_en',
'mathbench-high-single_choice_en',
'mathbench-middle-single_choice_en',
'mathbench-primary-cloze_en',
'mathbench-a-college-en',
'mathbench-a-high-en',
'mathbench-a-middle-en',
'mathbench-a-primary-en',
'mathbench-a-en-average',

'###################################################',
'###### MathBench-T-CN: Theory Part (Chinese) ######',
'mathbench-college_knowledge-single_choice_cn',
'mathbench-high_knowledge-single_choice_cn',
'mathbench-middle_knowledge-single_choice_cn',
'mathbench-primary_knowledge-single_choice_cn',
'#########################################################',
'###### MathBench-T-CN: Theory Part (Chinese) ############',
'mathbench-t-college-cn',
'mathbench-t-high-cn',
'mathbench-t-middle-cn',
'mathbench-t-primary-cn',
'mathbench-t-cn-average',

'###### MathBench-T-EN: Theory Part (English) ######',
'mathbench-college_knowledge-single_choice_en',
'mathbench-high_knowledge-single_choice_en',
'mathbench-middle_knowledge-single_choice_en',
'mathbench-primary_knowledge-single_choice_en',
'###### MathBench-T-EN: Theory Part (English) ############',
'mathbench-t-college-en',
'mathbench-t-high-en',
'mathbench-t-middle-en',
'mathbench-t-primary-en',
'mathbench-t-en-average',

'#########################################################',
'###### MathBench-CN ############',
'college-cn',
'high-cn',
'middle-cn',
'primary-cn',
'cn-avarage',

'###### MathBench-EN ############',
'college-en',
'high-en',
'middle-en',
'primary-en',
'en-avarage',
'#########################################################',
],
summary_groups=mathbench_2024_summary_groups,
)
109 changes: 84 additions & 25 deletions opencompass/configs/summarizers/groups/mathbench_v1_2024_lang.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,66 @@
# Language specific summarizer groups for MathBench

# Summary groups for MathBench, broken down by part (A = application,
# T = theory), school stage, and language (cn / en).
#
# Each entry is a dict with a 'name' and a 'subsets' list. A subset is either
# a plain name (a dataset abbreviation or another group's name) or a
# [dataset_abbr, metric] pair.
#
# NOTE(review): several names ('mathbench-a-cn-average',
# 'mathbench-a-en-average', 'mathbench-a (average)', and their mathbench-t
# counterparts) are defined twice in this list with different subsets.
# Confirm which definition the summarizer is meant to use before removing
# either one.
mathbench_2024_summary_groups = [
    # mathbench-a average with subsets
    {'name': 'college', 'subsets': [['mathbench-college-single_choice_cn', 'perf_4'], ['mathbench-college-single_choice_en', 'perf_4']]},
    {'name': 'high', 'subsets': [['mathbench-high-single_choice_cn', 'perf_4'], ['mathbench-high-single_choice_en', 'perf_4']]},
    {'name': 'middle', 'subsets': [['mathbench-middle-single_choice_cn', 'perf_4'], ['mathbench-middle-single_choice_en', 'perf_4']]},
    {'name': 'primary', 'subsets': [['mathbench-primary-cloze_cn', 'accuracy'], ['mathbench-primary-cloze_en', 'accuracy']]},
    {'name': 'arithmetic', 'subsets': [['mathbench-arithmetic-cloze_en', 'accuracy']]},
    {'name': 'mathbench-a-cn-average', 'subsets': ['mathbench-college-single_choice_cn', 'mathbench-high-single_choice_cn', 'mathbench-middle-single_choice_cn', 'mathbench-primary-cloze_cn']},
    {'name': 'mathbench-a-en-average', 'subsets': ['mathbench-college-single_choice_en', 'mathbench-high-single_choice_en', 'mathbench-middle-single_choice_en', 'mathbench-primary-cloze_en']},
    {'name': 'mathbench-a (average)', 'subsets': ['college', 'high', 'middle', 'primary', 'arithmetic']},

    # mathbench-a language
    {'name': 'mathbench-a-college-cn', 'subsets': [['mathbench-college-single_choice_cn', 'perf_4']]},
    # Key corrected from 'ssubsets' to 'subsets' so this group matches every
    # other entry and the '*-en' averages below can resolve it.
    {'name': 'mathbench-a-college-en', 'subsets': [['mathbench-college-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-a-high-cn', 'subsets': [['mathbench-high-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-a-high-en', 'subsets': [['mathbench-high-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-a-middle-cn', 'subsets': [['mathbench-middle-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-a-middle-en', 'subsets': [['mathbench-middle-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-a-primary-cn', 'subsets': [['mathbench-primary-cloze_cn', 'accuracy']]},
    {'name': 'mathbench-a-primary-en', 'subsets': [['mathbench-primary-cloze_en', 'accuracy']]},
    {'name': 'mathbench-a-arithmetic', 'subsets': [['mathbench-arithmetic-cloze_en', 'accuracy']]},
    {'name': 'mathbench-a-cn-average', 'subsets': ['mathbench-a-college-cn', 'mathbench-a-high-cn', 'mathbench-a-middle-cn', 'mathbench-a-primary-cn']},
    {'name': 'mathbench-a-en-average', 'subsets': ['mathbench-a-college-en', 'mathbench-a-high-en', 'mathbench-a-middle-en', 'mathbench-a-primary-en']},
    # mathbench-a average
    {'name': 'mathbench-a (average)', 'subsets': ['mathbench-a-cn-average', 'mathbench-a-en-average']},

    # mathbench-t average with subsets
    {'name': 'college_knowledge', 'subsets': [['mathbench-college_knowledge-single_choice_cn', 'perf_4'], ['mathbench-college_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'high_knowledge', 'subsets': [['mathbench-high_knowledge-single_choice_cn', 'perf_4'], ['mathbench-high_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'middle_knowledge', 'subsets': [['mathbench-middle_knowledge-single_choice_cn', 'perf_4'], ['mathbench-middle_knowledge-single_choice_en', 'perf_4']]},
    # NOTE(review): the next three entries repeat the three above verbatim;
    # kept as-is here, confirm before deduplicating.
    {'name': 'college_knowledge', 'subsets': [['mathbench-college_knowledge-single_choice_cn', 'perf_4'], ['mathbench-college_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'high_knowledge', 'subsets': [['mathbench-high_knowledge-single_choice_cn', 'perf_4'], ['mathbench-high_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'middle_knowledge', 'subsets': [['mathbench-middle_knowledge-single_choice_cn', 'perf_4'], ['mathbench-middle_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'primary_knowledge', 'subsets': [['mathbench-primary_knowledge-single_choice_cn', 'perf_4'], ['mathbench-primary_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-cn-average', 'subsets': ['mathbench-college_knowledge-single_choice_cn', 'mathbench-high_knowledge-single_choice_cn', 'mathbench-middle_knowledge-single_choice_cn', 'mathbench-primary_knowledge-single_choice_cn']},
    {'name': 'mathbench-t-en-average', 'subsets': ['mathbench-college_knowledge-single_choice_en', 'mathbench-high_knowledge-single_choice_en', 'mathbench-middle_knowledge-single_choice_en', 'mathbench-primary_knowledge-single_choice_en']},
    {'name': 'mathbench-t (average)', 'subsets': ['college_knowledge', 'high_knowledge', 'middle_knowledge', 'primary_knowledge']},
    # mathbench-t language
    {'name': 'mathbench-t-college-cn', 'subsets': [['mathbench-college_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-college-en', 'subsets': [['mathbench-college_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-high-cn', 'subsets': [['mathbench-high_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-high-en', 'subsets': [['mathbench-high_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-middle-cn', 'subsets': [['mathbench-middle_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-middle-en', 'subsets': [['mathbench-middle_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-primary-cn', 'subsets': [['mathbench-primary_knowledge-single_choice_cn', 'perf_4']]},
    {'name': 'mathbench-t-primary-en', 'subsets': [['mathbench-primary_knowledge-single_choice_en', 'perf_4']]},
    {'name': 'mathbench-t-cn-average', 'subsets': ['mathbench-t-college-cn', 'mathbench-t-high-cn', 'mathbench-t-middle-cn', 'mathbench-t-primary-cn']},
    {'name': 'mathbench-t-en-average', 'subsets': ['mathbench-t-college-en', 'mathbench-t-high-en', 'mathbench-t-middle-en', 'mathbench-t-primary-en']},
    # mathbench-t average
    {'name': 'mathbench-t (average)', 'subsets': ['mathbench-t-cn-average', 'mathbench-t-en-average']},

    # overall cn
    {'name': 'college-cn', 'subsets': ['mathbench-a-college-cn', 'mathbench-t-college-cn']},
    {'name': 'high-cn', 'subsets': ['mathbench-a-high-cn', 'mathbench-t-high-cn']},
    {'name': 'middle-cn', 'subsets': ['mathbench-a-middle-cn', 'mathbench-t-middle-cn']},
    {'name': 'primary-cn', 'subsets': ['mathbench-a-primary-cn', 'mathbench-t-primary-cn']},
    # The 'avarage' spelling is kept on purpose: dataset_abbrs refers to this
    # exact name, so renaming it here alone would break the lookup.
    {'name': 'cn-avarage', 'subsets': ['college-cn', 'high-cn', 'middle-cn', 'primary-cn']},

    # overall en
    {'name': 'college-en', 'subsets': ['mathbench-a-college-en', 'mathbench-t-college-en']},
    {'name': 'high-en', 'subsets': ['mathbench-a-high-en', 'mathbench-t-high-en']},
    {'name': 'middle-en', 'subsets': ['mathbench-a-middle-en', 'mathbench-t-middle-en']},
    {'name': 'primary-en', 'subsets': ['mathbench-a-primary-en', 'mathbench-t-primary-en']},
    # Same spelling caveat as 'cn-avarage' above.
    {'name': 'en-avarage', 'subsets': ['college-en', 'high-en', 'middle-en', 'primary-en']},

    # overall
    {'name': 'Overall', 'subsets': ['mathbench-a (average)', 'mathbench-t (average)']},
]

Expand All @@ -25,33 +69,48 @@
dataset_abbrs = [
'########################################################',
'###### MathBench-A-CN: Application Part (Chinese) ######',
'mathbench-college-single_choice_cn',
'mathbench-high-single_choice_cn',
'mathbench-middle-single_choice_cn',
'mathbench-primary-cloze_cn',
'mathbench-a-college-cn',
'mathbench-a-high-cn',
'mathbench-a-middle-cn',
'mathbench-a-primary-cn',
'mathbench-a-cn-average',

'###### MathBench-A-EN: Application Part (English) ######',
'mathbench-college-single_choice_en',
'mathbench-high-single_choice_en',
'mathbench-middle-single_choice_en',
'mathbench-primary-cloze_en',
'mathbench-a-college-en',
'mathbench-a-high-en',
'mathbench-a-middle-en',
'mathbench-a-primary-en',
'mathbench-a-en-average',

'###################################################',
'###### MathBench-T-CN: Theory Part (Chinese) ######',
'mathbench-college_knowledge-single_choice_cn',
'mathbench-high_knowledge-single_choice_cn',
'mathbench-middle_knowledge-single_choice_cn',
'mathbench-primary_knowledge-single_choice_cn',
'#########################################################',
'###### MathBench-T-CN: Theory Part (Chinese) ############',
'mathbench-t-college-cn',
'mathbench-t-high-cn',
'mathbench-t-middle-cn',
'mathbench-t-primary-cn',
'mathbench-t-cn-average',

'###### MathBench-T-EN: Theory Part (English) ######',
'mathbench-college_knowledge-single_choice_en',
'mathbench-high_knowledge-single_choice_en',
'mathbench-middle_knowledge-single_choice_en',
'mathbench-primary_knowledge-single_choice_en',
'###### MathBench-T-EN: Theory Part (English) ############',
'mathbench-t-college-en',
'mathbench-t-high-en',
'mathbench-t-middle-en',
'mathbench-t-primary-en',
'mathbench-t-en-average',

'#########################################################',
'###### MathBench-CN ############',
'college-cn',
'high-cn',
'middle-cn',
'primary-cn',
'cn-avarage',

'###### MathBench-EN ############',
'college-en',
'high-en',
'middle-en',
'primary-en',
'en-avarage',
'#########################################################',
],
summary_groups=mathbench_2024_summary_groups,
)
3 changes: 3 additions & 0 deletions opencompass/utils/text_postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is option:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is:?.*?boxed{{([{options}])}}',
f'[Tt]he correct option is:?.*?boxed{{([{options}])}}',
f'[Tt]he correct answer option is:?.*?boxed{{([{options}])}}',
f'[Tt]he answer to the question is:?\s+\(?([{options}])\)?',
f'^选项\s?([{options}])',
f'^([{options}])\s?选?项',
Expand Down

0 comments on commit 9fdbc74

Please sign in to comment.