diff --git a/configs/datasets/MathBench/mathbench_gen_86de1c.py b/configs/datasets/MathBench/mathbench_gen_86de1c.py
index 2326edc73..1c350e9d3 100644
--- a/configs/datasets/MathBench/mathbench_gen_86de1c.py
+++ b/configs/datasets/MathBench/mathbench_gen_86de1c.py
@@ -83,9 +83,9 @@
                 round=[
                     dict(
                         role="HUMAN",
-                        prompt= single_choice_prompts[_name + "_with_reasoning"] if with_reasoning else mathbench_sets[_name],
+                        prompt=single_choice_prompts[_name + "_with_reasoning"] if with_reasoning else single_choice_prompts[_name],
                     ),
-                    dict(role="BOT", prompt="{answer}"),] if 'choice' in _name else cloze_prompts[_name],
+                    dict(role="BOT", prompt="{answer}")] if 'choice' in _name else cloze_prompts[_name],
             ),
             ice_token="",
         ),
@@ -94,15 +94,15 @@
     )
 
     mathbench_eval_cfg = dict(
-        evaluator=dict(type=CircularEvaluator if 'choice' in _name else AccEvaluator), 
-        pred_postprocessor=dict(type=first_capital_postprocess ) if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
+        evaluator=dict(type=CircularEvaluator if 'choice' in _name else AccEvaluator),
+        pred_postprocessor=dict(type=first_capital_postprocess) if 'single_choice' in _name else dict(type=mathbench_postprocess, name=_name))
 
     mathbench_datasets.append(
         dict(
             type=MathBenchDataset,
             path=f"./data/mathbench/{_split}",
             name=_name,
-            abbr="mathbench-" + _split + '-' + _name, 
+            abbr="mathbench-" + _split + '-' + _name,
             reader_cfg=dict(
                 input_columns=["question"],
                 output_column="answer"
@@ -111,4 +111,4 @@
             eval_cfg=mathbench_eval_cfg,
         ))
 
-del _split, _name
\ No newline at end of file
+del _split, _name
diff --git a/opencompass/datasets/mathbench.py b/opencompass/datasets/mathbench.py
index 35bf34d93..dbc8d2634 100644
--- a/opencompass/datasets/mathbench.py
+++ b/opencompass/datasets/mathbench.py
@@ -11,18 +11,48 @@
 def get_number(options):
     result_string = ''
-    for i, option in enumerate(options, start=65):
+    for i, option in enumerate(options, start=ord('A')):
         result_string += f'{chr(i)}. {option}\n'
     return result_string
 
 
+def get_circular_example(entry, id):
+    """For given example, generate four circular examples."""
+    # Only 4 options is supported for current circular eval.
+    circular_patterns = ['ABCD', 'BCDA', 'CDAB', 'DABC']
+    data = []
+    for c in circular_patterns:
+        line = copy.deepcopy(entry)
+        options = []
+        for i in range(4):
+            options.append(line['options'][ord(c[i]) - ord('A')])
+        line['options'] = options
+        line['answer'] = {
+            c[0]: 'A',
+            c[1]: 'B',
+            c[2]: 'C',
+            c[3]: 'D'
+        }[line['answer']]
+        line['answer'] = str(id) + '--' + line['answer'] + '--' + c
+        line['question'] = line['question'].strip() + '\n' + get_number(
+            line['options'])
+        data.append(line)
+
+    return data
+
+
 @LOAD_DATASET.register_module()
 class MathBenchDataset(BaseDataset):
 
     @staticmethod
-    def load(path: str, name: str):
-
-        circular_patterns = ['ABCD', 'BCDA', 'CDAB', 'DABC']
-
+    def load(path: str, name: str, with_circular: bool = True):
+        """MathBench Dataset.
+
+        Args:
+            path (str): Path of the mathbench dataset.
+            name (str): Name of the target subset.
+            with_circular (bool): Whether to create circular dataset for
+                single choice question. Defaults to True.
+        """
         data = []
         filename = osp.join(path, f'{name}.jsonl')
         with open(filename, 'r') as infile:
@@ -37,24 +67,15 @@ def load(path: str, name: str):
                         'answer': entry['answer'].strip()
                     })
                 else:
-                    for c in circular_patterns:
-                        line = copy.deepcopy(entry)
-                        options = []
-                        for i in range(4):
-                            options.append(line['options'][ord(c[i]) -
-                                                           ord('A')])
-                        line['options'] = options
-                        line['answer'] = {
-                            c[0]: 'A',
-                            c[1]: 'B',
-                            c[2]: 'C',
-                            c[3]: 'D'
-                        }[line['answer']]
-                        line['answer'] = str(
-                            id) + '--' + line['answer'] + '--' + c
-                        line['question'] = line['question'].strip(
-                        ) + '\n' + get_number(line['options'])
-                        data.append(line)
+                    if with_circular:
+                        data.extend(get_circular_example(entry, id))
+                    else:
+                        question = entry['question'].strip(
+                        ) + '\n' + get_number(entry['options'])
+                        data.append({
+                            'question': question,
+                            'answer': entry['answer'].strip()
+                        })
 
         dataset = Dataset.from_list(data)
         return dataset