-
Notifications
You must be signed in to change notification settings - Fork 436
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Add ReasonBench(Internal) dataset (#577)
* [Feature] Add reasonbench dataset * add configs for supporting generative inference & merge datasets in the same category * modify config filename to prompt version * fix codes to meet pre-commit requirements * lint the code to meet pre-commit requirements * Align Load_data Sourcecode Briefly * fix bugs * reduce code redundancy
- Loading branch information
1 parent
76a95e9
commit b35d991
Showing
7 changed files
with
325 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .reasonbench_gen_d15233 import reasonbench_datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import FixKRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.openicl.icl_evaluator import AccEvaluator | ||
from opencompass.utils.text_postprocessors import first_capital_postprocess | ||
from opencompass.datasets.reasonbench import ReasonBenchDataset | ||
|
||
# Evaluation settings shared by every generative ReasonBench dataset below:
# predictions are passed through `first_capital_postprocess` and then scored
# by `AccEvaluator`.
reasonbench_eval_cfg = {
    "evaluator": {"type": AccEvaluator},
    "pred_postprocessor": {"type": first_capital_postprocess},
}
|
||
# Build three reader configs, one per loop step (i = 2, 3, 4).  In this
# generative variant the entries are identical: each reads `prompt_ppl` as
# input and `label_ppl` as the reference column.  The datasets below select
# an entry by index (0, 1 or 2).
reader_cfgs = []
for i in range(2, 5):
    # Option letters for this step; not referenced by the reader config here.
    choices = ["A", "B", "C", "D"][:i]

    reader_cfgs.append({
        "input_columns": ["prompt_ppl"],
        "output_column": "label_ppl",
    })
|
||
# Inference settings shared by all generative ReasonBench datasets.
# The template uses "</E>" as its in-context-example token; the retriever is
# a FixKRetriever configured with an empty `fix_id_list`.
infer_cfg = {
    "ice_template": {
        "type": PromptTemplate,
        "template": {
            "begin": "</E>",
            "round": [
                {"role": "HUMAN", "prompt": "</E>{prompt_ppl}"},
                {"role": "BOT", "prompt": "Answer: {label_ppl}"},
            ],
        },
        "ice_token": "</E>",
    },
    "retriever": {"type": FixKRetriever, "fix_id_list": []},
    "inferencer": {"type": GenInferencer},
}
|
||
|
||
# One single-element dataset list per ReasonBench category.  Every entry
# shares `infer_cfg` and `reasonbench_eval_cfg`; only the abbreviation, the
# data file and the `reader_cfgs` index differ.

CausalReasoningDataset = [
    {
        "abbr": "reasonbench-causal",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/causal.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CommonsenseReasoningDataset = [
    {
        "abbr": "reasonbench-commonsense",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/commonsense.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

AbductiveReasoningDataset = [
    {
        "abbr": "reasonbench-abductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/abductive.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

DeductiveReasoningDataset = [
    {
        "abbr": "reasonbench-deductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/deductive.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

InductiveReasoningDataset = [
    {
        "abbr": "reasonbench-inductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/inductive.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

SymbolicReasoningDataset = [
    {
        "abbr": "reasonbench-symbolic",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/symbolic.jsonl",
        "reader_cfg": reader_cfgs[2],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CLEVA_CommonsenseReasoningDataset = [
    {
        "abbr": "reasonbench-cleva_commonsense",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/cleva_commonsense.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CLEVA_DeductiveReasoningDataset = [
    {
        "abbr": "reasonbench-cleva_deductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/cleva_deductive.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CLEVA_InductiveReasoningDataset = [
    {
        "abbr": "reasonbench-cleva_inductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/cleva_inductive.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfg,
        "eval_cfg": reasonbench_eval_cfg,
    },
]
|
||
# Flat list of every generative ReasonBench dataset config, CLEVA categories
# first, in the same order as the original concatenation.
reasonbench_datasets = (
    CLEVA_CommonsenseReasoningDataset
    + CLEVA_DeductiveReasoningDataset
    + CLEVA_InductiveReasoningDataset
    + CausalReasoningDataset
    + CommonsenseReasoningDataset
    + AbductiveReasoningDataset
    + DeductiveReasoningDataset
    + InductiveReasoningDataset
    + SymbolicReasoningDataset
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .reasonbench_ppl_b4a005 import reasonbench_datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import PPLInferencer | ||
from opencompass.openicl.icl_evaluator import AccEvaluator | ||
from opencompass.datasets.reasonbench import ReasonBenchDataset | ||
|
||
# Evaluation settings shared by every perplexity-based ReasonBench dataset
# below: scored by `AccEvaluator`, with the prediction role set to "BOT".
reasonbench_eval_cfg = {
    "evaluator": {"type": AccEvaluator},
    "pred_role": "BOT",
}
|
||
# Build one reader config and one inference config per option count
# (2, 3 and 4 options); index 0 corresponds to two options, index 2 to four.
reader_cfgs, infer_cfgs = [], []
for i in range(2, 5):
    # Option letters available for this option count, e.g. ["A", "B"].
    choices = ["A", "B", "C", "D"][:i]

    # Inputs are the prompt, one column per option letter, and the
    # `choices` column; the reference column is `label`.
    reader_cfgs.append({
        "input_columns": ["prompt_ppl", *choices, "choices"],
        "output_column": "label",
    })

    # One candidate template per option, keyed by the option's position as
    # a string ("0", "1", ...); the BOT turn is the option letter itself.
    infer_cfgs.append({
        "prompt_template": {
            "type": PromptTemplate,
            "template": {
                str(position): {
                    "round": [
                        {"role": "HUMAN", "prompt": "{prompt_ppl}Answer:"},
                        {"role": "BOT", "prompt": letter},
                    ],
                }
                for position, letter in enumerate(choices)
            },
        },
        "retriever": {"type": ZeroRetriever},
        "inferencer": {"type": PPLInferencer},
    })
|
||
# One single-element dataset list per ReasonBench category.  Every entry
# shares `reasonbench_eval_cfg`; the abbreviation, the data file and the
# index used into `reader_cfgs` / `infer_cfgs` differ per category.

CausalReasoningDataset = [
    {
        "abbr": "reasonbench-causal",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/causal.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfgs[0],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CommonsenseReasoningDataset = [
    {
        "abbr": "reasonbench-commonsense",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/commonsense.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfgs[1],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

AbductiveReasoningDataset = [
    {
        "abbr": "reasonbench-abductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/abductive.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfgs[0],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

DeductiveReasoningDataset = [
    {
        "abbr": "reasonbench-deductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/deductive.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfgs[1],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

InductiveReasoningDataset = [
    {
        "abbr": "reasonbench-inductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/inductive.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfgs[0],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

SymbolicReasoningDataset = [
    {
        "abbr": "reasonbench-symbolic",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/symbolic.jsonl",
        "reader_cfg": reader_cfgs[2],
        "infer_cfg": infer_cfgs[2],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CLEVA_CommonsenseReasoningDataset = [
    {
        "abbr": "reasonbench-cleva_commonsense",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/cleva_commonsense.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfgs[1],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CLEVA_DeductiveReasoningDataset = [
    {
        "abbr": "reasonbench-cleva_deductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/cleva_deductive.jsonl",
        "reader_cfg": reader_cfgs[1],
        "infer_cfg": infer_cfgs[1],
        "eval_cfg": reasonbench_eval_cfg,
    },
]

CLEVA_InductiveReasoningDataset = [
    {
        "abbr": "reasonbench-cleva_inductive",
        "type": ReasonBenchDataset,
        "path": "data/reasonbench/cleva_inductive.jsonl",
        "reader_cfg": reader_cfgs[0],
        "infer_cfg": infer_cfgs[0],
        "eval_cfg": reasonbench_eval_cfg,
    },
]
|
||
# Flat list of every perplexity-based ReasonBench dataset config, CLEVA
# categories first, in the same order as the original concatenation.
reasonbench_datasets = (
    CLEVA_CommonsenseReasoningDataset
    + CLEVA_DeductiveReasoningDataset
    + CLEVA_InductiveReasoningDataset
    + CausalReasoningDataset
    + CommonsenseReasoningDataset
    + AbductiveReasoningDataset
    + DeductiveReasoningDataset
    + InductiveReasoningDataset
    + SymbolicReasoningDataset
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import json | ||
|
||
from datasets import Dataset | ||
|
||
from opencompass.registry import LOAD_DATASET | ||
|
||
from ..base import BaseDataset | ||
|
||
|
||
@LOAD_DATASET.register_module()
class ReasonBenchDataset(BaseDataset):
    """Loader for ReasonBench data stored as JSON lines."""

    @staticmethod
    def load(path: str):
        """Read a ReasonBench ``.jsonl`` file into a ``Dataset``.

        Every non-blank line must be a JSON object containing the keys
        ``prompt``, ``prompt_ppl``, ``label``, ``label_ppl``, ``choices``,
        ``tag`` and ``source``, plus one key for each entry listed in
        ``choices`` (the content of that option).

        Args:
            path: Location of the JSON-lines file, read as UTF-8.

        Returns:
            The dataset built by ``Dataset.from_list`` with one row per
            record. Each row carries the keys listed above and one column
            per option.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks one of the required keys.
        """
        raw_data = []
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # Skip blank lines (e.g. stray trailing newlines) instead
                # of letting json.loads fail on them.
                if not line.strip():
                    continue
                record = json.loads(line)
                choices = record['choices']
                data = {
                    'prompt': record['prompt'],
                    'label': record['label'],
                    'prompt_ppl': record['prompt_ppl'],
                    # Only the first character of the stringified
                    # generative label is kept.
                    'label_ppl': str(record['label_ppl'])[0],
                    'choices': choices,
                    'tag': record['tag'],
                    'source': record['source'],
                }
                # Expose each option's content under its own key.
                data.update({choice: record[choice] for choice in choices})
                raw_data.append(data)
        return Dataset.from_list(raw_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .ReasonBenchDataset import * # noqa: F401, F403 |