Skip to content

Commit

Permalink
[Feature] Add ReasonBench(Internal) dataset (#577)
Browse files Browse the repository at this point in the history
* [Feature] Add reasonbench dataset

* add configs for supporting generative inference & merge datasets in the same category

* modify config filename to prompt version

* fix codes to meet pre-commit requirements

* lint the code to meet pre-commit requirements

* Align load_data source code briefly

* fix bugs

* reduce code redundancy
  • Loading branch information
Skyfall-xzz authored Dec 20, 2023
1 parent 76a95e9 commit b35d991
Show file tree
Hide file tree
Showing 7 changed files with 325 additions and 0 deletions.
4 changes: 4 additions & 0 deletions configs/datasets/ReasonBench/reasonbench_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point for the generative (gen) variant of ReasonBench.
# The hash suffix in the imported module name identifies the prompt version.
with read_base():
    from .reasonbench_gen_d15233 import reasonbench_datasets
140 changes: 140 additions & 0 deletions configs/datasets/ReasonBench/reasonbench_gen_d15233.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.datasets.reasonbench import ReasonBenchDataset

# Evaluation shared by every subset: reduce each generation to its first
# capital letter (the option label) and score plain accuracy against it.
reasonbench_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {'type': first_capital_postprocess},
}

# One reader config per choice-count bucket (2, 3 and 4 options).  The
# generative prompts are pre-rendered into `prompt_ppl`, so every bucket
# reads the same two columns; three entries are kept only so the dataset
# definitions below can index reader_cfgs[0..2] like the PPL variant does.
# (The original loop computed a `choices` list it never used.)
reader_cfgs = [
    dict(input_columns=["prompt_ppl"], output_column="label_ppl")
    for _ in range(3)
]

# Generative inference config shared by every subset: zero-shot, because
# FixKRetriever is given an empty fix_id_list, so the "</E>" in-context
# example token expands to the empty string.
#
# NOTE(review): the ice token appears both in `begin` and at the start of
# the HUMAN prompt; with no fixed examples both occurrences render empty,
# but the duplication looks unintended — confirm against the opencompass
# ice_token replacement logic.
infer_cfg=dict(
    ice_template=dict(
        type=PromptTemplate,
        template=dict(
            begin="</E>",
            round=[
                dict(
                    role="HUMAN",
                    prompt="</E>{prompt_ppl}"
                ),
                # The model is expected to continue with the option letter.
                dict(role="BOT", prompt="Answer: {label_ppl}"),
            ]),
        ice_token="</E>",
    ),
    # Empty fix_id_list -> no in-context examples are retrieved.
    retriever=dict(type=FixKRetriever, fix_id_list=[]),
    inferencer=dict(type=GenInferencer)
)


def _reasonbench_subset(name, cfg_idx):
    """Build the single-entry dataset list for one ReasonBench subset.

    `name` is both the abbr suffix and the jsonl stem; `cfg_idx` selects
    the reader config bucket (0 -> 2 choices, 1 -> 3, 2 -> 4).
    mmengine's config loader filters out function objects, so this helper
    does not leak into the parsed config.
    """
    return [dict(
        abbr=f"reasonbench-{name}",
        type=ReasonBenchDataset,
        path=f"data/reasonbench/{name}.jsonl",
        reader_cfg=reader_cfgs[cfg_idx],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg)]


# Per-subset definitions (kept as module-level names for external reuse).
CausalReasoningDataset = _reasonbench_subset("causal", 0)
CommonsenseReasoningDataset = _reasonbench_subset("commonsense", 1)
AbductiveReasoningDataset = _reasonbench_subset("abductive", 0)
DeductiveReasoningDataset = _reasonbench_subset("deductive", 1)
InductiveReasoningDataset = _reasonbench_subset("inductive", 0)
SymbolicReasoningDataset = _reasonbench_subset("symbolic", 2)
CLEVA_CommonsenseReasoningDataset = _reasonbench_subset("cleva_commonsense", 1)
CLEVA_DeductiveReasoningDataset = _reasonbench_subset("cleva_deductive", 1)
CLEVA_InductiveReasoningDataset = _reasonbench_subset("cleva_inductive", 0)

# Aggregate exported to reasonbench_gen.py; order preserved from the
# original hand-written concatenation.
reasonbench_datasets = \
    CLEVA_CommonsenseReasoningDataset + \
    CLEVA_DeductiveReasoningDataset + \
    CLEVA_InductiveReasoningDataset + \
    CausalReasoningDataset + \
    CommonsenseReasoningDataset + \
    AbductiveReasoningDataset + \
    DeductiveReasoningDataset + \
    InductiveReasoningDataset + \
    SymbolicReasoningDataset
4 changes: 4 additions & 0 deletions configs/datasets/ReasonBench/reasonbench_ppl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Default entry point for the perplexity (ppl) variant of ReasonBench.
# The hash suffix in the imported module name identifies the prompt version.
with read_base():
    from .reasonbench_ppl_b4a005 import reasonbench_datasets
136 changes: 136 additions & 0 deletions configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets.reasonbench import ReasonBenchDataset

# Evaluation shared by every subset: accuracy on the option selected by
# lowest perplexity, read from the BOT role.
reasonbench_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
)

# One (reader_cfg, infer_cfg) pair per choice count: buckets 0, 1 and 2
# correspond to 2, 3 and 4 answer options respectively.
reader_cfgs, infer_cfgs = [], []
for num_choices in range(2, 5):
    choices = ["A", "B", "C", "D"][:num_choices]

    reader_cfgs.append(dict(
        input_columns=["prompt_ppl"] + choices + ["choices"],
        output_column="label")
    )

    # One completion template per candidate option, keyed by the option's
    # index rendered as a string ("0", "1", ...); PPLInferencer scores each
    # completion's perplexity and predicts the best-scoring key.
    # (Renamed the loop variable from `id`, which shadowed the builtin, and
    # dropped the no-op f-string around `choice`.)
    infer_cfgs.append(dict(
        prompt_template=dict(
            type=PromptTemplate,
            template={
                str(idx): dict(round=[
                    dict(role="HUMAN", prompt="{prompt_ppl}Answer:"),
                    dict(role="BOT", prompt=choice),
                ])
                for idx, choice in enumerate(choices)
            }),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer)
    ))

def _reasonbench_subset(name, cfg_idx):
    """Build the single-entry dataset list for one ReasonBench subset.

    `name` is both the abbr suffix and the jsonl stem; `cfg_idx` selects
    the matching reader/infer config bucket (0 -> 2 choices, 1 -> 3,
    2 -> 4).  mmengine's config loader filters out function objects, so
    this helper does not leak into the parsed config.
    """
    return [dict(
        abbr=f"reasonbench-{name}",
        type=ReasonBenchDataset,
        path=f"data/reasonbench/{name}.jsonl",
        reader_cfg=reader_cfgs[cfg_idx],
        infer_cfg=infer_cfgs[cfg_idx],
        eval_cfg=reasonbench_eval_cfg)]


# Per-subset definitions (kept as module-level names for external reuse).
CausalReasoningDataset = _reasonbench_subset("causal", 0)
CommonsenseReasoningDataset = _reasonbench_subset("commonsense", 1)
AbductiveReasoningDataset = _reasonbench_subset("abductive", 0)
DeductiveReasoningDataset = _reasonbench_subset("deductive", 1)
InductiveReasoningDataset = _reasonbench_subset("inductive", 0)
SymbolicReasoningDataset = _reasonbench_subset("symbolic", 2)
CLEVA_CommonsenseReasoningDataset = _reasonbench_subset("cleva_commonsense", 1)
CLEVA_DeductiveReasoningDataset = _reasonbench_subset("cleva_deductive", 1)
CLEVA_InductiveReasoningDataset = _reasonbench_subset("cleva_inductive", 0)

# Aggregate exported to reasonbench_ppl.py; order preserved from the
# original hand-written concatenation.
reasonbench_datasets = \
    CLEVA_CommonsenseReasoningDataset + \
    CLEVA_DeductiveReasoningDataset + \
    CLEVA_InductiveReasoningDataset + \
    CausalReasoningDataset + \
    CommonsenseReasoningDataset + \
    AbductiveReasoningDataset + \
    DeductiveReasoningDataset + \
    InductiveReasoningDataset + \
    SymbolicReasoningDataset
1 change: 1 addition & 0 deletions opencompass/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from .qaspercut import * # noqa: F401, F403
from .race import * # noqa: F401, F403
from .realtoxicprompts import * # noqa: F401, F403
from .reasonbench import ReasonBenchDataset # noqa: F401, F403
from .record import * # noqa: F401, F403
from .safety import * # noqa: F401, F403
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
Expand Down
39 changes: 39 additions & 0 deletions opencompass/datasets/reasonbench/ReasonBenchDataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from ..base import BaseDataset


@LOAD_DATASET.register_module()
class ReasonBenchDataset(BaseDataset):
    """Loader for ReasonBench JSON-lines files.

    Every line is a JSON object with the keys ``prompt``, ``prompt_ppl``,
    ``label``, ``label_ppl``, ``choices``, ``tag`` and ``source``, plus one
    key per entry of ``choices`` holding that option's text.
    """

    @staticmethod
    def load(path: str):
        """Read the ``.jsonl`` file at *path* into a ``datasets.Dataset``.

        ``label_ppl`` is reduced to its first character so both the PPL and
        generative configs can compare against a single option letter.
        Blank lines are skipped instead of crashing ``json.loads``.
        """
        raw_data = []
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Tolerate trailing/blank lines in hand-edited jsonl files.
                if not raw_line.strip():
                    continue
                item = json.loads(raw_line)
                data = {
                    'prompt': item['prompt'],
                    'label': item['label'],
                    'prompt_ppl': item['prompt_ppl'],
                    # Keep only the leading character, e.g. 'A)' -> 'A'.
                    'label_ppl': str(item['label_ppl'])[0],
                    'choices': item['choices'],
                    'tag': item['tag'],
                    'source': item['source'],
                }
                # Per-choice option text, e.g. data['A'] = '...'.
                data.update({c: item[c] for c in item['choices']})
                raw_data.append(data)
        return Dataset.from_list(raw_data)
1 change: 1 addition & 0 deletions opencompass/datasets/reasonbench/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .ReasonBenchDataset import * # noqa: F401, F403

0 comments on commit b35d991

Please sign in to comment.