
[Sync] Sync Internal (#941)
Leymore committed Mar 4, 2024
1 parent bbec7d8 commit b03d5dc
Showing 73 changed files with 2,264 additions and 903 deletions.
7 changes: 7 additions & 0 deletions .gitignore
@@ -91,8 +91,12 @@ docs/zh_cn/_build/

# sft config ignore list
configs/sft_cfg/*B_*
configs/sft_cfg/1B/*
configs/sft_cfg/7B/*
configs/sft_cfg/20B/*
configs/sft_cfg/60B/*
configs/sft_cfg/100B/*

configs/cky/
# in case llama clone in the opencompass
llama/
@@ -120,3 +124,6 @@
*.csv
*.npy
*.c

# aliyun
core.*
@@ -0,0 +1,42 @@
import os
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GaokaoBenchDataset
from mmengine.config import read_base

with read_base():
    from .GaokaoBench_prompts import MCQ_prompts, FBQ_prompts

GaokaoBench_datasets = []
for folder, prompts in [
    ("Multiple-choice_Questions", MCQ_prompts),
    ("Fill-in-the-blank_Questions", FBQ_prompts),
]:
    for p in prompts:
        reader_cfg = {
            "input_columns": ["question"],
            "output_column": "answer",
        }
        infer_cfg = {
            "ice_template": {
                "type": PromptTemplate,
                "template": {"round": [{"role": "HUMAN", "prompt": p["prefix_prompt"] + "{question}"}]},
                "ice_token": "</E>",
            },
            "retriever": {"type": ZeroRetriever},
            "inferencer": {"type": GenInferencer, "max_out_len": 1024},
        }
        eval_cfg = {
            "evaluator": {"type": "GaokaoBenchEvaluator" + "_" + p["type"]},
            "pred_role": "BOT",
        }
        dataset = {
            "type": GaokaoBenchDataset,
            "abbr": "GaokaoBench_" + p["keyword"],
            "path": os.path.join("data", "GAOKAO-BENCH", "data", folder, p["keyword"] + ".json"),
            "reader_cfg": reader_cfg,
            "infer_cfg": infer_cfg,
            "eval_cfg": eval_cfg,
        }
        GaokaoBench_datasets.append(dataset)
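The dataset entries generated above are meant to be pulled into a top-level OpenCompass evaluation config rather than run on their own. A minimal sketch of that usage, assuming this file is importable as configs/datasets/GaokaoBench/GaokaoBench_gen.py and that some model config exists (the internlm path below is only a placeholder, not part of this commit):

from mmengine.config import read_base

with read_base():
    # dataset list built by the loop above (module path assumed)
    from .datasets.GaokaoBench.GaokaoBench_gen import GaokaoBench_datasets
    # any model config works here; this path is a placeholder
    from .models.hf_internlm.hf_internlm_7b import models

# OpenCompass picks up the module-level `datasets` and `models` lists
datasets = [*GaokaoBench_datasets]

# typically launched with something like: python run.py configs/eval_gaokao_bench.py (command assumed)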
@@ -0,0 +1,41 @@
import os
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GaokaoBenchDataset
from mmengine.config import read_base

with read_base():
    from .GaokaoBench_prompts import MCQ_prompts, FBQ_prompts

GaokaoBench_datasets = []
for folder, prompts in [
    ("Multiple-choice_Questions", MCQ_prompts),
    ("Fill-in-the-blank_Questions", FBQ_prompts),
]:
    for p in prompts:
        reader_cfg = {
            "input_columns": ["question"],
            "output_column": "answer",
        }
        infer_cfg = {
            "prompt_template": {
                "type": PromptTemplate,
                "template": p["prefix_prompt"] + "{question}",
            },
            "retriever": {"type": ZeroRetriever},
            "inferencer": {"type": GenInferencer, "max_out_len": 1024},
        }
        eval_cfg = {
            "evaluator": {"type": "GaokaoBenchEvaluator" + "_" + p["type"]},
            "pred_role": "BOT",
        }
        dataset = {
            "type": GaokaoBenchDataset,
            "abbr": "GaokaoBench_" + p["keyword"],
            "path": os.path.join("data", "GAOKAO-BENCH", "data", folder, p["keyword"] + ".json"),
            "reader_cfg": reader_cfg,
            "infer_cfg": infer_cfg,
            "eval_cfg": eval_cfg,
        }
        GaokaoBench_datasets.append(dataset)
191 changes: 191 additions & 0 deletions configs/datasets/GaokaoBench/GaokaoBench_prompts.py

Large diffs are not rendered by default.

41 changes: 20 additions & 21 deletions configs/datasets/TheoremQA/TheoremQA_gen_424e0a.py
@@ -4,37 +4,36 @@
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess

TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
    output_column='Answer',
    train_split='test')
TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test")

TheoremQA_prompt1 = "Please read a math problem, and then think step by step to derive the answer. The answer is decided by Answer Type. " \
    "If the Answer type in [bool], the answer needs to be True or False. " \
    "Else if the Answer type in [integer, float] , The answer needs to be in numerical form. " \
    "Else if the Answer type in [list of integer, list of float] , the answer needs to be a list of number like [2, 3, 4]. " \
    "Else if the Answer type in [option], the answer needs to be an option like (a), (b), (c), (d)." \
    "You need to output the answer in your final sentence like 'Therefore, the answer is ...'."
TheoremQA_prompt2 = f"Below is an instruction that describes a task, paired with an input that provides further context. " \
    f"Write a response that appropriately completes the request.\n\n### Instruction:\n{TheoremQA_prompt1}\n\n### Input:\n{{Question}}\nAnswer_type:{{Answer_type}}\n### Response:\n"
TheoremQA_prompt1 = (
    "Please read a math problem, and then think step by step to derive the answer. The answer is decided by Answer Type. "
    "If the Answer type in [bool], the answer needs to be True or False. "
    "Else if the Answer type in [integer, float] , The answer needs to be in numerical form. "
    "Else if the Answer type in [list of integer, list of float] , the answer needs to be a list of number like [2, 3, 4]. "
    "Else if the Answer type in [option], the answer needs to be an option like (a), (b), (c), (d)."
    "You need to output the answer in your final sentence like 'Therefore, the answer is ...'."
)
TheoremQA_prompt2 = (
    f"Below is an instruction that describes a task, paired with an input that provides further context. "
    f"Write a response that appropriately completes the request.\n\n### Instruction:\n{TheoremQA_prompt1}\n\n### Input:\n{{Question}}\nAnswer_type:{{Answer_type}}\n### Response:\n"
)

TheoremQA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=TheoremQA_prompt2),
    prompt_template=dict(type=PromptTemplate, template=TheoremQA_prompt2),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=TheoremQA_postprocess))
TheoremQA_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_postprocessor=dict(type=TheoremQA_postprocess))

TheoremQA_datasets = [
    dict(
        abbr='TheoremQA',
        abbr="TheoremQA",
        type=TheoremQADataset,
        path="./data/TheoremQA/test.csv",
        reader_cfg=TheoremQA_reader_cfg,
        infer_cfg=TheoremQA_infer_cfg,
        eval_cfg=TheoremQA_eval_cfg)
        eval_cfg=TheoremQA_eval_cfg,
    )
]
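As a side note on how these templates are filled: the {Question} and {Answer_type} fields in TheoremQA_prompt2 are substituted from the reader's input_columns for each test row. OpenCompass's PromptTemplate does this internally; the plain-Python illustration below (sample row invented for the example, assuming the parenthesized TheoremQA_prompt2 defined above) only shows the effect:

# Illustration only: PromptTemplate performs the real substitution inside OpenCompass.
row = {"Question": "What is 2 + 2?", "Answer_type": "integer"}  # invented sample row
filled_prompt = TheoremQA_prompt2.format(**row)
# filled_prompt now ends with "### Input:\nWhat is 2 + 2?\nAnswer_type:integer\n### Response:\n"
print(filled_prompt)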
30 changes: 13 additions & 17 deletions configs/datasets/TheoremQA/TheoremQA_gen_7009de.py
@@ -4,45 +4,41 @@
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess

TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
    output_column='Answer',
    train_split='test')
TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test")

TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = 'Question: {Question}\nLet\'s think step by step.'
TheoremQA_prompt2 = "Question: {Question}\nLet's think step by step."

TheoremQA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(
                    role='SYSTEM',
                    fallback_role='HUMAN',
                    prompt=TheoremQA_prompt1),
                dict(role="SYSTEM", fallback_role="HUMAN", prompt=TheoremQA_prompt1),
            ],
            round=[
                dict(role='HUMAN', prompt=TheoremQA_prompt2),
            ])),
                dict(role="HUMAN", prompt=TheoremQA_prompt2),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=TheoremQA_postprocess))
TheoremQA_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_postprocessor=dict(type=TheoremQA_postprocess))

TheoremQA_datasets = [
    dict(
        abbr='TheoremQA',
        abbr="TheoremQA",
        type=TheoremQADataset,
        path="./data/TheoremQA/test.csv",
        reader_cfg=TheoremQA_reader_cfg,
        infer_cfg=TheoremQA_infer_cfg,
        eval_cfg=TheoremQA_eval_cfg)
        eval_cfg=TheoremQA_eval_cfg,
    )
]
41 changes: 24 additions & 17 deletions configs/datasets/TheoremQA/TheoremQA_gen_ef26ca.py
@@ -4,34 +4,41 @@
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess

TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
    output_column='Answer',
    train_split='test')
TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test")

TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = "Question: {Question}\nLet's think step by step."

TheoremQA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role='HUMAN',
                prompt=
                """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:\n1. a numerical value like 0.1, no symbol and no unit at all.\n2. a list of number like [2, 3, 4].\n3. True/False.\n4. an option like (a), (b), (c), (d)\nQuestion: {Question}\nLet\'s think step by step."""
            ),
        ])),
        template=dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=TheoremQA_prompt1 + TheoremQA_prompt2,
                ),
            ]
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=TheoremQA_postprocess))
TheoremQA_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_postprocessor=dict(type=TheoremQA_postprocess))

TheoremQA_datasets = [
    dict(
        abbr='TheoremQA',
        abbr="TheoremQA",
        type=TheoremQADataset,
        path="./data/TheoremQA/test.csv",
        reader_cfg=TheoremQA_reader_cfg,
        infer_cfg=TheoremQA_infer_cfg,
        eval_cfg=TheoremQA_eval_cfg)
        eval_cfg=TheoremQA_eval_cfg,
    )
]
38 changes: 38 additions & 0 deletions configs/datasets/TheoremQA/TheoremQA_post_v2_gen_2c2583.py
@@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess_v2

TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test")

TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = "Question: {Question}\nLet's think step by step."

TheoremQA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=TheoremQA_prompt1 + TheoremQA_prompt2,
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

# A proper evaluator would need an LLM to extract the answer; this evaluation logic also yields a fair number of false negatives (FN).
TheoremQA_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_postprocessor=dict(type=TheoremQA_postprocess_v2))

TheoremQA_datasets = [
    dict(
        abbr="TheoremQA",
        type=TheoremQADataset,
        path="./data/TheoremQA/test.csv",
        reader_cfg=TheoremQA_reader_cfg,
        infer_cfg=TheoremQA_infer_cfg,
        eval_cfg=TheoremQA_eval_cfg,
    )
]
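TheoremQA_postprocess_v2 is expected to pull the final answer out of the model's free-form reasoning before AccEvaluator compares it with the reference; as the comment above notes, a fully reliable extractor would need an LLM, so a rule-based one leaves false negatives. A rough, purely illustrative sketch of that kind of rule-based extraction (not the implementation shipped in opencompass.datasets):

def extract_final_answer(text: str) -> str:
    # Illustrative only: take whatever follows the last "the answer is" marker.
    marker = "the answer is"
    idx = text.lower().rfind(marker)
    if idx == -1:
        return text.strip()
    return text[idx + len(marker):].strip().rstrip(".")

print(extract_final_answer("Let's compute step by step... Therefore, the answer is 0.1."))  # -> 0.1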
45 changes: 45 additions & 0 deletions configs/datasets/TheoremQA/TheoremQA_post_v2_gen_ef26ca.py
@@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess_v2

TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test")

TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = "Question: {Question}\nLet's think step by step."

TheoremQA_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=TheoremQA_prompt1 + TheoremQA_prompt2,
                ),
            ]
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)

# A proper evaluator would need an LLM to extract the answer; this evaluation logic also yields a fair number of false negatives (FN).
TheoremQA_eval_cfg = dict(evaluator=dict(type=AccEvaluator), pred_postprocessor=dict(type=TheoremQA_postprocess_v2))

TheoremQA_datasets = [
    dict(
        abbr="TheoremQA",
        type=TheoremQADataset,
        path="./data/TheoremQA/test.csv",
        reader_cfg=TheoremQA_reader_cfg,
        infer_cfg=TheoremQA_infer_cfg,
        eval_cfg=TheoremQA_eval_cfg,
    )
]