-
Notifications
You must be signed in to change notification settings - Fork 436
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* minor add * minor add * minor fix
- Loading branch information
Showing
9 changed files
with
403 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import AgentInferencer | ||
from opencompass.datasets import ( | ||
GSM8KDataset, | ||
gsm8k_postprocess, | ||
gsm8k_dataset_postprocess, | ||
Gsm8kAgentEvaluator, | ||
) | ||
|
||
gsm8k_reader_cfg = dict(input_columns=["question"], output_column="answer") | ||
|
||
gsm8k_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
# # ################################### NEW SHOT ################################### | ||
dict(role='HUMAN', prompt='Mark\'s basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What\'s the total number of points scored by both teams added together?'), | ||
dict(role='BOT', prompt='Tool:PythonInterpreter\nTool Input:def solution():\n mark_pointers_2 = 25 * 2\n mark_pointers_3 = 8 * 3\n mark_free_throws = 10 * 1\n mark_points_scored = mark_pointers_2 + mark_pointers_3 + mark_free_throws\n opponents_pointers_2 = mark_pointers_2 * 2\n opponents_pointers_3 = mark_pointers_3 / 2\n opponents_free_throws = mark_free_throws / 2\n opponents_points_scored = opponents_pointers_2 + opponents_pointers_3 + opponents_free_throws\n total_points_scored = mark_points_scored + opponents_points_scored\n result = total_points_scored\n return result'), | ||
dict(role='SYSTEM', prompt='Response:201'), | ||
dict(role='BOT', prompt='Thought: According to the response, I got the answer\nFinalAnswer: 201'), | ||
|
||
dict(role='HUMAN', prompt='Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?'), | ||
dict(role='BOT', prompt='Tool:PythonInterpreter\nTool Input:def solution():\n marbles = 60\n num_increased_marbles = marbles * 2 / 5\n num_total_marbles = marbles + num_increased_marbles\n frisbees = marbles / 2\n num_increased_frisbees = frisbees * 2 / 5\n num_total_frisbees = frisbees + num_increased_frisbees\n deck_cards = frisbees - 20\n num_increased_deck_cards = deck_cards * 2 / 5\n num_total_deck_cards = deck_cards + num_increased_deck_cards\n num_total = num_total_marbles + num_total_frisbees + num_total_deck_cards\n result = num_total\n return result'), | ||
dict(role='SYSTEM', prompt='Response:140'), | ||
dict(role='BOT', prompt='Thought: According to the response, I got the answer\nFinalAnswer: 140'), | ||
|
||
dict(role='HUMAN', prompt='A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?'), | ||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:def solution():\n num_fruits_per_first_three_basket = 9 + 15 + 14\n num_fruits_first_three_basket = num_fruits_per_first_three_basket * 3\n num_apple_fourth_basket = 9 - 2\n num_orange_fourth_basket = 15 - 2\n num_banana_fourth_basket = 14 - 2\n num_fruits_fourth_basket = num_apple_fourth_basket + num_orange_fourth_basket + num_banana_fourth_basket\n num_fruits_total = num_fruits_first_three_basket + num_fruits_fourth_basket\n result = num_fruits_total\n return result"""), | ||
dict(role='SYSTEM', prompt='Response:146'), | ||
dict(role='BOT', prompt='Thought: According to the response, I got the answer\nFinalAnswer: 146'), | ||
|
||
dict(role='HUMAN', prompt='{question}'), | ||
])), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=AgentInferencer), | ||
) | ||
|
||
gsm8k_eval_cfg = dict( | ||
evaluator=dict(type=Gsm8kAgentEvaluator), | ||
pred_postprocessor=dict(type=gsm8k_postprocess), | ||
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess), | ||
) | ||
|
||
gsm8k_datasets = [ | ||
dict( | ||
abbr='gsm8k-agent', | ||
type=GSM8KDataset, | ||
path='./data/gsm8k', | ||
reader_cfg=gsm8k_reader_cfg, | ||
infer_cfg=gsm8k_infer_cfg, | ||
eval_cfg=gsm8k_eval_cfg, | ||
) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator | ||
|
||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer') | ||
|
||
gsm8k_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"), | ||
dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 8 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"), | ||
dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"), | ||
dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"), | ||
dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"), | ||
dict(role='BOT', prompt="For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n"), | ||
dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"), | ||
], | ||
)), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=512)) | ||
|
||
gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator), | ||
pred_postprocessor=dict(type=gsm8k_postprocess), | ||
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess)) | ||
|
||
gsm8k_datasets = [ | ||
dict( | ||
abbr='gsm8k', | ||
type=GSM8KDataset, | ||
path='./data/gsm8k', | ||
reader_cfg=gsm8k_reader_cfg, | ||
infer_cfg=gsm8k_infer_cfg, | ||
eval_cfg=gsm8k_eval_cfg) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import AgentInferencer | ||
from opencompass.datasets import ( | ||
MATHDataset, MATHAgentEvaluator, math_postprocess | ||
) | ||
# Uses the PAL format, but it does not perform well. | ||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution') | ||
|
||
math_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
# # ################################### NEW SHOT ################################### | ||
dict(role='HUMAN', prompt='Find the coefficient of $x^3$ when $3(x^2 - x^3+x) +3(x +2x^3- 3x^2 + 3x^5+x^3) -5(1+x-4x^3 - x^2)$ is simplified.'), | ||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:from sympy import symbols, simplify | ||
def solution(): | ||
x = symbols('x') | ||
expr = 3*(x**2 - x**3 + x) + 3*(x + 2*x**3 - 3*x**2 + 3*x**5 + x**3) - 5*(1 + x - 4*x**3 - x**2) | ||
simplified_expr = simplify(expr) | ||
x3_coefficient = simplified_expr.as_coefficients_dict()[x**3] | ||
result = x3_coefficient | ||
return result"""), | ||
dict(role='SYSTEM', prompt='Response:26'), | ||
dict(role='BOT', prompt='FinalAnswer: The final answer is $26$. I hope it is correct.'), | ||
dict(role='HUMAN', prompt='The surface area of a sphere with radius $r$ is $4\pi r^2$. Including the area of its circular base, what is the total surface area of a hemisphere with radius 6 cm? Express your answer in terms of $\pi$.'), | ||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:import math | ||
def solution(): | ||
radius = 6 | ||
# Surface area of the hemisphere | ||
hemisphere_area = 2 * math.pi * radius**2 | ||
# Area of the circular base | ||
base_area = math.pi * radius**2 | ||
# Total surface area | ||
total_surface_area = hemisphere_area + base_area | ||
# Formatting the result in LaTeX | ||
result = r'{}\pi'.format(total_surface_area / math.pi) | ||
return result"""), | ||
dict(role='SYSTEM', prompt='Response:108.0\\pi'), | ||
dict(role='BOT', prompt='FinalAnswer: The final answer is $108.0\pi$. I hope it is correct.'), | ||
dict(role='HUMAN', prompt='Monica tosses a fair 6-sided die. If the roll is a prime number, then she wins that amount of dollars (so that, for example, if she rolls 3, then she wins 3 dollars). If the roll is composite, she wins nothing. Otherwise, she loses 3 dollars. What is the expected value of her winnings on one die toss? Express your answer as a dollar value to the nearest cent.'), | ||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:def solution(): | ||
# Probabilities of each outcome | ||
prime_prob = 1 / 6 | ||
composite_prob = 1 / 3 | ||
otherwise_prob = 1 / 6 | ||
# Expected value of each outcome | ||
prime_expected_value = (2 * prime_prob) + (3 * prime_prob) + (5 * prime_prob) | ||
composite_expected_value = 0 * composite_prob | ||
otherwise_expected_value = -3 * otherwise_prob | ||
# Total expected value | ||
total_expected_value = prime_expected_value + composite_expected_value + otherwise_expected_value | ||
# Dollar value to the nearest cent | ||
result = "{:.2f}".format(total_expected_value) | ||
return result"""), | ||
dict(role='SYSTEM', prompt='Response:1.17'), | ||
dict(role='BOT', prompt='FinalAnswer: The final answer is $1.17$. I hope it is correct.'), | ||
dict(role='HUMAN', prompt="Given $\mathbf{a} = \\begin{pmatrix} -7 \\ 0 \\ 1 \end{pmatrix}$ and $\mathbf{b} = \\begin{pmatrix} 4 \\ 2 \\ -1 \end{pmatrix},$ find $\mathbf{a} - 3 \mathbf{b}.$"), | ||
dict(role='BOT', prompt="""Tool:PythonInterpreter\nTool Input:import numpy as np | ||
def solution(): | ||
a = np.array([-7, 0, 1]) | ||
b = np.array([4, 2, -1]) | ||
result = a - 3 * b | ||
result = r'\\begin{{pmatrix}} {} \ {} \ {} \end{{pmatrix}}'.format(result[0], result[1], result[2]) | ||
return result"""), | ||
dict(role='SYSTEM', prompt='Response:\\begin{pmatrix} -19 \\ -6 \\ 4 \\end{pmatrix}'), | ||
dict(role='BOT', prompt='FinalAnswer: The final answer is $\\begin{pmatrix} -19 \\ -6 \\ 4 \\end{pmatrix}$. I hope it is correct.'), | ||
dict(role='HUMAN', prompt='{problem}'), | ||
])), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=AgentInferencer), | ||
) | ||
|
||
math_eval_cfg = dict( | ||
evaluator=dict(type=MATHAgentEvaluator), | ||
pred_postprocessor=dict(type=math_postprocess), | ||
) | ||
|
||
math_datasets = [ | ||
dict( | ||
abbr='math-agent', | ||
type=MATHDataset, | ||
path='./data/math/math.json', | ||
reader_cfg=math_reader_cfg, | ||
infer_cfg=math_infer_cfg, | ||
eval_cfg=math_eval_cfg, | ||
) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess | ||
|
||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution') | ||
|
||
math_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict(round=[ | ||
dict( | ||
role="HUMAN", | ||
prompt= | ||
"Problem:\nFind the coefficient of $x^3$ when $3(x^2 - x^3+x) +3(x +2x^3- 3x^2 + 3x^5+x^3) -5(1+x-4x^3 - x^2)$ is simplified.\nSolution:" | ||
), | ||
dict( | ||
role="BOT", | ||
prompt= | ||
"Combine like terms to simplify the expression. The coefficient of $x^3$ is calculated as $(-3+3\cdot(2+1))+(-5)\cdot(-4) = 26$. Thus, the coefficient of $x^3$ is $\\boxed{26}$.\nFinal Answer: The final answer is $26$. I hope it is correct.\n" | ||
), | ||
dict( | ||
role="HUMAN", | ||
prompt= | ||
"Problem:\nThe surface area of a sphere with radius $r$ is $4\pi r^2$. Including the area of its circular base, what is the total surface area of a hemisphere with radius 6 cm? Express your answer in terms of $\pi$.\nSolution:" | ||
), | ||
dict( | ||
role="BOT", | ||
prompt= | ||
"The surface area of a hemisphere (not including the base) is half that of a sphere, so it is $2\pi r^2$. The area of the base is $\pi r^2$. Therefore, for a hemisphere with radius 6 cm, the total surface area is $2\pi (6)^2 + \pi (6)^2 = 108\pi$ square cm.\nFinal Answer: The final answer is $108\pi$ square cm. I hope it is correct.\n" | ||
), | ||
dict( | ||
role="HUMAN", | ||
prompt= | ||
"Problem:\nMonica tosses a fair 6-sided die. If the roll is a prime number, then she wins that amount of dollars (so that, for example, if she rolls 3, then she wins 3 dollars). If the roll is composite, she wins nothing. Otherwise, she loses 3 dollars. What is the expected value of her winnings on one die toss? Express your answer as a dollar value to the nearest cent.\nSolution:" | ||
), | ||
dict( | ||
role="BOT", | ||
prompt= | ||
"The prime numbers rolled could be 2, 3, or 5, and each has a 1/6 chance of being rolled. The composite number 4 or 6 has a 2/6 chance of being rolled, but it results in $0 win. The remaining non-prime and non-composite number is 1 , and it results in a loss of $3, with a 1/6 chance. So, the expected winnings are $(2+3+5)(1/6)+0(2/6)+(-3)(1/6) = \$1.17$.\nFinal Answer: The final answer is $\$1.17$. I hope it is correct.\n" | ||
), | ||
dict( | ||
role="HUMAN", | ||
prompt= | ||
"Problem:\nGiven $\mathbf{a} = \\begin{pmatrix} -7 \\ 0 \\ 1 \end{pmatrix}$ and $\mathbf{b} = \\begin{pmatrix} 4 \\ 2 \\ -1 \end{pmatrix},$ find $\mathbf{a} - 3 \mathbf{b}.$\nSolution:" | ||
), | ||
dict( | ||
role="BOT", | ||
prompt= | ||
"We find $3 \mathbf{b}$ first, which is $\\begin{pmatrix} 12 \\ 6 \\ -3 \end{pmatrix}$. Then we subtract this vector from $\mathbf{a}$. So, $\mathbf{a} - 3 \mathbf{b} = \\begin{pmatrix} -7 - 12 \\ 0 - 6 \\ 1 - (-3) \end{pmatrix} = \\begin{pmatrix} -19 \\ -6 \\ 4 \end{pmatrix}.$\nFinal Answer: The final answer is $\\begin{pmatrix} -19 \\ -6 \\ 4 \end{pmatrix}$. I hope it is correct.\n" | ||
), | ||
dict(role="HUMAN", prompt="Problem:\n{problem}\nSolution:\n"), | ||
])), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=512)) | ||
|
||
math_eval_cfg = dict( | ||
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess)) | ||
|
||
math_datasets = [ | ||
dict( | ||
type=MATHDataset, | ||
abbr='math', | ||
path='./data/math/math.json', | ||
reader_cfg=math_reader_cfg, | ||
infer_cfg=math_infer_cfg, | ||
eval_cfg=math_eval_cfg) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from mmengine.config import read_base | ||
from opencompass.models.openai_api import OpenAI | ||
from opencompass.partitioners import SizePartitioner | ||
from opencompass.runners import LocalRunner | ||
from opencompass.tasks import OpenICLInferTask | ||
|
||
with read_base(): | ||
from .datasets.gsm8k.gsm8k_gen_d6de81 import gsm8k_datasets | ||
from .datasets.math.math_gen_1ed9c2 import math_datasets | ||
from .datasets.MathBench.mathbench_gen import mathbench_datasets | ||
from .summarizers.math_baseline import summarizer | ||
|
||
datasets = [] | ||
datasets += gsm8k_datasets | ||
datasets += math_datasets | ||
datasets += mathbench_datasets | ||
|
||
|
||
models = [ | ||
dict( | ||
abbr='gpt-3.5-react', | ||
type=OpenAI, | ||
path='gpt-3.5-turbo', | ||
key='ENV', | ||
query_per_second=1, | ||
max_seq_len=4096, | ||
batch_size=1, | ||
), | ||
] | ||
|
||
infer = dict( | ||
partitioner=dict(type=SizePartitioner, max_task_size=1000), | ||
runner=dict( | ||
type=LocalRunner, | ||
max_num_workers=16, | ||
task=dict(type=OpenICLInferTask)), | ||
) |
Oops, something went wrong.