Skip to content

Commit

Permalink
[Sync] deprecate old mbpps (#1064)
Browse files Browse the repository at this point in the history
  • Loading branch information
Leymore authored Apr 19, 2024
1 parent c172401 commit 8c85edd
Show file tree
Hide file tree
Showing 95 changed files with 1,506 additions and 408 deletions.
2 changes: 1 addition & 1 deletion configs/datasets/collections/base_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
from ..math.math_evaluatorv2_gen_9d2049 import math_datasets
from ..TheoremQA.TheoremQA_post_v2_gen_2c2583 import TheoremQA_datasets
from ..humaneval.humaneval_gen_d2537e import humaneval_datasets
from ..mbpp.sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets
from ..mbpp.deprecated_sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets

# Collect every value in locals() whose name ends with "_datasets" and
# concatenate them into a single list (sum() with [] as the start value).
datasets = sum((v for k, v in locals().items() if k.endswith("_datasets")), [])
2 changes: 1 addition & 1 deletion configs/datasets/collections/base_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
Expand Down
2 changes: 1 addition & 1 deletion configs/datasets/collections/base_medium_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..humaneval.humaneval_gen_a82cae import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
Expand Down
2 changes: 1 addition & 1 deletion configs/datasets/collections/base_small.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ..lambada.lambada_gen_217e11 import lambada_datasets
from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
Expand Down
2 changes: 1 addition & 1 deletion configs/datasets/collections/chat_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
from ..math.math_evaluatorv2_gen_cecb31 import math_datasets
from ..TheoremQA.TheoremQA_post_v2_gen_ef26ca import TheoremQA_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.sanitized_mbpp_gen_1e1056 import sanitized_mbpp_datasets
from ..mbpp.deprecated_sanitized_mbpp_gen_1e1056 import sanitized_mbpp_datasets

# Collect every value in locals() whose name ends with "_datasets" and
# concatenate them into a single list (sum() with [] as the start value).
datasets = sum((v for k, v in locals().items() if k.endswith("_datasets")), [])
2 changes: 1 addition & 1 deletion configs/datasets/collections/chat_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
Expand Down
2 changes: 1 addition & 1 deletion configs/datasets/collections/chat_small.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ..lambada.lambada_gen_217e11 import lambada_datasets
from ..storycloze.storycloze_gen_7f656a import storycloze_datasets
from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets
Expand Down
2 changes: 1 addition & 1 deletion configs/datasets/collections/leaderboard/qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
from ...drop.drop_gen_8a9ed9 import drop_datasets
from ...humaneval.humaneval_gen_a82cae import humaneval_datasets
from ...mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ...bbh.bbh_gen_5bf00b import bbh_datasets


Expand Down
2 changes: 1 addition & 1 deletion configs/datasets/collections/leaderboard/qwen_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
from ...drop.drop_gen_8a9ed9 import drop_datasets
from ...humaneval.humaneval_gen_a82cae import humaneval_datasets
from ...mbpp.mbpp_gen_1e1056 import mbpp_datasets
from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ...bbh.bbh_gen_5b92b0 import bbh_datasets


Expand Down
46 changes: 46 additions & 0 deletions configs/datasets/gpqa/gpqa_gen_015262.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GPQADataset, GPQAEvaluator
from opencompass.utils import first_option_postprocess

# Reader settings: the question text and the four lettered choices are the
# input columns; 'answer' is the output column.
gpqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D'],
    output_column='answer',
)

# Inference settings: a single HUMAN turn that states the question, lists the
# four lettered choices and asks for a fixed answer sentence. The template
# contains no in-context examples.
gpqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    # Adjacent literals are concatenated into one prompt string.
                    prompt=(
                        'What is the correct answer to this question: {question}\nChoices:\n'
                        '(A){A}\n'
                        '(B){B}\n'
                        '(C){C}\n'
                        '(D){D}\n'
                        'Format your response as follows: "The correct answer is (insert answer here)"'
                    ),
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

# Evaluation settings: predictions go through first_option_postprocess with
# the option set 'ABCD' (presumably extracting the chosen letter -- confirm
# against the helper) and are scored by GPQAEvaluator.
gpqa_eval_cfg = dict(
    evaluator=dict(type=GPQAEvaluator),
    pred_postprocessor=dict(
        type=first_option_postprocess,
        options='ABCD',
    ),
)

# Split name -> CSV file name (passed as `name` alongside path './data/gpqa/').
gpqa_subsets = {
    'extended': 'gpqa_extended.csv',
    'main': 'gpqa_main.csv',
    'diamond': 'gpqa_diamond.csv',
}

# One dataset entry per split; every split shares the same reader, inference
# and evaluation settings and differs only in its abbreviation and CSV file.
gpqa_datasets = []
# Iterate the mapping directly: no throwaway list of keys and no second
# lookup per split.
for split, csv_name in gpqa_subsets.items():
    gpqa_datasets.append(
        dict(
            abbr='GPQA_' + split,
            type=GPQADataset,
            path='./data/gpqa/',
            name=csv_name,
            reader_cfg=gpqa_reader_cfg,
            infer_cfg=gpqa_infer_cfg,
            eval_cfg=gpqa_eval_cfg,
        )
    )
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion configs/datasets/mbpp/mbpp_gen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
from .mbpp_gen_1e1056 import mbpp_datasets # noqa: F401, F403
from .mbpp_gen_830460 import mbpp_datasets # noqa: F401, F403
46 changes: 0 additions & 46 deletions configs/datasets/mbpp/mbpp_gen_5d6316.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
type=PromptTemplate,
template=dict(
round=[
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"),
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"),
dict(role="BOT", prompt="[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "),

dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"),
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n"),
dict(role="BOT", prompt="[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "),

dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"),
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"),
dict(role="BOT", prompt="[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "),

dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"),
Expand All @@ -25,7 +25,7 @@
),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
inferencer=dict(type=GenInferencer, max_out_len=512),
)

mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")
Expand Down
42 changes: 42 additions & 0 deletions configs/datasets/mbpp/mbpp_passk_gen_830460.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator

# Reader settings: "text" and "test_list" are the input columns (the same
# names used as placeholders in the prompt template); "test_column" is the
# output column.
mbpp_reader_cfg = dict(
    input_columns=["text", "test_list"],
    output_column="test_column",
)

# Inference settings: a fixed dialogue of three worked examples (HUMAN task +
# BOT solution wrapped in [BEGIN] ... [DONE]) followed by the actual task.
# The prompt strings are part of the benchmark definition; do not reformat them.
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
# Worked example 1: similar_elements.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"),
dict(role="BOT", prompt="[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "),

# Worked example 2: is_not_prime.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n"),
dict(role="BOT", prompt="[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "),

# Worked example 3: heap_queue_largest.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"),
dict(role="BOT", prompt="[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "),

# The actual task: {text} and {test_list} are placeholders named after the
# reader's input columns.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"),
# The final BOT turn starts with the same [BEGIN] marker the examples use.
dict(role="BOT", prompt="[BEGIN]\n"),
],
),
),
retriever=dict(type=ZeroRetriever),
# max_out_len=512 is passed to GenInferencer; presumably a cap on the
# generated length -- confirm against GenInferencer.
inferencer=dict(type=GenInferencer, max_out_len=512),
)

# Evaluation settings: score with MBPPPassKEvaluator; pred_role is set to "BOT".
mbpp_eval_cfg = dict(
    evaluator=dict(type=MBPPPassKEvaluator),
    pred_role="BOT",
)

# Single dataset entry ("mbpp_passk") that ties the MBPP jsonl file to the
# reader, inference and evaluation settings defined in this config.
mbpp_datasets = [
    dict(
        type=MBPPDataset_V2,
        abbr="mbpp_passk",
        path="./data/mbpp/mbpp.jsonl",
        reader_cfg=mbpp_reader_cfg,
        infer_cfg=mbpp_infer_cfg,
        eval_cfg=mbpp_eval_cfg,
    ),
]
45 changes: 45 additions & 0 deletions configs/datasets/mbpp/mbpp_repeat10_gen_830460.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# This config is used for pass@k evaluation with dataset repetition,
# for models that cannot generate multiple responses for a single input.
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator

# Reader settings: "text" and "test_list" are the input columns (the same
# names used as placeholders in the prompt template); "test_column" is the
# output column.
mbpp_reader_cfg = dict(
    input_columns=["text", "test_list"],
    output_column="test_column",
)

# Inference settings: a fixed dialogue of three worked examples (HUMAN task +
# BOT solution wrapped in [BEGIN] ... [DONE]) followed by the actual task.
# The prompt strings are part of the benchmark definition; do not reformat them.
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
# Worked example 1: similar_elements.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"),
dict(role="BOT", prompt="[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "),

# Worked example 2: is_not_prime.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n"),
dict(role="BOT", prompt="[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "),

# Worked example 3: heap_queue_largest.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"),
dict(role="BOT", prompt="[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "),

# The actual task: {text} and {test_list} are placeholders named after the
# reader's input columns.
dict(role="HUMAN", prompt="You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"),
# The final BOT turn starts with the same [BEGIN] marker the examples use.
dict(role="BOT", prompt="[BEGIN]\n"),
],
),
),
retriever=dict(type=ZeroRetriever),
# max_out_len=512 is passed to GenInferencer; presumably a cap on the
# generated length -- confirm against GenInferencer.
inferencer=dict(type=GenInferencer, max_out_len=512),
)

# Evaluation settings: score with MBPPPassKEvaluator; pred_role is set to "BOT".
mbpp_eval_cfg = dict(
    evaluator=dict(type=MBPPPassKEvaluator),
    pred_role="BOT",
)

# Single dataset entry ("mbpp_repeat10") that ties the MBPP jsonl file to the
# reader, inference and evaluation settings defined in this config.
mbpp_datasets = [
    dict(
        type=MBPPDataset_V2,
        abbr="mbpp_repeat10",
        path="./data/mbpp/mbpp.jsonl",
        # Passed to MBPPDataset_V2; presumably repeats each problem 10 times
        # so pass@k can be computed from single-sample generations -- confirm.
        num_repeats=10,
        reader_cfg=mbpp_reader_cfg,
        infer_cfg=mbpp_infer_cfg,
        eval_cfg=mbpp_eval_cfg,
    ),
]
Loading

0 comments on commit 8c85edd

Please sign in to comment.