Bump sft-opencompass-v0.5.2
BIGWangYuDong authored and 周丰哲 committed Dec 14, 2023
1 parent 42c7d51 commit a793df5
Showing 26 changed files with 630 additions and 48 deletions.
@@ -34,7 +34,7 @@
# If the IP address is not accessible,
# follow the instructions below to launch a code evaluate service.
# https://aicarrier.feishu.cn/docx/JpLAdWNh9oGC1fxH9Z9cTobLntb
"http://10.140.60.1", # T cluster, http://10.140.0.133 for S cluster
"http://10.140.60.10", # T cluster, http://10.140.0.133 for S cluster
# INTERNAL_END
port=5000
),
2 changes: 1 addition & 1 deletion configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py
@@ -34,7 +34,7 @@
# If the IP address is not accessible,
# follow the instructions below to launch a code evaluate service.
# https://aicarrier.feishu.cn/docx/JpLAdWNh9oGC1fxH9Z9cTobLntb
"http://10.140.60.1", # T cluster, http://10.140.0.133 for S cluster
"http://10.140.60.10", # T cluster, http://10.140.0.133 for S cluster
# INTERNAL_END
port=5000
),
36 changes: 36 additions & 0 deletions configs/datasets/humaneval/humaneval_gen_6d1cc2.py
@@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess

humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')

# TODO: allow empty output-column
humaneval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(
                role='HUMAN',
                prompt='Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nComplete the following python function.:\n{prompt}\n\n### Response:\n'),
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))

humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
    pred_postprocessor=dict(type=humaneval_postprocess),
)

humaneval_datasets = [
    dict(
        abbr='openai_humaneval',
        type=HumanevalDataset,
        path='./data/humaneval/human-eval-v2-20210705.jsonl',
        reader_cfg=humaneval_reader_cfg,
        infer_cfg=humaneval_infer_cfg,
        eval_cfg=humaneval_eval_cfg)
]
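For context on how a dataset config like the one above is usually consumed (not part of this commit): OpenCompass eval configs pull dataset lists in through read_base, the same pattern used by the sft_cfg eval configs later in this diff. A minimal sketch, assuming a hypothetical top-level config placed directly under configs/ so the relative import resolves:

# Hypothetical top-level config (not in this commit), composing the new
# HumanEval dataset config via OpenCompass's read_base mechanism.
from mmengine.config import read_base

with read_base():
    # pulls in humaneval_datasets from the file added above
    from .datasets.humaneval.humaneval_gen_6d1cc2 import humaneval_datasets

datasets = [*humaneval_datasets]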
65 changes: 65 additions & 0 deletions configs/datasets/mbpp/mbpp_gen_caa7ab.py
@@ -0,0 +1,65 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator

mbpp_reader_cfg = dict(
    input_columns=['text', 'test_list'], output_column='test_list_2')

mbpp_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n\nYour code should start with a [BEGIN] tag and end with a [DONE] tag.\n"
                ),
                dict(
                    role="BOT",
                    prompt=
                    "[BEGIN]\ndef similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)\n[DONE] \n\n "
                ),
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n\nYour code should start with a [BEGIN] tag and end with a [DONE] tag.\n"
                ),
                dict(
                    role="BOT",
                    prompt=
                    "[BEGIN]\nimport math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result\n[DONE] \n\n "
                ),
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n\nYour code should start with a [BEGIN] tag and end with a [DONE] tag.\n"
                ),
                dict(
                    role="BOT",
                    prompt=
                    "[BEGIN]\nimport heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums\n[DONE] \n\n "
                ),
                dict(
                    role="HUMAN",
                    prompt=
                    "You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n\nYour code should start with a [BEGIN] tag and end with a [DONE] tag.\n"
                ),
                dict(role="BOT", prompt="[BEGIN]\n"),

            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))

mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")

mbpp_datasets = [
    dict(
        type=MBPPDataset,
        abbr='mbpp',
        path='./data/mbpp/mbpp.jsonl',
        reader_cfg=mbpp_reader_cfg,
        infer_cfg=mbpp_infer_cfg,
        eval_cfg=mbpp_eval_cfg)
]
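The few-shot prompt above asks the model to wrap its program in [BEGIN]/[DONE] tags, which the evaluator is then expected to strip before running the MBPP test cases. As a rough illustration only (an assumption, not the actual MBPPEvaluator implementation), the extraction step amounts to something like:

import re

def extract_program(completion: str) -> str:
    """Illustrative helper (assumption, not OpenCompass code): pull the
    generated program out of a [BEGIN] ... [DONE] wrapped completion."""
    match = re.search(r"\[BEGIN\]\s*(.*?)\s*\[DONE\]", completion, re.DOTALL)
    return match.group(1) if match else completion

print(extract_program("[BEGIN]\ndef add(a, b):\n    return a + b\n[DONE]"))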
12 changes: 7 additions & 5 deletions configs/sft_cfg/123B_eval/eval_internlm-chat_123b.py
@@ -4,8 +4,8 @@
import os.path as osp

with read_base():
-from ..dataset_collections.medium_chat_sft_v051 import datasets
-from ..summarizers.medium_chat_sft_v051 import summarizer
+from ..dataset_collections.medium_chat_sft_v052 import datasets
+from ..summarizers.medium_chat_sft_v052 import summarizer
from ..clusters.slurm_llmit2 import infer, eval
from ..lark import lark_bot_url

@@ -25,15 +25,17 @@
model_type='LLAMA',
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/llamav4.model',
tokenizer_type='v4',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
-model_config="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101/configs/plato_123b_8k_sft.py",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
+model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/plato_123b_8k_sft.py",
# if got w2w3 miss match error, set w2w3_bug=True
w2w3_bug=False,
meta_template=without_meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
-batch_size=8,
+batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=8,
num_procs=8))
4 changes: 3 additions & 1 deletion configs/sft_cfg/1B_eval/eval_internlm-chat_1b_code-only.py
@@ -16,11 +16,13 @@
model_type='LLAMA',
tokenizer_path='/mnt/petrelfs/share_data/yanhang/tokenizes/llama.model',
tokenizer_type='llama',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
model_config="/mnt/petrelfs/wangyudong/opencompass/configs/sft_cfg/1B_eval/1B_model_config.py",
max_out_len=100,
max_seq_len=2048,
batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=1,
num_procs=1))
12 changes: 7 additions & 5 deletions configs/sft_cfg/20B_eval/eval_internlm-chat_20b.py
@@ -5,8 +5,8 @@
import os.path as osp

with read_base():
-from ..dataset_collections.medium_chat_sft_v051 import datasets
-from ..summarizers.medium_chat_sft_v051 import summarizer
+from ..dataset_collections.medium_chat_sft_v052 import datasets
+from ..summarizers.medium_chat_sft_v052 import summarizer
from ..clusters.slurm_llmit2 import infer, eval
from ..lark import lark_bot_url

@@ -24,13 +24,15 @@
model_type='LLAMA',
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/V7.model',
tokenizer_type='v7',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
-model_config="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101/configs/newton_20b_8k_sft.py",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
+model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/newton_20b_8k_sft.py",
meta_template=without_meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
-batch_size=8,
+batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=4,
num_procs=4))
11 changes: 6 additions & 5 deletions configs/sft_cfg/70B_eval/eval_internlm-chat_70b.py
@@ -4,8 +4,8 @@
import os.path as osp

with read_base():
-from ..dataset_collections.medium_chat_sft_v051 import datasets
-from ..summarizers.medium_chat_sft_v051 import summarizer
+from ..dataset_collections.medium_chat_sft_v052 import datasets
+from ..summarizers.medium_chat_sft_v052 import summarizer
from ..clusters.slurm_llmit2 import infer, eval
from ..lark import lark_bot_url

@@ -25,14 +25,15 @@
model_type="LLAMA",
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/llamav4.model',
tokenizer_type='v4',
# TODO: support relative path in train_internlm
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
-model_config="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101/configs/euclid_70b_v2_0_sft.py",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
+model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/euclid_70b_v2_0_sft.py",
meta_template=without_meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(num_gpus=4, num_procs=4),
)

56 changes: 56 additions & 0 deletions configs/sft_cfg/7B_chatml_eval/eval_internlm-chat_7b_chatml.py
@@ -0,0 +1,56 @@
from mmengine.config import read_base
from opencompass.models.internal import InternLMwithModule
from copy import deepcopy
import os.path as osp

with read_base():
    from ..dataset_collections.medium_chat_sft_v052 import datasets
    from ..summarizers.medium_chat_sft_v052 import summarizer
    from ..clusters.slurm_llmit2 import infer, eval
    from ..lark import lark_bot_url

without_meta_template = dict(
    begin="""""",
    round=[
        dict(role='HUMAN', begin='<TOKENS_UNUSED_140>user\n', end='<TOKENS_UNUSED_139>\n'),
        dict(role='BOT', begin='<TOKENS_UNUSED_140>assistant\n', end='<TOKENS_UNUSED_139>\n', generate=True),
    ],
    eos_token_id=103166)

base_dict = dict(
    abbr=None,
    path=None,
    type=InternLMwithModule,
    model_type='INTERNLM',
    tokenizer_path='/mnt/petrelfs/llmit/tokenizers/V7.model',
    tokenizer_type='v7',
    # TODO: add model config in the shared path
    module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
    model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/internlm_7b_16k_sft.py",
    meta_template=without_meta_template,
    max_out_len=100,
    # If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
    max_seq_len=8192,
    batch_size=8,
    # using bf16 may decrease the performance, force set to fp16
    model_dtype='torch.float16',
    run_cfg=dict(
        num_gpus=1,
        num_procs=1))

models_path = [
    '/mnt/petrelfs/share_data/wangyudong/ckpt/20231206/v0.16_dev2_16k/sft_7b_16k_0.16dev2_rc5/10270',
]

models = []

for model_path in models_path:
    tmp_model_dict = deepcopy(base_dict)
    if model_path.endswith('/'):
        model_path = model_path[:-1]
    abbr = osp.split(osp.split(model_path)[0])[-1]
    tmp_model_dict['abbr'] = abbr
    tmp_model_dict['path'] = model_path
    models.append(tmp_model_dict)

del models_path, model_path, tmp_model_dict, abbr, base_dict
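A quick worked example of the abbr derivation in the loop above: osp.split is applied twice, so the checkpoint's parent directory name becomes the model abbreviation.

import os.path as osp

model_path = '/mnt/petrelfs/share_data/wangyudong/ckpt/20231206/v0.16_dev2_16k/sft_7b_16k_0.16dev2_rc5/10270'
# osp.split(model_path)        -> ('.../sft_7b_16k_0.16dev2_rc5', '10270')
# osp.split of the first part  -> ('.../v0.16_dev2_16k', 'sft_7b_16k_0.16dev2_rc5')
abbr = osp.split(osp.split(model_path)[0])[-1]
print(abbr)  # sft_7b_16k_0.16dev2_rc5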
10 changes: 6 additions & 4 deletions configs/sft_cfg/7B_eval/eval_internlm-chat_7b.py
@@ -4,8 +4,8 @@
import os.path as osp

with read_base():
-from ..dataset_collections.medium_chat_sft_v051 import datasets
-from ..summarizers.medium_chat_sft_v051 import summarizer
+from ..dataset_collections.medium_chat_sft_v052 import datasets
+from ..summarizers.medium_chat_sft_v052 import summarizer
from ..clusters.slurm_llmit2 import infer, eval
from ..lark import lark_bot_url

@@ -24,13 +24,15 @@
model_type='INTERNLM',
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/V7.model',
tokenizer_type='v7',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
-model_config="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101/configs/internlm_7b_8k_sft.py",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
+model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/internlm_7b_8k_sft.py",
meta_template=without_meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=1,
num_procs=1))
6 changes: 4 additions & 2 deletions configs/sft_cfg/7B_eval/eval_internlm-chat_7b_aliyun.py
@@ -4,8 +4,8 @@
import os.path as osp

with read_base():
-from ..dataset_collections.medium_chat_sft_v051 import datasets
-from ..summarizers.medium_chat_sft_v051 import summarizer
+from ..dataset_collections.medium_chat_sft_v052 import datasets
+from ..summarizers.medium_chat_sft_v052 import summarizer
from ..clusters.aliyun_llm import infer, eval
from ..lark import lark_bot_url

@@ -31,6 +31,8 @@
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=1,
num_procs=1))
8 changes: 4 additions & 4 deletions configs/sft_cfg/7B_eval/eval_internlm-chat_7b_llmv2.py
@@ -4,8 +4,8 @@
import os.path as osp

with read_base():
-from ..dataset_collections.medium_chat_sft_v051 import datasets
-from ..summarizers.medium_chat_sft_v051 import summarizer
+from ..dataset_collections.medium_chat_sft_v052 import datasets
+from ..summarizers.medium_chat_sft_v052 import summarizer
from ..clusters.slurm_llmit2 import infer, eval
from ..lark import lark_bot_url

@@ -24,7 +24,7 @@
model_type='origin',
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/V7.model',
tokenizer_type='v7',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
meta_template=without_meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
@@ -35,7 +35,7 @@
num_procs=1))

models_path = [
-'/mnt/petrelfs/llmit/ckpt/7b-8k/sft_7b_v0_11/4930',
+'/mnt/petrelfs/llmit/ckpt/maibao_kaoshi_7_5_ST_8k_v0213rc8/5260',
]

models = []
@@ -35,13 +35,15 @@
model_type='INTERNLM',
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/V7.model',
tokenizer_type='v7',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
-model_config="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101/configs/internlm_7b_8k_sft.py",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
+model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/internlm_7b_8k_sft.py",
meta_template=meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=1,
num_procs=1))
@@ -32,13 +32,15 @@
model_type='INTERNLM',
tokenizer_path='/mnt/petrelfs/llmit/tokenizers/V7.model',
tokenizer_type='v7',
-module_path="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101",
-model_config="/mnt/petrelfs/llmit/code/opencompass_internal/deliver_1101/train_internlm_deliver_1101/configs/internlm_7b_8k_sft.py",
+module_path="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm",
+model_config="/mnt/petrelfs/llmit/code/opencompass_internal/sft_opencompass_v052/train_internlm/configs/internlm_7b_8k_sft.py",
meta_template=meta_template,
max_out_len=100,
# If want to use the full length of the model, set max_seq_len=8192, otherwise can set max_seq_len=2048.
max_seq_len=8192,
batch_size=8,
+# using bf16 may decrease the performance, force set to fp16
+model_dtype='torch.float16',
run_cfg=dict(
num_gpus=1,
num_procs=1))
