Update NeedleInAHaystack Docs (open-compass#1102)
* update NeedleInAHaystack Test Docs
* update docs
1 parent 6ef2a49 · commit 8327acd
Showing 3 changed files with 173 additions and 582 deletions.
@@ -1,78 +1,26 @@
-from opencompass.models import HuggingFaceCausalLM
-from opencompass.models.turbomind import TurboMindModel
-from opencompass.runners import SlurmSequentialRunner
-from opencompass.partitioners import SizePartitioner, NaivePartitioner
-from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask
-
 from mmengine.config import read_base
 with read_base():
-    # eval needlebench_4k
-    from .datasets.needlebench.needlebench_4k.needlebench import needlebench_datasets
-    from .summarizers.needlebench import needlebench_4k_summarizer as summarizer
+    from .models.hf_internlm.lmdeploy_internlm2_chat_7b import models as internlm2_chat_7b_200k
+    from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b

-    # only eval original "needle in a haystack test" in needlebench_4k
-    # from .datasets.needlebench.needlebench_4k.needlebench_single import needlebench_datasets_zh, needlebench_datasets_en
+    # Evaluate needlebench_4k, adjust the configuration to use 8k, 32k, 128k, 200k, or 1000k if necessary.
+    # from .datasets.needlebench.needlebench_4k.needlebench_4k import needlebench_datasets
+    # from .summarizers.needlebench import needlebench_4k_summarizer as summarizer

+    # only eval original "needle in a haystack test" in needlebench_4k
+    from .datasets.needlebench.needlebench_4k.needlebench_single_4k import needlebench_zh_datasets, needlebench_en_datasets
+    from .summarizers.needlebench import needlebench_4k_summarizer as summarizer

     # eval Ancestral Tracing Challenge(ATC)
-    # from .datasets.needlebench.atc.atc import needlebench_atc_datasets_zh, needlebench_atc_datasets_en
-    # from .summarizers.needlebench import needlebench_atc_summarizer as summarizer
+    # from .datasets.needlebench.atc.atc_choice_50 import needlebench_datasets
+    # from .summarizers.needlebench import atc_summarizer_50 as summarizer

 datasets = sum([v for k, v in locals().items() if ('datasets' in k)], [])

-hf_internlm2_chat_7b_model_meta_template = dict(
-    round=[
-        dict(role='HUMAN',
-             begin='<|im_start|>user\n', end='<|im_end|>\n'),
-        dict(role='BOT', begin='<|im_start|>assistant\n',
-             end='<|im_end|>\n', generate=True),
-    ],
-)
-hf_internlm2_chat_7b = dict(
-    type=HuggingFaceCausalLM,
-    abbr='internlm2-chat-7b-hf',
-    path="internlm/internlm2-chat-7b",
-    tokenizer_path='internlm/internlm2-chat-7b',
-    model_kwargs=dict(
-        trust_remote_code=True,
-        device_map='auto',
-    ),
-    tokenizer_kwargs=dict(
-        padding_side='left',
-        truncation_side='left',
-        use_fast=False,
-        trust_remote_code=True,
-    ),
-    max_out_len=2000,
-    max_seq_len=32768,
-    batch_size=8,
-    meta_template=hf_internlm2_chat_7b_model_meta_template,
-    run_cfg=dict(num_gpus=1, num_procs=1),
-    end_str='<|im_end|>',
-)
-
-internlm2_chat_7b_200k = dict(
-    type=TurboMindModel,
-    abbr='internlm2-chat-7b-200k',
-    path="internlm/internlm2-chat-7b",
-    meta_template=hf_internlm2_chat_7b_model_meta_template,
-    engine_config=dict(session_len=210000,
-                       max_batch_size=8,
-                       rope_scaling_factor=2.0,
-                       model_name="internlm2-chat-7b"),
-    gen_config=dict(top_k=1, top_p=0.8,
-                    temperature=1.0,
-                    max_new_tokens=2000),
-    max_out_len=2000,
-    max_seq_len=210000,
-    batch_size=8,
-    concurrency=8,
-    run_cfg=dict(num_gpus=1, num_procs=1),
-)
+for m in internlm2_chat_7b:
+    m['max_seq_len'] = 32768  # Ensure InternLM2-7B model can receive the full length of long texts, adjust for other models based on their supported maximum sequence length.
+    m['max_out_len'] = 2000  # Ensure complete responses from the model in multi-needle retrieval tasks.

-models = [
-    # hf_internlm2_chat_7b,
-    internlm2_chat_7b_200k,
-]
+models = internlm2_chat_7b

 work_dir = './outputs/needlebench'
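One line in the updated config is worth unpacking: `datasets = sum([...], [])` gathers every variable in the config module whose name contains 'datasets' and flattens those lists into a single list, so whichever dataset imports are left uncommented inside `read_base()` are picked up automatically. A minimal sketch of the idiom, using hypothetical placeholder entries rather than the real NeedleBench dataset configs:

```python
# Sketch of the locals()-gathering idiom from the config above.
# The variable names and dict contents here are hypothetical placeholders.
needlebench_zh_datasets = [{'abbr': 'needle_zh_4k'}]
needlebench_en_datasets = [{'abbr': 'needle_en_4k'}]
summarizer = {'type': 'default'}  # no 'datasets' in the name, so it is ignored

# Concatenate every top-level list whose variable name contains 'datasets'.
datasets = sum([v for k, v in locals().items() if ('datasets' in k)], [])
print(datasets)  # [{'abbr': 'needle_zh_4k'}, {'abbr': 'needle_en_4k'}]
```

The flip side of the idiom is that any stray variable with 'datasets' in its name is silently swept into the evaluation, so the commented-out alternatives should stay commented unless they are meant to run.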
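As a usage note: OpenCompass configs like this one are launched through the repository's `run.py` entry point, e.g. `python run.py configs/eval_needlebench.py`; the exact file path is an assumption here, since the commit view above does not name the changed file. Results are written under the `work_dir` set on the last line, `./outputs/needlebench`.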