Skip to content

Commit

Permalink
Update NeedleInAHaystack Docs (open-compass#1102)
Browse files Browse the repository at this point in the history
* update NeedleInAHaystack Test Docs

* update docs
  • Loading branch information
Mor-Li authored and BunnyRunnerX committed May 14, 2024
1 parent 6ef2a49 commit 8327acd
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 582 deletions.
80 changes: 14 additions & 66 deletions configs/eval_needlebench.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,26 @@
from opencompass.models import HuggingFaceCausalLM
from opencompass.models.turbomind import TurboMindModel
from opencompass.runners import SlurmSequentialRunner
from opencompass.partitioners import SizePartitioner, NaivePartitioner
from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask

from mmengine.config import read_base
with read_base():
# eval needlebench_4k
from .datasets.needlebench.needlebench_4k.needlebench import needlebench_datasets
from .summarizers.needlebench import needlebench_4k_summarizer as summarizer
from .models.hf_internlm.lmdeploy_internlm2_chat_7b import models as internlm2_chat_7b_200k
from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b

# only eval original "needle in a haystack test" in needlebench_4k
# from .datasets.needlebench.needlebench_4k.needlebench_single import needlebench_datasets_zh, needlebench_datasets_en
# Evaluate needlebench_4k; adjust the configuration to use 8k, 32k, 128k, 200k, or 1000k if necessary.
# from .datasets.needlebench.needlebench_4k.needlebench_4k import needlebench_datasets
# from .summarizers.needlebench import needlebench_4k_summarizer as summarizer

# only eval original "needle in a haystack test" in needlebench_4k
from .datasets.needlebench.needlebench_4k.needlebench_single_4k import needlebench_zh_datasets, needlebench_en_datasets
from .summarizers.needlebench import needlebench_4k_summarizer as summarizer

# Evaluate the Ancestral Tracing Challenge (ATC)
# from .datasets.needlebench.atc.atc import needlebench_atc_datasets_zh, needlebench_atc_datasets_en
# from .summarizers.needlebench import needlebench_atc_summarizer as summarizer
# from .datasets.needlebench.atc.atc_choice_50 import needlebench_datasets
# from .summarizers.needlebench import atc_summarizer_50 as summarizer

# Collect every value in the current namespace whose variable name contains
# the substring 'datasets' and concatenate them into a single flat list.
# NOTE(review): this assumes each matching value is a list and that the
# dataset imports have already run by this point — confirm before moving it.
datasets = sum([v for k, v in locals().items() if ('datasets' in k)], [])

# Chat template shared by the InternLM2 model configs in this file.
# Each round has a HUMAN turn and a BOT turn, both wrapped in
# <|im_start|> ... <|im_end|> markers; only the BOT turn sets generate=True.
hf_internlm2_chat_7b_model_meta_template = {
    'round': [
        {
            'role': 'HUMAN',
            'begin': '<|im_start|>user\n',
            'end': '<|im_end|>\n',
        },
        {
            'role': 'BOT',
            'begin': '<|im_start|>assistant\n',
            'end': '<|im_end|>\n',
            'generate': True,
        },
    ],
}
# Model config for internlm/internlm2-chat-7b using the HuggingFaceCausalLM
# wrapper. The tokenizer pads and truncates on the left; max_seq_len is 32768
# and max_out_len is 2000.
hf_internlm2_chat_7b = {
    'type': HuggingFaceCausalLM,
    'abbr': 'internlm2-chat-7b-hf',
    'path': 'internlm/internlm2-chat-7b',
    'tokenizer_path': 'internlm/internlm2-chat-7b',
    'model_kwargs': {
        'trust_remote_code': True,
        'device_map': 'auto',
    },
    'tokenizer_kwargs': {
        'padding_side': 'left',
        'truncation_side': 'left',
        'use_fast': False,
        'trust_remote_code': True,
    },
    'max_out_len': 2000,
    'max_seq_len': 32768,
    'batch_size': 8,
    'meta_template': hf_internlm2_chat_7b_model_meta_template,
    'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    'end_str': '<|im_end|>',
}

# Model config for internlm/internlm2-chat-7b using the TurboMindModel
# wrapper. session_len and max_seq_len are both set to 210000, and the
# generation settings cap new tokens at 2000 to match max_out_len.
internlm2_chat_7b_200k = {
    'type': TurboMindModel,
    'abbr': 'internlm2-chat-7b-200k',
    'path': 'internlm/internlm2-chat-7b',
    'meta_template': hf_internlm2_chat_7b_model_meta_template,
    'engine_config': {
        'session_len': 210000,
        'max_batch_size': 8,
        'rope_scaling_factor': 2.0,
        'model_name': 'internlm2-chat-7b',
    },
    'gen_config': {
        'top_k': 1,
        'top_p': 0.8,
        'temperature': 1.0,
        'max_new_tokens': 2000,
    },
    'max_out_len': 2000,
    'max_seq_len': 210000,
    'batch_size': 8,
    'concurrency': 8,
    'run_cfg': {'num_gpus': 1, 'num_procs': 1},
}
# Override the sequence-length and output-length limits on each entry of
# internlm2_chat_7b in place.
for m in internlm2_chat_7b:
m['max_seq_len'] = 32768 # Ensure the InternLM2-7B model can receive the full length of long texts; adjust for other models based on their supported maximum sequence length.
m['max_out_len'] = 2000 # Ensure complete responses from the model in multi-needle retrieval tasks.

# Models to evaluate.
# NOTE(review): `models` is assigned twice in this span, so the list below is
# replaced by the second assignment — confirm which of the two should remain.
models = [
# hf_internlm2_chat_7b,
internlm2_chat_7b_200k,
]
models = internlm2_chat_7b

# NOTE(review): presumably the directory where run outputs are written —
# confirm against the runner that consumes this config.
work_dir = './outputs/needlebench'
Loading

0 comments on commit 8327acd

Please sign in to comment.