Skip to content

Commit

Permalink
[Fix] Quick fix (#995)
Browse files Browse the repository at this point in the history
  • Loading branch information
bittersweet1999 authored Mar 22, 2024
1 parent 1d31985 commit 0665bb9
Show file tree
Hide file tree
Showing 6 changed files with 7 additions and 178 deletions.
16 changes: 2 additions & 14 deletions configs/eval_subjective_alignbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
with read_base():
from .datasets.subjective.alignbench.alignbench_judgeby_critiquellm import subjective_datasets

from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
from opencompass.models.openai_api import OpenAIAllesAPIN
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
Expand Down Expand Up @@ -51,26 +51,14 @@

datasets = [*subjective_datasets]

infer = dict(
partitioner=dict(type=NaivePartitioner),
runner=dict(
type=SlurmSequentialRunner,
partition='llmeval',
quotatype='auto',
max_num_workers=256,
task=dict(type=OpenICLInferTask),
),
)

# -------------Evaluation Stage ----------------------------------------

## ------------- JudgeLLM Configuration
judge_model = dict(
abbr='GPT4-Turbo',
type=OpenAIAllesAPIN,
type=OpenAI,
path='gpt-4-1106-preview',
key='xxxx', # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url='xxxx',
meta_template=api_meta_template,
query_per_second=16,
max_out_len=2048,
Expand Down
11 changes: 1 addition & 10 deletions configs/eval_subjective_alpacaeval.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,7 @@
temperature=1,
) # Re-run inference for gpt4's predictions, or you can choose to use the pre-committed gpt4's predictions

infer = dict(
partitioner=dict(type=NaivePartitioner),
runner=dict(
type=SlurmSequentialRunner,
partition='llmeval',
quotatype='auto',
max_num_workers=256,
task=dict(type=OpenICLInferTask),
),
)


# -------------Evaluation Stage ----------------------------------------

Expand Down
11 changes: 0 additions & 11 deletions configs/eval_subjective_compassarena.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,6 @@
temperature=1,
) # Re-run inference for gpt4's predictions, or you can choose to use the pre-committed gpt4's predictions

infer = dict(
partitioner=dict(type=SizePartitioner, strategy='split', max_task_size=10000),
runner=dict(
type=SlurmSequentialRunner,
partition='llm_dev2',
quotatype='auto',
max_num_workers=256,
task=dict(type=OpenICLInferTask),
),
)

# -------------Evaluation Stage ----------------------------------------

## ------------- JudgeLLM Configuration
Expand Down
115 changes: 0 additions & 115 deletions configs/eval_subjective_corev2.py

This file was deleted.

16 changes: 2 additions & 14 deletions configs/eval_subjective_creationbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
with read_base():
from .datasets.subjective.creationbench.creationbench_judgeby_gpt4_withref import subjective_datasets

from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
from opencompass.models.openai_api import OpenAIAllesAPIN
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
Expand Down Expand Up @@ -51,26 +51,14 @@

datasets = [*subjective_datasets]

infer = dict(
partitioner=dict(type=NaivePartitioner),
runner=dict(
type=SlurmSequentialRunner,
partition='llmeval',
quotatype='auto',
max_num_workers=256,
task=dict(type=OpenICLInferTask),
),
)

# -------------Evaluation Stage ----------------------------------------

## ------------- JudgeLLM Configuration
judge_model = dict(
abbr='GPT4-Turbo',
type=OpenAIAllesAPIN,
type=OpenAI,
path='gpt-4-1106-preview',
key='xxxx', # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url='xxxx',
meta_template=api_meta_template,
query_per_second=16,
max_out_len=2048,
Expand Down
16 changes: 2 additions & 14 deletions configs/eval_subjective_mtbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .datasets.subjective.multiround.mtbench_single_judge_diff_temp import subjective_datasets
# from .datasets.subjective.multiround.mtbench_pair_judge import subjective_datasets

from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
from opencompass.models.openai_api import OpenAIAllesAPIN
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
Expand Down Expand Up @@ -59,26 +59,14 @@

datasets = [*subjective_datasets]

infer = dict(
partitioner=dict(type=SizePartitioner, strategy='split', max_task_size=10000),
runner=dict(
type=SlurmSequentialRunner,
partition='llm_dev2',
quotatype='auto',
max_num_workers=256,
task=dict(type=OpenICLInferTask),
),
)

# -------------Evaluation Stage ----------------------------------------

## ------------- JudgeLLM Configuration
judge_model = dict(
abbr='GPT4-Turbo',
type=OpenAIAllesAPIN,
type=OpenAI,
path='gpt-4-0613', # To compare with the official leaderboard, please use gpt4-0613
key='xxxx', # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url='xxxx',
meta_template=api_meta_template,
query_per_second=16,
max_out_len=2048,
Expand Down

0 comments on commit 0665bb9

Please sign in to comment.