From 0665bb91a8eccbdc203fa7e8be90e050aefe950c Mon Sep 17 00:00:00 2001
From: bittersweet1999 <148421775+bittersweet1999@users.noreply.github.com>
Date: Fri, 22 Mar 2024 19:54:19 +0800
Subject: [PATCH] [Fix] Quick fix: use OpenAI judge, drop Slurm infer blocks, remove corev2 config in subjective configs (#995)

---
 configs/eval_subjective_alignbench.py    |  16 +---
 configs/eval_subjective_alpacaeval.py    |  11 +--
 configs/eval_subjective_compassarena.py  |  11 ---
 configs/eval_subjective_corev2.py        | 115 -----------------------
 configs/eval_subjective_creationbench.py |  16 +---
 configs/eval_subjective_mtbench.py       |  16 +---
 6 files changed, 7 insertions(+), 178 deletions(-)
 delete mode 100644 configs/eval_subjective_corev2.py

diff --git a/configs/eval_subjective_alignbench.py b/configs/eval_subjective_alignbench.py
index 8f60016bf..0563ff874 100644
--- a/configs/eval_subjective_alignbench.py
+++ b/configs/eval_subjective_alignbench.py
@@ -3,7 +3,7 @@
 with read_base():
     from .datasets.subjective.alignbench.alignbench_judgeby_critiquellm import subjective_datasets
 
-from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
+from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
 from opencompass.models.openai_api import OpenAIAllesAPIN
 from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
@@ -51,26 +51,14 @@
 
 datasets = [*subjective_datasets]
 
-infer = dict(
-    partitioner=dict(type=NaivePartitioner),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llmeval',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=OpenICLInferTask),
-    ),
-)
-
 # -------------Evalation Stage ----------------------------------------
 
 ## ------------- JudgeLLM Configuration
 judge_model = dict(
     abbr='GPT4-Turbo',
-    type=OpenAIAllesAPIN,
+    type=OpenAI,
     path='gpt-4-1106-preview',
     key='xxxx',  # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
-    url='xxxx',
     meta_template=api_meta_template,
     query_per_second=16,
     max_out_len=2048,
diff --git a/configs/eval_subjective_alpacaeval.py b/configs/eval_subjective_alpacaeval.py
index 098547b93..13fd5ebe5 100644
--- a/configs/eval_subjective_alpacaeval.py
+++ b/configs/eval_subjective_alpacaeval.py
@@ -68,16 +68,7 @@
     temperature=1,
 )  # Re-inference gpt4's predictions or you can choose to use the pre-commited gpt4's predictions
 
-infer = dict(
-    partitioner=dict(type=NaivePartitioner),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llmeval',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=OpenICLInferTask),
-    ),
-)
+
 
 # -------------Evalation Stage ----------------------------------------
 
diff --git a/configs/eval_subjective_compassarena.py b/configs/eval_subjective_compassarena.py
index 58336a5c6..5e1f93eeb 100644
--- a/configs/eval_subjective_compassarena.py
+++ b/configs/eval_subjective_compassarena.py
@@ -69,17 +69,6 @@
     temperature=1,
 )  # Re-inference gpt4's predictions or you can choose to use the pre-commited gpt4's predictions
 
-infer = dict(
-    partitioner=dict(type=SizePartitioner, strategy='split', max_task_size=10000),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llm_dev2',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=OpenICLInferTask),
-    ),
-)
-
 # -------------Evalation Stage ----------------------------------------
 
 ## ------------- JudgeLLM Configuration
diff --git a/configs/eval_subjective_corev2.py b/configs/eval_subjective_corev2.py
deleted file mode 100644
index 2ca07b433..000000000
--- a/configs/eval_subjective_corev2.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from mmengine.config import read_base
-
-with read_base():
-    from .datasets.subjective.subjective_cmp.subjective_corev2 import subjective_datasets
-
-from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
-from opencompass.partitioners import NaivePartitioner, SizePartitioner
-from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
-from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
-from opencompass.runners import LocalRunner
-from opencompass.runners import SlurmSequentialRunner
-from opencompass.tasks import OpenICLInferTask
-from opencompass.tasks.subjective_eval import SubjectiveEvalTask
-from opencompass.summarizers import Corev2Summarizer
-
-api_meta_template = dict(
-    round=[
-        dict(role='HUMAN', api_role='HUMAN'),
-        dict(role='BOT', api_role='BOT', generate=True),
-    ],
-    reserved_roles=[
-        dict(role='SYSTEM', api_role='SYSTEM'),
-    ],
-)
-
-# -------------Inference Stage ----------------------------------------
-
-# For subjective evaluation, we often set do sample for models
-models = [
-    dict(
-        type=HuggingFaceChatGLM3,
-        abbr='chatglm3-6b-hf',
-        path='THUDM/chatglm3-6b',
-        tokenizer_path='THUDM/chatglm3-6b',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        generation_kwargs=dict(
-            do_sample=True,
-        ),
-        meta_template=api_meta_template,
-        max_out_len=2048,
-        max_seq_len=4096,
-        batch_size=1,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    )
-]
-
-datasets = [*subjective_datasets]
-
-gpt4 = dict(
-    abbr='gpt4-turbo',
-    type=OpenAI,
-    path='gpt-4-1106-preview',
-    key='',  # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
-    meta_template=api_meta_template,
-    query_per_second=1,
-    max_out_len=2048,
-    max_seq_len=4096,
-    batch_size=4,
-    retry=20,
-    temperature=1,
-)  # Re-inference gpt4's predictions or you can choose to use the pre-commited gpt4's predictions
-
-infer = dict(
-    partitioner=dict(type=SizePartitioner, max_task_size=500),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llm_dev2',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=OpenICLInferTask),
-    ),
-)
-
-# -------------Evalation Stage ----------------------------------------
-
-## ------------- JudgeLLM Configuration
-judge_model = dict(
-    abbr='GPT4-Turbo',
-    type=OpenAI,
-    path='gpt-4-1106-preview',
-    key='',  # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
-    meta_template=api_meta_template,
-    query_per_second=1,
-    max_out_len=1024,
-    max_seq_len=4096,
-    batch_size=2,
-    retry=20,
-    temperature=0,
-)
-
-## ------------- Evaluation Configuration
-eval = dict(
-    partitioner=dict(
-        type=SubjectiveSizePartitioner, mode='m2n', max_task_size=500, base_models=[gpt4], compare_models=models
-    ),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llm_dev2',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=SubjectiveEvalTask, judge_cfg=judge_model),
-    ),
-)
-
-summarizer = dict(type=Corev2Summarizer, match_method='smart')
-
-work_dir = 'outputs/corev2/'
diff --git a/configs/eval_subjective_creationbench.py b/configs/eval_subjective_creationbench.py
index 52bf7d4b8..922225f11 100644
--- a/configs/eval_subjective_creationbench.py
+++ b/configs/eval_subjective_creationbench.py
@@ -3,7 +3,7 @@
 with read_base():
     from .datasets.subjective.creationbench.creationbench_judgeby_gpt4_withref import subjective_datasets
 
-from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
+from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
 from opencompass.models.openai_api import OpenAIAllesAPIN
 from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
@@ -51,26 +51,14 @@
 
 datasets = [*subjective_datasets]
 
-infer = dict(
-    partitioner=dict(type=NaivePartitioner),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llmeval',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=OpenICLInferTask),
-    ),
-)
-
 # -------------Evalation Stage ----------------------------------------
 
 ## ------------- JudgeLLM Configuration
 judge_model = dict(
     abbr='GPT4-Turbo',
-    type=OpenAIAllesAPIN,
+    type=OpenAI,
     path='gpt-4-1106-preview',
     key='xxxx',  # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
-    url='xxxx',
     meta_template=api_meta_template,
     query_per_second=16,
     max_out_len=2048,
diff --git a/configs/eval_subjective_mtbench.py b/configs/eval_subjective_mtbench.py
index 940edabb3..c8dbb23c0 100644
--- a/configs/eval_subjective_mtbench.py
+++ b/configs/eval_subjective_mtbench.py
@@ -4,7 +4,7 @@
     from .datasets.subjective.multiround.mtbench_single_judge_diff_temp import subjective_datasets
     # from .datasets.subjective.multiround.mtbench_pair_judge import subjective_datasets
 
-from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
+from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
 from opencompass.models.openai_api import OpenAIAllesAPIN
 from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
@@ -59,26 +59,14 @@
 
 datasets = [*subjective_datasets]
 
-infer = dict(
-    partitioner=dict(type=SizePartitioner, strategy='split', max_task_size=10000),
-    runner=dict(
-        type=SlurmSequentialRunner,
-        partition='llm_dev2',
-        quotatype='auto',
-        max_num_workers=256,
-        task=dict(type=OpenICLInferTask),
-    ),
-)
-
 # -------------Evalation Stage ----------------------------------------
 
 ## ------------- JudgeLLM Configuration
 judge_model = dict(
     abbr='GPT4-Turbo',
-    type=OpenAIAllesAPIN,
+    type=OpenAI,
     path='gpt-4-0613', # To compare with the official leaderboard, please use gpt4-0613
     key='xxxx',  # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
-    url='xxxx',
     meta_template=api_meta_template,
     query_per_second=16,
     max_out_len=2048,