[Feature] Add multi-prompt generation demo (#568)
* [Feature] Add multi-prompt generation demo

* [Fix] change the format in winogrande_gen_XXX.py

* [Fix] make the multi-prompt demo more direct

* [Fix] fix bug

* [Fix] minor fix

---------

Co-authored-by: yingfhu <[email protected]>
jingmingzhuo and yingfhu authored Nov 20, 2023
1 parent 91fba2c commit 5e75e29
Showing 3 changed files with 115 additions and 10 deletions.
49 changes: 49 additions & 0 deletions configs/datasets/winogrande/winogrande_gen_a027b6.py
@@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V2
from opencompass.utils.text_postprocessors import first_option_postprocess

winogrande_reader_cfg = dict(
    input_columns=["opt1", "opt2"],
    output_column="answer",
)

winogrande_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
)

_winogrande_prompt = dict(
    prompt_1="Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}\nAnswer:",
    prompt_2="Which is a good sentence out of the following:\nA. {opt1}\nB. {opt2}\nAnswer:",
    prompt_3="Can you identify a good sentence from the following:\nA. {opt1}\nB. {opt2}\nAnswer:",
)

winogrande_datasets = []
for _choice in _winogrande_prompt:
    winogrande_datasets.append(
        dict(
            abbr='winogrande_' + _choice,
            type=winograndeDataset_V2,
            path="./data/winogrande",
            reader_cfg=winogrande_reader_cfg,
            infer_cfg=dict(
                prompt_template=dict(
                    type=PromptTemplate,
                    template=dict(round=[
                        dict(
                            role="HUMAN",
                            prompt=_winogrande_prompt[_choice],
                        ),
                    ]),
                ),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer),
            ),
            eval_cfg=winogrande_eval_cfg,
        ),
    )

del _choice
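
Note: the loop above is what makes this a multi-prompt config. It emits one Winogrande dataset entry per key in _winogrande_prompt, identical except for the abbr and the prompt text. A minimal sketch (not part of the commit) of the resulting naming:

    _winogrande_prompt = {'prompt_1': '...', 'prompt_2': '...', 'prompt_3': '...'}
    abbrs = ['winogrande_' + choice for choice in _winogrande_prompt]
    print(abbrs)
    # ['winogrande_prompt_1', 'winogrande_prompt_2', 'winogrande_prompt_3']

These abbrs are what the demo config below collects into its summary groups.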
48 changes: 48 additions & 0 deletions configs/eval_multi_prompt_demo.py
@@ -0,0 +1,48 @@
from mmengine.config import read_base
from opencompass.models import HuggingFaceCausalLM


with read_base():
    from .datasets.winogrande.winogrande_gen_a027b6 import winogrande_datasets

datasets = [*winogrande_datasets]

_meta_template = dict(
    round=[
        dict(role='HUMAN', begin='<|User|>:', end='<eoh>\n'),
        dict(role='BOT', begin='<|Bot|>:', end='<eoa>\n', generate=True),
    ],
)

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='internlm-chat-7b-hf',
        path="internlm/internlm-chat-7b",
        tokenizer_path='internlm/internlm-chat-7b',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            use_fast=False,
            trust_remote_code=True,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template,
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]

_winogrande_all = [d['abbr'] for d in winogrande_datasets]

summarizer = dict(
    summary_groups=[
        {'name': 'winogrande', 'subsets': _winogrande_all},
        {'name': 'winogrande_std', 'subsets': _winogrande_all, 'std': True},
    ]
)
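
The summarizer section is the point of the demo: both summary groups cover the same three per-prompt subsets, but the first reports their plain average while the second sets 'std': True, which the summarizer change below turns into a standard-deviation metric across prompts. Written out literally (a sketch, assuming the three abbrs generated by the dataset config above), the groups are:

    summary_groups = [
        {'name': 'winogrande',
         'subsets': ['winogrande_prompt_1', 'winogrande_prompt_2', 'winogrande_prompt_3']},
        {'name': 'winogrande_std',
         'subsets': ['winogrande_prompt_1', 'winogrande_prompt_2', 'winogrande_prompt_3'],
         'std': True},
    ]

Assuming the standard OpenCompass entry point, the demo would then be launched with: python run.py configs/eval_multi_prompt_demo.py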
28 changes: 18 additions & 10 deletions opencompass/summarizers/default.py
@@ -1,6 +1,7 @@
 # flake8: noqa
 # yapf: disable
 import getpass
+import math
 import os.path as osp
 from datetime import datetime
 from typing import List, Optional
@@ -127,21 +128,28 @@ def summarize(
                         results[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][0]
                         eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
                 if len(results) == len(sg['subsets']):
-                    if 'weights' in sg:
-                        numerator = sum(results[k] * sg['weights'][k] for k in sg['weights'])
-                        denominator = sum(sg['weights'].values())
-                        metric = 'weighted_average'
+                    if 'std' in sg and sg['std'] == True:
+                        avg = sum(results[k] for k in results) / len(results)
+                        variance = sum((results[k] - avg)**2 for k in results) / len(results)
+                        metric = 'standard_deviation'
+                        results[metric] = math.sqrt(variance)
                     else:
-                        numerator = sum(results[k] for k in results)
-                        denominator = len(results)
-                        metric = 'naive_average'
-                    results[metric] = numerator / denominator
+                        if 'weights' in sg:
+                            numerator = sum(results[k] * sg['weights'][k] for k in sg['weights'])
+                            denominator = sum(sg['weights'].values())
+                            metric = 'weighted_average'
+                        else:
+                            numerator = sum(results[k] for k in results)
+                            denominator = len(results)
+                            metric = 'naive_average'
+                        results[metric] = numerator / denominator
 
                     eval_modes = list(set(eval_modes))
                     eval_mode = eval_modes[0] if len(eval_modes) == 1 else 'mixed'
 
                     # add to global results
                     raw_results[model_abbr][sg['name']] = results
-                    parsed_results[model_abbr][sg['name']] = [numerator / denominator]
+                    parsed_results[model_abbr][sg['name']] = [results[metric]]
 
                     dataset_metrics[sg['name']] = [metric]
                     dataset_eval_mode[sg['name']] = eval_mode
                 elif len(results) == 0:
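
A short worked example (not from the commit) of the new 'std' branch: it computes the population standard deviation of the subset scores, dividing by N rather than N-1, which matches statistics.pstdev:

    import math
    import statistics

    # Hypothetical per-prompt accuracies; illustrative numbers only.
    results = {'winogrande_prompt_1': 71.2,
               'winogrande_prompt_2': 69.8,
               'winogrande_prompt_3': 70.7}

    # Mirrors the new 'std' branch in summarize() above.
    avg = sum(results[k] for k in results) / len(results)
    variance = sum((results[k] - avg) ** 2 for k in results) / len(results)
    std = math.sqrt(variance)

    # Population form (divide by N), equivalent to statistics.pstdev.
    assert math.isclose(std, statistics.pstdev(results.values()))
    print(f'standard_deviation: {std:.3f}')  # -> 0.579

A low value means the model's score is stable across the three prompt phrasings; a high value flags prompt sensitivity.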
