From a81bbb85bfef0ca687c46c9bdcb12a0320cdf3c5 Mon Sep 17 00:00:00 2001
From: hailsham
Date: Thu, 19 Sep 2024 18:12:04 +0800
Subject: [PATCH 1/3] [FIX] Added handling for the "begin section" in
 meta_template to APITemplateParser (#1405)

Co-authored-by: leifei
---
 opencompass/models/base_api.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/opencompass/models/base_api.py b/opencompass/models/base_api.py
index c88aa154c..655f7d6e1 100644
--- a/opencompass/models/base_api.py
+++ b/opencompass/models/base_api.py
@@ -281,6 +281,9 @@ def parse_template(self, prompt_template: PromptType,
                     new_prompt.append(item)
                 prompt = new_prompt
 
+            if self.meta_template.get('begin', None):
+                prompt.insert(0, self.meta_template['begin'])
+
         else:
             # in case the model does not have any meta template
             prompt = ''
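Note: a minimal sketch of the behavior this fix enables. The meta_template layout below is an assumption for illustration; only the handling of the `begin` key comes from the patch. After the fix, APITemplateParser prepends the `begin` section to the parsed prompt instead of silently dropping it.

    # Hypothetical meta_template for illustration; only the `begin`
    # handling is what the patch adds.
    from opencompass.models.base_api import APITemplateParser

    meta_template = dict(
        begin='You are a helpful assistant.',  # the "begin section"
        round=[
            dict(role='HUMAN', api_role='HUMAN'),
            dict(role='BOT', api_role='BOT', generate=True),
        ],
    )
    parser = APITemplateParser(meta_template)
    # After the fix, the PromptList returned by parser.parse_template()
    # starts with the `begin` entry above.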
From ee058e25b2dfbb906606665b7a4ac2f1ea484c9d Mon Sep 17 00:00:00 2001
From: Songyang Zhang
Date: Fri, 20 Sep 2024 17:12:52 +0800
Subject: [PATCH 2/3] [Feature] Support verbose for OpenAI API (#1546)

---
 opencompass/models/base_api.py           |  4 +-
 opencompass/models/openai_api.py         | 66 ++++++++++++++---
 .../icl_inferencer/icl_gen_inferencer.py |  1 +
 3 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/opencompass/models/base_api.py b/opencompass/models/base_api.py
index 655f7d6e1..13a8e956c 100644
--- a/opencompass/models/base_api.py
+++ b/opencompass/models/base_api.py
@@ -43,7 +43,8 @@ def __init__(self,
                  retry: int = 2,
                  max_seq_len: int = 2048,
                  meta_template: Optional[Dict] = None,
-                 generation_kwargs: Dict = dict()):
+                 generation_kwargs: Dict = dict(),
+                 verbose: bool = False):
         self.path = path
         self.max_seq_len = max_seq_len
         self.meta_template = meta_template
@@ -53,6 +54,7 @@ def __init__(self,
         self.template_parser = APITemplateParser(meta_template)
         self.logger = get_logger()
         self.generation_kwargs = generation_kwargs
+        self.verbose = verbose
 
     @abstractmethod
     def generate(self, inputs: List[PromptType],
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index 7f306e4ef..4a07dee3f 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -90,14 +90,16 @@ def __init__(self,
                  temperature: Optional[float] = None,
                  tokenizer_path: Optional[str] = None,
                  extra_body: Optional[Dict] = None,
-                 max_completion_tokens: int = 16384):
+                 max_completion_tokens: int = 16384,
+                 verbose: bool = False):
 
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          meta_template=meta_template,
                          query_per_second=query_per_second,
                          rpm_verbose=rpm_verbose,
-                         retry=retry)
+                         retry=retry,
+                         verbose=verbose)
         import tiktoken
         self.tiktoken = tiktoken
         self.temperature = temperature
@@ -310,7 +312,9 @@ def _generate(self, input: PromptType, max_out_len: int,
                         'http': self.proxy_url,
                         'https': self.proxy_url,
                     }
-
+                    if self.verbose:
+                        self.logger.debug(
+                            f'Start send query to {self.proxy_url}')
                     raw_response = requests.post(
                         url,
                         headers=header,
@@ -318,6 +322,10 @@ def _generate(self, input: PromptType, max_out_len: int,
                         data=json.dumps(data),
                         proxies=proxies,
                     )
+                    if self.verbose:
+                        self.logger.debug(
+                            f'Get response from {self.proxy_url}')
+
             except requests.ConnectionError:
                 self.logger.error('Got connection error, retrying...')
                 continue
@@ -371,27 +379,44 @@ def get_token_len(self, prompt: str) -> int:
         """
         assert self.tokenizer_path or self.path
        try:
+            if self.verbose:
+                self.logger.info(f'Used tokenizer_path: {self.tokenizer_path}')
             tokenizer_path = self.tokenizer_path if self.tokenizer_path \
                 else self.path
             try:
+                if self.verbose:
+                    self.logger.info(
+                        f'Start load tiktoken encoding: {tokenizer_path}')
                 enc = self.tiktoken.encoding_for_model(tokenizer_path)
+                if self.verbose:
+                    self.logger.info(
+                        f'Successfully tiktoken encoding: {tokenizer_path}')
                 return len(enc.encode(prompt))
             except Exception as e:
                 self.logger.warn(f'{e}, tiktoken encoding cannot load '
                                  f'{tokenizer_path}')
                 from transformers import AutoTokenizer
                 if self.hf_tokenizer is None:
+                    if self.verbose:
+                        self.logger.info(
+                            f'Start load hf tokenizer: {tokenizer_path}')
                     self.hf_tokenizer = AutoTokenizer.from_pretrained(
                         tokenizer_path, trust_remote_code=True)
                     self.logger.info(
-                        f'Tokenizer is loaded from {tokenizer_path}')
+                        f'Successfully load HF Tokenizer from {tokenizer_path}'
+                    )
                 return len(self.hf_tokenizer(prompt).input_ids)
         except Exception:
             self.logger.warn(
                 'Can not get tokenizer automatically, '
                 'will use default tokenizer gpt-4 for length calculation.')
             default_tokenizer = 'gpt-4'
+
             enc = self.tiktoken.encoding_for_model(default_tokenizer)
+            if self.verbose:
+                self.logger.info(
+                    f'Successfully load default tiktoken tokenizer: '
+                    f' {default_tokenizer}')
             return len(enc.encode(prompt))
 
     def bin_trim(self, prompt: str, num_token: int) -> str:
@@ -458,12 +483,26 @@ def __init__(self,
                  temperature: float | None = None,
                  tokenizer_path: str | None = None,
                  extra_body: Dict | None = None,
-                 max_completion_tokens: int = 16384):
-        super().__init__(path, max_seq_len, query_per_second, rpm_verbose,
-                         retry, key, org, meta_template, openai_api_base,
-                         openai_proxy_url, mode, logprobs, top_logprobs,
-                         temperature, tokenizer_path, extra_body,
-                         max_completion_tokens)
+                 max_completion_tokens: int = 16384,
+                 verbose: bool = False):
+        super().__init__(path,
+                         max_seq_len,
+                         query_per_second,
+                         rpm_verbose,
+                         retry,
+                         key,
+                         org,
+                         meta_template,
+                         openai_api_base,
+                         openai_proxy_url,
+                         mode,
+                         logprobs,
+                         top_logprobs,
+                         temperature,
+                         tokenizer_path,
+                         extra_body,
+                         verbose=verbose,
+                         max_completion_tokens=max_completion_tokens)
         from openai import OpenAI
 
         if self.proxy_url is None:
@@ -478,6 +517,8 @@ def __init__(self,
                 base_url=openai_api_base,
                 api_key=key,
                 http_client=httpx.Client(proxies=proxies))
+        if self.verbose:
+            self.logger.info(f'Used openai_client: {self.openai_client}')
 
     def _generate(self, input: PromptList | str, max_out_len: int,
                   temperature: float) -> str:
@@ -553,8 +594,13 @@ def _generate(self, input: PromptList | str, max_out_len: int,
             )
 
         try:
+            if self.verbose:
+                self.logger.info('Start calling OpenAI API')
             responses = self.openai_client.chat.completions.create(
                 **query_data)
+            if self.verbose:
+                self.logger.info(
+                    'Successfully get response from OpenAI API')
             return responses.choices[0].message.content
         except Exception as e:
             self.logger.error(e)
diff --git a/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py b/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py
index 17bdf468c..6a33b711a 100644
--- a/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py
+++ b/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py
@@ -127,6 +127,7 @@ def inference(self,
             index = len(tmp_result_dict)
 
         # 4. Wrap prompts with Dataloader
+        logger.info('Starting build dataloader')
         dataloader = self.get_dataloader(prompt_list[index:], self.batch_size)
 
         # 5. Inference for prompts in each batch
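Note: a minimal usage sketch for the new `verbose` flag, assuming a standard OpenCompass model config; the abbr/path values below are placeholders. With `verbose=True`, the query/response round-trips and tokenizer-loading steps shown above are logged.

    # Hypothetical model config; only the `verbose` switch is new in this patch.
    from opencompass.models import OpenAI

    models = [
        dict(
            type=OpenAI,
            abbr='gpt-4o-mini-verbose',  # placeholder abbr
            path='gpt-4o-mini',          # placeholder model name
            key='ENV',                   # read the API key from the environment
            max_seq_len=4096,
            batch_size=8,
            verbose=True,                # new: log each request/response step
        ),
    ]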
From a0cfd611291a6d5df4d411f1c7edcbe79e315583 Mon Sep 17 00:00:00 2001
From: liushz
Date: Mon, 23 Sep 2024 14:03:59 +0800
Subject: [PATCH 3/3] [Feature] Update MathBench & Math base model config
 (#1550)

* Update MathBench & WikiBench for FullBench

* Update MathBench & WikiBench for FullBench

* Update GPQA & MMLU_Pro

* Update MathBench & WikiBench for FullBench

* Update MathBench & WikiBench for FullBench

* Update MathBench & WikiBench for FullBench

* Update MathBench & Math base config

---------

Co-authored-by: liushz
---
 .../mathbench_2024_few_shot_mixed_4a3fd4.py | 81 +++++++++++++++++++
 .../math/math_4shot_base_gen_43d5b6.py      | 30 +++++++
 .../mathbench_2024_few_shot_mixed_4a3fd4.py | 81 +++++++++++++++++++
 .../math/math_4shot_base_gen_43d5b6.py      | 30 +++++++
 4 files changed, 222 insertions(+)
 create mode 100644 configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
 create mode 100644 configs/datasets/math/math_4shot_base_gen_43d5b6.py
 create mode 100644 opencompass/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
 create mode 100644 opencompass/configs/datasets/math/math_4shot_base_gen_43d5b6.py

diff --git a/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py b/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
new file mode 100644
index 000000000..e7f2859e9
--- /dev/null
+++ b/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
@@ -0,0 +1,81 @@
+from mmengine.config import read_base
+from copy import deepcopy
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
+from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
+from opencompass.datasets import MathBenchDataset, mathbench_postprocess
+from opencompass.utils.text_postprocessors import first_option_postprocess
+
+with read_base():
+    from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets
+
+# Max for this dataset is 4
+num_shot = 4
+# Generate reasoning path or not, only for single choice
+with_reasoning = True
+# Use circular evaluation or not
+with_circular_eval = True
+# Use PPL mode in single choice test or not
+use_ppl_single_choice = True
+
+assert 0 <= num_shot <= 4
+if num_shot == 0:
+    prompts = zero_shot_prompts
+else:
+    prompts = {name: p[- 2 * num_shot - 2:] for name, p in few_shot_prompts.items()}
+
+mathbench_datasets = []
+for _split in mathbench_sets:
+    for _name in mathbench_sets[_split]:
+        if 'single_choice' in _name:
+            if with_reasoning and not use_ppl_single_choice:
+                template_round = prompts[_name + '_with_reasoning']
+            else:
+                template_round = prompts[_name]
+        else:
+            template_round = prompts[_name]
+
+        if 'single_choice' in _name:
+            pred_postprocessor = dict(type=first_option_postprocess, options='ABCD')
+        else:
+            pred_postprocessor = dict(type=mathbench_postprocess, name=_name)
+
+        if 'single_choice' in _name and with_circular_eval:
+            evaluator = dict(type=CircularEvaluator)
+        else:
+            evaluator = dict(type=AccEvaluator)
+
+        # assemble the final config
+        mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')
+        if use_ppl_single_choice and 'single_choice' in _name:
+            template = {}
+            for answer in ['A', 'B', 'C', 'D']:
+                one_template_round = deepcopy(template_round)
+                one_template_round[-1]['prompt'] = one_template_round[-1]['prompt'].format(answer=answer)
+                template[answer] = dict(round=one_template_round)
+            mathbench_infer_cfg = dict(
+                prompt_template=dict(type=PromptTemplate, template=template),
+                retriever=dict(type=ZeroRetriever),
+                inferencer=dict(type=PPLInferencer),
+            )
+        else:
+            mathbench_infer_cfg = dict(
+                prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)),
+                retriever=dict(type=ZeroRetriever),
+                inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Question:']),
+            )
+        mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor)
+
+        mathbench_datasets.append(
+            dict(
+                abbr='mathbench-' + _split + '-' + _name,
+                type=MathBenchDataset,
+                path=f'data/mathbench_v1/{_split}',
+                name=_name,
+                with_circular=with_circular_eval,
+                reader_cfg=mathbench_reader_cfg,
+                infer_cfg=mathbench_infer_cfg,
+                eval_cfg=mathbench_eval_cfg,
+            )
+        )
diff --git a/configs/datasets/math/math_4shot_base_gen_43d5b6.py b/configs/datasets/math/math_4shot_base_gen_43d5b6.py
new file mode 100644
index 000000000..1e8696798
--- /dev/null
+++ b/configs/datasets/math/math_4shot_base_gen_43d5b6.py
@@ -0,0 +1,30 @@
+from mmengine.config import read_base
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2
+
+with read_base():
+    from .math_4shot_example_from_google_research import prompt
+
+math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
+
+math_infer_cfg = dict(
+    prompt_template=dict(type=PromptTemplate, template=prompt + '\n\nProblem:\n{problem}\nSolution:'),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Problem', '问题:']))
+
+# postprocess v2
+math_eval_cfg = dict(
+    evaluator=dict(type=MATHEvaluator, version='v2'),
+    pred_postprocessor=dict(type=math_postprocess_v2))
+
+math_datasets = [
+    dict(
+        type=MATHDataset,
+        abbr='math',
+        path='opencompass/math',
+        reader_cfg=math_reader_cfg,
+        infer_cfg=math_infer_cfg,
+        eval_cfg=math_eval_cfg)
+]
diff --git a/opencompass/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py b/opencompass/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
new file mode 100644
index 000000000..e7f2859e9
--- /dev/null
+++ b/opencompass/configs/datasets/MathBench/mathbench_2024_few_shot_mixed_4a3fd4.py
@@ -0,0 +1,81 @@
+from mmengine.config import read_base
+from copy import deepcopy
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
+from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
+from opencompass.datasets import MathBenchDataset, mathbench_postprocess
+from opencompass.utils.text_postprocessors import first_option_postprocess
+
+with read_base():
+    from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets
+
+# Max for this dataset is 4
+num_shot = 4
+# Generate reasoning path or not, only for single choice
+with_reasoning = True
+# Use circular evaluation or not
+with_circular_eval = True
+# Use PPL mode in single choice test or not
+use_ppl_single_choice = True
+
+assert 0 <= num_shot <= 4
+if num_shot == 0:
+    prompts = zero_shot_prompts
+else:
+    prompts = {name: p[- 2 * num_shot - 2:] for name, p in few_shot_prompts.items()}
+
+mathbench_datasets = []
+for _split in mathbench_sets:
+    for _name in mathbench_sets[_split]:
+        if 'single_choice' in _name:
+            if with_reasoning and not use_ppl_single_choice:
+                template_round = prompts[_name + '_with_reasoning']
+            else:
+                template_round = prompts[_name]
+        else:
+            template_round = prompts[_name]
+
+        if 'single_choice' in _name:
+            pred_postprocessor = dict(type=first_option_postprocess, options='ABCD')
+        else:
+            pred_postprocessor = dict(type=mathbench_postprocess, name=_name)
+
+        if 'single_choice' in _name and with_circular_eval:
+            evaluator = dict(type=CircularEvaluator)
+        else:
+            evaluator = dict(type=AccEvaluator)
+
+        # assemble the final config
+        mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')
+        if use_ppl_single_choice and 'single_choice' in _name:
+            template = {}
+            for answer in ['A', 'B', 'C', 'D']:
+                one_template_round = deepcopy(template_round)
+                one_template_round[-1]['prompt'] = one_template_round[-1]['prompt'].format(answer=answer)
+                template[answer] = dict(round=one_template_round)
+            mathbench_infer_cfg = dict(
+                prompt_template=dict(type=PromptTemplate, template=template),
+                retriever=dict(type=ZeroRetriever),
+                inferencer=dict(type=PPLInferencer),
+            )
+        else:
+            mathbench_infer_cfg = dict(
+                prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)),
+                retriever=dict(type=ZeroRetriever),
+                inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Question:']),
+            )
+        mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor)
+
+        mathbench_datasets.append(
+            dict(
+                abbr='mathbench-' + _split + '-' + _name,
+                type=MathBenchDataset,
+                path=f'data/mathbench_v1/{_split}',
+                name=_name,
+                with_circular=with_circular_eval,
+                reader_cfg=mathbench_reader_cfg,
+                infer_cfg=mathbench_infer_cfg,
+                eval_cfg=mathbench_eval_cfg,
+            )
+        )
diff --git a/opencompass/configs/datasets/math/math_4shot_base_gen_43d5b6.py b/opencompass/configs/datasets/math/math_4shot_base_gen_43d5b6.py
new file mode 100644
index 000000000..1e8696798
--- /dev/null
+++ b/opencompass/configs/datasets/math/math_4shot_base_gen_43d5b6.py
@@ -0,0 +1,30 @@
+from mmengine.config import read_base
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2
+
+with read_base():
+    from .math_4shot_example_from_google_research import prompt
+
+math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
+
+math_infer_cfg = dict(
+    prompt_template=dict(type=PromptTemplate, template=prompt + '\n\nProblem:\n{problem}\nSolution:'),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=2048, stopping_criteria=['Problem', '问题:']))
+
+# postprocess v2
+math_eval_cfg = dict(
+    evaluator=dict(type=MATHEvaluator, version='v2'),
+    pred_postprocessor=dict(type=math_postprocess_v2))
+
+math_datasets = [
+    dict(
+        type=MATHDataset,
+        abbr='math',
+        path='opencompass/math',
+        reader_cfg=math_reader_cfg,
+        infer_cfg=math_infer_cfg,
+        eval_cfg=math_eval_cfg)
+]
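Note: the slice `p[- 2 * num_shot - 2:]` in the new MathBench configs keeps the last `2 * num_shot + 2` rounds of each few-shot template. This assumes the layout of the `few_shot_prompts` entries (defined in `mathbench_prompt.py`, not shown here): one HUMAN/BOT round pair per worked example, followed by a final question/answer pair, so the slice trims the stored 4-shot prompts down to `num_shot` shots. A standalone sketch with a toy round list:

    # Toy stand-in for a `few_shot_prompts` entry: 4 worked examples plus
    # the final question/answer slots (an assumed layout, for illustration).
    num_shot = 2
    rounds = [
        dict(role='HUMAN', prompt='Q1'), dict(role='BOT', prompt='A1'),
        dict(role='HUMAN', prompt='Q2'), dict(role='BOT', prompt='A2'),
        dict(role='HUMAN', prompt='Q3'), dict(role='BOT', prompt='A3'),
        dict(role='HUMAN', prompt='Q4'), dict(role='BOT', prompt='A4'),
        dict(role='HUMAN', prompt='{question}'), dict(role='BOT', prompt='{answer}'),
    ]
    # Keep the last num_shot pairs plus the final question/answer pair.
    trimmed = rounds[- 2 * num_shot - 2:]
    assert [r['prompt'] for r in trimmed] == [
        'Q3', 'A3', 'Q4', 'A4', '{question}', '{answer}']

The trailing `{answer}` slot is also what the PPL branch above relies on: `one_template_round[-1]['prompt'].format(answer=answer)` fills it with each candidate option so PPLInferencer can score A/B/C/D separately.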