diff --git a/lm_eval/api/model.py b/lm_eval/api/model.py
index 1cf8aedb66..765fc01222 100644
--- a/lm_eval/api/model.py
+++ b/lm_eval/api/model.py
@@ -3,7 +3,7 @@
 import json
 import logging
 import os
-from typing import Dict, List, Optional, Tuple, Type, TypeVar
+from typing import Dict, List, Optional, Tuple, Type, TypeVar, Union

 import transformers
 from sqlitedict import SqliteDict
@@ -192,15 +192,13 @@ def tokenizer_name(self) -> str:
             "To use this model with chat templates, please implement the 'tokenizer_name' property."
         )

-    @property
-    def chat_template(self) -> str:
-        """Must be defined for LM subclasses that implement Chat Templating.
-        Should return the structure of the chat template applied to user/assistant messages.
-        This is used only to save in the experiment results for reproducibility.
+    def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
+        """Returns the chat template structure for user/assistant messages if a template is provided.
+        This method is intended to be overridden in a subclass to define a specific chat template format.
+        For models that do not support chat templates, this method returns None by default.
         """
-        raise NotImplementedError(
-            "To use this model with chat templates, please implement the 'chat_template' property."
-        )
+
+        return ""

     def set_cache_hook(self, cache_hook) -> None:
         self.cache_hook = cache_hook
@@ -316,6 +314,8 @@ class TemplateLM(LM):
     and boilerplate often included in other LM subclasses.
     """

+    tokenizer = None
+
     @property
     @abc.abstractmethod
     def eot_token_id(self):
@@ -386,3 +386,99 @@ def loglikelihood_rolling(
     @abc.abstractmethod
     def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
         pass
+
+    def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
+        """
+        Set and get the appropriate chat template for the model.
+        This method sets the tokenizer's chat_template and returns the template string for reproducibility.
+
+        The template selection logic is adapted from the Transformers library's `apply_chat_template`
+        method in the Tokenizer class. The original implementation can be found at:
+        https://github.com/huggingface/transformers/blob/fc35907f95459d7a6c5281dfadd680b6f7b620e3/src/transformers/tokenization_utils_base.py#L1687
+
+        This method ensures that the right template is chosen based on the following:
+        0. If the model has no 'tokenizer' attribute: assumes that there is only a single possible chat template, handled on the model provider side internally. Returns the empty string.
+        1. If the model's tokenizer has multiple templates:
+            a. Use the specified template if it exists in the dictionary.
+            b. Use the default template from the list if no specific template is provided.
+            c. Raise an error if no default template exists and no specific template is provided.
+        2. If the model's tokenizer has a single template or no template:
+            a. Use the tokenizer's chat template if available.
+            b. Fall back to the default chat template if no tokenizer chat template exists.
+
+        Args:
+            chat_template (Union[bool, str]): Specifies the chat template to use.
+                - If False or None, no template is applied.
+                - If True, the default or only available template is used.
+                - If a string, the template with the matching name is used.
+
+        Returns:
+            Optional[str]: The selected chat template, or None if no template is applied.
+        """
+        if self.tokenizer is None:
+            return ""
+
+        if chat_template is False or chat_template is None:
+            eval_logger.warning(
+                "model.chat_template was called with the chat_template set to False or None. "
+                "Therefore no chat template will be applied. Make sure this is an intended behavior."
+            )
+            return None
+
+        # Convert boolean chat_template to None to ensure compatibility with the adapted logic
+        if isinstance(chat_template, bool):
+            chat_template = None
+        using_default_template = False
+
+        # First, handle the cases when the model has a dict of multiple templates
+        template = self.tokenizer.chat_template or self.tokenizer.default_chat_template
+
+        if isinstance(template, dict):
+            using_default_dict = self.tokenizer.chat_template is None
+
+            if chat_template is not None:
+                if chat_template in template:
+                    selected_template = template[chat_template]
+                    if using_default_dict:
+                        using_default_template = True
+                else:
+                    raise ValueError(
+                        f"The specified chat template '{chat_template}' is not available. "
+                        f"Available template names are {sorted(template.keys())}."
+                    )
+            else:
+                # If user didn't pass a chat template, use the default template from the dict
+                if "default" in template:
+                    selected_template = template["default"]
+                    using_default_template = True
+                else:
+                    raise ValueError(
+                        "This model has multiple chat templates with no default specified! Please either pass a chat "
+                        "template or the name of the template you wish to use to the `chat_template` argument. Available "
+                        f"template names are {sorted(template.keys())}."
+                    )
+
+        # Cases when the model has a single template or no template
+        else:
+            # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
+            if isinstance(chat_template, str):
+                eval_logger.warning(
+                    "Chat template name provided, but the tokenizer's chat template is not a dictionary. "
+                    "Using the tokenizer's chat template or the default template instead."
+                )
+            if self.tokenizer.chat_template is not None:
+                selected_template = self.tokenizer.chat_template
+            else:
+                selected_template = self.tokenizer.default_chat_template
+                using_default_template = True
+
+        if using_default_template:
+            eval_logger.warning(
+                "No chat template is set for this tokenizer, falling back to a default class-level template. This is "
+                "very error-prone, because models are often trained with templates different from the class default! "
+                "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
+                "point any code depending on them will stop working. We recommend setting a valid chat template before "
+                "then to ensure that this model continues working without issues."
+            )
+
+        return selected_template
diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py
index 9bd8288860..52a8d00d74 100644
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
@@ -289,18 +289,12 @@ def _adjust_config(task_dict):
     if check_integrity:
         run_task_tests(task_list=tasks)

-    # hotfix: delete when chat_template fixed
-    try:
-        chat = lm.chat_template(apply_chat_template)
-    except:  # noqa: E722
-        chat = None
-
     if evaluation_tracker is not None:
         evaluation_tracker.general_config_tracker.log_experiment_args(
             model_source=model,
             model_args=model_args,
             system_instruction=system_instruction,
-            chat_template=chat,
+            chat_template=lm.chat_template(apply_chat_template),
             fewshot_as_multiturn=fewshot_as_multiturn,
         )

diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py
index 9d21705512..7098893947 100644
--- a/lm_eval/models/api_models.py
+++ b/lm_eval/models/api_models.py
@@ -225,14 +225,6 @@ def header(self) -> dict:
         """Override this property to return the headers for the API request."""
         return {"Authorization": f"Bearer {self.api_key}"}

-    @property
-    def chat_template(self) -> str:
-        """Must be defined for LM subclasses that implement Chat Templating.
-        Should return the structure of the chat template applied to user/assistant messages.
-        Only used for logging and reproducibility.
-        """
-        return ""
-
     @property
     def tokenizer_name(self) -> str:
         """Must be defined for LM subclasses which implement Chat Templating.
diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py
index 80f65ca536..01a8cf6789 100644
--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
@@ -438,98 +438,6 @@ def world_size(self):
     def tokenizer_name(self) -> str:
         return self.tokenizer.name_or_path.replace("/", "__")

-    def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
-        """
-        Get the appropriate chat template for the model based on configuration and input.
-        This method determines, and returns the correct chat template, ensuring reproducibility.
-
-        The template selection logic is adapted from the Transformers library's `apply_chat_template`
-        method in the Tokenizer class. The original implementation can be found at:
-        https://github.com/huggingface/transformers/blob/fc35907f95459d7a6c5281dfadd680b6f7b620e3/src/transformers/tokenization_utils_base.py#L1687
-
-        This method ensures that the right template is chosen based on the following:
-        1. If the model's tokenizer has multiple templates:
-            a. Use the specified template if it exists in the dictionary.
-            b. Use the default template from the list if no specific template is provided.
-            c. Raise an error if no default template exists and no specific template is provided.
-        2. If the model's tokenizer has a single template or no template:
-            a. Use the tokenizer's chat template if available.
-            b. Fall back to the default chat template if no tokenizer chat template exists.
-
-        Args:
-            chat_template (Union[bool, str]): Specifies the chat template to use.
-                - If False or None, no template is applied.
-                - If True, the default or only available template is used.
-                - If a string, the template with the matching name is used.
-
-        Returns:
-            Optional[str]: The selected chat template, or None if no template is applied.
-        """
-        if chat_template is False or chat_template is None:
-            eval_logger.warning(
-                "model.chat_template was called with the chat_template set to False or None. "
-                "Therefore no chat template will be applied. Make sure this is an intended behavior."
-            )
-            return None
-
-        # Convert boolean chat_template to None to ensure compatibility with the adapted logic
-        if isinstance(chat_template, bool):
-            chat_template = None
-        using_default_template = False
-
-        # First, handle the cases when the model has a dict of multiple templates
-        template = self.tokenizer.chat_template or self.tokenizer.default_chat_template
-
-        if isinstance(template, dict):
-            using_default_dict = self.tokenizer.chat_template is None
-
-            if chat_template is not None:
-                if chat_template in template:
-                    selected_template = template[chat_template]
-                    if using_default_dict:
-                        using_default_template = True
-                else:
-                    raise ValueError(
-                        f"The specified chat template '{chat_template}' is not available. "
-                        f"Available template names are {sorted(template.keys())}."
-                    )
-            else:
-                # If user didn't pass a chat template, use the default template from the dict
-                if "default" in template:
-                    selected_template = template["default"]
-                    using_default_template = True
-                else:
-                    raise ValueError(
-                        "This model has multiple chat templates with no default specified! Please either pass a chat "
-                        "template or the name of the template you wish to use to the `chat_template` argument. Available "
-                        f"template names are {sorted(template.keys())}."
-                    )
-
-        # Cases when the model has a single template or no template
-        else:
-            # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
-            if isinstance(chat_template, str):
-                eval_logger.warning(
-                    "Chat template name provided, but the tokenizer's chat template is not a dictionary. "
-                    "Using the tokenizer's chat template or the default template instead."
-                )
-            if self.tokenizer.chat_template is not None:
-                selected_template = self.tokenizer.chat_template
-            else:
-                selected_template = self.tokenizer.default_chat_template
-                using_default_template = True
-
-        if using_default_template:
-            eval_logger.warning(
-                "No chat template is set for this tokenizer, falling back to a default class-level template. This is "
-                "very error-prone, because models are often trained with templates different from the class default! "
-                "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
-                "point any code depending on them will stop working. We recommend setting a valid chat template before "
-                "then to ensure that this model continues working without issues."
-            )
-
-        return selected_template
-
     def _get_backend(
         self,
         config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py
index 26dc93d68f..4bf2c42b1e 100644
--- a/lm_eval/models/openai_completions.py
+++ b/lm_eval/models/openai_completions.py
@@ -29,7 +29,10 @@ def _create_payload(
     ) -> dict:
         if generate:
             gen_kwargs.pop("do_sample", False)
-            max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
+            if "max_tokens" in gen_kwargs:
+                max_tokens = gen_kwargs.pop("max_tokens")
+            else:
+                max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
             temperature = gen_kwargs.pop("temperature", 0)
             stop = gen_kwargs.pop("until", ["<|endoftext|>"])
             return {
@@ -124,7 +127,10 @@ def _create_payload(
         **kwargs,
     ) -> dict:
         gen_kwargs.pop("do_sample", False)
-        max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
+        if "max_tokens" in gen_kwargs:
+            max_tokens = gen_kwargs.pop("max_tokens")
+        else:
+            max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
         temperature = gen_kwargs.pop("temperature", 0)
         stop = gen_kwargs.pop("until", ["<|endoftext|>"])
         if not isinstance(stop, (list, tuple)):
@@ -194,6 +200,9 @@ def loglikelihood(self, requests, **kwargs):
         ), "Loglikelihood is not supported for gpt-3.5-turbo"
         return super().loglikelihood(requests, **kwargs)

+    def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
+        return ""
+

 @register_model("openai-chat-completions")
 class OpenAIChatCompletion(LocalChatCompletion):
diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py
index d6a8dc1817..168f490a7b 100644
--- a/lm_eval/models/vllm_causallms.py
+++ b/lm_eval/models/vllm_causallms.py
@@ -187,12 +187,6 @@ def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str:
             chat_history, tokenize=False, add_generation_prompt=True
         )

-    @property
-    def chat_template(self) -> str:
-        if self.tokenizer.chat_template is not None:
-            return self.tokenizer.chat_template
-        return self.tokenizer.default_chat_template
-
     @property
     def tokenizer_name(self) -> str:
         return self.tokenizer.name_or_path.replace("/", "__")
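
Note (not part of the patch): the sketch below is a simplified, standalone illustration of the template-selection rules that the new TemplateLM.chat_template method follows, i.e. False/None selects no template, True selects the default (or only) template, and a string selects the named entry when the tokenizer exposes a dict of templates. DummyTokenizer and select_chat_template are hypothetical names used only for this example; they are not lm-eval or Transformers APIs, and the real method also handles default_chat_template fallbacks and the warning/error paths shown in the diff.

    from typing import Optional, Union


    class DummyTokenizer:
        """Hypothetical stand-in for a tokenizer that exposes several named templates."""

        chat_template = {
            "default": "{% for m in messages %}<{{ m['role'] }}>: {{ m['content'] }}\n{% endfor %}",
            "tool_use": "{% for m in messages %}[{{ m['role'] }}] {{ m['content'] }}\n{% endfor %}",
        }
        default_chat_template = None


    def select_chat_template(tokenizer, chat_template: Union[bool, str] = False) -> Optional[str]:
        """Simplified re-implementation of the selection rules, for illustration only."""
        if chat_template is False or chat_template is None:
            # mirrors the patched behavior: no template is applied
            return None

        requested = chat_template if isinstance(chat_template, str) else None
        template = tokenizer.chat_template or tokenizer.default_chat_template

        if isinstance(template, dict):
            # dict of named templates: use the requested name, otherwise fall back to "default"
            key = requested if requested is not None else "default"
            if key not in template:
                raise ValueError(f"Unknown template {key!r}; available: {sorted(template)}")
            return template[key]

        # single template (or tokenizer/class default) case
        return template


    if __name__ == "__main__":
        tok = DummyTokenizer()
        print(select_chat_template(tok, False))            # None: no template applied
        print(select_chat_template(tok, True)[:30])        # default template string
        print(select_chat_template(tok, "tool_use")[:30])  # named template string

Modeling chat_template as a method that takes the --apply_chat_template value (rather than a property) is what lets evaluator.py pass apply_chat_template straight through for logging, replacing the try/except hotfix removed above.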