diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index 63be7277ab..ddac76e98b 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -100,8 +100,7 @@ def __init__(
                 'use_flash_attention_2 is set to True, but flash-attention 2 is not installed. '
                 + 'Please `pip install llm-foundry[gpu]`.')

-        peft_config_dict = peft_config
-        if peft_config_dict is not None and not peft_installed:
+        if peft_config is not None and not peft_installed:
             raise ValueError(
                 'PEFT is not installed, but peft_config was passed. Please install LLM Foundry with the peft extra to use peft_config.'
             )
@@ -248,8 +247,7 @@ def _autoset_attn_implementation_monkeypatch(
         model.tie_weights()

-        peft_config = None
-        if peft_config_dict is not None:
-            peft_config = self._get_peft_config(peft_config_dict)
+        if peft_config is not None:
+            peft_config = self._get_peft_config(peft_config)

         if pretrained_lora_id_or_path is not None:
             if not peft_installed:
diff --git a/llmfoundry/utils/builders.py b/llmfoundry/utils/builders.py
index 1f57a28b1a..f9052f8e08 100644
--- a/llmfoundry/utils/builders.py
+++ b/llmfoundry/utils/builders.py
@@ -376,10 +376,6 @@ def _extract_param_groups(

 def build_optimizer(model: torch.nn.Module, name: str,
                     optimizer_config: Dict[str, Any]) -> Optimizer:
-    for k, v in optimizer_config.items():
-        if isinstance(v, DictConfig):
-            optimizer_config[k] = om.to_container(v, resolve=True)
-
     params = _extract_param_groups(model, optimizer_config)
     kwargs = {**optimizer_config}
diff --git a/llmfoundry/utils/mosaicml_logger_utils.py b/llmfoundry/utils/mosaicml_logger_utils.py
index d365e8fed1..cd290a8421 100644
--- a/llmfoundry/utils/mosaicml_logger_utils.py
+++ b/llmfoundry/utils/mosaicml_logger_utils.py
@@ -8,7 +8,6 @@
 from composer.loggers.logger_destination import LoggerDestination
 from composer.loggers.mosaicml_logger import (MOSAICML_ACCESS_TOKEN_ENV_VAR,
                                               MOSAICML_PLATFORM_ENV_VAR)
-from omegaconf import DictConfig, ListConfig

 _MODEL_KEYS_TO_LOG = [
     'pretrained_model_name_or_path',
@@ -38,9 +37,10 @@ def find_mosaicml_logger(


 def log_eval_analytics(mosaicml_logger: MosaicMLLogger,
-                       model_configs: ListConfig, icl_tasks: Union[str,
-                                                                   ListConfig],
-                       eval_gauntlet_config: Optional[Union[str, DictConfig]]):
+                       model_configs: List[Dict[str, Any]],
+                       icl_tasks: Union[str, List[Dict[str, Any]]],
+                       eval_gauntlet_config: Optional[Union[str, Dict[str,
+                                                                      Any]]]):
     """Logs analytics for runs using the `eval.py` script."""
     metrics: Dict[str, Any] = {
         'llmfoundry/script': 'eval',
@@ -67,14 +67,17 @@ def log_eval_analytics(mosaicml_logger: MosaicMLLogger,


 def log_train_analytics(mosaicml_logger: MosaicMLLogger,
-                        model_config: DictConfig,
-                        train_loader_config: DictConfig,
-                        eval_loader_config: Optional[Union[DictConfig,
-                                                           ListConfig]],
+                        model_config: Dict[str,
+                                           Any], train_loader_config: Dict[str,
+                                                                           Any],
+                        eval_loader_config: Optional[Union[Dict[str, Any],
+                                                           List[Dict[str,
+                                                                     Any]]]],
                         callback_configs: Optional[Dict[str, Any]],
                         tokenizer_name: str, load_path: Optional[str],
-                        icl_tasks_config: Optional[Union[ListConfig, str]],
-                        eval_gauntlet: Optional[Union[DictConfig, str]]):
+                        icl_tasks_config: Optional[Union[List[Dict[str, Any]],
+                                                         str]],
+                        eval_gauntlet: Optional[Union[Dict[str, Any], str]]):
     """Logs analytics for runs using the `train.py` script."""
     train_loader_dataset = train_loader_config.get('dataset', {})
     metrics: Dict[str, Any] = {
@@ -106,10 +109,10 @@ def log_train_analytics(mosaicml_logger: MosaicMLLogger,
     if eval_loader_config is not None:
         metrics['llmfoundry/eval_loaders'] = []

-        if isinstance(eval_loader_config, ListConfig):
-            eval_loader_configs: ListConfig = eval_loader_config
+        if isinstance(eval_loader_config, list):
+            eval_loader_configs: list = eval_loader_config
         else:
-            eval_loader_configs = ListConfig([eval_loader_config])
+            eval_loader_configs = [eval_loader_config]

         for loader_config in eval_loader_configs:
             eval_loader_info = {}
diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index ed12c33cf3..80e1c9aedf 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -59,11 +59,7 @@ def evaluate_model(
     logged_config: DictConfig,
     should_log_config: bool = True,
     load_path: Optional[str] = None,
-    **kwargs: Dict[str, Any],
 ):
-    model_extra_params = kwargs
-    warnings.warn(f'Extra parameters: {model_extra_params}')
-
     log.info(f'Evaluating model: {model_name}')
     # Build tokenizer and model
     tokenizer_cfg = tokenizer
@@ -103,7 +99,7 @@ def evaluate_model(
             'The FSDP config block is not supported when loading ' +
             'Hugging Face models in 8bit.')

-    init_context = process_init_device(DictConfig(model), fsdp_config)
+    init_context = process_init_device(model, fsdp_config)

     name = model.pop('name')
     composer_model = build_composer_model(name=name,
@@ -189,7 +185,7 @@ class EvalConfig:
     icl_tasks_str: Optional[str] = None

     # Logging parameters
-    python_log_level: Optional[str] = None
+    python_log_level: str = 'debug'
     loggers: Optional[Dict[str, Any]] = None
     log_config: bool = True

@@ -299,15 +295,14 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
     reproducibility.seed_all(eval_config.seed)
     dist.initialize_dist(get_device(None), timeout=eval_config.dist_timeout)

-    if eval_config.python_log_level is not None:
-        logging.basicConfig(
-            # Example of format string
-            # 2022-06-29 11:22:26,152: rank0[822018][MainThread]: INFO: Message here
-            format=
-            f'%(asctime)s: rank{dist.get_global_rank()}[%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s'
-        )
-        logging.getLogger('llmfoundry').setLevel(
-            eval_config.python_log_level.upper())
+    logging.basicConfig(
+        # Example of format string
+        # 2022-06-29 11:22:26,152: rank0[822018][MainThread]: INFO: Message here
+        format=
+        f'%(asctime)s: rank{dist.get_global_rank()}[%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s'
+    )
+    logging.getLogger('llmfoundry').setLevel(
+        eval_config.python_log_level.upper())

     # Default argument values for evaluate_model
     eval_gauntlet_df = None
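A note for reviewers on the `hf_causal_lm.py` hunks: with the `peft_config_dict` alias gone, the `peft_config` argument is converted in place from a plain dict to a `peft` config object. The second hunk also drops the `peft_config = None` initializer, which would have shadowed the argument and made the conversion branch dead code. A minimal sketch of the pattern, assuming the `peft` package and a LoRA-style config dict; `get_peft_config` and `resolve_peft_config` are illustrative stand-ins for the class's `_get_peft_config`, not the repo's actual implementation:

```python
from typing import Any, Dict, Optional

from peft import LoraConfig, PeftConfig


def get_peft_config(peft_config: Dict[str, Any]) -> PeftConfig:
    # Illustrative stand-in: validate the dict and build a peft config object.
    peft_type = peft_config.get('peft_type', '')
    if peft_type.upper() != 'LORA':
        raise ValueError(f'Only LoRA configs are handled here, got {peft_type!r}')
    lora_kwargs = {k: v for k, v in peft_config.items() if k != 'peft_type'}
    return LoraConfig(**lora_kwargs)


def resolve_peft_config(
        peft_config: Optional[Dict[str, Any]]) -> Optional[PeftConfig]:
    # Mirrors the fixed control flow: test and reuse the argument directly
    # rather than shadowing it with a `None` local first.
    if peft_config is not None:
        return get_peft_config(peft_config)
    return None
```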
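Similarly, with the omegaconf unwrapping loop removed from `build_optimizer` in `builders.py`, the function now assumes `optimizer_config` is already a plain dict. A hedged sketch of what a call site might look like under that assumption, using llm-foundry's registered `decoupled_adamw` optimizer; the hyperparameter values are illustrative:

```python
import torch
from omegaconf import OmegaConf as om

from llmfoundry.utils.builders import build_optimizer

model = torch.nn.Linear(8, 8)  # toy model for illustration

cfg = om.create({
    'name': 'decoupled_adamw',
    'lr': 6.0e-4,
    'betas': [0.9, 0.95],
    'weight_decay': 0.0,
})

# Resolve OmegaConf containers to plain Python *before* the call;
# build_optimizer no longer converts nested DictConfig values itself.
optimizer_config = om.to_container(cfg, resolve=True)
name = optimizer_config.pop('name')
optimizer = build_optimizer(model, name, optimizer_config)
```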
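Finally, because `python_log_level` in `EvalConfig` is no longer `Optional` and defaults to `'debug'`, `eval.py` now configures logging unconditionally. A standalone sketch of the resulting behavior, with a hard-coded rank standing in for `dist.get_global_rank()`:

```python
import logging

rank = 0  # stand-in for composer.utils.dist.get_global_rank()
logging.basicConfig(
    # e.g. 2022-06-29 11:22:26,152: rank0[822018][MainThread]: INFO: Message here
    format=(f'%(asctime)s: rank{rank}[%(process)d][%(threadName)s]: '
            '%(levelname)s: %(name)s: %(message)s'))

# 'debug' is the new default level, applied via .upper() as before.
python_log_level = 'debug'
logging.getLogger('llmfoundry').setLevel(python_log_level.upper())
logging.getLogger('llmfoundry').debug('logging configured')
```

One behavioral consequence worth flagging: runs that previously left `python_log_level` unset, and therefore skipped logging setup entirely, will now emit debug-level llmfoundry logs unless the YAML overrides the level.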