From 31e48795b5b0c39512bf689e8bf8004b0d47059c Mon Sep 17 00:00:00 2001
From: Angel Ruiz
Date: Fri, 22 Mar 2024 17:52:55 -0700
Subject: [PATCH] log details to metadata for run analytics (#992)

---
 llmfoundry/utils/__init__.py             |   8 ++
 llmfoundry/utils/mosaicmllogger_utils.py | 154 +++++++++++++++++++++++
 scripts/eval/eval.py                     |  40 +++---
 scripts/train/train.py                   |  23 ++--
 4 files changed, 199 insertions(+), 26 deletions(-)
 create mode 100644 llmfoundry/utils/mosaicmllogger_utils.py

diff --git a/llmfoundry/utils/__init__.py b/llmfoundry/utils/__init__.py
index 40e18ae3a3..ab646936f9 100644
--- a/llmfoundry/utils/__init__.py
+++ b/llmfoundry/utils/__init__.py
@@ -21,6 +21,10 @@
 from llmfoundry.utils.logging_utils import SpecificWarningFilter
 from llmfoundry.utils.model_download_utils import (
     download_from_hf_hub, download_from_http_fileserver, download_from_oras)
+from llmfoundry.utils.mosaicmllogger_utils import (create_mosaicml_logger,
+                                                   find_mosaicml_logger,
+                                                   log_eval_analytics,
+                                                   log_train_analytics)
 from llmfoundry.utils.prompt_files import load_prompts, load_prompts_from_file
 from llmfoundry.utils.registry_utils import (TypedRegistry,
                                              construct_from_registry,
@@ -59,4 +63,8 @@
     'create_registry',
     'construct_from_registry',
     'TypedRegistry',
+    'find_mosaicml_logger',
+    'log_eval_analytics',
+    'log_train_analytics',
+    'create_mosaicml_logger',
 ]
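
For context, a minimal sketch of how the newly exported helpers are meant to
compose (this assumes a run launched from the MosaicML platform; off-platform,
create_mosaicml_logger() returns None, so nothing is appended):

    from llmfoundry.utils import create_mosaicml_logger, find_mosaicml_logger

    loggers = []  # normally built from the YAML `loggers` config
    mosaicml_logger = find_mosaicml_logger(loggers)
    if mosaicml_logger is None:
        # Returns None when the run was not sent from the MosaicML platform.
        mosaicml_logger = create_mosaicml_logger()
        if mosaicml_logger is not None:
            loggers.append(mosaicml_logger)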
diff --git a/llmfoundry/utils/mosaicmllogger_utils.py b/llmfoundry/utils/mosaicmllogger_utils.py
new file mode 100644
index 0000000000..182a7eef80
--- /dev/null
+++ b/llmfoundry/utils/mosaicmllogger_utils.py
@@ -0,0 +1,154 @@
+# Copyright 2024 MosaicML LLM Foundry authors
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import os
+from typing import Any, Dict, List, Optional, Union
+
+from composer.loggers import MosaicMLLogger
+from composer.loggers.logger_destination import LoggerDestination
+from composer.loggers.mosaicml_logger import (MOSAICML_ACCESS_TOKEN_ENV_VAR,
+                                              MOSAICML_PLATFORM_ENV_VAR)
+from omegaconf import DictConfig, ListConfig
+
+
+def create_mosaicml_logger() -> Union[MosaicMLLogger, None]:
+    """Creates a MosaicMLLogger if the run was sent from the MosaicML platform."""
+    if os.environ.get(MOSAICML_PLATFORM_ENV_VAR, 'false').lower(
+    ) == 'true' and os.environ.get(MOSAICML_ACCESS_TOKEN_ENV_VAR):
+        # Only create the logger if the run was sent from the MosaicML
+        # platform and an access token is set; callers are responsible for
+        # checking that a MosaicMLLogger was not previously added.
+        return MosaicMLLogger()
+
+
+def find_mosaicml_logger(
+        loggers: List[LoggerDestination]) -> Union[MosaicMLLogger, None]:
+    """Returns the first MosaicMLLogger in the list, or None if absent."""
+    return next(
+        (logger for logger in loggers if isinstance(logger, MosaicMLLogger)),
+        None)
+
+
+def log_eval_analytics(mosaicml_logger: MosaicMLLogger,
+                       model_configs: ListConfig, icl_tasks: Union[str,
+                                                                   ListConfig],
+                       eval_gauntlet_config: Optional[Union[str, DictConfig]]):
+    """Logs analytics for runs using the `eval.py` script."""
+    metrics: Dict[str, Any] = {
+        'llmfoundry/script': 'eval',
+    }
+
+    if eval_gauntlet_config is not None:
+        metrics['llmfoundry/gauntlet_configured'] = True
+    else:
+        metrics['llmfoundry/gauntlet_configured'] = False
+
+    if isinstance(icl_tasks, str):
+        metrics['llmfoundry/icl_configured'] = True
+    elif len(icl_tasks) > 0:
+        metrics['llmfoundry/icl_configured'] = True
+    else:
+        metrics['llmfoundry/icl_configured'] = False
+
+    metrics['llmfoundry/model_configs'] = []
+    for model_config in model_configs:
+        model_config_data = {}
+        if model_config.get('vocab_size', None) is not None:
+            model_config_data['vocab_size'] = model_config.get('vocab_size')
+        if model_config.get('d_model', None) is not None:
+            model_config_data['d_model'] = model_config.get('d_model')
+        if model_config.get('n_heads', None) is not None:
+            model_config_data['n_heads'] = model_config.get('n_heads')
+
+        if len(model_config_data) > 0:
+            metrics['llmfoundry/model_configs'].append(
+                json.dumps(model_config_data, sort_keys=True))
+    mosaicml_logger.log_metrics(metrics)
+    mosaicml_logger._flush_metadata(force_flush=True)
+
+
+def log_train_analytics(mosaicml_logger: MosaicMLLogger,
+                        model_config: DictConfig,
+                        train_loader_config: DictConfig,
+                        eval_loader_config: Union[DictConfig, ListConfig, None],
+                        callback_configs: Union[DictConfig, None],
+                        tokenizer_name: str, load_path: Union[str, None],
+                        icl_tasks_config: Optional[Union[ListConfig, str]],
+                        eval_gauntlet: Optional[Union[DictConfig, str]]):
+    """Logs analytics for runs using the `train.py` script."""
+    train_loader_dataset = train_loader_config.get('dataset', {})
+    metrics: Dict[str, Any] = {
+        'llmfoundry/tokenizer_name':
+            tokenizer_name,
+        'llmfoundry/script':
+            'train',
+        'llmfoundry/train_loader_name':
+            train_loader_config.get('name'),
+        'llmfoundry/train_loader_workers':
+            train_loader_dataset.get('num_workers'),
+    }
+
+    if callback_configs is not None:
+        metrics['llmfoundry/callbacks'] = [
+            name for name, _ in callback_configs.items()
+        ]
+
+    if eval_gauntlet is not None:
+        metrics['llmfoundry/gauntlet_configured'] = True
+    else:
+        metrics['llmfoundry/gauntlet_configured'] = False
+
+    if icl_tasks_config is not None:
+        if isinstance(icl_tasks_config, str):
+            metrics['llmfoundry/icl_configured'] = True
+        elif len(icl_tasks_config) > 0:
+            metrics['llmfoundry/icl_configured'] = True
+        else:
+            metrics['llmfoundry/icl_configured'] = False
+    else:
+        metrics['llmfoundry/icl_configured'] = False
+
+    if train_loader_dataset.get('hf_name', None) is not None:
+        metrics['llmfoundry/train_dataset_hf_name'] = train_loader_dataset.get(
+            'hf_name', None)
+    if train_loader_config.get('name') == 'finetuning':
+        metrics['llmfoundry/train_task_type'] = 'INSTRUCTION_FINETUNE'
+    elif train_loader_config.get('name') == 'text':
+        if load_path is not None or model_config.get('pretrained') == True:
+            metrics['llmfoundry/train_task_type'] = 'CONTINUED_PRETRAIN'
+        else:
+            metrics['llmfoundry/train_task_type'] = 'PRETRAIN'
+
+    if eval_loader_config is not None:
+        metrics['llmfoundry/eval_loaders'] = []
+
+        if isinstance(eval_loader_config, ListConfig):
+            eval_loader_configs: ListConfig = eval_loader_config
+        else:
+            eval_loader_configs = ListConfig([eval_loader_config])
+
+        for loader_config in eval_loader_configs:
+            eval_loader_info = {}
+            eval_loader_dataset = loader_config.get('dataset', {})
+            eval_loader_info['name'] = loader_config.get('name')
+            eval_loader_info['num_workers'] = eval_loader_dataset.get(
+                'num_workers', None)
+            if eval_loader_dataset.get('hf_name', None) is not None:
+                eval_loader_info['dataset_hf_name'] = eval_loader_dataset.get(
+                    'hf_name')
+
+            # Log as a key-sorted JSON string so that it is easy to parse in
+            # Spark / SQL
+            metrics['llmfoundry/eval_loaders'].append(
+                json.dumps(eval_loader_info, sort_keys=True))
+
+    if model_config['name'] == 'hf_causal_lm':
+        metrics['llmfoundry/model_name'] = model_config.get(
+            'pretrained_model_name_or_path')
+    if model_config.get('vocab_size', None) is not None:
+        metrics['llmfoundry/vocab_size'] = model_config.get('vocab_size')
+    if model_config.get('d_model', None) is not None:
+        metrics['llmfoundry/d_model'] = model_config.get('d_model')
+    if model_config.get('n_heads', None) is not None:
+        metrics['llmfoundry/n_heads'] = model_config.get('n_heads')
+
+    mosaicml_logger.log_metrics(metrics)
+    mosaicml_logger._flush_metadata(force_flush=True)
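
To make the analytics schema concrete, here is an illustrative, hypothetical
payload that log_eval_analytics() would emit for a one-model eval config with
ICL tasks given as a YAML path and no gauntlet (the values are invented; model
config entries are serialized as key-sorted JSON strings):

    from omegaconf import ListConfig

    model_configs = ListConfig([{
        'vocab_size': 32000,
        'd_model': 4096,
        'n_heads': 32,
    }])
    # log_eval_analytics(mosaicml_logger, model_configs,
    #                    'eval/yamls/tasks.yaml', None) logs roughly:
    # {
    #     'llmfoundry/script': 'eval',
    #     'llmfoundry/gauntlet_configured': False,
    #     'llmfoundry/icl_configured': True,
    #     'llmfoundry/model_configs':
    #         ['{"d_model": 4096, "n_heads": 32, "vocab_size": 32000}'],
    # }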
diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 722c603356..c1267ab1dd 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -11,7 +11,6 @@
 
 import pandas as pd
 import torch
-from composer.loggers import MosaicMLLogger
 from composer.loggers.logger_destination import LoggerDestination
 from composer.models.base import ComposerModel
 from composer.trainer import Trainer
@@ -21,6 +20,9 @@
 from rich.traceback import install
 from transformers import PreTrainedTokenizerBase
 
+from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger,
+                              log_eval_analytics)
+
 install()
 from llmfoundry.models.model_registry import COMPOSER_MODEL_REGISTRY
 from llmfoundry.utils.builders import (add_metrics_to_eval_loaders,
@@ -69,7 +71,7 @@ def evaluate_model(
     eval_loader_config: Optional[Union[DictConfig, ListConfig]],
     fsdp_config: Optional[Dict],
     num_retries: int,
-    loggers_cfg: Dict[str, Any],
+    loggers: List[LoggerDestination],
     python_log_level: Optional[str],
     precision: str,
    eval_gauntlet_df: Optional[pd.DataFrame],
@@ -103,20 +105,9 @@ def evaluate_model(
     if eval_gauntlet_callback is not None:
         callbacks.append(eval_gauntlet_callback)
 
-    loggers: List[LoggerDestination] = [
-        build_logger(name, logger_cfg)
-        for name, logger_cfg in loggers_cfg.items()
-    ]
-
     if metadata is not None:
-        # Flatten the metadata for logging
-        loggers_cfg.pop('metadata', None)
-        loggers_cfg.update(metadata, merge=True)
-
         # Find the MosaicMLLogger
-        mosaicml_logger = next((
-            logger for logger in loggers if isinstance(logger, MosaicMLLogger)),
-                               None)
+        mosaicml_logger = find_mosaicml_logger(loggers)
 
         if mosaicml_logger is not None:
             mosaicml_logger.log_metrics(metadata)
@@ -153,7 +144,6 @@ def evaluate_model(
     assert composer_model is not None
 
     log.info(f'Building trainer for {model_cfg.model_name}...')
-
     trainer = Trainer(
         run_name=run_name,
         seed=seed,
@@ -297,6 +287,24 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
     models_df = None
     composite_scores = None
     trainers = []
+
+    loggers: List[LoggerDestination] = [
+        build_logger(name, logger_cfg)
+        for name, logger_cfg in loggers_cfg.items()
+    ]
+
+    mosaicml_logger = find_mosaicml_logger(loggers)
+    if mosaicml_logger is None:
+        # create_mosaicml_logger() returns None if the run was not sent
+        # from the MosaicML platform
+        mosaicml_logger = create_mosaicml_logger()
+        if mosaicml_logger is not None:
+            loggers.append(mosaicml_logger)
+
+    if mosaicml_logger is not None:
+        log_eval_analytics(mosaicml_logger, model_configs, icl_tasks,
+                           eval_gauntlet_config)
+
     for model_cfg in model_configs:
         (trainer, logger_keys, eval_gauntlet_callback,
          eval_gauntlet_df) = evaluate_model(
@@ -311,7 +319,7 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
             eval_loader_config=eval_loader_config,
             fsdp_config=fsdp_config,
             num_retries=num_retries,
-            loggers_cfg=loggers_cfg,
+            loggers=loggers,
             python_log_level=python_log_level,
             precision=precision,
             eval_gauntlet_df=eval_gauntlet_df,
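
A quick sanity check of the find_mosaicml_logger semantics (a sketch;
InMemoryLogger is just an arbitrary stand-in LoggerDestination from composer):

    from composer.loggers import InMemoryLogger

    from llmfoundry.utils import find_mosaicml_logger

    # Returns None when the list contains no MosaicMLLogger.
    assert find_mosaicml_logger([]) is None
    assert find_mosaicml_logger([InMemoryLogger()]) is None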
diff --git a/scripts/train/train.py b/scripts/train/train.py
index 478b484fb9..c608dfb619 100644
--- a/scripts/train/train.py
+++ b/scripts/train/train.py
@@ -12,9 +12,6 @@
 import torch
 from composer import Trainer
 from composer.core.callback import Callback
-from composer.loggers import MosaicMLLogger
-from composer.loggers.mosaicml_logger import (MOSAICML_ACCESS_TOKEN_ENV_VAR,
-                                              MOSAICML_PLATFORM_ENV_VAR)
 from composer.metrics.nlp import InContextLearningMetric
 from composer.profiler import (JSONTraceHandler, Profiler, TraceHandler,
                                cyclic_schedule)
@@ -23,6 +20,9 @@
 from omegaconf import OmegaConf as om
 from rich.traceback import install
 
+from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger,
+                              log_train_analytics)
+
 install()
 from transformers import PreTrainedTokenizerBase
 
@@ -449,14 +449,11 @@ def main(cfg: DictConfig) -> Trainer:
         for name, logger_cfg in logger_configs.items()
     ] if logger_configs else []
 
-    mosaicml_logger = next(
-        (logger for logger in loggers if isinstance(logger, MosaicMLLogger)),
-        None)
+    mosaicml_logger = find_mosaicml_logger(loggers)
     if mosaicml_logger is None:
-        if os.environ.get(MOSAICML_PLATFORM_ENV_VAR, 'false').lower(
-        ) == 'true' and os.environ.get(MOSAICML_ACCESS_TOKEN_ENV_VAR):
-            # Adds mosaicml logger to composer if the run was sent from Mosaic platform, access token is set, and mosaic logger wasn't previously added
-            mosaicml_logger = MosaicMLLogger()
+        mosaicml_logger = create_mosaicml_logger()
+        # create_mosaicml_logger() returns None off the MosaicML platform
+        if mosaicml_logger is not None:
             loggers.append(mosaicml_logger)
 
     if metadata is not None:
@@ -543,6 +540,12 @@ def main(cfg: DictConfig) -> Trainer:
     if eval_gauntlet_callback is not None:
         callbacks.append(eval_gauntlet_callback)
 
+    if mosaicml_logger is not None:
+        log_train_analytics(mosaicml_logger, model_config, train_loader_config,
+                            eval_loader_config, callback_configs,
+                            tokenizer_name, load_path, icl_tasks_config,
+                            eval_gauntlet_config)
+
     # Build Model
     log.info('Initializing model...')
     with init_context:
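
The train_task_type classification the patch introduces can be summarized in a
small stand-alone sketch (variable names here mirror the YAML keys; the real
code reads them from train_loader_config and model_config):

    train_loader_name = 'text'  # train_loader_config['name']
    load_path = None            # checkpoint to resume from, if any
    pretrained = False          # model_config.get('pretrained')

    task_type = None
    if train_loader_name == 'finetuning':
        task_type = 'INSTRUCTION_FINETUNE'
    elif train_loader_name == 'text':
        if load_path is not None or pretrained:
            task_type = 'CONTINUED_PRETRAIN'
        else:
            task_type = 'PRETRAIN'
    print(task_type)  # -> PRETRAIN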