log details to metadata for run analytics (#992)
angel-ruiz7 committed Mar 23, 2024
1 parent 26a5fd4 commit 31e4879
Showing 4 changed files with 199 additions and 26 deletions.
8 changes: 8 additions & 0 deletions llmfoundry/utils/__init__.py
@@ -21,6 +21,10 @@
from llmfoundry.utils.logging_utils import SpecificWarningFilter
from llmfoundry.utils.model_download_utils import (
    download_from_hf_hub, download_from_http_fileserver, download_from_oras)
from llmfoundry.utils.mosaicmllogger_utils import (create_mosaicml_logger,
                                                   find_mosaicml_logger,
                                                   log_eval_analytics,
                                                   log_train_analytics)
from llmfoundry.utils.prompt_files import load_prompts, load_prompts_from_file
from llmfoundry.utils.registry_utils import (TypedRegistry,
                                             construct_from_registry,
@@ -59,4 +63,8 @@
    'create_registry',
    'construct_from_registry',
    'TypedRegistry',
    'find_mosaicml_logger',
    'log_eval_analytics',
    'log_train_analytics',
    'create_mosaicml_logger',
]
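
With these names exported from the package root, the scripts below (and any downstream code) can import the analytics helpers directly. A one-line sketch of the import this change enables:

from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger,
                              log_eval_analytics, log_train_analytics)
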
154 changes: 154 additions & 0 deletions llmfoundry/utils/mosaicmllogger_utils.py
@@ -0,0 +1,154 @@
# Copyright 2024 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0
import json
import os
from typing import Any, Dict, List, Optional, Union

from composer.loggers import MosaicMLLogger
from composer.loggers.logger_destination import LoggerDestination
from composer.loggers.mosaicml_logger import (MOSAICML_ACCESS_TOKEN_ENV_VAR,
                                              MOSAICML_PLATFORM_ENV_VAR)
from omegaconf import DictConfig, ListConfig


def create_mosaicml_logger() -> Union[MosaicMLLogger, None]:
    """Creates a MosaicMLLogger if the run was sent from the MosaicML platform.

    Returns None otherwise; callers are responsible for checking whether a
    MosaicMLLogger has already been configured.
    """
    if os.environ.get(MOSAICML_PLATFORM_ENV_VAR, 'false').lower(
    ) == 'true' and os.environ.get(MOSAICML_ACCESS_TOKEN_ENV_VAR):
        # The run was sent from the MosaicML platform and an access token is set.
        return MosaicMLLogger()


def find_mosaicml_logger(
        loggers: List[LoggerDestination]) -> Union[MosaicMLLogger, None]:
    """Returns the first MosaicMLLogger in the list, or None if there is none."""
    return next(
        (logger for logger in loggers if isinstance(logger, MosaicMLLogger)),
        None)


def log_eval_analytics(mosaicml_logger: MosaicMLLogger,
                       model_configs: ListConfig,
                       icl_tasks: Union[str, ListConfig],
                       eval_gauntlet_config: Optional[Union[str, DictConfig]]):
    """Logs analytics for runs using the `eval.py` script."""
    metrics: Dict[str, Any] = {
        'llmfoundry/script': 'eval',
    }

    if eval_gauntlet_config is not None:
        metrics['llmfoundry/gauntlet_configured'] = True
    else:
        metrics['llmfoundry/gauntlet_configured'] = False

    if isinstance(icl_tasks, str):
        metrics['llmfoundry/icl_configured'] = True
    elif len(icl_tasks) > 0:
        metrics['llmfoundry/icl_configured'] = True
    else:
        metrics['llmfoundry/icl_configured'] = False

    metrics['llmfoundry/model_configs'] = []
    for model_config in model_configs:
        model_config_data = {}
        if model_config.get('vocab_size', None) is not None:
            model_config_data['vocab_size'] = model_config.get('vocab_size')
        if model_config.get('d_model', None) is not None:
            model_config_data['d_model'] = model_config.get('d_model')
        if model_config.get('n_heads', None) is not None:
            model_config_data['n_heads'] = model_config.get('n_heads')

        if len(model_config_data) > 0:
            metrics['llmfoundry/model_configs'].append(
                json.dumps(model_config_data, sort_keys=True))
    mosaicml_logger.log_metrics(metrics)
    mosaicml_logger._flush_metadata(force_flush=True)


def log_train_analytics(mosaicml_logger: MosaicMLLogger,
                        model_config: DictConfig,
                        train_loader_config: DictConfig,
                        eval_loader_config: Union[DictConfig, ListConfig, None],
                        callback_configs: Union[DictConfig, None],
                        tokenizer_name: str, load_path: Union[str, None],
                        icl_tasks_config: Optional[Union[ListConfig, str]],
                        eval_gauntlet: Optional[Union[DictConfig, str]]):
    """Logs analytics for runs using the `train.py` script."""
    train_loader_dataset = train_loader_config.get('dataset', {})
    metrics: Dict[str, Any] = {
        'llmfoundry/tokenizer_name': tokenizer_name,
        'llmfoundry/script': 'train',
        'llmfoundry/train_loader_name': train_loader_config.get('name'),
        'llmfoundry/train_loader_workers':
            train_loader_dataset.get('num_workers'),
    }

    if callback_configs is not None:
        metrics['llmfoundry/callbacks'] = [
            name for name, _ in callback_configs.items()
        ]

    if eval_gauntlet is not None:
        metrics['llmfoundry/gauntlet_configured'] = True
    else:
        metrics['llmfoundry/gauntlet_configured'] = False

    if icl_tasks_config is not None:
        if isinstance(icl_tasks_config, str):
            metrics['llmfoundry/icl_configured'] = True
        elif len(icl_tasks_config) > 0:
            metrics['llmfoundry/icl_configured'] = True
        else:
            metrics['llmfoundry/icl_configured'] = False
    else:
        metrics['llmfoundry/icl_configured'] = False

    if train_loader_dataset.get('hf_name', None) is not None:
        metrics['llmfoundry/train_dataset_hf_name'] = train_loader_dataset.get(
            'hf_name', None)
    if train_loader_config.get('name') == 'finetuning':
        metrics['llmfoundry/train_task_type'] = 'INSTRUCTION_FINETUNE'
    elif train_loader_config.get('name') == 'text':
        if load_path is not None or model_config.get('pretrained') == True:
            metrics['llmfoundry/train_task_type'] = 'CONTINUED_PRETRAIN'
        else:
            metrics['llmfoundry/train_task_type'] = 'PRETRAIN'

    if eval_loader_config is not None:
        metrics['llmfoundry/eval_loaders'] = []

        if isinstance(eval_loader_config, ListConfig):
            eval_loader_configs: ListConfig = eval_loader_config
        else:
            eval_loader_configs = ListConfig([eval_loader_config])

        for loader_config in eval_loader_configs:
            eval_loader_info = {}
            eval_loader_dataset = loader_config.get('dataset', {})
            eval_loader_info['name'] = loader_config.get('name')
            eval_loader_info['num_workers'] = eval_loader_dataset.get(
                'num_workers', None)
            if eval_loader_dataset.get('hf_name', None) is not None:
                eval_loader_info['dataset_hf_name'] = eval_loader_dataset.get(
                    'hf_name')

            # Log as a key-sorted JSON string, so that we can easily parse it
            # in Spark / SQL.
            metrics['llmfoundry/eval_loaders'].append(
                json.dumps(eval_loader_info, sort_keys=True))

    if model_config['name'] == 'hf_causal_lm':
        metrics['llmfoundry/model_name'] = model_config.get(
            'pretrained_model_name_or_path')
    if model_config.get('vocab_size', None) is not None:
        metrics['llmfoundry/vocab_size'] = model_config.get('vocab_size')
    if model_config.get('d_model', None) is not None:
        metrics['llmfoundry/d_model'] = model_config.get('d_model')
    if model_config.get('n_heads', None) is not None:
        metrics['llmfoundry/n_heads'] = model_config.get('n_heads')

    mosaicml_logger.log_metrics(metrics)
    mosaicml_logger._flush_metadata(force_flush=True)
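
Taken together, the helpers are meant to be used in a find-or-create-then-log pattern. A minimal sketch of that flow, mirroring the eval.py and train.py wiring shown below; the config values passed to log_train_analytics here are hypothetical and exist only for illustration:

from typing import List

from composer.loggers.logger_destination import LoggerDestination
from omegaconf import DictConfig

from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger,
                              log_train_analytics)

loggers: List[LoggerDestination] = []  # normally produced by build_logger(...)

# Reuse an existing MosaicMLLogger if one was configured; otherwise try to
# create one (create_mosaicml_logger returns None off the MosaicML platform).
mosaicml_logger = find_mosaicml_logger(loggers)
if mosaicml_logger is None:
    mosaicml_logger = create_mosaicml_logger()
    if mosaicml_logger is not None:
        loggers.append(mosaicml_logger)

if mosaicml_logger is not None:
    # Every config below is a made-up stand-in for the real run config.
    log_train_analytics(
        mosaicml_logger,
        model_config=DictConfig({'name': 'hf_causal_lm', 'pretrained': True}),
        train_loader_config=DictConfig({'name': 'finetuning', 'dataset': {}}),
        eval_loader_config=None,
        callback_configs=None,
        tokenizer_name='mosaicml/mpt-7b',  # hypothetical tokenizer name
        load_path=None,
        icl_tasks_config=None,
        eval_gauntlet=None,
    )
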
40 changes: 24 additions & 16 deletions scripts/eval/eval.py
@@ -11,7 +11,6 @@

import pandas as pd
import torch
from composer.loggers import MosaicMLLogger
from composer.loggers.logger_destination import LoggerDestination
from composer.models.base import ComposerModel
from composer.trainer import Trainer
@@ -21,6 +20,9 @@
from rich.traceback import install
from transformers import PreTrainedTokenizerBase

from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger,
                              log_eval_analytics)

install()
from llmfoundry.models.model_registry import COMPOSER_MODEL_REGISTRY
from llmfoundry.utils.builders import (add_metrics_to_eval_loaders,
@@ -69,7 +71,7 @@ def evaluate_model(
    eval_loader_config: Optional[Union[DictConfig, ListConfig]],
    fsdp_config: Optional[Dict],
    num_retries: int,
    loggers_cfg: Dict[str, Any],
    loggers: List[LoggerDestination],
    python_log_level: Optional[str],
    precision: str,
    eval_gauntlet_df: Optional[pd.DataFrame],
@@ -103,20 +105,9 @@
    if eval_gauntlet_callback is not None:
        callbacks.append(eval_gauntlet_callback)

    loggers: List[LoggerDestination] = [
        build_logger(name, logger_cfg)
        for name, logger_cfg in loggers_cfg.items()
    ]

    if metadata is not None:
        # Flatten the metadata for logging
        loggers_cfg.pop('metadata', None)
        loggers_cfg.update(metadata, merge=True)

        # Find the MosaicMLLogger
        mosaicml_logger = next(
            (logger for logger in loggers if isinstance(logger, MosaicMLLogger)),
            None)
        mosaicml_logger = find_mosaicml_logger(loggers)

        if mosaicml_logger is not None:
            mosaicml_logger.log_metrics(metadata)
@@ -153,7 +144,6 @@ def evaluate_model(
    assert composer_model is not None

    log.info(f'Building trainer for {model_cfg.model_name}...')

    trainer = Trainer(
        run_name=run_name,
        seed=seed,
@@ -297,6 +287,24 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
    models_df = None
    composite_scores = None
    trainers = []

    loggers: List[LoggerDestination] = [
        build_logger(name, logger_cfg)
        for name, logger_cfg in loggers_cfg.items()
    ]

    mosaicml_logger = find_mosaicml_logger(loggers)
    if mosaicml_logger is None:
        mosaicml_logger = create_mosaicml_logger()
        # mosaicml_logger will be None if run isn't on MosaicML platform
        if mosaicml_logger is not None:
            loggers.append(mosaicml_logger)

    # mosaicml_logger will be None if the run isn't from the MosaicML platform
    if mosaicml_logger is not None:
        log_eval_analytics(mosaicml_logger, model_configs, icl_tasks,
                           eval_gauntlet_config)

    for model_cfg in model_configs:
        (trainer, logger_keys, eval_gauntlet_callback,
         eval_gauntlet_df) = evaluate_model(
@@ -311,7 +319,7 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
            eval_loader_config=eval_loader_config,
            fsdp_config=fsdp_config,
            num_retries=num_retries,
            loggers_cfg=loggers_cfg,
            loggers=loggers,
            python_log_level=python_log_level,
            precision=precision,
            eval_gauntlet_df=eval_gauntlet_df,
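
For reference, log_eval_analytics produces a flat metrics dict keyed under the llmfoundry/ namespace. The values below are invented, but the keys match what the helper assembles above:

{
    'llmfoundry/script': 'eval',
    'llmfoundry/gauntlet_configured': True,
    'llmfoundry/icl_configured': True,
    'llmfoundry/model_configs': [
        '{"d_model": 4096, "n_heads": 32, "vocab_size": 50368}',  # key-sorted JSON
    ],
}
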
23 changes: 13 additions & 10 deletions scripts/train/train.py
@@ -12,9 +12,6 @@
import torch
from composer import Trainer
from composer.core.callback import Callback
from composer.loggers import MosaicMLLogger
from composer.loggers.mosaicml_logger import (MOSAICML_ACCESS_TOKEN_ENV_VAR,
                                              MOSAICML_PLATFORM_ENV_VAR)
from composer.metrics.nlp import InContextLearningMetric
from composer.profiler import (JSONTraceHandler, Profiler, TraceHandler,
                               cyclic_schedule)
@@ -23,6 +20,9 @@
from omegaconf import OmegaConf as om
from rich.traceback import install

from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger,
                              log_train_analytics)

install()

from transformers import PreTrainedTokenizerBase
@@ -449,14 +449,11 @@ def main(cfg: DictConfig) -> Trainer:
        for name, logger_cfg in logger_configs.items()
    ] if logger_configs else []

    mosaicml_logger = next(
        (logger for logger in loggers if isinstance(logger, MosaicMLLogger)),
        None)
    mosaicml_logger = find_mosaicml_logger(loggers)
    if mosaicml_logger is None:
        if os.environ.get(MOSAICML_PLATFORM_ENV_VAR, 'false').lower(
        ) == 'true' and os.environ.get(MOSAICML_ACCESS_TOKEN_ENV_VAR):
            # Adds mosaicml logger to composer if the run was sent from Mosaic platform, access token is set, and mosaic logger wasn't previously added
            mosaicml_logger = MosaicMLLogger()
        mosaicml_logger = create_mosaicml_logger()
        if mosaicml_logger is not None:
            # mosaicml_logger will be None if run isn't on MosaicML platform
            loggers.append(mosaicml_logger)

    if metadata is not None:
@@ -543,6 +540,12 @@ def main(cfg: DictConfig) -> Trainer:
    if eval_gauntlet_callback is not None:
        callbacks.append(eval_gauntlet_callback)

    if mosaicml_logger is not None:
        log_train_analytics(mosaicml_logger, model_config, train_loader_config,
                            eval_loader_config, callback_configs,
                            tokenizer_name, load_path, icl_tasks_config,
                            eval_gauntlet_config)

    # Build Model
    log.info('Initializing model...')
    with init_context:
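
On the train side, the payload has the same shape plus the loader and task-type fields. The sketch below uses invented values (only the keys come from log_train_analytics) and shows how the key-sorted JSON entries under llmfoundry/eval_loaders can be decoded again downstream, e.g. in a Spark or SQL pipeline:

import json

metrics = {
    'llmfoundry/script': 'train',
    'llmfoundry/tokenizer_name': 'mosaicml/mpt-7b',  # hypothetical
    'llmfoundry/train_loader_name': 'finetuning',
    'llmfoundry/train_loader_workers': 8,
    'llmfoundry/gauntlet_configured': False,
    'llmfoundry/icl_configured': False,
    'llmfoundry/train_task_type': 'INSTRUCTION_FINETUNE',
    'llmfoundry/eval_loaders': [
        '{"dataset_hf_name": "my-eval-set", "name": "finetuning", "num_workers": 8}',
    ],
}

eval_loaders = [
    json.loads(entry) for entry in metrics['llmfoundry/eval_loaders']
]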
