Merge branch 'main' into model_gauntlet
codestar12 committed Jun 28, 2023
2 parents 3f22feb + 9235e38 commit 266fcea
Showing 18 changed files with 337 additions and 168 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -120,6 +120,9 @@ ENV/
env.bak/
venv.bak/

# python venv installed in the dir, llmfoundry-venv
*-venv

# Spyder project settings
.spyderproject
.spyproject
@@ -143,3 +146,6 @@

# macOS
.DS_Store

# notebooks
notebooks/
6 changes: 3 additions & 3 deletions llmfoundry/__init__.py
@@ -1,9 +1,9 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

try:
import torch
import torch

try:
from llmfoundry import optim, utils
from llmfoundry.data import (ConcatTokensDataset,
MixtureOfDenoisersCollator, NoConcatDataset,
@@ -24,7 +24,7 @@

except ImportError as e:
try:
is_cuda_available = torch.cuda.is_available() # type: ignore
is_cuda_available = torch.cuda.is_available()
except:
is_cuda_available = False

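This hunk moves `import torch` out of the try block so torch is always bound at module scope; with that, the `# type: ignore` on the CUDA probe in the except handler is no longer needed. A minimal sketch of the overall pattern, assuming the install-hint message (the exact wording in the file may differ):

    import torch

    try:
        from llmfoundry import optim, utils
    except ImportError as e:
        try:
            # torch is importable even when the optional extras are not
            is_cuda_available = torch.cuda.is_available()
        except:
            is_cuda_available = False
        extras = '.[gpu]' if is_cuda_available else '.'
        raise ImportError(
            f'Please install the requirements with pip install {extras}'
        ) from e
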
8 changes: 5 additions & 3 deletions llmfoundry/callbacks/fdiff_callback.py
@@ -47,10 +47,12 @@ def batch_end(self, state: State, logger: Logger):
def eval_end(self, state: State, logger: Logger):
if self.diff_eval_metrics:
evaluator = state.dataloader_label
metrics = list(state.eval_metrics[evaluator].keys()) # type: ignore
assert evaluator is not None, 'dataloader should have been set'

metrics = list(state.eval_metrics[evaluator].keys())

for k in metrics:
mkey = '/'.join(['metrics', evaluator, k]) # type: ignore
mkey = '/'.join(['metrics', evaluator, k])
if mkey in self.eval_prev_metric.keys():
logger.log_metrics({
f'{mkey}_fdiff':
@@ -59,5 +61,5 @@ def eval_end(self, state: State, logger: Logger):
})

for k in metrics:
mkey = '/'.join(['metrics', evaluator, k]) # type: ignore
mkey = '/'.join(['metrics', evaluator, k])
self.eval_prev_metric[mkey] = state.eval_metric_values[k]
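The edit here swaps `# type: ignore` comments for an assert: `state.dataloader_label` is an optional string, and asserting it is not None both narrows the type for the checker and fails loudly if the label was never set. A minimal sketch of the narrowing pattern, with hypothetical names:

    from typing import Optional

    def metric_key(evaluator: Optional[str], metric: str) -> str:
        # the assert narrows Optional[str] to str, so no ignore comment is needed
        assert evaluator is not None, 'dataloader should have been set'
        return '/'.join(['metrics', evaluator, metric])
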
12 changes: 7 additions & 5 deletions llmfoundry/callbacks/generate_callback.py
@@ -74,9 +74,9 @@ def generate(self, state: State, logger: Logger):
dummy_input = device.tensor_to_device(dummy_input)
with get_precision_context(state.precision):
with torch.no_grad():
_ = model.model(input_ids=dummy_input) # type: ignore
_ = model.model(input_ids=dummy_input)

output_token_ids = model.model.generate( # type: ignore
output_token_ids = model.model.generate(
input_ids=tokenized_input['input_ids'],
attention_mask=tokenized_input['attention_mask'],
synced_gpus=True,
Expand All @@ -85,9 +85,11 @@ def generate(self, state: State, logger: Logger):

if dist.get_global_rank() == 0:
if self.wandb_logger is not None:
artifact = wandb.Artifact(
'generate_samples_' + str(wandb.run.id), # type: ignore
type='predictions')
assert wandb.run is not None, 'wandb should have started run'

artifact = wandb.Artifact('generate_samples_' +
str(wandb.run.id),
type='predictions')

rows = []
for i in range(len(self.prompts)):
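Same pattern as the previous file: instead of silencing the type checker, the callback now asserts that `wandb.run` exists before reading its id. A hedged sketch of the guarded artifact construction (mirroring just this hunk, not the full callback):

    import wandb

    def make_generate_artifact() -> wandb.Artifact:
        # wandb.run is None until wandb.init() has run; assert the precondition
        assert wandb.run is not None, 'wandb should have started run'
        return wandb.Artifact('generate_samples_' + str(wandb.run.id),
                              type='predictions')
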
10 changes: 5 additions & 5 deletions llmfoundry/data/denoising.py
@@ -354,7 +354,7 @@ def build_text_denoising_dataloader(
cfg: DictConfig,
tokenizer: Tokenizer,
device_batch_size: int,
) -> DataLoader:
) -> DataLoader[Dict]:
"""Constructor function for a Mixture of Denoisers dataloader.
This function constructs a dataloader that can be used to train an
@@ -480,7 +480,7 @@ def build_text_denoising_dataloader(
batch_size=device_batch_size,
)

if dataset.tokenizer.pad_token is None: # type: ignore
if dataset.tokenizer.pad_token is None:
dataset.tokenizer.pad_token = dataset.tokenizer.eos_token

if cfg.dataset.get('packing_ratio'):
@@ -564,7 +564,7 @@ def noise_token_sequence(
else:
u = np.random.uniform(low=(mask_ratio * 2) - 1, high=1.0)
mean_span_length = float(np.round(1 + u * (length - 1)))
mask_ratio = mean_span_length / length # type: ignore
mask_ratio = mean_span_length / length
use_sentinels = False
else:
use_sentinels = True
@@ -871,9 +871,9 @@ def _format_tokens_for_decoder_only(
tokenizer = build_tokenizer(tokenizer_cfg)

loader = build_text_denoising_dataloader(cfg, tokenizer, device_batch_size)
assert isinstance(loader.dataset, StreamingTextDataset)

print(
f'\n\nTRUNCATING TO: {loader.dataset.max_seq_len}\n\n') # type: ignore
print(f'\n\nTRUNCATING TO: {loader.dataset.max_seq_len}\n\n')

packing = cfg.dataset.get('packing_ratio') is not None
if packing:
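Besides dropping the ignore comments, this file narrows the loader's return type to `DataLoader[Dict]`. The pad-token fallback visible in the hunk is worth noting: tokenizers that ship without a pad token reuse EOS for padding. A minimal sketch, assuming a GPT-2-style Hugging Face tokenizer:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('gpt2')
    if tokenizer.pad_token is None:
        # GPT-2 has no pad token; reuse EOS so batches can be padded
        tokenizer.pad_token = tokenizer.eos_token
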
182 changes: 108 additions & 74 deletions llmfoundry/models/hf/hf_causal_lm.py
@@ -6,6 +6,9 @@
import os
from typing import Mapping, Union

# required for loading a python model into composer
import peft
import transformers
from composer.metrics.nlp import (InContextLearningLMAccuracy,
InContextLearningLMExpectedCalibrationError,
InContextLearningMCExpectedCalibrationError,
@@ -30,7 +33,8 @@ class ComposerHFCausalLM(HuggingFaceModelWithZLoss):
"""Configures a :class:`.HuggingFaceModel` around a Causal LM.
Args:
cfg (DictConfig): An omegaconf dictionary used to configure the model:
om_model_config (DictConfig | peft.peft_model.PeftModel | transformers.PreTrainedModel): either an omegaconf dictionary used to configure the model, or an instantiated model object from the peft or transformers library.
if DictConfig, the following keys are required:
cfg.pretrained_model_name_or_path (str): The name of or local path to
the HF Causal LM (e.g., `gpt2` to instantiate a GPT2LMHeadModel).
cfg.config_overrides (dict, optional): An optional dictionary of keyword
@@ -45,34 +49,12 @@
tokenizer (PreTrainedTokenizer): The tokenizer that the model will use.
"""

def __init__(self, om_model_config: DictConfig, tokenizer: Tokenizer):
trust_remote_code = om_model_config.get('trust_remote_code', True)
use_auth_token = om_model_config.get('use_auth_token', False)
config = AutoConfig.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
)

# set config overrides
for k, v in om_model_config.get('config_overrides', {}).items():
if not hasattr(config, k):
raise ValueError(
f'config does not have attribute "{k}" to override ({k}: {v}).'
)

attr = getattr(config, k)
if isinstance(attr, Mapping):
extra_keys = [_k for _k in v.keys() if _k not in attr.keys()]
if extra_keys:
raise ValueError(
f'Config dict override got unknown keys. '
f'Extra keys: {extra_keys}. '
f'Expected (a subset of) keys: {list(attr.keys())}.')
getattr(config, k).update(v)
else:
setattr(config, k, v)
def __init__(self,
om_model_config: Union[DictConfig, peft.peft_model.PeftModel,
transformers.PreTrainedModel],
tokenizer: Tokenizer):

# set up training and eval metrics
train_metrics = [
LanguageCrossEntropy(),
LanguagePerplexity(),
@@ -87,64 +69,116 @@ def __init__(self, om_model_config: DictConfig, tokenizer: Tokenizer):
InContextLearningMCExpectedCalibrationError()
]

init_device = om_model_config.get('init_device', 'cpu')

# Get the device we want to initialize, and use the
# resolved version to initialize the HF model
resolved_init_device = hf_get_init_device(init_device)

# We need to have all non-zero local ranks be not-pretrained
# Rank 0 will still be pretrained, and distribute the weights appropriately
if dist.get_local_rank() != 0 and init_device == 'mixed':
om_model_config.pretrained = False

if resolved_init_device == 'cpu':
if om_model_config.pretrained:
model = AutoModelForCausalLM.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
config=config)
# if we are passed a DictConfig, we need to instantiate the model
if isinstance(om_model_config, DictConfig):

# load the model config
trust_remote_code = om_model_config.get('trust_remote_code', True)
use_auth_token = om_model_config.get('use_auth_token', False)
config = AutoConfig.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
)

# set config overrides
for k, v in om_model_config.get('config_overrides', {}).items():
if not hasattr(config, k):
raise ValueError(
f'config does not have attribute "{k}" to override ({k}: {v}).'
)

attr = getattr(config, k)
if isinstance(attr, Mapping):
extra_keys = [
_k for _k in v.keys() if _k not in attr.keys()
]
if extra_keys:
raise ValueError(
f'Config dict override got unknown keys. '
f'Extra keys: {extra_keys}. '
f'Expected (a subset of) keys: {list(attr.keys())}.'
)
getattr(config, k).update(v)
else:
setattr(config, k, v)

# below we set up the device to initialize the model on
init_device = om_model_config.get('init_device', 'cpu')

# Get the device we want to initialize, and use the
# resolved version to initialize the HF model
resolved_init_device = hf_get_init_device(init_device)

# We need to have all non-zero local ranks be not-pretrained
# Rank 0 will still be pretrained, and distribute the weights appropriately
if dist.get_local_rank() != 0 and init_device == 'mixed':
om_model_config.pretrained = False

# initialize the model on the correct device
if resolved_init_device == 'cpu':
if om_model_config.pretrained:
model = AutoModelForCausalLM.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
config=config)
else:
model = AutoModelForCausalLM.from_config(
config,
trust_remote_code=trust_remote_code,
)
elif resolved_init_device == 'meta':
if om_model_config.pretrained:
raise ValueError(
'Setting cfg.pretrained=True is not supported when init_device="meta".'
)
with init_empty_weights(include_buffers=False):
model = AutoModelForCausalLM.from_config(
config,
trust_remote_code=trust_remote_code,
)
else:
model = AutoModelForCausalLM.from_config(
config,
trust_remote_code=trust_remote_code,
)
elif resolved_init_device == 'meta':
if om_model_config.pretrained:
raise ValueError(
'Setting cfg.pretrained=True is not supported when init_device="meta".'
)
with init_empty_weights(include_buffers=False):
model = AutoModelForCausalLM.from_config(
config,
trust_remote_code=trust_remote_code,
)
else:
raise ValueError(
f'init_device="{init_device}" must be either "cpu" or "meta".')

signal_file_path = '.local_rank0_completed_autoresume'
if dist.get_local_rank() == 0:
with open(signal_file_path, 'wb') as f:
f.write(b'local_rank0_completed_download')
signal_file_path = '.local_rank0_completed_autoresume'
if dist.get_local_rank() == 0:
with open(signal_file_path, 'wb') as f:
f.write(b'local_rank0_completed_download')

# Avoid the collective call until the local rank zero has finished trying to download the checkpoint
# so that we don't timeout for large downloads. This syncs all processes on the node
with dist.local_rank_zero_download_and_wait(signal_file_path):
# Then, wait to ensure every node has finished downloading the checkpoint
dist.barrier()

# Avoid the collective call until the local rank zero has finished trying to download the checkpoint
# so that we don't timeout for large downloads. This syncs all processes on the node
with dist.local_rank_zero_download_and_wait(signal_file_path):
# Then, wait to ensure every node has finished downloading the checkpoint
dist.barrier()
if dist.get_local_rank() == 0:
os.remove(signal_file_path)

if dist.get_local_rank() == 0:
os.remove(signal_file_path)
z_loss = om_model_config.get('z_loss', 0.0)

# elif the model is either a PeftModel or a PreTrainedModel
elif isinstance(
om_model_config,
(peft.peft_model.PeftModel, transformers.PreTrainedModel)):
model = om_model_config
init_device = 'cpu'
z_loss = 0.0

# else, unsupported type
else:
raise ValueError(
f'om_model_config must be either a DictConfig, PeftModel, or PreTrainedModel, but got {type(om_model_config)}'
)

composer_model = super().__init__(model=model,
shift_labels=True,
tokenizer=tokenizer,
metrics=train_metrics,
eval_metrics=eval_metrics,
z_loss=om_model_config.get(
'z_loss', 0.0),
z_loss=z_loss,
init_device=init_device)

return composer_model
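The net effect of this rewrite is a three-way dispatch on the type of `om_model_config`: build the model from a DictConfig, pass through an already-instantiated PEFT or transformers model, or raise. A condensed sketch of that control flow (`build_from_config` is a hypothetical helper standing in for the config branch above):

    from typing import Union

    import peft
    import transformers
    from omegaconf import DictConfig

    def resolve_model(om_model_config: Union[DictConfig,
                                             peft.peft_model.PeftModel,
                                             transformers.PreTrainedModel]):
        if isinstance(om_model_config, DictConfig):
            # construct the HF model from the omegaconf settings
            model = build_from_config(om_model_config)  # hypothetical helper
            init_device = om_model_config.get('init_device', 'cpu')
            z_loss = om_model_config.get('z_loss', 0.0)
        elif isinstance(om_model_config,
                        (peft.peft_model.PeftModel, transformers.PreTrainedModel)):
            # the caller already instantiated the model; use it as-is
            model, init_device, z_loss = om_model_config, 'cpu', 0.0
        else:
            raise ValueError(
                f'om_model_config must be either a DictConfig, PeftModel, or '
                f'PreTrainedModel, but got {type(om_model_config)}')
        return model, init_device, z_loss
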
2 changes: 0 additions & 2 deletions llmfoundry/models/hf/hf_prefix_lm.py
@@ -98,8 +98,6 @@ def __init__(self, om_model_config: DictConfig, tokenizer: Tokenizer):
if om_model_config.get('adapt_vocab_for_denoising', False):
adapt_tokenizer_for_denoising(tokenizer)

vocab_size = len(tokenizer)

init_device = om_model_config.get('init_device', 'cpu')

# Get the device we want to initialize, and use the
2 changes: 0 additions & 2 deletions llmfoundry/models/hf/hf_t5.py
@@ -91,8 +91,6 @@ def __init__(self, om_model_config: DictConfig, tokenizer: Tokenizer):
if om_model_config.get('adapt_vocab_for_denoising', False):
adapt_tokenizer_for_denoising(tokenizer)

vocab_size = len(tokenizer)

init_device = om_model_config.get('init_device', 'cpu')

# Get the device we want to initialize, and use the
4 changes: 3 additions & 1 deletion llmfoundry/models/layers/norm.py
@@ -1,6 +1,8 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

from typing import Dict, Type

import torch


@@ -107,7 +109,7 @@ def forward(self, x):
self.eps).to(dtype=x.dtype)


NORM_CLASS_REGISTRY = {
NORM_CLASS_REGISTRY: Dict[str, Type[torch.nn.Module]] = {
'layernorm': torch.nn.LayerNorm,
'low_precision_layernorm': LPLayerNorm,
'rmsnorm': RMSNorm,
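Annotating the registry as `Dict[str, Type[torch.nn.Module]]` lets a type checker verify that every registered value is a module class. A minimal usage sketch under that annotation (`build_norm` is a hypothetical helper, not part of the diff):

    from typing import Dict, Type

    import torch

    NORM_CLASS_REGISTRY: Dict[str, Type[torch.nn.Module]] = {
        'layernorm': torch.nn.LayerNorm,
    }

    def build_norm(name: str, normalized_shape: int) -> torch.nn.Module:
        if name.lower() not in NORM_CLASS_REGISTRY:
            raise ValueError(f'Unknown norm class: {name}')
        # instantiate the registered class; the annotation guarantees
        # every value constructs a torch.nn.Module
        return NORM_CLASS_REGISTRY[name.lower()](normalized_shape)
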
[Diffs for the remaining changed files did not load.]