Feature/peft compatible models #346

Merged
merged 28 commits into main from feature/peft-compatible-models
Jun 27, 2023
Changes from all commits
28 commits
cfd9795
ignore venv in dir
danbider Jun 12, 2023
5848b4e
adding a lean ComposerHFCausalLMFromPython that converts a loaded hf …
danbider Jun 13, 2023
dadd930
typechecking the composer convertor, import peft and transformers.
danbider Jun 20, 2023
d75fd09
support for input_embeds in forward calls for peft compatibility
danbider Jun 14, 2023
db837d3
fixing inputs_embeds typos
danbider Jun 16, 2023
081d00e
precommit fixes docs
danbider Jun 20, 2023
879c9cc
merged upstream
danbider Jun 22, 2023
635fb20
Merge branch 'main' into feature/peft-compatible-models
mvpatel2000 Jun 22, 2023
d012638
refactored hf causal
danbider Jun 23, 2023
459415e
merged upstream
danbider Jun 23, 2023
de5b5ad
attempt to conclude merge
danbider Jun 23, 2023
aba2921
removed python convertor from inits
danbider Jun 23, 2023
cd3452a
wip train.py
danbider Jun 24, 2023
0b4403d
added lora deps
danbider Jun 24, 2023
7bec6c5
removed 8 bit defaults
danbider Jun 24, 2023
9d64a03
Update llmfoundry/models/mpt/modeling_mpt.py
codestar12 Jun 26, 2023
63ef548
precommit edits models
danbider Jun 26, 2023
bdea8ec
Update llmfoundry/models/mpt/modeling_mpt.py
danbider Jun 27, 2023
786ac5e
delete deprecated hf class from init
danbider Jun 27, 2023
ef14b74
removed 8-bit and device map support for now
danbider Jun 27, 2023
c490802
formatting the peft builder for precommit
danbider Jun 27, 2023
23576d4
fixed comments on model ifs
danbider Jun 27, 2023
f30bdea
added a util for printing trainable params
danbider Jun 27, 2023
f3cf98a
deps pinned down and sent to gpu
danbider Jun 27, 2023
9b3ae8e
scipy dep for bitsandbytes
danbider Jun 27, 2023
22faff4
sent lora deps to regular install_requires
danbider Jun 27, 2023
1382d4e
pinned down scipy
danbider Jun 27, 2023
d9d0cad
Merge branch 'main' into feature/peft-compatible-models
codestar12 Jun 27, 2023
6 changes: 6 additions & 0 deletions .gitignore
@@ -120,6 +120,9 @@ ENV/
env.bak/
venv.bak/

# python venv installed in the dir, llmfoundry-venv
*-venv

# Spyder project settings
.spyderproject
.spyproject
@@ -143,3 +146,6 @@

# macOS
.DS_Store

# notebooks
notebooks/
182 changes: 108 additions & 74 deletions llmfoundry/models/hf/hf_causal_lm.py
@@ -6,6 +6,9 @@
import os
from typing import Mapping, Union

# required for loading a python model into composer
import peft
import transformers
from composer.metrics.nlp import (InContextLearningLMAccuracy,
InContextLearningLMExpectedCalibrationError,
InContextLearningMCExpectedCalibrationError,
@@ -30,7 +33,8 @@ class ComposerHFCausalLM(HuggingFaceModelWithZLoss):
"""Configures a :class:`.HuggingFaceModel` around a Causal LM.

Args:
cfg (DictConfig): An omegaconf dictionary used to configure the model:
om_model_config (DictConfig | peft.peft_model.PeftModel | transformers.PreTrainedModel): either an omegaconf dictionary used to configure the model, or an instantiated model object from the peft or transformers library.
if DictConfig, the following keys are required:
cfg.pretrained_model_name_or_path (str): The name of or local path to
the HF Causal LM (e.g., `gpt2` to instantiate a GPT2LMHeadModel).
cfg.config_overrides (dict, optional): An optional dictionary of keyword
@@ -45,34 +49,12 @@ class ComposerHFCausalLM(HuggingFaceModelWithZLoss):
tokenizer (PreTrainedTokenizer): The tokenizer that the model will use.
"""

def __init__(self, om_model_config: DictConfig, tokenizer: Tokenizer):
trust_remote_code = om_model_config.get('trust_remote_code', True)
use_auth_token = om_model_config.get('use_auth_token', False)
config = AutoConfig.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
)

# set config overrides
for k, v in om_model_config.get('config_overrides', {}).items():
if not hasattr(config, k):
raise ValueError(
f'config does not have attribute "{k}" to override ({k}: {v}).'
)

attr = getattr(config, k)
if isinstance(attr, Mapping):
extra_keys = [_k for _k in v.keys() if _k not in attr.keys()]
if extra_keys:
raise ValueError(
f'Config dict override got unknown keys. '
f'Extra keys: {extra_keys}. '
f'Expected (a subset of) keys: {list(attr.keys())}.')
getattr(config, k).update(v)
else:
setattr(config, k, v)
def __init__(self,
om_model_config: Union[DictConfig, peft.peft_model.PeftModel,
transformers.PreTrainedModel],
tokenizer: Tokenizer):

# set up training and eval metrics
train_metrics = [
LanguageCrossEntropy(),
LanguagePerplexity(),
@@ -87,64 +69,116 @@ def __init__(self, om_model_config: DictConfig, tokenizer: Tokenizer):
InContextLearningMCExpectedCalibrationError()
]

init_device = om_model_config.get('init_device', 'cpu')

# Get the device we want to initialize, and use the
# reolved version to initialize the HF model
resolved_init_device = hf_get_init_device(init_device)

# We need to have all non-zero local ranks be not-pretrained
# Rank 0 will still be pretrained, and distribute the weights appropriately
if dist.get_local_rank() != 0 and init_device == 'mixed':
om_model_config.pretrained = False

if resolved_init_device == 'cpu':
if om_model_config.pretrained:
model = AutoModelForCausalLM.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
config=config)
# if we are passed a DictConfig, we need to instantiate the model
if isinstance(om_model_config, DictConfig):

# load the model config
trust_remote_code = om_model_config.get('trust_remote_code', True)
use_auth_token = om_model_config.get('use_auth_token', False)
config = AutoConfig.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
)

# set config overrides
for k, v in om_model_config.get('config_overrides', {}).items():
if not hasattr(config, k):
raise ValueError(
f'config does not have attribute "{k}" to override ({k}: {v}).'
)

attr = getattr(config, k)
if isinstance(attr, Mapping):
extra_keys = [
_k for _k in v.keys() if _k not in attr.keys()
]
if extra_keys:
raise ValueError(
f'Config dict override got unknown keys. '
f'Extra keys: {extra_keys}. '
f'Expected (a subset of) keys: {list(attr.keys())}.'
)
getattr(config, k).update(v)
else:
setattr(config, k, v)

# below we set up the device to initialize the model on
init_device = om_model_config.get('init_device', 'cpu')

# Get the device we want to initialize, and use the
# resolved version to initialize the HF model
resolved_init_device = hf_get_init_device(init_device)

# We need to have all non-zero local ranks be not-pretrained
# Rank 0 will still be pretrained, and distribute the weights appropriately
if dist.get_local_rank() != 0 and init_device == 'mixed':
om_model_config.pretrained = False

# initialize the model on the correct device
if resolved_init_device == 'cpu':
if om_model_config.pretrained:
model = AutoModelForCausalLM.from_pretrained(
om_model_config.pretrained_model_name_or_path,
trust_remote_code=trust_remote_code,
use_auth_token=use_auth_token,
config=config)
else:
model = AutoModelForCausalLM.from_config(
config,
trust_remote_code=trust_remote_code,
)
elif resolved_init_device == 'meta':
if om_model_config.pretrained:
raise ValueError(
'Setting cfg.pretrained=True is not supported when init_device="meta".'
)
with init_empty_weights(include_buffers=False):
model = AutoModelForCausalLM.from_config(
config,
trust_remote_code=trust_remote_code,
)
else:
raise ValueError(
f'init_device="{init_device}" must be either "cpu" or "meta".')

signal_file_path = '.local_rank0_completed_autoresume'
if dist.get_local_rank() == 0:
with open(signal_file_path, 'wb') as f:
f.write(b'local_rank0_completed_download')

# Avoid the collective call until the local rank zero has finished trying to download the checkpoint
# so that we don't timeout for large downloads. This syncs all processes on the node
with dist.local_rank_zero_download_and_wait(signal_file_path):
# Then, wait to ensure every node has finished downloading the checkpoint
dist.barrier()

if dist.get_local_rank() == 0:
os.remove(signal_file_path)

z_loss = om_model_config.get('z_loss', 0.0)

# elif the model is either a PeftModel or a PreTrainedModel
elif isinstance(
om_model_config,
(peft.peft_model.PeftModel, transformers.PreTrainedModel)):
model = om_model_config
init_device = 'cpu'
z_loss = 0.0

# else, unsupported type
else:
raise ValueError(
f'om_model_config must be either a DictConfig, PeftModel, or PreTrainedModel, but got {type(om_model_config)}'
)

composer_model = super().__init__(model=model,
shift_labels=True,
tokenizer=tokenizer,
metrics=train_metrics,
eval_metrics=eval_metrics,
z_loss=om_model_config.get(
'z_loss', 0.0),
z_loss=z_loss,
init_device=init_device)

return composer_model
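
For context on how the new code path is meant to be exercised, here is a rough usage sketch, not code from this PR: it shows both the existing DictConfig route and the route added here of handing an already-instantiated PeftModel to ComposerHFCausalLM. The gpt2 checkpoint, the LoRA hyperparameters, and the target_modules name are illustrative assumptions, not values taken from this repository.

import peft
import transformers
from omegaconf import OmegaConf

from llmfoundry.models.hf import ComposerHFCausalLM

tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2')

# Route 1 (unchanged): configure via an omegaconf DictConfig, using the keys
# described in the docstring above.
cfg = OmegaConf.create({
    'pretrained_model_name_or_path': 'gpt2',
    'pretrained': True,
    'init_device': 'cpu',
    'z_loss': 0.0,
})
composer_model_from_cfg = ComposerHFCausalLM(cfg, tokenizer)

# Route 2 (added by this PR): build a peft-wrapped HF model in Python and pass it in
# directly. The LoRA settings are hypothetical; 'c_attn' is the assumed attention
# projection module name for GPT-2.
base_model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')
lora_config = peft.LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=['c_attn'],
    task_type='CAUSAL_LM',
)
peft_model = peft.get_peft_model(base_model, lora_config)

# The new elif branch accepts the PeftModel as-is and defaults to
# init_device='cpu' and z_loss=0.0.
composer_model_from_peft = ComposerHFCausalLM(peft_model, tokenizer)

In both cases the constructor ends at the same super().__init__ call shown at the end of the diff.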