Commit

Update tests to not rely on mistral (#1117)
dakinggg committed Apr 18, 2024
1 parent f01f625 commit 84b6410
Showing 4 changed files with 17 additions and 40 deletions.
2 changes: 1 addition & 1 deletion tests/data/test_template_tokenization.py
@@ -252,7 +252,7 @@ def test_multi_turn_chat_slicing(tokenizer_name: str, messages_format: bool):
 def test_tokenize_no_labels_bos_pr():
     # This tokenizer automatically adds bos tokens
     tokenizer = transformers.AutoTokenizer.from_pretrained(
-        'mistralai/Mixtral-8x7B-v0.1')
+        'ai21labs/Jamba-v0.1', add_bos_token=True)
 
     example = {'prompt': 'prompt', 'response': 'response'}
 
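For reference, a minimal sketch (not part of this commit) of the behavior the updated test depends on: loading the tokenizer with add_bos_token=True means a BOS token id is prepended to every encoding. The tokenizer name matches the diff above, but the assertion is an assumption for illustration, not code from the repository.

import transformers

# Illustrative only: check that the tokenizer automatically prepends its
# BOS token; requires network access to the Hugging Face Hub.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    'ai21labs/Jamba-v0.1', add_bos_token=True)
input_ids = tokenizer('prompt')['input_ids']
assert input_ids[0] == tokenizer.bos_token_id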
4 changes: 2 additions & 2 deletions tests/models/hf/test_fsdp_weight_tying.py
@@ -33,7 +33,7 @@ def test_fsdp_weight_tying(peft_config: Optional[dict], tmp_path: pathlib.Path,
                            init_device: str):
     model_cfg = {
         'name': 'hf_causal_lm',
-        'pretrained_model_name_or_path': 'mistralai/Mistral-7B-v0.1',
+        'pretrained_model_name_or_path': 'codellama/CodeLlama-7b-hf',
         'config_overrides': {
             'num_hidden_layers': 2,
             'hidden_size': 32,
@@ -43,7 +43,7 @@ def test_fsdp_weight_tying(peft_config: Optional[dict], tmp_path: pathlib.Path,
         'pretrained': False,
         'init_device': init_device,
     }
-    tokenizer_name = 'mistralai/Mistral-7B-v0.1'
+    tokenizer_name = 'codellama/CodeLlama-7b-hf'
 
     assert model_cfg is not None
     assert tokenizer_name is not None
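As a rough, assumed illustration of what config_overrides of this kind accomplish (this is not the repository's loader): extra keyword arguments to AutoConfig.from_pretrained override fields on the downloaded config, so the test can build a tiny, randomly initialized variant of the architecture instead of the full 7B model.

import transformers

# Assumed sketch: shrink the CodeLlama config the same way the test's
# 'config_overrides' does, so a model built from it is small and fast.
config = transformers.AutoConfig.from_pretrained(
    'codellama/CodeLlama-7b-hf', num_hidden_layers=2, hidden_size=32)
assert config.num_hidden_layers == 2 and config.hidden_size == 32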
18 changes: 10 additions & 8 deletions tests/models/hf/test_hf_peft_wrapping.py
@@ -17,13 +17,15 @@
 
 
 def test_peft_wraps():
-    mistral_cfg = transformers.AutoConfig.from_pretrained(
-        'mistralai/Mistral-7B-v0.1', num_hidden_layers=2)
-    mistral = transformers.AutoModelForCausalLM.from_config(mistral_cfg)
-    mistral = get_peft_model(mistral, LoraConfig())
-    prepare_hf_model_for_fsdp(mistral, 'cpu')
+    mpt_cfg = transformers.AutoConfig.from_pretrained('mosaicml/mpt-7b',
+                                                      n_layers=2,
+                                                      trust_remote_code=True)
+    mpt = transformers.AutoModelForCausalLM.from_config(mpt_cfg,
+                                                        trust_remote_code=True)
+    mpt = get_peft_model(mpt, LoraConfig())
+    prepare_hf_model_for_fsdp(mpt, 'cpu')
 
-    for n, m in mistral.named_modules():
+    for n, m in mpt.named_modules():
         if 'lora' in n and 'default' in n:
             has_parameters = any(True for _ in m.parameters())
             has_buffers = any(True for _ in m.buffers())
@@ -51,7 +53,7 @@ def test_lora_mixed_init(peft_config: Optional[dict], tmp_path: pathlib.Path,
                          init_device: str):
     model_cfg = {
         'name': 'hf_causal_lm',
-        'pretrained_model_name_or_path': 'mistralai/Mistral-7B-v0.1',
+        'pretrained_model_name_or_path': 'codellama/CodeLlama-7b-hf',
         'config_overrides': {
             'num_hidden_layers': 2,
             'hidden_size': 32,
@@ -60,7 +62,7 @@ def test_lora_mixed_init(peft_config: Optional[dict], tmp_path: pathlib.Path,
         'pretrained': False,
         'init_device': init_device,
     }
-    tokenizer_name = 'mistralai/Mistral-7B-v0.1'
+    tokenizer_name = 'codellama/CodeLlama-7b-hf'
 
     assert model_cfg is not None
     assert tokenizer_name is not None
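For context, a self-contained sketch of the check the rewritten test_peft_wraps performs, minus the FSDP preparation step; it mirrors the diff above rather than adding anything new, and it assumes a peft version whose default LoraConfig knows the MPT target modules.

import transformers
from peft import LoraConfig, get_peft_model

# Build a 2-layer MPT from config (random weights), wrap it with LoRA, and
# confirm the injected 'lora' modules actually carry parameters.
mpt_cfg = transformers.AutoConfig.from_pretrained('mosaicml/mpt-7b',
                                                  n_layers=2,
                                                  trust_remote_code=True)
mpt = transformers.AutoModelForCausalLM.from_config(mpt_cfg,
                                                    trust_remote_code=True)
mpt = get_peft_model(mpt, LoraConfig())
for name, module in mpt.named_modules():
    if 'lora' in name and 'default' in name:
        assert any(True for _ in module.parameters())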
33 changes: 4 additions & 29 deletions tests/models/layers/test_huggingface_flash.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import contextlib
-import os
 
 import pytest
 from composer.core.precision import get_precision_context
@@ -15,53 +14,29 @@
 
 @pytest.mark.gpu
 @pytest.mark.world_size(2)
-@pytest.mark.parametrize('model_name', ['llama2', 'mistral'])
+@pytest.mark.parametrize('model_name', ['codellama'])
 @pytest.mark.parametrize('use_flash_attention_2', [True, False])
 @pytest.mark.parametrize('init_device', ['cpu', 'mixed', 'meta'])
 def test_flash2(model_name: str, use_flash_attention_2: bool, init_device: str):
-    if model_name == 'llama2':
-        if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
-            pytest.skip(
-                'The CI cluster does not have access to the Llama models, so skip this test.'
-            )
+    if model_name == 'codellama':
         model_cfg = {
             'name': 'hf_causal_lm',
-            'pretrained_model_name_or_path': 'meta-llama/Llama-2-7b-hf',
+            'pretrained_model_name_or_path': 'codellama/CodeLlama-7b-hf',
             'config_overrides': {
                 'num_hidden_layers': 2,
                 'intermediate_size': 64,
                 'hidden_size': 64,
             },
-            'use_auth_token': True,
             'pretrained': False,
             'init_device': init_device,
         }
 
-        tokenizer_name = 'meta-llama/Llama-2-7b-hf'
+        tokenizer_name = 'codellama/CodeLlama-7b-hf'
         from transformers.models.llama.modeling_llama import (
             LlamaAttention, LlamaFlashAttention2)
         flash_attn_class = LlamaFlashAttention2 if use_flash_attention_2 else LlamaAttention
         attention_layers_attr = 'model.model.layers'
         attention_attr = 'self_attn'
-    elif model_name == 'mistral':
-        model_cfg = {
-            'name': 'hf_causal_lm',
-            'pretrained_model_name_or_path': 'mistralai/Mistral-7B-v0.1',
-            'config_overrides': {
-                'num_hidden_layers': 2,
-                'intermediate_size': 64,
-                'hidden_size': 64,
-            },
-            'pretrained': False,
-            'init_device': 'cpu',
-        }
-
-        tokenizer_name = 'mistralai/Mistral-7B-v0.1'
-        from transformers.models.mistral.modeling_mistral import (
-            MistralAttention, MistralFlashAttention2)
-        flash_attn_class = MistralFlashAttention2 if use_flash_attention_2 else MistralAttention
-        attention_layers_attr = 'model.model.layers'
-        attention_attr = 'self_attn'
     else:
         raise ValueError(f'Unknown model: {model_name}')
 
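The remainder of test_flash2 is collapsed above. As an assumed sketch of how attention_layers_attr, attention_attr, and flash_attn_class are typically used together, one can walk the wrapped model's attribute path and check the class of each attention module; the helper name below is hypothetical, not from the repository.

from operator import attrgetter

# Hypothetical helper, for illustration only: fetch the decoder layers via a
# dotted attribute path and verify which attention implementation was built.
def assert_attention_class(model, attention_layers_attr, attention_attr,
                           flash_attn_class):
    layers = attrgetter(attention_layers_attr)(model)
    for layer in layers:
        assert isinstance(getattr(layer, attention_attr), flash_attn_class)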
