diff --git a/tests/data/test_template_tokenization.py b/tests/data/test_template_tokenization.py
index 632a79dac9..702202b091 100644
--- a/tests/data/test_template_tokenization.py
+++ b/tests/data/test_template_tokenization.py
@@ -252,7 +252,7 @@ def test_multi_turn_chat_slicing(tokenizer_name: str, messages_format: bool):
 def test_tokenize_no_labels_bos_pr():
     # This tokenizer automatically adds bos tokens
     tokenizer = transformers.AutoTokenizer.from_pretrained(
-        'mistralai/Mixtral-8x7B-v0.1')
+        'ai21labs/Jamba-v0.1', add_bos_token=True)
 
     example = {'prompt': 'prompt', 'response': 'response'}
diff --git a/tests/models/hf/test_fsdp_weight_tying.py b/tests/models/hf/test_fsdp_weight_tying.py
index 6e7838e7ba..712e515653 100644
--- a/tests/models/hf/test_fsdp_weight_tying.py
+++ b/tests/models/hf/test_fsdp_weight_tying.py
@@ -33,7 +33,7 @@ def test_fsdp_weight_tying(peft_config: Optional[dict], tmp_path: pathlib.Path,
                            init_device: str):
     model_cfg = {
         'name': 'hf_causal_lm',
-        'pretrained_model_name_or_path': 'mistralai/Mistral-7B-v0.1',
+        'pretrained_model_name_or_path': 'codellama/CodeLlama-7b-hf',
         'config_overrides': {
             'num_hidden_layers': 2,
             'hidden_size': 32,
@@ -43,7 +43,7 @@ def test_fsdp_weight_tying(peft_config: Optional[dict], tmp_path: pathlib.Path,
         'pretrained': False,
         'init_device': init_device,
     }
-    tokenizer_name = 'mistralai/Mistral-7B-v0.1'
+    tokenizer_name = 'codellama/CodeLlama-7b-hf'
 
     assert model_cfg is not None
     assert tokenizer_name is not None
diff --git a/tests/models/hf/test_hf_peft_wrapping.py b/tests/models/hf/test_hf_peft_wrapping.py
index d8bea33dd4..a17840b4ca 100644
--- a/tests/models/hf/test_hf_peft_wrapping.py
+++ b/tests/models/hf/test_hf_peft_wrapping.py
@@ -17,8 +17,8 @@
 
 
 def test_peft_wraps():
-    mistral_cfg = transformers.AutoConfig.from_pretrained(
-        'mistralai/Mistral-7B-v0.1', num_hidden_layers=2)
+    mistral_cfg = transformers.AutoConfig.from_pretrained('mosaicml/mpt-7b',
+                                                          num_hidden_layers=2)
     mistral = transformers.AutoModelForCausalLM.from_config(mistral_cfg)
     mistral = get_peft_model(mistral, LoraConfig())
     prepare_hf_model_for_fsdp(mistral, 'cpu')
@@ -51,7 +51,7 @@ def test_lora_mixed_init(peft_config: Optional[dict], tmp_path: pathlib.Path,
                          init_device: str):
     model_cfg = {
         'name': 'hf_causal_lm',
-        'pretrained_model_name_or_path': 'mistralai/Mistral-7B-v0.1',
+        'pretrained_model_name_or_path': 'codellama/CodeLlama-7b-hf',
         'config_overrides': {
             'num_hidden_layers': 2,
             'hidden_size': 32,
@@ -60,7 +60,7 @@ def test_lora_mixed_init(peft_config: Optional[dict], tmp_path: pathlib.Path,
         'pretrained': False,
         'init_device': init_device,
     }
-    tokenizer_name = 'mistralai/Mistral-7B-v0.1'
+    tokenizer_name = 'codellama/CodeLlama-7b-hf'
 
     assert model_cfg is not None
     assert tokenizer_name is not None
diff --git a/tests/models/layers/test_huggingface_flash.py b/tests/models/layers/test_huggingface_flash.py
index 1e8ec2383d..2b1310f519 100644
--- a/tests/models/layers/test_huggingface_flash.py
+++ b/tests/models/layers/test_huggingface_flash.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import contextlib
-import os
 
 import pytest
 from composer.core.precision import get_precision_context
@@ -15,18 +14,14 @@
 
 @pytest.mark.gpu
 @pytest.mark.world_size(2)
-@pytest.mark.parametrize('model_name', ['llama2', 'mistral'])
+@pytest.mark.parametrize('model_name', ['codellama', 'mistral'])
 @pytest.mark.parametrize('use_flash_attention_2', [True, False])
 @pytest.mark.parametrize('init_device', ['cpu', 'mixed', 'meta'])
 def test_flash2(model_name: str, use_flash_attention_2: bool, init_device: str):
-    if model_name == 'llama2':
-        if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
-            pytest.skip(
-                'The CI cluster does not have access to the Llama models, so skip this test.'
-            )
+    if model_name == 'codellama':
         model_cfg = {
             'name': 'hf_causal_lm',
-            'pretrained_model_name_or_path': 'meta-llama/Llama-2-7b-hf',
+            'pretrained_model_name_or_path': 'codellama/CodeLlama-7b-hf',
             'config_overrides': {
                 'num_hidden_layers': 2,
                 'intermediate_size': 64,
@@ -43,25 +38,6 @@ def test_flash2(model_name: str, use_flash_attention_2: bool, init_device: str):
         flash_attn_class = LlamaFlashAttention2 if use_flash_attention_2 else LlamaAttention
         attention_layers_attr = 'model.model.layers'
         attention_attr = 'self_attn'
-    elif model_name == 'mistral':
-        model_cfg = {
-            'name': 'hf_causal_lm',
-            'pretrained_model_name_or_path': 'mistralai/Mistral-7B-v0.1',
-            'config_overrides': {
-                'num_hidden_layers': 2,
-                'intermediate_size': 64,
-                'hidden_size': 64,
-            },
-            'pretrained': False,
-            'init_device': 'cpu',
-        }
-
-        tokenizer_name = 'mistralai/Mistral-7B-v0.1'
-        from transformers.models.mistral.modeling_mistral import (
-            MistralAttention, MistralFlashAttention2)
-        flash_attn_class = MistralFlashAttention2 if use_flash_attention_2 else MistralAttention
-        attention_layers_attr = 'model.model.layers'
-        attention_attr = 'self_attn'
     else:
         raise ValueError(f'Unknown model: {model_name}')
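
Note (illustrative, not part of the patch): the substitutions above all rely on the same Hugging Face pattern for keeping tests cheap and ungated: fetch only the config of a freely accessible repo, shrink it with overrides, and build a randomly initialized model from it, so no gated multi-billion-parameter checkpoint is ever downloaded. A minimal sketch of that pattern, using the codellama/CodeLlama-7b-hf repo this patch swaps in (the override values mirror the test configs and are otherwise arbitrary):

import transformers

# Fetch only config.json from an ungated repo; no model weights are downloaded.
config = transformers.AutoConfig.from_pretrained(
    'codellama/CodeLlama-7b-hf',
    num_hidden_layers=2,  # shrink the model so the test stays cheap
    hidden_size=32,
    intermediate_size=64,
)

# Build a tiny, randomly initialized model from the shrunken config
# (this is what 'pretrained': False means in the test configs above).
model = transformers.AutoModelForCausalLM.from_config(config)

# The matching tokenizer is also ungated.
tokenizer = transformers.AutoTokenizer.from_pretrained('codellama/CodeLlama-7b-hf')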