From ced2f925c81b2666f140b42ebb45a7e33522f6e6 Mon Sep 17 00:00:00 2001 From: Michael Benayoun Date: Wed, 29 May 2024 14:12:48 +0200 Subject: [PATCH] [WIP] tests --- optimum/neuron/__init__.py | 2 + optimum/neuron/accelerate/accelerator.py | 2 +- optimum/neuron/utils/__init__.py | 1 + optimum/neuron/utils/patching.py | 2 +- optimum/neuron/utils/peft_utils.py | 10 ++--- optimum/neuron/utils/require_utils.py | 6 ++- optimum/neuron/utils/training_utils.py | 4 +- tests/distributed/test_common.py | 20 +++++----- .../distributed/test_model_parallelization.py | 6 +-- tests/distributed/utils.py | 27 ------------- tests/test_trainers.py | 23 +++-------- tests/utils.py | 40 ++++++++++++++++++- 12 files changed, 71 insertions(+), 72 deletions(-) diff --git a/optimum/neuron/__init__.py b/optimum/neuron/__init__.py index f2b43ff74..70654cefb 100644 --- a/optimum/neuron/__init__.py +++ b/optimum/neuron/__init__.py @@ -58,6 +58,7 @@ "ModelParallelismPlugin", ], "pipelines": ["pipeline"], + "utils": ["get_peft_model"], } if TYPE_CHECKING: @@ -88,6 +89,7 @@ from .pipelines import pipeline from .trainers import NeuronTrainer, Seq2SeqNeuronTrainer from .training_args import NeuronTrainingArguments, Seq2SeqNeuronTrainingArguments + from .utils import get_peft_model else: import sys diff --git a/optimum/neuron/accelerate/accelerator.py b/optimum/neuron/accelerate/accelerator.py index b8fdd4e32..b0393cf69 100644 --- a/optimum/neuron/accelerate/accelerator.py +++ b/optimum/neuron/accelerate/accelerator.py @@ -30,9 +30,9 @@ from accelerate.checkpointing import save_accelerator_state, save_custom_state from accelerate.utils import AutocastKwargs, DistributedType from accelerate.utils.operations import gather_object, recursively_apply -from transformers import PreTrainedModel from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler +from transformers import PreTrainedModel from ...utils import logging from ..distributed import Parallelizer, ParallelizersManager diff --git a/optimum/neuron/utils/__init__.py b/optimum/neuron/utils/__init__.py index 148d6d0a0..42a6b7415 100644 --- a/optimum/neuron/utils/__init__.py +++ b/optimum/neuron/utils/__init__.py @@ -45,6 +45,7 @@ ) from .optimization_utils import get_attention_scores_sd, get_attention_scores_sdxl from .patching import DynamicPatch, ModelPatcher, Patcher, patch_everywhere, patch_within_function +from .peft_utils import get_peft_model from .training_utils import ( is_model_officially_supported, patch_transformers_for_neuron_sdk, diff --git a/optimum/neuron/utils/patching.py b/optimum/neuron/utils/patching.py index acd5c3c39..adcd1a8c2 100644 --- a/optimum/neuron/utils/patching.py +++ b/optimum/neuron/utils/patching.py @@ -19,7 +19,7 @@ import inspect import sys from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union, Type +from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Type, Union if TYPE_CHECKING: diff --git a/optimum/neuron/utils/peft_utils.py b/optimum/neuron/utils/peft_utils.py index b68d7f8a1..ad8b5a850 100644 --- a/optimum/neuron/utils/peft_utils.py +++ b/optimum/neuron/utils/peft_utils.py @@ -13,18 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Utilities related to the PEFT library and support.""" -import gc import functools +import gc from typing import Any, List, Optional, Union from transformers.utils import is_peft_available -from .patching import Patcher, replace_class_in_inheritance_hierarchy +from .patching import replace_class_in_inheritance_hierarchy from .require_utils import requires_neuronx_distributed if is_peft_available(): - from peft import PeftModel, get_peft_model as orig_get_peft_model + from peft import PeftModel + from peft import get_peft_model as orig_get_peft_model from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict else: @@ -105,11 +106,8 @@ def save_pretrained( return output - @functools.wraps(orig_get_peft_model) def get_peft_model(*args, **kwargs): peft_model = orig_get_peft_model(*args, **kwargs) replace_class_in_inheritance_hierarchy(peft_model, PeftModel, NeuronPeftModel) return peft_model - - diff --git a/optimum/neuron/utils/require_utils.py b/optimum/neuron/utils/require_utils.py index d828ebe89..df9f68313 100644 --- a/optimum/neuron/utils/require_utils.py +++ b/optimum/neuron/utils/require_utils.py @@ -17,7 +17,7 @@ import functools from typing import Any, Callable, Dict -from transformers.utils import is_safetensors_available +from transformers.utils import is_peft_available, is_safetensors_available from .import_utils import ( is_neuronx_distributed_available, @@ -27,12 +27,13 @@ ) -_AVAILABILITIES: Dict[str, Callable[[], bool]] = { +_AVAILABILITIES: Dict[str, Callable] = { "safetensors": is_safetensors_available, "torch_xla": is_torch_xla_available, "neuronx_distributed": is_neuronx_distributed_available, "torch_neuronx": is_torch_neuronx_available, "transformers_neuronx": is_transformers_neuronx_available, + "peft": is_peft_available, } @@ -59,3 +60,4 @@ def wrapper(*args, **kwargs): requires_neuronx_distributed = _create_requires_function("neuronx_distributed") requires_torch_neuronx = _create_requires_function("torch_neuronx") requires_transformers_neuronx = _create_requires_function("transformers_neuronx") +requires_peft = _create_requires_function("peft") diff --git a/optimum/neuron/utils/training_utils.py b/optimum/neuron/utils/training_utils.py index b3efcd71c..40a567b37 100644 --- a/optimum/neuron/utils/training_utils.py +++ b/optimum/neuron/utils/training_utils.py @@ -14,7 +14,7 @@ # limitations under the License. """Training utilities""" -from typing import TYPE_CHECKING, List, Optional, Type, Union, Any +from typing import TYPE_CHECKING, List, Optional, Type, Union import torch import transformers @@ -45,8 +45,8 @@ from ...utils.logging import set_verbosity as set_verbosity_optimum from ..generation import GeneralNeuronGenerationMixin, NeuronGenerationMixin from . import is_neuronx_distributed_available -from .require_utils import requires_neuronx_distributed, requires_torch_xla from .patching import replace_class_in_inheritance_hierarchy +from .require_utils import requires_neuronx_distributed, requires_torch_xla if is_neuronx_distributed_available(): diff --git a/tests/distributed/test_common.py b/tests/distributed/test_common.py index 5bc70ffcd..e2e23236b 100644 --- a/tests/distributed/test_common.py +++ b/tests/distributed/test_common.py @@ -36,8 +36,8 @@ from optimum.neuron.utils.testing_utils import is_trainium_test from .. 
import DistributedTest -from ..utils import create_static_seed_patcher, get_model -from .utils import create_accelerator_for_mp, get_model_inputs +from ..utils import create_accelerator, create_static_seed_patcher, get_model +from .utils import get_model_inputs if is_torch_xla_available(): @@ -159,7 +159,7 @@ def test_optimizer_parameters_match_model_parameters( model = get_tiny_llama_model(tp_size=tp_size, pp_size=pp_size, lazy_load=lazy_load) optimizer = get_optimizer(model, lazy_optimizer, with_groups) - accelerator = create_accelerator_for_mp(tp_size, pp_size, zero_1=zero_1) + accelerator = create_accelerator(tp_size, pp_size, zero_1=zero_1) if tp_size > 1 or pp_size > 1: assert accelerator.state.distributed_type is NeuronDistributedType.MODEL_PARALLELISM @@ -198,7 +198,7 @@ def test_optimizer_step(self, zero_1, gradient_accumulation_steps, max_grad_norm optimizer = get_optimizer(model, with_groups=False) - accelerator = create_accelerator_for_mp( + accelerator = create_accelerator( tp_size, pp_size, zero_1=zero_1, gradient_accumulation_steps=gradient_accumulation_steps ) @@ -302,7 +302,7 @@ def test_lazy_load(self, from_config, parallel_sizes): orig_parameters: Dict[str, torch.nn.Parameter] = dict(model.named_parameters()) - accelerator = create_accelerator_for_mp(tp_size, pp_size) + accelerator = create_accelerator(tp_size, pp_size) lazy_model = get_tiny_llama_model( tp_size=tp_size, pp_size=pp_size, lazy_load=True, from_config=from_config, use_static_seed_patcher=True ) @@ -349,7 +349,7 @@ def test_save_model_and_load_model(self, parallel_sizes, tmpdir, monkeypatch): model = get_tiny_llama_model(tp_size=tp_size, pp_size=pp_size, lazy_load=False, add_random_noise=True) - accelerator = create_accelerator_for_mp(tp_size, pp_size) + accelerator = create_accelerator(tp_size, pp_size) model = accelerator.prepare(model) accelerator.save_state(tmpdir.as_posix()) accelerator.state._reset_state(reset_partial_state=True) @@ -382,7 +382,7 @@ def test_save_model_and_load_model(self, parallel_sizes, tmpdir, monkeypatch): # Making sure that we end-up with a different model when starting over. new_model = get_tiny_llama_model(tp_size=tp_size, pp_size=pp_size, lazy_load=False, add_random_noise=True) - new_accelerator = create_accelerator_for_mp(tp_size, pp_size) + new_accelerator = create_accelerator(tp_size, pp_size) new_model = new_accelerator.prepare(new_model) new_accelerator.state._reset_state(reset_partial_state=True) del new_accelerator @@ -401,7 +401,7 @@ def test_save_model_and_load_model(self, parallel_sizes, tmpdir, monkeypatch): # Checking that when providing a checkpoint, we end-up with the same model as the original. new_model = get_tiny_llama_model(tp_size=tp_size, pp_size=pp_size, lazy_load=False, add_random_noise=True) - new_accelerator = create_accelerator_for_mp(tp_size, pp_size, checkpoint_dir=tmpdir) + new_accelerator = create_accelerator(tp_size, pp_size, checkpoint_dir=tmpdir) new_model = new_accelerator.prepare(new_model) # If there is no model parallelism, the checkpoint weights will not be loaded automatically since we do not @@ -463,9 +463,7 @@ def test_consolidate_model_parallel_checkpoints( # Saving to pytorch instead of safetensors because it fails otherwise for pickling issues with distributed tests. 
orig_model.save_pretrained(orig_model_path, safe_serialization=False) - accelerator = create_accelerator_for_mp( - tp_size, pp_size, kv_size_multiplier=kv_size_multiplier, use_xser=use_xser - ) + accelerator = create_accelerator(tp_size, pp_size, kv_size_multiplier=kv_size_multiplier, use_xser=use_xser) _ = accelerator.prepare(orig_model) output_dir = Path(tmpdir) / "parallel_model" diff --git a/tests/distributed/test_model_parallelization.py b/tests/distributed/test_model_parallelization.py index 35afb1d36..44c5b7f78 100644 --- a/tests/distributed/test_model_parallelization.py +++ b/tests/distributed/test_model_parallelization.py @@ -56,8 +56,8 @@ from optimum.neuron.utils.testing_utils import is_trainium_test from .. import DistributedTest -from ..utils import SEED, create_static_seed_patcher, get_model -from .utils import create_accelerator_for_mp, get_model_inputs +from ..utils import SEED, create_accelerator, create_static_seed_patcher, get_model +from .utils import get_model_inputs if is_torch_xla_available(): @@ -299,7 +299,7 @@ def _parallel_model_matches_original_model( use_static_seed_patcher=True, ) - accelerator = create_accelerator_for_mp( + accelerator = create_accelerator( tp_size, pp_size, parallelize_embeddings=parallelize_embeddings, diff --git a/tests/distributed/utils.py b/tests/distributed/utils.py index f9790adbd..6d5c39822 100644 --- a/tests/distributed/utils.py +++ b/tests/distributed/utils.py @@ -15,7 +15,6 @@ """Utilities for tests distributed.""" import inspect -from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Optional, Union import torch @@ -39,7 +38,6 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, ) -from optimum.neuron import ModelParallelismPlugin, NeuronAccelerator from optimum.neuron.utils.require_utils import requires_neuronx_distributed, requires_torch_xla @@ -258,28 +256,3 @@ def get_model_inputs( ) inputs[name] = tensor return inputs - - -def create_accelerator_for_mp( - tp_size: int, - pp_size: int, - zero_1: bool = False, - gradient_accumulation_steps: int = 1, - parallelize_embeddings: bool = True, - sequence_parallel_enabled: bool = True, - kv_size_multiplier: Optional[int] = None, - checkpoint_dir: Optional[Union[Path, str]] = None, - use_xser: bool = True, -) -> NeuronAccelerator: - mp_plugin = ModelParallelismPlugin( - tensor_parallel_size=tp_size, - kv_size_multiplier=kv_size_multiplier, - parallelize_embeddings=parallelize_embeddings, - sequence_parallel_enabled=sequence_parallel_enabled, - pipeline_parallel_size=pp_size, - checkpoint_dir=checkpoint_dir, - use_xser=use_xser, - ) - return NeuronAccelerator( - mp_plugin=mp_plugin, zero_1=zero_1, gradient_accumulation_steps=gradient_accumulation_steps - ) diff --git a/tests/test_trainers.py b/tests/test_trainers.py index ea6b78148..58ef2c4c4 100644 --- a/tests/test_trainers.py +++ b/tests/test_trainers.py @@ -25,9 +25,7 @@ from huggingface_hub import HfApi from transformers import ( AutoConfig, - AutoModelForCausalLM, AutoModelForSequenceClassification, - AutoTokenizer, ) from optimum.neuron import NeuronTrainer, NeuronTrainingArguments @@ -41,8 +39,10 @@ from . 
import DistributedTest from .utils import ( + MODEL_NAME, create_dummy_causal_lm_dataset, default_data_collator_for_causal_lm, + get_tokenizer_and_tiny_llama_model, ) @@ -54,17 +54,6 @@ ) -MODEL_NAME = "michaelbenayoun/llama-2-tiny-4kv-heads-4layers-random" - - -def get_tokenizer_and_tiny_llama_model(parallel_sizes): - tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) - _, tp_size, pp_size = parallel_sizes - config = AutoConfig.from_pretrained(MODEL_NAME) - model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, config=config, ignore_mismatched_sizes=True) - return tokenizer, model - - @is_trainium_test class TestNeuronTrainingUtils(DistributedTest): @pytest.fixture( @@ -80,7 +69,7 @@ def test_get_model_param_count(self, parallel_sizes, tmpdir): _, tp_size, pp_size = parallel_sizes output_dir = Path(tmpdir) - _, model = get_tokenizer_and_tiny_llama_model(parallel_sizes) + _, model = get_tokenizer_and_tiny_llama_model() target_num_parameters = sum(p.numel() for p in model.parameters()) @@ -130,7 +119,7 @@ def test_save_checkpoint(self, hub_test, tmpdir, parallel_sizes): output_dir=output_dir.as_posix(), ) - tokenizer, model = get_tokenizer_and_tiny_llama_model(parallel_sizes) + tokenizer, model = get_tokenizer_and_tiny_llama_model() datasets = create_dummy_causal_lm_dataset(model.config.vocab_size, 120, 1, sequence_length=128) trainer = NeuronTrainer( @@ -197,7 +186,7 @@ def test_train_and_eval_use_remote_cache(self, hub_test_with_local_cache, tmpdir num_eval_samples = 100 per_device_eval_batch_size = 16 - tokenizer, model = get_tokenizer_and_tiny_llama_model(parallel_sizes) + tokenizer, model = get_tokenizer_and_tiny_llama_model() clone = copy.deepcopy(model) datasets = create_dummy_causal_lm_dataset(model.config.vocab_size, num_train_samples, num_eval_samples) @@ -296,7 +285,7 @@ def test_save_and_resume_from_checkpoint(self, parallel_sizes, tmpdir): max_train_samples = 100 max_eval_samples = 16 - tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) + tokenizer, _ = get_tokenizer_and_tiny_llama_model() tokenizer.pad_token = tokenizer.eos_token def create_training_args(output_dir, resume_from_checkpoint=None, max_steps=max_steps): diff --git a/tests/utils.py b/tests/utils.py index bc9aadb37..9255807fc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,15 +20,17 @@ import os import random import string -from typing import Callable, Dict, List, Optional, Tuple, Type +from pathlib import Path +from typing import Callable, Dict, List, Optional, Tuple, Type, Union import torch from datasets import Dataset, DatasetDict from huggingface_hub import CommitOperationDelete, HfApi, create_repo, delete_repo, get_token, login, logout from huggingface_hub.utils import RepositoryNotFoundError -from transformers import AutoConfig, PreTrainedModel +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel from transformers.testing_utils import ENDPOINT_STAGING +from optimum.neuron import ModelParallelismPlugin, NeuronAccelerator from optimum.neuron.distributed import lazy_load_for_parallelism from optimum.neuron.utils.cache_utils import ( delete_custom_cache_repo_name_from_hf_home, @@ -45,6 +47,8 @@ SEED = 42 OPTIMUM_INTERNAL_TESTING_CACHE_REPO = "optimum-internal-testing/optimum-neuron-cache-for-testing" +MODEL_NAME = "michaelbenayoun/llama-2-tiny-4kv-heads-4layers-random" + def get_random_string(length) -> str: letters = string.ascii_lowercase @@ -213,6 +217,38 @@ def get_model( return model +def get_tokenizer_and_tiny_llama_model(): + tokenizer = 
AutoTokenizer.from_pretrained(MODEL_NAME) + config = AutoConfig.from_pretrained(MODEL_NAME) + model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, config=config, ignore_mismatched_sizes=True) + return tokenizer, model + + +def create_accelerator( + tp_size: int, + pp_size: int, + zero_1: bool = False, + gradient_accumulation_steps: int = 1, + parallelize_embeddings: bool = True, + sequence_parallel_enabled: bool = True, + kv_size_multiplier: Optional[int] = None, + checkpoint_dir: Optional[Union[Path, str]] = None, + use_xser: bool = True, +) -> NeuronAccelerator: + mp_plugin = ModelParallelismPlugin( + tensor_parallel_size=tp_size, + kv_size_multiplier=kv_size_multiplier, + parallelize_embeddings=parallelize_embeddings, + sequence_parallel_enabled=sequence_parallel_enabled, + pipeline_parallel_size=pp_size, + checkpoint_dir=checkpoint_dir, + use_xser=use_xser, + ) + return NeuronAccelerator( + mp_plugin=mp_plugin, zero_1=zero_1, gradient_accumulation_steps=gradient_accumulation_steps + ) + + class TrainiumTestMixin: @classmethod def setUpClass(cls):
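---

Usage sketch for the new top-level `optimum.neuron.get_peft_model` export added by this patch. This is an illustration, not part of the patch itself: the checkpoint is the tiny test model already referenced in `tests/utils.py`, and the LoraConfig values are placeholders.

    from peft import LoraConfig
    from transformers import AutoModelForCausalLM

    from optimum.neuron import get_peft_model

    model = AutoModelForCausalLM.from_pretrained(
        "michaelbenayoun/llama-2-tiny-4kv-heads-4layers-random"
    )
    # Illustrative LoRA settings only.
    peft_config = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"])

    # Same signature as peft.get_peft_model: the wrapper calls the original
    # function, then replace_class_in_inheritance_hierarchy swaps PeftModel
    # for NeuronPeftModel in the returned model's class hierarchy, so the
    # Neuron-aware save_pretrained override takes effect transparently.
    peft_model = get_peft_model(model, peft_config)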
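The patch also registers `peft` in `_AVAILABILITIES` and derives `requires_peft` from `_create_requires_function`, mirroring the existing `requires_torch_xla` and `requires_neuronx_distributed` decorators. A minimal sketch of the intended usage; `merge_lora_weights` is a hypothetical function, not from the patch:

    from optimum.neuron.utils.require_utils import requires_peft

    @requires_peft  # raises a descriptive error at call time if peft is missing
    def merge_lora_weights(model):  # hypothetical example function
        # Importing from peft inside the body is safe here: the decorator has
        # already verified that the package is available.
        from peft.utils import get_peft_model_state_dict

        return get_peft_model_state_dict(model)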
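Finally, `create_accelerator` (previously `create_accelerator_for_mp` in `tests/distributed/utils.py`) now lives in `tests/utils.py` next to `get_tokenizer_and_tiny_llama_model`, so distributed and non-distributed tests import shared helpers from one place. A sketch of a call site, with illustrative parallel sizes:

    # Inside a test module under tests/distributed/, as in test_common.py.
    from ..utils import create_accelerator, get_tokenizer_and_tiny_llama_model

    # Builds a ModelParallelismPlugin for the given layout and wraps it in a
    # NeuronAccelerator; zero_1 and gradient_accumulation_steps are forwarded.
    accelerator = create_accelerator(tp_size=2, pp_size=1, zero_1=True, gradient_accumulation_steps=4)

    _, model = get_tokenizer_and_tiny_llama_model()
    model = accelerator.prepare(model)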