From b44db7527a420ccc51c35ecabb240ec10393e2a7 Mon Sep 17 00:00:00 2001 From: Michael Benayoun Date: Fri, 5 Apr 2024 17:43:02 +0200 Subject: [PATCH] Fix --- optimum/neuron/trainers.py | 2 +- optimum/neuron/utils/hub_neuronx_cache.py | 2 +- tests/test_trainers.py | 35 +++++++-------- tests/utils.py | 55 +---------------------- 4 files changed, 19 insertions(+), 75 deletions(-) diff --git a/optimum/neuron/trainers.py b/optimum/neuron/trainers.py index 73e05065b..1a15a8263 100755 --- a/optimum/neuron/trainers.py +++ b/optimum/neuron/trainers.py @@ -77,7 +77,6 @@ get_hf_hub_cache_repos, get_model_name_or_path, get_neuron_cache_path, - get_neuronxcc_version, get_num_neuron_cores_used, has_write_access_to_repo, ) @@ -96,6 +95,7 @@ skip_first_batches, torch_xla_safe_save_file, ) +from .utils.version_utils import get_neuronxcc_version if is_apex_available(): diff --git a/optimum/neuron/utils/hub_neuronx_cache.py b/optimum/neuron/utils/hub_neuronx_cache.py index ed866146e..3d908ee96 100644 --- a/optimum/neuron/utils/hub_neuronx_cache.py +++ b/optimum/neuron/utils/hub_neuronx_cache.py @@ -405,7 +405,7 @@ def synchronize_hub_cache(cache_path: Optional[Union[str, Path]] = None, cache_r hub_cache_proxy.synchronize() -def get_hf_hub_cache_repod_entries( +def get_hub_cached_entries( model_id: str, mode: Union[Literal["training"], Literal["inference"], Mode], cache_repo_id: Optional[str] = None ): if cache_repo_id is None: diff --git a/tests/test_trainers.py b/tests/test_trainers.py index 09a5e1671..d863e8db8 100644 --- a/tests/test_trainers.py +++ b/tests/test_trainers.py @@ -35,7 +35,6 @@ from optimum.neuron.utils.cache_utils import ( get_neuron_cache_path, list_files_in_neuron_cache, - remove_ip_adress_from_path, set_neuron_cache_path, ) from optimum.neuron.utils.testing_utils import is_trainium_test @@ -140,16 +139,15 @@ def test_train_and_eval(self): last_files_in_repo = HfApi().list_repo_files(repo_id=self.CUSTOM_PRIVATE_CACHE_REPO) last_files_in_repo = [f for f in last_files_in_repo if not f.startswith(".")] last_files_in_cache = list_files_in_neuron_cache(get_neuron_cache_path(), only_relevant_files=True) - last_files_in_cache = [remove_ip_adress_from_path(p) for p in last_files_in_cache] # TODO: investigate that, not urgent. - # self.assertListEqual( - # files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training." - # ) - # self.assertListEqual( - # files_in_cache, - # last_files_in_cache, - # "No file should have been added to the cache after first training.", - # ) + self.assertListEqual( + files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training." + ) + self.assertListEqual( + files_in_cache, + last_files_in_cache, + "No file should have been added to the cache after first training.", + ) self.assertTrue( second_training_duration < first_training_duration, @@ -295,16 +293,15 @@ def test_train_and_eval_multiple_workers(self): last_files_in_repo = HfApi().list_repo_files(repo_id=self.CUSTOM_PRIVATE_CACHE_REPO) last_files_in_repo = [f for f in last_files_in_repo if not f.startswith(".")] last_files_in_cache = list_files_in_neuron_cache(get_neuron_cache_path(), only_relevant_files=True) - last_files_in_cache = [remove_ip_adress_from_path(p) for p in last_files_in_cache] # TODO: investigate that, not urgent. - # self.assertListEqual( - # files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training." - # ) - # self.assertListEqual( - # files_in_cache, - # last_files_in_cache, - # "No file should have been added to the cache after first training.", - # ) + self.assertListEqual( + files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training." + ) + self.assertListEqual( + files_in_cache, + last_files_in_cache, + "No file should have been added to the cache after first training.", + ) self.assertTrue( second_training_duration < first_training_duration, diff --git a/tests/utils.py b/tests/utils.py index f4b584e8c..1d5a7387c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -16,11 +16,8 @@ import os import random -import shutil import string -from pathlib import Path -from tempfile import TemporaryDirectory -from typing import Dict, Optional, Set, Tuple, Union +from typing import Dict, Optional, Set, Tuple import torch from datasets import Dataset, DatasetDict @@ -30,15 +27,9 @@ from transformers.testing_utils import ENDPOINT_STAGING from optimum.neuron.utils.cache_utils import ( - _ADDED_IN_REGISTRY, - _REGISTRY_FILE_EXISTS, - NeuronHash, delete_custom_cache_repo_name_from_hf_home, load_custom_cache_repo_name_from_hf_home, - path_after_folder, - push_to_cache_on_hub, set_custom_cache_repo_name_in_hf_home, - set_neuron_cache_path, ) from optimum.utils import logging from optimum.utils.testing_utils import TOKEN, USER @@ -220,14 +211,6 @@ def tearDown(self): self.remove_all_files_in_repo(self.CUSTOM_CACHE_REPO) self.remove_all_files_in_repo(self.CUSTOM_PRIVATE_CACHE_REPO) - keys = list(_REGISTRY_FILE_EXISTS.keys()) - for key in keys: - _REGISTRY_FILE_EXISTS.pop(key) - - keys = list(_ADDED_IN_REGISTRY.keys()) - for key in keys: - _ADDED_IN_REGISTRY.pop(key) - def create_tiny_pretrained_model(self, num_linears: int = 1, random_num_linears: bool = False): return create_tiny_pretrained_model( num_linears=num_linears, @@ -241,39 +224,3 @@ def create_and_run_tiny_pretrained_model(self, num_linears: int = 1, random_num_ random_input = torch.rand(1, device="xla") print(tiny_model(random_input)) return tiny_model - - def push_tiny_pretrained_model_cache_to_hub( - self, repo_id: str, cache_dir: Optional[Union[str, Path]] = None - ) -> NeuronHash: - neuron_hash = None - orig_repo_id = load_custom_cache_repo_name_from_hf_home() - set_custom_cache_repo_name_in_hf_home(repo_id) - with TemporaryDirectory() as tmpdirname: - set_neuron_cache_path(tmpdirname) - - input_shapes = (("x", (1,)),) - data_type = torch.float32 - tiny_model = self.create_and_run_tiny_pretrained_model(random_num_linears=True) - neuron_hash = NeuronHash(tiny_model, input_shapes, data_type) - - tmp_cache_dir = Path(tmpdirname) / neuron_hash.neuron_compiler_version_dir_name - push_to_cache_on_hub( - neuron_hash, - tmp_cache_dir, - fail_when_could_not_push=True, - ) - if cache_dir is not None: - for file_or_dir in tmp_cache_dir.iterdir(): - if file_or_dir.is_file(): - shutil.copy( - file_or_dir, - cache_dir / path_after_folder(file_or_dir, neuron_hash.neuron_compiler_version_dir_name), - ) - else: - shutil.copytree( - file_or_dir, - cache_dir / path_after_folder(file_or_dir, neuron_hash.neuron_compiler_version_dir_name), - ) - if orig_repo_id is not None: - set_custom_cache_repo_name_in_hf_home(orig_repo_id) - return neuron_hash