Skip to content

Commit

Permalink
Fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
michaelbenayoun committed Apr 5, 2024
1 parent 2574d52 commit b44db75
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 75 deletions.
2 changes: 1 addition & 1 deletion optimum/neuron/trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
get_hf_hub_cache_repos,
get_model_name_or_path,
get_neuron_cache_path,
get_neuronxcc_version,
get_num_neuron_cores_used,
has_write_access_to_repo,
)
Expand All @@ -96,6 +95,7 @@
skip_first_batches,
torch_xla_safe_save_file,
)
from .utils.version_utils import get_neuronxcc_version


if is_apex_available():
Expand Down
2 changes: 1 addition & 1 deletion optimum/neuron/utils/hub_neuronx_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def synchronize_hub_cache(cache_path: Optional[Union[str, Path]] = None, cache_r
hub_cache_proxy.synchronize()


def get_hf_hub_cache_repod_entries(
def get_hub_cached_entries(
model_id: str, mode: Union[Literal["training"], Literal["inference"], Mode], cache_repo_id: Optional[str] = None
):
if cache_repo_id is None:
Expand Down
35 changes: 16 additions & 19 deletions tests/test_trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from optimum.neuron.utils.cache_utils import (
get_neuron_cache_path,
list_files_in_neuron_cache,
remove_ip_adress_from_path,
set_neuron_cache_path,
)
from optimum.neuron.utils.testing_utils import is_trainium_test
Expand Down Expand Up @@ -140,16 +139,15 @@ def test_train_and_eval(self):
last_files_in_repo = HfApi().list_repo_files(repo_id=self.CUSTOM_PRIVATE_CACHE_REPO)
last_files_in_repo = [f for f in last_files_in_repo if not f.startswith(".")]
last_files_in_cache = list_files_in_neuron_cache(get_neuron_cache_path(), only_relevant_files=True)
last_files_in_cache = [remove_ip_adress_from_path(p) for p in last_files_in_cache]
# TODO: investigate that, not urgent.
# self.assertListEqual(
# files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training."
# )
# self.assertListEqual(
# files_in_cache,
# last_files_in_cache,
# "No file should have been added to the cache after first training.",
# )
self.assertListEqual(
files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training."
)
self.assertListEqual(
files_in_cache,
last_files_in_cache,
"No file should have been added to the cache after first training.",
)

self.assertTrue(
second_training_duration < first_training_duration,
Expand Down Expand Up @@ -295,16 +293,15 @@ def test_train_and_eval_multiple_workers(self):
last_files_in_repo = HfApi().list_repo_files(repo_id=self.CUSTOM_PRIVATE_CACHE_REPO)
last_files_in_repo = [f for f in last_files_in_repo if not f.startswith(".")]
last_files_in_cache = list_files_in_neuron_cache(get_neuron_cache_path(), only_relevant_files=True)
last_files_in_cache = [remove_ip_adress_from_path(p) for p in last_files_in_cache]
# TODO: investigate that, not urgent.
# self.assertListEqual(
# files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training."
# )
# self.assertListEqual(
# files_in_cache,
# last_files_in_cache,
# "No file should have been added to the cache after first training.",
# )
self.assertListEqual(
files_in_repo, last_files_in_repo, "No file should have been added to the Hub after first training."
)
self.assertListEqual(
files_in_cache,
last_files_in_cache,
"No file should have been added to the cache after first training.",
)

self.assertTrue(
second_training_duration < first_training_duration,
Expand Down
55 changes: 1 addition & 54 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@

import os
import random
import shutil
import string
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Dict, Optional, Set, Tuple, Union
from typing import Dict, Optional, Set, Tuple

import torch
from datasets import Dataset, DatasetDict
Expand All @@ -30,15 +27,9 @@
from transformers.testing_utils import ENDPOINT_STAGING

from optimum.neuron.utils.cache_utils import (
_ADDED_IN_REGISTRY,
_REGISTRY_FILE_EXISTS,
NeuronHash,
delete_custom_cache_repo_name_from_hf_home,
load_custom_cache_repo_name_from_hf_home,
path_after_folder,
push_to_cache_on_hub,
set_custom_cache_repo_name_in_hf_home,
set_neuron_cache_path,
)
from optimum.utils import logging
from optimum.utils.testing_utils import TOKEN, USER
Expand Down Expand Up @@ -220,14 +211,6 @@ def tearDown(self):
self.remove_all_files_in_repo(self.CUSTOM_CACHE_REPO)
self.remove_all_files_in_repo(self.CUSTOM_PRIVATE_CACHE_REPO)

keys = list(_REGISTRY_FILE_EXISTS.keys())
for key in keys:
_REGISTRY_FILE_EXISTS.pop(key)

keys = list(_ADDED_IN_REGISTRY.keys())
for key in keys:
_ADDED_IN_REGISTRY.pop(key)

def create_tiny_pretrained_model(self, num_linears: int = 1, random_num_linears: bool = False):
return create_tiny_pretrained_model(
num_linears=num_linears,
Expand All @@ -241,39 +224,3 @@ def create_and_run_tiny_pretrained_model(self, num_linears: int = 1, random_num_
random_input = torch.rand(1, device="xla")
print(tiny_model(random_input))
return tiny_model

def push_tiny_pretrained_model_cache_to_hub(
self, repo_id: str, cache_dir: Optional[Union[str, Path]] = None
) -> NeuronHash:
neuron_hash = None
orig_repo_id = load_custom_cache_repo_name_from_hf_home()
set_custom_cache_repo_name_in_hf_home(repo_id)
with TemporaryDirectory() as tmpdirname:
set_neuron_cache_path(tmpdirname)

input_shapes = (("x", (1,)),)
data_type = torch.float32
tiny_model = self.create_and_run_tiny_pretrained_model(random_num_linears=True)
neuron_hash = NeuronHash(tiny_model, input_shapes, data_type)

tmp_cache_dir = Path(tmpdirname) / neuron_hash.neuron_compiler_version_dir_name
push_to_cache_on_hub(
neuron_hash,
tmp_cache_dir,
fail_when_could_not_push=True,
)
if cache_dir is not None:
for file_or_dir in tmp_cache_dir.iterdir():
if file_or_dir.is_file():
shutil.copy(
file_or_dir,
cache_dir / path_after_folder(file_or_dir, neuron_hash.neuron_compiler_version_dir_name),
)
else:
shutil.copytree(
file_or_dir,
cache_dir / path_after_folder(file_or_dir, neuron_hash.neuron_compiler_version_dir_name),
)
if orig_repo_id is not None:
set_custom_cache_repo_name_in_hf_home(orig_repo_id)
return neuron_hash

0 comments on commit b44db75

Please sign in to comment.