Commit

placeholder for tests
JingyaHuang committed Mar 21, 2024
1 parent 5a490da commit a4b6334
Showing 3 changed files with 101 additions and 4 deletions.
2 changes: 1 addition & 1 deletion optimum/neuron/modeling_base.py
@@ -240,7 +240,7 @@ def _export(
        force_download: bool = False,
        cache_dir: Optional[str] = None,
        compiler_workdir: Optional[Union[str, Path]] = None,
-       inline_weights_to_neff: bool = True,
+       inline_weights_to_neff: bool = False,
        optlevel: str = "2",
        subfolder: str = "",
        local_files_only: bool = False,
4 changes: 2 additions & 2 deletions optimum/neuron/modeling_diffusion.py
@@ -573,7 +573,7 @@ def _export(
        force_download: bool = True,
        cache_dir: Optional[str] = None,
        compiler_workdir: Optional[str] = None,
-       inline_weights_to_neff: bool = True,
+       inline_weights_to_neff: bool = False,
        optlevel: str = "2",
        subfolder: str = "",
        local_files_only: bool = False,
@@ -616,7 +616,7 @@ def _export(
                standard cache should not be used.
            compiler_workdir (`Optional[str]`, defaults to `None`):
                Path to a directory in which the neuron compiler will store all intermediary files during the compilation (neff, weight, hlo graph...).
-           inline_weights_to_neff (`bool`, defaults to `True`):
+           inline_weights_to_neff (`bool`, defaults to `False`):
                Whether to inline the weights to the neff graph. If set to False, weights will be separated from the neff.
            optlevel (`str`, defaults to `"2"`):
                The level of optimization the compiler should perform. Can be `"1"`, `"2"` or `"3"`, defaults to `"2"`.
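
A minimal sketch of how the new default surfaces at export time (the checkpoint and input shapes are illustrative, and it is assumed that from_pretrained with export=True forwards inline_weights_to_neff down to _export):

from optimum.neuron import NeuronModelForSequenceClassification

# Assumption: from_pretrained(export=True, ...) forwards inline_weights_to_neff to the Neuron export.
model = NeuronModelForSequenceClassification.from_pretrained(
    "hf-internal-testing/tiny-random-BertModel",  # illustrative checkpoint (also used in the tests below)
    export=True,
    batch_size=1,
    sequence_length=64,
    # New default: weights are kept separate from the NEFF; pass True to restore the previous inlining behaviour.
    inline_weights_to_neff=False,
)
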
99 changes: 98 additions & 1 deletion tests/cache/test_neuronx_cache.py
@@ -24,7 +24,7 @@
from huggingface_hub import HfApi
from transformers.testing_utils import ENDPOINT_STAGING

-from optimum.neuron import NeuronModelForCausalLM
+from optimum.neuron import NeuronModelForCausalLM, NeuronModelForSequenceClassification, NeuronStableDiffusionPipeline
from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
from optimum.utils.testing_utils import TOKEN
@@ -76,6 +76,33 @@ def export_decoder_model(model_id):
    )


def export_encoder_model(model_id):
    batch_size = 1
    sequence_length = 64
    return NeuronModelForSequenceClassification.from_pretrained(
        model_id,
        export=True,
        dynamic_batch_size=False,
        batch_size=batch_size,
        sequence_length=sequence_length,
    )


def export_stable_diffusion_model(model_id):
    batch_size = 1
    height = 64
    width = 64
    num_images_per_prompt = 4
    return NeuronStableDiffusionPipeline.from_pretrained(
        model_id,
        export=True,
        batch_size=batch_size,
        height=height,
        width=width,
        num_images_per_prompt=num_images_per_prompt,
    )


def check_decoder_generation(model):
    batch_size = model.config.neuron["batch_size"]
    input_ids = torch.ones((batch_size, 20), dtype=torch.int64)
@@ -84,6 +111,14 @@ def check_decoder_generation(model):
    assert sample_output.shape[0] == batch_size


def check_encoder_inference(model):
    # Placeholder: a real inference check is still to be added
    pass


def check_stable_diffusion_inference(model):
    # Placeholder: a real inference check is still to be added
    pass


def get_local_cached_files(cache_path, extension="*"):
    links = glob.glob(f"{cache_path}/**/*/*.{extension}", recursive=True)
    return [link for link in links if os.path.isfile(link)]
@@ -140,6 +175,68 @@ def test_decoder_cache(cache_repos):
    assert len(get_local_cached_files(cache_path, "neff")) == 0


@is_inferentia_test
@requires_neuronx
def test_encoder_cache(cache_repos):
    cache_path, cache_repo_id = cache_repos
    model_id = "hf-internal-testing/tiny-random-BertModel"
    # Export the model a first time to populate the local cache
    model = export_encoder_model(model_id)
    check_encoder_inference(model)
    # check registry
    check_cache_entry(model, cache_path)
    # Synchronize the hub cache with the local cache
    synchronize_hub_cache(cache_repo_id=cache_repo_id)
    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
    # Verify we are able to fetch the cached entry for the model
    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
    assert len(model_entries) == 1
    assert model_entries[0] == model.config.neuron
    # Clear the local cache
    for root, dirs, files in os.walk(cache_path):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))
    assert local_cache_size(cache_path) == 0
    # Export the model again: the compilation artifacts should be fetched from the Hub
    model = export_encoder_model(model_id)
    check_encoder_inference(model)
    # Verify the local cache directory has not been populated
    assert len(get_local_cached_files(cache_path, "neuron")) == 0


@is_inferentia_test
@requires_neuronx
def test_stable_diffusion_cache(cache_repos):
    cache_path, cache_repo_id = cache_repos
    model_id = "hf-internal-testing/tiny-stable-diffusion-torch"
    # Export the model a first time to populate the local cache
    model = export_stable_diffusion_model(model_id)
    check_stable_diffusion_inference(model)
    # check registry
    check_cache_entry(model, cache_path)
    # Synchronize the hub cache with the local cache
    synchronize_hub_cache(cache_repo_id=cache_repo_id)
    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
    # Verify we are able to fetch the cached entry for the model
    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
    assert len(model_entries) == 1
    assert model_entries[0] == model.config.neuron
    # Clear the local cache
    for root, dirs, files in os.walk(cache_path):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))
    assert local_cache_size(cache_path) == 0
    # Export the model again: the compilation artifacts should be fetched from the Hub
    model = export_stable_diffusion_model(model_id)
    check_stable_diffusion_inference(model)
    # Verify the local cache directory has not been populated
    assert len(get_local_cached_files(cache_path, "neuron")) == 0


@is_inferentia_test
@requires_neuronx
@pytest.mark.parametrize(
