Commit

placeholder for tests
JingyaHuang committed Mar 21, 2024
1 parent 5a490da commit a4b6334
Showing 3 changed files with 101 additions and 4 deletions.
2 changes: 1 addition & 1 deletion optimum/neuron/modeling_base.py
@@ -240,7 +240,7 @@ def _export(
        force_download: bool = False,
        cache_dir: Optional[str] = None,
        compiler_workdir: Optional[Union[str, Path]] = None,
-       inline_weights_to_neff: bool = True,
+       inline_weights_to_neff: bool = False,
        optlevel: str = "2",
        subfolder: str = "",
        local_files_only: bool = False,
4 changes: 2 additions & 2 deletions optimum/neuron/modeling_diffusion.py
@@ -573,7 +573,7 @@ def _export(
        force_download: bool = True,
        cache_dir: Optional[str] = None,
        compiler_workdir: Optional[str] = None,
-       inline_weights_to_neff: bool = True,
+       inline_weights_to_neff: bool = False,
        optlevel: str = "2",
        subfolder: str = "",
        local_files_only: bool = False,
@@ -616,7 +616,7 @@ def _export(
                standard cache should not be used.
            compiler_workdir (`Optional[str]`, defaults to `None`):
                Path to a directory in which the neuron compiler will store all intermediary files during the compilation (neff, weight, hlo graph...).
-           inline_weights_to_neff (`bool`, defaults to `True`):
+           inline_weights_to_neff (`bool`, defaults to `False`):
                Whether to inline the weights to the neff graph. If set to False, weights will be separated from the neff.
            optlevel (`str`, defaults to `"2"`):
                The level of optimization the compiler should perform. Can be `"1"`, `"2"` or `"3"`, defaults to `"2"`.
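
A minimal sketch of how the new default surfaces at export time (the checkpoint and input shapes are illustrative, and it is assumed that from_pretrained with export=True forwards inline_weights_to_neff down to _export):

from optimum.neuron import NeuronModelForSequenceClassification

# Assumption: from_pretrained(export=True, ...) forwards inline_weights_to_neff to the Neuron export.
model = NeuronModelForSequenceClassification.from_pretrained(
    "hf-internal-testing/tiny-random-BertModel",  # illustrative checkpoint (also used in the tests below)
    export=True,
    batch_size=1,
    sequence_length=64,
    # New default: weights are kept separate from the NEFF; pass True to restore the previous inlining behaviour.
    inline_weights_to_neff=False,
)
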
99 changes: 98 additions & 1 deletion tests/cache/test_neuronx_cache.py
@@ -24,7 +24,7 @@
from huggingface_hub import HfApi
from transformers.testing_utils import ENDPOINT_STAGING

-from optimum.neuron import NeuronModelForCausalLM
+from optimum.neuron import NeuronModelForCausalLM, NeuronModelForSequenceClassification, NeuronStableDiffusionPipeline
from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
from optimum.utils.testing_utils import TOKEN
@@ -76,6 +76,33 @@ def export_decoder_model(model_id):
    )


def export_encoder_model(model_id):
    batch_size = 1
    sequence_length = 64
    return NeuronModelForSequenceClassification.from_pretrained(
        model_id,
        export=True,
        dynamic_batch_size=False,
        batch_size=batch_size,
        sequence_length=sequence_length,
    )


def export_stable_diffusion_model(model_id):
    batch_size = 1
    height = 64
    width = 64
    num_images_per_prompt = 4
    return NeuronStableDiffusionPipeline.from_pretrained(
        model_id,
        export=True,
        batch_size=batch_size,
        height=height,
        width=width,
        num_images_per_prompt=num_images_per_prompt,
    )


def check_decoder_generation(model):
    batch_size = model.config.neuron["batch_size"]
    input_ids = torch.ones((batch_size, 20), dtype=torch.int64)
@@ -84,6 +111,14 @@ def check_decoder_generation(model):
    assert sample_output.shape[0] == batch_size


def check_encoder_inference(model):
    # Placeholder: a real inference check is still to be added
    pass


def check_stable_diffusion_inference(model):
    # Placeholder: a real inference check is still to be added
    pass


def get_local_cached_files(cache_path, extension="*"):
    links = glob.glob(f"{cache_path}/**/*/*.{extension}", recursive=True)
    return [link for link in links if os.path.isfile(link)]
@@ -140,6 +175,68 @@ def test_decoder_cache(cache_repos):
    assert len(get_local_cached_files(cache_path, "neff")) == 0


@is_inferentia_test
@requires_neuronx
def test_encoder_cache(cache_repos):
    cache_path, cache_repo_id = cache_repos
    model_id = "hf-internal-testing/tiny-random-BertModel"
    # Export the model a first time to populate the local cache
    model = export_encoder_model(model_id)
    check_encoder_inference(model)
    # check registry
    check_cache_entry(model, cache_path)
    # Synchronize the hub cache with the local cache
    synchronize_hub_cache(cache_repo_id=cache_repo_id)
    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
    # Verify we are able to fetch the cached entry for the model
    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
    assert len(model_entries) == 1
    assert model_entries[0] == model.config.neuron
    # Clear the local cache
    for root, dirs, files in os.walk(cache_path):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))
    assert local_cache_size(cache_path) == 0
    # Export the model again: the compilation artifacts should be fetched from the Hub
    model = export_encoder_model(model_id)
    check_encoder_inference(model)
    # Verify the local cache directory has not been populated
    assert len(get_local_cached_files(cache_path, "neuron")) == 0


@is_inferentia_test
@requires_neuronx
def test_stable_diffusion_cache(cache_repos):
    cache_path, cache_repo_id = cache_repos
    model_id = "hf-internal-testing/tiny-stable-diffusion-torch"
    # Export the model a first time to populate the local cache
    model = export_stable_diffusion_model(model_id)
    check_stable_diffusion_inference(model)
    # check registry
    check_cache_entry(model, cache_path)
    # Synchronize the hub cache with the local cache
    synchronize_hub_cache(cache_repo_id=cache_repo_id)
    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
    # Verify we are able to fetch the cached entry for the model
    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
    assert len(model_entries) == 1
    assert model_entries[0] == model.config.neuron
    # Clear the local cache
    for root, dirs, files in os.walk(cache_path):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))
    assert local_cache_size(cache_path) == 0
    # Export the model again: the compilation artifacts should be fetched from the Hub
    model = export_stable_diffusion_model(model_id)
    check_stable_diffusion_inference(model)
    # Verify the local cache directory has not been populated
    assert len(get_local_cached_files(cache_path, "neuron")) == 0


@is_inferentia_test
@requires_neuronx
@pytest.mark.parametrize(
