Commit 136a043
fix tests
JingyaHuang committed Mar 26, 2024
1 parent 0fefd97 commit 136a043
Showing 5 changed files with 49 additions and 19 deletions.
optimum/commands/export/neuron.py (5 additions, 0 deletions)
@@ -68,6 +68,11 @@ def parse_args_neuron(parser: "ArgumentParser"):
         help="If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used.",
     )
     optional_group.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.")
+    optional_group.add_argument(
+        "--disable_neuron_cache",
+        action="store_true",
+        help="Whether to disable automatic caching of compiled models (not applicable for JIT compilation).",
+    )
     optional_group.add_argument(
         "--trust-remote-code",
         action="store_true",
optimum/commands/export/neuronx.py (1 addition, 1 deletion)
@@ -83,7 +83,7 @@ def parse_args_neuronx(parser: "ArgumentParser"):
     optional_group.add_argument(
         "--disable_neuron_cache",
         action="store_true",
-        help="Whether to disable automatic caching of compiled TorchScript models (not applicable for JIT compilation).",
+        help="Whether to disable automatic caching of compiled models (not applicable for JIT compilation).",
     )
     optional_group.add_argument(
         "--trust-remote-code",
         action="store_true",
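For context, the flag introduced above is a plain argparse store_true option. A standalone sketch (standard library only, not the project's actual parser wiring) of how it behaves once wired in:

from argparse import ArgumentParser

# Minimal reproduction of the new option on a bare parser.
parser = ArgumentParser()
optional_group = parser.add_argument_group("Optional arguments")
optional_group.add_argument(
    "--disable_neuron_cache",
    action="store_true",  # defaults to False, so caching of compiled models stays on unless the flag is passed
    help="Whether to disable automatic caching of compiled models (not applicable for JIT compilation).",
)

print(parser.parse_args([]).disable_neuron_cache)                          # False
print(parser.parse_args(["--disable_neuron_cache"]).disable_neuron_cache)  # True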
optimum/exporters/neuron/convert.py (16 additions, 8 deletions)
@@ -465,7 +465,7 @@ def export_neuronx(
     config: "NeuronDefaultConfig",
     output: Path,
     compiler_workdir: Optional[Path] = None,
-    inline_weights_to_neff: bool = True,
+    inline_weights_to_neff: bool = False,
     optlevel: str = "2",
     auto_cast: Optional[str] = None,
     auto_cast_type: str = "bf16",
@@ -482,7 +482,7 @@
             Directory to store the exported Neuron model.
         compiler_workdir (`Optional[Path]`, defaults to `None`):
             The directory used by neuronx-cc, where you can find intermediary outputs (neff, weight, hlo...).
-        inline_weights_to_neff (`bool`, defaults to `True`):
+        inline_weights_to_neff (`bool`, defaults to `False`):
             Whether to inline the weights to the neff graph. If set to False, weights will be seperated from the neff.
         optlevel (`str`, defaults to `"2"`):
             The level of optimization the compiler should perform. Can be `"1"`, `"2"` or `"3"`, defaults to "2".
@@ -546,6 +546,12 @@ def export_neuronx(
     # diffusers specific
     compiler_args = add_stable_diffusion_compiler_args(config, compiler_args)

+    if config.dynamic_batch_size is True and not inline_weights_to_neff:
+        logger.warning(
+            "Dynamic batching is not yet compatible with the weights/neff non-inlined model. `inline_weights_to_neff` is set to True. If you still want to separate the neff and weights, please set `dynamic_batch_size=False`."
+        )
+        inline_weights_to_neff = True
+
     neuron_model = neuronx.trace(
         checked_model,
         dummy_inputs_tuple,
@@ -556,10 +562,6 @@
     )

     if config.dynamic_batch_size is True:
-        if not inline_weights_to_neff:
-            raise ValueError(
-                "Dynamic batching is not yet compatible with the weights/neff non-inlined model. Please set `dynamic_batch_size=False` or `inline_weights_to_neff=True`."
-            )
         neuron_model = neuronx.dynamic_batch(neuron_model)

     # diffusers specific
@@ -608,7 +610,7 @@ def export_neuron(
     config: "NeuronDefaultConfig",
     output: Path,
     compiler_workdir: Optional[Path] = None,
-    inline_weights_to_neff: bool = True,
+    inline_weights_to_neff: bool = False,
     auto_cast: Optional[str] = None,
     auto_cast_type: str = "bf16",
     disable_fast_relayout: bool = False,
@@ -626,7 +628,7 @@
             Directory to store the exported Neuron model.
         compiler_workdir (`Optional[Path]`, defaults to `None`):
             The directory used by neuron-cc, where you can find intermediary outputs (neff, weight, hlo...).
-        inline_weights_to_neff (`bool`, defaults to `True`):
+        inline_weights_to_neff (`bool`, defaults to `False`):
             Whether to inline the weights to the neff graph. If set to False, weights will be seperated from the neff.
         auto_cast (`Optional[str]`, defaults to `None`):
             Whether to cast operations from FP32 to lower precision to speed up the inference. Can be `None`, `"matmul"` or `"all"`, you should use `None` to disable any auto-casting, use `"matmul"` to cast FP32 matrix multiplication operations, and use `"all"` to cast all FP32 operations.
@@ -666,6 +668,12 @@ def export_neuron(
     checked_model = config.patch_model_for_export(model, dummy_inputs)
     compiler_args = convert_neuronx_compiler_args_to_neuron(auto_cast, auto_cast_type, disable_fast_relayout)

+    if config.dynamic_batch_size is True and not inline_weights_to_neff:
+        logger.warning(
+            "Dynamic batching is not yet compatible with the weights/neff non-inlined model. `inline_weights_to_neff` is set to True. If you still want to separate the neff and weights, please set `dynamic_batch_size=False`."
+        )
+        inline_weights_to_neff = True
+
     neuron_model = neuron.trace(
         checked_model,
         dummy_inputs_tuple,
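The net effect of the hunks above: weights are no longer inlined into the NEFF by default, and requesting dynamic batching now forces inlining back on with a warning instead of raising. A minimal standalone sketch of that precedence rule (the helper name is hypothetical, not the library's API):

import logging

logger = logging.getLogger(__name__)


def resolve_inlining(dynamic_batch_size: bool, inline_weights_to_neff: bool = False) -> bool:
    """Return the effective value of `inline_weights_to_neff` after the guard above."""
    if dynamic_batch_size and not inline_weights_to_neff:
        # Mirrors the new behavior: warn and override instead of raising a ValueError.
        logger.warning("Dynamic batching requires inlined weights; forcing `inline_weights_to_neff=True`.")
        return True
    return inline_weights_to_neff


assert resolve_inlining(dynamic_batch_size=True) is True    # forced back on
assert resolve_inlining(dynamic_batch_size=False) is False  # new default: weights kept separate from the NEFF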
optimum/neuron/utils/hub_neuronx_cache.py (11 additions, 9 deletions)
@@ -152,10 +152,7 @@ def exists(self, path: str):
         if self.default_cache.exists(path):
             return True
         rel_path = self._rel_path(path)
-        file_exists = self.api.file_exists(self.repo_id, rel_path)
-        folder_info = self.api.list_repo_tree(self.repo_id, rel_path)
-        folder_exists = len(list(folder_info)) > 1
-        exists = file_exists or folder_exists
+        exists = self.api.file_exists(self.repo_id, rel_path)
        if not exists:
            logger.warning(
                f"{rel_path} not found in {self.repo_id}: the corresponding graph will be recompiled."
@@ -184,7 +181,7 @@ def download_folder(self, folder_path: str, dst_path: str):
             folder_info = list(self.api.list_repo_tree(self.repo_id, rel_folder_path))
             folder_exists = len(folder_info) > 1
         except Exception as e:
-            logger.warning(f"{rel_folder_path} not found in {self.repo_id}: {e} \nThe model will be recompiled.")
+            logger.info(f"{rel_folder_path} not found in {self.repo_id}: {e} \nThe model will be recompiled.")
             folder_exists = False

         if folder_exists:
@@ -239,7 +236,7 @@ def download_file_to_string(self, filename: str, limit: int = None):
 def get_hub_cache():
     HUB_CACHE = "aws-neuron/optimum-neuron-cache"
     custom_hub_cache = load_custom_cache_repo_name_from_hf_home()
-    if custom_hub_cache is not None:
+    if custom_hub_cache is not None and len(custom_hub_cache) > 0:
         return custom_hub_cache
     else:
         return os.getenv("CUSTOM_CACHE_REPO", HUB_CACHE)
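The extra length check means an empty string registered under HF_HOME no longer shadows the environment variable. A standalone sketch of the resulting resolution order (names other than the default repo id are placeholders):

import os
from typing import Optional

DEFAULT_HUB_CACHE = "aws-neuron/optimum-neuron-cache"


def resolve_hub_cache(custom_hub_cache: Optional[str]) -> str:
    # A non-empty repo id registered under HF_HOME wins, then CUSTOM_CACHE_REPO, then the default repo.
    if custom_hub_cache:  # rejects both None and "", unlike the previous `is not None` check
        return custom_hub_cache
    return os.getenv("CUSTOM_CACHE_REPO", DEFAULT_HUB_CACHE)


print(resolve_hub_cache("my-org/my-neuron-cache"))  # the registered repo
print(resolve_hub_cache(""))                        # falls through to CUSTOM_CACHE_REPO or the default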
@@ -351,7 +348,7 @@ def hf_create_compile_cache(cache_url):
         # Create cache entry in local cache: it can be later synchronized with the hub cache
         registry_path = default_cache.get_cache_dir_with_cache_key(registry_folder)
         model_type = entry.config["model_type"]
-        entry_path = f"{registry_path}/{model_type}/{entry.model_id}"  # TODO: this is not applicable for checkpoints with multiple models, eg. stable diffusion
+        entry_path = f"{registry_path}/{model_type}/{entry.model_id}"
         config_path = f"{entry_path}/{entry.hash}.json"
         if not default_cache.exists(config_path):
             oldmask = os.umask(000)
@@ -549,9 +546,14 @@ def build_cache_config(
         config = exclude_white_list_from_config(config, white_list, neuron_white_list)
         clean_configs[name] = config

-    if "unet" in configs:
-        clean_configs["model_type"] = "stable-diffusion"
+    if len(clean_configs) > 1:
+        if "unet" in configs:
+            # stable diffusion
+            clean_configs["model_type"] = "stable-diffusion"
+        else:
+            # seq-to-seq
+            clean_configs["model_type"] = next(iter(clean_configs.values()))["model_type"]

         return clean_configs
     else:
         return next(iter(clean_configs.values()))
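With the removed TODO and the new branch in build_cache_config, multi-model checkpoints now get an explicit model_type: pipelines containing a unet are tagged stable-diffusion, and other multi-config exports (e.g. encoder/decoder pairs) reuse the model_type of their first sub-config. A simplified, hypothetical sketch of that resolution:

def resolve_model_type(clean_configs: dict) -> dict:
    # Simplified stand-in for the tail of build_cache_config shown above.
    if len(clean_configs) > 1:
        if "unet" in clean_configs:
            clean_configs["model_type"] = "stable-diffusion"
        else:
            # e.g. a seq2seq export with encoder and decoder sub-configs
            clean_configs["model_type"] = next(iter(clean_configs.values()))["model_type"]
        return clean_configs
    return next(iter(clean_configs.values()))


configs = {"encoder": {"model_type": "t5"}, "decoder": {"model_type": "t5"}}
print(resolve_model_type(configs)["model_type"])  # "t5"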
tests/cache/test_neuronx_cache.py (16 additions, 1 deletion)
@@ -29,6 +29,8 @@
 from optimum.neuron import NeuronModelForCausalLM, NeuronModelForSequenceClassification, NeuronStableDiffusionPipeline
 from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache
 from optimum.neuron.utils.cache_utils import (
+    CACHE_REPO_FILENAME,
+    HF_HOME,
     load_custom_cache_repo_name_from_hf_home,
     set_custom_cache_repo_name_in_hf_home,
 )
@@ -48,7 +50,9 @@ def cache_repos():
     cache_repo_id = api.create_repo(cache_repo_id, private=True).repo_id
     api.repo_info(cache_repo_id, repo_type="model")
     cache_dir = TemporaryDirectory()
-    set_custom_cache_repo_name_in_hf_home(cache_repo_id, api=api)
+    set_custom_cache_repo_name_in_hf_home(
+        cache_repo_id, api=api
+    )  # The custom repo will be registered under `HF_HOME`, we need to restore the env by the end of each test.
     assert load_custom_cache_repo_name_from_hf_home() == cache_repo_id
     cache_path = cache_dir.name
     # Modify environment to force neuronx cache to use temporary caches
@@ -70,6 +74,12 @@ def cache_repos():
         os.environ[var] = previous_env[var]


+def unset_custom_cache_repo_name_in_hf_home(hf_home: str = HF_HOME):
+    hf_home_cache_repo_file = f"{hf_home}/{CACHE_REPO_FILENAME}"
+    if os.path.isfile(hf_home_cache_repo_file):
+        os.remove(hf_home_cache_repo_file)
+
+
 def export_decoder_model(model_id):
     batch_size = 2
     sequence_length = 512
@@ -196,6 +206,7 @@ def test_decoder_cache(cache_repos):
     check_decoder_generation(model)
     # Verify the local cache directory has not been populated
     assert len(get_local_cached_files(cache_path, "neff")) == 0
+    unset_custom_cache_repo_name_in_hf_home()


 @is_inferentia_test
@@ -227,6 +238,7 @@ def test_encoder_cache(cache_repos):
     check_encoder_inference(model, tokenizer)
     # Verify the local cache directory has not been populated
     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
+    unset_custom_cache_repo_name_in_hf_home()


 @is_inferentia_test
@@ -257,6 +269,7 @@ def test_stable_diffusion_cache(cache_repos):
     check_stable_diffusion_inference(model)
     # Verify the local cache directory has not been populated
     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
+    unset_custom_cache_repo_name_in_hf_home()


 @is_inferentia_test
@@ -271,6 +284,7 @@ def test_stable_diffusion_cache(cache_repos):
     ids=["invalid_repo", "invalid_endpoint", "invalid_token"],
 )
 def test_decoder_cache_unavailable(cache_repos, var, value, match):
+    unset_custom_cache_repo_name_in_hf_home()  # clean the repo set by cli since it's prioritized than env variable
     # Modify the specified environment variable to trigger an error
     os.environ[var] = value
     # Just exporting the model will only emit a warning
@@ -301,3 +315,4 @@ def test_optimum_neuron_cli_cache_synchronize(cache_repos):
     stdout = stdout.decode("utf-8")
     assert p.returncode == 0
     assert f"1 entrie(s) found in cache for {model_id}" in stdout
+    unset_custom_cache_repo_name_in_hf_home()
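The tests now call unset_custom_cache_repo_name_in_hf_home() explicitly so the repo registered under HF_HOME by the cache_repos fixture does not leak into later tests. Purely for illustration, a hypothetical fixture-based variant that runs the same cleanup in teardown (file name and paths are assumptions, not the project's constants):

import pytest


@pytest.fixture
def registered_cache_repo(tmp_path):
    # Stand-ins for HF_HOME and CACHE_REPO_FILENAME, for illustration only.
    repo_file = tmp_path / "custom_cache_repo"
    repo_file.write_text("my-org/my-neuron-cache")
    yield str(repo_file)
    # Teardown runs even if the test fails, so the registration never leaks between tests.
    if repo_file.is_file():
        repo_file.unlink()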
