Fix offline compatibility #1805

Merged · 2 commits · Apr 10, 2024
43 changes: 43 additions & 0 deletions .github/workflows/test_offline.yml
@@ -0,0 +1,43 @@
+name: Offline usage / Python - Test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [ubuntu-20.04]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies for pytorch export
+        run: |
+          pip install .[tests,exporters,onnxruntime]
+      - name: Test with unittest
+        run: |
+          HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2
+
+          HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation
+
+          huggingface-cli download hf-internal-testing/tiny-random-gpt2
+
+          HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation
+
+          pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv
+
+          HF_HUB_OFFLINE=1 pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv
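
The new workflow warms the Hub cache online, then repeats the export and the modeling tests with HF_HUB_OFFLINE=1 so they must be served entirely from the cache. A minimal local reproduction of the same two-phase check, as a sketch (the /tmp/hf-offline-test cache location is an arbitrary choice, not part of this PR):

    import os
    import subprocess

    # Isolated cache: the offline pass can only see what the online pass downloaded.
    env = dict(os.environ, HF_HOME="/tmp/hf-offline-test")

    # Online pass: populate the cache.
    subprocess.run(
        ["huggingface-cli", "download", "hf-internal-testing/tiny-random-gpt2"],
        env=env,
        check=True,
    )

    # Offline pass: the export must succeed from the cache alone.
    env["HF_HUB_OFFLINE"] = "1"
    subprocess.run(
        [
            "optimum-cli", "export", "onnx",
            "--model", "hf-internal-testing/tiny-random-gpt2",
            "gpt2_onnx", "--task", "text-generation",
        ],
        env=env,
        check=True,
    )
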
6 changes: 5 additions & 1 deletion optimum/commands/export/onnx.py
@@ -18,6 +18,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+
 from ...exporters import TasksManager
 from ...utils import DEFAULT_DUMMY_SHAPES
 from ..base import BaseOptimumCLICommand
@@ -122,7 +124,9 @@ def parse_args_onnx(parser):
         default=None,
         help="If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used.",
     )
-    optional_group.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.")
+    optional_group.add_argument(
+        "--cache_dir", type=str, default=HUGGINGFACE_HUB_CACHE, help="Path indicating where to store cache."
+    )
     optional_group.add_argument(
         "--trust-remote-code",
         action="store_true",
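
The default changing from None to HUGGINGFACE_HUB_CACHE is what makes the offline fallback workable: TasksManager.get_model_files (below) builds a models--{org}--{name} directory under cache_dir, which it cannot do when cache_dir is None. A quick sketch of what the default resolves to (the printed path is machine-dependent):

    from pathlib import Path

    from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

    repo_id = "hf-internal-testing/tiny-random-gpt2"
    object_id = repo_id.replace("/", "--")

    # Typically ~/.cache/huggingface/hub/models--hf-internal-testing--tiny-random-gpt2
    print(Path(HUGGINGFACE_HUB_CACHE, f"models--{object_id}"))
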
3 changes: 2 additions & 1 deletion optimum/exporters/onnx/__main__.py
@@ -17,6 +17,7 @@
 import argparse
 from pathlib import Path
 
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from packaging import version
 from requests.exceptions import ConnectionError as RequestsConnectionError
 from transformers import AutoConfig, AutoTokenizer
@@ -57,7 +58,7 @@ def main_export(
     no_post_process: bool = False,
     framework: Optional[str] = None,
     atol: Optional[float] = None,
-    cache_dir: Optional[str] = None,
+    cache_dir: str = HUGGINGFACE_HUB_CACHE,
     trust_remote_code: bool = False,
     pad_token_id: Optional[int] = None,
     subfolder: str = "",
35 changes: 23 additions & 12 deletions optimum/exporters/tasks.py
@@ -23,6 +23,7 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
 
 import huggingface_hub
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from packaging import version
 from requests.exceptions import ConnectionError as RequestsConnectionError
 from transformers import AutoConfig, PretrainedConfig, is_tf_available, is_torch_available
@@ -1377,8 +1378,9 @@ def get_model_class_for_task(
     def get_model_files(
         model_name_or_path: Union[str, Path],
         subfolder: str = "",
-        cache_dir: str = huggingface_hub.constants.HUGGINGFACE_HUB_CACHE,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         use_auth_token: Optional[str] = None,
+        revision: Optional[str] = None,
     ):
         request_exception = None
         full_model_path = Path(model_name_or_path) / subfolder
@@ -1393,21 +1395,25 @@ def get_model_files(
                 if not isinstance(model_name_or_path, str):
                     model_name_or_path = str(model_name_or_path)
                 all_files = huggingface_hub.list_repo_files(
-                    model_name_or_path, repo_type="model", token=use_auth_token
+                    model_name_or_path,
+                    repo_type="model",
+                    token=use_auth_token,
+                    revision=revision,
                 )
                 if subfolder != "":
                     all_files = [file[len(subfolder) + 1 :] for file in all_files if file.startswith(subfolder)]
-            except RequestsConnectionError as e:  # Hub not accessible
+            except (RequestsConnectionError, huggingface_hub.utils._http.OfflineModeIsEnabled) as e:
                 request_exception = e
                 object_id = model_name_or_path.replace("/", "--")
                 full_model_path = Path(cache_dir, f"models--{object_id}")
                 if full_model_path.is_dir():  # explore the cache first
                     # Resolve refs (for instance to convert main to the associated commit sha)
-                    revision_file = Path(full_model_path, "refs", "main")
-                    revision = ""
-                    if revision_file.is_file():
-                        with open(revision_file) as f:
-                            revision = f.read()
+                    if revision is None:
+                        revision_file = Path(full_model_path, "refs", "main")
+                        revision = ""
+                        if revision_file.is_file():
+                            with open(revision_file) as f:
+                                revision = f.read()
                     cached_path = Path(full_model_path, "snapshots", revision, subfolder)
                     all_files = [
                         os.path.relpath(os.path.join(dirpath, file), cached_path)
@@ -1422,7 +1428,7 @@ def determine_framework(
         model_name_or_path: Union[str, Path],
         subfolder: str = "",
         framework: Optional[str] = None,
-        cache_dir: str = huggingface_hub.constants.HUGGINGFACE_HUB_CACHE,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
     ) -> str:
         """
         Determines the framework to use for the export.
@@ -1568,7 +1574,12 @@ def _infer_task_from_model_name_or_path(
             raise RuntimeError(
                 "Cannot infer the task from a model repo with a subfolder yet, please specify the task manually."
             )
-        model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
+        try:
+            model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
+        except (RequestsConnectionError, huggingface_hub.utils._http.OfflineModeIsEnabled):
+            raise RuntimeError(
+                f"Hugging Face Hub is not reachable and we cannot infer the task from a cached model. Make sure you are not offline, or otherwise please specify the `task` (or `--task` in command-line) argument ({', '.join(TasksManager.get_all_tasks())})."
+            )
         library_name = TasksManager.infer_library_from_model(model_name_or_path, subfolder, revision)
 
         if library_name == "diffusers":
@@ -1680,7 +1691,7 @@ def infer_library_from_model(
         model_name_or_path: Union[str, Path],
         subfolder: str = "",
         revision: Optional[str] = None,
-        cache_dir: str = huggingface_hub.constants.HUGGINGFACE_HUB_CACHE,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         library_name: Optional[str] = None,
         use_auth_token: Optional[str] = None,
     ):
@@ -1827,7 +1838,7 @@ def get_model_from_task(
         subfolder: str = "",
         revision: Optional[str] = None,
         framework: Optional[str] = None,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         torch_dtype: Optional["torch.dtype"] = None,
         device: Optional[Union["torch.device", str]] = None,
         library_name: str = None,
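
The recurring pattern in tasks.py is to treat OfflineModeIsEnabled exactly like a connection error and fall back to the local cache. A condensed sketch of that pattern (the list_files_or_none helper is illustrative, not part of this PR):

    import huggingface_hub
    from requests.exceptions import ConnectionError as RequestsConnectionError

    def list_files_or_none(repo_id: str):
        # Mirrors the except clause added above: both plain network failures
        # and HF_HUB_OFFLINE=1 route to the cache-exploration path.
        try:
            return huggingface_hub.list_repo_files(repo_id, repo_type="model")
        except (RequestsConnectionError, huggingface_hub.utils._http.OfflineModeIsEnabled):
            return None  # caller falls back to scanning the local cache
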
11 changes: 6 additions & 5 deletions optimum/modeling_base.py
@@ -22,6 +22,7 @@
 from typing import TYPE_CHECKING, Optional, Union
 
 from huggingface_hub import HfApi, HfFolder
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from transformers import AutoConfig, PretrainedConfig, add_start_docstrings
 
 from .exporters import TasksManager
@@ -220,7 +221,7 @@ def _load_config(
         cls,
         config_name_or_path: Union[str, os.PathLike],
         revision: Optional[str] = None,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         use_auth_token: Optional[Union[bool, str]] = False,
         force_download: bool = False,
         subfolder: str = "",
@@ -262,7 +263,7 @@ def _from_pretrained(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         **kwargs,
@@ -278,7 +279,7 @@ def _from_transformers(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
@@ -298,7 +299,7 @@ def _export(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
@@ -317,7 +318,7 @@ def from_pretrained(
         export: bool = False,
         force_download: bool = False,
         use_auth_token: Optional[str] = None,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         config: Optional[PretrainedConfig] = None,
         local_files_only: bool = False,
5 changes: 3 additions & 2 deletions optimum/onnxruntime/modeling_decoder.py
@@ -21,6 +21,7 @@
 import numpy as np
 import onnx
 import torch
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from onnx.tools import update_model_dims
 from transformers import AutoModelForCausalLM, GenerationConfig
 from transformers.file_utils import add_end_docstrings, add_start_docstrings_to_model_forward
@@ -407,7 +408,7 @@ def _from_pretrained(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         file_name: Optional[str] = None,
         subfolder: str = "",
         use_cache: bool = True,
@@ -577,7 +578,7 @@ def _from_transformers(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: str = "main",
         force_download: bool = True,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
5 changes: 3 additions & 2 deletions optimum/onnxruntime/modeling_diffusion.py
@@ -33,6 +33,7 @@
 from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
 from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available
 from huggingface_hub import snapshot_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from transformers import CLIPFeatureExtractor, CLIPTokenizer
 from transformers.file_utils import add_end_docstrings
 
@@ -272,7 +273,7 @@ def _from_pretrained(
         config: Dict[str, Any],
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         vae_decoder_file_name: str = ONNX_WEIGHTS_NAME,
         text_encoder_file_name: str = ONNX_WEIGHTS_NAME,
         unet_file_name: str = ONNX_WEIGHTS_NAME,
@@ -377,7 +378,7 @@ def _from_transformers(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: str = "main",
         force_download: bool = True,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
22 changes: 14 additions & 8 deletions optimum/onnxruntime/modeling_ort.py
@@ -22,7 +22,8 @@
 
 import numpy as np
 import torch
-from huggingface_hub import HfApi, HfFolder, hf_hub_download
+from huggingface_hub import HfFolder, hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from huggingface_hub.utils import EntryNotFoundError
 from transformers import (
     AutoConfig,
@@ -449,7 +450,7 @@ def _from_pretrained(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         file_name: Optional[str] = None,
         subfolder: str = "",
         local_files_only: bool = False,
@@ -471,7 +472,12 @@ def _from_pretrained(
                 token = HfFolder().get_token()
             else:
                 token = use_auth_token
-            repo_files = map(Path, HfApi().list_repo_files(model_id, revision=revision, token=token))
+
+            repo_files, _ = TasksManager.get_model_files(
+                model_id, revision=revision, cache_dir=cache_dir, use_auth_token=token
+            )
+            repo_files = map(Path, repo_files)
+
             pattern = "*.onnx" if subfolder == "" else f"{subfolder}/*.onnx"
             onnx_files = [p for p in repo_files if p.match(pattern)]
 
@@ -531,7 +537,7 @@ def _from_transformers(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
@@ -567,7 +573,7 @@ def _export(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
@@ -619,7 +625,7 @@ def from_pretrained(
         export: bool = False,
         force_download: bool = False,
         use_auth_token: Optional[str] = None,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         config: Optional["PretrainedConfig"] = None,
         local_files_only: bool = False,
@@ -852,7 +858,7 @@ def _cached_file(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         file_name: Optional[str] = None,
         subfolder: str = "",
         local_files_only: bool = False,
@@ -1017,7 +1023,7 @@ def _export(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
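
With file discovery routed through TasksManager.get_model_files instead of a direct HfApi().list_repo_files call, loading should no longer require network access for anything already cached or exported. A hedged usage sketch (assumes gpt2_onnx/ was produced by the export command earlier):

    import os

    # Offline mode is read from the environment when huggingface_hub is
    # imported, so set it before importing optimum.
    os.environ["HF_HUB_OFFLINE"] = "1"

    from optimum.onnxruntime import ORTModelForCausalLM

    # Loading from the local export directory needs no network at all.
    model = ORTModelForCausalLM.from_pretrained("gpt2_onnx")
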
5 changes: 3 additions & 2 deletions optimum/onnxruntime/modeling_seq2seq.py
@@ -28,6 +28,7 @@
 import numpy as np
 import torch
 from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from transformers import (
     AutoModelForSeq2SeqLM,
     AutoModelForSpeechSeq2Seq,
@@ -778,7 +779,7 @@ def _from_pretrained(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         encoder_file_name: str = ONNX_ENCODER_NAME,
         decoder_file_name: str = ONNX_DECODER_NAME,
         decoder_with_past_file_name: str = ONNX_DECODER_WITH_PAST_NAME,
@@ -1023,7 +1024,7 @@ def _from_transformers(
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: str = "main",
         force_download: bool = True,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,