Skip to content

Commit

Permalink
Integrate new cache system for training (#472)
Browse files Browse the repository at this point in the history
* Integrate new cache system for training

* ci: reduce export and pipelines test frequency

This runs export and pipelines tests in dedicated pipelines with
stricter path filters to avoid running them on every change.

---------

Co-authored-by: David Corvoysier <[email protected]>
  • Loading branch information
michaelbenayoun and dacorvo authored Feb 16, 2024
1 parent 1b477ba commit d319856
Show file tree
Hide file tree
Showing 20 changed files with 370 additions and 224 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/test_inf1_export.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Optimum neuron / Test INF1 export
name: Optimum neuron / Test INF1 partial export

on:
push:
Expand All @@ -18,7 +18,7 @@ concurrency:

jobs:
do-the-job:
name: Run INF1 tests
name: Run INF1 export tests
runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
env:
AWS_REGION: us-east-1
Expand Down Expand Up @@ -46,4 +46,5 @@ jobs:
- name: Run export tests
run: |
source aws_neuron_venv_pytorch/bin/activate
export MAX_EXPORT_TEST_COMBINATIONS=1
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
47 changes: 47 additions & 0 deletions .github/workflows/test_inf1_full_export.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Full (all model combinations) INF1 export test suite.
# Path-filtered so the expensive full matrix only runs when the neuron
# exporter sources actually change; the partial suite covers everything else.
name: Optimum neuron / Test INF1 full export

on:
  push:
    branches: [ main ]
    paths:
      - "optimum/exporters/neuron/*.py"
  pull_request:
    branches: [ main ]
    paths:
      - "optimum/exporters/neuron/*.py"

concurrency:
  # One active run per branch/PR; a newer push cancels the in-flight run.
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    name: Run INF1 full export tests
    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
    env:
      AWS_REGION: us-east-1
    steps:
      - name: Check AMI
        # Sanity-check that the runner image ships the Neuron packages.
        run: dpkg -l | grep neuron
      - name: Checkout
        # v2 runs on the deprecated Node 12 runtime; v4 is the supported release.
        uses: actions/checkout@v4
      - name: Install system packages
        run: |
          sudo apt install python3.8-venv -y
      - name: Install python packages
        run: |
          python3 -m venv aws_neuron_venv_pytorch
          source aws_neuron_venv_pytorch/bin/activate
          python -m pip install -U pip
          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
          python -m pip install .[neuron,tests]
          # Replace the pinned optimum pulled in by the extras with the latest release.
          python -m pip uninstall optimum -y
          python -m pip install optimum
      - name: Run CLI tests
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/cli
      - name: Run export tests
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
6 changes: 1 addition & 5 deletions .github/workflows/test_inf1_inference.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Optimum neuron / Test INF1 inference & pipelines
name: Optimum neuron / Test INF1 inference

on:
push:
Expand Down Expand Up @@ -43,7 +43,3 @@ jobs:
run: |
source aws_neuron_venv_pytorch/bin/activate
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/inference
- name: Run pipelines tests
run: |
source aws_neuron_venv_pytorch/bin/activate
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
43 changes: 43 additions & 0 deletions .github/workflows/test_inf1_pipelines.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# INF1 pipelines test suite, split out of the inference workflow so it only
# runs when the pipelines sources change (path filters below).
name: Optimum neuron / Test INF1 pipelines

on:
  push:
    branches: [ main ]
    paths:
      - "optimum/neuron/pipelines/**.py"
  pull_request:
    branches: [ main ]
    paths:
      - "optimum/neuron/pipelines/**.py"

concurrency:
  # One active run per branch/PR; a newer push cancels the in-flight run.
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    # Named consistently with the other INF1 workflows ("Run INF1 <suite> tests").
    name: Run INF1 pipelines tests
    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
    env:
      AWS_REGION: us-east-1
    steps:
      - name: Check AMI
        # Sanity-check that the runner image ships the Neuron packages.
        run: dpkg -l | grep neuron
      - name: Checkout
        # v2 runs on the deprecated Node 12 runtime; v4 is the supported release.
        uses: actions/checkout@v4
      - name: Install system packages
        run: |
          sudo apt install python3.8-venv -y
      - name: Install python packages
        run: |
          python3 -m venv aws_neuron_venv_pytorch
          source aws_neuron_venv_pytorch/bin/activate
          python -m pip install -U pip
          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
          python -m pip install .[neuron,tests]
          # Replace the pinned optimum pulled in by the extras with the latest release.
          python -m pip uninstall optimum -y
          python -m pip install optimum
      - name: Run pipelines tests
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
5 changes: 3 additions & 2 deletions .github/workflows/test_inf2_export.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Optimum neuron / Test INF2 export
name: Optimum neuron / Test INF2 partial export

on:
push:
Expand All @@ -18,7 +18,7 @@ concurrency:

jobs:
do-the-job:
name: Run INF2 tests
name: Run INF2 export tests
runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
env:
AWS_REGION: us-east-1
Expand All @@ -38,4 +38,5 @@ jobs:
- name: Run exporters tests
run: |
source aws_neuron_venv_pytorch/bin/activate
export MAX_EXPORT_TEST_COMBINATIONS=1
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
39 changes: 39 additions & 0 deletions .github/workflows/test_inf2_full_export.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Full (all model combinations) INF2 export test suite.
# Path-filtered so the expensive full matrix only runs when the neuron
# exporter sources actually change; the partial suite covers everything else.
name: Optimum neuron / Test INF2 full export

on:
  push:
    branches: [ main ]
    paths:
      - "optimum/exporters/neuron/*.py"
  pull_request:
    branches: [ main ]
    paths:
      - "optimum/exporters/neuron/*.py"

concurrency:
  # One active run per branch/PR; a newer push cancels the in-flight run.
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    name: Run INF2 full export tests
    runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
    env:
      AWS_REGION: us-east-1
    steps:
      - name: Check AMI
        # Sanity-check that the runner image ships the Neuron packages.
        run: dpkg -l | grep neuron
      - name: Checkout
        # v2 runs on the deprecated Node 12 runtime; v4 is the supported release.
        uses: actions/checkout@v4
      - name: Install python dependencies
        run: |
          sudo apt install python3.8-venv -y
          python3 -m venv aws_neuron_venv_pytorch
          source aws_neuron_venv_pytorch/bin/activate
          python -m pip install -U pip
          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
          python -m pip install .[neuronx,tests]
      - name: Run exporters tests
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@

include README.md
include LICENSE
include optimum/neuron/utils/neuron_cc_wrapper
77 changes: 19 additions & 58 deletions optimum/commands/neuron/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
CACHE_REPO_NAME,
HF_HOME_CACHE_REPO_FILE,
create_custom_cache_repo,
list_in_registry,
load_custom_cache_repo_name_from_hf_home,
set_custom_cache_repo_name_in_hf_home,
)
from ...neuron.utils.runner import ExampleRunner
Expand Down Expand Up @@ -163,52 +161,6 @@ def run(self):
)


class ListRepoCommand(BaseOptimumCLICommand):
    """CLI command that prints the entries registered in a Neuron cache repo."""

    @staticmethod
    def parse_args(parser: "ArgumentParser"):
        # Repo name is optional: when omitted, the locally saved cache repo is used.
        parser.add_argument(
            "name",
            type=str,
            nargs="?",
            default=None,
            help="The name of the repo to list. Will use the locally saved cache repo if left unspecified.",
        )
        parser.add_argument(
            "-m",
            "--model",
            type=str,
            default=None,
            help="The model name or path of the model to consider. If left unspecified, will list all available models.",
        )
        parser.add_argument(
            "-v",
            "--version",
            type=str,
            default=None,
            help=(
                "The version of the Neuron X Compiler to consider. Will list all available versions if left "
                "unspecified."
            ),
        )

    def run(self):
        # Resolve the repo name from the local HF home config when not given explicitly.
        if self.args.name is None:
            saved_repo_name = load_custom_cache_repo_name_from_hf_home()
            if saved_repo_name is None:
                raise ValueError("No custom cache repo was set locally so you need to specify a cache repo name.")
            self.args.name = saved_repo_name

        entries = list_in_registry(
            self.args.name, model_name_or_path_or_hash=self.args.model, neuron_compiler_version=self.args.version
        )
        if not entries:
            entries = ["Nothing was found."]
        # Visually separate each entry with a ruler line.
        separator = "\n" + "=" * 50 + "\n"
        body = separator.join(entries)

        print(f"\n*** Repo id: {self.args.name} ***\n\n{body}")


class SynchronizeRepoCommand(BaseOptimumCLICommand):
@staticmethod
def parse_args(parser: "ArgumentParser"):
Expand All @@ -226,18 +178,32 @@ def parse_args(parser: "ArgumentParser"):
type=str,
help="The model_id to lookup cached versions for.",
)
parser.add_argument(
"--mode",
type=str,
choices=["training", "inference", "all"],
default="all",
help='The mode you wish to lookup compilation files for. Can be either "training", "inference" or "all"',
)
parser.add_argument("--repo_id", type=str, default=None, help="The name of the repo to use as remote cache.")

def run(self):
entries = get_hub_cached_entries(self.args.model_id, cache_repo_id=self.args.repo_id)
def _list_entries(self, mode: str):
    """Prints the hub cache entries found for ``self.args.model_id``.

    Args:
        mode: Which compilation files to look up ("training" or "inference").
    """
    entries = get_hub_cached_entries(self.args.model_id, mode, cache_repo_id=self.args.repo_id)
    n_entries = len(entries)
    # Fixed typo in the user-facing summary: "entrie(s)" -> "entry(ies)".
    output = f"\n*** {n_entries} entry(ies) found in cache for {self.args.model_id} for {mode}.***\n\n"
    for entry in entries:
        for key, value in entry.items():
            output += f"\n{key}: {value}"
        output += "\n"
    print(output)

def run(self):
    """Lists cached entries for the requested mode(s)."""
    # "all" expands to both lookup modes; otherwise honor the single requested one.
    modes = ["training", "inference"] if self.args.mode == "all" else [self.args.mode]
    for mode in modes:
        self._list_entries(mode)


class CustomCacheRepoCommand(BaseOptimumCLICommand):
SUBCOMMANDS = (
Expand All @@ -256,19 +222,14 @@ class CustomCacheRepoCommand(BaseOptimumCLICommand):
help="Add a model to the cache of your choice (trainium only).",
subcommand_class=AddToCacheRepoCommand,
),
CommandInfo(
name="list",
help="List models in a cache repo (trainium only).",
subcommand_class=ListRepoCommand,
),
CommandInfo(
name="synchronize",
help="Synchronize the neuronx compiler cache with a hub cache repo (inferentia only).",
help="Synchronize the neuronx compiler cache with a hub cache repo.",
subcommand_class=SynchronizeRepoCommand,
),
CommandInfo(
name="lookup",
help="Lookup the neuronx compiler hub cache for the specified model id (inferentia only).",
help="Lookup the neuronx compiler hub cache for the specified model id.",
subcommand_class=LookupRepoCommand,
),
)
2 changes: 1 addition & 1 deletion optimum/neuron/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __init__(
cache_entry = None if checkpoint_id is None else ModelCacheEntry(checkpoint_id, config)

# Export the model using the Optimum Neuron Cache
with hub_neuronx_cache(entry=cache_entry):
with hub_neuronx_cache("inference", entry=cache_entry):
available_cores = get_available_cores()
if num_cores > available_cores:
raise ValueError(
Expand Down
Loading

0 comments on commit d319856

Please sign in to comment.