Skip to content

Commit

Permalink
ci: update llm cache files
Browse files — browse the repository at this point in the history
  • Loading branch information
dacorvo committed Sep 26, 2024
1 parent c3eed64 commit 7a8e99c
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 94 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/inference_cache_llm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ jobs:
matrix:
config: [
gpt2,
llama3-8b,
llama,
llama3.1-70b,
llama3-70b,
llama2-7b-13b,
llama2-70b,
mistral,
llama-variants,
Expand Down
43 changes: 0 additions & 43 deletions benchmark/text-generation/llama2-7b.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@


def main():
NUM_CORES = 8
NUM_CORES = 12
num_cores = get_available_cores()
if num_cores < NUM_CORES:
raise ValueError(f"This benchmark can only run on an instance with at least {NUM_CORES} cores.")

model_configurations = {
"Llama-2-13B-BS1": ["meta-llama/Llama-2-13b-chat-hf", 1, 4096],
"Llama-2-13B-BS4": ["meta-llama/Llama-2-13b-chat-hf", 4, 4096],
"Llama-2-13B-BS8": ["meta-llama/Llama-2-13b-chat-hf", 8, 4096],
"Llama-2-13B-BS16": ["meta-llama/Llama-2-13b-chat-hf", 16, 4096],
"Mistral-Small-2409-BS1": ["mistralai/Mistral-Small-Instruct-2409", 1, 4096],
"Mistral-Small-2409-BS4": ["mistralai/Mistral-Small-Instruct-2409", 4, 4096],
}

for model_name, model_configuration in model_configurations.items():
Expand All @@ -27,7 +25,7 @@ def main():
export=True,
batch_size=batch_size,
sequence_length=seq_length,
auto_cast_type="fp16",
auto_cast_type="bf16",
num_cores=NUM_CORES,
)
with TemporaryDirectory() as tmpdir:
Expand Down
43 changes: 0 additions & 43 deletions benchmark/text-generation/mistralv2.py

This file was deleted.

0 comments on commit 7a8e99c

Please sign in to comment.