Merge branch 'main' into add-tp-support-t5

huggingface · Oct 15, 2024 · ef9ba9c · ef9ba9c
2 parents ac7ad43 + 0ea7285
commit ef9ba9c
Show file tree

Hide file tree

Showing 65 changed files with 842 additions and 4,831 deletions.
diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml
@@ -1,27 +1,24 @@
-name: Stale Bot
-
+name: 'Close stale issues and PRs'
 on:
   schedule:
-    - cron: "0 8 * * *"
+    - cron: '0 8 * * *'
+  workflow_dispatch:
+
+permissions:
+  issues: write
+  pull-requests: write
 
 jobs:
-  close_stale_issues:
-    name: Close Stale Issues
-    if: github.repository == 'huggingface/optimum-neuron'
-    runs-on: ubuntu-22.04
-    env:
-      COMMENT_BOT_TOKEN: ${{ secrets.COMMENT_BOT_TOKEN }}
+  stale:
+    runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
-
-    - name: Setup Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: 3.8
-
-    - name: Install requirements
-      run: |
-        pip install PyGithub
-    - name: Close stale issues
-      run: |
-        python tools/stale.py
+      - uses: actions/stale@v9
+        with:
+          stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
+          stale-pr-message: 'This PR is stale because it has been open 15 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
+          close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
+          close-pr-message: 'This PR was closed because it has been stalled for 5 days with no activity.'
+          days-before-issue-stale: 30
+          days-before-pr-stale: 15
+          days-before-issue-close: 5
+          days-before-pr-close: 5
diff --git a/benchmark/text-generation-inference/accuracy/README.md b/benchmark/text-generation-inference/accuracy/README.md
@@ -0,0 +1,14 @@
+# Evaluate LLM on several benchmarks
+
+NeuronX TGI supports the OpenAI API, which makes it possible to evaluate neuron models using [lm-harness](https://github.com/EleutherAI/lm-evaluation-harness).
+
+Please refer to the [lm-harness](https://github.com/EleutherAI/lm-evaluation-harness) documentation for installation instructions and benchmark configuration.
+
+## Some results
+
+### meta-llama/Meta-Llama-3.1-8B-Instruct
+
+|Tasks|Version|     Filter     |n-shot|  Metric   |   |Value |   |Stderr|
+|-----|------:|----------------|-----:|-----------|---|-----:|---|-----:|
+|gsm8k|      3|flexible-extract|     5|exact_match|↑  |0.7885|±  |0.0112|
+|     |       |strict-match    |     5|exact_match|↑  |0.0425|±  |0.0056|
diff --git a/benchmark/text-generation-inference/accuracy/evaluate.sh b/benchmark/text-generation-inference/accuracy/evaluate.sh
@@ -0,0 +1,12 @@
+model_id=${1:-meta-llama/Meta-Llama-3.1-8B-Instruct}  # $1: model name passed to lm_eval
+tasks=${3:-gsm8k}  # $3: value given to --tasks
+batch_size=${2:-1}  # $2: forwarded as num_concurrent
+
+export HF_TOKEN=$(cat ~/.cache/huggingface/token)
+
+base_url="http://127.0.0.1:8080/v1/chat/completions"
+# Expansions below are quoted so each value reaches lm_eval as a single argument.
+lm_eval --model local-chat-completions \
+        --tasks "${tasks}" \
+        --model_args "model=${model_id},base_url=${base_url},num_concurrent=${batch_size},max_retries=3,tokenized_requests=False" \
+        --apply_chat_template
diff --git a/benchmark/text-generation-inference/benchmark.sh b/benchmark/text-generation-inference/benchmark.sh
diff --git a/benchmark/text-generation-inference/generate_csv.py b/benchmark/text-generation-inference/generate_csv.py
diff --git a/benchmark/text-generation-inference/llama-7b/.env b/benchmark/text-generation-inference/llama-7b/.env
diff --git a/benchmark/text-generation-inference/llama-7b/docker-compose.yaml b/benchmark/text-generation-inference/llama-7b/docker-compose.yaml
diff --git a/benchmark/text-generation-inference/llama-7b/nginx.conf b/benchmark/text-generation-inference/llama-7b/nginx.conf
diff --git a/benchmark/text-generation-inference/llama-7b/tgi-results.csv b/benchmark/text-generation-inference/llama-7b/tgi-results.csv
diff --git a/benchmark/text-generation-inference/llama3-8b/.env b/benchmark/text-generation-inference/llama3-8b/.env
diff --git a/benchmark/text-generation-inference/llama3-8b/tgi-results.csv b/benchmark/text-generation-inference/llama3-8b/tgi-results.csv
diff --git a/benchmark/text-generation-inference/mistral-7b/.env b/benchmark/text-generation-inference/mistral-7b/.env
diff --git a/benchmark/text-generation-inference/mistral-7b/docker-compose.yaml b/benchmark/text-generation-inference/mistral-7b/docker-compose.yaml
diff --git a/benchmark/text-generation-inference/mistral-7b/nginx.conf b/benchmark/text-generation-inference/mistral-7b/nginx.conf
diff --git a/benchmark/text-generation-inference/mistral-7b/tgi-results.csv b/benchmark/text-generation-inference/mistral-7b/tgi-results.csv