Fix GPTQ CI #1878

Merged
14 commits merged on Jun 28, 2024
56 changes: 23 additions & 33 deletions .github/workflows/check_code_quality.yml
@@ -1,19 +1,11 @@
-name: check_code_quality
+name: Code Quality

 on:
   push:
-    branches: [ main ]
-    paths:
-      - "optimum/**.py"
-      - "tests/**.py"
-      - "examples/**.py"
+    branches: [main]

   pull_request:
-    branches: [ main ]
-    paths:
-      - "optimum/**.py"
-      - "tests/**.py"
-      - "examples/**.py"
+    branches: [main]

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -29,25 +21,23 @@ jobs:

     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Create and start a virtual environment
-        run: |
-          python -m venv venv
-          source venv/bin/activate
-      - name: Install dependencies
-        run: |
-          source venv/bin/activate
-          pip install --upgrade pip
-          pip install .[quality]
-      - name: Check style with black
-        run: |
-          source venv/bin/activate
-          black --check .
-      - name: Check style with ruff
-        run: |
-          source venv/bin/activate
-          ruff .
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install .[quality]
+
+      - name: Check style with black
+        run: |
+          black --check .
+
+      - name: Check style with ruff
+        run: |
+          ruff .
59 changes: 38 additions & 21 deletions .github/workflows/test_gptq.yml
@@ -1,29 +1,46 @@
-name: GPTQ Quantization / Test GPU
+name: GPTQ / Python - Test

 on:
   workflow_dispatch:
-  schedule:
-    - cron: 0 1 */3 * * # at 1am every 3 days
+  push:
+    branches: [main]
+    paths:
+      - tests/gptq/**
+      - optimum/gptq/**
+      - .github/workflows/test_gptq.yml
   pull_request:
-    types: [opened, synchronize, reopened, labeled]
-  # uncomment to enable on PR merge on main branch:
-  #push:
-  #  branches:
-  #    - main
+    branches: [main]
+    paths:
+      - tests/gptq/**
+      - optimum/gptq/**
+      - .github/workflows/test_gptq.yml
+  schedule:
+    # every day at midnight
+    - cron: "0 0 * * *"

 jobs:
-  do-the-job:
-    if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }}
-    name: Start self-hosted EC2 runner
+  test_gptq:
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
-    env:
-      AWS_REGION: us-east-1
+
     steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-      - name: Build image
-        run: |
-          docker build -f tests/gptq/Dockerfile_quantization_gpu -t gptq-gpu .
-      - name: Test with unittest within docker container
-        run: |
-          docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Run tests
+        uses: addnab/docker-run-action@v3
+        with:
+          image: pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
+          # latest auto-gptq was built with pytorch 2.2 and cuda 12.1
+          options: |
+            --rm
+            --gpus all
+            --shm-size 16G
+            --env RUN_SLOW=1
+            --env HF_HOME=/mnt/cache/
+            --volume /mnt/cache/:/mnt/cache/
+            --volume ${{ github.workspace }}:/workspace
+            --workdir /workspace
+          run: |
+            pip install auto-gptq
+            pip install -e .[tests]
+            pytest tests/gptq -s -vvvv --durations=0
41 changes: 41 additions & 0 deletions optimum/gptq/eval.py
@@ -0,0 +1,41 @@
import torch
import torch.nn as nn
from datasets import load_dataset
from tqdm import tqdm


def evaluate_perplexity(model, tokenizer):
    def _perplexity(nlls, n_samples, seqlen):
        return torch.exp(torch.stack(nlls).sum() / (n_samples * seqlen))

    # load and prepare dataset
    data = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
    data = tokenizer("\n\n".join(data["text"]), return_tensors="pt")
    data = data.input_ids.to(model.device)

    seqlen = 512
    model = model.eval()
    n_samples = data.numel() // seqlen

    nlls = []

    with tqdm(range(n_samples), desc="Perplexity -") as progress_bar:
        for i in progress_bar:
            start_index = i * seqlen
            end_index = (i + 1) * seqlen
            batch = data[:, start_index:end_index].to(model.device)
            with torch.no_grad():
                logits = model(batch).logits
            shift_logits = logits[:, :-1, :].contiguous().float()
            shift_labels = data[:, start_index:end_index][:, 1:]
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
            neg_log_likelihood = loss.float() * seqlen
            nlls.append(neg_log_likelihood)

            curr_ppl = _perplexity(nlls, i + 1, seqlen)
            progress_bar.set_description(f"Perplexity {curr_ppl:.3f}")

    ppl = _perplexity(nlls, n_samples, seqlen)

    return ppl.item()
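
A minimal usage sketch for the new helper (not part of the PR itself): it assumes auto-gptq and a CUDA GPU are available, and the facebook/opt-125m checkpoint and the 4-bit GPTQConfig settings below are illustrative choices, not values taken from the CI workflow.

# Usage sketch: quantize a small model with GPTQ via transformers,
# then score it on wikitext-2 with the helper added in this PR.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

from optimum.gptq.eval import evaluate_perplexity

model_id = "facebook/opt-125m"  # illustrative small checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit GPTQ quantization; requires auto-gptq and a CUDA device.
gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=gptq_config,
)

ppl = evaluate_perplexity(model, tokenizer)  # perplexity on the wikitext-2 test split
print(f"wikitext-2 perplexity: {ppl:.3f}")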
26 changes: 0 additions & 26 deletions tests/gptq/Dockerfile_quantization_gpu

This file was deleted.
