diff --git a/.ci/tritonbench/install-triton-nightly.sh b/.ci/tritonbench/install-triton-nightly.sh
new file mode 100644
index 000000000..4d79004f3
--- /dev/null
+++ b/.ci/tritonbench/install-triton-nightly.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+if [ -z "${BASE_CONDA_ENV}" ]; then
+  echo "ERROR: BASE_CONDA_ENV is not set"
+  exit 1
+fi
+
+if [ -z "${CONDA_ENV}" ]; then
+  echo "ERROR: CONDA_ENV is not set"
+  exit 1
+fi
+
+if [ -z "${SETUP_SCRIPT}" ]; then
+  echo "ERROR: SETUP_SCRIPT is not set"
+  exit 1
+fi
+
+CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
+conda activate "${BASE_CONDA_ENV}"
+# Remove the conda env if exists
+conda remove --name "${CONDA_ENV}" -y --all || true
+conda create --name "${CONDA_ENV}" -y --clone "${BASE_CONDA_ENV}"
+conda activate "${CONDA_ENV}"
+
+. "${SETUP_SCRIPT}"
+# Install the nightly openai/triton
+pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
diff --git a/.ci/tritonbench/test.sh b/.ci/tritonbench/test-install.sh
similarity index 87%
rename from .ci/tritonbench/test.sh
rename to .ci/tritonbench/test-install.sh
index 34604aae4..383f7d4cd 100644
--- a/.ci/tritonbench/test.sh
+++ b/.ci/tritonbench/test-install.sh
@@ -8,5 +8,5 @@ fi
 parent_dir=$(dirname "$(readlink -f "$0")")/../..
 cd ${parent_dir}
 
-# Test TritonBench
+# Test TritonBench installation
 python install.py --userbenchmark triton --fbgemm --test
diff --git a/.ci/tritonbench/test-operators.sh b/.ci/tritonbench/test-operators.sh
new file mode 100644
index 000000000..40af2f18f
--- /dev/null
+++ b/.ci/tritonbench/test-operators.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+set -x
+
+if [ -z "${SETUP_SCRIPT}" ]; then
+  echo "ERROR: SETUP_SCRIPT is not set"
+  exit 1
+fi
+
+. "${SETUP_SCRIPT}"
+
+# Test Tritonbench operators
+# TODO: test every operator, fwd+bwd
+python run_benchmark.py triton --op launch_latency --mode fwd --num-inputs 1 --test-only
+python run_benchmark.py triton --op addmm --mode fwd --num-inputs 1 --test-only
+python run_benchmark.py triton --op gemm --mode fwd --num-inputs 1 --test-only
+python run_benchmark.py triton --op sum --mode fwd --num-inputs 1 --test-only
+python run_benchmark.py triton --op softmax --mode fwd --num-inputs 1 --test-only
+python run_benchmark.py triton --op layer_norm --mode fwd --num-inputs 1 --test-only
+
+
+# Segfault
+# python run_benchmark.py triton --op flash_attention --mode fwd --num-inputs 1 --test-only
+
+# CUDA OOM
+# python run_benchmark.py triton --op jagged_layer_norm --mode fwd --num-inputs 1 --test-only
+# python run_benchmark.py triton --op jagged_mean --mode fwd --num-inputs 1 --test-only
+# python run_benchmark.py triton --op jagged_softmax --mode fwd --num-inputs 1 --test-only
+# python run_benchmark.py triton --op jagged_sum --mode fwd --num-inputs 1 --test-only
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index c9bdb3131..815506b7c 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -3,18 +3,22 @@ on:
   pull_request:
     # ignore tritonbench paths
     paths-ignore:
-      - 'torchbenchmark/operators'
+      - 'torchbenchmark/operators/*'
+      - 'torchbenchmark/util/kernels/*'
       - 'torchbenchmark/util/triton_op.py'
-      - 'userbenchmark/triton'
+      - 'userbenchmark/triton/*'
+      - '.ci/tritonbench/*'
   workflow_dispatch:
   push:
     branches:
       - main
     # ignore tritonbench paths
     paths-ignore:
-      - 'torchbenchmark/operators'
+      - 'torchbenchmark/operators/*'
+      - 'torchbenchmark/util/kernels/*'
       - 'torchbenchmark/util/triton_op.py'
-      - 'userbenchmark/triton'
+      - 'userbenchmark/triton/*'
+      - '.ci/tritonbench/*'
 
 jobs:
   cpu-test:
diff --git a/.github/workflows/tritonbench-test.yml b/.github/workflows/tritonbench-test.yml
new file mode 100644
index 000000000..7e58ccf39
--- /dev/null
+++ b/.github/workflows/tritonbench-test.yml
@@ -0,0 +1,63 @@
+name: Tritonbench PR Test on Triton nightly
+on:
+  pull_request:
+    paths:
+      - 'torchbenchmark/operators/*'
+      - 'torchbenchmark/util/kernels/*'
+      - 'torchbenchmark/util/triton_op.py'
+      - 'userbenchmark/triton/*'
+      - '.ci/tritonbench/*'
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - 'torchbenchmark/operators/*'
+      - 'torchbenchmark/util/kernels/*'
+      - 'torchbenchmark/util/triton_op.py'
+      - 'userbenchmark/triton/*'
+      - '.ci/tritonbench/*'
+
+jobs:
+  cuda-test:
+    # Don't run on forked repos
+    if: github.repository_owner == 'pytorch'
+    runs-on: [a100-runner]
+    timeout-minutes: 240
+    environment: docker-s3-upload
+    env:
+      BASE_CONDA_ENV: "torchbench"
+      CONDA_ENV: "tritonbench-pr-test-cuda"
+      SETUP_SCRIPT: "/workspace/setup_instance.sh"
+      TEST_CONFIG: "cuda"
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+    steps:
+      - name: Checkout TorchBench
+        uses: actions/checkout@v3
+        with:
+          submodules: 'true'
+      - name: Tune Nvidia GPU
+        run: |
+          sudo nvidia-smi -pm 1
+          sudo nvidia-smi -ac 1215,1410
+          sudo ldconfig
+          nvidia-smi
+      - name: Install triton-nightly
+        run: |
+          bash ./.ci/tritonbench/install-triton-nightly.sh
+      - name: Test Tritonbench install
+        run: |
+          bash ./.ci/tritonbench/test-install.sh
+      - name: Test Tritonbench operators
+        run: |
+          bash ./.ci/tritonbench/test-operators.sh
+      - name: Clean up Conda env
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          conda deactivate && conda deactivate
+          conda remove -n "${CONDA_ENV}" --all
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
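Taken together, the new workflow clones the base conda environment, installs the Triton nightly wheel into the clone, and smoke-tests a handful of operators in forward mode. A minimal local reproduction of that sequence is sketched below; it assumes the same environment names and setup-script path that the workflow sets ("torchbench", "tritonbench-pr-test-cuda", and "/workspace/setup_instance.sh"), which are specific to the CI runner and should be adjusted for your machine.

#!/bin/bash
# Sketch: reproduce the CI steps above locally, from the repository root.
# The env names and SETUP_SCRIPT path are copied from the workflow and are
# assumptions about the target machine, not requirements of the scripts.
set -euo pipefail

export BASE_CONDA_ENV="torchbench"
export CONDA_ENV="tritonbench-pr-test-cuda"
export SETUP_SCRIPT="/workspace/setup_instance.sh"

# 1. Clone the base env and install the Triton nightly wheel into the clone.
bash ./.ci/tritonbench/install-triton-nightly.sh

# 2. Verify the Tritonbench installation.
bash ./.ci/tritonbench/test-install.sh

# 3. Run the forward-mode smoke tests for the selected operators.
bash ./.ci/tritonbench/test-operators.sh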