# Workflow file for run: "Deploy the H100 flash_attention operator" (#1)
# (Captured from the GitHub Actions web UI; page-navigation text removed.)

# CI workflow: run the TorchBench CPU and CUDA test suites via reusable
# workflows on pull requests, pushes to main, and manual dispatch.
name: TorchBench PR Test

# NOTE: "on" is a YAML 1.1 boolean-looking key; GitHub's loader handles it
# (suppress yamllint `truthy` here if linting).
on:
  pull_request:
  workflow_dispatch:
  push:
    branches:
      - main

jobs:
  # CPU test suite, delegated to the reusable linux-cpu workflow.
  cpu-test:
    uses: ./.github/workflows/_linux-test-cpu.yml
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
  # CUDA (GPU) test suite, delegated to the reusable linux-cuda workflow.
  cuda-test:
    uses: ./.github/workflows/_linux-test-cuda.yml
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

# Cancel an in-flight run for the same PR (or SHA, for pushes/dispatch) when a
# newer run starts. Placed at workflow (top) level — the `github.workflow`-keyed
# group only makes sense there; original indentation was lost, so confirm this
# was not intended as job-level concurrency.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true