-
Notifications
You must be signed in to change notification settings - Fork 280
101 lines (100 loc) · 3.52 KB
/
_linux-benchmark-cuda.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Reusable workflow: run one TorchBench userbenchmark on a CUDA (A100) runner,
# then publish the results as a GH Actions artifact and to Amazon S3.
# Invoked from other workflows via `uses:` + `workflow_call`.
name: linux-benchmark-cuda
on:
  workflow_call:
    inputs:
      userbenchmark:
        required: true
        type: string
        description: Name of the benchmark
      userbenchmark-run-args:
        required: true
        type: string
        description: Userbenchmark run command line arguments
    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
        description: |
          HF auth token to avoid rate limits when downloading models or datasets from hub
      AWS_ACCESS_KEY_ID:
        required: true
        description: |
          AWS access token for S3 uploading
      AWS_SECRET_ACCESS_KEY:
        required: true
        description: |
          AWS secret access key for S3 uploading
jobs:
  # Run a specific userbenchmark with given arguments.
  # The benchmark name and its CLI arguments arrive via workflow_call inputs.
  benchmark:
    # Don't run on forked repos
    if: github.repository_owner == 'pytorch'
    runs-on: [a100-runner]
    timeout-minutes: 1440 # 24 hours
    environment: docker-s3-upload
    env:
      BASE_CONDA_ENV: "torchbench"
      CONDA_ENV: "userbenchmark"
      SETUP_SCRIPT: "/workspace/setup_instance.sh"
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      # Route caller inputs through the environment so run steps never splice
      # ${{ }} expressions directly into shell text (script-injection hardening,
      # per GitHub's security-hardening guidance).
      USERBENCHMARK_NAME: ${{ inputs.userbenchmark }}
      USERBENCHMARK_RUN_ARGS: ${{ inputs.userbenchmark-run-args }}
    steps:
      - name: Checkout TorchBench
        # v4: v3 runs on the deprecated node16 runtime.
        uses: actions/checkout@v4
        with:
          path: benchmark
      - name: Tune Nvidia GPU
        run: |
          # Pin persistence mode and application clocks for stable measurements.
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          sudo ldconfig
          nvidia-smi
      - name: Remove result if it already exists
        if: always()
        run: |
          # Remove stale results left over on this self-hosted runner.
          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
          pushd benchmark
          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
      - name: Clone and setup conda env
        run: |
          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
          # -y: conda otherwise asks for interactive confirmation and hangs CI.
          conda create -y --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
      - name: Install benchmark
        run: |
          . "${SETUP_SCRIPT}"
          pushd benchmark
          python install.py
      - name: Run benchmark
        run: |
          . "${SETUP_SCRIPT}"
          pushd benchmark
          # USERBENCHMARK_RUN_ARGS is intentionally unquoted: it is a
          # space-separated argument list that must undergo word splitting.
          python run_benchmark.py "${USERBENCHMARK_NAME}" ${USERBENCHMARK_RUN_ARGS}
      - name: Copy benchmark logs
        if: always()
        run: |
          pushd benchmark
          cp -r "./.userbenchmark/${USERBENCHMARK_NAME}" ../benchmark-output
      - name: Upload benchmark result to GH Actions Artifact
        # v4 required: upload-artifact@v3 was turned off by GitHub in Jan 2025.
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: ${{ inputs.userbenchmark }} benchmarking result
          path: benchmark-output/
      - name: Copy artifact and upload to Amazon S3
        env:
          WORKFLOW_RUN_ID: ${{ github.run_id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          pushd benchmark
          # Upload the result CSVs to Amazon S3.
          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv \
            --userbenchmark "${USERBENCHMARK_NAME}" \
            --upload-path ../benchmark-output \
            --match-filename "^${USERBENCHMARK_NAME}.*\.csv"
      - name: Clean up Conda env
        if: always()
        run: |
          . "${SETUP_SCRIPT}"
          conda deactivate && conda deactivate
          # -y: avoid the interactive confirmation prompt in CI.
          conda remove -y -n "${CONDA_ENV}" --all