# TorchBench Userbenchmark on A100 #74
# Workflow file for this run: .github/workflows/userbenchmark-a100.yml at fdd7def

	name: TorchBench Userbenchmark on A100
	on:
	schedule:
	- cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST
	workflow_dispatch:
	inputs:
	userbenchmark_name:
	description: "Name of the user benchmark to run"
	userbenchmark_options:
	description: "Option of the user benchmark to run"

	jobs:
	run-userbenchmark:
	runs-on: [a100-runner]
	timeout-minutes: 1440 # 24 hours
	environment: docker-s3-upload
	env:
	BASE_CONDA_ENV: "torchbench"
	CONDA_ENV: "userbenchmark-a100"
	PLATFORM_NAME: "gcp_a100"
	TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
	AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
	AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	SETUP_SCRIPT: "/workspace/setup_instance.sh"
	steps:
	- name: Checkout TorchBench
	uses: actions/checkout@v3
	with:
	path: benchmark
	- name: Tune Nvidia GPU
	run: \|
	sudo nvidia-smi -pm 1
	sudo nvidia-smi -ac 1215,1410
	nvidia-smi
	- name: Clone and setup conda env
	run: \|
	CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
	conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
	- name: Install TorchBench
	run: \|
	set -x
	. "${SETUP_SCRIPT}"
	pushd benchmark
	python install.py
	- name: Run user benchmark
	run: \|
	set -x
	. "${SETUP_SCRIPT}"
	# remove old results
	if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
	pushd benchmark
	if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
	MANUAL_WORKFLOW="${{ github.event.inputs.userbenchmark_name }}"
	if [ -z "${MANUAL_WORKFLOW}" ]; then
	# Figure out what userbenchmarks we should run, and run it
	python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform ${PLATFORM_NAME}
	if [ -d ./.userbenchmark ]; then
	cp -r ./.userbenchmark ../benchmark-output
	else
	mkdir ../benchmark-output
	fi
	else
	python run_benchmark.py "${{ github.event.inputs.userbenchmark_name }}" ${{ github.event.inputs.userbenchmark_options }}
	cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" ../benchmark-output
	fi
	- name: Upload artifact
	uses: actions/upload-artifact@v3
	with:
	name: TorchBench result
	path: benchmark-output/
	- name: Upload result jsons to Scribe and S3
	run: \|
	. "${SETUP_SCRIPT}"
	pushd benchmark
	RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 \| sort -r))
	echo "Uploading result jsons: ${RESULTS}"
	for r in ${RESULTS[@]}; do
	python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
	python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
	done
	- name: Clean up Conda env
	if: always()
	run: \|
	. "${SETUP_SCRIPT}"
	conda deactivate && conda deactivate
	conda remove -n "${CONDA_ENV}" --all

# Provide feedback
# Saved searches
# Use saved searches to filter your results more quickly
# TorchBench Userbenchmark on A100 #74
# Workflow file
# TorchBench Userbenchmark on A100 #74
# Jobs
# Run details
# Workflow file for this run