Haystack 1.x Benchmarks #14

Workflow file for this run

name: Haystack 1.x Benchmarks
on:
workflow_dispatch:
schedule:
# At 00:01 on Sunday
- cron: "1 0 * * 0"
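# id-token: write lets aws-actions/configure-aws-credentials authenticate to AWS via OIDC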
permissions:
id-token: write
contents: read
env:
AWS_REGION: eu-central-1
jobs:
deploy-runner:
runs-on: ubuntu-latest
outputs:
cml_runner_id: ${{ steps.deploy.outputs.cml_runner_id }}
steps:
- uses: actions/checkout@v4
- uses: iterative/setup-cml@v2
- name: AWS authentication
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
- name: Launch EC2 instance and deploy runner
id: deploy
env:
repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
run: |
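# Launch an on-demand GPU instance on AWS and register it as a self-hosted runner.
# Piping through `tee /dev/fd/2` keeps the launch log visible in the job log while capturing it in OUTPUT.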
OUTPUT=$(cml runner launch \
--cloud aws \
--cloud-region ${{ env.AWS_REGION }} \
--cloud-type=p3.2xlarge \
--cloud-hdd-size=64 \
--labels=cml 2>&1 | tee /dev/fd/2)
# Extract the runner 'id' from the launch log (each log line is JSON whose 'message' field is itself JSON) and expose it as a step output
ID_VALUE=$(echo "$OUTPUT" | jq -r '.message? | fromjson? | select(.id != null) | .id // empty')
echo "cml_runner_id=$ID_VALUE" >> "$GITHUB_OUTPUT"
run-reader-benchmarks:
needs: deploy-runner
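# picked up by the EC2 runner that deploy-runner launched (labels=cml)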
runs-on: [self-hosted, cml]
container:
image: docker://iterativeai/cml:0-dvc2-base1-gpu
options: --gpus all
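# 2880 minutes = 48 hours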
timeout-minutes: 2880
steps:
- uses: actions/checkout@v4
with:
ref: v1.x
- name: Install Haystack + Datadog requirements
run: |
pip install .[metrics,benchmarks,inference]
pip install -r test/benchmarks/datadog/requirements.txt
- name: Run benchmarks
working-directory: test/benchmarks
run: |
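# Run each reader config and write one JSON result file per config into out/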
mkdir -p out
for f in ./configs/reader/*.yml; do
name="${f%.*}"
echo "=== Running benchmarks for $name ===";
config_name="$(basename "$name")"
python run.py --output "out/$config_name.json" "$f";
echo "=== Benchmarks done for $name (or failed) ===";
done
- name: Send Benchmark results to Datadog
working-directory: test/benchmarks
run: |
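# Push every result file in out/ to the Datadog EU endpoint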
python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
- name: Archive benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-reader
path: test/benchmarks/out/
run-elasticsearch-benchmarks:
needs:
- deploy-runner
- run-reader-benchmarks
runs-on: [self-hosted, cml]
container:
image: docker://iterativeai/cml:0-dvc2-base1-gpu
options: --gpus all
services:
elasticsearch:
image: elasticsearch:7.17.6
env:
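# single-node mode skips Elasticsearch's production bootstrap checks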
discovery.type: "single-node"
ports:
- 9201:9200
timeout-minutes: 2880
steps:
- uses: actions/checkout@v4
with:
ref: v1.x
- name: Install Haystack + Datadog requirements
run: |
pip install .[metrics,elasticsearch,benchmarks,inference]
pip install -r test/benchmarks/datadog/requirements.txt
- name: Run benchmarks
working-directory: test/benchmarks
run: |
mkdir -p out
shopt -s globstar  # let ** match nested config directories (bash globstar is off by default)
for f in ./configs/**/*-elasticsearch-*.yml; do
name="${f%.*}"
echo "=== Running benchmarks for $name ===";
config_name="$(basename "$name")"
python run.py --output "out/$config_name.json" "$f";
echo "=== Benchmarks done for $name (or failed) ===";
done
- name: Send Benchmark results to Datadog
working-directory: test/benchmarks
run: |
python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
- name: Archive benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-elasticsearch
path: test/benchmarks/out/
run-weaviate-benchmarks:
needs:
- deploy-runner
- run-elasticsearch-benchmarks
runs-on: [self-hosted, cml]
container:
image: docker://iterativeai/cml:0-dvc2-base1-gpu
options: --gpus all
services:
weaviate:
image: semitechnologies/weaviate:1.17.2
env:
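# anonymous access so the benchmarks can connect without credentials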
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
ports:
- 8080:8080
timeout-minutes: 2880
steps:
- uses: actions/checkout@v4
with:
ref: v1.x
- name: Install Haystack + Datadog requirements
run: |
pip install .[metrics,weaviate,benchmarks,inference]
pip install -r test/benchmarks/datadog/requirements.txt
- name: Run benchmarks
working-directory: test/benchmarks
run: |
mkdir -p out
shopt -s globstar  # let ** match nested config directories (bash globstar is off by default)
for f in ./configs/**/*-weaviate-*.yml; do
name="${f%.*}"
echo "=== Running benchmarks for $name ===";
config_name="$(basename "$name")"
python run.py --output "out/$config_name.json" "$f";
echo "=== Benchmarks done for $name (or failed) ===";
done
- name: Send Benchmark results to Datadog
working-directory: test/benchmarks
run: |
python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
- name: Archive benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-weaviate
path: test/benchmarks/out/
run-opensearch-benchmarks:
needs:
- deploy-runner
- run-weaviate-benchmarks
runs-on: [self-hosted, cml]
container:
image: docker://iterativeai/cml:0-dvc2-base1-gpu
options: --gpus all
services:
opensearch:
image: opensearchproject/opensearch:1.3.5
env:
discovery.type: "single-node"
OPENSEARCH_JAVA_OPTS: "-Xms4096m -Xmx4096m"
ports:
- 9200:9200
timeout-minutes: 2880
steps:
- uses: actions/checkout@v4
with:
ref: v1.x
- name: Install Haystack + Datadog requirements
run: |
pip install .[metrics,opensearch,benchmarks,inference]
pip install -r test/benchmarks/datadog/requirements.txt
- name: Run benchmarks
working-directory: test/benchmarks
run: |
mkdir -p out
shopt -s globstar  # let ** match nested config directories (bash globstar is off by default)
for f in ./configs/**/*-opensearch-*.yml; do
name="${f%.*}"
echo "=== Running benchmarks for $name ===";
config_name="$(basename "$name")"
python run.py --output "out/$config_name.json" "$f";
echo "=== Benchmarks done for $name (or failed) ===";
done
- name: Send Benchmark results to Datadog
working-directory: test/benchmarks
run: |
python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
- name: Archive benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-opensearch
path: test/benchmarks/out/
terminate-runner:
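# clean-up job: must run even when a benchmark job fails, so the EC2 instance is not left running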
if: always()
needs:
- deploy-runner
- run-opensearch-benchmarks
runs-on: ubuntu-latest
steps:
- name: AWS authentication
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
- name: Terminate EC2 instance
env:
CML_RUNNER_ID: ${{needs.deploy-runner.outputs.cml_runner_id}}
run: |
# Get the instance ID using its Name tag and terminate the instance
INSTANCE_ID=$(aws ec2 describe-instances --filters "Name=tag:Name,Values=$CML_RUNNER_ID" --query "Reservations[*].Instances[*].[InstanceId]" --output text)
aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"