Continuous Benchmark: Search on disk #10

Workflow file for this run

.github/workflows/continuous-search-on-disk.yaml at 1a1aba1

	# Workflow identity, triggers and run serialization.
	name: "Continuous Benchmark: Search on disk"

	on:
	  # Nightly trigger (GitHub evaluates cron expressions in UTC): 01:00 daily.
	  schedule:
	    - cron: "0 1 * * *"
	  # Manual trigger. Scheduled runs carry no inputs, which is why the jobs
	  # apply their own fallbacks when reading these values.
	  workflow_dispatch:
	    inputs:
	      # No `type` is declared, so the value is a string; the jobs compare
	      # it against the literal 'true'.
	      build_vector_db_image:
	        description: "Build vector DB benchmark image from source branch and use it (false = use prebuilt ghcr image)"
	        required: false
	        default: "false"
	      container_mem_limit:
	        description: "Memory limit for the Qdrant container during search (force-eviction regime)"
	        required: false
	        default: "384m"
	      feature_flags_all:
	        description: "Enable all feature flags on the dev branch"
	        type: boolean
	        default: true

	# Every run joins the same concurrency group, so GitHub Actions does not
	# execute two runs of this group at the same time.
	concurrency:
	  group: continuous-benchmark

	jobs:
	  # Optional job: builds the benchmark image from the checked-out ref and
	  # pushes it to ghcr.io. Runs only when the manual input
	  # build_vector_db_image equals the string 'true'; otherwise it is skipped.
	  buildImage:
	    name: Build Vector DB Image
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	      packages: write
	    if: ${{ inputs.build_vector_db_image == 'true' }}
	    steps:
	      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      - id: prepare-tag
	        shell: bash
	        # Context values reach the script through environment variables
	        # instead of being expanded into the script text, so a ref name
	        # containing shell metacharacters is never parsed as code.
	        env:
	          REF_NAME: ${{ github.ref_name }}
	          REPO_OWNER: ${{ github.repository_owner }}
	        run: |
	          # Replace each '/' in the ref name with '-' to form the image tag.
	          branch="${REF_NAME//\//-}"
	          tag="ghcr.io/${REPO_OWNER}/vector-db-benchmark:${branch}"
	          echo "tag=${tag}" >> "$GITHUB_OUTPUT"
	      - uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
	      - uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
	        with:
	          registry: ghcr.io
	          username: ${{ github.repository_owner }}
	          password: ${{ secrets.GITHUB_TOKEN }}
	      - uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
	        with:
	          context: .
	          push: true
	          tags: ${{ steps.prepare-tag.outputs.tag }}
	          provenance: false
	          cache-from: type=gha
	          cache-to: type=gha,mode=max

	  # Main job: prepares the on-disk snapshot once, then runs every dataset
	  # against each Qdrant version in VERSION_PAIRS. It still runs when
	  # buildImage was skipped, but not when buildImage failed or the run was
	  # cancelled.
	  runOnDiskSearchBenchmark:
	    needs: [buildImage]
	    if: ${{ !cancelled() && (needs.buildImage.result == 'success' || needs.buildImage.result == 'skipped') }}
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	      packages: read
	    steps:
	      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      - uses: webfactory/ssh-agent@e83874834305fe9a4a2997156cb26c5de65a8555 # v0.10.0
	        with:
	          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
	      - name: Benches
	        id: benches
	        # Secrets, inputs and context values are handed over as environment
	        # variables rather than expanded into the script text. This keeps
	        # values with spaces or shell metacharacters intact and prevents
	        # them from being interpreted as shell code. Variables set here are
	        # already exported to the child processes started below.
	        env:
	          HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
	          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
	          POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
	          # Falls back to 384m when no input is present (scheduled runs).
	          CONTAINER_MEM_LIMIT: ${{ inputs.container_mem_limit || '384m' }}
	          BUILD_VECTOR_DB_IMAGE: ${{ inputs.build_vector_db_image }}
	          FEATURE_FLAGS_ALL_INPUT: ${{ inputs.feature_flags_all }}
	          REF_NAME: ${{ github.ref_name }}
	          REPO_OWNER: ${{ github.repository_owner }}
	          WORKFLOW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        run: |
	          export POSTGRES_TABLE="benchmark_on_disk"
	          export SERVER_NAME="benchmark-server-3"
	          export ENGINE_NAME="qdrant-on-disk-bq-inline-off"
	          export SNAPSHOT_URL="https://storage.googleapis.com/qdrant-benchmark-snapshots/on-disk-search/snapshots/ondisk_global-inline-off.snapshot"

	          # Point the tools at the image pushed by buildImage only when that
	          # job was requested; the tag is derived exactly as in buildImage.
	          if [ "$BUILD_VECTOR_DB_IMAGE" = "true" ]; then
	            branch="${REF_NAME//\//-}"
	            export VECTOR_DB_BENCHMARK_IMAGE="ghcr.io/${REPO_OWNER}/vector-db-benchmark:${branch}"
	            export GHCR_USERNAME="$REPO_OWNER"
	            export GHCR_PASSWORD="$WORKFLOW_GITHUB_TOKEN"
	          fi
	          # Keep the token out of the child processes' environment unless it
	          # was exported as GHCR_PASSWORD above.
	          unset WORKFLOW_GITHUB_TOKEN

	          bash -x tools/setup_ci.sh

	          DATASETS_LIST=(
	            random-768-2M-on-disk-bq-keyword-narrow-filter random-768-2M-on-disk-bq-keyword-wide-filter
	            random-768-2M-on-disk-bq-int-narrow-filter random-768-2M-on-disk-bq-int-wide-filter
	            random-768-2M-on-disk-bq-float-narrow-filter random-768-2M-on-disk-bq-float-wide-filter
	            random-768-2M-on-disk-bq-bool-wide-filter
	            random-768-2M-on-disk-bq-uuid-narrow-filter
	            random-768-2M-on-disk-bq-geo-narrow-filter random-768-2M-on-disk-bq-geo-wide-filter
	            random-768-2M-on-disk-bq-text-narrow-filter random-768-2M-on-disk-bq-text-wide-filter
	            random-768-2M-on-disk-bq-datetime-narrow-filter random-768-2M-on-disk-bq-datetime-wide-filter
	          )
	          # The input is empty on scheduled runs; default to "true" then.
	          FF_DEV="${FEATURE_FLAGS_ALL_INPUT:-true}"
	          # Each entry is "<qdrant version>|<feature-flags-all value>".
	          VERSION_PAIRS=("ghcr/dev|${FF_DEV}" "docker/master|false")
	          SNAPSHOT_CONTAINER_NAME="qdrant-continuous-benchmarks-snapshot"

	          # From here on failures are counted in FAILED instead of aborting
	          # the step immediately.
	          set +e
	          FAILED=0

	          # Restore the snapshot once with the first version, then swap qdrant
	          # images on the same volume per (dataset, version). Same-dataset
	          # dev/master pairs land ~30 s apart.
	          INITIAL="${VERSION_PAIRS[0]}"
	          export QDRANT_VERSION="${INITIAL%%|*}"
	          export QDRANT__FEATURE_FLAGS__ALL="${INITIAL##*|}"
	          CURRENT_VERSION="$QDRANT_VERSION"
	          CURRENT_FF="$QDRANT__FEATURE_FLAGS__ALL"

	          echo "===== prepare $ENGINE_NAME @ $QDRANT_VERSION ====="
	          export BENCHMARK_STRATEGY="search-on-disk-prepare"
	          timeout 30m bash -x tools/run_remote_benchmark.sh
	          prepare_rc=$?
	          # Nothing can be searched without a prepared snapshot, so stop here.
	          if [ "$prepare_rc" -ne 0 ]; then
	            echo "::error::prepare failed (rc=$prepare_rc); aborting"
	            exit 1
	          fi

	          export BENCHMARK_STRATEGY="search-on-disk-search"
	          for DATASET in "${DATASETS_LIST[@]}"; do
	            export DATASETS="$DATASET"
	            for VERSION_PAIR in "${VERSION_PAIRS[@]}"; do
	              export QDRANT_VERSION="${VERSION_PAIR%%|*}"
	              export QDRANT__FEATURE_FLAGS__ALL="${VERSION_PAIR##*|}"

	              # Swap the server image only when the requested version or flag
	              # value differs from what is currently tracked as running.
	              if [[ "$QDRANT_VERSION" != "$CURRENT_VERSION" || "$QDRANT__FEATURE_FLAGS__ALL" != "$CURRENT_FF" ]]; then
	                echo "===== swap qdrant image $CURRENT_VERSION -> $QDRANT_VERSION ====="
	                timeout 15m bash -x tools/run_server_container_with_volume.sh "$SNAPSHOT_CONTAINER_NAME" "$CONTAINER_MEM_LIMIT" "continue"
	                swap_rc=$?
	                if [ "$swap_rc" -ne 0 ]; then
	                  echo "::error::image swap failed (rc=$swap_rc); aborting remaining cells"
	                  FAILED=$((FAILED + 1))
	                  # Leave both loops: every remaining cell needs a working swap.
	                  break 2
	                fi
	                CURRENT_VERSION="$QDRANT_VERSION"
	                CURRENT_FF="$QDRANT__FEATURE_FLAGS__ALL"
	              fi

	              echo "===== search $DATASETS @ $QDRANT_VERSION ====="
	              # A failed or timed-out cell is counted; later cells still run.
	              timeout 60m bash -x tools/run_ci.sh || FAILED=$((FAILED + 1))
	            done
	          done

	          set -e
	          if [ "$FAILED" -gt 0 ]; then
	            echo "::warning::$FAILED cell run(s) failed"
	            exit 1
	          fi
	      # NOTE(review): the inline script above never writes `failed`,
	      # `qdrant_version`, `engine_name` or `dataset` to $GITHUB_OUTPUT;
	      # presumably the tools/ scripts do - TODO confirm, otherwise this step
	      # never fires and the Slack fields below are empty.
	      - name: Fail job if any of the benches failed
	        if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
	        run: exit 1
	      # Notifies only when the job failed or was cancelled.
	      - name: Send slack message
	        uses: ./.github/workflows/actions/send-slack-msg
	        if: failure() || cancelled()
	        with:
	          bench_name: "runOnDiskSearchBenchmark"
	          job_status: ${{ job.status }}
	          failed_outputs: ${{ steps.benches.outputs.failed }}
	          qdrant_version: ${{ steps.benches.outputs.qdrant_version }}
	          engine_name: ${{ steps.benches.outputs.engine_name }}
	          dataset: ${{ steps.benches.outputs.dataset }}
	        env:
	          SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
	          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Continuous Benchmark: Search on disk #10

Workflow file

Continuous Benchmark: Search on disk #10

Uh oh!

Workflow file for this run