add: `agent-shell` directory to `.gitignore` (#10381) #17

Workflow file for this run

.github/workflows/zfnd-deploy-nodes-gcp.yml at 3881ba4

	# Google Cloud node deployments and tests that run when Rust code or dependencies are modified,
	# but only on PRs from the ZcashFoundation/zebra repository.
	# (External PRs are tested/deployed by GitHub's Merge Queue.)
	#
	# 1. `versioning`: Extracts the major version from the release semver. Useful for segregating instances based on major versions.
	# 2. `build`: Builds a Docker image named `zebrad` with the necessary tags derived from Git.
	# 3. `test-docker-configurations`: Validates all Zebra Docker configurations by running a matrix of configuration tests.
	# 4. `deploy-nodes`: Deploys Managed Instance Groups (MiGs) with 2-3 instances (1 per zone) for Mainnet and Testnet.
	#    - Stateful disks preserve state across updates (rolling updates with health checks)
	#    - Instance count matches available zones (up to 3), with 1 instance per zone
	#    - Main and release instances get static IPs (manual deployments get ephemeral IPs)
	#    - If triggered by main branch pushes, it always replaces the MiG. For releases, MiGs are replaced only if deploying the same major version; otherwise, a new major version is deployed.
	# 5. `deploy-instance`: Deploys a single node in a specified GCP zone for testing specific commits. Instances from this job aren't auto-replaced or deleted.
	name: Deploy Nodes to GCP

	# Ensures that only one workflow task will run at a time. Previous deployments, if
	# already in process, won't get cancelled. Instead, we let the first to complete
	# then queue the latest pending workflow, cancelling any workflows in between.
	#
	# Since the different event types each use a different Managed Instance Group or instance,
	# we can run different event types concurrently.
	#
	# For pull requests, we only run the tests from this workflow, and don't do any deployments.
	# So an in-progress pull request gets cancelled, just like other tests.
	concurrency:
	  # One group per workflow + event type + ref, so runs for different events or
	  # refs never queue behind each other.
	  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
	  # Only pull request runs are cancelled in place; other events queue instead.
	  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

	# Events that trigger this workflow:
	# - workflow_dispatch: manual deployments, configured through the inputs below
	# - push: updates to the main branch that touch the listed paths
	# - pull_request: PRs that touch the listed paths
	# - release: published releases
	on:
	  workflow_dispatch:
	    inputs:
	      # Deployment configuration
	      network:
	        description: "Network to deploy: Mainnet or Testnet"
	        required: true
	        type: choice
	        default: Mainnet
	        options:
	          - Mainnet
	          - Testnet
	      environment:
	        description: "Environment to deploy to"
	        required: true
	        type: choice
	        default: dev
	        options:
	          - dev
	          - prod
	      # Disk configuration
	      need_cached_disk:
	        description: Use a cached state disk
	        type: boolean
	        default: true
	      cached_disk_type:
	        description: Type of cached disk to use
	        required: true
	        type: choice
	        default: tip
	        options:
	          - tip
	          - checkpoint
	      # Build configuration
	      no_cache:
	        description: Disable the Docker cache for this build
	        type: boolean
	        default: false
	      # Logging configuration
	      log_file:
	        description: Log to a file path rather than standard output
	        default: ""

	  push:
	    # Skip main branch updates where Rust code and dependencies aren't modified.
	    branches:
	      - main
	    paths:
	      # code and tests
	      # (`**/*.rs` matches Rust files at any depth; quoted so YAML doesn't
	      # read the leading `*` as an alias)
	      - "**/*.rs"
	      # hard-coded checkpoints and proptest regressions
	      - "**/*.txt"
	      # dependencies
	      - "**/Cargo.toml"
	      - "**/Cargo.lock"
	      # configuration files
	      - .cargo/config.toml
	      - "**/clippy.toml"
	      # workflow definitions
	      - docker/**
	      - .dockerignore
	      - .github/workflows/zfnd-deploy-nodes-gcp.yml
	      - .github/workflows/zfnd-build-docker-image.yml

	  # Only runs the Docker image tests, doesn't deploy any instances
	  pull_request:
	    # Skip PRs where Rust code and dependencies aren't modified.
	    # Keep this list identical to the `push.paths` list above.
	    paths:
	      # code and tests
	      - "**/*.rs"
	      # hard-coded checkpoints and proptest regressions
	      - "**/*.txt"
	      # dependencies
	      - "**/Cargo.toml"
	      - "**/Cargo.lock"
	      # configuration files
	      - .cargo/config.toml
	      - "**/clippy.toml"
	      # workflow definitions
	      - docker/**
	      - .dockerignore
	      - .github/workflows/zfnd-deploy-nodes-gcp.yml
	      - .github/workflows/zfnd-build-docker-image.yml

	  release:
	    types:
	      - published

	# Default token permissions for every job in this workflow.
	# Jobs that declare their own `permissions` block replace this default.
	permissions:
	  contents: read

	jobs:
	# If a release was made we want to extract the first part of the semver from the
	# tag_name
	#
	# Generate the following output to pass to subsequent jobs
	# - If our semver is `v1.3.0` the resulting output from this job would be `v1`
	# - If our semver is `v10.2.0` the resulting output from this job would be `v10`
	#
	# Note: We just use the first part of the version to replace old instances, and change
	# it when a major version is released, to keep a segregation between new and old
	# versions.
	versioning:
	  name: Versioning
	  runs-on: ubuntu-latest
	  outputs:
	    major_version: ${{ steps.set.outputs.major_version }}
	  if: ${{ github.event_name == 'release' }}
	  steps:
	    - name: Getting Zebrad Version
	      id: get
	      uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
	      with:
	        result-encoding: string
	        script: |
	          // Keep everything before the first dot, so multi-digit majors
	          // (`v10.0.0` -> `v10`) are not truncated to `v1`.
	          return context.payload.release.tag_name.split('.')[0]
	    - name: Setting API Version
	      id: set
	      # Pass the value through the environment instead of interpolating it
	      # into the script, so the tag name is never parsed as shell code.
	      env:
	        MAJOR_VERSION: ${{ steps.get.outputs.result }}
	      run: echo "major_version=${MAJOR_VERSION}" >> "$GITHUB_OUTPUT"

	# Finds a cached state disk for zebra
	#
	# Passes the disk name to subsequent jobs using `cached_disk_name` output
	#
	# For push events, this job always runs.
	# For workflow_dispatch events, it runs only if inputs.need_cached_disk is true.
	# For release events, this job is skipped (releases use fixed disk names, not cached images).
	# PRs from forked repositories are skipped.
	get-disk-name:
	  name: Get disk name
	  permissions:
	    contents: read
	    id-token: write
	  uses: ./.github/workflows/zfnd-find-cached-disks.yml
	  # Skip for releases (they use fixed disk names like 'zebrad-cache-mainnet-tip')
	  # For workflow_dispatch: only run if need_cached_disk is true
	  if: ${{ github.event_name != 'release' && !(github.event.pull_request.head.repo.fork) && (github.event_name != 'workflow_dispatch' || inputs.need_cached_disk) }}
	  with:
	    # `inputs.*` are only set for workflow_dispatch; other events use the fallbacks.
	    network: ${{ inputs.network || vars.ZCASH_NETWORK }}
	    disk_prefix: zebrad-cache
	    disk_suffix: ${{ inputs.cached_disk_type || 'tip' }}

	# Each time this workflow is executed, a build will be triggered to create a new image
	# with the corresponding tags using information from Git
	#
	# The image will be commonly named `zebrad:<short-hash | github-ref | semver>`
	build:
	  name: Build CD Docker
	  permissions:
	    contents: read
	    id-token: write
	    pull-requests: write
	  uses: ./.github/workflows/zfnd-build-docker-image.yml
	  # Build for:
	  # - Pull requests (not from forks)
	  # - Manual workflow_dispatch
	  # - Push to main branch
	  # - Releases
	  if: ${{ (github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork) || github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release' }}
	  with:
	    dockerfile_path: ./docker/Dockerfile
	    dockerfile_target: runtime
	    image_name: zebrad
	    no_cache: ${{ inputs.no_cache || false }}
	    rust_log: info
	    features: ${{ format('{0} {1}', vars.RUST_PROD_FEATURES, vars.RUST_TEST_FEATURES) }}
	    # prod for releases, the selected input for manual runs, dev otherwise
	    environment: ${{ github.event_name == 'release' && 'prod' || (github.event_name == 'workflow_dispatch' && inputs.environment) || 'dev' }}
	  # This step needs access to Docker Hub secrets to run successfully
	  secrets:
	    DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
	    DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}

	# Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet,
	# with one node per zone (2-3 nodes in total) in the configured GCP region.
	#
	# Separate Mainnet and Testnet MiGs are deployed whenever there are:
	# - pushes to the main branch, or
	# - version releases of Zebra.
	#
	# Once this workflow is triggered:
	# - by pushes to main: the MiG is always replaced,
	# - by releases: the MiG is only replaced if the same major version is being deployed,
	# otherwise a new major version is deployed in a new MiG.
	#
	# Runs:
	# - on every push to the `main` branch
	# - on every release, when it's published
	# - on workflow_dispatch for manual deployments

	# Determine which networks to deploy based on the trigger
	#
	# Emits a JSON array in the `networks` output:
	# - workflow_dispatch: only the network chosen in the inputs
	# - any other event: both Mainnet and Testnet
	set-matrix:
	  runs-on: ubuntu-latest
	  outputs:
	    networks: ${{ steps.set-networks.outputs.matrix }}
	  steps:
	    - id: set-networks
	      run: |
	        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
	          # Manually triggered deployment: output a valid JSON array with the single chosen network.
	          echo "matrix=[\"${{ inputs.network }}\"]" >> "$GITHUB_OUTPUT"
	        else
	          echo 'matrix=["Mainnet","Testnet"]' >> "$GITHUB_OUTPUT"
	        fi

	# Creates or updates one regional Managed Instance Group per network in the matrix:
	# builds an instance template for the current commit, ensures the health check exists,
	# then either creates the MiG (first deployment) or starts a rolling update (MiG exists),
	# and finally assigns the reserved static IPs (skipped for manual deployments).
	deploy-nodes:
	  strategy:
	    matrix:
	      network: ${{ fromJSON(needs.set-matrix.outputs.networks) }}
	  name: Deploy ${{ matrix.network }} nodes
	  needs:
	    - set-matrix
	    - build
	    - versioning
	    - get-disk-name
	  runs-on: ubuntu-latest
	  timeout-minutes: 60
	  env:
	    CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
	  # Use prod environment for releases, allow manual selection for workflow_dispatch, default to dev for others
	  environment: ${{ github.event_name == 'release' && 'prod' || (github.event_name == 'workflow_dispatch' && inputs.environment) || 'dev' }}
	  permissions:
	    contents: read
	    id-token: write
	  # Deploy when:
	  # - Build job succeeded (needs.build.result == 'success')
	  # - Running in ZcashFoundation repo (not a fork)
	  # - Event is one of: push to main, release, or workflow_dispatch
	  # - Workflow not cancelled or failed
	  if: ${{ !cancelled() && !failure() && needs.build.result == 'success' && github.repository_owner == 'ZcashFoundation' && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release' || github.event_name == 'workflow_dispatch') }}

	  steps:
	    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	      with:
	        persist-credentials: false

	    - name: Inject slug/short variables
	      uses: rlespinasse/github-slug-action@9e7def61550737ba68c62d34a32dd31792e3f429 # v5.5.0
	      with:
	        short-length: 7

	    # Makes the Zcash network name lowercase.
	    #
	    # Labels in GCP are required to be in lowercase, but the blockchain network
	    # uses sentence case, so we need to downcase the network.
	    #
	    # Passes lowercase network to subsequent steps using $NETWORK env variable.
	    - name: Downcase network name for labels
	      run: |
	        NETWORK_CAPS="${{ matrix.network }}"
	        echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"

	    # Setup gcloud CLI
	    - name: Authenticate to Google Cloud
	      id: auth
	      uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3.0.0
	      with:
	        workload_identity_provider: "${{ vars.GCP_WIF }}"
	        service_account: "${{ vars.GCP_DEPLOYMENTS_SA }}"

	    - name: Set up Cloud SDK
	      uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db # v3.0.1
	      with:
	        install_components: 'beta'

	    - name: Create instance template for ${{ matrix.network }}
	      run: |
	        # Set common naming variables (exported for use in subsequent steps)
	        GIT_PREFIX="${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}"
	        MIG_NAME="zebrad-${GIT_PREFIX}-${NETWORK}"
	        TEMPLATE_NAME="zebrad-${GIT_PREFIX}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
	        echo "MIG_NAME=${MIG_NAME}" >> "$GITHUB_ENV"
	        echo "TEMPLATE_NAME=${TEMPLATE_NAME}" >> "$GITHUB_ENV"

	        # Constant DISK_NAME per deployment type enables disk preservation across updates
	        #
	        # The event name is compared as a string. A bare `[ true ]` / `[ false ]`
	        # only tests that the word is non-empty, so it is true for every event.
	        # NOTE(review): MiGs deployed while that always-true test was in place use the
	        # release-style disk name for all events; confirm how existing non-release
	        # MiGs should be migrated before rolling this out.
	        if [ "${{ github.event_name }}" = "release" ]; then
	          DISK_NAME="zebrad-cache-${NETWORK}"
	        else
	          # Use git prefix (branch/ref name) so each deployment gets its own disk
	          DISK_NAME="zebrad-cache-${GIT_PREFIX}-${NETWORK}"
	        fi
	        echo "DISK_NAME=${DISK_NAME}" >> "$GITHUB_ENV"

	        # Fixed disk name is safe since we use 1 instance per zone (no conflicts)
	        DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-balanced"

	        # Use cached image if available to speed up initial sync
	        if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
	          DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
	        fi

	        # Set log file based on input or default
	        # (the expression renders as the word `true` or `false`, so compare it explicitly)
	        if [ "${{ github.event_name == 'workflow_dispatch' && inputs.log_file != '' }}" = "true" ]; then
	          LOG_FILE="${{ inputs.log_file }}"
	        else
	          LOG_FILE="${{ vars.CD_LOG_FILE }}"
	        fi

	        # Set RPC port based on network
	        if [ "${{ matrix.network }}" = "Mainnet" ]; then
	          RPC_PORT="8232"
	        else
	          RPC_PORT="18232"
	        fi

	        # Check if template already exists (templates are immutable, same commit = same config)
	        if gcloud compute instance-templates describe "${TEMPLATE_NAME}" &>/dev/null; then
	          echo "Template ${TEMPLATE_NAME} already exists, reusing existing template"
	        else
	          gcloud compute instance-templates create-with-container "${TEMPLATE_NAME}" \
	            --machine-type ${{ vars.GCP_SMALL_MACHINE }} \
	            --provisioning-model=SPOT \
	            --boot-disk-size=10GB \
	            --boot-disk-type=pd-standard \
	            --image-project=cos-cloud \
	            --image-family=cos-stable \
	            --subnet=${{ vars.GCP_SUBNETWORK }} \
	            --no-address \
	            --create-disk="${DISK_PARAMS}" \
	            --container-mount-disk=mount-path='/home/zebra/.cache/zebra',name=${DISK_NAME},mode=rw \
	            --container-stdin \
	            --container-tty \
	            --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
	            --container-env "ZEBRA_NETWORK__NETWORK=${{ matrix.network }},ZEBRA_NETWORK__LISTEN_ADDR=0.0.0.0,LOG_FILE=${LOG_FILE},SENTRY_DSN=${{ vars.SENTRY_DSN }},ZEBRA_HEALTH__LISTEN_ADDR=0.0.0.0:8080,ZEBRA_HEALTH__MIN_CONNECTED_PEERS=1,ZEBRA_RPC__LISTEN_ADDR=0.0.0.0:${RPC_PORT}" \
	            --service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
	            --scopes cloud-platform \
	            --metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
	            --labels=app=zebrad,environment=${{ github.event_name == 'release' && 'prod' || (github.event_name == 'workflow_dispatch' && inputs.environment) || 'dev' }},network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
	            --tags zebrad
	        fi

	    # HTTP health check on /healthy endpoint (sync-aware: 200 during sync, 503 on failure)
	    #
	    # Creation fails when the check already exists, so fall back to an update that
	    # applies the same settings (including the port), keeping both paths in sync.
	    - name: Create or update health check
	      run: |
	        gcloud compute health-checks create http "zebra-${NETWORK}-health" \
	          --port=8080 \
	          --request-path=/healthy \
	          --check-interval=60s \
	          --timeout=10s \
	          --unhealthy-threshold=3 \
	          --healthy-threshold=2 \
	          --global 2>/dev/null || \
	        gcloud compute health-checks update http "zebra-${NETWORK}-health" \
	          --port=8080 \
	          --request-path=/healthy \
	          --check-interval=60s \
	          --timeout=10s \
	          --unhealthy-threshold=3 \
	          --healthy-threshold=2 \
	          --global

	    # Check if our destination instance group exists already
	    #
	    # The step fails when grep finds no match; `continue-on-error` keeps the job going
	    # and later steps branch on `steps.does-group-exist.outcome`.
	    - name: Check if ${{ matrix.network }} instance group exists
	      id: does-group-exist
	      continue-on-error: true
	      run: |
	        gcloud compute instance-groups list | grep "${MIG_NAME}" | grep "${{ vars.GCP_REGION }}"

	    # Deploy new managed instance group with 1 instance per zone (2-3 total)
	    - name: Create managed instance group for ${{ matrix.network }}
	      if: steps.does-group-exist.outcome == 'failure'
	      run: |
	        # Query available zones (up to 3) and set instance count to match
	        ZONES=$(gcloud compute zones list \
	          --filter="region:${{ vars.GCP_REGION }}" \
	          --format="value(name)" \
	          --limit=3 | paste -sd,)

	        # An empty list would still be counted as one zone below, so stop early.
	        if [ -z "${ZONES}" ]; then
	          echo "No zones found in region ${{ vars.GCP_REGION }}" >&2
	          exit 1
	        fi

	        ZONE_COUNT=$(echo "${ZONES}" | tr ',' '\n' | wc -l)

	        echo "Using ${ZONE_COUNT} zones: ${ZONES}"

	        gcloud compute instance-groups managed create \
	          "${MIG_NAME}" \
	          --template "${TEMPLATE_NAME}" \
	          --region "${{ vars.GCP_REGION }}" \
	          --size "${ZONE_COUNT}" \
	          --health-check="zebra-${NETWORK}-health" \
	          --initial-delay=3600 \
	          --instance-redistribution-type=NONE \
	          --target-distribution-shape=EVEN \
	          --zones="${ZONES}"

	    # Stateful policy preserves disks across updates (auto-delete on MIG deletion)
	    - name: Configure stateful disk policy
	      if: steps.does-group-exist.outcome == 'failure'
	      run: |
	        gcloud compute instance-groups managed update "${MIG_NAME}" \
	          --stateful-disk "device-name=${DISK_NAME},auto-delete=on-permanent-instance-deletion" \
	          --region "${{ vars.GCP_REGION }}"

	    # Assign static IPs to instances (only for main branch and releases, not manual deployments)
	    - name: Assign static IPs to instances
	      if: ${{ steps.does-group-exist.outcome == 'failure' && github.event_name != 'workflow_dispatch' }}
	      run: |
	        # Wait for MIG to be stable (all instances created)
	        gcloud compute instance-groups managed wait-until "${MIG_NAME}" \
	          --stable \
	          --region "${{ vars.GCP_REGION }}" \
	          --timeout=1200

	        # Get static IPs and instances
	        # (a missing address yields an empty line, so array indexes stay aligned with IP_NAMES)
	        IP_NAMES=("zebra-${NETWORK}" "zebra-${NETWORK}-secondary" "zebra-${NETWORK}-tertiary")
	        mapfile -t IP_ADDRESSES < <(
	          for ip_name in "${IP_NAMES[@]}"; do
	            gcloud compute addresses describe "$ip_name" \
	              --region "${{ vars.GCP_REGION }}" \
	              --format='value(address)' 2>/dev/null || echo ""
	          done
	        )

	        mapfile -t INSTANCES < <(
	          gcloud compute instance-groups managed list-instances "${MIG_NAME}" \
	            --region "${{ vars.GCP_REGION }}" \
	            --format="value(instance.basename())" | sort
	        )

	        # Assign IPs via stateful instance config (creates config + assigns IP in one command)
	        for i in "${!INSTANCES[@]}"; do
	          [ -z "${IP_ADDRESSES[$i]}" ] && continue

	          echo "Assigning ${IP_ADDRESSES[$i]} to ${INSTANCES[$i]}"
	          gcloud compute instance-groups managed instance-configs create "${MIG_NAME}" \
	            --instance="${INSTANCES[$i]}" \
	            --stateful-external-ip="address=${IP_ADDRESSES[$i]},interface-name=nic0,auto-delete=never" \
	            --region "${{ vars.GCP_REGION }}"
	        done

	    # Detect how many zones the MIG spans (needed for max-unavailable constraint)
	    - name: Get zone count for MIG
	      if: steps.does-group-exist.outcome == 'success'
	      id: zone-count
	      run: |
	        ZONE_COUNT=$(gcloud compute instance-groups managed describe "${MIG_NAME}" \
	          --region "${{ vars.GCP_REGION }}" \
	          --format="value(distributionPolicy.zones.len())")
	        echo "count=${ZONE_COUNT}" >> "$GITHUB_OUTPUT"
	        echo "MIG spans ${ZONE_COUNT} zones"

	    # Rolling update (RECREATE method requires max-surge=0, max-unavailable >= zone count)
	    - name: Update managed instance group for ${{ matrix.network }}
	      if: steps.does-group-exist.outcome == 'success'
	      run: |
	        gcloud compute instance-groups managed rolling-action start-update \
	          "${MIG_NAME}" \
	          --version template="${TEMPLATE_NAME}" \
	          --replacement-method=recreate \
	          --max-surge=0 \
	          --max-unavailable=${{ steps.zone-count.outputs.count }} \
	          --region "${{ vars.GCP_REGION }}"

	    # Re-assign static IPs after rolling update (instances are recreated without external IPs)
	    - name: Re-assign static IPs after rolling update
	      if: ${{ steps.does-group-exist.outcome == 'success' && github.event_name != 'workflow_dispatch' }}
	      run: |
	        # Wait for rolling update to complete
	        gcloud compute instance-groups managed wait-until "${MIG_NAME}" \
	          --stable \
	          --region "${{ vars.GCP_REGION }}" \
	          --timeout=1200

	        # Get static IPs and instances
	        # (a missing address yields an empty line, so array indexes stay aligned with IP_NAMES)
	        IP_NAMES=("zebra-${NETWORK}" "zebra-${NETWORK}-secondary" "zebra-${NETWORK}-tertiary")
	        mapfile -t IP_ADDRESSES < <(
	          for ip_name in "${IP_NAMES[@]}"; do
	            gcloud compute addresses describe "$ip_name" \
	              --region "${{ vars.GCP_REGION }}" \
	              --format='value(address)' 2>/dev/null || echo ""
	          done
	        )

	        mapfile -t INSTANCES < <(
	          gcloud compute instance-groups managed list-instances "${MIG_NAME}" \
	            --region "${{ vars.GCP_REGION }}" \
	            --format="value(instance.basename())" | sort
	        )

	        # Assign IPs via stateful instance config (creates config + assigns IP in one command)
	        # If the config already exists, creation fails and the existing config is updated instead.
	        for i in "${!INSTANCES[@]}"; do
	          [ -z "${IP_ADDRESSES[$i]}" ] && continue

	          echo "Assigning ${IP_ADDRESSES[$i]} to ${INSTANCES[$i]}"
	          gcloud compute instance-groups managed instance-configs create "${MIG_NAME}" \
	            --instance="${INSTANCES[$i]}" \
	            --stateful-external-ip="address=${IP_ADDRESSES[$i]},interface-name=nic0,auto-delete=never" \
	            --region "${{ vars.GCP_REGION }}" \
	            --update-instance 2>/dev/null || \
	          gcloud compute instance-groups managed instance-configs update "${MIG_NAME}" \
	            --instance="${INSTANCES[$i]}" \
	            --stateful-external-ip="address=${IP_ADDRESSES[$i]},interface-name=nic0,auto-delete=never" \
	            --region "${{ vars.GCP_REGION }}"
	        done

	# Single summary job for the deployment: passes the results of all needed jobs to
	# `re-actors/alls-green`, with `versioning`, `get-disk-name` and `build` allowed to be skipped.
	deploy-nodes-success:
	  name: Deploy nodes success
	  runs-on: ubuntu-latest
	  # Only run when the deployment job actually executed
	  # (`always()` keeps this job from being skipped when a needed job fails)
	  if: >-
	    ${{
	      always() &&
	      needs.deploy-nodes.result != 'skipped'
	    }}
	  needs:
	    - versioning
	    - get-disk-name
	    - build
	    - set-matrix
	    - deploy-nodes
	  timeout-minutes: 1
	  steps:
	    - name: Decide whether the needed jobs succeeded or failed
	      uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # v1.2.2
	      with:
	        jobs: ${{ toJSON(needs) }}
	        allowed-skips: versioning, get-disk-name, build

	# Opens an issue (or comments on the existing open one) when a needed job fails or is
	# cancelled outside of a pull request.
	failure-issue:
	  name: Open or update issues for release failures
	  # When a new job is added to this workflow, add it to this list.
	  needs: [versioning, build, deploy-nodes]

	  # Only open tickets for failed or cancelled jobs that are not coming from PRs.
	  # (PR statuses are already reported in the PR jobs list, and checked by GitHub's Merge Queue.)
	  if: (failure() || cancelled()) && github.event.pull_request == null
	  runs-on: ubuntu-latest
	  # The workflow-level default only grants `contents: read`; creating or commenting on
	  # issues with GITHUB_TOKEN needs `issues: write`.
	  permissions:
	    contents: read
	    issues: write
	  steps:
	    - uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
	      with:
	        title-template: "{{refname}} branch CI failed: {{eventName}} in {{workflow}}"
	        # New failures open an issue with this label.
	        label-name: S-ci-fail-release-auto-issue
	        # If there is already an open issue with this label, any failures become comments on that issue.
	        always-create-new-issue: false
	        github-token: ${{ secrets.GITHUB_TOKEN }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

add: `agent-shell` directory to `.gitignore` (#10381) #17

Workflow file

add: `agent-shell` directory to `.gitignore` (#10381) #17

Uh oh!

Workflow file for this run

add: agent-shell directory to .gitignore (#10381) #17

Workflow file

Workflow file for this run

add: `agent-shell` directory to `.gitignore` (#10381) #17