Skip to content

GCP: Test Image

GCP: Test Image #14

Workflow file for this run

name: "GCP: Test Image"
# Manually triggered only. Every input arrives in the `inputs` context as a
# string; choice options that look like numbers are quoted so YAML tooling
# does not retype them and they stay consistent with the quoted default.
on:
  workflow_dispatch:
    inputs:
      version_major:
        description: 'AlmaLinux major version'
        required: true
        default: '10'
        type: choice
        options:
          - 10-kitten
          - '10'
          - '9'
          - '8'
      arch:
        description: 'Architecture to test'
        required: true
        default: 'ALL'
        type: choice
        options:
          - ALL
          - x86_64
          - aarch64
      image_override:
        description: >-
          Image to test, overrides version_major to test a direct image
          instead. Architecture must be set properly for the image being
          passed. This must be a full path to a GCP image, for example,
          projects/almalinux-dev-images-469421/global/images/almalinux-9-v20230920
        required: false
        type: string
        default: ''
      x86_shape_override:
        description: >-
          x86_64 shape to use for jobs that do not iterate over matrix.shape
          (e.g. test-gcp-nonpershape). Empty = let cloud-image-tests choose.
          Example: c4-standard-8.
        required: false
        type: string
        default: ''
      aarch64_shape_override:
        description: >-
          aarch64 shape to use for jobs that do not iterate over matrix.shape
          (e.g. test-gcp-nonpershape). Empty = let cloud-image-tests choose.
          Example: c4a-standard-8.
        required: false
        type: string
        default: ''
      cit_git_repo:
        description: >-
          owner/repo of a cloud-image-tests fork to build instead of pulling
          the prebuilt image. Empty = use
          gcr.io/compute-image-tools/cloud-image-tests:latest. Full URLs are
          NOT accepted (actions/checkout takes owner/repo only). Example:
          GoogleCloudPlatform/cloud-image-tests
        required: false
        type: string
        default: ''
      cit_git_ref:
        description: >-
          Branch, tag, or commit SHA inside cit_git_repo. Empty = repository
          default branch. Ignored when cit_git_repo is empty.
        required: false
        type: string
        default: ''
      # notify_mattermost:
      #   description: "Send notification to Mattermost"
      #   required: true
      #   type: boolean
      #   default: false
env:
  # Fallback zone list handed to cit-run-with-retry when a matrix entry does
  # not supply its own `zones:` / `zone:`. It deliberately spans several
  # regions so that a capacity event in one region does not fail the whole
  # run. Folded scalar: the lines below join into one space-separated string.
  GCP_DEFAULT_ZONES: >-
    us-central1-a us-central1-b us-central1-c us-central1-f
    us-south1-a us-south1-b us-south1-c
    us-west1-a us-west1-b us-west1-c
    northamerica-northeast1-a northamerica-northeast1-b
    europe-west1-b europe-west1-c europe-west1-d
    europe-west2-a europe-west2-b europe-west2-c
    europe-west3-a europe-west3-b europe-west3-c
    europe-west4-a europe-west4-b europe-west4-c
jobs:
# Resolves which GCP image the test jobs should boot and exposes it as the
# `image_path` job output.
init-data:
  # Pinned to the same runner label the other GitHub-hosted jobs use.
  runs-on: ubuntu-24.04
  outputs:
    image_path: ${{ steps.determine_image.outputs.image_path }}
  steps:
    - name: Determine image to test
      id: determine_image
      # Inputs are handed to the script through environment variables rather
      # than being spliced into the script text, so a value containing quotes
      # or `$(...)` cannot be executed by the shell.
      env:
        IMAGE_OVERRIDE: ${{ inputs.image_override }}
        VERSION_MAJOR: ${{ inputs.version_major }}
        ARCH: ${{ inputs.arch }}
      run: |
        if [ -n "${IMAGE_OVERRIDE}" ]; then
          echo "Using image override: ${IMAGE_OVERRIDE}"
          image_path="${IMAGE_OVERRIDE}"
        else
          echo "Using version major: ${VERSION_MAJOR}"
          if [ "${ARCH}" = "ALL" ]; then
            echo "Using all architectures"
          else
            echo "Using arch: ${ARCH}"
          fi
          # The family path is the same for every arch selection; consumers
          # append their own arch suffix where they need one.
          image_path="projects/almalinux-dev-images-469421/global/images/family/almalinux-${VERSION_MAJOR}"
        fi
        echo "Determined image path: ${image_path}"
        echo "image_path=${image_path}" >> "${GITHUB_OUTPUT}"
# Optional: build cloud-image-tests from a user-supplied git repo+ref and
# publish a docker tarball as an artifact. All test jobs `needs:` this job.
# When cit_git_repo is empty this job is skipped. Note that GitHub Actions
# skips the dependents of a skipped job by default, so any job that `needs:`
# this one must carry a status-check `if:` (for example
# `!cancelled() && !failure()`) in order to still run in that case.
build-cit:
  name: Build cloud-image-tests from source
  # Only runs when the caller asked for a custom cloud-image-tests build.
  if: inputs.cit_git_repo != ''
  needs: init-data
  # family=m8azn + cpu=4 -> m8azn.xlarge (highest single-thread perf in AWS,
  # best for Go compile). Note: runs-on's inline labels do NOT support
  # `instance=<type>` — only `family=` + `cpu=`/`ram=`/etc. To pin a
  # different AWS size, change cpu (4=xlarge, 8=2xlarge, 16=4xlarge, ...).
  # volume=500g grows the actual root EBS volume (where /home/runner lives)
  # rather than attaching a secondary disk. Requires runs-on >= 2.9 — the
  # older `disk=` selector mounts an additional volume at /runs-on/ which
  # doesn't help when /home/runner/_diag/ fills up on the small fixed root.
  # Forks fall back to the default GH-hosted runner. Tune size based on
  # the runs-on cost/performance summary printed by the step below.
  runs-on: "${{ github.repository_owner == 'AlmaLinux' && format('runs-on={0}/family=m8azn/cpu=4/volume=500g/spot=false/image=ubuntu24-full-x64', github.run_id) || 'ubuntu-24.04' }}"
  steps:
    - name: Show runs-on cost/performance summary
      if: github.repository_owner == 'AlmaLinux'
      uses: runs-on/action@v1
    - name: Checkout CIT source
      uses: actions/checkout@v6
      with:
        repository: ${{ inputs.cit_git_repo }}
        # An empty ref makes actions/checkout use the repository default branch.
        ref: ${{ inputs.cit_git_ref }}
        path: cit-src
        # The checked-out tree is caller-supplied code that is built right
        # below; do not leave the workflow token configured for it.
        persist-credentials: false
    - name: Build CIT image
      shell: bash
      # The tag embeds the run id; the test jobs reference the same tag via
      # their `image_ref` input.
      run: docker build -t cit-custom:run-${{ github.run_id }} cit-src
    - name: Save image to tarball
      shell: bash
      run: docker save cit-custom:run-${{ github.run_id }} | gzip > cit-image.tar.gz
    - name: Upload CIT image artifact
      uses: actions/upload-artifact@v7
      with:
        name: cit-custom-image
        path: cit-image.tar.gz
        # Only needed for the duration of this run.
        retention-days: 1
# Runs an initial set of smoke tests against a shape that can be used with
# high concurrency. This shortens the feedback loop: an obviously broken image
# is caught here without waiting for the much longer per-shape test jobs.
test-gcp-initialtest:
  name: AlmaLinux ${{ inputs.image_override || format('{0} {1}', inputs.version_major, matrix.arch) }} Initial Tests
  needs: [init-data, build-cit]
  # build-cit is skipped whenever cit_git_repo is empty, and GitHub skips the
  # dependents of a skipped job unless they carry a status-check condition.
  # Run when init-data succeeded and build-cit either succeeded or was skipped.
  if: >-
    ${{ !cancelled()
    && needs.init-data.result == 'success'
    && (needs.build-cit.result == 'success' || needs.build-cit.result == 'skipped') }}
  permissions:
    id-token: write
    contents: read
  runs-on: ubuntu-24.04
  strategy:
    fail-fast: false
    matrix:
      # Arch matrix derived from the input: ALL expands to both x86_64 and
      # aarch64, anything else becomes a single-element list.
      arch: ${{ fromJSON(inputs.arch == 'ALL' && '["x86_64","aarch64"]' || format('["{0}"]', inputs.arch)) }}
  steps:
    # we don't need the checked out files, but this is required for the google auth action to work
    - uses: 'actions/checkout@v6'
    - id: 'google-auth-image-testing'
      uses: 'google-github-actions/auth@v3'
      with:
        workload_identity_provider: 'projects/527193872801/locations/global/workloadIdentityPools/github-actions/providers/github'
        service_account: 'github-actions-image-testing@almalinux-image-testing-469421.iam.gserviceaccount.com'
    - name: 'Set up Google Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v3'
    - name: Download custom CIT image
      if: inputs.cit_git_repo != ''
      uses: actions/download-artifact@v7
      with:
        name: cit-custom-image
    - name: Load custom CIT image
      if: inputs.cit_git_repo != ''
      shell: bash
      run: gunzip -c cit-image.tar.gz | docker load
    - name: 'Run Google cloud-image-testing tests which are hard-coded to specific shapes'
      uses: ./.github/actions/cit-run-with-retry
      with:
        # Family images for aarch64 carry an `-arm64` suffix; an explicit
        # image_override is used verbatim.
        image: "${{ needs.init-data.outputs.image_path }}${{ inputs.image_override == '' && matrix.arch == 'aarch64' && '-arm64' || '' }}"
        filter: '^(lssd|disk|vmspec)$'
        shape_flag: "${{ matrix.arch == 'aarch64' && '-arm64_shape c4a-standard-8' || '-x86_shape c4-standard-8' }}"
        parallel_count: '20'
        parallel_stagger: '1s'
        zones: ${{ env.GCP_DEFAULT_ZONES }}
        creds_path: ${{ env.GOOGLE_GHA_CREDS_PATH }}
        quota_log_file: ${{ runner.temp }}/quota-failures.jsonl
        # Use the image built by build-cit when a fork was requested,
        # otherwise the published cloud-image-tests image.
        image_ref: ${{ inputs.cit_git_repo != '' && format('cit-custom:run-{0}', github.run_id) || 'gcr.io/compute-image-tools/cloud-image-tests:latest' }}
    - name: Upload quota-failure log
      # Upload even when the test step failed, so the log is still collected.
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: quota-failures-initialtest-${{ matrix.arch }}
        path: ${{ runner.temp }}/quota-failures.jsonl
        if-no-files-found: ignore
        retention-days: 7
# Runs the tests that cloud-image-tests hard-codes to specific shapes.
# Repeating them for every shape would be pointless, since they are forced
# onto their own shapes anyway.
test-gcp-nonpershape:
  name: AlmaLinux ${{ inputs.image_override || format('{0} {1}', inputs.version_major, matrix.arch) }} Non-Per-Shape Tests
  needs: [init-data, build-cit]
  # build-cit is skipped whenever cit_git_repo is empty, and GitHub skips the
  # dependents of a skipped job unless they carry a status-check condition.
  # Run when init-data succeeded and build-cit either succeeded or was skipped.
  if: >-
    ${{ !cancelled()
    && needs.init-data.result == 'success'
    && (needs.build-cit.result == 'success' || needs.build-cit.result == 'skipped') }}
  permissions:
    id-token: write
    contents: read
  runs-on: ubuntu-24.04
  strategy:
    fail-fast: false
    matrix:
      # Arch matrix derived from the input: ALL expands to both x86_64 and
      # aarch64, anything else becomes a single-element list.
      arch: ${{ fromJSON(inputs.arch == 'ALL' && '["x86_64","aarch64"]' || format('["{0}"]', inputs.arch)) }}
  steps:
    # we don't need the checked out files, but this is required for the google auth action to work
    - uses: 'actions/checkout@v6'
    - id: 'google-auth-image-testing'
      uses: 'google-github-actions/auth@v3'
      with:
        workload_identity_provider: 'projects/527193872801/locations/global/workloadIdentityPools/github-actions/providers/github'
        service_account: 'github-actions-image-testing@almalinux-image-testing-469421.iam.gserviceaccount.com'
    - name: 'Set up Google Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v3'
    - name: Download custom CIT image
      if: inputs.cit_git_repo != ''
      uses: actions/download-artifact@v7
      with:
        name: cit-custom-image
    - name: Load custom CIT image
      if: inputs.cit_git_repo != ''
      shell: bash
      run: gunzip -c cit-image.tar.gz | docker load
    - name: 'Run Google cloud-image-testing tests which are hard-coded to specific shapes'
      uses: ./.github/actions/cit-run-with-retry
      with:
        # Family images for aarch64 carry an `-arm64` suffix; an explicit
        # image_override is used verbatim.
        image: "${{ needs.init-data.outputs.image_path }}${{ inputs.image_override == '' && matrix.arch == 'aarch64' && '-arm64' || '' }}"
        filter: '^(lssd|disk|vmspec)$'
        # If an arch-matching shape_override input is set, pin the shape;
        # otherwise leave empty and let cloud-image-tests pick its hardcoded shapes.
        shape_flag: "${{ matrix.arch == 'aarch64' && inputs.aarch64_shape_override != '' && format('-arm64_shape {0}', inputs.aarch64_shape_override) || matrix.arch == 'x86_64' && inputs.x86_shape_override != '' && format('-x86_shape {0}', inputs.x86_shape_override) || '' }}"
        parallel_stagger: '10s'
        zones: ${{ env.GCP_DEFAULT_ZONES }}
        creds_path: ${{ env.GOOGLE_GHA_CREDS_PATH }}
        quota_log_file: ${{ runner.temp }}/quota-failures.jsonl
        # Use the image built by build-cit when a fork was requested,
        # otherwise the published cloud-image-tests image.
        image_ref: ${{ inputs.cit_git_repo != '' && format('cit-custom:run-{0}', github.run_id) || 'gcr.io/compute-image-tools/cloud-image-tests:latest' }}
    - name: Upload quota-failure log
      # Upload even when the test step failed, so the log is still collected.
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: quota-failures-nonpershape-${{ matrix.arch }}
        path: ${{ runner.temp }}/quota-failures.jsonl
        if-no-files-found: ignore
        retention-days: 7
# Runs the shape-independent test suites once per x86_64 machine shape.
test-gcp-pershape-x86_64:
  name: ${{ inputs.image_override || format('{0}', inputs.version_major) }} x86_64 ${{ matrix.shape }}
  needs: [init-data, test-gcp-initialtest, build-cit]
  permissions:
    id-token: write
    contents: read
  runs-on: "${{ github.repository_owner == 'AlmaLinux' && format('runs-on={0}/runner=2cpu-linux-x64/spot=false/image=almalinux-10-x86_64', github.run_id) || 'ubuntu-24.04' }}"
  # Only for x86_64 (or ALL) runs, and only after the smoke tests passed.
  # build-cit is skipped whenever cit_git_repo is empty, and GitHub skips the
  # dependents of a skipped job unless they carry a status-check condition,
  # so "skipped" is accepted explicitly for build-cit.
  if: >-
    ${{ !cancelled()
    && (inputs.arch == 'ALL' || inputs.arch == 'x86_64')
    && needs.init-data.result == 'success'
    && needs.test-gcp-initialtest.result == 'success'
    && (needs.build-cit.result == 'success' || needs.build-cit.result == 'skipped') }}
  strategy:
    fail-fast: false
    matrix:
      shape:
        - n4-standard-2
        - n4-standard-80
        - n2-standard-4
        - n2-standard-128
        - n2d-standard-2
        - n2d-standard-224
        - n1-standard-1
        - n1-standard-96
        - c4-standard-2
        # can never get capacity for 288 so using 192
        - c4-standard-192
        # all lssd shapes fail
        # https://github.com/GoogleCloudPlatform/cloud-image-tests/issues/345
        # - c4-standard-4-lssd
        # - c4-standard-288-lssd
        # never has capacity anywhere
        # - c4-standard-288-metal
        - c4d-standard-2
        # can never get capacity for 384 so using 192
        - c4d-standard-192
        # - c4d-standard-8-lssd
        # - c4d-standard-384-lssd
        - c3-standard-4
        - c3-standard-176
        # step "create-vms" fails with: googleapi: Error 400: C3 Metal
        # instance does not support multi-vNIC., badRequest
        # - c3-standard-192-metal
        # lssd always fails
        # - c3-standard-4-lssd
        # - c3-standard-176-lssd
        - c3d-standard-4
        - c3d-standard-360
        # - c3d-standard-8-lssd
        # - c3d-standard-360-lssd
        - e2-standard-2
        - e2-standard-32
        - e2-medium
        - t2d-standard-1
        - t2d-standard-60
        # h4d tests fail
        # https://github.com/GoogleCloudPlatform/cloud-image-tests/issues/346
        # - h4d-standard-192
        # - h4d-highmem-192-lssd
        # can never get capacity for this shape
        # - h3-standard-88
        - c2-standard-4
        - c2-standard-60
        - c2d-standard-2
        - c2d-standard-112
        # m4, x4, m3, m2 disabled due to no quotas
        # - m4-megamem-28
        # - m4-megamem-224
        # - x4-megamem-960-metal
        # - x4-megamem-1920-metal
        # - m3-megamem-64
        # - m2-megamem-416
        # m1 quota too low
        # - m1-megamem-96
        # z3 tests fail
        # https://github.com/GoogleCloudPlatform/cloud-image-tests/issues/346
        # z3 tests not needed, basically c3 with lots of lssd, so covered by c3-lssd tests
        # - z3-highmem-14-standardlssd
      # things with special zone requirements or other special needs
      include:
        # Placeholder entry that declares the optional `zone:` / `zones:`
        # keys on the matrix type so `matrix.zone` / `matrix.zones`
        # expressions in the cit-run-with-retry call are valid. Empty
        # values are no-ops at runtime (the action falls back to
        # env.GCP_DEFAULT_ZONES). Add real overrides below as needed.
        - shape: n4-standard-2
          zone: ''
          zones: ''
        # only available in us-central1-b
        # disabled, never any capacity available
        # - shape: c4d-standard-384-metal
        #   zone: us-central1-b
        #   zones: us-central1-b
  steps:
    # we don't need the checked out files, but this is required for the google auth action to work
    - uses: 'actions/checkout@v6'
    - id: 'google-auth-image-testing'
      uses: 'google-github-actions/auth@v3'
      with:
        workload_identity_provider: 'projects/527193872801/locations/global/workloadIdentityPools/github-actions/providers/github'
        service_account: 'github-actions-image-testing@almalinux-image-testing-469421.iam.gserviceaccount.com'
    - name: 'Set up Google Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v3'
    - name: Runner OS specific configuration
      shell: bash
      run: |
        # Pick the container runtime for the runner image in use and export
        # it as `docker_cmd` for the following steps.
        if [ -e /etc/redhat-release ]; then
          # RHEL-family runner: install and use podman.
          sudo setenforce 0
          sudo dnf -y install podman
          runner_user="${USER}"
          sudo loginctl enable-linger "${runner_user}"
          docker_cmd=podman
        elif lsb_release -cs > /dev/null 2>&1; then
          docker_cmd=docker
        else
          echo "[Debug] Unknown OS"
          exit 1
        fi
        echo "docker_cmd=${docker_cmd}" >> "${GITHUB_ENV}"
    - name: Download custom CIT image
      if: inputs.cit_git_repo != ''
      uses: actions/download-artifact@v7
      with:
        name: cit-custom-image
    - name: Load custom CIT image
      if: inputs.cit_git_repo != ''
      shell: bash
      # build-cit emits a docker-save tarball; podman 4.x+ reads it natively.
      run: gunzip -c cit-image.tar.gz | ${{ env.docker_cmd }} load
    - name: 'Run Google cloud-image-testing tests on ${{ matrix.shape }}'
      uses: ./.github/actions/cit-run-with-retry
      with:
        runtime: ${{ env.docker_cmd }}
        image: ${{ needs.init-data.outputs.image_path || inputs.image_override }}
        filter: '^(cvm|livemigrate|suspendresume|loadbalancer|guestagent|hostnamevalidation|imageboot|licensevalidation|network|security|hotattach|packagevalidation|ssh|metadata)$'
        shape_flag: "-x86_shape ${{ matrix.shape }}"
        parallel_count: '1'
        # Per-shape overrides from the matrix `include:` entries; empty
        # values fall through to the defaults.
        first_attempt_zone: ${{ matrix.zone || '' }}
        zones: ${{ matrix.zones || env.GCP_DEFAULT_ZONES }}
        creds_path: ${{ env.GOOGLE_GHA_CREDS_PATH }}
        quota_log_file: ${{ runner.temp }}/quota-failures.jsonl
        # Use the image built by build-cit when a fork was requested,
        # otherwise the published cloud-image-tests image.
        image_ref: ${{ inputs.cit_git_repo != '' && format('cit-custom:run-{0}', github.run_id) || 'gcr.io/compute-image-tools/cloud-image-tests:latest' }}
    - name: Upload quota-failure log
      # Upload even when the test step failed, so the log is still collected.
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: quota-failures-pershape-x86_64-${{ matrix.shape }}
        path: ${{ runner.temp }}/quota-failures.jsonl
        if-no-files-found: ignore
        retention-days: 7
# Runs the shape-independent test suites once per aarch64 machine shape.
test-gcp-pershape-aarch64:
  name: ${{ inputs.image_override || format('{0}', inputs.version_major) }} aarch64 ${{ matrix.shape }}
  needs: [init-data, test-gcp-initialtest, build-cit]
  permissions:
    id-token: write
    contents: read
  runs-on: ubuntu-24.04
  # Only for aarch64 (or ALL) runs, and only after the smoke tests passed.
  # build-cit is skipped whenever cit_git_repo is empty, and GitHub skips the
  # dependents of a skipped job unless they carry a status-check condition,
  # so "skipped" is accepted explicitly for build-cit.
  if: >-
    ${{ !cancelled()
    && (inputs.arch == 'ALL' || inputs.arch == 'aarch64')
    && needs.init-data.result == 'success'
    && needs.test-gcp-initialtest.result == 'success'
    && (needs.build-cit.result == 'success' || needs.build-cit.result == 'skipped') }}
  strategy:
    fail-fast: false
    matrix:
      shape:
        # pretty limited quotas for this one
        # let's test one shape flavor + lssd
        # metal gets large-CPUs detection check
        - c4a-standard-4
        - c4a-standard-72
        # all lssd shapes fail
        # https://github.com/GoogleCloudPlatform/cloud-image-tests/issues/345
        # - c4a-standard-4-lssd
        # pretty limited quotas for this one
        # let's just test one shape flavor
        # can never get capacity for this shape, even with low parallelism, so skipping for now
        # - t2a-standard-4
        - n4a-standard-4
      include:
        # Placeholder so `parallel_count` / `zone` / `zones` exist on the
        # matrix type (the cit-run-with-retry call below references them
        # via `matrix.X || <fallback>`). Empty values are no-ops. Add
        # real per-shape overrides here as needed.
        - shape: c4a-standard-72
          parallel_count: ''
          zone: ''
          zones: ''
  steps:
    # this isn't a great way to handle this, but GH actions has limitations...
    # NOTE(review): c4a-standard-96-metal is not in the shape list above, so
    # this guard never fires as written; it only matters if that shape is
    # added to the matrix.
    - name: Skip certain jobs
      id: checker
      if: (matrix.shape == 'c4a-standard-96-metal' && inputs.version_major == '8')
      run: |
        echo "Skipping ${{ matrix.shape }} for version_major ${{ inputs.version_major}} due to incompatibility."
        echo "skip=true" >> "${GITHUB_OUTPUT}"
        exit 0
    # we don't need the checked out files, but this is required for the google auth action to work
    - uses: 'actions/checkout@v6'
      if: steps.checker.outputs.skip != 'true'
    - id: 'google-auth-image-testing'
      uses: 'google-github-actions/auth@v3'
      if: steps.checker.outputs.skip != 'true'
      with:
        workload_identity_provider: 'projects/527193872801/locations/global/workloadIdentityPools/github-actions/providers/github'
        service_account: 'github-actions-image-testing@almalinux-image-testing-469421.iam.gserviceaccount.com'
    - name: 'Set up Google Cloud SDK'
      if: steps.checker.outputs.skip != 'true'
      uses: 'google-github-actions/setup-gcloud@v3'
    - name: Download custom CIT image
      if: inputs.cit_git_repo != '' && steps.checker.outputs.skip != 'true'
      uses: actions/download-artifact@v7
      with:
        name: cit-custom-image
    - name: Load custom CIT image
      if: inputs.cit_git_repo != '' && steps.checker.outputs.skip != 'true'
      shell: bash
      run: gunzip -c cit-image.tar.gz | docker load
    - name: 'Run Google cloud-image-testing tests on ${{ matrix.shape }}'
      if: steps.checker.outputs.skip != 'true'
      uses: ./.github/actions/cit-run-with-retry
      with:
        # Family images for aarch64 carry an `-arm64` suffix; an explicit
        # image_override is used verbatim.
        image: "${{ needs.init-data.outputs.image_path || inputs.image_override }}${{ inputs.image_override == '' && '-arm64' || '' }}"
        filter: '^(cvm|livemigrate|suspendresume|loadbalancer|guestagent|hostnamevalidation|imageboot|licensevalidation|network|security|hotattach|packagevalidation|ssh|metadata)$'
        shape_flag: "-arm64_shape ${{ matrix.shape }}"
        # Per-shape overrides from the matrix `include:` entries; empty
        # values fall through to the defaults.
        parallel_count: ${{ matrix.parallel_count || '1' }}
        first_attempt_zone: ${{ matrix.zone || '' }}
        zones: ${{ matrix.zones || env.GCP_DEFAULT_ZONES }}
        creds_path: ${{ env.GOOGLE_GHA_CREDS_PATH }}
        quota_log_file: ${{ runner.temp }}/quota-failures.jsonl
        # Use the image built by build-cit when a fork was requested,
        # otherwise the published cloud-image-tests image.
        image_ref: ${{ inputs.cit_git_repo != '' && format('cit-custom:run-{0}', github.run_id) || 'gcr.io/compute-image-tools/cloud-image-tests:latest' }}
    - name: Upload quota-failure log
      # Upload even when the test step failed, so the log is still collected.
      if: always() && steps.checker.outputs.skip != 'true'
      uses: actions/upload-artifact@v7
      with:
        name: quota-failures-pershape-aarch64-${{ matrix.shape }}
        path: ${{ runner.temp }}/quota-failures.jsonl
        if-no-files-found: ignore
        retention-days: 7
# Aggregates the per-job JSONL quota-failure logs into a single markdown
# table on the run's summary page. Useful for filing quota-increase
# requests: groups by (location, quota, shape) so the highest-impact
# caps surface first.
summarize-quota-failures:
  name: Summarize quota failures
  needs:
    - test-gcp-initialtest
    - test-gcp-nonpershape
    - test-gcp-pershape-x86_64
    - test-gcp-pershape-aarch64
  # Summarize whatever was collected, even if test jobs failed or were skipped.
  if: always()
  runs-on: ubuntu-24.04
  steps:
    - name: Download all quota-failure artifacts
      # Same major version as the other download-artifact steps in this file.
      uses: actions/download-artifact@v7
      with:
        pattern: quota-failures-*
        path: quota-failures
        # Every artifact contains a file with the same name
        # (quota-failures.jsonl). Merging them into one directory would make
        # them overwrite each other, so keep one sub-directory per artifact.
        merge-multiple: false
    - name: Write summary
      shell: bash
      run: |
        set -euo pipefail
        # nullglob: an unmatched pattern expands to nothing instead of itself.
        # globstar: `**` matches at any depth, so logs are found whether they
        # sit directly in quota-failures/ or in per-artifact sub-directories.
        shopt -s nullglob globstar
        files=(quota-failures/**/*.jsonl)
        if (( ${#files[@]} == 0 )); then
          {
            echo "## Quota failure summary"
            echo
            echo "No quota failures recorded in this run."
          } >> "$GITHUB_STEP_SUMMARY"
          exit 0
        fi
        cat "${files[@]}" > all-quota-failures.jsonl
        # Count parsed events rather than lines, so a log without a trailing
        # newline is still counted correctly.
        total=$(jq -s 'length' all-quota-failures.jsonl)
        {
          echo "## Quota failure summary"
          echo
          echo "**Total quota failure events:** ${total}"
          echo
          echo "Group key: (location, quota, shape). Counts include retries."
          echo "Use this to scope quota-increase requests."
          echo
          echo "| Location | Quota | Shape | Count |"
          echo "|----------|-------|-------|-------|"
        } >> "$GITHUB_STEP_SUMMARY"
        # One table row per (location, quota, shape) group, most frequent
        # first; missing fields are rendered as "?".
        jq -s -r '
          group_by([.location, .quota, .shape])
          | map({
              location: .[0].location,
              quota: .[0].quota,
              shape: .[0].shape,
              count: length
            })
          | sort_by(-.count, .location, .quota, .shape)
          | .[]
          | "| \(.location // "?") | \(.quota // "?") | \(.shape // "?") | \(.count) |"
        ' all-quota-failures.jsonl >> "$GITHUB_STEP_SUMMARY"