AKS Test #629
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Copyright 2025-2026 Defense Unicorns | |
| # SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial | |
| name: AKS Test | |
| on: | |
| schedule: | |
| - cron: "0 0 * * 0" # Every Sunday Midnight (UTC) / Saturday 5pm MT | |
| workflow_call: {} | |
| permissions: | |
| id-token: write | |
| contents: read | |
| packages: read | |
| jobs: | |
| test-aks-install: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| flavor: [upstream, registry1, unicorn] | |
| runs-on: ubuntu-latest | |
| name: test-aks | |
| permissions: | |
| id-token: write | |
| contents: read | |
| packages: read | |
| pull-requests: write # Allows writing to pull requests (needed for renovate-readiness) | |
| env: | |
| SHA: ${{ github.sha }} | |
| UDS_REGION: centralus | |
| UDS_RESOURCE_GROUP_NAME: ${{ secrets.AZURE_RESOURCE_GROUP }} | |
| UDS_STORAGE_ACCOUNT_NAME: ${{ secrets.AZURE_STORAGE_ACCOUNT_NAME }} | |
| UDS_CONTAINER_NAME: ${{ secrets.AZURE_STORAGE_CONTAINER_NAME }} | |
| steps: | |
| - name: Set ENV | |
| run: | | |
| echo "UDS_CLUSTER_NAME=uds-ci-${{ matrix.flavor }}-${SHA:0:7}" >> $GITHUB_ENV | |
| echo "UDS_STATE_KEY="uds-core/${SHA:0:7}-nightly-aks-core-${{ matrix.flavor }}.tfstate >> $GITHUB_ENV | |
| echo "TF_VAR_location=${UDS_REGION}" >> $GITHUB_ENV | |
| echo "TF_VAR_cluster_name=uds-ci-${{ matrix.flavor }}-${SHA:0:7}" >> $GITHUB_ENV | |
| echo "TF_VAR_resource_group_name=uds-ci-${{ matrix.flavor }}" >> $GITHUB_ENV | |
| echo "ARM_SUBSCRIPTION_ID=${{ secrets.AZURE_SUBSCRIPTION_ID }}" >> $GITHUB_ENV | |
| echo "ARM_CLIENT_ID=${{ secrets.AZURE_CLIENT_ID }}" >> $GITHUB_ENV | |
| echo "ARM_TENANT_ID=${{ secrets.AZURE_TENANT_ID }}" >> $GITHUB_ENV | |
| echo "ARM_USE_OIDC=true" >> $GITHUB_ENV | |
| echo "ARM_STORAGE_USE_AZUREAD=true" >> $GITHUB_ENV | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| - name: Check renovate readiness | |
| if: startsWith(github.event.pull_request.head.ref, 'renovate/') # Only call for Renovate PRs | |
| uses: ./.github/actions/renovate-readiness | |
| with: | |
| github_token: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Azure login | |
| uses: azure/login@532459ea530d8321f2fb9bb10d1e0bcf23869a43 # v3 | |
| with: | |
| client-id: ${{ secrets.AZURE_CLIENT_ID }} | |
| tenant-id: ${{ secrets.AZURE_TENANT_ID }} | |
| subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} | |
| - name: Environment setup | |
| uses: ./.github/actions/setup | |
| with: | |
| ghToken: ${{ secrets.GITHUB_TOKEN }} | |
| registry1Username: ${{ (matrix.flavor == 'registry1') && secrets.IRON_BANK_ROBOT_USERNAME || '' }} | |
| registry1Password: ${{ (matrix.flavor == 'registry1') && secrets.IRON_BANK_ROBOT_PASSWORD || '' }} | |
| rapidfortUsername: ${{ (matrix.flavor == 'unicorn') && secrets.RAPIDFORT_USERNAME || '' }} | |
| rapidfortPassword: ${{ (matrix.flavor == 'unicorn') && secrets.RAPIDFORT_PASSWORD || '' }} | |
| - name: Setup Tofu | |
| uses: opentofu/setup-opentofu@fc711fa910b93cba0f3fbecaafc9f42fd0c411cb # v2.0.0 | |
| with: | |
| # renovate: datasource=github-tags depName=opentofu/opentofu extractVersion=^v?(?<version>.*)$ | |
| tofu_version: 1.11.6 | |
| tofu_wrapper: false | |
| - name: Create UDS Core Package | |
| run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --with create_options="--skip-sbom" --set FLAVOR=${{ matrix.flavor }} | |
| - name: Create Core Bundle | |
| run: uds create .github/bundles/aks --confirm | |
| - name: Create IAC | |
| run: uds run -f tasks/iac.yaml apply-tofu --no-progress --set K8S_DISTRO=aks --set CLOUD=azure | |
| - name: Get Resource Group Name | |
| run: echo "AKS_RESOURCE_GROUP=$(tofu output -raw resource_group_name)" >> $GITHUB_ENV | |
| working-directory: .github/test-infra/azure/aks | |
| - name: Configure Cluster DNS | |
| run: uds run -f tasks/utils.yaml aks-coredns-setup --no-progress | |
| - name: Wait for cluster ready | |
| run: uds run -f tasks/iac.yaml cluster-ready --no-progress | |
| - name: Deploy Core Bundle | |
| env: | |
| UDS_CONFIG: .github/bundles/aks/uds-config.yaml | |
| # Retry up to 2 times on failure to ride out transient AKS apiserver restart | |
| # windows that cause webhook proxy errors. --force-conflicts on retries works | |
| # around zarf issue 4771 (field-manager conflicts on partial re-apply). | |
| run: | | |
| for attempt in 1 2 3; do | |
| FORCE="" | |
| [ "$attempt" -gt 1 ] && FORCE="--force-conflicts" | |
| if uds deploy .github/bundles/aks/uds-bundle-uds-core-aks-nightly-*.tar.zst --confirm $FORCE; then | |
| echo "CORE_ATTEMPTS=$attempt" >> $GITHUB_ENV | |
| exit 0 | |
| fi | |
| if [ "$attempt" -lt 3 ]; then | |
| echo "Deploy failed on attempt $attempt; sleeping 30s before retry..." | |
| sleep 30 | |
| fi | |
| done | |
| echo "CORE_ATTEMPTS=fail" >> $GITHUB_ENV | |
| exit 1 | |
| timeout-minutes: 40 | |
| - name: Test UDS Core | |
| # Retry up to 2 times on failure to ride out transient AKS apiserver | |
| # webhook disruptions during test-app deploy, playwright, or vitest phases. | |
| run: | | |
| for attempt in 1 2 3; do | |
| if uds run -f tasks/test.yaml uds-core-non-k3d --set EXCLUDED_PACKAGES="metrics-server"; then | |
| echo "TEST_ATTEMPTS=$attempt" >> $GITHUB_ENV | |
| exit 0 | |
| fi | |
| if [ "$attempt" -lt 3 ]; then | |
| echo "Tests failed on attempt $attempt; sleeping 30s before retry..." | |
| sleep 30 | |
| fi | |
| done | |
| echo "TEST_ATTEMPTS=fail" >> $GITHUB_ENV | |
| exit 1 | |
| - name: Retry summary | |
| if: always() | |
| run: | | |
| status() { | |
| case "$1" in | |
| 1) echo "✅ $1 (first try)" ;; | |
| 2|3) echo "⚠️ $1 (retry needed)" ;; | |
| "") echo "❔ not run" ;; | |
| *) echo "❌ failed after 3 attempts" ;; | |
| esac | |
| } | |
| { | |
| echo "## Core Retry Summary" | |
| echo "" | |
| echo "| Step | Attempts |" | |
| echo "|------|----------|" | |
| echo "| Core deploy | $(status "${CORE_ATTEMPTS:-}") |" | |
| echo "| Test UDS Core | $(status "${TEST_ATTEMPTS:-}") |" | |
| } >> "$GITHUB_STEP_SUMMARY" | |
| - name: Debug Output | |
| if: always() | |
| uses: ./.github/actions/debug-output | |
| - name: Azure login (refresh token for log collection) | |
| if: always() | |
| uses: azure/login@532459ea530d8321f2fb9bb10d1e0bcf23869a43 # v3 | |
| with: | |
| client-id: ${{ secrets.AZURE_CLIENT_ID }} | |
| tenant-id: ${{ secrets.AZURE_TENANT_ID }} | |
| subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} | |
| - name: Install log-analytics extension | |
| if: always() | |
| run: | | |
| if ! az extension add --name log-analytics --allow-preview true --yes; then | |
| echo "::warning::Failed to install Azure CLI log-analytics extension; control plane log collection may be incomplete." | |
| fi | |
| - name: Wait for Log Analytics ingestion | |
| if: always() | |
| run: | | |
| TARGET=$(date -u +%s) | |
| DEADLINE=$((TARGET + 600)) # 10 min cap | |
| WORKSPACE_ID=$(az monitor log-analytics workspace list \ | |
| --resource-group "$AKS_RESOURCE_GROUP" \ | |
| --query "[0].customerId" -o tsv 2>/dev/null) || true | |
| if [ -z "$WORKSPACE_ID" ]; then | |
| echo "::warning::No Log Analytics workspace found; skipping ingestion wait." | |
| exit 0 | |
| fi | |
| while [ $(date +%s) -lt $DEADLINE ]; do | |
| LATEST=$(az monitor log-analytics query \ | |
| --workspace "$WORKSPACE_ID" \ | |
| --analytics-query "AzureDiagnostics | summarize max(TimeGenerated)" \ | |
| -o tsv 2>/dev/null | awk 'NF{print $NF; exit}') || true | |
| if [ -n "$LATEST" ]; then | |
| LATEST_SEC=$(date -u -d "$LATEST" +%s 2>/dev/null || echo 0) | |
| if [ "$LATEST_SEC" -ge "$TARGET" ]; then | |
| echo "Ingestion caught up: latest=$LATEST" | |
| exit 0 | |
| fi | |
| echo "Latest ingested: $LATEST (target: $(date -u -d @$TARGET +%FT%TZ)); waiting..." | |
| else | |
| echo "No ingestion timestamp yet; waiting..." | |
| fi | |
| sleep 30 | |
| done | |
| echo "::warning::Log Analytics ingestion did not catch up within 10m; log collection may be incomplete." | |
| - name: Save logs | |
| if: always() | |
| uses: ./.github/actions/save-logs | |
| with: | |
| suffix: -aks-${{ matrix.flavor }} | |
| distro: "aks" | |
| resource_group: ${{ env.AKS_RESOURCE_GROUP }} | |
| - name: Remove UDS Core | |
| if: always() | |
| run: set +e; for i in $(seq 0 2); do uds remove .github/bundles/aks/uds-bundle-uds-core-aks-nightly-*.tar.zst --confirm; if [ $? -eq 0 ]; then break; fi; done | |
| timeout-minutes: 10 | |
| continue-on-error: true | |
| - name: Remove IAC | |
| if: always() | |
| run: uds run -f tasks/iac.yaml destroy-iac --no-progress --set K8S_DISTRO=aks --set CLOUD=azure | |
| - name: Send Slack notification | |
| if: failure() && github.event_name == 'schedule' | |
| uses: ./.github/actions/slack-alert | |
| with: | |
| webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }} | |
| channel: ${{ vars.SLACK_ALERT_CHANNEL }} |