Skip to content

feat(cli): update-state + targeted reset --package (closes #257) #644

feat(cli): update-state + targeted reset --package (closes #257)

feat(cli): update-state + targeted reset --package (closes #257) #644

Workflow file for this run

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Operator CI: builds and tests whenever operator code (or anything the
# operator build and tests depend on) changes.
name: Operator CI
on:
  # Allow manual runs; no inputs are declared.
  workflow_dispatch: {}
  # Pull requests that touch any of the listed paths.
  pull_request:
    paths:
      - 'operator/**/*.go'
      - 'operator/go.mod'
      - 'operator/go.sum'
      - 'operator/deps.mk'
      - 'operator/config/**'
      - 'containers/operator.Dockerfile'
      - '.github/actions/**'
      - '.github/workflows/operator-ci.yaml'
      - 'k8s-tests/**'
      - 'chart/**'
  # Pushes to main and pushes of operator/* tags, with the same path list.
  push:
    branches:
      - 'main'
    tags:
      - 'operator/*'
    paths:
      - 'operator/**/*.go'
      - 'operator/go.mod'
      - 'operator/go.sum'
      - 'operator/deps.mk'
      - 'operator/config/**'
      - 'containers/operator.Dockerfile'
      - '.github/actions/**'
      - '.github/workflows/operator-ci.yaml'
      - 'k8s-tests/**'
      - 'chart/**'
# Workflow-wide environment: these values control the build and test jobs below.
env:
  # Container registry host and image namespace (the repository's owner/name).
  REGISTRY: 'ghcr.io'
  IMAGE_NAME: ${{ github.repository }}
  # Toolchain / base image versions; quoted so they always stay strings.
  GO_VERSION: '1.26.3'
  DEBIAN_VERSION: 'trixie'
  # Opt every JavaScript action into Node 24. GitHub Actions is phasing out
  # Node 20 starting June 2026; setting this avoids the deprecation warnings
  # without waiting for each action (checkout/setup-go/cache/upload-artifact)
  # to publish a Node 24 release.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'
  KIND_BINARY_VERSION: 'v0.31.0'
  PLATFORMS: 'linux/amd64,linux/arm64'
  # Evaluates to true for every event except pull requests whose head
  # repository differs from this repository (i.e. fork PRs).
  PUSH_TO_REGISTRY: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }}
jobs:
# Look up the current Go distroless version and expose it as the job output
# `go-version` for downstream jobs.
fetch-distroless-versions:
  name: Fetch Latest Distroless Versions
  runs-on: ubuntu-latest
  outputs:
    go-version: ${{ steps.fetch.outputs.go-version }}
  steps:
    - name: Fetch versions from NVIDIA CDN
      id: fetch
      run: |
        # pipefail: without it a jq failure would be hidden by the trailing sed.
        set -euo pipefail
        # Fetch the versions.json file
        VERSIONS_JSON=$(curl -fsSL https://developer.download.nvidia.com/distroless-oss/versions.json)
        # Extract latest Go v4 version (format: "v4.0.1" -> "4.0.1").
        # `// empty` makes jq print nothing (instead of the string "null")
        # when the key is missing.
        GO_DISTROLESS_VERSION=$(echo "$VERSIONS_JSON" | jq -r '.v4.go.go // empty' | sed 's/^v//')
        # Fail here with a clear message rather than handing an empty or
        # bogus version to the jobs that consume this output.
        if [ -z "${GO_DISTROLESS_VERSION}" ]; then
          echo "::error::versions.json does not contain a .v4.go.go entry"
          exit 1
        fi
        echo "go-version=${GO_DISTROLESS_VERSION}" >> "$GITHUB_OUTPUT"
        echo "📦 Go v4 Distroless Version: ${GO_DISTROLESS_VERSION}"
# Test operator across supported Kubernetes versions and test suites
tests:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Standard E2E tests on all supported K8s versions
      k8s-version: ["1.32.11", "1.33.7", "1.34.3", "1.35.0"]
      test-suite: ["e2e"]
      pool: ["core", "interrupt", "uninstall", "lifecycle"]
      make-targets: ["setup-kind-cluster e2e-tests"]
      include:
        # Deployment policy tests on 15-node cluster (K8s 1.35 only)
        - k8s-version: "1.35.0"
          test-suite: deployment-policy
          kind-config: k8s-tests/chainsaw/deployment-policy/kind-config.yaml
          make-targets: "setup-kind-cluster deployment-policy-tests"
        # CLI e2e tests on K8s 1.35 only
        - k8s-version: "1.35.0"
          test-suite: cli-e2e
          make-targets: "setup-kind-cluster cli-e2e-tests"
        # Vet, lint and unit tests; the steps below skip the registry login
        # and cluster creation for this suite
        - k8s-version: "1.35.0"
          test-suite: unit-tests
          make-targets: "vet lint unit-tests"
        # Helm tests; the cluster for this suite is created through ctlptl
        - k8s-version: "1.35.0"
          test-suite: helm-tests
          make-targets: "helm-tests"
    fail-fast: false  # Continue testing other versions if one fails
  # "<suite>[/<pool>] (k8s-<version>)"; the pool part is omitted when the
  # matrix entry defines no pool.
  name: ${{ matrix.test-suite }}${{ matrix.pool && format('/{0}', matrix.pool) || '' }} (k8s-${{ matrix.k8s-version }})
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-tags: true
        fetch-depth: 0
    - name: Setup Go ${{ env.GO_VERSION }}
      uses: actions/setup-go@v6
      with:
        go-version: ${{ env.GO_VERSION }}
        cache-dependency-path: operator/go.sum
    - name: Log in to the Container registry
      if: matrix.test-suite != 'unit-tests'  # unit tests don't need a container registry login
      uses: docker/login-action@v4
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    # Cache build tools and dependencies for faster builds
    - name: Restore cached Binaries
      id: cached-binaries
      uses: actions/cache/restore@v5
      with:
        key: ${{ env.GO_VERSION }}-${{ runner.os }}-${{ runner.arch }}-bin-${{ hashFiles('operator/deps.mk') }}
        restore-keys: ${{ env.GO_VERSION }}-${{ runner.os }}-${{ runner.arch }}-bin-
        path: |
          ${{ github.workspace }}/operator/bin
          ~/.cache/go-build
    # Only install (and then save) the tools when there was no exact cache hit
    - name: Install dependencies
      if: steps.cached-binaries.outputs.cache-hit != 'true'
      run: |
        cd operator
        make install-deps
    - name: Save cached Binaries
      id: save-cached-binaries
      if: steps.cached-binaries.outputs.cache-hit != 'true'
      uses: actions/cache/save@v5
      with:
        key: ${{ env.GO_VERSION }}-${{ runner.os }}-${{ runner.arch }}-bin-${{ hashFiles('operator/deps.mk') }}
        path: |
          ${{ github.workspace }}/operator/bin
          ~/.cache/go-build
    - name: Create Kubernetes KinD Cluster v${{ matrix.k8s-version }}
      if: matrix.test-suite != 'unit-tests' && matrix.test-suite != 'helm-tests'  # unit tests don't need a cluster; helm-tests uses ctlptl
      id: kind
      uses: helm/kind-action@v1.14.0
      with:
        version: ${{ env.KIND_BINARY_VERSION }}
        node_image: kindest/node:v${{ matrix.k8s-version }}
        # Falls back to the local-dev config when the matrix entry sets no kind-config
        config: ${{ matrix.kind-config || 'operator/config/local-dev/kind-config.yaml' }}
        cluster_name: kind
    # helm-tests path: install the kind binary (checksum-verified), then
    # create and set up the cluster through ctlptl
    - name: Install kind
      if: matrix.test-suite == 'helm-tests'
      run: |
        curl -fsSL -o /tmp/kind-linux-amd64 https://kind.sigs.k8s.io/dl/${KIND_BINARY_VERSION}/kind-linux-amd64
        curl -fsSL -o /tmp/kind-linux-amd64.sha256sum https://kind.sigs.k8s.io/dl/${KIND_BINARY_VERSION}/kind-linux-amd64.sha256sum
        cd /tmp && sha256sum -c kind-linux-amd64.sha256sum
        sudo install /tmp/kind-linux-amd64 /usr/local/bin/kind
    - name: Create ctlptl KinD Cluster
      if: matrix.test-suite == 'helm-tests'
      run: ./operator/bin/ctlptl apply -f operator/config/local-dev/ctlptl-config.yaml
    - name: Set up ctlptl KinD Cluster
      if: matrix.test-suite == 'helm-tests'
      run: |
        cd operator
        make setup-kind-cluster
    # Run test suite
    - name: Run ${{ matrix.test-suite }} tests
      env:
        # Empty for matrix entries that define no pool
        POOL: ${{ matrix.pool }}
      run: |
        cd operator
        make ${{ matrix.make-targets }} merge-coverage
    # Save coverage artifacts from any test suite that generates them
    - name: Upload coverage artifact
      if: hashFiles('operator/reporting/cover.out') != ''
      uses: actions/upload-artifact@v5
      with:
        name: coverage-${{ matrix.test-suite }}${{ matrix.pool && format('-{0}', matrix.pool) || '' }}-k8s-${{ matrix.k8s-version }}
        path: operator/reporting/cover.out
        retention-days: 1
        if-no-files-found: ignore
# Combine the coverage profiles produced by the test matrix and publish the
# merged profile to Coveralls
upload-coverage:
  runs-on: ubuntu-latest
  needs:
    - tests
  # Runs for PRs and branch pushes only; tag refs are excluded
  if: success() && !startsWith(github.ref, 'refs/tags/')
  steps:
    - uses: actions/checkout@v6
    - name: Setup Go ${{ env.GO_VERSION }}
      uses: actions/setup-go@v6
      with:
        go-version: ${{ env.GO_VERSION }}
    - name: Download all coverage artifacts
      uses: actions/download-artifact@v5
      with:
        pattern: 'coverage-*'
        path: coverage-artifacts
        # Keep one sub-directory per artifact so the cover.out files don't collide
        merge-multiple: false
    - name: Merge coverage files
      run: |
        cd operator
        mkdir -p reporting
        # Concatenate every downloaded profile into one scratch file
        for profile in ../coverage-artifacts/*/cover.out; do
          # An unmatched glob stays literal, so skip anything that is not a file
          [ -f "$profile" ] || continue
          echo "Merging coverage from $profile"
          cat "$profile" >> reporting/all-cover.out
        done
        # Final profile: a single mode header followed by all entries with
        # the per-file "mode: set" headers removed
        echo "mode: set" > reporting/cover.out
        tail -n +2 reporting/all-cover.out | sed '/mode: set/d' >> reporting/cover.out
        # Print the overall percentage
        echo "📊 Total Combined Coverage:"
        go tool cover -func reporting/cover.out | grep total
    - name: Upload to Coveralls
      uses: coverallsapp/github-action@v2.3.6
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        file: operator/reporting/cover.out
        format: golang
# Compute image tags and version metadata once for reuse
compute-metadata:
  runs-on: ubuntu-latest
  needs: [tests, fetch-distroless-versions]
  outputs:
    git-sha: ${{ steps.meta.outputs.git-sha }}
    version: ${{ steps.meta.outputs.version }}
    tags: ${{ steps.meta.outputs.tags }}
  steps:
    - uses: actions/checkout@v6
    - name: Fetch all tags
      run: git fetch --tags --force
    - name: Compute metadata
      id: meta
      # Context values are handed to the script through the environment
      # instead of being expanded into the script text, so a ref name can
      # never be interpreted as shell code.
      env:
        COMMIT_SHA: ${{ github.sha }}
        REF_TYPE: ${{ github.ref_type }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        # Plain assignment (not `export VAR=$(...)`) so a failing command
        # aborts the step under `bash -e`.
        GIT_SHA=$(git rev-parse --short "${COMMIT_SHA}")
        echo "git-sha=${GIT_SHA}" >> "$GITHUB_OUTPUT"
        case "${REF_TYPE}" in
          branch)
            # Newest operator* tag (version-sorted), with the part after the
            # first "/" kept, plus the short commit SHA: <tag>+<sha>
            LATEST_TAG=$(git tag --list 'operator*' --sort=-v:refname | head -n 1 | cut -d/ -f2)
            VERSION="${LATEST_TAG}+${GIT_SHA}"
            # The image tag is derived from VERSION with "+" replaced by "-"
            TAGS="${GIT_SHA} $(echo "${VERSION}" | tr + -)"
            ;;
          tag)
            # operator/<version> -> <version>
            VERSION=$(echo "${REF_NAME}" | cut -f 2 -d /)
            TAGS="${GIT_SHA} ${VERSION} latest"
            ;;
          *)
            echo "Unknown ref type: ${REF_TYPE}"
            exit 1
            ;;
        esac
        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
        echo "tags=${TAGS}" >> "$GITHUB_OUTPUT"
        echo "📦 Version: ${VERSION}"
        echo "🏷️ Tags: ${TAGS}"
# Build container images on native architecture runners (much faster than QEMU)
build-operator:
  runs-on: ${{ matrix.runner }}
  needs: [compute-metadata, fetch-distroless-versions]
  strategy:
    matrix:
      include:
        - platform: linux/amd64
          runner: ubuntu-latest
        - platform: linux/arm64
          runner: ubuntu-24.04-arm
  permissions:
    contents: read
    packages: write
    attestations: write
    id-token: write
  steps:
    - name: Checkout repository
      uses: actions/checkout@v6
    - name: Fetch all tags
      run: git fetch --tags --force
    - name: Log in to the Container registry
      uses: docker/login-action@v4
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    # Build and tag container image for single platform on native hardware
    - name: Build the operator container image (${{ matrix.platform }})
      id: build
      # Everything the script needs arrives through the environment rather
      # than being expanded into the script text; the tag list in particular
      # is derived from the git ref name.
      env:
        GIT_SHA: ${{ needs.compute-metadata.outputs.git-sha }}
        VERSION: ${{ needs.compute-metadata.outputs.version }}
        IMAGE_TAGS: ${{ needs.compute-metadata.outputs.tags }}
        DISTROLESS_VERSION: ${{ needs.fetch-distroless-versions.outputs.go-version }}
        PLATFORM: ${{ matrix.platform }}
      run: |
        cd operator
        # linux/amd64 -> linux-amd64 (used as a tag suffix)
        PLATFORM_TAG=$(echo "${PLATFORM}" | tr '/' '-')
        # Lowercase for Docker compliance
        IMAGE_REPO=$(echo "${IMAGE_NAME}" | tr '[:upper:]' '[:lower:]')
        REGISTRY_HOST=$(echo "${REGISTRY}" | tr '[:upper:]' '[:lower:]')
        # Build platform-specific tags for all target tags. An array keeps
        # each "-t <ref>" pair intact; refs are lowercased in full.
        TAG_ARGS=()
        for TAG in ${IMAGE_TAGS}; do
          REF="${REGISTRY_HOST}/${IMAGE_REPO}/operator:${TAG}-${PLATFORM_TAG}"
          TAG_ARGS+=(-t "${REF,,}")
        done
        # PUSH_TO_REGISTRY comes from the workflow-level env
        if [ "${PUSH_TO_REGISTRY}" = "true" ]; then
          PUSH_OR_LOAD="--push"
        else
          PUSH_OR_LOAD="--load"
          echo "Fork PR build: building image without pushing to registry"
        fi
        set -x
        docker buildx build \
          --build-arg "GIT_SHA=${GIT_SHA}" \
          --build-arg "VERSION=${VERSION}" \
          --build-arg "GO_VERSION=${GO_VERSION}" \
          --build-arg "DEBIAN_VERSION=${DEBIAN_VERSION}" \
          --build-arg "DISTROLESS_VERSION=${DISTROLESS_VERSION}" \
          "${PUSH_OR_LOAD}" \
          --platform "${PLATFORM}" \
          --provenance=false \
          "${TAG_ARGS[@]}" \
          --metadata-file=metadata.json \
          -f ../containers/operator.Dockerfile .
        # Publish the image digest recorded by buildx as a step output
        echo "digest=$(jq -r '."containerimage.digest"' metadata.json)" >> "$GITHUB_OUTPUT"
# Create multi-platform manifest from individual architecture builds
create-manifest:
  # Same condition as the PUSH_TO_REGISTRY env value, spelled out because the
  # env context is not available in a job-level `if`.
  if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
  runs-on: ubuntu-latest
  needs: [compute-metadata, build-operator]
  outputs:
    digest: ${{ steps.digest.outputs.digest }}
    subject-name: ${{ steps.manifest.outputs.subject-name }}
  permissions:
    contents: read
    packages: write
    attestations: write
    id-token: write
  steps:
    - name: Checkout repository
      uses: actions/checkout@v6
    - name: Log in to the Container registry
      uses: docker/login-action@v4
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    # Create and push multi-platform manifests. The platform-specific tags
    # are NOT deleted; see the note inside the script.
    - name: Create manifests and cleanup
      id: manifest
      # The tag list is derived from the git ref name, so it is passed via
      # the environment instead of being expanded into the script text.
      env:
        IMAGE_TAGS: ${{ needs.compute-metadata.outputs.tags }}
      run: |
        # Lowercase for Docker compliance
        IMAGE_REPO=$(echo "${IMAGE_NAME}" | tr '[:upper:]' '[:lower:]')
        REGISTRY_HOST=$(echo "${REGISTRY}" | tr '[:upper:]' '[:lower:]')
        # Create manifest for each tag combining amd64 and arm64 images
        for TAG in ${IMAGE_TAGS}; do
          FULL_TAG="${REGISTRY_HOST}/${IMAGE_REPO}/operator:${TAG}"
          # build-operator lowercases the complete per-platform reference
          # before pushing, so the source images are looked up lowercased.
          PLATFORM_BASE="${FULL_TAG,,}"
          echo "📦 Creating manifest for $FULL_TAG"
          docker manifest create "${FULL_TAG}" \
            "${PLATFORM_BASE}-linux-amd64" \
            "${PLATFORM_BASE}-linux-arm64"
          docker manifest push "${FULL_TAG}"
          echo "✅ Pushed $FULL_TAG"
        done
        echo "subject-name=${REGISTRY_HOST}/${IMAGE_REPO}/operator" >> "$GITHUB_OUTPUT"
        # Note: Platform-specific tags (e.g., v1.0.0-linux-amd64) are left in registry
        # as intermediate artifacts. Users should pull the multi-platform manifest tags.
        # GitHub Container Registry doesn't easily support programmatic tag deletion.
        echo "✅ Multi-platform manifests created successfully"
    # The digest is resolved from the short-SHA tag, which is in every tag list
    - name: Resolve multi-platform manifest digest
      id: digest
      uses: ./.github/actions/resolve-oci-digest
      with:
        image: ${{ steps.manifest.outputs.subject-name }}
        tag: ${{ needs.compute-metadata.outputs.git-sha }}
    # Signing, attestation and verification only run for operator/* tag refs
    - name: Sign GHCR operator image and attach SBOM
      if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/operator/')
      uses: ./.github/actions/cosign-sign-sbom
      with:
        subject-name: ${{ steps.manifest.outputs.subject-name }}
        subject-digest: ${{ steps.digest.outputs.digest }}
    - name: Attest GHCR operator provenance
      if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/operator/')
      uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0
      with:
        subject-name: ${{ steps.manifest.outputs.subject-name }}
        subject-digest: ${{ steps.digest.outputs.digest }}
        push-to-registry: true
    - name: Verify GHCR operator signature and attestations
      if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/operator/')
      uses: ./.github/actions/cosign-verify-release
      with:
        subject-name: ${{ steps.manifest.outputs.subject-name }}
        subject-digest: ${{ steps.digest.outputs.digest }}
        certificate-identity-regexp: ^https://github.com/${{ github.repository }}/\.github/workflows/operator-ci\.yaml@refs/tags/operator/.*$
# Single required check for branch protection. operator-ci, agent-ci,
# and lint-ci all publish a check named `ci-gate` — GitHub composes
# same-named required checks, so every ci-gate that posts must pass.
# lint-ci is the always-runs workflow (no path filter) so a ci-gate
# always appears on every PR; this workflow's ci-gate posts only when
# operator-ci's path filter triggers.
#
# The if: always() + jq result==success pattern is the standard fix
# for GitHub Actions' "skipped == green" pitfall.
#
# needs: only includes jobs that ALWAYS run. upload-coverage skips on
# tag builds and create-manifest skips on fork PRs — including either
# would make the gate fail red on legitimate skips.
ci-gate:
  name: ci-gate
  needs: [tests, build-operator]
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Verify all required jobs passed
      # The needs JSON is passed through the environment rather than pasted
      # between single quotes in the script, so quotes inside the JSON
      # cannot break (or inject into) the shell command.
      env:
        NEEDS_JSON: ${{ toJSON(needs) }}
      run: |
        echo "$NEEDS_JSON"
        # jq -e exits non-zero unless every needed job's result is "success"
        echo "$NEEDS_JSON" | jq -e 'to_entries | all(.value.result == "success")'