From a3a3273d74c62b3873ed206663cf8c1dc0033488 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 19 May 2026 13:07:26 -0700 Subject: [PATCH 1/5] ci: publish keyless release attestations (#232) (#240) * ci: publish keyless release attestations * ci: drop nvcr release attestations * ci: narrow release attestation identity * ci: gate attestations to release tags * docs: show immutable release subjects * ci: attest helm chart releases --------- Signed-off-by: AnouarMohamed Co-authored-by: Anouar Mohamed --- .github/actions/cosign-sign-sbom/action.yml | 108 +++++++++++++++ .../actions/cosign-verify-release/action.yml | 74 ++++++++++ .github/actions/resolve-oci-digest/action.yml | 98 ++++++++++++++ .github/workflows/agent-ci.yaml | 50 +++++-- .github/workflows/operator-ci.yaml | 48 +++++-- .github/workflows/release.yml | 46 ++++++- docs/release-process.md | 126 +++++++++++++++++- 7 files changed, 527 insertions(+), 23 deletions(-) create mode 100644 .github/actions/cosign-sign-sbom/action.yml create mode 100644 .github/actions/cosign-verify-release/action.yml create mode 100644 .github/actions/resolve-oci-digest/action.yml diff --git a/.github/actions/cosign-sign-sbom/action.yml b/.github/actions/cosign-sign-sbom/action.yml new file mode 100644 index 00000000..f021e4da --- /dev/null +++ b/.github/actions/cosign-sign-sbom/action.yml @@ -0,0 +1,108 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +name: Cosign sign and SBOM attest +description: Sign an OCI subject and attach a CycloneDX SBOM attestation. + +inputs: + subject-name: + description: OCI repository without tag or digest. + required: true + subject-digest: + description: "OCI digest in sha256:HEX form, where HEX is 64 lowercase hex characters." + required: true + sbom-source: + description: Source for Syft SBOM generation. Defaults to subject-name@subject-digest. + required: false + default: "" + syft-version: + description: Syft version used to generate CycloneDX SBOMs. + required: false + default: "v1.38.0" + +outputs: + sbom-file: + description: Generated CycloneDX SBOM file. + value: ${{ steps.sbom.outputs.sbom-file }} + +runs: + using: composite + steps: + - name: Install cosign + uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2 + + - name: Install Syft + id: syft + uses: anchore/sbom-action/download-syft@e22c389904149dbc22b58101806040fa8d37a610 # v0.24.0 + with: + syft-version: ${{ inputs.syft-version }} + + - name: Generate CycloneDX SBOM + id: sbom + shell: bash + env: + SUBJECT_NAME: ${{ inputs.subject-name }} + SUBJECT_DIGEST: ${{ inputs.subject-digest }} + SBOM_SOURCE: ${{ inputs.sbom-source }} + SYFT: ${{ steps.syft.outputs.cmd }} + run: | + set -euo pipefail + + if [ -z "${SUBJECT_NAME}" ]; then + echo "::error::subject-name is required" + exit 1 + fi + if ! 
[[ "${SUBJECT_DIGEST}" =~ ^sha256:[a-f0-9]{64}$ ]]; then + echo "::error::subject-digest must be in sha256:<64 hex> form" + exit 1 + fi + if [ -z "${SYFT}" ]; then + echo "::error::Syft command was not provided by the installer" + exit 1 + fi + + subject="${SUBJECT_NAME}@${SUBJECT_DIGEST}" + if [ -z "${SBOM_SOURCE}" ]; then + SBOM_SOURCE="${subject}" + fi + + work_dir="${RUNNER_TEMP}/nodewright-sboms" + mkdir -p "${work_dir}" + safe_name="$(printf '%s' "${SUBJECT_NAME}" | tr '/:@' '---' | tr -c '[:alnum:]._-' '-')" + sbom_file="${work_dir}/${safe_name}.cyclonedx.json" + + "${SYFT}" "${SBOM_SOURCE}" -o "cyclonedx-json=${sbom_file}" + jq empty "${sbom_file}" + echo "sbom-file=${sbom_file}" >> "${GITHUB_OUTPUT}" + + - name: Sign subject + shell: bash + env: + SUBJECT_NAME: ${{ inputs.subject-name }} + SUBJECT_DIGEST: ${{ inputs.subject-digest }} + run: | + set -euo pipefail + cosign sign --yes "${SUBJECT_NAME}@${SUBJECT_DIGEST}" + + - name: Attach CycloneDX SBOM attestation + shell: bash + env: + SUBJECT_NAME: ${{ inputs.subject-name }} + SUBJECT_DIGEST: ${{ inputs.subject-digest }} + SBOM_FILE: ${{ steps.sbom.outputs.sbom-file }} + run: | + set -euo pipefail + cosign attest --yes --predicate "${SBOM_FILE}" --type cyclonedx "${SUBJECT_NAME}@${SUBJECT_DIGEST}" diff --git a/.github/actions/cosign-verify-release/action.yml b/.github/actions/cosign-verify-release/action.yml new file mode 100644 index 00000000..e23cfa20 --- /dev/null +++ b/.github/actions/cosign-verify-release/action.yml @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Cosign verify release subject +description: Verify release signature, CycloneDX SBOM attestation, and SLSA v1 provenance. + +inputs: + subject-name: + description: OCI repository without tag or digest. + required: true + subject-digest: + description: "OCI digest in sha256:HEX form, where HEX is 64 lowercase hex characters." + required: true + certificate-identity-regexp: + description: Expected Fulcio certificate identity regexp. + required: true + certificate-oidc-issuer: + description: Expected Fulcio OIDC issuer. + required: false + default: "https://token.actions.githubusercontent.com" + +runs: + using: composite + steps: + - name: Install cosign + uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2 + + - name: Verify release subject + shell: bash + env: + SUBJECT_NAME: ${{ inputs.subject-name }} + SUBJECT_DIGEST: ${{ inputs.subject-digest }} + CERTIFICATE_IDENTITY_REGEXP: ${{ inputs.certificate-identity-regexp }} + CERTIFICATE_OIDC_ISSUER: ${{ inputs.certificate-oidc-issuer }} + run: | + set -euo pipefail + + if [ -z "${SUBJECT_NAME}" ]; then + echo "::error::subject-name is required" + exit 1 + fi + if ! 
[[ "${SUBJECT_DIGEST}" =~ ^sha256:[a-f0-9]{64}$ ]]; then + echo "::error::subject-digest must be in sha256:<64 hex> form" + exit 1 + fi + + subject="${SUBJECT_NAME}@${SUBJECT_DIGEST}" + cosign verify \ + --certificate-identity-regexp "${CERTIFICATE_IDENTITY_REGEXP}" \ + --certificate-oidc-issuer "${CERTIFICATE_OIDC_ISSUER}" \ + "${subject}" + cosign verify-attestation \ + --certificate-identity-regexp "${CERTIFICATE_IDENTITY_REGEXP}" \ + --certificate-oidc-issuer "${CERTIFICATE_OIDC_ISSUER}" \ + --type cyclonedx \ + "${subject}" + cosign verify-attestation \ + --certificate-identity-regexp "${CERTIFICATE_IDENTITY_REGEXP}" \ + --certificate-oidc-issuer "${CERTIFICATE_OIDC_ISSUER}" \ + --type https://slsa.dev/provenance/v1 \ + "${subject}" diff --git a/.github/actions/resolve-oci-digest/action.yml b/.github/actions/resolve-oci-digest/action.yml new file mode 100644 index 00000000..5014fa6e --- /dev/null +++ b/.github/actions/resolve-oci-digest/action.yml @@ -0,0 +1,98 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Resolve OCI digest +description: Resolve the immutable digest for an OCI image, manifest list, or chart artifact. + +inputs: + image: + description: OCI repository without tag or digest. + required: true + tag: + description: OCI tag to resolve. 
+ required: true + attempts: + description: Number of attempts before failing. + required: false + default: "1" + retry-delay-seconds: + description: Delay between attempts. + required: false + default: "15" + +outputs: + digest: + description: "Resolved digest in sha256:HEX form, where HEX is 64 lowercase hex characters." + value: ${{ steps.resolve.outputs.digest }} + subject: + description: Immutable OCI subject reference. + value: ${{ steps.resolve.outputs.subject }} + +runs: + using: composite + steps: + - name: Resolve digest + id: resolve + shell: bash + env: + IMAGE: ${{ inputs.image }} + TAG: ${{ inputs.tag }} + ATTEMPTS: ${{ inputs.attempts }} + RETRY_DELAY_SECONDS: ${{ inputs.retry-delay-seconds }} + run: | + set -euo pipefail + + if [ -z "${IMAGE}" ]; then + echo "::error::image is required" + exit 1 + fi + if [ -z "${TAG}" ]; then + echo "::error::tag is required" + exit 1 + fi + if ! [[ "${ATTEMPTS}" =~ ^[0-9]+$ ]] || [ "${ATTEMPTS}" -lt 1 ]; then + echo "::error::attempts must be a positive integer" + exit 1 + fi + if ! [[ "${RETRY_DELAY_SECONDS}" =~ ^[0-9]+$ ]]; then + echo "::error::retry-delay-seconds must be a non-negative integer" + exit 1 + fi + + ref="${IMAGE}:${TAG}" + digest="" + err_file="${RUNNER_TEMP}/resolve-oci-digest.err" + for attempt in $(seq 1 "${ATTEMPTS}"); do + echo "Resolving ${ref} (attempt ${attempt}/${ATTEMPTS})" + set +e + digest="$(docker buildx imagetools inspect "${ref}" --format '{{json .Manifest}}' 2>"${err_file}" | jq -r '.digest // empty')" + status=$? 
+ set -e + + if [ "${status}" -eq 0 ] && [[ "${digest}" =~ ^sha256:[a-f0-9]{64}$ ]]; then + echo "digest=${digest}" >> "${GITHUB_OUTPUT}" + echo "subject=${IMAGE}@${digest}" >> "${GITHUB_OUTPUT}" + exit 0 + fi + + cat "${err_file}" >&2 || true + if [ "${attempt}" -lt "${ATTEMPTS}" ]; then + sleep "${RETRY_DELAY_SECONDS}" + fi + done + + echo "::error::failed to resolve digest for ${ref}" + exit 1 diff --git a/.github/workflows/agent-ci.yaml b/.github/workflows/agent-ci.yaml index 97ec7f6a..b83a6252 100644 --- a/.github/workflows/agent-ci.yaml +++ b/.github/workflows/agent-ci.yaml @@ -23,6 +23,7 @@ on: - agent/** - '!agent/go/**' - containers/agent.Dockerfile + - .github/actions/** - .github/workflows/agent-ci.yaml push: branches: @@ -33,6 +34,7 @@ on: - agent/** - '!agent/go/**' - containers/agent.Dockerfile + - .github/actions/** - .github/workflows/agent-ci.yaml env: REGISTRY: ghcr.io @@ -215,18 +217,27 @@ jobs: if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository runs-on: ubuntu-latest needs: [compute-metadata, build-agent] + outputs: + digest: ${{ steps.digest.outputs.digest }} + subject-name: ${{ steps.manifest.outputs.subject-name }} permissions: contents: read packages: write attestations: write id-token: write steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Log in to the Container registry uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 # Create and push multi-platform manifests, then delete platform-specific tags - name: Create manifests and cleanup @@ -246,27 +257,44 @@ jobs: docker manifest push $FULL_TAG echo "✅ Pushed $FULL_TAG" done - - # Get digest of the main tag (git sha) for attestation - MAIN_TAG="${REGISTRY}/${IMAGE_NAME}/agent:${{ needs.compute-metadata.outputs.git-sha }}" - DIGEST=$(docker manifest inspect 
$MAIN_TAG | jq -r '.manifests[0].digest') - echo "digest=$DIGEST" >> $GITHUB_OUTPUT + echo "subject-name=${REGISTRY}/${IMAGE_NAME}/agent" >> $GITHUB_OUTPUT # Note: Platform-specific tags (e.g., v1.0.0-linux-amd64) are left in registry # as intermediate artifacts. Users should pull the multi-platform manifest tags. # GitHub Container Registry doesn't easily support programmatic tag deletion. echo "✅ Multi-platform manifests created successfully" + + - name: Resolve multi-platform manifest digest + id: digest + uses: ./.github/actions/resolve-oci-digest + with: + image: ${{ steps.manifest.outputs.subject-name }} + tag: ${{ needs.compute-metadata.outputs.git-sha }} - # Generate supply chain security attestation for the multi-platform manifest - - name: Generate artifact attestation - if: env.PUSH_TO_REGISTRY == 'true' - uses: actions/attest-build-provenance@v4 + - name: Sign GHCR agent image and attach SBOM + if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/agent/') + uses: ./.github/actions/cosign-sign-sbom with: subject-name: ${{ steps.manifest.outputs.subject-name }} - subject-digest: ${{ steps.manifest.outputs.digest }} + subject-digest: ${{ steps.digest.outputs.digest }} + + - name: Attest GHCR agent provenance + if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/agent/') + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 + with: + subject-name: ${{ steps.manifest.outputs.subject-name }} + subject-digest: ${{ steps.digest.outputs.digest }} push-to-registry: true - + + - name: Verify GHCR agent signature and attestations + if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/agent/') + uses: ./.github/actions/cosign-verify-release + with: + subject-name: ${{ steps.manifest.outputs.subject-name }} + subject-digest: ${{ steps.digest.outputs.digest }} + certificate-identity-regexp: ^https://github.com/${{ github.repository 
}}/\.github/workflows/agent-ci\.yaml@refs/tags/agent/.*$ + operator-agent-tests: if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository name: Operator Agent Integration Tests diff --git a/.github/workflows/operator-ci.yaml b/.github/workflows/operator-ci.yaml index 6cfe03d3..52b570d4 100644 --- a/.github/workflows/operator-ci.yaml +++ b/.github/workflows/operator-ci.yaml @@ -27,6 +27,7 @@ on: - operator/deps.mk - operator/config/** - containers/operator.Dockerfile + - .github/actions/** - .github/workflows/operator-ci.yaml - k8s-tests/** - chart/** @@ -42,6 +43,7 @@ on: - operator/deps.mk - operator/config/** - containers/operator.Dockerfile + - .github/actions/** - .github/workflows/operator-ci.yaml - k8s-tests/** - chart/** @@ -349,18 +351,27 @@ jobs: if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository runs-on: ubuntu-latest needs: [compute-metadata, build-operator] + outputs: + digest: ${{ steps.digest.outputs.digest }} + subject-name: ${{ steps.manifest.outputs.subject-name }} permissions: contents: read packages: write attestations: write id-token: write steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Log in to the Container registry uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 # Create and push multi-platform manifests, then delete platform-specific tags - name: Create manifests and cleanup @@ -382,27 +393,44 @@ jobs: docker manifest push $FULL_TAG echo "✅ Pushed $FULL_TAG" done - - # Get digest of the main tag (git sha) for attestation - MAIN_TAG="${REGISTRY}/${IMAGE_NAME}/operator:${{ needs.compute-metadata.outputs.git-sha }}" - DIGEST=$(docker manifest inspect $MAIN_TAG | jq -r '.manifests[0].digest') - echo "digest=$DIGEST" >> $GITHUB_OUTPUT + echo 
"subject-name=${REGISTRY}/${IMAGE_NAME}/operator" >> $GITHUB_OUTPUT # Note: Platform-specific tags (e.g., v1.0.0-linux-amd64) are left in registry # as intermediate artifacts. Users should pull the multi-platform manifest tags. # GitHub Container Registry doesn't easily support programmatic tag deletion. echo "✅ Multi-platform manifests created successfully" + + - name: Resolve multi-platform manifest digest + id: digest + uses: ./.github/actions/resolve-oci-digest + with: + image: ${{ steps.manifest.outputs.subject-name }} + tag: ${{ needs.compute-metadata.outputs.git-sha }} - # Generate supply chain security attestation for the multi-platform manifest - - name: Generate artifact attestation - if: env.PUSH_TO_REGISTRY == 'true' - uses: actions/attest-build-provenance@v4 + - name: Sign GHCR operator image and attach SBOM + if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/operator/') + uses: ./.github/actions/cosign-sign-sbom + with: + subject-name: ${{ steps.manifest.outputs.subject-name }} + subject-digest: ${{ steps.digest.outputs.digest }} + + - name: Attest GHCR operator provenance + if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/operator/') + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 with: subject-name: ${{ steps.manifest.outputs.subject-name }} - subject-digest: ${{ steps.manifest.outputs.digest }} + subject-digest: ${{ steps.digest.outputs.digest }} push-to-registry: true + - name: Verify GHCR operator signature and attestations + if: env.PUSH_TO_REGISTRY == 'true' && startsWith(github.ref, 'refs/tags/operator/') + uses: ./.github/actions/cosign-verify-release + with: + subject-name: ${{ steps.manifest.outputs.subject-name }} + subject-digest: ${{ steps.digest.outputs.digest }} + certificate-identity-regexp: ^https://github.com/${{ github.repository }}/\.github/workflows/operator-ci\.yaml@refs/tags/operator/.*$ + # Single required check for branch protection. 
operator-ci, agent-ci, # and lint-ci all publish a check named `ci-gate` — GitHub composes # same-named required checks, so every ci-gate that posts must pass. diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 72596039..a557e7e2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -138,6 +138,8 @@ jobs: permissions: contents: read packages: write + attestations: write + id-token: write steps: - name: Checkout repository uses: actions/checkout@v4 @@ -148,13 +150,17 @@ jobs: version: v3.16.2 - name: Verify chart version matches tag + id: chart-metadata run: | + CHART_NAME=$(yq '.name' chart/Chart.yaml) TAG_VERSION="${GITHUB_REF_NAME#chart/}" CHART_VERSION=$(yq '.version' chart/Chart.yaml) if [ "${TAG_VERSION}" != "${CHART_VERSION}" ]; then echo "Tag version ${TAG_VERSION} does not match chart/Chart.yaml version ${CHART_VERSION}" >&2 exit 1 fi + echo "name=${CHART_NAME}" >> "${GITHUB_OUTPUT}" + echo "version=${CHART_VERSION}" >> "${GITHUB_OUTPUT}" - name: Package chart run: | @@ -167,5 +173,43 @@ jobs: | helm registry login ghcr.io --username "${{ github.actor }}" --password-stdin - name: Push chart to ghcr.io + id: push-chart + env: + CHART_NAME: ${{ steps.chart-metadata.outputs.name }} + CHART_VERSION: ${{ steps.chart-metadata.outputs.version }} run: | - helm push dist/*.tgz oci://ghcr.io/nvidia/nodewright/charts + set -euo pipefail + + chart_repo="ghcr.io/nvidia/nodewright/charts" + chart_subject="${chart_repo}/${CHART_NAME}" + push_output="$(helm push "dist/${CHART_NAME}-${CHART_VERSION}.tgz" "oci://${chart_repo}")" + printf '%s\n' "${push_output}" + chart_digest="$(awk '/^Digest:/ {print $2}' <<< "${push_output}")" + if ! 
[[ "${chart_digest}" =~ ^sha256:[a-f0-9]{64}$ ]]; then + echo "::error::failed to parse Helm chart digest from helm push output" + exit 1 + fi + + echo "subject-name=${chart_subject}" >> "${GITHUB_OUTPUT}" + echo "digest=${chart_digest}" >> "${GITHUB_OUTPUT}" + + - name: Sign GHCR Helm chart and attach SBOM + uses: ./.github/actions/cosign-sign-sbom + with: + subject-name: ${{ steps.push-chart.outputs.subject-name }} + subject-digest: ${{ steps.push-chart.outputs.digest }} + sbom-source: chart + + - name: Attest GHCR Helm chart provenance + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 + with: + subject-name: ${{ steps.push-chart.outputs.subject-name }} + subject-digest: ${{ steps.push-chart.outputs.digest }} + push-to-registry: true + + - name: Verify GHCR Helm chart signature and attestations + uses: ./.github/actions/cosign-verify-release + with: + subject-name: ${{ steps.push-chart.outputs.subject-name }} + subject-digest: ${{ steps.push-chart.outputs.digest }} + certificate-identity-regexp: ^https://github.com/${{ github.repository }}/\.github/workflows/release\.yml@refs/tags/chart/.*$ diff --git a/docs/release-process.md b/docs/release-process.md index 7d544bee..42911809 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -192,9 +192,133 @@ Note: **After tagging:** - [ ] CI/CD pipeline completes -- [ ] Images published successfully +- [ ] Images and chart artifacts published successfully - [ ] Test deployment with new version +### Verify release signatures and attestations + +Release workflows publish keyless Sigstore signatures, CycloneDX SBOM attestations, and SLSA v1 provenance attestations for GHCR image and Helm chart release artifacts. 
+ +Prerequisites: + +- Docker buildx (`docker buildx version`) +- cosign (`cosign version`) +- jq (`jq --version`) + +The expected OIDC issuer is: + +```bash +https://token.actions.githubusercontent.com +``` + +The expected certificate identity must match the specific component release workflow identity on that component's tag refs. + +For operator images: + +```bash +^https://github.com/NVIDIA/nodewright/\.github/workflows/operator-ci\.yaml@refs/tags/operator/.*$ +``` + +For agent images: + +```bash +^https://github.com/NVIDIA/nodewright/\.github/workflows/agent-ci\.yaml@refs/tags/agent/.*$ +``` + +For Helm chart artifacts: + +```bash +^https://github.com/NVIDIA/nodewright/\.github/workflows/release\.yml@refs/tags/chart/.*$ +``` + +Resolve the artifact digest first, then verify by immutable digest: + +#### Operator image + +```bash +IMAGE=ghcr.io/nvidia/nodewright/operator +TAG=v0.15.0 +DIGEST=$(docker buildx imagetools inspect "${IMAGE}:${TAG}" --format '{{json .Manifest}}' | jq -r '.digest') +SUBJECT="${IMAGE}@${DIGEST}" +IDENTITY='^https://github.com/NVIDIA/nodewright/\.github/workflows/operator-ci\.yaml@refs/tags/operator/.*$' +ISSUER='https://token.actions.githubusercontent.com' + +cosign verify \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + "${SUBJECT}" +cosign verify-attestation \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + --type cyclonedx \ + "${SUBJECT}" +cosign verify-attestation \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + --type https://slsa.dev/provenance/v1 \ + "${SUBJECT}" +``` + +#### Agent image + +```bash +IMAGE=ghcr.io/nvidia/nodewright/agent +TAG=v6.4.0 +DIGEST=$(docker buildx imagetools inspect "${IMAGE}:${TAG}" --format '{{json .Manifest}}' | jq -r '.digest') +SUBJECT="${IMAGE}@${DIGEST}" +IDENTITY='^https://github.com/NVIDIA/nodewright/\.github/workflows/agent-ci\.yaml@refs/tags/agent/.*$' 
+ISSUER='https://token.actions.githubusercontent.com' + +cosign verify \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + "${SUBJECT}" +cosign verify-attestation \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + --type cyclonedx \ + "${SUBJECT}" +cosign verify-attestation \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + --type https://slsa.dev/provenance/v1 \ + "${SUBJECT}" +``` + +#### Helm chart + +```bash +CHART=ghcr.io/nvidia/nodewright/charts/skyhook-operator +TAG=v0.15.1 +DIGEST=$(docker buildx imagetools inspect "${CHART}:${TAG}" --format '{{json .Manifest}}' | jq -r '.digest') +SUBJECT="${CHART}@${DIGEST}" +IDENTITY='^https://github.com/NVIDIA/nodewright/\.github/workflows/release\.yml@refs/tags/chart/.*$' +ISSUER='https://token.actions.githubusercontent.com' + +cosign verify \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + "${SUBJECT}" +cosign verify-attestation \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + --type cyclonedx \ + "${SUBJECT}" +cosign verify-attestation \ + --certificate-identity-regexp "${IDENTITY}" \ + --certificate-oidc-issuer "${ISSUER}" \ + --type https://slsa.dev/provenance/v1 \ + "${SUBJECT}" +``` + +Use the same command pattern for each released artifact: + +| Artifact | Immutable OCI subject | +|----------|-----------------------| +| GHCR operator image | `ghcr.io/nvidia/nodewright/operator@sha256:` | +| GHCR agent image | `ghcr.io/nvidia/nodewright/agent@sha256:` | +| GHCR Helm chart | `ghcr.io/nvidia/nodewright/charts/skyhook-operator@sha256:` | + ## Common Commands ```bash From 04a5b1a70667d52d02516bc0490b6aa94ca59406 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 19 May 2026 13:07:49 -0700 Subject: [PATCH 2/5] chore(docs): update docs around release and location of helm chart (#237) (#239) --- 
.claude/CLAUDE.md | 2 +- .github/workflows/lint-ci.yaml | 64 ++++- README.md | 28 ++- chart/Chart.yaml | 8 +- chart/README.md | 10 +- chart/templates/_helpers.tpl | 19 +- chart/values.yaml | 10 +- docs/release-process.md | 233 ++++++++++++------ .../helm-chart-test/assert-no-schedule.yaml | 10 +- .../helm-chart-test/assert-scheduled.yaml | 10 +- .../assert-no-schedule.yaml | 7 +- .../assert-scheduled.yaml | 7 +- .../assert-override-resources.yaml | 6 +- .../assert-scaled-resources.yaml | 6 +- .../helm-webhook-test/assert-scheduled.yaml | 6 +- 15 files changed, 271 insertions(+), 155 deletions(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 9648166f..89fbe227 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -8,7 +8,7 @@ The canonical file lives at `.claude/CLAUDE.md`. The root-level `AGENTS.md` is a Skyhook (being renamed to NodeWright) is a Kubernetes-aware package manager for safely modifying host infrastructure at scale. It coordinates the node lifecycle (cordon → drain → apply package → interrupt/reboot → uncordon) as controlled rollouts gated by interruption budgets and deployment policies. -Rename status: the project is transitioning from Skyhook → NodeWright. Public names (CRDs `skyhook.nvidia.com/v1alpha1`, Helm chart `skyhook-operator`, CLI `kubectl skyhook`, namespace `skyhook`) still use `skyhook`. The Go module, however, is already `github.com/NVIDIA/nodewright/operator` — don't "fix" imports back to skyhook. +Rename status: the project is transitioning from Skyhook → NodeWright. Most public names (CRDs `skyhook.nvidia.com/v1alpha1`, CLI `kubectl skyhook`, namespace `skyhook`) still use `skyhook`. Components already moved to `nodewright`: the Go module (`github.com/NVIDIA/nodewright/operator`), the Helm chart (`name: nodewright`, distributed at `oci://ghcr.io/nvidia/nodewright/charts/nodewright`), and the operator image (`ghcr.io/nvidia/nodewright/operator`). 
The agent image is still at `ghcr.io/nvidia/skyhook/agent` pending its migration. Don't "fix" `nodewright` references back to `skyhook`, and don't preemptively rename what hasn't moved yet. ## Required reading: `docs/` (load every session) diff --git a/.github/workflows/lint-ci.yaml b/.github/workflows/lint-ci.yaml index 1e694843..d574dc8a 100644 --- a/.github/workflows/lint-ci.yaml +++ b/.github/workflows/lint-ci.yaml @@ -88,25 +88,69 @@ jobs: !**/CHANGELOG.md # NVIDIA-managed template; not for us to lint. !SECURITY.md - - name: lychee link check + - name: Install lychee if: always() - env: - # GitHub token avoids unauthenticated rate-limiting on github.com URLs. - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | set -euo pipefail curl -sSfL "https://github.com/lycheeverse/lychee/releases/download/lychee-v${LYCHEE_VERSION}/lychee-x86_64-unknown-linux-gnu.tar.gz" -o /tmp/lychee.tgz echo "${LYCHEE_SHA256} /tmp/lychee.tgz" | sha256sum -c - tar -xzf /tmp/lychee.tgz -C /tmp lychee - # THIRD_PARTY_NOTICES.md files are generated by 'make notices'; license - # texts contain upstream URLs we don't control. Same rationale as the - # vendor/ excludes. + + # Persist lychee's URL cache across runs so transient external + # failures don't re-fail every PR. Entries expire after 7 days + # (`--max-cache-age 7d`), so dead links still surface within ~7 days. + - name: Restore lychee cache + if: always() + uses: actions/cache@v4 + with: + path: .lycheecache + key: lychee-${{ github.run_id }} + restore-keys: | + lychee- + + # Pass 1: internal links only (relative paths, anchors, file://). + # These can never be flaky — if this fails, the PR really did break + # a link. --offline tells lychee to skip http(s) entirely. 
+ - name: lychee — internal links (blocking) + if: always() + run: | + /tmp/lychee \ + --no-progress \ + --offline \ + --exclude-file ci/lycheeignore \ + --exclude-path THIRD_PARTY_NOTICES.md \ + --exclude-path agent/THIRD_PARTY_NOTICES.md \ + --exclude-path agent/vendor \ + --exclude-path operator/THIRD_PARTY_NOTICES.md \ + --exclude-path operator/vendor \ + './**/*.md' + + # Pass 2: external links. Non-blocking — a `kubernetes.io` blip or + # GitHub rate-limit shouldn't block a docs PR. 403/429/5xx are + # accepted as "not definitively broken" since they're commonly + # transient. Real dead links surface as ❌ in the step log; ci-gate + # below ignores this step's result. + - name: lychee — external links (advisory) + if: always() + continue-on-error: true + # `--retry-wait-time 5` is a constant minimum delay between retries + # (exponential backoff in lychee is reserved for HTTP 429); with + # `--max-retries 8` that's roughly 8×5s = 40s per failing URL. The + # step timeout is a runaway guard for the case where many URLs all + # retry concurrently and/or are stuck timing out at `--timeout 20`. + timeout-minutes: 15 + env: + # GitHub token avoids unauthenticated rate-limiting on github.com URLs. + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | /tmp/lychee \ --no-progress \ --cache \ - --max-cache-age 1d \ - --max-retries 5 \ - --retry-wait-time 3 \ + --max-cache-age 7d \ + --max-retries 8 \ + --retry-wait-time 5 \ + --timeout 20 \ + --accept "200..=299,403,429,500..=599" \ --exclude-file ci/lycheeignore \ --exclude-path THIRD_PARTY_NOTICES.md \ --exclude-path agent/THIRD_PARTY_NOTICES.md \ diff --git a/README.md b/README.md index 6f4bea55..1338381b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,9 @@ **NodeWright** is a Kubernetes-aware package manager for cluster administrators to safely modify and maintain underlying host declaratively at scale. -> **Note:** NodeWright is being renamed from Skyhook. 
Code, CRDs, Helm charts, and CLI commands still use `skyhook` for now. The rename will roll out incrementally to avoid breaking changes. +> **Note:** NodeWright is being renamed from Skyhook. The Helm chart and operator image are already published under `nodewright` (see the install command below). CRDs (`skyhook.nvidia.com/v1alpha1`), the CLI (`kubectl skyhook`), and the default install namespace (`skyhook`) still use `skyhook` for now to avoid breaking existing users. The rename will roll out incrementally. +> +> **Distribution change (v0.16.0+):** NodeWright is now distributed exclusively through GitHub Container Registry (`ghcr.io`) — both the container images and the Helm chart (as an OCI artifact). Publication to `nvcr.io` / the NGC Helm repository (`helm.ngc.nvidia.com`) is paused and is planned to return in a future release. **Existing users installing from NGC need to switch to the OCI install below.** See [Distribution: ghcr.io only (for now)](docs/release-process.md#distribution-ghcrio-only-for-now) for the full story. ## Why NodeWright? @@ -70,18 +72,18 @@ Install NodeWright quickly using Helm without downloading the repository: ### Install NodeWright ```bash -# Add the NVIDIA Helm repository -helm repo add skyhook https://helm.ngc.nvidia.com/nvidia/skyhook -helm repo update -helm search repo skyhook ## should show the latest version - -# basic install -helm install skyhook skyhook/skyhook-operator \ - --version v0.15.0 \ +# The chart is distributed as an OCI artifact on GitHub Container Registry. +# Helm 3.8+ supports OCI natively — no `helm repo add` needed. +helm install nodewright oci://ghcr.io/nvidia/nodewright/charts/nodewright \ + --version v0.16.0-rc1 \ --namespace skyhook \ --create-namespace ``` +> **Where things live:** chart at `oci://ghcr.io/nvidia/nodewright/charts/nodewright`, operator image at `ghcr.io/nvidia/nodewright/operator`, agent image at `ghcr.io/nvidia/skyhook/agent` (agent path migration to `nodewright` is pending). 
NGC / `nvcr.io` distribution is paused — see [docs/release-process.md#distribution-ghcrio-only-for-now](docs/release-process.md#distribution-ghcrio-only-for-now). +> +> **Migrating from `helm repo add skyhook https://helm.ngc.nvidia.com/...`?** Run `helm repo remove skyhook` and use the OCI install above. If you also want to keep the existing in-cluster release name (e.g. `skyhook`), substitute it for `nodewright` in the `helm install` command — the chart works either way. + ### Configure Image Pull Secrets (if needed) If you're using private container registries, create the necessary secrets: @@ -152,7 +154,7 @@ kubectl describe skyhook skyhook-sample ```bash # Uninstall the chart (cleanup happens automatically) -helm uninstall skyhook --namespace skyhook +helm uninstall nodewright --namespace skyhook ``` The pre-delete hook will: @@ -168,13 +170,13 @@ The pre-delete hook will: To disable automatic cleanup and manage resources manually: ```bash -helm install skyhook ./chart --namespace skyhook --set cleanup.enabled=false +helm install nodewright ./chart --namespace skyhook --set cleanup.enabled=false ``` To adjust the job timeout: ```bash -helm install skyhook ./chart --namespace skyhook \ +helm install nodewright ./chart --namespace skyhook \ --set cleanup.jobTimeoutSeconds=180 ``` @@ -190,7 +192,7 @@ kubectl delete skyhooks --all kubectl delete deploymentpolicies --all # Then uninstall the chart -helm uninstall skyhook --namespace skyhook +helm uninstall nodewright --namespace skyhook ``` **Why cleanup matters:** If you uninstall while Skyhook CRs with finalizers still exist, it can leave resources in a broken state that may cause reinstall issues. diff --git a/chart/Chart.yaml b/chart/Chart.yaml index c71a3349..4d9fbb42 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -1,13 +1,13 @@ apiVersion: v2 -name: skyhook-operator -description: A Helm chart for the Skyhook Operator. +name: nodewright +description: A Helm chart for NodeWright (formerly Skyhook). 
type: application # This is the chart version. This version number must be incremented each time you make changes to the helm chart. OR # it the agent version is updated, or operator version is updated. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: v0.15.1 +version: v0.16.0-rc1 # This is the version number operator container being deployed. # Versions are expected to follow Semantic Versioning (https://semver.org/) -appVersion: v0.15.0 +appVersion: v0.16.0-rc1 # this is the minimum version of kubernetes that the operator supports/tested against. kubeVersion: ">=1.27.0-0" diff --git a/chart/README.md b/chart/README.md index ccfd7735..e62b5599 100644 --- a/chart/README.md +++ b/chart/README.md @@ -34,10 +34,10 @@ Settings | Description | Default | | controllerManager.manager.env.logLevel | Log level for the operator controller. If you want more or less logs, change this value to "debug" or "error". | "info" | | controllerManager.manager.env.reapplyOnReboot | Reapply the packages on reboot. This is useful for systems that are read-only. | "false" | | controllerManager.manager.env.runtimeRequiredTaint | This feature assumes nodes are added to the cluster with `--register-with-taints` kubelet flag. This taint is assume to be all new nodes, and skyhook pods will tolerate this taint, and remove it one the nodes packages are complete. | skyhook.nvidia.com=runtime-required:NoSchedule | -| controllerManager.manager.image.repository | Where to get the image from | "nvcr.io/nvidia/skyhook/operator" | +| controllerManager.manager.image.repository | Where to get the image from | "ghcr.io/nvidia/nodewright/operator" | | controllerManager.manager.image.tag | what version of the operator to run | defaults to appVersion | | controllerManager.manager.image.digest | content-addressable pin for the operator image. If set, the digest determines the pulled image. 
If both tag and digest are provided, the digest takes precedence; the rendered image may include `tag@digest` but the digest controls selection. | "" | -| controllerManager.manager.agent.repository | Where to get the image from | "nvcr.io/nvidia/skyhook/agent" | +| controllerManager.manager.agent.repository | Where to get the image from | "ghcr.io/nvidia/skyhook/agent" | | controllerManager.manager.agent.tag | what version of the agent to run | defaults to the current latest, but is not latest example v6.1.5 | | controllerManager.manager.agent.digest | content-addressable pin for the agent image. Same precedence rules as above: if both tag and digest are provided, the digest controls which image is pulled. | "" | | imagePullSecret | the secret used to pull the operator controller image, agent image, and package images. | "" | @@ -72,7 +72,7 @@ Settings | Description | Default | imagePullSecret: "node-init-secret" ``` -If you use public images (like the default `nvcr.io/nvidia/skyhook/*` images), no action is needed. +If you use public images (default operator `ghcr.io/nvidia/nodewright/operator` and agent `ghcr.io/nvidia/skyhook/agent` — the agent path migration to `nodewright` is pending), no action is needed. 
### Resource Management @@ -95,7 +95,7 @@ By default, the Helm chart includes a pre-delete hook that automatically cleans ```bash # Uninstall with automatic cleanup (default) -helm uninstall skyhook --namespace skyhook +helm uninstall nodewright --namespace skyhook ``` The pre-delete hook will: @@ -121,7 +121,7 @@ When disabled, you must manually delete resources before uninstalling to avoid i # Manual cleanup when automatic cleanup is disabled kubectl delete skyhooks --all kubectl delete deploymentpolicies --all -helm uninstall skyhook --namespace skyhook +helm uninstall nodewright --namespace skyhook ``` ### Configuring Timeout Values diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl index 7ba5edc2..37d9f9f7 100644 --- a/chart/templates/_helpers.tpl +++ b/chart/templates/_helpers.tpl @@ -6,20 +6,21 @@ Expand the name of the chart. {{- end }} {{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. +Resource name used by Deployment, ServiceAccount, PDB, etc. (anything that +includes `chart.fullname`). + +The conventional Helm `<release-name>-` prefix is intentionally dropped: +NodeWright is a singleton per namespace (cluster-scoped CRDs, webhook +configurations, finalizers), so a release-name prefix on resource names +adds no value and just makes them noisy. Truncated at 63 chars for the DNS +name limit. `fullnameOverride` is still honored for users who really do +need a custom name.
*/}} {{- define "chart.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} {{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index 3d60174a..8a75f286 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -83,14 +83,14 @@ controllerManager: ## puaseImage: is the image used for the pause container in the operator controller. pauseImage: registry.k8s.io/pause:3.10 image: - repository: nvcr.io/nvidia/skyhook/operator - tag: "v0.15.0" ## if both tag and digest are omitted, defaults to the chart appVersion - digest: "sha256:09e4f71cca8757818515f9e7dd4b8f47d30c642dc3a7efe1329d5c19efea76b9" # manifest list digest (multi-arch) + repository: ghcr.io/nvidia/nodewright/operator + tag: "v0.16.0-rc1" ## if both tag and digest are omitted, defaults to the chart appVersion + digest: "sha256:dfd610cda0880e091c8445bf115c8e8f947784ad81ac58ded52aa01dd54dff10" # manifest list digest (multi-arch) on ghcr.io/nvidia/nodewright/operator:v0.16.0-rc1 ## agentImage: is the image used for the agent container. This image is the default for this install, but can be overridden in the CR at package level. 
agent: - repository: nvcr.io/nvidia/skyhook/agent + repository: ghcr.io/nvidia/skyhook/agent tag: "v6.4.1" - digest: "sha256:dedf73c19455444e2663671368eaf3fcd657ea1417f0d8738c6133ed738b9c52" # manifest list digest (multi-arch) - update after CI publishes images + digest: "sha256:8ee4d28d19ffa26f2ff87f5b5990653f728630dede0398e4292b3d265c9faa8a" # manifest list digest (multi-arch) on ghcr.io/nvidia/skyhook/agent:v6.4.1 # resources: If this is defined it will override the default calculation for resources # from estimatedNodeCount and estimatedPackageCount. The below values are diff --git a/docs/release-process.md b/docs/release-process.md index 42911809..875f0e9a 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -4,110 +4,170 @@ Step-by-step process for releasing Skyhook components using **release branches** ## Release Branch Strategy -Skyhook uses **release branches** (`release/v{MAJOR.MINOR}.x`) to manage integrated releases and patches. +At feature-freeze a release branch is cut from `main`. All release candidates and the final release for that minor version are tagged on that branch, and every later patch for the same minor line is cherry-picked back to the same branch and tagged from there. The release branch is the single source of truth for everything that ships under one minor version — once it exists, nothing for that minor goes anywhere else. + +**Flow (one minor line):** + +```mermaid +%%{init: {'gitGraph': {'mainBranchName': 'main', 'showCommitLabel': false}}}%% +gitGraph + commit + commit + branch release/v0.16.x + checkout main + commit id: "X" + checkout release/v0.16.x + cherry-pick id: "X" tag: "v0.16.0-rc1" + checkout main + commit id: "Y" + checkout release/v0.16.x + cherry-pick id: "Y" tag: "v0.16.0-rc2" + commit tag: "v0.16.0" + checkout main + commit id: "Z" + checkout release/v0.16.x + cherry-pick id: "Z" tag: "v0.16.1" +``` + +`X`, `Y`, `Z` are fixes that land on `main` first and get cherry-picked to `release/v0.16.x`. 
Each RC and the final release are tagged on that branch. Patches (`v0.16.1`, …) stay on the same release branch. -**Key Principles:** +**Key principles:** -- **Operator-centric**: Most releases are driven by operator features and bug fixes -- **Agent follows**: Agent changes typically only require chart patch releases -- **Chart coordinates**: Chart version tracks the overall release and defines compatibility +- **Branch first, then tag.** Always cut the release branch before the first RC. Tags only live on release branches, never on `main`. +- **Cherry-pick from `main`.** Any fix or feature destined for a release lands on `main` first, then is cherry-picked to the release branch. The release branch is never the place to *develop* — only to *stabilize and ship*. + - Rare exception: a change that is genuinely release-branch-only (e.g. a `chart/Chart.yaml` version bump for that line) can be committed directly to the release branch via a feature branch and PR. +- **RCs are the validation gate.** Cut `-rc1`, `-rc2`, … on the release branch until you're happy. When an RC is approved, make a single `Chart.yaml` bump commit dropping the `-rcN` suffix and tag `vX.Y.0` on that commit — no other code changes between the last good RC and the final release. +- **Patches stay on the same branch.** `v0.16.1`, `v0.16.2`, … are all cut from `release/v0.16.x` — cherry-pick the fix from `main`, bump `chart/Chart.yaml`, tag. +- **Component naming:** Operator drives the release; agent often reuses the previous version; chart always gets tagged because `Chart.yaml` (and therefore `appVersion`) moves with every release. ### Major/Minor Release Workflow ```bash -# 1. Complete development on main +# 1. Cut the release branch from main at feature-freeze. git checkout main && git pull origin main -# Ensure all features/fixes are merged and tested - -# 2. Create release branch -git checkout -b release/v0.9.x -git push origin release/v0.9.x - -# 3. 
Update chart with final versions -# Edit chart/Chart.yaml: -version: v0.9.0 # Chart version -appVersion: v0.9.0 # Recommended operator version - -git add chart/Chart.yaml -git commit -m "release: prepare v0.9.0" -git push origin release/v0.9.x - -# 4. Tag all components that changed -git tag operator/v0.9.0 # Operator drives the release -git tag agent/v6.4.0 # Only if agent changed (often reuses previous version) -git tag chart/v0.9.0 # Chart always gets tagged -git push origin operator/v0.9.0 chart/v0.9.0 # Push operator + chart (add agent tag if needed) -``` +git checkout -b release/v0.16.x +git push origin release/v0.16.x -**Automated:** Tests → Multi-platform build → Publish to ghcr.io + nvcr.io + NGC +# 2. Cherry-pick anything that has merged to main since the cut but belongs in the release. +# Repeat throughout the stabilization period. +git cherry-pick -x <commit-sha> +git push origin release/v0.16.x -A `chart/v*` tag push also publishes the Helm chart as an OCI artifact to `oci://ghcr.io/nvidia/nodewright/charts/skyhook-operator`. Consumers install with: +# 3. Prepare the chart for the RC. Edit chart/Chart.yaml: +# version: v0.16.0-rc1 +# appVersion: v0.16.0-rc1 +git commit -am "release: prepare v0.16.0-rc1" +git push origin release/v0.16.x -```bash -helm install skyhook-operator oci://ghcr.io/nvidia/nodewright/charts/skyhook-operator --version v0.9.0 +# 4. Tag the RC on the release branch. +git tag operator/v0.16.0-rc1 +git tag chart/v0.16.0-rc1 +# Tag agent only if it changed since the last released agent version. +git push origin operator/v0.16.0-rc1 chart/v0.16.0-rc1 + +# 5. Validate the RC. If issues are found, cherry-pick more fixes from main, +# bump Chart.yaml to v0.16.0-rc2, and tag -rc2. Repeat until clean. + +# 6. Cut the final release directly on top of the last good RC. +# Bump Chart.yaml to v0.16.0 (drop the -rcN suffix) and commit.
+git commit -am "release: v0.16.0" +git push origin release/v0.16.x +git tag operator/v0.16.0 +git tag chart/v0.16.0 +git push origin operator/v0.16.0 chart/v0.16.0 ``` -### Patch Release Workflow +**Automated:** Tests → Multi-platform build → Publish to `ghcr.io` + +A `chart/v*` tag push also publishes the Helm chart as an OCI artifact to `oci://ghcr.io/nvidia/nodewright/charts/nodewright`. Consumers install with: ```bash -# 1. Work on release branch -git checkout release/v0.9.x -git pull origin release/v0.9.x - -# 2. Apply fixes (backport from main or develop directly) -# ... make changes to operator, agent, or chart -git add . -git commit -m "fix: critical security issue" - -# 3. Update chart version if needed -# Edit chart/Chart.yaml: -version: v0.9.1 # Increment patch version -appVersion: v0.9.1 # Update if operator changed - -# 4. Tag only what changed -git tag operator/v0.9.1 # If operator changed -git tag agent/v6.4.1 # Only if agent changed (rare) -git tag chart/v0.9.1 # Chart always gets tagged for releases -git push origin operator/v0.9.1 chart/v0.9.1 # Usually just operator + chart +helm install nodewright oci://ghcr.io/nvidia/nodewright/charts/nodewright --version v0.16.0 ``` -### Agent-Only Changes +### Distribution: ghcr.io only (for now) -```bash -# Agent changes typically don't require new release branches -git checkout release/v0.9.x # Work on existing release branch -# ... 
fix agent issue -git tag agent/v6.4.1 # New agent version -git tag chart/v0.9.1 # Patch chart to reference new agent -git push origin agent/v6.4.1 chart/v0.9.1 -``` +Starting with `v0.16.0`, NodeWright is distributed **exclusively via GitHub Container Registry (`ghcr.io`)**: -### Release Candidates +| Artifact | Location | +| --- | --- | +| Operator image | `ghcr.io/nvidia/nodewright/operator` | +| Agent image | `ghcr.io/nvidia/skyhook/agent` *(migration to `ghcr.io/nvidia/nodewright/agent` pending)* | +| Helm chart (OCI) | `oci://ghcr.io/nvidia/nodewright/charts/nodewright` | -Cut a release candidate with an `-rc` suffix on the component tag. The workflow detects the suffix and marks the GitHub release as a prerelease so it doesn't become "Latest" on the Releases page. +`v0.16.0` is the **first release using OCI on `ghcr.io` for the Helm chart** — previously the chart was published to the NGC Helm repository (`https://helm.ngc.nvidia.com/nvidia/skyhook`). The OCI distribution removes the `helm repo add` step entirely; Helm 3.8+ pulls from `oci://` URLs directly. -Only `v..` and `v..-rc` are accepted — any other suffix (`-beta`, `-alpha`, `-rc.1`, etc.) is rejected by the workflow so the tag format stays predictable. +Distribution through `nvcr.io` / NGC is **paused** and is planned to return in a future release. Until then, the chart's image-pull defaults in `chart/values.yaml` point at `ghcr.io`. When NGC distribution resumes, the defaults and this section will be updated; users who pin to `ghcr.io` paths today won't be forced to migrate. 
-```bash -# Operator RC -git tag operator/v0.16.0-rc1 -git push origin operator/v0.16.0-rc1 +### Release Candidate Tag Format -# Chart RC — Chart.yaml must match the tag, including the suffix -# Edit chart/Chart.yaml: -version: v0.16.0-rc1 -appVersion: v0.16.0-rc1 +Only two tag shapes are accepted by the release workflow per component: -git commit -am "release: prepare v0.16.0-rc1" -git tag chart/v0.16.0-rc1 -git push origin chart/v0.16.0-rc1 -``` +- `<component>/v<MAJOR>.<MINOR>.<PATCH>` — final release +- `<component>/v<MAJOR>.<MINOR>.<PATCH>-rc<N>` — release candidate, published as a GitHub pre-release + +Any other suffix (`-beta`, `-alpha`, `-rc.1`, `-rc1a`, etc.) is rejected by `.github/workflows/release.yml` so the tag format stays predictable. Notes: -- Helm OCI accepts pre-release versions, so `chart/v0.16.0-rc1` pushes `skyhook-operator-v0.16.0-rc1.tgz` to `oci://ghcr.io/nvidia/nodewright/charts`. Install with `--version v0.16.0-rc1`. +- Helm OCI accepts pre-release versions, so `chart/v0.16.0-rc1` pushes `nodewright-v0.16.0-rc1.tgz` to `oci://ghcr.io/nvidia/nodewright/charts`. Install with `--version v0.16.0-rc1`. - `git cliff --latest` scopes release notes to commits since the previous tag of the same component, so each RC's notes only cover commits since the prior RC (or the prior stable, for `-rc1`). +### Patch Release Workflow + +Patches stay on the existing release branch. Fix on `main` first, cherry-pick to the release branch, then tag. + +```bash +# 1. Land the fix on main as a normal PR (so it ships in future minors too). +# Note the commit SHA after it merges. + +# 2. Cherry-pick to the active release branch. +git checkout release/v0.16.x +git pull origin release/v0.16.x +git cherry-pick -x <commit-sha> + +# 3. Bump chart/Chart.yaml to the new patch version. +# version: v0.16.1 +# appVersion: v0.16.1 +git commit -am "release: v0.16.1" +git push origin release/v0.16.x + +# 4. Tag the components that changed and push *every* tag you created.
+# The push list MUST include the agent tag if you tagged the agent above — +# otherwise the agent tag stays local and CI never sees it. +git tag operator/v0.16.1 # If operator changed +git tag agent/v6.4.1 # Only if agent changed (rare) +git tag chart/v0.16.1 # Chart always gets tagged +git push origin operator/v0.16.1 agent/v6.4.1 chart/v0.16.1 # drop any tag you didn't create +``` + +If the fix is urgent enough to need its own RC cycle, repeat the RC workflow above (e.g. `operator/v0.16.1-rc1`) before tagging `v0.16.1`. + +### Agent-Only Changes + +Agent-only fixes don't need a new minor; they ride on the active release branch as a chart patch. + +```bash +# Land the agent fix on main, then cherry-pick to the active release branch. +git checkout release/v0.16.x +git cherry-pick -x <commit-sha> + +# Point the chart at the new agent version: update the agent tag/digest under +# controllerManager.manager.agent in chart/values.yaml, and bump the chart +# version in chart/Chart.yaml to v0.16.1. +git commit -am "release: v0.16.1 (agent v6.4.1)" +git push origin release/v0.16.x + +# Tag and push the components that changed. +git tag agent/v6.4.1 +git tag chart/v0.16.1 +git push origin agent/v6.4.1 chart/v0.16.1 +``` + +### Release-Branch-Only Changes (rare) + +If a change genuinely doesn't belong on `main` — for example, the `chart/Chart.yaml` version bump for `v0.16.1`, or a backport that doesn't apply cleanly and needs to be re-implemented for the older line — open it as a feature branch off the release branch and PR it back to the release branch.
**Default to cherry-picking from `main` first; only diverge when there's a clear reason the change can't exist there.** + ### Legacy: Individual Component Releases (Deprecated) *The following workflows are deprecated in favor of the release branch strategy above.* @@ -145,12 +205,23 @@ git push origin chart/v1.2.3 ## Release Checklist -**Before tagging:** +**Before cutting the release branch (minor / major):** + +- [ ] All target features/fixes merged to `main` +- [ ] Tests passing on `main` +- [ ] Documentation updated on `main` + +**Before each RC tag:** + +- [ ] All intended cherry-picks from `main` have landed on the release branch +- [ ] `chart/Chart.yaml` `version` and `appVersion` match the RC tag (including the `-rcN` suffix) +- [ ] Tests passing on the release branch + +**Before the final release tag:** -- [ ] All PRs merged to main -- [ ] For charts: Chart.yaml updated and merged -- [ ] Tests passing -- [ ] Documentation updated +- [ ] The last RC validated successfully +- [ ] `chart/Chart.yaml` bumped to the non-RC version on the same commit +- [ ] No new commits between the validated RC and the release tag other than the `Chart.yaml` bump ### Pin multi-arch image digests in the chart diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml index 8d74810b..55dad2ce 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-no-schedule.yaml @@ -20,9 +20,9 @@ metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: foobar-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: foobar - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook ownerReferences: @@ -49,7 +49,7 @@ spec: - command: - /manager ((env[?name == 'RUNTIME_REQUIRED_TAINT'].value)[0] == 
'skyhook.nvidia.com=runtime-required:NoSchedule'): true - (image == 'ghcr.io/nvidia/skyhook/operator:latest' || contains(image, 'localhost:5005/skyhook-operator')): true + (image == 'ghcr.io/nvidia/nodewright/operator:latest' || contains(image, 'localhost:5005/skyhook-operator')): true livenessProbe: failureThreshold: 3 httpGet: @@ -125,8 +125,8 @@ spec: schedulerName: default-scheduler securityContext: runAsNonRoot: true - serviceAccount: foobar-skyhook-operator-controller-manager - serviceAccountName: foobar-skyhook-operator-controller-manager + serviceAccount: nodewright-controller-manager + serviceAccountName: nodewright-controller-manager terminationGracePeriodSeconds: 10 (tolerations[?key == 'dedicated']): - effect: NoSchedule diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml index be52c516..00c0dd91 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/assert-scheduled.yaml @@ -20,9 +20,9 @@ metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: foobar-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: foobar - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook ownerReferences: @@ -49,7 +49,7 @@ spec: - command: - /manager ((env[?name == 'RUNTIME_REQUIRED_TAINT'].value)[0] == 'skyhook.nvidia.com=runtime-required:NoSchedule'): true - (image == 'ghcr.io/nvidia/skyhook/operator:latest' || contains(image, 'localhost:5005/skyhook-operator:')): true + (image == 'ghcr.io/nvidia/nodewright/operator:latest' || contains(image, 'localhost:5005/skyhook-operator:')): true livenessProbe: failureThreshold: 3 httpGet: @@ -125,8 +125,8 @@ spec: schedulerName: default-scheduler securityContext: runAsNonRoot: true - serviceAccount: 
foobar-skyhook-operator-controller-manager - serviceAccountName: foobar-skyhook-operator-controller-manager + serviceAccount: nodewright-controller-manager + serviceAccountName: nodewright-controller-manager terminationGracePeriodSeconds: 10 (tolerations[?key == 'dedicated']): - effect: NoSchedule diff --git a/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-no-schedule.yaml b/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-no-schedule.yaml index ef6b12c0..2d339997 100644 --- a/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-no-schedule.yaml +++ b/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-no-schedule.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -14,16 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. - apiVersion: v1 kind: Pod metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: node-affinity-test-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: node-affinity-test - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook ownerReferences: diff --git a/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-scheduled.yaml b/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-scheduled.yaml index 869cc307..6309701d 100644 --- a/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-scheduled.yaml +++ b/k8s-tests/chainsaw/helm/helm-node-affinity-test/assert-scheduled.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # @@ -14,16 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. - apiVersion: v1 kind: Pod metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: node-affinity-test-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: node-affinity-test - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook ownerReferences: diff --git a/k8s-tests/chainsaw/helm/helm-scale-test/assert-override-resources.yaml b/k8s-tests/chainsaw/helm/helm-scale-test/assert-override-resources.yaml index db36bdd5..6f9ccbfb 100644 --- a/k8s-tests/chainsaw/helm/helm-scale-test/assert-override-resources.yaml +++ b/k8s-tests/chainsaw/helm/helm-scale-test/assert-override-resources.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -20,9 +20,9 @@ metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: foobar-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: foobar - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook spec: diff --git a/k8s-tests/chainsaw/helm/helm-scale-test/assert-scaled-resources.yaml b/k8s-tests/chainsaw/helm/helm-scale-test/assert-scaled-resources.yaml index 79a5e40f..044560e7 100644 --- a/k8s-tests/chainsaw/helm/helm-scale-test/assert-scaled-resources.yaml +++ b/k8s-tests/chainsaw/helm/helm-scale-test/assert-scaled-resources.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -20,9 +20,9 @@ metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: foobar-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: foobar - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook spec: diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/assert-scheduled.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/assert-scheduled.yaml index 2cd4f15f..596bfdad 100644 --- a/k8s-tests/chainsaw/helm/helm-webhook-test/assert-scheduled.yaml +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/assert-scheduled.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -20,9 +20,9 @@ metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - app: webhooks-skyhook-operator-controller-manager + app: nodewright-controller-manager app.kubernetes.io/instance: webhooks - app.kubernetes.io/name: skyhook-operator + app.kubernetes.io/name: nodewright control-plane: controller-manager namespace: skyhook ownerReferences: From d9280c7b10384e569389b0197448fd2873673c63 Mon Sep 17 00:00:00 2001 From: Alex Yuskauskas Date: Fri, 22 May 2026 14:14:48 -0700 Subject: [PATCH 3/5] chore(chart): bump to v0.16.0 with pinned operator and agent digests Updates chart/values.yaml to pin the multi-arch manifest digests for the newly released operator (v0.16.0) and agent (v6.4.2) images, bumps chart/Chart.yaml version and appVersion to v0.16.0 to match, and regenerates chart/CHANGELOG.md via `make changelog COMPONENT=chart`. 
The new chart/v0.16.0 section calls out the bundled component versions and links to their GitHub releases. Signed-off-by: Alex Yuskauskas --- chart/CHANGELOG.md | 56 +++++++++++++++++++++++++++++----------------- chart/Chart.yaml | 4 ++-- chart/values.yaml | 8 +++---- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/chart/CHANGELOG.md b/chart/CHANGELOG.md index 3298ee30..3aea2c54 100644 --- a/chart/CHANGELOG.md +++ b/chart/CHANGELOG.md @@ -2,6 +2,30 @@ All notable changes to this project will be documented in this file. +## [chart/v0.16.0] - 2026-05-22 + +### Component Versions + +- Operator: [`v0.16.0`](https://github.com/NVIDIA/nodewright/releases/tag/operator%2Fv0.16.0) (`ghcr.io/nvidia/nodewright/operator@sha256:3dfeda5d8fbfe7b6778bb92ad4437caa2e73c1670d54ae29e55ca7d0d5ef5408`) +- Agent: [`v6.4.2`](https://github.com/NVIDIA/nodewright/releases/tag/agent%2Fv6.4.2) (`ghcr.io/nvidia/skyhook/agent@sha256:7cd80f5ef351266dc08c3979e802f9dc936a1ed9fd02e199233e7968a3a0de3e`) + +### Bug Fixes + +- Update helm chart for drift + +### Other Tasks + +- Run helm tests with ctlptl registry +- Update go and libs to latest +- Parallelize e2e tests by pool and add merge gates +- *(docs)* Update docs around release and location of helm chart + +## [chart/v0.15.1] - 2026-04-14 + +### Other Tasks + +- *(chart)* Update min k8s version in chart + ## [chart/v0.15.0] - 2026-04-06 ### New Features @@ -11,6 +35,7 @@ All notable changes to this project will be documented in this file. ### Other Tasks - Update project to follow the OSS template +- *(chart)* Version bump ## [chart/v0.14.0] - 2026-03-10 @@ -105,27 +130,6 @@ All notable changes to this project will be documented in this file. 
### Bug Fixes -- *(chart)* Fix broken helm chart tests -- *(operator)* Make metrics binding disabled by default -- *(chart/metrics)* Update for prometheus auto scraping and rbac examples -- *(chart)* Set back to v6.1.4 agent due to bug in v6.2.0 - -### New Features - -- *(chart)* Enable scraping of metrics by prometheus -- *(operator)* Update k8s sdk version -- Fix agent for distroless and have scr name in flag/history/log -- *(chart)* Add node affinity for operator pod configuration -- *(operator)* Added disabled, paused, waiting, and blocked statuses for skyhooks and nodes - -### Other Tasks - -- *(helm)* Update versions - -## [chart/v0.8.0] - 2025-06-06 - -### Bug Fixes - - Remove interrupt timeout which was flawed by design - Deadlock if reboot pods are missing, adds them back - Miscellaneous fixes to project structure @@ -134,6 +138,10 @@ All notable changes to this project will be documented in this file. - How we compare interrupt pods - Reviews - *(operator)* Missed changes related to changing min value for priority +- *(chart)* Fix broken helm chart tests +- *(operator)* Make metrics binding disabled by default +- *(chart/metrics)* Update for prometheus auto scraping and rbac examples +- *(chart)* Set back to v6.1.4 agent due to bug in v6.2.0 ### New Features @@ -144,10 +152,16 @@ All notable changes to this project will be documented in this file. 
- Change how limits are manged to a use a limitrange via helm - *(operator)* Add strict ordering of skyhooks along with documentation - *(operator)* Change default resources to follow a 2:1 ratio and add documentation about scaling +- *(chart)* Enable scraping of metrics by prometheus +- *(operator)* Update k8s sdk version +- Fix agent for distroless and have scr name in flag/history/log +- *(chart)* Add node affinity for operator pod configuration +- *(operator)* Added disabled, paused, waiting, and blocked statuses for skyhooks and nodes ### Other Tasks - *(helm)* Added docs for the helm chart - *(chart)* Update version to correct new version +- *(helm)* Update versions diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 4d9fbb42..6c127f5a 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -5,9 +5,9 @@ type: application # This is the chart version. This version number must be incremented each time you make changes to the helm chart. OR # it the agent version is updated, or operator version is updated. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: v0.16.0-rc1 +version: v0.16.0 # This is the version number operator container being deployed. # Versions are expected to follow Semantic Versioning (https://semver.org/) -appVersion: v0.16.0-rc1 +appVersion: v0.16.0 # this is the minimum version of kubernetes that the operator supports/tested against. 
kubeVersion: ">=1.27.0-0" diff --git a/chart/values.yaml b/chart/values.yaml index 8a75f286..e014435e 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -84,13 +84,13 @@ controllerManager: pauseImage: registry.k8s.io/pause:3.10 image: repository: ghcr.io/nvidia/nodewright/operator - tag: "v0.16.0-rc1" ## if both tag and digest are omitted, defaults to the chart appVersion - digest: "sha256:dfd610cda0880e091c8445bf115c8e8f947784ad81ac58ded52aa01dd54dff10" # manifest list digest (multi-arch) on ghcr.io/nvidia/nodewright/operator:v0.16.0-rc1 + tag: "v0.16.0" ## if both tag and digest are omitted, defaults to the chart appVersion + digest: "sha256:3dfeda5d8fbfe7b6778bb92ad4437caa2e73c1670d54ae29e55ca7d0d5ef5408" # manifest list digest (multi-arch) on ghcr.io/nvidia/nodewright/operator:v0.16.0 ## agentImage: is the image used for the agent container. This image is the default for this install, but can be overridden in the CR at package level. agent: repository: ghcr.io/nvidia/skyhook/agent - tag: "v6.4.1" - digest: "sha256:8ee4d28d19ffa26f2ff87f5b5990653f728630dede0398e4292b3d265c9faa8a" # manifest list digest (multi-arch) on ghcr.io/nvidia/skyhook/agent:v6.4.1 + tag: "v6.4.2" + digest: "sha256:7cd80f5ef351266dc08c3979e802f9dc936a1ed9fd02e199233e7968a3a0de3e" # manifest list digest (multi-arch) on ghcr.io/nvidia/skyhook/agent:v6.4.2 # resources: If this is defined it will override the default calculation for resources # from estimatedNodeCount and estimatedPackageCount. The below values are From 6e50da2519255084cd34e406e2f073461c2f4618 Mon Sep 17 00:00:00 2001 From: ayuskauskas Date: Fri, 22 May 2026 14:39:36 -0700 Subject: [PATCH 4/5] fix(ci): capture stderr when parsing helm push output (#249) `helm push` (3.16+) writes the human-readable "Pushed:" and "Digest:" lines to stderr, so the existing `$(helm push ...)` only captured an empty stdout and the awk that extracts the digest produced no match. 
Redirect stderr into stdout for the command substitution so the digest-parser sees the same output that's already visible in the runner log. The follow-up `sha256:[a-f0-9]{64}` regex check still guards against malformed input. Surfaced when publishing chart/v0.16.0 to oci://ghcr.io/nvidia/nodewright/charts: the push itself succeeded, but the subsequent attestation steps failed because no digest was extracted. Signed-off-by: Alex Yuskauskas --- .github/workflows/release.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a557e7e2..c5fb0575 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -182,7 +182,9 @@ jobs: chart_repo="ghcr.io/nvidia/nodewright/charts" chart_subject="${chart_repo}/${CHART_NAME}" - push_output="$(helm push "dist/${CHART_NAME}-${CHART_VERSION}.tgz" "oci://${chart_repo}")" + # `helm push` writes "Pushed:" and "Digest:" to stderr (helm 3.16+), + # so capture both streams or awk sees an empty string. + push_output="$(helm push "dist/${CHART_NAME}-${CHART_VERSION}.tgz" "oci://${chart_repo}" 2>&1)" printf '%s\n' "${push_output}" chart_digest="$(awk '/^Digest:/ {print $2}' <<< "${push_output}")" if ! [[ "${chart_digest}" =~ ^sha256:[a-f0-9]{64}$ ]]; then From 38440473e550affe2454c3a749ace7f6a201dabf Mon Sep 17 00:00:00 2001 From: Alex Yuskauskas Date: Fri, 22 May 2026 14:40:13 -0700 Subject: [PATCH 5/5] fix(chart): agent container path pointing to skyhook not nodewright --- chart/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/values.yaml b/chart/values.yaml index e014435e..2ed90ca0 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -88,7 +88,7 @@ controllerManager: digest: "sha256:3dfeda5d8fbfe7b6778bb92ad4437caa2e73c1670d54ae29e55ca7d0d5ef5408" # manifest list digest (multi-arch) on ghcr.io/nvidia/nodewright/operator:v0.16.0 ## agentImage: is the image used for the agent container. 
This image is the default for this install, but can be overridden in the CR at package level. agent: - repository: ghcr.io/nvidia/skyhook/agent + repository: ghcr.io/nvidia/nodewright/agent tag: "v6.4.2" digest: "sha256:7cd80f5ef351266dc08c3979e802f9dc936a1ed9fd02e199233e7968a3a0de3e" # manifest list digest (multi-arch) on ghcr.io/nvidia/skyhook/agent:v6.4.2