NVIDIA
diff --git a/‎.github/actions/gpu-snapshot-validate/action.yml‎
Lines changed: 60 additions & 0 deletions b/‎.github/actions/gpu-snapshot-validate/action.yml‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎.github/dependabot.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/dependabot.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.github/workflows/actionlint.yaml‎
Lines changed: 54 additions & 0 deletions b/‎.github/workflows/actionlint.yaml‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎.github/workflows/codeql.yaml‎
Lines changed: 1 addition & 2 deletions b/‎.github/workflows/codeql.yaml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎.github/workflows/conflict-check.yaml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/conflict-check.yaml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎.github/workflows/dependabot-auto-merge.yaml‎
Lines changed: 4 additions & 3 deletions b/‎.github/workflows/dependabot-auto-merge.yaml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎.github/workflows/gpu-h100-inference-test.yaml‎
Lines changed: 7 additions & 27 deletions b/‎.github/workflows/gpu-h100-inference-test.yaml‎
Lines changed: 7 additions & 27 deletions
diff --git a/‎.github/workflows/gpu-h100-training-test.yaml‎
Lines changed: 7 additions & 27 deletions b/‎.github/workflows/gpu-h100-training-test.yaml‎
Lines changed: 7 additions & 27 deletions
diff --git a/‎.github/workflows/gpu-smoke-test.yaml‎
Lines changed: 6 additions & 26 deletions b/‎.github/workflows/gpu-smoke-test.yaml‎
Lines changed: 6 additions & 26 deletions
diff --git a/‎.github/workflows/inactive-pr-reminder.yaml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/inactive-pr-reminder.yaml‎
Lines changed: 3 additions & 1 deletion
@@ -0,0 +1,82 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: GPU Snapshot and Validate
+description: Run aicr snapshot and validate GPU detection
+
+# Input values reach the validation script through step-level env variables,
+# never through inline expression expansion, so they are not parsed as shell code.
+inputs:
+  gpu_model:
+    description: 'Expected GPU model substring (e.g., T4, H100)'
+    required: true
+  min_gpu_count:
+    description: 'Minimum expected GPU count'
+    required: false
+    default: '1'
+  cluster_name:
+    # NOTE(review): no step below reads this input; it is kept so that callers
+    # which already pass it remain valid.
+    description: 'Kind cluster name (for kubectl context)'
+    required: true
+
+runs:
+  using: composite
+  steps:
+    # Runs `aicr snapshot`, writing the result to snapshot.yaml in the working
+    # directory, then prints the file to the job log.
+    - name: Run aicr snapshot
+      shell: bash
+      run: |
+        ./aicr snapshot --deploy-agent \
+          --kubeconfig="${HOME}/.kube/config" \
+          --namespace=default \
+          --image=ko.local:smoke-test \
+          --require-gpu \
+          --output=snapshot.yaml
+        echo "--- Snapshot output ---"
+        cat snapshot.yaml
+
+    # Reads the GPU model and count from snapshot.yaml and fails the step when
+    # either does not satisfy the gpu_model / min_gpu_count inputs.
+    - name: Validate snapshot detected GPU
+      shell: bash
+      env:
+        EXPECTED_GPU_MODEL: ${{ inputs.gpu_model }}
+        MIN_GPU_COUNT: ${{ inputs.min_gpu_count }}
+      run: |
+        uint_re='^(0|[1-9][0-9]*)$'
+        GPU_MODEL=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu.model"]' snapshot.yaml)
+        GPU_COUNT=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu-count"]' snapshot.yaml)
+        echo "GPU model: ${GPU_MODEL}"
+        echo "GPU count: ${GPU_COUNT}"
+        # [[ -lt ]] evaluates operands arithmetically and a malformed operand makes
+        # the test false, which would skip the failure branch; check the format first.
+        if [[ ! "${MIN_GPU_COUNT}" =~ ${uint_re} ]]; then
+          echo "::error::min_gpu_count must be a non-negative integer, got: ${MIN_GPU_COUNT}"
+          exit 1
+        fi
+        if [[ "${GPU_MODEL}" != *"${EXPECTED_GPU_MODEL}"* ]]; then
+          echo "::error::Expected ${EXPECTED_GPU_MODEL} GPU in snapshot, got: ${GPU_MODEL}"
+          exit 1
+        fi
+        if [[ ! "${GPU_COUNT}" =~ ${uint_re} ]]; then
+          echo "::error::Snapshot gpu-count is not a non-negative integer, got: ${GPU_COUNT}"
+          exit 1
+        fi
+        if [[ "${GPU_COUNT}" -lt "${MIN_GPU_COUNT}" ]]; then
+          echo "::error::Expected gpu-count >= ${MIN_GPU_COUNT}, got: ${GPU_COUNT}"
+          exit 1
+        fi
+        echo "Snapshot correctly detected ${GPU_COUNT}x ${GPU_MODEL}"
@@ -32,6 +32,12 @@ updates:
         exclude-patterns:
           # klog has independent versioning
           - "k8s.io/klog/v2"
+      golang-x:
+        patterns:
+          - "golang.org/x/*"
+      opencontainers:
+        patterns:
+          - "github.com/opencontainers/*"
 
   - package-ecosystem: "github-actions"
     directories:
 
@@ -0,0 +1,54 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Lint GitHub Actions
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # only run when workflow or local action definitions change
+      - '.github/workflows/**'
+      - '.github/actions/**'
+  pull_request:
+    branches:
+      - main
+    paths:  # same path filter as the push trigger
+      - '.github/workflows/**'
+      - '.github/actions/**'
+  workflow_dispatch: {}  # manual trigger, no inputs
+
+permissions:  # workflow token is limited to reading repository contents
+  contents: read
+
+concurrency:  # one group per PR (or per ref for non-PR events); a newer run cancels the older one
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  actionlint:
+    name: Lint Workflows
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false  # do not keep the checkout token in the local git config
+
+      - name: Run actionlint
+        uses: reviewdog/action-actionlint@a5524e1de9f1c345efc2e244a8b348e17f1b8e58  # v1.65.0
+        with:
+          fail_on_error: true  # NOTE(review): presumably makes lint findings fail the job; confirm against the pinned action's inputs
@@ -18,7 +18,6 @@ on:
   push:
     branches:
       - main
-      - "pull-request/*"
     paths-ignore:
       - '**.md'
       - 'docs/**'
@@ -37,7 +36,7 @@ permissions:
   contents: read
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
 
 jobs:
 
@@ -22,7 +22,6 @@ on:
 
 permissions:
   contents: read
-  pull-requests: write
 
 concurrency:
   group: ${{ github.workflow }}
@@ -32,6 +31,9 @@ jobs:
   conflicts:
     name: Check Open PRs for Conflicts
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
     timeout-minutes: 10
     steps:
       - name: Check Mergeable State
 
@@ -17,17 +17,18 @@ name: Dependabot Auto-Merge
 on:
   pull_request_target:
     types: [opened, synchronize]
-  workflow_dispatch: {}
 
 permissions:
-  contents: write
-  pull-requests: write
+  contents: read
 
 jobs:
 
   auto-merge:
     name: Auto-Merge Patch Updates
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
     timeout-minutes: 5
     if: github.actor == 'dependabot[bot]'
     steps:
 
@@ -80,32 +80,12 @@ jobs:
 
       # --- Snapshot and validation ---
 
-      - name: Run aicr snapshot
-        run: |
-          ./aicr snapshot --deploy-agent \
-            --kubeconfig="${HOME}/.kube/config" \
-            --namespace=default \
-            --image=ko.local:smoke-test \
-            --require-gpu \
-            --output=snapshot.yaml
-          echo "--- Snapshot output ---"
-          cat snapshot.yaml
-
-      - name: Validate snapshot detected GPU
-        run: |
-          GPU_MODEL=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu.model"]' snapshot.yaml)
-          GPU_COUNT=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu-count"]' snapshot.yaml)
-          echo "GPU model: ${GPU_MODEL}"
-          echo "GPU count: ${GPU_COUNT}"
-          if [[ "${GPU_MODEL}" != *"H100"* ]]; then
-            echo "::error::Expected H100 GPU in snapshot, got: ${GPU_MODEL}"
-            exit 1
-          fi
-          if [[ "${GPU_COUNT}" -lt 1 ]]; then
-            echo "::error::Expected gpu-count >= 1, got: ${GPU_COUNT}"
-            exit 1
-          fi
-          echo "Snapshot correctly detected ${GPU_COUNT}x ${GPU_MODEL}"
+      - name: Snapshot and validate GPU
+        uses: ./.github/actions/gpu-snapshot-validate
+        with:
+          gpu_model: H100
+          min_gpu_count: '1'
+          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
 
       # --- Install Karpenter before validation so cluster-autoscaling check passes ---
 
@@ -252,7 +232,7 @@ jobs:
 
       - name: Upload conformance evidence
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
         with:
           name: conformance-evidence
           path: |
 
@@ -77,32 +77,12 @@ jobs:
 
       # --- Snapshot and validation ---
 
-      - name: Run aicr snapshot
-        run: |
-          ./aicr snapshot --deploy-agent \
-            --kubeconfig="${HOME}/.kube/config" \
-            --namespace=default \
-            --image=ko.local:smoke-test \
-            --require-gpu \
-            --output=snapshot.yaml
-          echo "--- Snapshot output ---"
-          cat snapshot.yaml
-
-      - name: Validate snapshot detected GPU
-        run: |
-          GPU_MODEL=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu.model"]' snapshot.yaml)
-          GPU_COUNT=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu-count"]' snapshot.yaml)
-          echo "GPU model: ${GPU_MODEL}"
-          echo "GPU count: ${GPU_COUNT}"
-          if [[ "${GPU_MODEL}" != *"H100"* ]]; then
-            echo "::error::Expected H100 GPU in snapshot, got: ${GPU_MODEL}"
-            exit 1
-          fi
-          if [[ "${GPU_COUNT}" -lt 2 ]]; then
-            echo "::error::Expected gpu-count >= 2 for training, got: ${GPU_COUNT}"
-            exit 1
-          fi
-          echo "Snapshot correctly detected ${GPU_COUNT}x ${GPU_MODEL}"
+      - name: Snapshot and validate GPU
+        uses: ./.github/actions/gpu-snapshot-validate
+        with:
+          gpu_model: H100
+          min_gpu_count: '2'
+          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
 
       # --- Install Karpenter before validation so cluster-autoscaling check passes ---
 
@@ -164,7 +144,7 @@ jobs:
 
       - name: Upload conformance evidence
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
         with:
           name: conformance-evidence
           path: |
 
@@ -93,32 +93,12 @@ jobs:
 
       # --- Snapshot and validation ---
 
-      - name: Run aicr snapshot
-        run: |
-          ./aicr snapshot --deploy-agent \
-            --kubeconfig="${HOME}/.kube/config" \
-            --namespace=default \
-            --image=ko.local:smoke-test \
-            --require-gpu \
-            --output=snapshot.yaml
-          echo "--- Snapshot output ---"
-          cat snapshot.yaml
-
-      - name: Validate snapshot detected GPU
-        run: |
-          GPU_MODEL=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu.model"]' snapshot.yaml)
-          GPU_COUNT=$(yq eval '.measurements[] | select(.type == "GPU") | .subtypes[0].data["gpu-count"]' snapshot.yaml)
-          echo "GPU model: ${GPU_MODEL}"
-          echo "GPU count: ${GPU_COUNT}"
-          if [[ "${GPU_MODEL}" != *"T4"* ]]; then
-            echo "::error::Expected T4 GPU in snapshot, got: ${GPU_MODEL}"
-            exit 1
-          fi
-          if [[ "${GPU_COUNT}" -lt 1 ]]; then
-            echo "::error::Expected gpu-count >= 1, got: ${GPU_COUNT}"
-            exit 1
-          fi
-          echo "Snapshot correctly detected ${GPU_COUNT}x ${GPU_MODEL}"
+      - name: Snapshot and validate GPU
+        uses: ./.github/actions/gpu-snapshot-validate
+        with:
+          gpu_model: T4
+          min_gpu_count: '1'
+          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
 
       - name: Debug diagnostics
         if: failure()
 
@@ -20,7 +20,7 @@ on:
   workflow_dispatch: {}
 
 permissions:
-  pull-requests: write
+  contents: read
 
 concurrency:
   group: ${{ github.workflow }}
@@ -30,6 +30,8 @@ jobs:
   remind:
     name: Nudge Inactive PRs
     runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
     timeout-minutes: 10
     steps:
       - name: Check for Inactive PRs