Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions tests/bats/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ TEST_CHART_VERSION ?= "$(VERSION_GHCR_CHART)"

# The baseline Helm chart to test upgrades from and downgrades to.
TEST_CHART_LASTSTABLE_REPO ?= "oci://ghcr.io/nvidia/k8s-dra-driver-gpu"
TEST_CHART_LASTSTABLE_VERSION ?= "25.8.1-fc69d985-chart"
TEST_CHART_LASTSTABLE_VERSION ?= "25.12.0-0882da87-chart"

# If not "false": the to-be-tested Helm chart is installed from the local
# filesystem (from `deployments/helm/nvidia-dra-driver-gpu`). Make sure
Expand Down Expand Up @@ -136,9 +136,18 @@ endef

default: tests

# Temporary image tag, see
# https://github.com/NVIDIA/k8s-dra-driver-gpu/issues/908. The `$$$$` below
# evaluates to $$ in the recipe-executing shell, which resolves to its process
# ID. With that, concurrent recipe executions use different image tags,
# preventing rare export errors. Note that `docker tag` is atomic (never fails
# with "already exists").
.PHONY: runner-image
runner-image:
docker buildx build . -t $(BATS_IMAGE) -f tests/bats/Dockerfile
TMP_TAG="$(BATS_IMAGE)-tmp-$$$$" && \
docker buildx build . -t $$TMP_TAG -f tests/bats/Dockerfile && \
docker tag $$TMP_TAG $(BATS_IMAGE) && \
docker rmi $$TMP_TAG

# Warning: destructive against currently configured k8s cluster.
#
Expand Down
32 changes: 22 additions & 10 deletions tests/bats/helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -217,21 +217,33 @@ get_device_attrs_from_any_gpu_slice() {
local device_type="$1"
local node_name="$2"
local spath="${BATS_TEST_TMPDIR}/gpu_resource_slice_content"
local slicename
local slicenames

# Get contents of first listed GPU plugin resource slice; dump into file. If a
# node_name was provided, filter for the GPU plugin resource slice on that
# node.
# For debugging, show current set of slices.
kubectl get resourceslice >&2

# Get names of resource slices: either for all gpu plugin resource slices, or
# for all gpu slices announced by a specific node.
if [ -n "$node_name" ]; then
slicename="$(kubectl get resourceslices.resource.k8s.io | grep 'gpu.nvidia.com' | grep "$node_name" | head -n1 | awk '{print $1}')"
slicenames="$(kubectl get resourceslices.resource.k8s.io | grep 'gpu.nvidia.com' | grep "$node_name" | awk '{print $1}')"
else
slicename="$(kubectl get resourceslices.resource.k8s.io | grep 'gpu.nvidia.com' | head -n1 | awk '{print $1}')"
slicenames="$(kubectl get resourceslices.resource.k8s.io | grep 'gpu.nvidia.com' | awk '{print $1}')"
fi
log "resource slice name: $slicename"

# For debugging, show current set of slices.
kubectl get resourceslice >&2
kubectl get resourceslices.resource.k8s.io -o yaml "${slicename}" > "${spath}"
# Identify a suitable (device-announcing) resource slice. This is relevant
# because on Kubernetes 1.35+ there is one special resource slice per node
# that only contains counters, and no devices. We must make sure that we
# do not use that for device attribute extraction.
local slicename
for slicename in $slicenames; do
kubectl get resourceslices.resource.k8s.io -o yaml "${slicename}" > "${spath}"
if grep -q "devices:" "${spath}"; then
log "identified suitable resource slice: $slicename"
break
fi
log "do not use slice $slicename (does not define devices)"
done

log "wrote resource slice content to: ${spath}"

# Log contents, for https://github.com/NVIDIA/k8s-dra-driver-gpu/issues/902
Expand Down