From b53e354e95ca803f19fab2d351610d89b6b10769 Mon Sep 17 00:00:00 2001 From: Swati Gupta Date: Wed, 23 Apr 2025 18:21:45 +0000 Subject: [PATCH 1/4] Add gpu mutating webhook --- .../admission_controller.go | 144 ++++++++++++++++ cmd/gpu-mutating-webhook/main.go | 162 ++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 cmd/gpu-mutating-webhook/admission_controller.go create mode 100644 cmd/gpu-mutating-webhook/main.go diff --git a/cmd/gpu-mutating-webhook/admission_controller.go b/cmd/gpu-mutating-webhook/admission_controller.go new file mode 100644 index 000000000..f1ac407d5 --- /dev/null +++ b/cmd/gpu-mutating-webhook/admission_controller.go @@ -0,0 +1,144 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/
+
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+
+	admissionv1 "k8s.io/api/admission/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/serializer"
+)
+
+const (
+	jsonContentType = `application/json`
+)
+
+var (
+	universalDeserializer = serializer.NewCodecFactory(runtime.NewScheme()).UniversalDeserializer()
+)
+
+type patchOperation struct {
+	Op    string      `json:"op"`
+	Path  string      `json:"path"`
+	Value interface{} `json:"value,omitempty"`
+}
+
+type admitFunc func(*admissionv1.AdmissionRequest) ([]patchOperation, error)
+
+func isKubeNamespace(ns string) bool {
+	return (ns == metav1.NamespacePublic || ns == metav1.NamespaceSystem)
+}
+
+func doServeAdmitFunc(w http.ResponseWriter, r *http.Request, admit admitFunc) ([]byte, error) {
+	// Request validation. Only handle POST requests with a body and json content type.
+	if r.Method != http.MethodPost {
+		w.WriteHeader(http.StatusMethodNotAllowed)
+		return nil, fmt.Errorf("invalid method %s, only POST is allowed", r.Method)
+	}
+
+	body, err := io.ReadAll(r.Body)
+	if err != nil {
+		w.WriteHeader(http.StatusBadRequest)
+		return nil, fmt.Errorf("could not read request body: %v", err)
+	}
+
+	if ct := r.Header.Get("Content-Type"); ct != jsonContentType {
+		w.WriteHeader(http.StatusBadRequest)
+		return nil, fmt.Errorf("unsupported content type %s, only %s is supported", ct, jsonContentType)
+	}
+
+	// Parse the AdmissionReview request.
+ var admissionReviewReq admissionv1.AdmissionReview + if _, _, err := universalDeserializer.Decode(body, nil, &admissionReviewReq); err != nil { + w.WriteHeader(http.StatusBadRequest) + return nil, fmt.Errorf("could not deserialize AdmissionReview: %v", err) + } else if admissionReviewReq.Request == nil { + w.WriteHeader(http.StatusBadRequest) + return nil, errors.New("malformed admission review: Request is nil") + } + + // Build the response + admissionReviewResp := admissionv1.AdmissionReview{ + TypeMeta: admissionReviewReq.TypeMeta, + Response: &admissionv1.AdmissionResponse{ + UID: admissionReviewReq.Request.UID, + }, + } + + // Skip k8s namespaces + var patchOps []patchOperation + if !isKubeNamespace(admissionReviewReq.Request.Namespace) { + patchOps, err = admit(admissionReviewReq.Request) + } + + if err != nil { + admissionReviewResp.Response.Allowed = false + admissionReviewResp.Response.Result = &metav1.Status{ + Message: err.Error(), + } + } else { + patchBytes, err := json.Marshal(patchOps) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + return nil, fmt.Errorf("could not marshal JSON patch: %v", err) + } + admissionReviewResp.Response.Allowed = true + admissionReviewResp.Response.Patch = patchBytes + + pt := admissionv1.PatchTypeJSONPatch + admissionReviewResp.Response.PatchType = &pt + } + + respBytes, err := json.Marshal(admissionReviewResp) + if err != nil { + return nil, fmt.Errorf("could not marshal AdmissionReview response: %v", err) + } + return respBytes, nil +} + +// serveAdmitFunc is a wrapper that handles HTTP, calls doServeAdmitFunc, and writes the result. 
+func serveAdmitFunc(w http.ResponseWriter, r *http.Request, admit admitFunc) { + log.Print("Handling webhook request ...") + + respBytes, err := doServeAdmitFunc(w, r, admit) + if err != nil { + log.Printf("Error handling webhook request: %v", err) + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) + return + } + + log.Print("Webhook request handled successfully") + _, writeErr := w.Write(respBytes) + if writeErr != nil { + log.Printf("Could not write response: %v", writeErr) + } +} + +// admitFuncHandler converts an admitFunc into an http.Handler +func admitFuncHandler(admit admitFunc) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + serveAdmitFunc(w, r, admit) + }) +} diff --git a/cmd/gpu-mutating-webhook/main.go b/cmd/gpu-mutating-webhook/main.go new file mode 100644 index 000000000..a7d7c3f92 --- /dev/null +++ b/cmd/gpu-mutating-webhook/main.go @@ -0,0 +1,162 @@ +/** +# Copyright 2025 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package main + +import ( + "fmt" + "log" + "net/http" + "path/filepath" + "strings" + + admissionv1 "k8s.io/api/admission/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + tlsDir = `/etc/webhook/tls` + tlsCertFile = `tls.crt` + tlsKeyFile = `tls.key` +) + +var ( + podResource = metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} + gpuClaimName = "nvidia-gpu-resourceclaim" + gpuTemplateName = "nvidia-gpu-resourceclaim-template" +) + +func applyGPUMutation(req *admissionv1.AdmissionRequest) ([]patchOperation, error) { + // Only mutate if the incoming resource is a Pod CREATE request. + if req.Resource != podResource { + log.Printf("applyGPUMutation invoked for a non-Pod resource: %v", req.Resource) + return nil, nil + } + if req.Operation != admissionv1.Create { + log.Printf("applyGPUMutation invoked for operation %s, ignoring", req.Operation) + return nil, nil + } + + raw := req.Object.Raw + var pod corev1.Pod + if _, _, err := universalDeserializer.Decode(raw, nil, &pod); err != nil { + return nil, fmt.Errorf("could not deserialize pod object: %v", err) + } + + var patches []patchOperation + + // Check if the Pod already has a resource claim + hasGPUClaim := false + for _, rc := range pod.Spec.ResourceClaims { + if rc.Name == gpuClaimName { + hasGPUClaim = true + break + } + } + + // Escape "nvidia.com/gpu" for JSON Patch + escapedGPUKey := strings.ReplaceAll(strings.ReplaceAll("nvidia.com/gpu", "~", "~0"), "/", "~1") + + for i, c := range pod.Spec.Containers { + foundGPU := false + + if _, ok := c.Resources.Requests["nvidia.com/gpu"]; ok { + foundGPU = true + patches = append(patches, patchOperation{ + Op: "remove", + Path: fmt.Sprintf("/spec/containers/%d/resources/requests/%s", i, escapedGPUKey), + }) + } + + if _, ok := c.Resources.Limits["nvidia.com/gpu"]; ok { + foundGPU = true + patches = append(patches, patchOperation{ + Op: "remove", + Path: 
fmt.Sprintf("/spec/containers/%d/resources/limits/%s", i, escapedGPUKey), + }) + } + + if foundGPU { + gpuClaimPresent := false + for _, claimRef := range c.Resources.Claims { + if claimRef.Name == gpuClaimName { + gpuClaimPresent = true + break + } + } + if !gpuClaimPresent { + if c.Resources.Claims == nil { + patches = append(patches, patchOperation{ + Op: "add", + Path: fmt.Sprintf("/spec/containers/%d/resources/claims", i), + Value: []map[string]string{ + {"name": gpuClaimName}, + }, + }) + } else { + patches = append(patches, patchOperation{ + Op: "add", + Path: fmt.Sprintf("/spec/containers/%d/resources/claims/-", i), + Value: map[string]string{"name": gpuClaimName}, + }) + } + } + } + } + + if len(patches) > 0 && !hasGPUClaim { + newClaim := map[string]string{ + "name": gpuClaimName, + "resourceClaimTemplateName": gpuTemplateName, + } + + if pod.Spec.ResourceClaims == nil { + patches = append(patches, patchOperation{ + Op: "add", + Path: "/spec/resourceClaims", + Value: []map[string]string{ + newClaim, + }, + }) + } else { + patches = append(patches, patchOperation{ + Op: "add", + Path: "/spec/resourceClaims/-", + Value: newClaim, + }) + } + log.Printf("Added ResourceClaim %q referencing template %q to Pod %q", + gpuClaimName, gpuTemplateName, pod.Name) + } + + return patches, nil +} + +func main() { + certPath := filepath.Join(tlsDir, tlsCertFile) + keyPath := filepath.Join(tlsDir, tlsKeyFile) + + mux := http.NewServeMux() + mux.Handle("/mutate", admitFuncHandler(applyGPUMutation)) + + server := &http.Server{ + Addr: ":8443", + Handler: mux, + } + log.Printf("Starting webhook server on %s", server.Addr) + log.Fatal(server.ListenAndServeTLS(certPath, keyPath)) +} From 13cdba2a6c7947a890d59a71e9b7d9debffd28e9 Mon Sep 17 00:00:00 2001 From: Swati Gupta Date: Wed, 23 Apr 2025 18:22:05 +0000 Subject: [PATCH 2/4] Vendor update --- vendor/modules.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/vendor/modules.txt b/vendor/modules.txt index 
8bf3eb5e0..026967ff4 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -362,6 +362,7 @@ gopkg.in/inf.v0 gopkg.in/yaml.v3 # k8s.io/api v0.32.0 ## explicit; go 1.23.0 +k8s.io/api/admission/v1 k8s.io/api/admissionregistration/v1 k8s.io/api/admissionregistration/v1alpha1 k8s.io/api/admissionregistration/v1beta1 From ec7c440de26ca65df34a5320540512f7d4318f10 Mon Sep 17 00:00:00 2001 From: Swati Gupta Date: Wed, 23 Apr 2025 19:02:00 +0000 Subject: [PATCH 3/4] Add helm deployment --- deployments/container/Dockerfile | 1 + .../nvidia-dra-driver-gpu/generate-certs.sh | 120 ++++++++++++++++++ .../templates/gpumutatingwebhook.yaml | 48 +++++++ templates/gpu-claim-template.tmpl.yaml | 11 ++ 4 files changed, 180 insertions(+) create mode 100755 deployments/helm/nvidia-dra-driver-gpu/generate-certs.sh create mode 100644 deployments/helm/nvidia-dra-driver-gpu/templates/gpumutatingwebhook.yaml create mode 100644 templates/gpu-claim-template.tmpl.yaml diff --git a/deployments/container/Dockerfile b/deployments/container/Dockerfile index 295d24619..597be64be 100644 --- a/deployments/container/Dockerfile +++ b/deployments/container/Dockerfile @@ -67,4 +67,5 @@ RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-L COPY --from=build /artifacts/compute-domain-controller /usr/bin/compute-domain-controller COPY --from=build /artifacts/compute-domain-kubelet-plugin /usr/bin/compute-domain-kubelet-plugin COPY --from=build /artifacts/gpu-kubelet-plugin /usr/bin/gpu-kubelet-plugin +COPY --from=build /artifacts/gpu-mutating-webhook /usr/bin/gpu-mutating-webhook COPY --from=build /build/templates /templates diff --git a/deployments/helm/nvidia-dra-driver-gpu/generate-certs.sh b/deployments/helm/nvidia-dra-driver-gpu/generate-certs.sh new file mode 100755 index 000000000..ce0602b8e --- /dev/null +++ b/deployments/helm/nvidia-dra-driver-gpu/generate-certs.sh @@ -0,0 +1,120 @@ +#!/bin/bash +set -e + +mkdir -p certs +cd certs + +SERVICE=gpu-mutating-webhook 
+NAMESPACE=nvidia-dra-driver-gpu +SECRET_NAME=webhook-tls + +# Generate the CA key and certificate +openssl genrsa -out ca.key 2048 +openssl req -new -x509 -days 365 -key ca.key -subj "/CN=Kubernetes CA" -out ca.crt + +# Generate the server key +openssl genrsa -out server.key 2048 + +# Generate a Certificate Signing Request +cat > csr.conf << EOF +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name + +[req_distinguished_name] +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names + +[alt_names] +DNS.1 = ${SERVICE} +DNS.2 = ${SERVICE}.${NAMESPACE} +DNS.3 = ${SERVICE}.${NAMESPACE}.svc +EOF + +openssl req -new -key server.key -subj "/CN=${SERVICE}.${NAMESPACE}.svc" -out server.csr -config csr.conf + +# Sign the certificate +cat > cert.conf << EOF +[auth_ext] +authorityKeyIdentifier=keyid,issuer +basicConstraints=CA:FALSE +keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names + +[alt_names] +DNS.1 = ${SERVICE} +DNS.2 = ${SERVICE}.${NAMESPACE} +DNS.3 = ${SERVICE}.${NAMESPACE}.svc +EOF + +openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -extfile cert.conf -extensions auth_ext + +# Base64 encode the certificates +CA_BUNDLE=$(openssl base64 -A < ca.crt) +TLS_CERT=$(openssl base64 -A < server.crt) +TLS_KEY=$(openssl base64 -A < server.key) + +# Create the Secret YAML +cat > webhook-secret.yaml << EOF +apiVersion: v1 +kind: Secret +metadata: + name: ${SECRET_NAME} + namespace: ${NAMESPACE} +type: kubernetes.io/tls +data: + tls.crt: ${TLS_CERT} + tls.key: ${TLS_KEY} +EOF + +# Create the webhookconfiguration +cat > mutatingwebhook.yaml << EOF +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: gpu-mutating-webhook +webhooks: +- name: gpu.mutating.k8s.io + 
admissionReviewVersions: ["v1"]
+  sideEffects: None
+  failurePolicy: Ignore
+  clientConfig:
+    service:
+      name: ${SERVICE}
+      namespace: ${NAMESPACE}
+      path: "/mutate"
+    caBundle: ${CA_BUNDLE}
+  rules:
+  - apiGroups: [""]
+    apiVersions: ["v1"]
+    operations: ["CREATE", "UPDATE"]
+    resources: ["pods"]
+  namespaceSelector:
+    matchExpressions:
+    - key: kubernetes.io/metadata.name
+      operator: NotIn
+      values: ["kube-system", "${NAMESPACE}"]
+EOF
+
+# Create the resourceclaimtemplate
+cat > gpuresourceclaim.yaml << EOF
+apiVersion: resource.k8s.io/v1beta1
+kind: ResourceClaimTemplate
+metadata:
+  name: nvidia-gpu-resourceclaim-template
+spec:
+  spec:
+    devices:
+      requests:
+      - name: gpu
+        deviceClassName: gpu.nvidia.com
+EOF
+
+echo "Generated TLS certificates and secret successfully"
+echo "Apply the secret with: kubectl apply -f webhook-secret.yaml"
+echo "Apply the webhook configuration with: kubectl apply -f mutatingwebhook.yaml"
+echo "Apply the resourceclaimtemplate with: kubectl apply -f gpuresourceclaim.yaml"
diff --git a/deployments/helm/nvidia-dra-driver-gpu/templates/gpumutatingwebhook.yaml b/deployments/helm/nvidia-dra-driver-gpu/templates/gpumutatingwebhook.yaml
new file mode 100644
index 000000000..7ff1d8f1d
--- /dev/null
+++ b/deployments/helm/nvidia-dra-driver-gpu/templates/gpumutatingwebhook.yaml
@@ -0,0 +1,46 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gpu-mutating-webhook
+  namespace: nvidia-dra-driver-gpu
+  labels:
+    app: gpu-mutating-webhook
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: gpu-mutating-webhook
+  template:
+    metadata:
+      labels:
+        app: gpu-mutating-webhook
+    spec:
+      serviceAccountName: nvidia-dra-driver-gpu-service-account
+      containers:
+      - name: webhook
+        image: localhost:5001/mutating-webhook:7.0
+        command: ["/usr/bin/gpu-mutating-webhook"]
+        imagePullPolicy: IfNotPresent
+        ports:
+        - containerPort: 8443
+          name: webhook-api
+ volumeMounts: + - name: webhook-tls + mountPath: /etc/webhook/tls + readOnly: true + volumes: + - name: webhook-tls + secret: + secretName: webhook-tls +--- +apiVersion: v1 +kind: Service +metadata: + name: gpu-mutating-webhook + namespace: nvidia-dra-driver-gpu +spec: + selector: + app: gpu-mutating-webhook + ports: + - port: 443 + targetPort: webhook-api diff --git a/templates/gpu-claim-template.tmpl.yaml b/templates/gpu-claim-template.tmpl.yaml new file mode 100644 index 000000000..7a9c1fab2 --- /dev/null +++ b/templates/gpu-claim-template.tmpl.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: resource.k8s.io/v1beta1 +kind: ResourceClaimTemplate +metadata: + name: nvidia-gpu-resourceclaim-template +spec: + spec: + devices: + requests: + - name: gpu + deviceClassName: gpu.nvidia.com From 463045609865b57ab4c618121e920e68bcf592f4 Mon Sep 17 00:00:00 2001 From: Swati Gupta Date: Thu, 1 May 2025 23:18:07 +0000 Subject: [PATCH 4/4] Refractor code into helper functions and add klog Signed-off-by: Swati Gupta --- .../admission_controller.go | 1 + cmd/gpu-mutating-webhook/main.go | 213 ++++++++++-------- 2 files changed, 124 insertions(+), 90 deletions(-) diff --git a/cmd/gpu-mutating-webhook/admission_controller.go b/cmd/gpu-mutating-webhook/admission_controller.go index f1ac407d5..05cf98835 100644 --- a/cmd/gpu-mutating-webhook/admission_controller.go +++ b/cmd/gpu-mutating-webhook/admission_controller.go @@ -46,6 +46,7 @@ type patchOperation struct { type admitFunc func(*admissionv1.AdmissionRequest) ([]patchOperation, error) +// Swati: skip nvidia-dra-driver-gpu ns as well func isKubeNamespace(ns string) bool { return (ns == metav1.NamespacePublic || ns == metav1.NamespaceSystem) } diff --git a/cmd/gpu-mutating-webhook/main.go b/cmd/gpu-mutating-webhook/main.go index a7d7c3f92..a06f14eab 100644 --- a/cmd/gpu-mutating-webhook/main.go +++ b/cmd/gpu-mutating-webhook/main.go @@ -26,126 +26,155 @@ import ( admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" ) const ( - tlsDir = `/etc/webhook/tls` - tlsCertFile = `tls.crt` - tlsKeyFile = `tls.key` + tlsDir = `/etc/webhook/tls` + tlsCertFile = `tls.crt` + tlsKeyFile = `tls.key` + gpuResourceName = "nvidia.com/gpu" + gpuClaimName = "nvidia-gpu-resourceclaim" + gpuTemplateName = "nvidia-gpu-resourceclaim-template" ) var ( - podResource = metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} - gpuClaimName = "nvidia-gpu-resourceclaim" - gpuTemplateName = "nvidia-gpu-resourceclaim-template" + podResource = metav1.GroupVersionResource{Version: "v1", Resource: "pods"} ) func applyGPUMutation(req *admissionv1.AdmissionRequest) ([]patchOperation, error) { - // Only mutate if the incoming resource is a Pod CREATE request. - if req.Resource != podResource { - log.Printf("applyGPUMutation invoked for a non-Pod resource: %v", req.Resource) - return nil, nil - } - if req.Operation != admissionv1.Create { - log.Printf("applyGPUMutation invoked for operation %s, ignoring", req.Operation) + // Only mutate Pod CREATE + // Swati: may be add UPDATE + if req.Resource != podResource || req.Operation != admissionv1.Create { + klog.Infof("skip mutation for %v/%v", req.Resource, req.Operation) return nil, nil } - raw := req.Object.Raw var pod corev1.Pod - if _, _, err := universalDeserializer.Decode(raw, nil, &pod); err != nil { - return nil, fmt.Errorf("could not deserialize pod object: %v", err) + if _, _, err := universalDeserializer.Decode(req.Object.Raw, nil, &pod); err != nil { + klog.Errorf("failed to decode Pod: %v", err) + return nil, fmt.Errorf("could not deserialize pod: %w", err) } + key := escapeJSONPointer(gpuResourceName) var patches []patchOperation - - // Check if the Pod already has a resource claim - hasGPUClaim := false - for _, rc := range pod.Spec.ResourceClaims { - if rc.Name == gpuClaimName { - hasGPUClaim = true - break + var ctrGPUResourceClaims []string + + // Iterate on all containers and 
check for "nvidia.com/gpu" limits + // using the logic described here for prefering limits over requests + // GPUs are only supposed to be specified in the limits section, meaning + // - can specify GPU limits without specifying requests. limit will be used as request value by default + // - can specify GPU in both limits and requests but they must be equal + // - cannot specify GPU requests without specifying limits + // refer: https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/#using-device-plugins + for ci, ctr := range pod.Spec.Containers { + ctrName := ctr.Name + limitCount, limitOk := ctr.Resources.Limits[gpuResourceName] + + // skip if no GPUs in limits + if !limitOk || limitCount.Value() < 1 { + continue } - } - - // Escape "nvidia.com/gpu" for JSON Patch - escapedGPUKey := strings.ReplaceAll(strings.ReplaceAll("nvidia.com/gpu", "~", "~0"), "/", "~1") - - for i, c := range pod.Spec.Containers { - foundGPU := false - - if _, ok := c.Resources.Requests["nvidia.com/gpu"]; ok { - foundGPU = true - patches = append(patches, patchOperation{ - Op: "remove", - Path: fmt.Sprintf("/spec/containers/%d/resources/requests/%s", i, escapedGPUKey), - }) + gpuCount := limitCount.Value() + + // check any GPUs in requests + // it must be equal to limits + if reqCount, reqOK := ctr.Resources.Requests[gpuResourceName]; reqOK { + if reqCount.Value() != gpuCount { + klog.Warningf("container[%q]: gpu request (%d) != limit (%d), skipping mutation", ctrName, reqCount.Value(), gpuCount) + continue + } + reqPatch := removeResourceRequest(ci, "requests", key) + patches = append(patches, reqPatch) + klog.Infof("removed container[%q].Resources.Requests: %v", ctrName, reqPatch) } - - if _, ok := c.Resources.Limits["nvidia.com/gpu"]; ok { - foundGPU = true - patches = append(patches, patchOperation{ - Op: "remove", - Path: fmt.Sprintf("/spec/containers/%d/resources/limits/%s", i, escapedGPUKey), - }) + limitPatch := removeResourceRequest(ci, "limits", key) + patches = 
append(patches, limitPatch)
+		klog.Infof("removed container[%q].Resources.Limits: %v", ctrName, limitPatch)
+
+		// ensure container-claims slice exists
+		// JSON Patch cannot append to a missing array: create the field first, then append to it with "-"
+		if len(ctr.Resources.Claims) == 0 {
+			createPatch := createClaimPatch(fmt.Sprintf("/spec/containers/%d/resources/claims", ci))
+			patches = append(patches, createPatch)
+			klog.Infof("created container[%q] empty claims array: %v", ctrName, createPatch)
 		}
 
-		if foundGPU {
-			gpuClaimPresent := false
-			for _, claimRef := range c.Resources.Claims {
-				if claimRef.Name == gpuClaimName {
-					gpuClaimPresent = true
-					break
-				}
-			}
-			if !gpuClaimPresent {
-				if c.Resources.Claims == nil {
-					patches = append(patches, patchOperation{
-						Op:   "add",
-						Path: fmt.Sprintf("/spec/containers/%d/resources/claims", i),
-						Value: []map[string]string{
-							{"name": gpuClaimName},
-						},
-					})
-				} else {
-					patches = append(patches, patchOperation{
-						Op:    "add",
-						Path:  fmt.Sprintf("/spec/containers/%d/resources/claims/-", i),
-						Value: map[string]string{"name": gpuClaimName},
-					})
-				}
-			}
+		// append one claim per GPU; the container index keeps names unique across containers in the pod
+		for i := int64(0); i < gpuCount; i++ {
+			claimName := fmt.Sprintf("%s-%d-%d", gpuClaimName, ci, i)
+			ctrGPUResourceClaims = append(ctrGPUResourceClaims, claimName)
+			appendPatch := appendClaimPatch(
+				fmt.Sprintf("/spec/containers/%d/resources/claims", ci),
+				map[string]string{"name": claimName},
+			)
+			patches = append(patches, appendPatch)
+			klog.Infof("added to container[%q].Resources.Claims: %v", ctrName, appendPatch)
 		}
 	}
 
-	if len(patches) > 0 && !hasGPUClaim {
-		newClaim := map[string]string{
-			"name":                      gpuClaimName,
-			"resourceClaimTemplateName": gpuTemplateName,
+	// Add claims pod-level
+	podName := pod.Name
+	if len(ctrGPUResourceClaims) > 0 {
+		// ensure pod-claims slice exists
+		if len(pod.Spec.ResourceClaims) == 0 {
+			createPatch := createClaimPatch("/spec/resourceClaims")
+			patches = append(patches, createPatch)
+			klog.Infof("created 
pod[%q] empty claims array: %v", podName, createPatch)
 		}
 
-		if pod.Spec.ResourceClaims == nil {
-			patches = append(patches, patchOperation{
-				Op:   "add",
-				Path: "/spec/resourceClaims",
-				Value: []map[string]string{
-					newClaim,
+		// append each container GPU claim at pod-level
+		for _, name := range ctrGPUResourceClaims {
+			appendPatch := appendClaimPatch(
+				"/spec/resourceClaims",
+				map[string]string{
+					"name":                      name,
+					"resourceClaimTemplateName": gpuTemplateName,
 				},
-			})
-		} else {
-			patches = append(patches, patchOperation{
-				Op:    "add",
-				Path:  "/spec/resourceClaims/-",
-				Value: newClaim,
-			})
+			)
+			patches = append(patches, appendPatch)
+			klog.Infof("added ResourceClaim %q (template=%q) to %q: %v", name, gpuTemplateName, podName, appendPatch)
 		}
-		log.Printf("Added ResourceClaim %q referencing template %q to Pod %q",
-			gpuClaimName, gpuTemplateName, pod.Name)
 	}
 
 	return patches, nil
 }
 
+// escapeJSONPointer escapes s as one JSON Pointer reference token (RFC 6901): "~" becomes "~0", then "/" becomes "~1"
+// refer: https://github.com/json-patch/json-patch-tests/issues/42
+// needed for "nvidia.com/gpu"; otherwise JSON Patch treats "/" as a path delimiter and "gpu" as a nested field
+func escapeJSONPointer(s string) string {
+	return strings.ReplaceAll(strings.ReplaceAll(s, "~", "~0"), "/", "~1")
+}
+
+// removeResourceRequest removes either .resources.requests or .resources.limits
+func removeResourceRequest(ci int, field, key string) patchOperation {
+	return patchOperation{
+		Op:   "remove",
+		Path: fmt.Sprintf("/spec/containers/%d/resources/%s/%s", ci, field, key),
+	}
+}
+
+// createClaimPatch creates an empty slice at the given path
+func createClaimPatch(path string) patchOperation {
+	return patchOperation{
+		Op:    "add",
+		Path:  path,
+		Value: []map[string]string{},
+	}
+}
+
+// appendClaimPatch appends to the slice at path
+// "-" is the JSON Patch way of inserting at the end of the array when no index is specified.
+// refer: https://datatracker.ietf.org/doc/html/rfc6902
+func appendClaimPatch(path string, entry map[string]string) patchOperation {
+	return patchOperation{
+		Op:    "add",
+		Path:  path + "/-",
+		Value: entry,
+	}
+}
+
 func main() {
 	certPath := filepath.Join(tlsDir, tlsCertFile)
 	keyPath := filepath.Join(tlsDir, tlsKeyFile)
@@ -157,6 +186,10 @@ func main() {
 		Addr:    ":8443",
 		Handler: mux,
 	}
-	log.Printf("Starting webhook server on %s", server.Addr)
-	log.Fatal(server.ListenAndServeTLS(certPath, keyPath))
+
+	klog.Infof("Starting gpu-mutating-webhook server at %s", server.Addr)
+	if err := server.ListenAndServeTLS(certPath, keyPath); err != nil {
+		// Swati: need better error handling here
+		log.Fatalf("Failed to start server: %v", err)
+	}
 }