Skip to content

Commit d99aa9f

Browse files
committed
Merge remote-tracking branch 'upstream/master' into forked-master
2 parents 15a0b2f + 95362dc commit d99aa9f

File tree

27 files changed

+1381
-194
lines changed

27 files changed

+1381
-194
lines changed

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ RUN make _build-manager BIN_PATH=build/_output/cmd && \
55
make _build-sriov-network-operator-config-cleanup BIN_PATH=build/_output/cmd
66

77
FROM quay.io/centos/centos:stream9
8+
USER 65532:65532
89
COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/manager /usr/bin/sriov-network-operator
910
COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-operator-config-cleanup /usr/bin/sriov-network-operator-config-cleanup
1011
COPY bindata /bindata

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,10 @@ Feature gates are used to enable or disable specific features in the operator.
419419
- **Description:** Enables the firmware reset via `mstfwreset` before a system reboot. This feature is specific to Mellanox network devices and is used to ensure that the firmware is properly reset during system maintenance.
420420
- **Default:** Disabled
421421

422+
6. **Block Device Plugin Until Configured** (`blockDevicePluginUntilConfigured`)
423+
- **Description:** Prevents the SR-IOV device plugin from starting until the sriov-config-daemon has applied the SR-IOV configuration for the node. When enabled, the device plugin daemonset runs an init container that sets a wait-for-config annotation on its pod and waits until the sriov-config-daemon removes this annotation after applying the configuration. This addresses the race condition where the device plugin starts and reports available resources before the configuration is actually applied, which can lead to pods being scheduled prematurely.
424+
- **Default:** Enabled
425+
422426
### Enabling Feature Gates
423427

424428
To enable a feature gate, add it to your configuration file or command line with the desired state. For example, to enable the `resourceInjectorMatchCondition` feature gate, you would specify:

bindata/manifests/daemon/daemonset.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ spec:
201201
volumeMounts:
202202
- name: host
203203
mountPath: /host
204+
- name: tmp
205+
mountPath: /tmp
204206
lifecycle:
205207
preStop:
206208
exec:
@@ -216,3 +218,5 @@ spec:
216218
hostPath:
217219
path: /etc/os-release
218220
type: File
221+
- name: tmp
222+
emptyDir: {}

bindata/manifests/operator-webhook/server.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ spec:
9898
securityContext:
9999
readOnlyRootFilesystem: true
100100
allowPrivilegeEscalation: false
101+
capabilities:
102+
drop:
103+
- ALL
104+
runAsNonRoot: true
105+
seccompProfile:
106+
type: RuntimeDefault
101107
resources:
102108
requests:
103109
cpu: 10m

bindata/manifests/plugins/002-rbac.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,27 @@ subjects:
4949
- kind: ServiceAccount
5050
name: sriov-device-plugin
5151
namespace: {{.Namespace}}
52+
---
53+
apiVersion: rbac.authorization.k8s.io/v1
54+
kind: Role
55+
metadata:
56+
name: sriov-device-plugin-pod-access
57+
namespace: {{.Namespace}}
58+
rules:
59+
- apiGroups: [""]
60+
resources: ["pods"]
61+
verbs: ["get", "list", "watch", "update", "patch"]
62+
---
63+
apiVersion: rbac.authorization.k8s.io/v1
64+
kind: RoleBinding
65+
metadata:
66+
name: sriov-device-plugin-pod-access
67+
namespace: {{.Namespace}}
68+
roleRef:
69+
apiGroup: rbac.authorization.k8s.io
70+
kind: Role
71+
name: sriov-device-plugin-pod-access
72+
subjects:
73+
- kind: ServiceAccount
74+
name: sriov-device-plugin
75+
namespace: {{.Namespace}}

bindata/manifests/plugins/sriov-device-plugin.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ spec:
3939
component: network
4040
type: infra
4141
openshift.io/component: network
42+
# NOTE: The controller uses equality.Semantic.DeepDerivative(in.Spec, ds.Spec)
43+
# to detect changes in the DaemonSet's spec and decide if an update is needed.
44+
# To ensure the init container is properly managed when toggling the
45+
# BlockDevicePluginUntilConfigured feature gate, we define an explicit field
46+
# (init-container-enabled) that is always present in the DaemonSet's pod template labels.
47+
# Its value reflects the state of the feature gate and guarantees spec changes
48+
# are propagated, ensuring the init container is added or removed as required.
49+
init-container-enabled: "{{ .BlockDevicePluginUntilConfigured }}"
4250
spec:
4351
hostNetwork: true
4452
nodeSelector:
@@ -55,6 +63,25 @@ spec:
5563
- name: {{ . }}
5664
{{- end }}
5765
{{- end }}
66+
{{- if .BlockDevicePluginUntilConfigured }}
67+
initContainers:
68+
- name: sriov-device-plugin-init
69+
image: {{.SRIOVNetworkConfigDaemonImage}}
70+
command:
71+
- sriov-network-config-daemon
72+
- wait-for-config
73+
- --pod-name=$(POD_NAME)
74+
- --pod-namespace=$(POD_NAMESPACE)
75+
env:
76+
- name: POD_NAME
77+
valueFrom:
78+
fieldRef:
79+
fieldPath: metadata.name
80+
- name: POD_NAMESPACE
81+
valueFrom:
82+
fieldRef:
83+
fieldPath: metadata.namespace
84+
{{- end }}
5885
containers:
5986
- name: sriov-device-plugin
6087
image: {{.SRIOVDevicePluginImage}}

bindata/manifests/webhook/server.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ spec:
101101
securityContext:
102102
readOnlyRootFilesystem: true
103103
allowPrivilegeEscalation: false
104+
capabilities:
105+
drop:
106+
- ALL
107+
runAsNonRoot: true
108+
seccompProfile:
109+
type: RuntimeDefault
104110
resources:
105111
requests:
106112
cpu: 10m

cmd/sriov-network-config-daemon/start.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ func initFeatureGates(defaultConfig *sriovnetworkv1.SriovOperatorConfig) (featur
185185
featureGates := featuregate.New()
186186
featureGates.Init(defaultConfig.Spec.FeatureGates)
187187
fnLogger.Info("Enabled featureGates", "featureGates", featureGates.String())
188-
188+
vars.FeatureGate = featureGates
189189
return featureGates, nil
190190
}
191191

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
2+
Copyright 2025.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
package main
17+
18+
import (
19+
"context"
20+
"fmt"
21+
"time"
22+
23+
"github.com/go-logr/logr"
24+
"github.com/spf13/cobra"
25+
corev1 "k8s.io/api/core/v1"
26+
"k8s.io/apimachinery/pkg/fields"
27+
"k8s.io/apimachinery/pkg/types"
28+
"k8s.io/apimachinery/pkg/util/wait"
29+
"k8s.io/client-go/rest"
30+
ctrl "sigs.k8s.io/controller-runtime"
31+
"sigs.k8s.io/controller-runtime/pkg/cache"
32+
"sigs.k8s.io/controller-runtime/pkg/client"
33+
"sigs.k8s.io/controller-runtime/pkg/log"
34+
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
35+
36+
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
37+
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
38+
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
39+
)
40+
41+
var (
42+
waitForConfigCmd = &cobra.Command{
43+
Use: "wait-for-config",
44+
Short: "Wait for SR-IOV configuration to be applied",
45+
Long: "Init container command that sets annotation on pod and waits for " +
46+
"sriov-config-daemon to apply configuration and remove the annotation",
47+
RunE: runWaitForConfigCmd,
48+
}
49+
50+
waitForConfigOpts struct {
51+
podName string
52+
podNamespace string
53+
}
54+
)
55+
56+
func init() {
57+
rootCmd.AddCommand(waitForConfigCmd)
58+
waitForConfigCmd.PersistentFlags().StringVar(&waitForConfigOpts.podName, "pod-name", "",
59+
"kubernetes pod name of the device plugin")
60+
waitForConfigCmd.PersistentFlags().StringVar(&waitForConfigOpts.podNamespace, "pod-namespace", "",
61+
"kubernetes namespace where the device plugin pod is running")
62+
}
63+
64+
type WaitForConfigReconciler struct {
65+
client.Client
66+
Pod types.NamespacedName
67+
Cancel context.CancelFunc
68+
}
69+
70+
func (r *WaitForConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
71+
logger := log.FromContext(ctx)
72+
73+
// This check is currently redundant since the cache is configured to watch only the target pod object.
74+
// However, it is intentionally included to document this assumption and to provide a safeguard in case
75+
// the cache configuration is modified in the future to watch additional pods.
76+
if r.Pod != req.NamespacedName {
77+
return ctrl.Result{}, nil
78+
}
79+
80+
pod := &corev1.Pod{}
81+
if err := r.Get(ctx, req.NamespacedName, pod); err != nil {
82+
logger.Error(err, "Failed to get pod")
83+
return ctrl.Result{}, client.IgnoreNotFound(err)
84+
}
85+
86+
if !utils.ObjectHasAnnotationKey(pod, consts.DevicePluginWaitConfigAnnotation) {
87+
logger.Info("Annotation removed, device plugin can proceed")
88+
r.Cancel()
89+
return ctrl.Result{}, nil
90+
}
91+
92+
logger.Info("Annotation still present, waiting...")
93+
return ctrl.Result{RequeueAfter: consts.DaemonRequeueTime}, nil
94+
}
95+
96+
func (r *WaitForConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
97+
return ctrl.NewControllerManagedBy(mgr).
98+
For(&corev1.Pod{}).
99+
Complete(r)
100+
}
101+
102+
func validateWaitForConfigOpts() error {
103+
if waitForConfigOpts.podName == "" {
104+
return fmt.Errorf("--pod-name is required")
105+
}
106+
if waitForConfigOpts.podNamespace == "" {
107+
return fmt.Errorf("--pod-namespace is required")
108+
}
109+
return nil
110+
}
111+
112+
func runWaitForConfigCmd(cmd *cobra.Command, args []string) error {
113+
snolog.InitLog()
114+
setupLog := log.Log.WithName("wait-for-config")
115+
116+
if err := validateWaitForConfigOpts(); err != nil {
117+
setupLog.Error(err, "invalid command line arguments")
118+
return err
119+
}
120+
121+
config, err := rest.InClusterConfig()
122+
if err != nil {
123+
setupLog.Error(err, "failed to get in-cluster config")
124+
return err
125+
}
126+
127+
return startWaitForConfigManager(setupLog, config, types.NamespacedName{Name: waitForConfigOpts.podName, Namespace: waitForConfigOpts.podNamespace})
128+
}
129+
130+
func startWaitForConfigManager(setupLog logr.Logger, config *rest.Config, podName types.NamespacedName) error {
131+
ctx, cancel := context.WithCancel(ctrl.SetupSignalHandler())
132+
defer cancel()
133+
134+
setupLog.Info("Starting wait-for-config", "pod", podName)
135+
136+
// Create a temporary client to set the annotation immediately
137+
tempClient, err := client.New(config, client.Options{})
138+
if err != nil {
139+
setupLog.Error(err, "failed to create kubernetes client")
140+
return err
141+
}
142+
143+
// Set annotation on pod to signal that we are waiting for config
144+
setupLog.Info("Setting annotation on pod", "annotation", consts.DevicePluginWaitConfigAnnotation)
145+
err = setAnnotationOnPod(ctx, setupLog, tempClient, podName)
146+
if err != nil {
147+
setupLog.Error(err, "failed to set annotation on pod")
148+
return err
149+
}
150+
setupLog.Info("Annotation set successfully, waiting for removal")
151+
152+
// Configure Manager
153+
// Watch only specific pod object
154+
selector := fields.SelectorFromSet(fields.Set{"metadata.name": podName.Name})
155+
mgr, err := ctrl.NewManager(config, ctrl.Options{
156+
Metrics: metricsserver.Options{BindAddress: "0"},
157+
Cache: cache.Options{
158+
DefaultNamespaces: map[string]cache.Config{podName.Namespace: {}},
159+
ByObject: map[client.Object]cache.ByObject{
160+
&corev1.Pod{}: {Field: selector},
161+
},
162+
},
163+
})
164+
if err != nil {
165+
setupLog.Error(err, "unable to start manager")
166+
return err
167+
}
168+
169+
if err = (&WaitForConfigReconciler{
170+
Client: mgr.GetClient(),
171+
Pod: podName,
172+
Cancel: cancel,
173+
}).SetupWithManager(mgr); err != nil {
174+
setupLog.Error(err, "unable to create controller")
175+
return err
176+
}
177+
178+
setupLog.Info("Starting manager")
179+
if err := mgr.Start(ctx); err != nil {
180+
setupLog.Error(err, "problem running manager")
181+
return err
182+
}
183+
return nil
184+
}
185+
186+
// setAnnotationOnPod sets the wait-for-config annotation on the pod
187+
func setAnnotationOnPod(ctx context.Context, logger logr.Logger, c client.Client, podName types.NamespacedName) error {
188+
return wait.ExponentialBackoff(wait.Backoff{
189+
Steps: 10,
190+
Duration: 1 * time.Second,
191+
Factor: 2.0,
192+
Jitter: 0.1,
193+
Cap: 30 * time.Second,
194+
}, func() (bool, error) {
195+
pod := &corev1.Pod{}
196+
if err := c.Get(ctx, podName, pod); err != nil {
197+
logger.Error(err, "failed to get pod, retrying")
198+
return false, nil
199+
}
200+
if err := utils.AnnotateObject(ctx, pod, consts.DevicePluginWaitConfigAnnotation, "true", c); err != nil {
201+
logger.Error(err, "failed to annotate pod, retrying")
202+
return false, nil
203+
}
204+
return true, nil
205+
})
206+
}

0 commit comments

Comments
 (0)