Skip to content

Commit 10f4a47

Browse files
Merge pull request #605 from kyledong-suse/feature
feat: modify startup probe with INFO and final sync failure error
2 parents c3a4dcf + 0fc3a2c commit 10f4a47

5 files changed

Lines changed: 39 additions & 13 deletions

File tree

charts/runtime-enforcer/templates/agent/daemonset.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ spec:
9292
httpGet:
9393
path: /readyz
9494
port: 8081
95-
# we give 45s (5s delay + 8 attempts every 5s) for the startup probe to succeed
96-
initialDelaySeconds: 5
95+
# we give 50s (10s delay + 8 attempts every 5s) for the startup probe to succeed
96+
initialDelaySeconds: 10
9797
periodSeconds: 5
9898
failureThreshold: 8
9999
resources: {{- toYaml .Values.agent.resources | nindent 10 }}

cmd/agent/main.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ import (
3939
"sigs.k8s.io/controller-runtime/pkg/manager"
4040
)
4141

42+
const wpSyncInProgressMsg = "waiting for WorkloadPolicy synchronization to complete"
43+
4244
type Config struct {
4345
learningNamespaceSelector string
4446
nriSocketPath string
@@ -95,25 +97,30 @@ func setupWorkloadPolicyHandler(
9597
ctrlMgr manager.Manager,
9698
logger *slog.Logger,
9799
resolver *resolver.Resolver,
98-
) error {
100+
) (*workloadpolicyhandler.WorkloadPolicyHandler, error) {
99101
wpHandler := workloadpolicyhandler.NewWorkloadPolicyHandler(ctrlMgr.GetClient(), logger, resolver)
100102
err := wpHandler.SetupWithManager(ctrlMgr)
101103
if err != nil {
102-
return fmt.Errorf("unable to set up WorkloadPolicy handler: %w", err)
104+
return nil, fmt.Errorf("unable to set up WorkloadPolicy handler: %w", err)
103105
}
104106
// controller-runtime doesn't support a separate startup probe, so we use the readiness probe instead.
105107
// See https://github.com/kubernetes-sigs/controller-runtime/issues/2644 for more details.
106108
if err = ctrlMgr.AddReadyzCheck("policy readyz", func(req *http.Request) error {
107109
if syncErr := wpHandler.HasSynced(req.Context()); syncErr != nil {
108-
logger.ErrorContext(req.Context(), "WorkloadPolicy handler is not synced", "error", syncErr)
109-
return fmt.Errorf("WorkloadPolicy handler is not synced: %w", syncErr)
110+
logger.InfoContext(
111+
req.Context(),
112+
wpSyncInProgressMsg,
113+
"error",
114+
syncErr,
115+
)
116+
return fmt.Errorf("%s: %w", wpSyncInProgressMsg, syncErr)
110117
}
111118
return nil
112119
}); err != nil {
113-
return fmt.Errorf("failed to add policy readiness probe: %w", err)
120+
return nil, fmt.Errorf("failed to add policy readiness probe: %w", err)
114121
}
115122

116-
return nil
123+
return wpHandler, nil
117124
}
118125

119126
func waitForMutatingAdmissionWebhook(ctx context.Context) error {
@@ -239,7 +246,8 @@ func startAgent(ctx context.Context, logger *slog.Logger, config Config) error {
239246
return fmt.Errorf("failed to create resolver: %w", err)
240247
}
241248

242-
if err = setupWorkloadPolicyHandler(ctrlMgr, logger, resolver); err != nil {
249+
wpHandler, err := setupWorkloadPolicyHandler(ctrlMgr, logger, resolver)
250+
if err != nil {
243251
return err
244252
}
245253

@@ -298,6 +306,12 @@ func startAgent(ctx context.Context, logger *slog.Logger, config Config) error {
298306

299307
logger.InfoContext(ctx, "starting manager")
300308
if err = ctrlMgr.Start(ctx); err != nil {
309+
if !resolver.IsNRISynchronized() || !wpHandler.IsSynchronized() {
310+
logger.ErrorContext(ctx, "agent terminated before startup synchronization completed",
311+
"nriSynchronized", resolver.IsNRISynchronized(),
312+
"workloadPoliciesSynchronized", wpHandler.IsSynchronized(),
313+
)
314+
}
301315
return fmt.Errorf("failed to start manager: %w", err)
302316
}
303317

internal/nri/plugin.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/rancher-sandbox/runtime-enforcer/internal/types/workloadkind"
1515
)
1616

17+
const nriSyncRetryMsg = "NRI pod/container sync not ready yet, will retry"
18+
1719
type plugin struct {
1820
stub stub.Stub
1921
logger *slog.Logger
@@ -163,8 +165,8 @@ func (p *plugin) Synchronize(
163165
)
164166
if err := p.resolver.AddPodContainerFromNri(podData); err != nil {
165167
// This could be recoverable. Returning an error so we can retry.
166-
podLogger.ErrorContext(ctx, "failed to add pod container from NRI", "error", err)
167-
return nil, fmt.Errorf("failed to add pod container from NRI: %w", err)
168+
podLogger.InfoContext(ctx, nriSyncRetryMsg, "error", err)
169+
return nil, fmt.Errorf("%s: %w", nriSyncRetryMsg, err)
168170
}
169171
}
170172
// Mark the resolver as synchronized so the old agent can be safely removed.

internal/resolver/nri_api.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"github.com/rancher-sandbox/runtime-enforcer/internal/bpf"
99
)
1010

11+
const nriSyncInProgressMsg = "waiting for NRI synchronization to complete"
12+
1113
func convertPodData(pod PodInput) *podEntry {
1214
return &podEntry{
1315
meta: &pod.Meta,
@@ -113,10 +115,14 @@ func (r *Resolver) NRISynchronized() {
113115
r.nriSynchronized.Store(true)
114116
}
115117

118+
func (r *Resolver) IsNRISynchronized() bool {
119+
return r.nriSynchronized.Load()
120+
}
121+
116122
func (r *Resolver) Ping(req *http.Request) error {
117123
if !r.nriSynchronized.Load() {
118-
r.logger.WarnContext(req.Context(), "NRI handler has not yet synchronized")
119-
return errors.New("NRI handler has not yet synchronized")
124+
r.logger.InfoContext(req.Context(), nriSyncInProgressMsg)
125+
return errors.New(nriSyncInProgressMsg)
120126
}
121127
return nil
122128
}

internal/workloadpolicyhandler/workloadpolicy_handler.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ func (r *WorkloadPolicyHandler) HasSynced(ctx context.Context) error {
111111
return nil
112112
}
113113

114+
func (r *WorkloadPolicyHandler) IsSynchronized() bool {
115+
return r.hasSynced.Load()
116+
}
117+
114118
// SetupWithManager sets up the controller with the Manager.
115119
func (r *WorkloadPolicyHandler) SetupWithManager(mgr ctrl.Manager) error {
116120
err := ctrl.NewControllerManagedBy(mgr).

0 commit comments

Comments
 (0)