Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 107 additions & 10 deletions pkg/network/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package network

import (
"context"
"errors"
"fmt"
"time"

Expand Down Expand Up @@ -203,7 +204,13 @@ func (builder *OperatorBuilder) SetIPForwarding(
return builder, err
}

// SetMultiNetworkPolicy enables network.operator multinetworkpolicy feature.
// SetMultiNetworkPolicy enables or disables the network.operator multinetworkpolicy feature.
// When disabling, the Cluster Network Operator often does not set Progressing=True; the initial
// wait for Progressing=True is skipped in that case only. Enabling keeps the original sequence.
// On disable, we wait until status.observedGeneration catches metadata.generation from the update
// so condition polling is not satisfied from stale status from before this change.
// The wait for Progressing=False uses a disable-specific path: CNO may omit the Progressing
// condition when idle, which would otherwise never match WaitUntilInCondition(..., False).
func (builder *OperatorBuilder) SetMultiNetworkPolicy(state bool, timeout time.Duration) (*OperatorBuilder, error) {
if valid, err := builder.validate(); !valid {
return builder, err
Expand All @@ -221,20 +228,34 @@ func (builder *OperatorBuilder) SetMultiNetworkPolicy(state bool, timeout time.D
return nil, err
}

err = builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeProgressing, 60*time.Second, operatorv1.ConditionTrue)
if err != nil {
return nil, err
targetGeneration := builder.Definition.Generation

if state {
err = builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeProgressing, 60*time.Second, operatorv1.ConditionTrue)
if err != nil {
return nil, err
}
Comment on lines +233 to +238
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe using a generation count is a WA. WDYT?

Suggested change
if state {
err = builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeProgressing, 60*time.Second, operatorv1.ConditionTrue)
if err != nil {
return nil, err
}
// Capture the generation that the API server assigned to this update.
// We need it to know when the operator has actually started processing *our* change
// (as opposed to still reflecting a previous reconciliation).
targetGeneration := builder.Definition.Generation
if state {
err = builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeProgressing, 60*time.Second, operatorv1.ConditionTrue)
if err != nil {
return nil, err
}
} else {
// When disabling, Progressing=True may never appear.
// Instead, wait until observedGeneration catches up to the generation
// we just wrote — that proves the operator has seen and processed our change.
err = builder.waitUntilObservedGeneration(targetGeneration, timeout)
if err != nil {
return nil, err
}
}
......
// waitUntilObservedGeneration waits until status.observedGeneration >= the target generation,
// proving the operator has reconciled the spec change.
func (builder *OperatorBuilder) waitUntilObservedGeneration(
targetGeneration int64, timeout time.Duration) error {
if valid, err := builder.validate(); !valid {
return err
}
klog.V(100).Infof("Waiting for network.operator %s observedGeneration >= %d",
builder.Definition.Name, targetGeneration)
return wait.PollUntilContextTimeout(
context.TODO(), 3*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
if !builder.Exists() {
return false, fmt.Errorf("network.operator object %s does not exist", builder.Definition.Name)
}
return builder.Object.Status.ObservedGeneration >= targetGeneration, nil
})
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea — waiting for observedGeneration >= generation after Update() can anchor waits on our spec change. I’d only add it after confirming CNO bumps observedGeneration reliably for useMultiNetworkPolicy on our versions. This PR stays minimal: skip Progressing=True on disable, keep Progressing=False + Available=True.

} else {
err = builder.waitUntilObservedGeneration(targetGeneration, timeout)
if err != nil {
return nil, err
}
}

if state {
err = builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeProgressing, timeout, operatorv1.ConditionFalse)
} else {
err = builder.waitUntilProgressingSettledOnDisable(timeout)
}

err = builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeProgressing, timeout, operatorv1.ConditionFalse)
if err != nil {
return nil, err
}

return builder, builder.WaitUntilInCondition(
operatorv1.OperatorStatusTypeAvailable, 60*time.Second, operatorv1.ConditionTrue)
operatorv1.OperatorStatusTypeAvailable, timeout, operatorv1.ConditionTrue)
}

return builder, err
Expand Down Expand Up @@ -288,8 +309,8 @@ func (builder *OperatorBuilder) WaitUntilInCondition(
return err
}

klog.V(100).Infof("Wait until network.operator object %s is in condition %v",
builder.Definition.Name, condition)
klog.V(100).Infof("Wait until network.operator object %s has condition %s=%s",
builder.Definition.Name, condition, status)

err := wait.PollUntilContextTimeout(
context.TODO(), 3*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
Expand All @@ -306,9 +327,85 @@ func (builder *OperatorBuilder) WaitUntilInCondition(
return false, nil
})

if err != nil && errors.Is(err, context.DeadlineExceeded) && builder.Object != nil {
klog.V(100).Infof("timeout waiting for network.operator %s condition %s=%s; last status conditions: %#v",
builder.Definition.Name, condition, status, builder.Object.Status.Conditions)
}

return err
}

// waitUntilObservedGeneration waits until status.observedGeneration reflects the given metadata.generation,
// indicating the operator has reconciled that spec revision.
func (builder *OperatorBuilder) waitUntilObservedGeneration(targetGeneration int64, timeout time.Duration) error {
if valid, err := builder.validate(); !valid {
return err
}

klog.V(100).Infof("Wait until network.operator %s observedGeneration >= %d",
builder.Definition.Name, targetGeneration)

return wait.PollUntilContextTimeout(
context.TODO(), 3*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
if !builder.Exists() {
return false, fmt.Errorf("network.operator object %s does not exist", builder.Definition.Name)
}

return builder.Object.Status.ObservedGeneration >= targetGeneration, nil
})
}

// waitUntilProgressingSettledOnDisable waits until Progressing is False, or until the operator
// reports available and not degraded while Progressing is absent (CNO may omit Progressing when idle).
func (builder *OperatorBuilder) waitUntilProgressingSettledOnDisable(timeout time.Duration) error {
if valid, err := builder.validate(); !valid {
return err
}

klog.V(100).Infof("Wait until network.operator %s is settled after disabling MultiNetworkPolicy",
builder.Definition.Name)

return wait.PollUntilContextTimeout(
context.TODO(), 3*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
if !builder.Exists() {
return false, fmt.Errorf("network.operator object %s does not exist", builder.Definition.Name)
}

for _, c := range builder.Object.Status.Conditions {
if c.Type != operatorv1.OperatorStatusTypeProgressing {
continue
}

return c.Status == operatorv1.ConditionFalse, nil
}

return operatorAvailableAndNotDegraded(builder.Object.Status.Conditions), nil
})
}

func operatorAvailableAndNotDegraded(conditions []operatorv1.OperatorCondition) bool {
var available, degraded *operatorv1.OperatorCondition

for i := range conditions {
switch conditions[i].Type {
case operatorv1.OperatorStatusTypeAvailable:
available = &conditions[i]
case operatorv1.OperatorStatusTypeDegraded:
degraded = &conditions[i]
}
}

if available == nil || available.Status != operatorv1.ConditionTrue {
return false
}

if degraded != nil && degraded.Status == operatorv1.ConditionTrue {
return false
}

return len(conditions) > 0
}

// validate will check that the builder and builder definition are properly initialized before
// accessing any member fields.
func (builder *OperatorBuilder) validate() (bool, error) {
Expand Down