diff --git a/api/v1/conditions.go b/api/v1/conditions.go new file mode 100644 index 0000000000..c58cb54c54 --- /dev/null +++ b/api/v1/conditions.go @@ -0,0 +1,143 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Condition types used across SR-IOV Network Operator CRDs +const ( + // ConditionProgressing indicates that the resource is being actively reconciled + ConditionProgressing = "Progressing" + + // ConditionDegraded indicates that the resource is not functioning as expected + ConditionDegraded = "Degraded" + + // ConditionReady indicates that the resource has reached its desired state and is fully functional + ConditionReady = "Ready" + + // ConditionDrainProgressing indicates that the node is being actively drained + ConditionDrainProgressing = "DrainProgressing" + + // ConditionDrainDegraded indicates that the drain process is not functioning as expected + ConditionDrainDegraded = "DrainDegraded" + + // ConditionDrainComplete indicates that the drain operation completed successfully + ConditionDrainComplete = "DrainComplete" +) + +// Common condition reasons used across SR-IOV Network Operator CRDs +const ( + // Reasons for Ready condition + ReasonNetworkReady = "NetworkReady" + ReasonNodeReady = "NodeConfigurationReady" + ReasonNodeDrainReady = "NodeDrainReady" + ReasonOperatorReady = "OperatorReady" + ReasonNotReady = "NotReady" + + // Reasons for Degraded condition + ReasonProvisioningFailed = "ProvisioningFailed" + ReasonConfigurationFailed = "ConfigurationFailed" + ReasonNetworkAttachmentDefNotFound = "NetworkAttachmentDefinitionNotFound" + ReasonNetworkAttachmentDefInvalid = "NetworkAttachmentDefinitionInvalid" + ReasonNamespaceNotFound = "NamespaceNotFound" + ReasonHardwareError = "HardwareError" + ReasonDriverError = "DriverError" + ReasonOperatorComponentsNotHealthy = "OperatorComponentsNotHealthy" + ReasonNotDegraded = "NotDegraded" + + // Reasons for Progressing condition + ReasonConfiguringNode = "ConfiguringNode" + ReasonApplyingConfiguration = "ApplyingConfiguration" + ReasonCreatingVFs = "CreatingVFs" + ReasonLoadingDriver = "LoadingDriver" + ReasonDrainingNode = "DrainingNode" + ReasonNotProgressing = "NotProgressing" + + // Reasons for DrainComplete condition + ReasonDrainCompleted = "DrainCompleted" + ReasonDrainNotNeeded = "DrainNotNeeded" + ReasonDrainPending = "DrainPending" + + // Reasons for DrainDegraded condition + ReasonDrainFailed = "DrainFailed" + + // Reasons for Policy conditions + ReasonPolicyReady = "PolicyReady" + ReasonPolicyNotReady = "PolicyNotReady" + ReasonNoMatchingNodes = "NoMatchingNodes" + ReasonPartiallyApplied = "PartiallyApplied" + ReasonAllNodesConfigured = "AllNodesConfigured" + ReasonSomeNodesFailed = "SomeNodesFailed" + ReasonSomeNodesProgressing = "SomeNodesProgressing" + ReasonAllNodesProgressingOrReady = "AllNodesProgressingOrReady" +) + +// DrainState represents the current state of a drain operation +type DrainState string + +const ( + // DrainStateIdle indicates no drain is in progress + DrainStateIdle DrainState = "Idle" + // DrainStateDraining indicates drain is in progress with no errors + DrainStateDraining DrainState = "Draining" + // DrainStateDrainingWithErrors indicates drain is in progress but encountering errors + DrainStateDrainingWithErrors DrainState = "DrainingWithErrors" + // DrainStateComplete indicates drain completed successfully + DrainStateComplete DrainState = "Complete" +) + +// NetworkStatus defines the common observed state for network-type CRDs +type NetworkStatus struct { + // Conditions represent the latest available observations of the network's state + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} + +// ConditionsEqual compares two condition slices ignoring LastTransitionTime. +// This is useful to avoid unnecessary API updates when conditions haven't actually changed. +func ConditionsEqual(a, b []metav1.Condition) bool { + if len(a) != len(b) { + return false + } + + // Create maps for easier comparison + aMap := make(map[string]metav1.Condition) + for _, c := range a { + aMap[c.Type] = c + } + + for _, bc := range b { + ac, exists := aMap[bc.Type] + if !exists { + return false + } + // Compare all fields except LastTransitionTime + if ac.Status != bc.Status || + ac.Reason != bc.Reason || + ac.Message != bc.Message || + ac.ObservedGeneration != bc.ObservedGeneration { + return false + } + } + return true +} diff --git a/api/v1/conditions_suite_test.go b/api/v1/conditions_suite_test.go new file mode 100644 index 0000000000..7355142651 --- /dev/null +++ b/api/v1/conditions_suite_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestConditions(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Conditions Suite") +} diff --git a/api/v1/conditions_test.go b/api/v1/conditions_test.go new file mode 100644 index 0000000000..31d33a90b8 --- /dev/null +++ b/api/v1/conditions_test.go @@ -0,0 +1,121 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" +) + +var _ = Describe("ConditionsEqual", func() { + DescribeTable("comparing conditions", + func(a, b []metav1.Condition, expected bool) { + result := v1.ConditionsEqual(a, b) + Expect(result).To(Equal(expected)) + }, + Entry("both empty", + []metav1.Condition{}, + []metav1.Condition{}, + true, + ), + Entry("different length", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue}, + }, + []metav1.Condition{}, + false, + ), + Entry("same conditions", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + true, + ), + Entry("different status", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionFalse, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + false, + ), + Entry("different reason", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNotReady, Message: "ready", ObservedGeneration: 1}, + }, + false, + ), + Entry("different message", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "not ready", ObservedGeneration: 1}, + }, + false, + ), + Entry("different observedGeneration", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 2}, + }, + false, + ), + Entry("different LastTransitionTime should still be equal", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1, LastTransitionTime: metav1.Now()}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1, LastTransitionTime: metav1.Now()}, + }, + true, + ), + Entry("multiple conditions same", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + {Type: v1.ConditionProgressing, Status: metav1.ConditionFalse, Reason: v1.ReasonNotProgressing, Message: "not progressing", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + {Type: v1.ConditionProgressing, Status: metav1.ConditionFalse, Reason: v1.ReasonNotProgressing, Message: "not progressing", ObservedGeneration: 1}, + }, + true, + ), + Entry("condition type not found in second slice", + []metav1.Condition{ + {Type: v1.ConditionReady, Status: metav1.ConditionTrue, Reason: v1.ReasonNodeReady, Message: "ready", ObservedGeneration: 1}, + }, + []metav1.Condition{ + {Type: v1.ConditionProgressing, Status: metav1.ConditionFalse, Reason: v1.ReasonNotProgressing, Message: "not progressing", ObservedGeneration: 1}, + }, + false, + ), + ) +}) diff --git a/api/v1/helper.go b/api/v1/helper.go index 65306e037e..5a84c3a04d 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -692,6 +692,16 @@ func (cr *SriovIBNetwork) NetworkNamespace() string { return cr.Spec.NetworkNamespace } +// GetConditions returns the conditions from the status +func (cr *SriovIBNetwork) GetConditions() []metav1.Condition { + return cr.Status.Conditions +} + +// SetConditions sets the conditions in the status +func (cr *SriovIBNetwork) SetConditions(conditions []metav1.Condition) { + cr.Status.Conditions = conditions +} + // RenderNetAttDef renders a net-att-def for sriov CNI func (cr *SriovNetwork) RenderNetAttDef() (*uns.Unstructured, error) { logger := log.WithName("RenderNetAttDef") @@ -811,6 +821,16 @@ func (cr *SriovNetwork) NetworkNamespace() string { return cr.Spec.NetworkNamespace } +// GetConditions returns the conditions from the status +func (cr *SriovNetwork) GetConditions() []metav1.Condition { + return cr.Status.Conditions +} + +// SetConditions sets the conditions in the status +func (cr *SriovNetwork) SetConditions(conditions []metav1.Condition) { + cr.Status.Conditions = conditions +} + // RenderNetAttDef renders a net-att-def for sriov CNI func (cr *OVSNetwork) RenderNetAttDef() (*uns.Unstructured, error) { logger := log.WithName("RenderNetAttDef") @@ -874,6 +894,16 @@ func (cr *OVSNetwork) NetworkNamespace() string { return cr.Spec.NetworkNamespace } +// GetConditions returns the conditions from the status +func (cr *OVSNetwork) GetConditions() []metav1.Condition { + return cr.Status.Conditions +} + +// SetConditions sets the conditions in the status +func (cr *OVSNetwork) SetConditions(conditions []metav1.Condition) { + cr.Status.Conditions = conditions +} + // NetFilterMatch -- parse netFilter and check for a match func NetFilterMatch(netFilter string, netValue string) (isMatch bool) { logger := log.WithName("NetFilterMatch") diff --git a/api/v1/ovsnetwork_types.go b/api/v1/ovsnetwork_types.go index 05b69bf7f6..80da1e6a1b 100644 --- a/api/v1/ovsnetwork_types.go +++ b/api/v1/ovsnetwork_types.go @@ -63,10 +63,14 @@ type TrunkConfig struct { // OVSNetworkStatus defines the observed state of OVSNetwork type OVSNetworkStatus struct { + NetworkStatus `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status` +//+kubebuilder:printcolumn:name="Degraded",type=string,JSONPath=`.status.conditions[?(@.type=="Degraded")].status` +//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" // OVSNetwork is the Schema for the ovsnetworks API type OVSNetwork struct { diff --git a/api/v1/sriovibnetwork_types.go b/api/v1/sriovibnetwork_types.go index d8634d9ca7..6358987508 100644 --- a/api/v1/sriovibnetwork_types.go +++ b/api/v1/sriovibnetwork_types.go @@ -47,12 +47,14 @@ type SriovIBNetworkSpec struct { // SriovIBNetworkStatus defines the observed state of SriovIBNetwork type SriovIBNetworkStatus struct { - // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster - // Important: Run "make" to regenerate code after modifying this file + NetworkStatus `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status` +//+kubebuilder:printcolumn:name="Degraded",type=string,JSONPath=`.status.conditions[?(@.type=="Degraded")].status` +//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" // SriovIBNetwork is the Schema for the sriovibnetworks API type SriovIBNetwork struct { diff --git a/api/v1/sriovnetwork_types.go b/api/v1/sriovnetwork_types.go index 1119a48e00..c12911130f 100644 --- a/api/v1/sriovnetwork_types.go +++ b/api/v1/sriovnetwork_types.go @@ -75,12 +75,14 @@ type SriovNetworkSpec struct { // SriovNetworkStatus defines the observed state of SriovNetwork type SriovNetworkStatus struct { - // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster - // Important: Run "make" to regenerate code after modifying this file + NetworkStatus `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status` +//+kubebuilder:printcolumn:name="Degraded",type=string,JSONPath=`.status.conditions[?(@.type=="Degraded")].status` +//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" // SriovNetwork is the Schema for the sriovnetworks API type SriovNetwork struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 307d07de69..d84bdfed29 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -171,6 +171,28 @@ func (in Interfaces) DeepCopy() Interfaces { return *out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NetworkStatus) DeepCopyInto(out *NetworkStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkStatus. +func (in *NetworkStatus) DeepCopy() *NetworkStatus { + if in == nil { + return nil + } + out := new(NetworkStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OVSBridgeConfig) DeepCopyInto(out *OVSBridgeConfig) { *out = *in @@ -287,7 +309,7 @@ func (in *OVSNetwork) DeepCopyInto(out *OVSNetwork) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVSNetwork. @@ -369,6 +391,7 @@ func (in *OVSNetworkSpec) DeepCopy() *OVSNetworkSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OVSNetworkStatus) DeepCopyInto(out *OVSNetworkStatus) { *out = *in + in.NetworkStatus.DeepCopyInto(&out.NetworkStatus) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVSNetworkStatus. @@ -453,7 +476,7 @@ func (in *SriovIBNetwork) DeepCopyInto(out *SriovIBNetwork) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) out.Spec = in.Spec - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovIBNetwork. @@ -524,6 +547,7 @@ func (in *SriovIBNetworkSpec) DeepCopy() *SriovIBNetworkSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SriovIBNetworkStatus) DeepCopyInto(out *SriovIBNetworkStatus) { *out = *in + in.NetworkStatus.DeepCopyInto(&out.NetworkStatus) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovIBNetworkStatus. @@ -542,7 +566,7 @@ func (in *SriovNetwork) DeepCopyInto(out *SriovNetwork) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetwork. @@ -953,6 +977,7 @@ func (in *SriovNetworkSpec) DeepCopy() *SriovNetworkSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SriovNetworkStatus) DeepCopyInto(out *SriovNetworkStatus) { *out = *in + in.NetworkStatus.DeepCopyInto(&out.NetworkStatus) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkStatus. diff --git a/config/crd/bases/sriovnetwork.openshift.io_ovsnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_ovsnetworks.yaml index 63da95e46c..0c87629928 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_ovsnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_ovsnetworks.yaml @@ -14,7 +14,17 @@ spec: singular: ovsnetwork scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.conditions[?(@.type=="Degraded")].status + name: Degraded + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: OVSNetwork is the Schema for the ovsnetworks API @@ -97,6 +107,68 @@ spec: type: object status: description: OVSNetworkStatus defines the observed state of OVSNetwork + properties: + conditions: + description: Conditions represent the latest available observations + of the network's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml index 93e6276485..6101be69a3 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -14,7 +14,17 @@ spec: singular: sriovibnetwork scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.conditions[?(@.type=="Degraded")].status + name: Degraded + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: SriovIBNetwork is the Schema for the sriovibnetworks API @@ -70,6 +80,68 @@ spec: type: object status: description: SriovIBNetworkStatus defines the observed state of SriovIBNetwork + properties: + conditions: + description: Conditions represent the latest available observations + of the network's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml index 29e686b352..d712a2e74f 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml @@ -14,7 +14,17 @@ spec: singular: sriovnetwork scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.conditions[?(@.type=="Degraded")].status + name: Degraded + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: SriovNetwork is the Schema for the sriovnetworks API @@ -128,6 +138,68 @@ spec: type: object status: description: SriovNetworkStatus defines the observed state of SriovNetwork + properties: + conditions: + description: Conditions represent the latest available observations + of the network's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/controllers/generic_network_controller.go b/controllers/generic_network_controller.go index f5b60294a0..81e98f99f3 100644 --- a/controllers/generic_network_controller.go +++ b/controllers/generic_network_controller.go @@ -39,6 +39,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -49,6 +50,10 @@ type NetworkCRInstance interface { RenderNetAttDef() (*uns.Unstructured, error) // return name of the target namespace for the network NetworkNamespace() string + // GetConditions returns the conditions from the status + GetConditions() []metav1.Condition + // SetConditions sets the conditions in the status + SetConditions(conditions []metav1.Condition) } // interface which controller should implement to be compatible with genericNetworkReconciler @@ -63,15 +68,21 @@ type networkController interface { Name() string } -func newGenericNetworkReconciler(c client.Client, s *runtime.Scheme, controller networkController) *genericNetworkReconciler { - return &genericNetworkReconciler{Client: c, Scheme: s, controller: controller} +func newGenericNetworkReconciler(c client.Client, s *runtime.Scheme, controller networkController, statusPatcher status.Interface) *genericNetworkReconciler { + return &genericNetworkReconciler{ + Client: c, + Scheme: s, + controller: controller, + statusPatcher: statusPatcher, + } } // genericNetworkReconciler provide common code for all network controllers type genericNetworkReconciler struct { client.Client - Scheme *runtime.Scheme - controller networkController + Scheme *runtime.Scheme + controller networkController + statusPatcher status.Interface } func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -102,14 +113,20 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque } if instance.NetworkNamespace() != "" && instance.GetNamespace() != vars.Namespace { + errMsg := fmt.Errorf("spec.networkNamespace cannot be set when the resource is not in the operator namespace (%s)", vars.Namespace) reqLogger.Error( - fmt.Errorf("bad value for NetworkNamespace"), + errMsg, ".spec.networkNamespace can't be specified if the resource belongs to a namespace other than the operator's", "operatorNamespace", vars.Namespace, ".metadata.namespace", instance.GetNamespace(), ".spec.networkNamespace", instance.NetworkNamespace(), ) - return reconcile.Result{}, nil + // Update conditions to reflect the error + err = r.updateConditions(ctx, instance, false, errMsg) + if err != nil { + reqLogger.Error(err, "Failed to update conditions") + } + return reconcile.Result{}, err } // examine DeletionTimestamp to determine if object is under deletion @@ -128,6 +145,11 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque } raw, err := instance.RenderNetAttDef() if err != nil { + // Update conditions to reflect rendering failure + updateErr := r.updateConditions(ctx, instance, false, err) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions") + } return reconcile.Result{}, err } netAttDef := &netattdefv1.NetworkAttachmentDefinition{} @@ -170,6 +192,12 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque err = r.Get(ctx, types.NamespacedName{Name: netAttDef.Namespace}, targetNamespace) if errors.IsNotFound(err) { reqLogger.Info("Target namespace doesn't exist, NetworkAttachmentDefinition will be created when namespace is available", "Namespace", netAttDef.Namespace, "Name", netAttDef.Name) + // Update conditions to reflect waiting state + updateErr := r.updateConditions(ctx, instance, false, nil) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions") + return reconcile.Result{}, updateErr + } return reconcile.Result{}, nil } @@ -177,6 +205,12 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque err = r.Create(ctx, netAttDef) if err != nil { reqLogger.Error(err, "Couldn't create NetworkAttachmentDefinition CR", "Namespace", netAttDef.Namespace, "Name", netAttDef.Name) + // Update conditions to reflect creation failure + updateErr := r.updateConditions(ctx, instance, false, err) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions") + return reconcile.Result{}, updateErr + } return reconcile.Result{}, err } @@ -184,8 +218,20 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque if err != nil { return reconcile.Result{}, err } + + // Update conditions to reflect successful creation + updateErr := r.updateConditions(ctx, instance, true, nil) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions") + return reconcile.Result{}, updateErr + } } else { reqLogger.Error(err, "Couldn't get NetworkAttachmentDefinition CR", "Namespace", netAttDef.Namespace, "Name", netAttDef.Name) + // Update conditions to reflect get failure + updateErr := r.updateConditions(ctx, instance, false, err) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions") + } return reconcile.Result{}, err } } else { @@ -201,6 +247,13 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque "Namespace", netAttDef.Namespace, "Name", netAttDef.Name, "CurrentOwner", foundOwner, "ExpectedOwner", expectedOwner, ) + // Update conditions to reflect ownership conflict + ownershipErr := fmt.Errorf("NetworkAttachmentDefinition '%s/%s' already exists and is owned by %s", netAttDef.Namespace, netAttDef.Name, foundOwner) + updateErr := r.updateConditions(ctx, instance, false, ownershipErr) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions") + return reconcile.Result{}, updateErr + } return reconcile.Result{}, nil } @@ -211,9 +264,21 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque err = r.Update(ctx, netAttDef) if err != nil { reqLogger.Error(err, "Couldn't update NetworkAttachmentDefinition CR", "Namespace", netAttDef.Namespace, "Name", netAttDef.Name) + // Update conditions to reflect update failure + updateErr := r.updateConditions(ctx, instance, false, err) + if updateErr != nil { + reqLogger.Error(updateErr, "Failed to update conditions", "Namespace", instance.GetNamespace(), "Name", instance.GetName()) + } return reconcile.Result{}, err } } + + // NetworkAttachmentDefinition exists and is up to date + err = r.updateConditions(ctx, instance, true, nil) + if err != nil { + reqLogger.Error(err, "Failed to update conditions") + return reconcile.Result{}, err + } } return ctrl.Result{}, nil @@ -338,9 +403,70 @@ func (r *genericNetworkReconciler) cleanResourcesAndFinalizers(ctx context.Conte if found { instance.SetFinalizers(newFinalizers) if err := r.Update(ctx, instance); err != nil { + // If the object is not found, it's already been deleted + if errors.IsNotFound(err) { + return nil + } return err } } } return nil } + +// updateConditions updates the conditions in the instance status +func (r *genericNetworkReconciler) updateConditions(ctx context.Context, instance NetworkCRInstance, nadFound bool, err error) error { + updateInstance := r.controller.GetObject() + getErr := r.Get(ctx, types.NamespacedName{Namespace: instance.GetNamespace(), Name: instance.GetName()}, updateInstance) + if getErr != nil { + // If the object is not found, skip updating conditions + if errors.IsNotFound(getErr) { + return nil + } + return getErr + } + + oldConditions := updateInstance.GetConditions() + newConditions := make([]metav1.Condition, len(oldConditions)) + copy(newConditions, oldConditions) + + if err != nil { + // Network provisioning failed + status.SetCondition(&newConditions, sriovnetworkv1.ConditionReady, metav1.ConditionFalse, + sriovnetworkv1.ReasonProvisioningFailed, fmt.Sprintf("Failed to provision network: %v", err), + instance.GetGeneration()) + status.SetCondition(&newConditions, sriovnetworkv1.ConditionDegraded, metav1.ConditionTrue, + sriovnetworkv1.ReasonProvisioningFailed, fmt.Sprintf("NetworkAttachmentDefinition provisioning failed: %v", err), + instance.GetGeneration()) + } else if nadFound { + // Network is ready + status.SetCondition(&newConditions, sriovnetworkv1.ConditionReady, metav1.ConditionTrue, + sriovnetworkv1.ReasonNetworkReady, "NetworkAttachmentDefinition is provisioned and ready", + instance.GetGeneration()) + status.SetCondition(&newConditions, sriovnetworkv1.ConditionDegraded, metav1.ConditionFalse, + sriovnetworkv1.ReasonNotDegraded, "Network is functioning correctly", + instance.GetGeneration()) + } else { + // NAD not found yet (waiting for namespace) - this is a degraded state + status.SetCondition(&newConditions, sriovnetworkv1.ConditionReady, metav1.ConditionFalse, + sriovnetworkv1.ReasonNotReady, "Waiting for target namespace to be created", + instance.GetGeneration()) + status.SetCondition(&newConditions, sriovnetworkv1.ConditionDegraded, metav1.ConditionTrue, + sriovnetworkv1.ReasonNamespaceNotFound, "Target namespace does not exist", + instance.GetGeneration()) + } + + // Only update status if conditions changed + if !sriovnetworkv1.ConditionsEqual(oldConditions, newConditions) { + // Use the StatusPatcher to update status with automatic retry and event emission + if updateErr := r.statusPatcher.UpdateStatusWithEvents(ctx, updateInstance, oldConditions, newConditions, func() error { + updateInstance.SetConditions(newConditions) + return nil + }); updateErr != nil { + log.FromContext(ctx).Error(updateErr, "Failed to update status conditions") + return updateErr + } + } + + return nil +} diff --git a/controllers/generic_network_controller_test.go b/controllers/generic_network_controller_test.go index 7ef9da6b78..15900202e8 100644 --- a/controllers/generic_network_controller_test.go +++ b/controllers/generic_network_controller_test.go @@ -2,6 +2,7 @@ package controllers import ( "context" + "fmt" "sync" "time" @@ -15,33 +16,38 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) var _ = Describe("All Network Controllers", Ordered, func() { var cancel context.CancelFunc var ctx context.Context - BeforeAll(func() { By("Setup controller manager") k8sManager, err := setupK8sManagerForTest() Expect(err).ToNot(HaveOccurred()) + statusPatcher := status.NewPatcher(k8sManager.GetClient(), k8sManager.GetEventRecorder("test"), k8sManager.GetScheme()) + err = (&SriovNetworkReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + StatusPatcher: statusPatcher, }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) err = (&SriovIBNetworkReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + StatusPatcher: statusPatcher, }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) err = (&OVSNetworkReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + StatusPatcher: statusPatcher, }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) @@ -145,6 +151,30 @@ var _ = Describe("All Network Controllers", Ordered, func() { g.Expect(netAttDef.Spec.Config).To(ContainSubstring(`"sriov"`)) g.Expect(netAttDef.Spec.Config).ToNot(ContainSubstring(`"ib-sriov"`)) }).WithPolling(30 * time.Millisecond).WithTimeout(300 * time.Millisecond).Should(Succeed()) + + By("Check that the second network has Degraded condition due to ownership conflict") + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr2.Name, Namespace: cr2.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + conditions := network.Status.Conditions + g.Expect(conditions).NotTo(BeEmpty()) + + // The second network should have Ready=False since it couldn't provision + readyCondition := findCondition(conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonProvisioningFailed)) + g.Expect(readyCondition.Message).To(ContainSubstring("Failed to provision network")) + + // The second network should have Degraded=True + degradedCondition := findCondition(conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).NotTo(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonProvisioningFailed)) + g.Expect(degradedCondition.Message).To(ContainSubstring("NetworkAttachmentDefinition provisioning failed")) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) }) It("when using the same network type with the same name, in different namespaces", func() { @@ -178,10 +208,329 @@ var _ = Describe("All Network Controllers", Ordered, func() { g.Expect(netAttDef.Spec.Config).To(ContainSubstring(`"min_tx_rate": 42`)) g.Expect(netAttDef.Spec.Config).ToNot(ContainSubstring(`"min_tx_rate": 84`)) }).WithPolling(30 * time.Millisecond).WithTimeout(300 * time.Millisecond).Should(Succeed()) + + By("Check that the second network has Degraded condition due to same name conflict") + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr2.Name, Namespace: cr2.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + conditions := network.Status.Conditions + g.Expect(conditions).NotTo(BeEmpty()) + + // The second network should have Ready=False since it couldn't provision + readyCondition := findCondition(conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonProvisioningFailed)) + + // The second network should have Degraded=True + degradedCondition := findCondition(conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).NotTo(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonProvisioningFailed)) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) }) }) }) + Context("Conditions", func() { + + AfterEach(func() { + cleanNetworksInNamespace(testNamespace) + cleanNetworksInNamespace("default") + }) + + It("should set Ready condition when NetworkAttachmentDefinition is created", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ready-condition", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "test-resource", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + // Check Ready condition is True + g.Expect(network.Status.Conditions).ToNot(BeEmpty()) + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNetworkReady)) + g.Expect(readyCondition.ObservedGeneration).To(Equal(network.Generation)) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + }) + + It("should set Ready=True immediately after NAD creation (not waiting state)", func() { + // This test verifies the fix for the bug where after successfully creating + // the NetworkAttachmentDefinition, the controller incorrectly set conditions + // indicating it was waiting for the namespace instead of being ready. + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nad-creation-condition", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "test-resource-creation", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + + // First, wait for the NAD to be created + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + err = util.WaitForNamespacedObject(netAttDef, k8sClient, "default", cr.GetName(), util.RetryInterval, util.Timeout) + Expect(err).NotTo(HaveOccurred()) + + // Now check that conditions are correctly set to Ready=True + // This should happen immediately after NAD creation, not require a second reconcile + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + // Conditions must be set + g.Expect(network.Status.Conditions).ToNot(BeEmpty()) + + // Ready must be True (not False with "waiting for namespace" reason) + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue), + "Ready condition should be True after NAD creation, got %s with reason %s", + readyCondition.Status, readyCondition.Reason) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNetworkReady), + "Ready reason should be NetworkReady, not waiting for namespace") + + // Degraded must be False + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + }) + + It("should set Degraded condition to False when network is healthy", func() { + cr := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-healthy-condition", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "test-resource", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + // Check Degraded condition is False + g.Expect(network.Status.Conditions).ToNot(BeEmpty()) + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonNotDegraded)) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + }) + + It("should set Ready to False when waiting for namespace", func() { + cr := sriovnetworkv1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-waiting-condition", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.OVSNetworkSpec{ + NetworkNamespace: "nonexistent-namespace", + ResourceName: "test-resource", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.OVSNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + // Check Ready condition is False with NotReady reason + g.Expect(network.Status.Conditions).ToNot(BeEmpty()) + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNotReady)) + g.Expect(readyCondition.Message).To(ContainSubstring("Waiting for target namespace")) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + }) + + It("should update ObservedGeneration when spec changes", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-generation", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "test-resource", + Vlan: 100, + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + + var initialGeneration int64 + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(network.Status.Conditions).ToNot(BeEmpty()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + initialGeneration = readyCondition.ObservedGeneration + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + // Update the spec + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + network.Spec.Vlan = 200 + err = k8sClient.Update(ctx, network) + g.Expect(err).NotTo(HaveOccurred()) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + // Verify ObservedGeneration is updated + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.ObservedGeneration).To(BeNumerically(">", initialGeneration)) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + }) + + It("should maintain both Ready and Degraded conditions", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-multiple-conditions", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "test-resource", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: cr.Name, Namespace: cr.Namespace}, network) + g.Expect(err).NotTo(HaveOccurred()) + + // Should have both Ready and Degraded conditions + g.Expect(network.Status.Conditions).To(HaveLen(2)) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + }) + + It("should work for all network types (SriovNetwork, SriovIBNetwork, OVSNetwork)", func() { + networks := []struct { + name string + obj client.Object + }{ + { + name: "sriov-test", + obj: &sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "sriov-test", Namespace: testNamespace}, + Spec: sriovnetworkv1.SriovNetworkSpec{NetworkNamespace: "default", ResourceName: "test"}, + }, + }, + { + name: "sriovib-test", + obj: &sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "sriovib-test", Namespace: testNamespace}, + Spec: sriovnetworkv1.SriovIBNetworkSpec{NetworkNamespace: "default", ResourceName: "test"}, + }, + }, + { + name: "ovs-test", + obj: &sriovnetworkv1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "ovs-test", Namespace: testNamespace}, + Spec: sriovnetworkv1.OVSNetworkSpec{NetworkNamespace: "default", ResourceName: "test"}, + }, + }, + } + + for _, network := range networks { + err := k8sClient.Create(ctx, network.obj) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func(g Gomega) { + var networkWithConditions NetworkCRInstance + switch v := network.obj.(type) { + case *sriovnetworkv1.SriovNetwork: + n := &sriovnetworkv1.SriovNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: network.name, Namespace: testNamespace}, n) + g.Expect(err).NotTo(HaveOccurred()) + networkWithConditions = n + case *sriovnetworkv1.SriovIBNetwork: + n := &sriovnetworkv1.SriovIBNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: network.name, Namespace: testNamespace}, n) + g.Expect(err).NotTo(HaveOccurred()) + networkWithConditions = n + case *sriovnetworkv1.OVSNetwork: + n := &sriovnetworkv1.OVSNetwork{} + err := k8sClient.Get(ctx, client.ObjectKey{Name: network.name, Namespace: testNamespace}, n) + g.Expect(err).NotTo(HaveOccurred()) + networkWithConditions = n + default: + Fail(fmt.Sprintf("unexpected network type: %T", v)) + } + + // Verify conditions are set + conditions := networkWithConditions.GetConditions() + g.Expect(conditions).ToNot(BeEmpty()) + + // Verify Ready condition + readyCondition := findCondition(conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNetworkReady)) + }, 5*time.Second, 100*time.Millisecond).Should(Succeed()) + } + }) + }) + }) func cleanNetworksInNamespace(namespace string) { diff --git a/controllers/ovsnetwork_controller.go b/controllers/ovsnetwork_controller.go index 53dbe3fa63..6397546007 100644 --- a/controllers/ovsnetwork_controller.go +++ b/controllers/ovsnetwork_controller.go @@ -24,12 +24,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" ) // OVSNetworkReconciler reconciles a OVSNetwork object type OVSNetworkReconciler struct { client.Client Scheme *runtime.Scheme + StatusPatcher status.Interface genericReconciler *genericNetworkReconciler } @@ -59,6 +61,6 @@ func (r *OVSNetworkReconciler) GetObjectList() client.ObjectList { // SetupWithManager sets up the controller with the Manager. func (r *OVSNetworkReconciler) SetupWithManager(mgr ctrl.Manager) error { - r.genericReconciler = newGenericNetworkReconciler(r.Client, r.Scheme, r) + r.genericReconciler = newGenericNetworkReconciler(r.Client, r.Scheme, r, r.StatusPatcher) return r.genericReconciler.SetupWithManager(mgr) } diff --git a/controllers/ovsnetwork_controller_test.go b/controllers/ovsnetwork_controller_test.go index ef464e5e9b..b153a2d389 100644 --- a/controllers/ovsnetwork_controller_test.go +++ b/controllers/ovsnetwork_controller_test.go @@ -15,6 +15,7 @@ import ( . "github.com/onsi/gomega" v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) @@ -53,8 +54,9 @@ var _ = Describe("OVSNetwork Controller", Ordered, func() { Expect(err).NotTo(HaveOccurred()) err = (&OVSNetworkReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + StatusPatcher: status.NewPatcher(k8sManager.GetClient(), k8sManager.GetEventRecorder("test"), k8sManager.GetScheme()), }).SetupWithManager(k8sManager) Expect(err).NotTo(HaveOccurred()) @@ -186,4 +188,238 @@ var _ = Describe("OVSNetwork Controller", Ordered, func() { }, util.APITimeout, util.RetryInterval).Should(Succeed()) }) }) + + Context("conditions", func() { + It("should set Ready=True and Degraded=False when network is successfully provisioned", func() { + netCR := &v1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovs-cond-ready", + Namespace: testNamespace, + }, + Spec: v1.OVSNetworkSpec{ + ResourceName: "ovs_resource_cond", + }, + } + + Expect(k8sClient.Create(ctx, netCR)).NotTo(HaveOccurred()) + DeferCleanup(func() { removeOVSNetwork(ctx, netCR) }) + + Eventually(func(g Gomega) { + network := &v1.OVSNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: netCR.Name, Namespace: testNamespace}, network)).To(Succeed()) + + // Verify Ready condition + readyCondition := findCondition(network.Status.Conditions, v1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCondition.Reason).To(Equal(v1.ReasonNetworkReady)) + g.Expect(readyCondition.Message).To(ContainSubstring("NetworkAttachmentDefinition is provisioned")) + + // Verify Degraded condition + degradedCondition := findCondition(network.Status.Conditions, v1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(degradedCondition.Reason).To(Equal(v1.ReasonNotDegraded)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should set Ready=False and Degraded=True when target namespace does not exist", func() { + netCR := &v1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovs-cond-degraded", + Namespace: testNamespace, + }, + Spec: v1.OVSNetworkSpec{ + NetworkNamespace: "ovs-non-existent-ns-degraded", + ResourceName: "ovs_resource_cond_degraded", + }, + } + + Expect(k8sClient.Create(ctx, netCR)).NotTo(HaveOccurred()) + DeferCleanup(func() { removeOVSNetwork(ctx, netCR) }) + + // Wait for conditions to be set + time.Sleep(2 * time.Second) + + Eventually(func(g Gomega) { + network := &v1.OVSNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: netCR.Name, Namespace: testNamespace}, network)).To(Succeed()) + + // Verify Ready condition is False (waiting for namespace) + readyCondition := findCondition(network.Status.Conditions, v1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(v1.ReasonNotReady)) + + // Verify Degraded condition is True (namespace not found) + degradedCondition := findCondition(network.Status.Conditions, v1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Reason).To(Equal(v1.ReasonNamespaceNotFound)) + g.Expect(degradedCondition.Message).To(ContainSubstring("namespace does not exist")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should have consistent observedGeneration in conditions", func() { + netCR := &v1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovs-cond-gen", + Namespace: testNamespace, + }, + Spec: v1.OVSNetworkSpec{ + ResourceName: "ovs_resource_cond_gen", + }, + } + + Expect(k8sClient.Create(ctx, netCR)).NotTo(HaveOccurred()) + DeferCleanup(func() { removeOVSNetwork(ctx, netCR) }) + + Eventually(func(g Gomega) { + network := &v1.OVSNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: netCR.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, v1.ConditionReady) + degradedCondition := findCondition(network.Status.Conditions, v1.ConditionDegraded) + + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(degradedCondition).ToNot(BeNil()) + + // Both conditions should have the same observedGeneration + g.Expect(readyCondition.ObservedGeneration).To(Equal(degradedCondition.ObservedGeneration)) + // And it should match the current generation + g.Expect(readyCondition.ObservedGeneration).To(Equal(network.Generation)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should set Degraded=True when app network sets NetworkNamespace from non-operator namespace", func() { + appNs := "ovs-app-ns-cross" + // Create application namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: appNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Network in application namespace with cross-namespace target - should be degraded + netCR := &v1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovs-app-cross", + Namespace: appNs, + }, + Spec: v1.OVSNetworkSpec{ + NetworkNamespace: "default", // Different from its own namespace + ResourceName: "ovs_resource_cross", + }, + } + + Expect(k8sClient.Create(ctx, netCR)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, netCR)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &v1.OVSNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: netCR.Name, Namespace: appNs}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, v1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + + degradedCondition := findCondition(network.Status.Conditions, v1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Message).To(ContainSubstring("networkNamespace cannot be set")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should mark app network as degraded when operator network already owns the NAD with same name", func() { + targetNs := "ovs-target-ns-conflict" + nadName := "ovs-test-nad-conflict" + + // Create target namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: targetNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Network 1: In operator namespace, targeting target-ns + netCROperator := &v1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: nadName, + Namespace: testNamespace, + }, + Spec: v1.OVSNetworkSpec{ + NetworkNamespace: targetNs, + ResourceName: "ovs_resource_op", + }, + } + + // Network 2: In target namespace with SAME NAME + netCRApp := &v1.OVSNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: nadName, + Namespace: targetNs, + }, + Spec: v1.OVSNetworkSpec{ + ResourceName: "ovs_resource_app", + }, + } + + // Create operator network first + Expect(k8sClient.Create(ctx, netCROperator)).NotTo(HaveOccurred()) + DeferCleanup(func() { removeOVSNetwork(ctx, netCROperator) }) + + // Wait for NAD to be created by operator network + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nadName, Namespace: targetNs}, netAttDef)).NotTo(HaveOccurred()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Network from operator namespace should be Ready + Eventually(func(g Gomega) { + network := &v1.OVSNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: netCROperator.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, v1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + + degradedCondition := findCondition(network.Status.Conditions, v1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Now create app network with same name - should be degraded + Expect(k8sClient.Create(ctx, netCRApp)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, netCRApp)).To(Succeed()) + }) + + // Network from app namespace should be Degraded (NAD already exists and is owned by another network) + Eventually(func(g Gomega) { + network := &v1.OVSNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: netCRApp.Name, Namespace: targetNs}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, v1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + + degradedCondition := findCondition(network.Status.Conditions, v1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Verify NAD still exists and is owned by operator network (not overwritten) + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nadName, Namespace: targetNs}, netAttDef)).NotTo(HaveOccurred()) + g.Expect(netAttDef.GetAnnotations()["k8s.v1.cni.cncf.io/resourceName"]).To(ContainSubstring("ovs_resource_op")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + }) }) diff --git a/controllers/sriovibnetwork_controller.go b/controllers/sriovibnetwork_controller.go index 6c0c245349..5911122fe9 100644 --- a/controllers/sriovibnetwork_controller.go +++ b/controllers/sriovibnetwork_controller.go @@ -24,12 +24,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" ) // SriovIBNetworkReconciler reconciles a SriovIBNetwork object type SriovIBNetworkReconciler struct { client.Client Scheme *runtime.Scheme + StatusPatcher status.Interface genericReconciler *genericNetworkReconciler } @@ -59,6 +61,6 @@ func (r *SriovIBNetworkReconciler) GetObjectList() client.ObjectList { // SetupWithManager sets up the controller with the Manager. func (r *SriovIBNetworkReconciler) SetupWithManager(mgr ctrl.Manager) error { - r.genericReconciler = newGenericNetworkReconciler(r.Client, r.Scheme, r) + r.genericReconciler = newGenericNetworkReconciler(r.Client, r.Scheme, r, r.StatusPatcher) return r.genericReconciler.SetupWithManager(mgr) } diff --git a/controllers/sriovibnetwork_controller_test.go b/controllers/sriovibnetwork_controller_test.go index badb11d975..4fe3acb6d9 100644 --- a/controllers/sriovibnetwork_controller_test.go +++ b/controllers/sriovibnetwork_controller_test.go @@ -19,6 +19,7 @@ import ( . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) @@ -32,8 +33,9 @@ var _ = Describe("SriovIBNetwork Controller", Ordered, func() { Expect(err).ToNot(HaveOccurred()) err = (&SriovIBNetworkReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + StatusPatcher: status.NewPatcher(k8sManager.GetClient(), k8sManager.GetEventRecorder("test"), k8sManager.GetScheme()), }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) @@ -276,6 +278,253 @@ var _ = Describe("SriovIBNetwork Controller", Ordered, func() { Expect(strings.TrimSpace(netAttDef.Spec.Config)).To(Equal(expect)) }) }) + + Context("conditions", func() { + It("should set Ready=True and Degraded=False when network is successfully provisioned", func() { + cr := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ib-cond-ready", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "ib_resource_cond", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: testNamespace}, network)).To(Succeed()) + + // Verify Ready condition + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNetworkReady)) + g.Expect(readyCondition.Message).To(ContainSubstring("NetworkAttachmentDefinition is provisioned")) + + // Verify Degraded condition + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonNotDegraded)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should set Ready=False and Degraded=True when target namespace does not exist", func() { + cr := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ib-cond-degraded", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + NetworkNamespace: "ib-non-existent-ns-degraded", + ResourceName: "ib_resource_cond_degraded", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + // Wait for conditions to be set + time.Sleep(2 * time.Second) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: testNamespace}, network)).To(Succeed()) + + // Verify Ready condition is False (waiting for namespace) + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNotReady)) + + // Verify Degraded condition is True (namespace not found) + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonNamespaceNotFound)) + g.Expect(degradedCondition.Message).To(ContainSubstring("namespace does not exist")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should have consistent observedGeneration in conditions", func() { + cr := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ib-cond-gen", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "ib_resource_cond_gen", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(degradedCondition).ToNot(BeNil()) + + // Both conditions should have the same observedGeneration + g.Expect(readyCondition.ObservedGeneration).To(Equal(degradedCondition.ObservedGeneration)) + // And it should match the current generation + g.Expect(readyCondition.ObservedGeneration).To(Equal(network.Generation)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should set Degraded=True when app network sets NetworkNamespace from non-operator namespace", func() { + appNs := "ib-app-ns-cross" + // Create application namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: appNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Network in application namespace with cross-namespace target - should be degraded + cr := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ib-app-cross", + Namespace: appNs, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + NetworkNamespace: "default", // Different from its own namespace + ResourceName: "ib_resource_cross", + }, + } + + Expect(k8sClient.Create(ctx, &cr)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: appNs}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Message).To(ContainSubstring("networkNamespace cannot be set")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should mark app network as degraded when operator network already owns the NAD with same name", func() { + targetNs := "ib-target-ns-conflict" + nadName := "ib-test-nad-conflict" + + // Create target namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: targetNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Network 1: In operator namespace, targeting target-ns + crOperator := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: nadName, + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + NetworkNamespace: targetNs, + ResourceName: "ib_resource_op", + }, + } + + // Network 2: In target namespace with SAME NAME + crApp := sriovnetworkv1.SriovIBNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: nadName, + Namespace: targetNs, + }, + Spec: sriovnetworkv1.SriovIBNetworkSpec{ + ResourceName: "ib_resource_app", + }, + } + + // Create operator network first + Expect(k8sClient.Create(ctx, &crOperator)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &crOperator)).To(Succeed()) + }) + + // Wait for NAD to be created by operator network + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nadName, Namespace: targetNs}, netAttDef)).NotTo(HaveOccurred()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Network from operator namespace should be Ready + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: crOperator.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Now create app network with same name - should be degraded + Expect(k8sClient.Create(ctx, &crApp)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &crApp)).To(Succeed()) + }) + + // Network from app namespace should be Degraded (NAD already exists and is owned by another network) + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovIBNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: crApp.Name, Namespace: targetNs}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Verify NAD still exists and is owned by operator network (not overwritten) + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nadName, Namespace: targetNs}, netAttDef)).NotTo(HaveOccurred()) + g.Expect(netAttDef.GetAnnotations()["k8s.v1.cni.cncf.io/resourceName"]).To(ContainSubstring("ib_resource_op")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + }) }) func generateExpectedIBNetConfig(cr *sriovnetworkv1.SriovIBNetwork) string { diff --git a/controllers/sriovnetwork_controller.go b/controllers/sriovnetwork_controller.go index 1ef6b5cb88..83ecac3866 100644 --- a/controllers/sriovnetwork_controller.go +++ b/controllers/sriovnetwork_controller.go @@ -24,12 +24,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" ) // SriovNetworkReconciler reconciles a SriovNetwork object type SriovNetworkReconciler struct { client.Client Scheme *runtime.Scheme + StatusPatcher status.Interface genericReconciler *genericNetworkReconciler } @@ -59,6 +61,6 @@ func (r *SriovNetworkReconciler) GetObjectList() client.ObjectList { // SetupWithManager sets up the controller with the Manager. func (r *SriovNetworkReconciler) SetupWithManager(mgr ctrl.Manager) error { - r.genericReconciler = newGenericNetworkReconciler(r.Client, r.Scheme, r) + r.genericReconciler = newGenericNetworkReconciler(r.Client, r.Scheme, r, r.StatusPatcher) return r.genericReconciler.SetupWithManager(mgr) } diff --git a/controllers/sriovnetwork_controller_test.go b/controllers/sriovnetwork_controller_test.go index 191ca7438a..c4620abf8f 100644 --- a/controllers/sriovnetwork_controller_test.go +++ b/controllers/sriovnetwork_controller_test.go @@ -21,6 +21,7 @@ import ( . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) @@ -39,8 +40,9 @@ var _ = Describe("SriovNetwork Controller", Ordered, func() { Expect(err).ToNot(HaveOccurred()) err = (&SriovNetworkReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + StatusPatcher: status.NewPatcher(k8sManager.GetClient(), k8sManager.GetEventRecorder("test"), k8sManager.GetScheme()), }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) @@ -344,6 +346,350 @@ var _ = Describe("SriovNetwork Controller", Ordered, func() { Should(Succeed()) }) + Context("conditions", func() { + It("should set Ready=True and Degraded=False when network is successfully provisioned", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cond-ready", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "resource_cond", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: testNamespace}, network)).To(Succeed()) + + // Verify Ready condition + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNetworkReady)) + g.Expect(readyCondition.Message).To(ContainSubstring("NetworkAttachmentDefinition is provisioned")) + + // Verify Degraded condition + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonNotDegraded)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should set Ready=False and Degraded=True when target namespace does not exist", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cond-degraded", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "non-existent-ns-degraded", + ResourceName: "resource_cond_degraded", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + // Wait for conditions to be set + time.Sleep(2 * time.Second) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: testNamespace}, network)).To(Succeed()) + + // Verify Ready condition is False (waiting for namespace) + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNotReady)) + + // Verify Degraded condition is True (namespace not found) + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonNamespaceNotFound)) + g.Expect(degradedCondition.Message).To(ContainSubstring("namespace does not exist")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should have consistent observedGeneration in conditions", func() { + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cond-gen", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", + ResourceName: "resource_cond_gen", + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(degradedCondition).ToNot(BeNil()) + + // Both conditions should have the same observedGeneration + g.Expect(readyCondition.ObservedGeneration).To(Equal(degradedCondition.ObservedGeneration)) + // And it should match the current generation + g.Expect(readyCondition.ObservedGeneration).To(Equal(network.Generation)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should set Ready=True when SriovNetwork is in application namespace (same as NetworkNamespace)", func() { + appNs := "app-ns-cond" + // Create application namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: appNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Create SriovNetwork in application namespace (not operator namespace) + cr := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "app-network", + Namespace: appNs, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + ResourceName: "resource_app", + // No NetworkNamespace specified - should use same namespace + }, + } + + err := k8sClient.Create(ctx, &cr) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &cr)).To(Succeed()) + }) + + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: appNs}, network)).To(Succeed()) + + // Verify Ready condition is True (network in same namespace) + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonNetworkReady)) + + // Verify Degraded condition is False + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Verify NetworkAttachmentDefinition is created in the same namespace + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: appNs}, netAttDef)).NotTo(HaveOccurred()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + + It("should show different conditions for networks in operator vs application namespace with cross-namespace target", func() { + appNs := "app-ns-multi" + // Create application namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: appNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Network 1: In operator namespace, targeting application namespace - should succeed + crOperator := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "network-operator-ns", + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: appNs, + ResourceName: "resource_multi_1", + }, + } + + // Network 2: In application namespace, targeting different namespace - should be degraded + // (cross-namespace targeting from non-operator namespace is not allowed) + crApp := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "network-app-ns", + Namespace: appNs, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: "default", // Different from its own namespace + ResourceName: "resource_multi_2", + }, + } + + Expect(k8sClient.Create(ctx, &crOperator)).NotTo(HaveOccurred()) + Expect(k8sClient.Create(ctx, &crApp)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &crOperator)).To(Succeed()) + Expect(k8sClient.Delete(ctx, &crApp)).To(Succeed()) + }) + + // Network in operator namespace should be Ready + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: crOperator.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Network in application namespace with cross-namespace target should be Degraded + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: crApp.Name, Namespace: appNs}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(sriovnetworkv1.ReasonProvisioningFailed)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(degradedCondition.Reason).To(Equal(sriovnetworkv1.ReasonProvisioningFailed)) + g.Expect(degradedCondition.Message).To(ContainSubstring("networkNamespace cannot be set")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // NAD should NOT be created in target namespace + time.Sleep(2 * time.Second) + Consistently(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: crApp.Name, Namespace: "default"}, netAttDef) + g.Expect(errors.IsNotFound(err)).To(BeTrue()) + }, 2*time.Second, 100*time.Millisecond).Should(Succeed()) + }) + + It("should mark app network as degraded when operator network already owns the NAD with same name", func() { + targetNs := "target-ns-conflict" + nadName := "test-nad-conflict" + + // Create target namespace + nsObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: targetNs}, + } + Expect(k8sClient.Create(ctx, nsObj)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, nsObj)).NotTo(HaveOccurred()) + }) + + // Network 1: In operator namespace, targeting target-ns + // Creates NAD named "test-nad-conflict" in target-ns + crOperator := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: nadName, + Namespace: testNamespace, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + NetworkNamespace: targetNs, + ResourceName: "resource_op", + }, + } + + // Network 2: In target namespace with SAME NAME + // Would try to create NAD with same name in same namespace - should be degraded + crApp := sriovnetworkv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: nadName, // Same name as operator network + Namespace: targetNs, + }, + Spec: sriovnetworkv1.SriovNetworkSpec{ + ResourceName: "resource_app", + // No NetworkNamespace - uses same namespace (targetNs) + }, + } + + // Create operator network first + Expect(k8sClient.Create(ctx, &crOperator)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &crOperator)).To(Succeed()) + }) + + // Wait for NAD to be created by operator network + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nadName, Namespace: targetNs}, netAttDef)).NotTo(HaveOccurred()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Network from operator namespace should be Ready + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: crOperator.Name, Namespace: testNamespace}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionTrue)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionFalse)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Now create app network with same name - should be degraded + Expect(k8sClient.Create(ctx, &crApp)).NotTo(HaveOccurred()) + DeferCleanup(func() { + Expect(k8sClient.Delete(ctx, &crApp)).To(Succeed()) + }) + + // Network from app namespace should be Degraded (NAD already exists and is owned by another network) + Eventually(func(g Gomega) { + network := &sriovnetworkv1.SriovNetwork{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: crApp.Name, Namespace: targetNs}, network)).To(Succeed()) + + readyCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionReady) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + + degradedCondition := findCondition(network.Status.Conditions, sriovnetworkv1.ConditionDegraded) + g.Expect(degradedCondition).ToNot(BeNil()) + g.Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue)) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + // Verify NAD still exists and is owned by operator network (not overwritten) + Eventually(func(g Gomega) { + netAttDef := &netattdefv1.NetworkAttachmentDefinition{} + g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nadName, Namespace: targetNs}, netAttDef)).NotTo(HaveOccurred()) + // The NAD should NOT have owner reference to app network (since they're in different namespaces) + // It should still be controlled by the operator network's reconciler + g.Expect(netAttDef.GetAnnotations()["k8s.v1.cni.cncf.io/resourceName"]).To(ContainSubstring("resource_op")) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + }) + Context("When the SriovNetwork namespace is not equal to the operator one", func() { BeforeAll(func() { nsBlue := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ns-blue"}} diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 1f9c5ed210..be5c369f0d 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -238,3 +238,12 @@ func TestAPIs(t *testing.T) { RunSpecs(t, "Controller Suite", reporterConfig) } + +func findCondition(conditions []metav1.Condition, conditionType string) *metav1.Condition { + for i := range conditions { + if conditions[i].Type == conditionType { + return &conditions[i] + } + } + return nil +} diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_ovsnetworks.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_ovsnetworks.yaml index 63da95e46c..0c87629928 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_ovsnetworks.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_ovsnetworks.yaml @@ -14,7 +14,17 @@ spec: singular: ovsnetwork scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.conditions[?(@.type=="Degraded")].status + name: Degraded + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: OVSNetwork is the Schema for the ovsnetworks API @@ -97,6 +107,68 @@ spec: type: object status: description: OVSNetworkStatus defines the observed state of OVSNetwork + properties: + conditions: + description: Conditions represent the latest available observations + of the network's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml index 93e6276485..6101be69a3 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -14,7 +14,17 @@ spec: singular: sriovibnetwork scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.conditions[?(@.type=="Degraded")].status + name: Degraded + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: SriovIBNetwork is the Schema for the sriovibnetworks API @@ -70,6 +80,68 @@ spec: type: object status: description: SriovIBNetworkStatus defines the observed state of SriovIBNetwork + properties: + conditions: + description: Conditions represent the latest available observations + of the network's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworks.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworks.yaml index 29e686b352..d712a2e74f 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworks.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworks.yaml @@ -14,7 +14,17 @@ spec: singular: sriovnetwork scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.conditions[?(@.type=="Degraded")].status + name: Degraded + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: SriovNetwork is the Schema for the sriovnetworks API @@ -128,6 +138,68 @@ spec: type: object status: description: SriovNetworkStatus defines the observed state of SriovNetwork + properties: + conditions: + description: Conditions represent the latest available observations + of the network's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/doc/design/kubernetes-conditions-integration.md b/doc/design/kubernetes-conditions-integration.md new file mode 100644 index 0000000000..5c96e18d59 --- /dev/null +++ b/doc/design/kubernetes-conditions-integration.md @@ -0,0 +1,451 @@ +--- +title: Kubernetes Conditions Integration for SR-IOV Network Operator CRDs +authors: + - SR-IOV Network Operator Team +reviewers: + - TBD +creation-date: 21-07-2025 +last-updated: 08-01-2026 +status: implemented +--- + +# Kubernetes Conditions Integration for SR-IOV Network Operator CRDs + +## Summary + +This proposal enhances the observability and operational transparency of the SR-IOV Network Operator by integrating standard Kubernetes conditions into the status of its key Custom Resource Definitions (CRDs). This enables users and automated systems to easily understand the current state, progress, and health of SR-IOV network configurations and components directly through Kubernetes API objects. + +## Motivation + +Adding Kubernetes conditions to the SR-IOV Network Operator's CRDs is crucial for several reasons: + +* **Improved Observability:** Conditions provide a standardized, machine-readable way to convey the state of a resource, including its readiness, progress, and any encountered issues. This allows for better monitoring and debugging. + +* **Enhanced User Experience:** Users can quickly ascertain the health and status of their `SriovNetwork`, `SriovIBNetwork`, `OVSNetwork`, `SriovNetworkNodeState`, `SriovOperatorConfig`, `SriovNetworkNodePolicy`, and `SriovNetworkPoolConfig` resources without needing to delve into logs or complex operator-specific status fields. + +* **Standardized API Interaction:** Aligning with Kubernetes' best practices for API object status makes the SR-IOV operator more consistent with other Kubernetes operators and native resources, simplifying integration with existing tooling (e.g., `kubectl wait`, Prometheus alerts). + +* **Automated Remediation and Orchestration:** External controllers or automation tools can reliably react to changes in resource conditions, enabling more robust and intelligent orchestration workflows and automated problem resolution. + +* **Clearer Error Reporting:** Specific conditions can indicate different types of errors (e.g., `Degraded`, `Ready`, `Progressing`), providing more granular insight into failures. + +* **Simplified Troubleshooting:** When a resource is not in the desired state, conditions can point directly to the reason, accelerating troubleshooting. + +### Use Cases + +1. **Network Resource Provisioning Status:** + - A user creates a `SriovNetwork`, `SriovIBNetwork`, or `OVSNetwork` custom resource + - Condition `Ready` is set to `True` once the network is successfully provisioned and ready for use by pods + - Condition `Degraded` is set to `True` with a reason if the network provisioning fails + +2. **Node Configuration Health:** + - The operator updates the `SriovNetworkNodeState` for a node + - Condition `Progressing` when the operator is applying changes to the node's SR-IOV configuration + - Condition `Degraded` if a node's SR-IOV configuration is incorrect + - Condition `Ready` indicating the overall readiness of the SR-IOV components on that specific node + - Drain-specific conditions (`DrainProgressing`, `DrainDegraded`, `DrainComplete`) for tracking drain operations + +3. **Operator Configuration Status:** + - An administrator modifies the `SriovOperatorConfig` + - Condition `Ready` indicates that the operator's components are running and healthy + - Condition `Degraded` if the operator itself encounters issues + +4. **Policy Configuration Management:** + - An administrator creates or updates a `SriovNetworkNodePolicy` + - Condition `Ready` indicates that the policy configuration has been successfully applied to all target nodes + - Condition `Progressing` when the policy configuration is being applied to the selected nodes + - Condition `Degraded` if any matched nodes are in a degraded state + - Status includes `matchedNodeCount` and `readyNodeCount` for aggregated visibility + +5. **Pool Configuration Management:** + - An administrator creates or updates a `SriovNetworkPoolConfig` + - Condition `Ready` indicates that the pool configuration has been successfully applied to all target nodes + - Condition `Progressing` when the pool configuration is being applied to the selected nodes + - Condition `Degraded` if the pool configuration fails to apply or conflicts with existing configurations + - Status includes `matchedNodeCount` and `readyNodeCount` for aggregated visibility + +### Goals + +* Add standard Kubernetes conditions to all major SR-IOV CRDs (`SriovNetwork`, `SriovIBNetwork`, `OVSNetwork`, `SriovNetworkNodeState`, `SriovOperatorConfig`, `SriovNetworkNodePolicy`, `SriovNetworkPoolConfig`) +* Implement consistent condition types across all CRDs where applicable +* Ensure conditions are updated in real-time as resource states change +* Maintain backward compatibility with existing status fields +* Provide comprehensive documentation and examples for condition usage +* Enable `kubectl wait` functionality for all resources +* Add aggregated status for policy and pool resources showing matched/ready node counts + +### Non-Goals + +* Modifying existing status field structures (maintaining backward compatibility) +* Adding conditions to deprecated or legacy CRDs +* Implementing custom condition types beyond standard Kubernetes patterns +* Changing existing controller reconciliation logic beyond condition updates + +## Implementation + +### Condition Types + +The following condition types are implemented across SR-IOV CRDs: + +```go +const ( + // ConditionProgressing indicates that the resource is being actively reconciled + ConditionProgressing = "Progressing" + + // ConditionDegraded indicates that the resource is not functioning as expected + ConditionDegraded = "Degraded" + + // ConditionReady indicates that the resource has reached its desired state and is fully functional + ConditionReady = "Ready" + + // Drain-specific conditions for SriovNetworkNodeState + + // ConditionDrainProgressing indicates that the node is being actively drained + ConditionDrainProgressing = "DrainProgressing" + + // ConditionDrainDegraded indicates that the drain process is not functioning as expected + ConditionDrainDegraded = "DrainDegraded" + + // ConditionDrainComplete indicates that the drain operation completed successfully + ConditionDrainComplete = "DrainComplete" +) +``` + +### Condition Reasons + +The following reasons are used across conditions: + +```go +const ( + // Reasons for Ready condition + ReasonNetworkReady = "NetworkReady" + ReasonNodeReady = "NodeConfigurationReady" + ReasonNodeDrainReady = "NodeDrainReady" + ReasonOperatorReady = "OperatorReady" + ReasonNotReady = "NotReady" + + // Reasons for Degraded condition + ReasonProvisioningFailed = "ProvisioningFailed" + ReasonConfigurationFailed = "ConfigurationFailed" + ReasonNetworkAttachmentDefNotFound = "NetworkAttachmentDefinitionNotFound" + ReasonNetworkAttachmentDefInvalid = "NetworkAttachmentDefinitionInvalid" + ReasonNamespaceNotFound = "NamespaceNotFound" + ReasonHardwareError = "HardwareError" + ReasonDriverError = "DriverError" + ReasonOperatorComponentsNotHealthy = "OperatorComponentsNotHealthy" + ReasonNotDegraded = "NotDegraded" + + // Reasons for Progressing condition + ReasonConfiguringNode = "ConfiguringNode" + ReasonApplyingConfiguration = "ApplyingConfiguration" + ReasonCreatingVFs = "CreatingVFs" + ReasonLoadingDriver = "LoadingDriver" + ReasonDrainingNode = "DrainingNode" + ReasonNotProgressing = "NotProgressing" + + // Reasons for DrainComplete condition + ReasonDrainCompleted = "DrainCompleted" + ReasonDrainNotNeeded = "DrainNotNeeded" + ReasonDrainPending = "DrainPending" + + // Reasons for DrainDegraded condition + ReasonDrainFailed = "DrainFailed" + + // Reasons for Policy/PoolConfig conditions + ReasonPolicyReady = "PolicyReady" + ReasonPolicyNotReady = "PolicyNotReady" + ReasonNoMatchingNodes = "NoMatchingNodes" + ReasonPartiallyApplied = "PartiallyApplied" + ReasonAllNodesConfigured = "AllNodesConfigured" + ReasonSomeNodesFailed = "SomeNodesFailed" + ReasonSomeNodesProgressing = "SomeNodesProgressing" +) +``` + +### API Extensions + +#### NetworkStatus (Shared by Network CRDs) + +```go +// NetworkStatus defines the common observed state for network-type CRDs +type NetworkStatus struct { + // Conditions represent the latest available observations of the network's state + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} +``` + +#### SriovNetwork, SriovIBNetwork, OVSNetwork + +All network CRDs embed `NetworkStatus`: + +```go +type SriovNetworkStatus struct { + NetworkStatus `json:",inline"` +} +``` + +**Conditions:** +- `Ready`: NetworkAttachmentDefinition is created and network is ready for use +- `Degraded`: NetworkAttachmentDefinition creation failed or configuration is invalid + +**kubectl output columns:** Ready, Degraded, Age + +#### SriovNetworkNodeState + +```go +type SriovNetworkNodeStateStatus struct { + Interfaces InterfaceExts `json:"interfaces,omitempty"` + Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` + SyncStatus string `json:"syncStatus,omitempty"` + LastSyncError string `json:"lastSyncError,omitempty"` + + // Conditions represent the latest available observations of the SriovNetworkNodeState's state + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} +``` + +**Conditions:** +- `Ready`: Node's SR-IOV configuration is complete and functional +- `Progressing`: Node is being configured (VF creation, driver loading, node draining, etc.) +- `Degraded`: Node configuration failed or hardware issues detected +- `DrainProgressing`: Node drain operation is in progress +- `DrainDegraded`: Node drain operation encountered errors (e.g., PDB violations) +- `DrainComplete`: Node drain operation completed successfully + +**kubectl output columns:** Sync Status, Ready, Progressing, Degraded, DrainProgress, DrainDegraded, DrainComplete, Age + +#### SriovOperatorConfig + +```go +type SriovOperatorConfigStatus struct { + Injector string `json:"injector,omitempty"` + OperatorWebhook string `json:"operatorWebhook,omitempty"` + + // Conditions represent the latest available observations of the SriovOperatorConfig's state + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} +``` + +**Conditions:** +- `Ready`: Operator components are running and healthy +- `Degraded`: Operator components are failing or misconfigured + +**kubectl output columns:** Ready, Progressing, Degraded, Age + +#### SriovNetworkNodePolicy + +```go +type SriovNetworkNodePolicyStatus struct { + // MatchedNodeCount is the number of nodes that match the nodeSelector for this policy + MatchedNodeCount int `json:"matchedNodeCount"` + + // ReadyNodeCount is the number of matched nodes that have successfully applied the configuration + ReadyNodeCount int `json:"readyNodeCount"` + + // Conditions represent the latest available observations of the SriovNetworkNodePolicy's state + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} +``` + +**Conditions:** +- `Ready`: True only when ALL matched nodes are ready +- `Progressing`: True when ANY matched node is progressing +- `Degraded`: True when ANY matched node is degraded + +**kubectl output columns:** Matched, Ready Nodes, Ready, Progressing, Degraded, Age + +#### SriovNetworkPoolConfig + +```go +type SriovNetworkPoolConfigStatus struct { + // MatchedNodeCount is the number of nodes that match the nodeSelector for this pool + MatchedNodeCount int `json:"matchedNodeCount"` + + // ReadyNodeCount is the number of matched nodes that have successfully applied the configuration + ReadyNodeCount int `json:"readyNodeCount"` + + // Conditions represent the latest available observations of the SriovNetworkPoolConfig's state + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} +``` + +**Conditions:** +- `Ready`: Pool configuration has been successfully applied to all target nodes +- `Progressing`: Pool configuration is being applied to selected nodes +- `Degraded`: Pool configuration failed to apply or conflicts with existing configurations + +**kubectl output columns:** Matched, Ready Nodes, Ready, Progressing, Degraded, Age + +### Implementation Details + +#### Status Patcher Package (pkg/status) + +A new `pkg/status` package provides utilities for managing CRD status updates: + +```go +// Interface defines methods for updating resource status with retry logic and event emission +type Interface interface { + // UpdateStatus updates the status of a resource with retry on conflict + UpdateStatus(ctx context.Context, obj client.Object, updateFunc func() error) error + + // PatchStatus patches the status of a resource with retry on conflict + PatchStatus(ctx context.Context, obj client.Object, patch client.Patch) error + + // UpdateStatusWithEvents updates status and emits events for condition transitions + UpdateStatusWithEvents(ctx context.Context, obj client.Object, oldConditions, newConditions []metav1.Condition, updateFunc func() error) error + + // SetCondition sets a condition with the given type, status, reason, and message + SetCondition(conditions *[]metav1.Condition, conditionType string, status metav1.ConditionStatus, reason, message string, generation int64) + + // IsConditionTrue checks if a condition exists and has status True + IsConditionTrue(conditions []metav1.Condition, conditionType string) bool + + // FindCondition returns the condition with the given type, or nil if not found + FindCondition(conditions []metav1.Condition, conditionType string) *metav1.Condition + + // HasConditionChanged checks if the new condition differs from the existing one + HasConditionChanged(conditions []metav1.Condition, conditionType string, status metav1.ConditionStatus, reason, message string) bool +} +``` + +#### Transition Detection + +The package includes transition detection for automatic event emission: + +```go +// TransitionType represents the type of condition transition +type TransitionType string + +const ( + TransitionAdded TransitionType = "Added" + TransitionChanged TransitionType = "Changed" + TransitionRemoved TransitionType = "Removed" + TransitionUnchanged TransitionType = "Unchanged" +) + +// DetectTransitions compares old and new conditions and returns a list of transitions +func DetectTransitions(oldConditions, newConditions []metav1.Condition) []Transition +``` + +#### Helper Functions + +The `api/v1/conditions.go` file provides shared helper functions: + +```go +// ConditionsEqual compares two condition slices ignoring LastTransitionTime. +// This is useful to avoid unnecessary API updates when conditions haven't actually changed. +func ConditionsEqual(a, b []metav1.Condition) bool +``` + +#### Controller Integration + +Each controller uses the status patcher to update conditions: + +**Network Controllers (SriovNetwork, SriovIBNetwork, OVSNetwork):** +- Set `Ready=True, Degraded=False` on successful NAD provisioning +- Set `Ready=False, Degraded=True` when namespace not found or provisioning fails + +**SriovOperatorConfig Controller:** +- Set `Ready=True, Degraded=False` on successful reconciliation +- Set `Ready=False, Degraded=True` on component failures + +**Config Daemon:** +- Updates `SriovNetworkNodeState` conditions during sync operations +- Sets configuration conditions based on sync status + +**Drain Controller:** +- Sets drain-specific conditions during drain operations +- Updates `DrainProgressing`, `DrainDegraded`, `DrainComplete` based on drain state + +**Status Controllers (Policy and PoolConfig):** +- Dedicated controllers aggregate conditions from matching `SriovNetworkNodeState` objects +- Watches changes to NodeStates and Nodes to update aggregated status + +### Usage Examples + +```bash +# Check network status +kubectl get sriovnetwork -o wide +NAME READY DEGRADED AGE +my-network True False 5m + +# Wait for network to be ready +kubectl wait --for=condition=Ready sriovnetwork/my-network --timeout=60s + +# Check policy status with node counts +kubectl get sriovnetworknodepolicy -o wide +NAME MATCHED READY NODES READY PROGRESSING DEGRADED AGE +policy1 3 3 True False False 10m + +# Check pool config status +kubectl get sriovnetworkpoolconfig -o wide +NAME MATCHED READY NODES READY PROGRESSING DEGRADED AGE +worker-pool 5 5 True False False 15m + +# Check node state with all conditions +kubectl get sriovnetworknodestate -o wide +NAME SYNC STATUS READY PROGRESSING DEGRADED DRAINPROGRESS DRAINDEGRADED DRAINCOMPLETE AGE +worker1 Succeeded True False False False False True 1h +``` + +### Backward Compatibility + +* Existing status fields are preserved +* Conditions are added as optional fields +* Controllers continue to update legacy status fields alongside conditions +* Client code relying on existing status fields is not affected + +### Error Handling + +* Condition updates use retry logic with conflict handling (up to 3 retries) +* Failed condition updates are logged but don't cause reconciliation failure +* Conditions are updated atomically using Patch operations to avoid race conditions + +### Upgrade & Downgrade Considerations + +#### Upgrade +* New CRD versions with condition fields are backward compatible +* Existing CR instances continue to function without conditions +* Controllers start populating conditions immediately after upgrade +* No manual intervention required from users + +#### Downgrade +* Conditions are ignored by older controller versions +* Existing status fields continue to be populated +* No data loss or functionality degradation during downgrade +* CRD structure remains compatible with older API versions + +## Testing + +The implementation includes: + +* **Unit tests** for all condition handling logic +* **Unit tests** for status patcher and transition detection +* **Integration tests** for controller condition updates +* **E2E conformance tests** validating conditions in real cluster scenarios diff --git a/main.go b/main.go index 197b7cf2ec..ce9131f38a 100644 --- a/main.go +++ b/main.go @@ -51,6 +51,7 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/orchestrator" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/status" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" //+kubebuilder:scaffold:imports @@ -160,23 +161,29 @@ func main() { featureGate := featuregate.New() + // Create status patcher for network controllers (includes embedded condition manager) + globalStatusPatcher := status.NewPatcher(mgrGlobal.GetClient(), mgrGlobal.GetEventRecorder("sriov-network-operator"), mgrGlobal.GetScheme()) + if err = (&controllers.SriovNetworkReconciler{ - Client: mgrGlobal.GetClient(), - Scheme: mgrGlobal.GetScheme(), + Client: mgrGlobal.GetClient(), + Scheme: mgrGlobal.GetScheme(), + StatusPatcher: globalStatusPatcher, }).SetupWithManager(mgrGlobal); err != nil { setupLog.Error(err, "unable to create controller", "controller", "SriovNetwork") os.Exit(1) } if err = (&controllers.SriovIBNetworkReconciler{ - Client: mgrGlobal.GetClient(), - Scheme: mgrGlobal.GetScheme(), + Client: mgrGlobal.GetClient(), + Scheme: mgrGlobal.GetScheme(), + StatusPatcher: globalStatusPatcher, }).SetupWithManager(mgrGlobal); err != nil { setupLog.Error(err, "unable to create controller", "controller", "SriovIBNetwork") os.Exit(1) } if err = (&controllers.OVSNetworkReconciler{ - Client: mgrGlobal.GetClient(), - Scheme: mgrGlobal.GetScheme(), + Client: mgrGlobal.GetClient(), + Scheme: mgrGlobal.GetScheme(), + StatusPatcher: globalStatusPatcher, }).SetupWithManager(mgrGlobal); err != nil { setupLog.Error(err, "unable to create controller", "controller", "OVSNetwork") os.Exit(1) diff --git a/pkg/status/mock/mock_patcher.go b/pkg/status/mock/mock_patcher.go new file mode 100644 index 0000000000..2b93f017e0 --- /dev/null +++ b/pkg/status/mock/mock_patcher.go @@ -0,0 +1,85 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: patcher.go +// +// Generated by this command: +// +// mockgen -destination mock/mock_patcher.go -source patcher.go +// + +// Package mock_status is a generated GoMock package. +package mock_status + +import ( + context "context" + reflect "reflect" + + gomock "go.uber.org/mock/gomock" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + client "sigs.k8s.io/controller-runtime/pkg/client" +) + +// MockInterface is a mock of Interface interface. +type MockInterface struct { + ctrl *gomock.Controller + recorder *MockInterfaceMockRecorder + isgomock struct{} +} + +// MockInterfaceMockRecorder is the mock recorder for MockInterface. +type MockInterfaceMockRecorder struct { + mock *MockInterface +} + +// NewMockInterface creates a new mock instance. +func NewMockInterface(ctrl *gomock.Controller) *MockInterface { + mock := &MockInterface{ctrl: ctrl} + mock.recorder = &MockInterfaceMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockInterface) EXPECT() *MockInterfaceMockRecorder { + return m.recorder +} + +// PatchStatus mocks base method. +func (m *MockInterface) PatchStatus(ctx context.Context, obj client.Object, patch client.Patch) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PatchStatus", ctx, obj, patch) + ret0, _ := ret[0].(error) + return ret0 +} + +// PatchStatus indicates an expected call of PatchStatus. +func (mr *MockInterfaceMockRecorder) PatchStatus(ctx, obj, patch any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PatchStatus", reflect.TypeOf((*MockInterface)(nil).PatchStatus), ctx, obj, patch) +} + +// UpdateStatus mocks base method. +func (m *MockInterface) UpdateStatus(ctx context.Context, obj client.Object, updateFunc func() error) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UpdateStatus", ctx, obj, updateFunc) + ret0, _ := ret[0].(error) + return ret0 +} + +// UpdateStatus indicates an expected call of UpdateStatus. +func (mr *MockInterfaceMockRecorder) UpdateStatus(ctx, obj, updateFunc any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateStatus", reflect.TypeOf((*MockInterface)(nil).UpdateStatus), ctx, obj, updateFunc) +} + +// UpdateStatusWithEvents mocks base method. +func (m *MockInterface) UpdateStatusWithEvents(ctx context.Context, obj client.Object, oldConditions, newConditions []v1.Condition, updateFunc func() error) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UpdateStatusWithEvents", ctx, obj, oldConditions, newConditions, updateFunc) + ret0, _ := ret[0].(error) + return ret0 +} + +// UpdateStatusWithEvents indicates an expected call of UpdateStatusWithEvents. +func (mr *MockInterfaceMockRecorder) UpdateStatusWithEvents(ctx, obj, oldConditions, newConditions, updateFunc any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateStatusWithEvents", reflect.TypeOf((*MockInterface)(nil).UpdateStatusWithEvents), ctx, obj, oldConditions, newConditions, updateFunc) +} diff --git a/pkg/status/patcher.go b/pkg/status/patcher.go new file mode 100644 index 0000000000..3fb41757cb --- /dev/null +++ b/pkg/status/patcher.go @@ -0,0 +1,214 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package status + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/events" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + // MaxRetries is the maximum number of retries for status updates on conflict + MaxRetries = 3 +) + +// Interface provides methods for updating resource status with retry logic and event emission +// +//go:generate ../../bin/mockgen -destination mock/mock_patcher.go -source patcher.go +type Interface interface { + // UpdateStatus updates the status of a resource with retry on conflict + UpdateStatus(ctx context.Context, obj client.Object, updateFunc func() error) error + + // PatchStatus patches the status of a resource with retry on conflict + PatchStatus(ctx context.Context, obj client.Object, patch client.Patch) error + + // UpdateStatusWithEvents updates status and emits events for condition transitions + UpdateStatusWithEvents(ctx context.Context, obj client.Object, oldConditions, newConditions []metav1.Condition, updateFunc func() error) error +} + +// Patcher is the production implementation of Interface +type Patcher struct { + client client.Client + recorder events.EventRecorder + scheme *runtime.Scheme +} + +// NewPatcher creates a new Patcher instance +func NewPatcher(client client.Client, recorder events.EventRecorder, scheme *runtime.Scheme) Interface { + return &Patcher{ + client: client, + recorder: recorder, + scheme: scheme, + } +} + +// UpdateStatus updates the status of a resource with retry on conflict +func (p *Patcher) UpdateStatus(ctx context.Context, obj client.Object, updateFunc func() error) error { + var lastErr error + + for i := 0; i < MaxRetries; i++ { + // Apply the update function + if err := updateFunc(); err != nil { + return fmt.Errorf("failed to apply status update: %w", err) + } + + // Try to update the status + if err := p.client.Status().Update(ctx, obj); err != nil { + if errors.IsConflict(err) { + // Conflict, retry after fetching latest version + lastErr = err + if err := p.client.Get(ctx, client.ObjectKeyFromObject(obj), obj); err != nil { + return fmt.Errorf("failed to fetch latest version for retry: %w", err) + } + continue + } + return fmt.Errorf("failed to update status: %w", err) + } + + // Success + return nil + } + + return fmt.Errorf("failed to update status after %d retries: %w", MaxRetries, lastErr) +} + +// PatchStatus patches the status of a resource with retry on conflict +func (p *Patcher) PatchStatus(ctx context.Context, obj client.Object, patch client.Patch) error { + var lastErr error + + for i := 0; i < MaxRetries; i++ { + if err := p.client.Status().Patch(ctx, obj, patch); err != nil { + if errors.IsConflict(err) { + // Conflict, retry after fetching latest version + lastErr = err + if err := p.client.Get(ctx, client.ObjectKeyFromObject(obj), obj); err != nil { + return fmt.Errorf("failed to fetch latest version for retry: %w", err) + } + continue + } + return fmt.Errorf("failed to patch status: %w", err) + } + + // Success + return nil + } + + return fmt.Errorf("failed to patch status after %d retries: %w", MaxRetries, lastErr) +} + +// UpdateStatusWithEvents updates status using Patch and emits events for condition transitions. +// Using Patch instead of Update ensures that only the modified fields are updated, +// preventing race conditions where concurrent updates could overwrite each other's changes. +func (p *Patcher) UpdateStatusWithEvents(ctx context.Context, obj client.Object, oldConditions, newConditions []metav1.Condition, updateFunc func() error) error { + var lastErr error + + for i := 0; i < MaxRetries; i++ { + // Create a deep copy before modifications to use as patch base + baseCopy := obj.DeepCopyObject().(client.Object) + + // Apply the update function + if err := updateFunc(); err != nil { + return fmt.Errorf("failed to apply status update: %w", err) + } + + // Use Patch instead of Update to avoid overwriting concurrent changes + if err := p.client.Status().Patch(ctx, obj, client.MergeFrom(baseCopy)); err != nil { + if errors.IsConflict(err) { + // Conflict, retry after fetching latest version + lastErr = err + if err := p.client.Get(ctx, client.ObjectKeyFromObject(obj), obj); err != nil { + return fmt.Errorf("failed to fetch latest version for retry: %w", err) + } + continue + } + return fmt.Errorf("failed to patch status: %w", err) + } + + // Success - emit events for condition transitions + if p.recorder != nil { + transitions := DetectTransitions(oldConditions, newConditions) + for _, transition := range transitions { + if transition.Type != TransitionUnchanged { + p.recorder.Eventf(obj, nil, transition.EventType(), transition.EventReason(), "StatusChange", transition.Message) + } + } + } + + return nil + } + + return fmt.Errorf("failed to patch status after %d retries: %w", MaxRetries, lastErr) +} + +// Condition helper functions + +// SetCondition sets a condition with the given type, status, reason, and message +func SetCondition(conditions *[]metav1.Condition, conditionType string, status metav1.ConditionStatus, reason, message string, generation int64) { + condition := metav1.Condition{ + Type: conditionType, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: generation, + } + meta.SetStatusCondition(conditions, condition) +} + +// IsConditionTrue checks if a condition exists and has status True +func IsConditionTrue(conditions []metav1.Condition, conditionType string) bool { + condition := meta.FindStatusCondition(conditions, conditionType) + return condition != nil && condition.Status == metav1.ConditionTrue +} + +// IsConditionFalse checks if a condition exists and has status False +func IsConditionFalse(conditions []metav1.Condition, conditionType string) bool { + condition := meta.FindStatusCondition(conditions, conditionType) + return condition != nil && condition.Status == metav1.ConditionFalse +} + +// IsConditionUnknown checks if a condition exists and has status Unknown +func IsConditionUnknown(conditions []metav1.Condition, conditionType string) bool { + condition := meta.FindStatusCondition(conditions, conditionType) + return condition != nil && condition.Status == metav1.ConditionUnknown +} + +// FindCondition returns the condition with the given type, or nil if not found +func FindCondition(conditions []metav1.Condition, conditionType string) *metav1.Condition { + return meta.FindStatusCondition(conditions, conditionType) +} + +// RemoveCondition removes the condition with the given type +func RemoveCondition(conditions *[]metav1.Condition, conditionType string) { + meta.RemoveStatusCondition(conditions, conditionType) +} + +// HasConditionChanged checks if the new condition differs from the existing one +// Returns true if the condition doesn't exist or if status, reason, or message differ +func HasConditionChanged(conditions []metav1.Condition, conditionType string, status metav1.ConditionStatus, reason, message string) bool { + existing := meta.FindStatusCondition(conditions, conditionType) + if existing == nil { + return true + } + return existing.Status != status || existing.Reason != reason || existing.Message != message +} diff --git a/pkg/status/patcher_test.go b/pkg/status/patcher_test.go new file mode 100644 index 0000000000..5518055d7e --- /dev/null +++ b/pkg/status/patcher_test.go @@ -0,0 +1,185 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package status + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestStatus(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Status Package Suite") +} + +var _ = Describe("Condition Helper Functions", func() { + Describe("SetCondition", func() { + It("should set a new condition", func() { + conditions := []metav1.Condition{} + SetCondition(&conditions, "Ready", metav1.ConditionTrue, "TestReason", "Test message", 1) + + Expect(conditions).To(HaveLen(1)) + Expect(conditions[0].Type).To(Equal("Ready")) + Expect(conditions[0].Status).To(Equal(metav1.ConditionTrue)) + Expect(conditions[0].Reason).To(Equal("TestReason")) + Expect(conditions[0].Message).To(Equal("Test message")) + Expect(conditions[0].ObservedGeneration).To(Equal(int64(1))) + }) + + It("should update an existing condition", func() { + conditions := []metav1.Condition{ + { + Type: "Ready", + Status: metav1.ConditionFalse, + Reason: "OldReason", + Message: "Old message", + ObservedGeneration: 1, + }, + } + + SetCondition(&conditions, "Ready", metav1.ConditionTrue, "NewReason", "New message", 2) + + Expect(conditions).To(HaveLen(1)) + Expect(conditions[0].Type).To(Equal("Ready")) + Expect(conditions[0].Status).To(Equal(metav1.ConditionTrue)) + Expect(conditions[0].Reason).To(Equal("NewReason")) + Expect(conditions[0].Message).To(Equal("New message")) + Expect(conditions[0].ObservedGeneration).To(Equal(int64(2))) + }) + }) + + Describe("IsConditionTrue", func() { + It("should return true when condition exists and is True", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue}, + } + Expect(IsConditionTrue(conditions, "Ready")).To(BeTrue()) + }) + + It("should return false when condition exists but is not True", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionFalse}, + } + Expect(IsConditionTrue(conditions, "Ready")).To(BeFalse()) + }) + + It("should return false when condition does not exist", func() { + conditions := []metav1.Condition{} + Expect(IsConditionTrue(conditions, "Ready")).To(BeFalse()) + }) + }) + + Describe("IsConditionFalse", func() { + It("should return true when condition exists and is False", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionFalse}, + } + Expect(IsConditionFalse(conditions, "Ready")).To(BeTrue()) + }) + + It("should return false when condition exists but is not False", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue}, + } + Expect(IsConditionFalse(conditions, "Ready")).To(BeFalse()) + }) + }) + + Describe("IsConditionUnknown", func() { + It("should return true when condition exists and is Unknown", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionUnknown}, + } + Expect(IsConditionUnknown(conditions, "Ready")).To(BeTrue()) + }) + }) + + Describe("FindCondition", func() { + It("should find an existing condition", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue}, + } + condition := FindCondition(conditions, "Ready") + Expect(condition).ToNot(BeNil()) + Expect(condition.Type).To(Equal("Ready")) + }) + + It("should return nil when condition does not exist", func() { + conditions := []metav1.Condition{} + condition := FindCondition(conditions, "Ready") + Expect(condition).To(BeNil()) + }) + }) + + Describe("RemoveCondition", func() { + It("should remove an existing condition", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue}, + {Type: "Degraded", Status: metav1.ConditionFalse}, + } + RemoveCondition(&conditions, "Ready") + Expect(conditions).To(HaveLen(1)) + Expect(conditions[0].Type).To(Equal("Degraded")) + }) + + It("should do nothing when condition does not exist", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue}, + } + RemoveCondition(&conditions, "NonExistent") + Expect(conditions).To(HaveLen(1)) + }) + }) + + Describe("HasConditionChanged", func() { + It("should return true when condition does not exist", func() { + conditions := []metav1.Condition{} + Expect(HasConditionChanged(conditions, "Ready", metav1.ConditionTrue, "Reason", "Message")).To(BeTrue()) + }) + + It("should return true when status changed", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionFalse, Reason: "Reason", Message: "Message"}, + } + Expect(HasConditionChanged(conditions, "Ready", metav1.ConditionTrue, "Reason", "Message")).To(BeTrue()) + }) + + It("should return true when reason changed", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "OldReason", Message: "Message"}, + } + Expect(HasConditionChanged(conditions, "Ready", metav1.ConditionTrue, "NewReason", "Message")).To(BeTrue()) + }) + + It("should return true when message changed", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "Reason", Message: "Old"}, + } + Expect(HasConditionChanged(conditions, "Ready", metav1.ConditionTrue, "Reason", "New")).To(BeTrue()) + }) + + It("should return false when nothing changed", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "Reason", Message: "Message"}, + } + Expect(HasConditionChanged(conditions, "Ready", metav1.ConditionTrue, "Reason", "Message")).To(BeFalse()) + }) + }) +}) diff --git a/pkg/status/transitions.go b/pkg/status/transitions.go new file mode 100644 index 0000000000..36dd1af272 --- /dev/null +++ b/pkg/status/transitions.go @@ -0,0 +1,121 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package status + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Event type constants +const ( + EventTypeNormal = "Normal" + EventTypeWarning = "Warning" +) + +// TransitionType represents the type of condition transition +type TransitionType string + +const ( + // TransitionAdded indicates a condition was added + TransitionAdded TransitionType = "Added" + // TransitionChanged indicates a condition's status changed + TransitionChanged TransitionType = "Changed" + // TransitionRemoved indicates a condition was removed + TransitionRemoved TransitionType = "Removed" + // TransitionUnchanged indicates a condition did not change + TransitionUnchanged TransitionType = "Unchanged" +) + +// Transition represents a change in a condition +type Transition struct { + Type TransitionType + ConditionType string + OldStatus metav1.ConditionStatus + NewStatus metav1.ConditionStatus + Reason string + Message string +} + +// EventType returns the Kubernetes event type for this transition +func (t *Transition) EventType() string { + // TODO: for now we put everything in normal events, in the future we can differentiate between warnings and normal events for draining and other cases + // switch t.Type { + // case TransitionAdded: + // if t.NewStatus == metav1.ConditionTrue { + // return EventTypeNormal + // } + // return EventTypeWarning + // case TransitionChanged: + // // TODO: for now we put everything in normal events, in the future we can differentiate between warnings and normal events for draining and other cases + // return EventTypeNormal + // case TransitionRemoved, TransitionUnchanged: + // return EventTypeNormal + // } + return EventTypeNormal +} + +// EventReason returns a suitable event reason for this transition +func (t *Transition) EventReason() string { + return t.ConditionType + string(t.Type) +} + +// DetectTransitions compares old and new conditions and returns a list of transitions +func DetectTransitions(oldConditions, newConditions []metav1.Condition) []Transition { + transitions := []Transition{} + + // Build map of old conditions for quick lookup + oldMap := make(map[string]metav1.Condition) + for _, cond := range oldConditions { + oldMap[cond.Type] = cond + } + + // Check for added or changed conditions + for _, newCond := range newConditions { + oldCond, exists := oldMap[newCond.Type] + if !exists { + transitions = append(transitions, Transition{ + Type: TransitionAdded, + ConditionType: newCond.Type, + NewStatus: newCond.Status, + Reason: newCond.Reason, + Message: newCond.Message, + }) + } else if oldCond.Status != newCond.Status || oldCond.Reason != newCond.Reason || oldCond.Message != newCond.Message { + transitions = append(transitions, Transition{ + Type: TransitionChanged, + ConditionType: newCond.Type, + OldStatus: oldCond.Status, + NewStatus: newCond.Status, + Reason: newCond.Reason, + Message: newCond.Message, + }) + } + // Mark as processed + delete(oldMap, newCond.Type) + } + + // Check for removed conditions + for _, oldCond := range oldMap { + transitions = append(transitions, Transition{ + Type: TransitionRemoved, + ConditionType: oldCond.Type, + OldStatus: oldCond.Status, + }) + } + + return transitions +} diff --git a/pkg/status/transitions_test.go b/pkg/status/transitions_test.go new file mode 100644 index 0000000000..6049b6acfc --- /dev/null +++ b/pkg/status/transitions_test.go @@ -0,0 +1,137 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package status + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = Describe("Transitions", func() { + Describe("DetectTransitions", func() { + It("should detect added conditions", func() { + oldConditions := []metav1.Condition{} + newConditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "TestReason", Message: "Test message"}, + } + + transitions := DetectTransitions(oldConditions, newConditions) + Expect(transitions).To(HaveLen(1)) + Expect(transitions[0].Type).To(Equal(TransitionAdded)) + Expect(transitions[0].ConditionType).To(Equal("Ready")) + Expect(transitions[0].NewStatus).To(Equal(metav1.ConditionTrue)) + }) + + It("should detect changed conditions", func() { + oldConditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionFalse, Reason: "OldReason", Message: "Old message"}, + } + newConditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "NewReason", Message: "New message"}, + } + + transitions := DetectTransitions(oldConditions, newConditions) + Expect(transitions).To(HaveLen(1)) + Expect(transitions[0].Type).To(Equal(TransitionChanged)) + Expect(transitions[0].ConditionType).To(Equal("Ready")) + Expect(transitions[0].OldStatus).To(Equal(metav1.ConditionFalse)) + Expect(transitions[0].NewStatus).To(Equal(metav1.ConditionTrue)) + }) + + It("should detect removed conditions", func() { + oldConditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "TestReason", Message: "Test message"}, + } + newConditions := []metav1.Condition{} + + transitions := DetectTransitions(oldConditions, newConditions) + Expect(transitions).To(HaveLen(1)) + Expect(transitions[0].Type).To(Equal(TransitionRemoved)) + Expect(transitions[0].ConditionType).To(Equal("Ready")) + }) + + It("should not detect unchanged conditions", func() { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "TestReason", Message: "Test message"}, + } + + transitions := DetectTransitions(conditions, conditions) + Expect(transitions).To(BeEmpty()) + }) + + It("should detect multiple transitions", func() { + oldConditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionFalse, Reason: "OldReason", Message: "Old"}, + {Type: "Degraded", Status: metav1.ConditionTrue, Reason: "Error", Message: "Error message"}, + } + newConditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "NewReason", Message: "New"}, + {Type: "Progressing", Status: metav1.ConditionTrue, Reason: "Applying", Message: "Applying changes"}, + } + + transitions := DetectTransitions(oldConditions, newConditions) + Expect(transitions).To(HaveLen(3)) // Ready changed, Progressing added, Degraded removed + }) + }) + + Describe("Transition EventType", func() { + It("should return Normal for new True condition", func() { + transition := Transition{ + Type: TransitionAdded, + NewStatus: metav1.ConditionTrue, + } + Expect(transition.EventType()).To(Equal("Normal")) + }) + + It("should return Normal for new False condition", func() { + transition := Transition{ + Type: TransitionAdded, + NewStatus: metav1.ConditionFalse, + } + Expect(transition.EventType()).To(Equal("Normal")) + }) + + It("should return Normal for transition to True", func() { + transition := Transition{ + Type: TransitionChanged, + OldStatus: metav1.ConditionFalse, + NewStatus: metav1.ConditionTrue, + } + Expect(transition.EventType()).To(Equal("Normal")) + }) + + It("should return Normal for transition from True to False", func() { + transition := Transition{ + Type: TransitionChanged, + OldStatus: metav1.ConditionTrue, + NewStatus: metav1.ConditionFalse, + } + Expect(transition.EventType()).To(Equal("Normal")) + }) + }) + + Describe("Transition EventReason", func() { + It("should combine condition type and transition type", func() { + transition := Transition{ + Type: TransitionChanged, + ConditionType: "Ready", + } + Expect(transition.EventReason()).To(Equal("ReadyChanged")) + }) + }) +}) diff --git a/test/conformance/tests/fixtures.go b/test/conformance/tests/fixtures.go index c14769bbe2..881314795d 100644 --- a/test/conformance/tests/fixtures.go +++ b/test/conformance/tests/fixtures.go @@ -1,9 +1,18 @@ package tests import ( + "context" + "fmt" + "time" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtimeclient "sigs.k8s.io/controller-runtime/pkg/client" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/clean" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" @@ -67,3 +76,29 @@ var _ = AfterSuite(func() { err := clean.All() Expect(err).NotTo(HaveOccurred()) }) + +// assertCondition verifies that a SR-IOV resource has the expected condition status. +// Supported object types: *sriovv1.SriovNetwork +func assertCondition(obj runtimeclient.Object, conditionType string, expectedStatus metav1.ConditionStatus) { + name := obj.GetName() + namespace := obj.GetNamespace() + + EventuallyWithOffset(1, func(g Gomega) { + err := clients.Get(context.Background(), + runtimeclient.ObjectKey{Name: name, Namespace: namespace}, obj) + g.Expect(err).ToNot(HaveOccurred()) + + var conditions []metav1.Condition + switch o := obj.(type) { + case *sriovv1.SriovNetwork: + conditions = o.Status.Conditions + default: + g.Expect(fmt.Errorf("unsupported object type %T", obj)).ToNot(HaveOccurred()) + } + + cond := meta.FindStatusCondition(conditions, conditionType) + g.Expect(cond).ToNot(BeNil(), "Condition %s not found on %T %s/%s", conditionType, obj, namespace, name) + g.Expect(cond.Status).To(Equal(expectedStatus), + "Expected condition %s to be %s but got %s", conditionType, expectedStatus, cond.Status) + }, 2*time.Minute, time.Second).Should(Succeed()) +} diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index d814c7e11d..1dcd224322 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -141,6 +141,10 @@ var _ = Describe("[sriov] operator", Ordered, func() { return clients.Get(context.Background(), runtimeclient.ObjectKey{Name: "test-apivolnetwork", Namespace: namespaces.Test}, netAttDef) }, (10+snoTimeoutMultiplier*110)*time.Second, 1*time.Second).ShouldNot(HaveOccurred()) + By("Verifying SriovNetwork conditions are set correctly") + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: sriovNetwork.Name, Namespace: operatorNamespace}}, sriovv1.ConditionReady, metav1.ConditionTrue) + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: sriovNetwork.Name, Namespace: operatorNamespace}}, sriovv1.ConditionDegraded, metav1.ConditionFalse) + podDefinition := pod.DefineWithNetworks([]string{sriovNetwork.Name}) created, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -194,6 +198,10 @@ var _ = Describe("[sriov] operator", Ordered, func() { return clients.Get(context.Background(), runtimeclient.ObjectKey{Name: "test-apivolnetwork", Namespace: namespaces.Test}, netAttDef) }, (10+snoTimeoutMultiplier*110)*time.Second, 1*time.Second).ShouldNot(HaveOccurred()) + By("Verifying SriovNetwork conditions are set correctly") + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: sriovNetwork.Name, Namespace: operatorNamespace}}, sriovv1.ConditionReady, metav1.ConditionTrue) + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: sriovNetwork.Name, Namespace: operatorNamespace}}, sriovv1.ConditionDegraded, metav1.ConditionFalse) + podDefinition := pod.DefineWithNetworks([]string{sriovNetwork.Name}) podDefinition.ObjectMeta.Labels = map[string]string{"anyname": "anyvalue"} created, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) @@ -323,6 +331,10 @@ var _ = Describe("[sriov] operator", Ordered, func() { validateNetworkFields(copyObj, spoofChkStatusValidation) + By("Verifying SriovNetwork conditions are set correctly") + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: copyObj.Name, Namespace: operatorNamespace}}, sriovv1.ConditionReady, metav1.ConditionTrue) + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: copyObj.Name, Namespace: operatorNamespace}}, sriovv1.ConditionDegraded, metav1.ConditionFalse) + By("removing sriov network") err = clients.Delete(context.Background(), sriovNetwork) Expect(err).ToNot(HaveOccurred()) @@ -366,6 +378,10 @@ var _ = Describe("[sriov] operator", Ordered, func() { validateNetworkFields(copyObj, trustChkStatusValidation) + By("Verifying SriovNetwork conditions are set correctly") + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: copyObj.Name, Namespace: operatorNamespace}}, sriovv1.ConditionReady, metav1.ConditionTrue) + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: copyObj.Name, Namespace: operatorNamespace}}, sriovv1.ConditionDegraded, metav1.ConditionFalse) + By("removing sriov network") err = clients.Delete(context.Background(), sriovNetwork) Expect(err).ToNot(HaveOccurred()) @@ -583,6 +599,10 @@ var _ = Describe("[sriov] operator", Ordered, func() { Expect(err).ToNot(HaveOccurred()) waitForNetAttachDef(sriovNetworkName, ns1) + By("Verifying SriovNetwork conditions are set correctly") + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: sriovNetworkName, Namespace: operatorNamespace}}, sriovv1.ConditionReady, metav1.ConditionTrue) + assertCondition(&sriovv1.SriovNetwork{ObjectMeta: metav1.ObjectMeta{Name: sriovNetworkName, Namespace: operatorNamespace}}, sriovv1.ConditionDegraded, metav1.ConditionFalse) + srNetwork := &sriovv1.SriovNetwork{} err = clients.Get(context.Background(), runtimeclient.ObjectKey{Namespace: operatorNamespace, Name: sriovNetworkName}, srNetwork) Expect(err).ToNot(HaveOccurred())