diff --git a/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml b/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml index b4ab00e104..8cb76ce906 100644 --- a/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml +++ b/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml @@ -512,9 +512,52 @@ spec: type: boolean type: object type: object + status: + description: Status holds the status of the OperatorConfig. + properties: + conditions: + description: Represents the latest available observations of a podmonitor's + current state. + items: + description: MonitoringCondition describes the condition of a PodMonitoring. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. + format: date-time + type: string + lastUpdateTime: + description: The last time this condition was updated. + format: date-time + type: string + message: + description: A human-readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: MonitoringConditionType is the type of MonitoringCondition. + type: string + required: + - status + - type + type: object + type: array + observedGeneration: + description: The generation observed by the controller. + format: int64 + type: integer + type: object type: object served: true storage: true + subresources: + status: {} - deprecated: true name: v1alpha1 schema: diff --git a/doc/api.md b/doc/api.md index 54a94bbb60..105140d16c 100644 --- a/doc/api.md +++ b/doc/api.md @@ -66,6 +66,8 @@ Resource Types:
  • OperatorConfig
  • +OperatorConfigStatus +
  • OperatorConfigValidator
  • OperatorFeatures @@ -948,9 +950,11 @@ CompressionType

    "gzip"

    - +

    CompressionGzip indicates that gzip compression should be used.

    +

    "none"

    - +

    CompressionNone indicates that no compression should be used.

    +

    @@ -1462,7 +1466,10 @@ string Description -

    "ConfigurationCreateSuccess"

    +

    "CollectorDaemonSetExists"

    +

    CollectorDaemonSetExists indicates whether the collector DaemonSet exists.

    + +

    "ConfigurationCreateSuccess"

    ConfigurationCreateSuccess indicates that the config generated from the monitoring resource was created successfully.

    @@ -1473,7 +1480,7 @@ monitoring resource was created successfully.

    -(Appears in: ClusterNodeMonitoring, PodMonitoringStatus, RulesStatus) +(Appears in: ClusterNodeMonitoring, OperatorConfigStatus, PodMonitoringStatus, RulesStatus)

    MonitoringStatus holds status information of a monitoring resource.

    @@ -1734,6 +1741,54 @@ ScalingSpec

    Scaling contains configuration options for scaling GMP.

    + + +status
    + + +OperatorConfigStatus + + + + +

    Status holds the status of the OperatorConfig.

    + + + + +

    +OperatorConfigStatus + +

    +

    +(Appears in: OperatorConfig) +

    +
    +

    OperatorConfigStatus holds status information of the OperatorConfig.

    +
    + + + + + + + + + + + +
    FieldDescription
    +MonitoringStatus
    + + +MonitoringStatus + + +
    +

    +(Members of MonitoringStatus are embedded into this type.) +

    +

    diff --git a/manifests/setup.yaml b/manifests/setup.yaml index d2ca1405a9..2c4ed19353 100644 --- a/manifests/setup.yaml +++ b/manifests/setup.yaml @@ -2318,9 +2318,49 @@ spec: type: boolean type: object type: object + status: + description: Status holds the status of the OperatorConfig. + properties: + conditions: + description: Represents the latest available observations of a podmonitor's current state. + items: + description: MonitoringCondition describes the condition of a PodMonitoring. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status to another. + format: date-time + type: string + lastUpdateTime: + description: The last time this condition was updated. + format: date-time + type: string + message: + description: A human-readable message indicating details about the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: MonitoringConditionType is the type of MonitoringCondition. + type: string + required: + - status + - type + type: object + type: array + observedGeneration: + description: The generation observed by the controller. + format: int64 + type: integer + type: object type: object served: true storage: true + subresources: + status: {} - deprecated: true name: v1alpha1 schema: diff --git a/pkg/operator/apis/monitoring/v1/monitoring_types.go b/pkg/operator/apis/monitoring/v1/monitoring_types.go index 82e0e7645b..1205a2eb41 100644 --- a/pkg/operator/apis/monitoring/v1/monitoring_types.go +++ b/pkg/operator/apis/monitoring/v1/monitoring_types.go @@ -35,6 +35,8 @@ const ( // ConfigurationCreateSuccess indicates that the config generated from the // monitoring resource was created successfully. ConfigurationCreateSuccess MonitoringConditionType = "ConfigurationCreateSuccess" + // CollectorDaemonSetExists indicates whether the collector DaemonSet exists. + CollectorDaemonSetExists MonitoringConditionType = "CollectorDaemonSetExists" ) // MonitoringCondition describes the condition of a PodMonitoring. @@ -68,6 +70,7 @@ func NewDefaultConditions(now metav1.Time) []MonitoringCondition { } } +// IsValid returns true if the condition has a valid type and status. func (cond *MonitoringCondition) IsValid() bool { return cond.Type != "" && cond.Status != "" } @@ -107,7 +110,7 @@ func (status *MonitoringStatus) SetMonitoringCondition(gen int64, now metav1.Tim cond.LastUpdateTime = now // Check if the condition results in a transition of status state. - if old := conds[cond.Type]; old.Status == cond.Status { + if old := conds[cond.Type]; old != nil && old.Status == cond.Status { cond.LastTransitionTime = old.LastTransitionTime } else { cond.LastTransitionTime = cond.LastUpdateTime diff --git a/pkg/operator/apis/monitoring/v1/operator_types.go b/pkg/operator/apis/monitoring/v1/operator_types.go index 76ddbcaedf..cbdaf5dcff 100644 --- a/pkg/operator/apis/monitoring/v1/operator_types.go +++ b/pkg/operator/apis/monitoring/v1/operator_types.go @@ -28,6 +28,7 @@ import ( // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // +kubebuilder:storageversion +// +kubebuilder:subresource:status type OperatorConfig struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` @@ -47,6 +48,13 @@ type OperatorConfig struct { Features OperatorFeatures `json:"features,omitempty"` // Scaling contains configuration options for scaling GMP. Scaling ScalingSpec `json:"scaling,omitempty"` + // Status holds the status of the OperatorConfig. + Status OperatorConfigStatus `json:"status,omitempty"` +} + +// GetMonitoringStatus returns the status of the OperatorConfig. +func (oc *OperatorConfig) GetMonitoringStatus() *MonitoringStatus { + return &oc.Status.MonitoringStatus } func (oc *OperatorConfig) Validate() error { @@ -213,8 +221,12 @@ type TargetStatusSpec struct { // +kubebuilder:validation:Enum=none;gzip type CompressionType string -const CompressionNone CompressionType = "none" -const CompressionGzip CompressionType = "gzip" +const ( + // CompressionNone indicates that no compression should be used. + CompressionNone CompressionType = "none" + // CompressionGzip indicates that gzip compression should be used. + CompressionGzip CompressionType = "gzip" +) // KubeletScraping allows enabling scraping of the Kubelets' metric endpoints. type KubeletScraping struct { @@ -225,6 +237,11 @@ type KubeletScraping struct { TLSInsecureSkipVerify bool `json:"tlsInsecureSkipVerify,omitempty"` } +// OperatorConfigStatus holds status information of the OperatorConfig. +type OperatorConfigStatus struct { + MonitoringStatus `json:",inline"` +} + // ExportFilters provides mechanisms to filter the scraped data that's sent to GMP. type ExportFilters struct { // A list of Prometheus time series matchers. Every time series must match at least one diff --git a/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go b/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go index eda798b774..f18d08c904 100644 --- a/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go +++ b/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go @@ -768,6 +768,7 @@ func (in *OperatorConfig) DeepCopyInto(out *OperatorConfig) { } out.Features = in.Features out.Scaling = in.Scaling + in.Status.DeepCopyInto(&out.Status) return } @@ -822,6 +823,23 @@ func (in *OperatorConfigList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OperatorConfigStatus) DeepCopyInto(out *OperatorConfigStatus) { + *out = *in + in.MonitoringStatus.DeepCopyInto(&out.MonitoringStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OperatorConfigStatus. +func (in *OperatorConfigStatus) DeepCopy() *OperatorConfigStatus { + if in == nil { + return nil + } + out := new(OperatorConfigStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OperatorConfigValidator) DeepCopyInto(out *OperatorConfigValidator) { *out = *in diff --git a/pkg/operator/collection.go b/pkg/operator/collection.go index 1f7d190e9e..93076de691 100644 --- a/pkg/operator/collection.go +++ b/pkg/operator/collection.go @@ -77,7 +77,10 @@ func setupCollectionControllers(op *Operator) error { // at least once initially. For( &monitoringv1.OperatorConfig{}, - builder.WithPredicates(objFilterOperatorConfig), + builder.WithPredicates( + objFilterOperatorConfig, + predicate.GenerationChangedPredicate{}, + ), ). // Any update to a PodMonitoring requires regenerating the config. Watches( @@ -163,6 +166,7 @@ func (r *collectionReconciler) Reconcile(ctx context.Context, req reconcile.Requ // Fetch OperatorConfig if it exists. if err := r.client.Get(ctx, req.NamespacedName, &config); apierrors.IsNotFound(err) { logger.Info("no operatorconfig created yet") + return reconcile.Result{}, nil } else if err != nil { return reconcile.Result{}, fmt.Errorf("get operatorconfig for incoming: %q: %w", req.String(), err) } @@ -171,10 +175,13 @@ func (r *collectionReconciler) Reconcile(ctx context.Context, req reconcile.Requ return reconcile.Result{}, fmt.Errorf("ensure collector secrets: %w", err) } // Deploy Prometheus collector as a node agent. - if err := r.ensureCollectorDaemonSet(ctx); err != nil { + if changed, err := r.ensureCollectorDaemonSet(ctx, &config); err != nil { return reconcile.Result{}, fmt.Errorf("ensure collector daemon set: %w", err) + } else if changed { + if err := patchMonitoringStatus(ctx, r.client, &config, config.GetMonitoringStatus()); err != nil { + return reconcile.Result{}, fmt.Errorf("patch operatorconfig status: %w", err) + } } - if err := r.ensureCollectorConfig(ctx, &config.Collection, config.Features.Config.Compression, config.Exports); err != nil { return reconcile.Result{}, fmt.Errorf("ensure collector config: %w", err) } @@ -216,18 +223,36 @@ func (r *collectionReconciler) ensureCollectorSecrets(ctx context.Context, spec } // ensureCollectorDaemonSet populates the collector DaemonSet with operator-provided values. -func (r *collectionReconciler) ensureCollectorDaemonSet(ctx context.Context) error { +func (r *collectionReconciler) ensureCollectorDaemonSet(ctx context.Context, config *monitoringv1.OperatorConfig) (bool, error) { logger, _ := logr.FromContext(ctx) - var ds appsv1.DaemonSet - err := r.client.Get(ctx, client.ObjectKey{Namespace: r.opts.OperatorNamespace, Name: NameCollector}, &ds) + // Build the DaemonSet. + // For now, we only ensure the DaemonSet exists. + // In the future, we might want to reconcile the DaemonSet spec as well. + ds := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: NameCollector, + Namespace: r.opts.OperatorNamespace, + }, + } + + err := r.client.Get(ctx, client.ObjectKeyFromObject(ds), ds) // Some users deliberately not want to run the collectors. Only emit a warning but don't cause // retries as this logic gets re-triggered anyway if the DaemonSet is created later. + cond := &monitoringv1.MonitoringCondition{ + Type: monitoringv1.CollectorDaemonSetExists, + Status: corev1.ConditionUnknown, + } if apierrors.IsNotFound(err) { logger.Error(err, "collector DaemonSet does not exist") - return nil - } - return err + cond.Status = corev1.ConditionFalse + cond.Reason = "DaemonSetMissing" + cond.Message = "Collector DaemonSet does not exist." + err = nil + } else if err == nil { + cond.Status = corev1.ConditionTrue + } + return config.Status.SetMonitoringCondition(config.GetGeneration(), metav1.Now(), cond), err } func gzipData(data []byte) ([]byte, error) { diff --git a/pkg/operator/collection_test.go b/pkg/operator/collection_test.go index fc7f331b9f..b48a6639ad 100644 --- a/pkg/operator/collection_test.go +++ b/pkg/operator/collection_test.go @@ -60,7 +60,8 @@ func newFakeClientBuilder() *fake.ClientBuilder { WithStatusSubresource(&monitoringv1.ClusterNodeMonitoring{}). WithStatusSubresource(&monitoringv1.Rules{}). WithStatusSubresource(&monitoringv1.ClusterRules{}). - WithStatusSubresource(&monitoringv1.GlobalRules{}) + WithStatusSubresource(&monitoringv1.GlobalRules{}). + WithStatusSubresource(&monitoringv1.OperatorConfig{}) } func TestCollectionReconcile(t *testing.T) { @@ -500,6 +501,97 @@ func TestCollectionReconcile(t *testing.T) { } } +func TestCollectionReconcile_OperatorConfigStatus(t *testing.T) { + testCases := []struct { + desc string + daemonSet *appsv1.DaemonSet + expectedStatus corev1.ConditionStatus + expectedReason string + }{ + { + desc: "daemonset missing", + daemonSet: nil, + expectedStatus: corev1.ConditionFalse, + expectedReason: "DaemonSetMissing", + }, + { + desc: "daemonset exists", + daemonSet: &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: NameCollector, + Namespace: "gmp-system", + }, + }, + expectedStatus: corev1.ConditionTrue, + expectedReason: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + logger := testr.New(t) + ctx := logr.NewContext(t.Context(), logger) + opts := Options{ + ProjectID: "test-proj", + Location: "test-loc", + Cluster: "test-cluster", + OperatorNamespace: "gmp-system", + PublicNamespace: "gmp-public", + } + + objs := []client.Object{ + &monitoringv1.OperatorConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: NameOperatorConfig, + Namespace: opts.PublicNamespace, + }, + }, + } + if tc.daemonSet != nil { + objs = append(objs, tc.daemonSet) + } + + kubeClient := newFakeClientBuilder().WithObjects(objs...).Build() + r := newCollectionReconciler(kubeClient, opts) + + _, err := r.Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: opts.PublicNamespace, + Name: NameOperatorConfig, + }, + }) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + var config monitoringv1.OperatorConfig + if err := kubeClient.Get(ctx, types.NamespacedName{Namespace: opts.PublicNamespace, Name: NameOperatorConfig}, &config); err != nil { + t.Fatalf("Get OperatorConfig failed: %v", err) + } + + cond := getCondition(config.Status.Conditions, monitoringv1.CollectorDaemonSetExists) + if cond == nil { + t.Fatal("CollectorDaemonSetExists condition not found") + } + if cond.Status != tc.expectedStatus { + t.Errorf("expected status %v, got %v", tc.expectedStatus, cond.Status) + } + if tc.expectedReason != "" && cond.Reason != tc.expectedReason { + t.Errorf("expected reason %v, got %v", tc.expectedReason, cond.Reason) + } + }) + } +} + +func getCondition(conditions []monitoringv1.MonitoringCondition, t monitoringv1.MonitoringConditionType) *monitoringv1.MonitoringCondition { + for _, c := range conditions { + if c.Type == t { + return &c + } + } + return nil +} + func TestSetConfigMapData(t *testing.T) { const data = "§psdmopnwepg30t-3ivp msdlc\n\r`1-k`23dvpdmfpdfgfn-p"