diff --git a/cmd/hyperconverged-cluster-operator/main.go b/cmd/hyperconverged-cluster-operator/main.go index 669d7113ce..9f551b284e 100644 --- a/cmd/hyperconverged-cluster-operator/main.go +++ b/cmd/hyperconverged-cluster-operator/main.go @@ -7,6 +7,7 @@ import ( "maps" "os" + "github.com/go-logr/logr" netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" openshiftconfigv1 "github.com/openshift/api/config/v1" consolev1 "github.com/openshift/api/console/v1" @@ -143,6 +144,7 @@ func main() { // Detect OpenShift version ctx := context.Background() + ctx = logr.NewContext(ctx, logger) err = cmdcommon.ClusterInitializations(ctx, apiClient, scheme, logger) cmdHelper.ExitOnError(err, "Cannot detect cluster type") @@ -153,6 +155,7 @@ func main() { // Determine Perses availability before creating the manager so we can shape the cache accordingly persesAvailable := hcoutil.IsPersesAvailable(ctx, apiClient) + logger.Info("Perses CRD availability at startup", "available", persesAvailable) // Create a new Cmd to provide shared dependencies and start components mgr, err := manager.New(cfg, getManagerOptions(operatorNamespace, needLeaderElection, ci, scheme, persesAvailable)) @@ -237,12 +240,9 @@ func main() { logger.Error(err, "unable to create controller", "controller", "Observability") os.Exit(1) } - // Register Perses controller only if CRDs are available; otherwise avoid watching unknown types. - if persesAvailable { - if err = perses.SetupPersesWithManager(mgr, ownresources.GetDeploymentRef()); err != nil { - logger.Error(err, "unable to create controller", "controller", "ObservabilityPerses") - os.Exit(1) - } + if err = perses.SetupPersesWithManager(ctx, mgr, ownresources.GetDeploymentRef()); err != nil { + logger.Error(err, "unable to create controller", "controller", "ObservabilityPerses") + os.Exit(1) } } diff --git a/controllers/crd/crd_controller.go b/controllers/crd/crd_controller.go index 2b28d81c46..03928976a9 100644 --- a/controllers/crd/crd_controller.go +++ b/controllers/crd/crd_controller.go @@ -93,7 +93,7 @@ func (r *ReconcileCRD) operatorRestart() { // Reconcile refreshes KubeDesheduler view on ClusterInfo singleton func (r *ReconcileCRD) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { - log.Info("Triggered by a CRD") + log.Info("Triggered by a CRD", "CRD", req.Name) if !hcoutil.GetClusterInfo().IsDeschedulerAvailable() { if hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(ctx, r.client) { log.Info("KubeDescheduler CRD got deployed, restarting the operator to reconfigure the operator for the new kind") @@ -103,10 +103,14 @@ func (r *ReconcileCRD) Reconcile(ctx context.Context, req reconcile.Request) (re } // If Perses CRDs became available after boot, restart once to register Perses controller and cache the new GVKs. - if !r.persesAvailableOnBoot && hcoutil.IsPersesAvailable(ctx, r.client) { - log.Info("Perses CRDs detected, restarting the operator to register the Perses controller") - r.eventEmitter.EmitEvent(nil, corev1.EventTypeNormal, "Perses CRDs detected", "Restarting the operator to register the Perses controller") - r.operatorRestart() + if !r.persesAvailableOnBoot { + available := hcoutil.IsPersesAvailable(ctx, r.client) + log.Info("Perses CRDs availability after boot", "available", available, "CRD", req.Name) + if available { + log.Info("Perses CRDs detected, restarting the operator to register the Perses controller") + r.eventEmitter.EmitEvent(nil, corev1.EventTypeNormal, "Perses CRDs detected", "Restarting the operator to register the Perses controller") + r.operatorRestart() + } } return reconcile.Result{}, nil diff --git a/controllers/perses/perses_controller.go b/controllers/perses/perses_controller.go index 06eeeb9ac1..b268b57097 100644 --- a/controllers/perses/perses_controller.go +++ b/controllers/perses/perses_controller.go @@ -82,14 +82,16 @@ func (r *PersesReconciler) Reconcile(ctx context.Context, req reconcile.Request) return reconcile.Result{}, err } -func SetupPersesWithManager(mgr manager.Manager, ownerRef metav1.OwnerReference) error { +func SetupPersesWithManager(ctx context.Context, mgr manager.Manager, ownerRef metav1.OwnerReference) error { persesLog.Info("Setting up Perses controller") // Skip registration cleanly when Perses CRDs are not installed (e.g., unit/CI envs) - if !checkPersesAvailable(context.Background(), mgr.GetClient()) { + available := checkPersesAvailable(ctx, mgr.GetClient()) + if !available { persesLog.Info("Perses CRDs not found; skipping Perses controller registration") return nil } + persesLog.Info("Perses CRDs detected; registering Perses controller") namespace := hcoutil.GetOperatorNamespaceFromEnv() dashboards, err := initDashboards(namespace, persesLog) diff --git a/controllers/perses/perses_test.go b/controllers/perses/perses_test.go index afd1a77d81..a68c948742 100644 --- a/controllers/perses/perses_test.go +++ b/controllers/perses/perses_test.go @@ -164,7 +164,8 @@ var _ = Describe("Perses controller", func() { }) Context("SetupPersesWithManager guard", func() { - It("should skip controller registration when Perses CRDs are not available", func() { + It("should skip controller registration when Perses CRDs are not available", func(ctx context.Context) { + ctx = logr.NewContext(ctx, GinkgoLogr) old := checkPersesAvailable checkPersesAvailable = func(_ context.Context, _ client.Client) bool { return false } defer func() { checkPersesAvailable = old }() @@ -174,7 +175,7 @@ var _ = Describe("Perses controller", func() { mgr, err := commontestutils.NewManagerMock(nil, manager.Options{Scheme: scheme.Scheme}, cl, GinkgoLogr) Expect(err).ToNot(HaveOccurred()) - err = SetupPersesWithManager(mgr, metav1.OwnerReference{}) + err = SetupPersesWithManager(ctx, mgr, metav1.OwnerReference{}) Expect(err).ToNot(HaveOccurred()) }) }) diff --git a/pkg/util/cluster.go b/pkg/util/cluster.go index 9be5f7fd63..25fd2df2d8 100644 --- a/pkg/util/cluster.go +++ b/pkg/util/cluster.go @@ -3,6 +3,7 @@ package util import ( "context" "errors" + "fmt" "os" "slices" @@ -228,8 +229,10 @@ func isNADExists(ctx context.Context, cl client.Client, logger logr.Logger) bool // IsPersesAvailable returns true when the Perses CRDs are installed in the cluster. func IsPersesAvailable(ctx context.Context, cl client.Client) bool { logger := logr.FromContextOrDiscard(ctx) - return isCRDExists(ctx, cl, PersesDashboardsCRDName, logger) && - isCRDExists(ctx, cl, PersesDatasourcesCRDName, logger) + dashboardsAvailable := isCRDExists(ctx, cl, PersesDashboardsCRDName, logger) + datasourcesAvailable := isCRDExists(ctx, cl, PersesDatasourcesCRDName, logger) + logger.Info("Perses CRD availability check", "dashboards", dashboardsAvailable, "datasources", datasourcesAvailable) + return dashboardsAvailable && datasourcesAvailable } func isCRDExists(ctx context.Context, cl client.Client, crdName string, logger logr.Logger) bool { @@ -237,17 +240,26 @@ func isCRDExists(ctx context.Context, cl client.Client, crdName string, logger l key := client.ObjectKey{Name: crdName} err := cl.Get(ctx, key, found) if err != nil { - if !apierrors.IsNotFound(err) { - logger.Error(err, "cannot find CRD", "CRD", crdName) + if apierrors.IsNotFound(err) || meta.IsNoMatchError(err) { + logger.Info("CRD not found", "CRD", crdName, "notFound", apierrors.IsNotFound(err), "noMatch", meta.IsNoMatchError(err)) } else { - logger.Info("CRD not found", "CRD", crdName) + logger.Error(err, "cannot find CRD", "CRD", crdName) } } else { - logger.Info("CRD found", "CRD", crdName) + logger.Info("CRD found", "CRD", crdName, "resourceVersion", found.ResourceVersion, "conditions", summarizeCRDConditions(found)) } return err == nil } +func summarizeCRDConditions(crd *apiextensionsv1.CustomResourceDefinition) []string { + summary := make([]string, 0, len(crd.Status.Conditions)) + for _, cond := range crd.Status.Conditions { + entry := fmt.Sprintf("%s=%s(%s)", cond.Type, cond.Status, cond.Reason) + summary = append(summary, entry) + } + return summary +} + func init() { clusterInfo = &ClusterInfoImp{ runningLocally: IsRunModeLocal(), diff --git a/tests/func-tests/assets/persesdashboards.crd.yaml b/tests/func-tests/assets/persesdashboards.crd.yaml new file mode 100644 index 0000000000..98c76f5a42 --- /dev/null +++ b/tests/func-tests/assets/persesdashboards.crd.yaml @@ -0,0 +1,270 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.2 + observability.openshift.io/api-support: DevPreview + operatorframework.io/installed-alongside-36062ba172442a9a: openshift-cluster-observability-operator/cluster-observability-operator.v1.3.1 + labels: + operators.coreos.com/cluster-observability-operator.openshift-cluster-observability: "" + name: persesdashboards.perses.dev +spec: + conversion: + strategy: None + group: perses.dev + names: + kind: PersesDashboard + listKind: PersesDashboardList + plural: persesdashboards + shortNames: + - perdb + singular: persesdashboard + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: PersesDashboard is the Schema for the persesdashboards API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + datasources: + additionalProperties: + properties: + default: + type: boolean + display: + properties: + description: + type: string + name: + type: string + type: object + plugin: + description: |- + Plugin will contain the datasource configuration. + The data typed is available in Cue. + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + required: + - default + - plugin + type: object + description: Datasources is an optional list of datasource definition. + type: object + display: + properties: + description: + type: string + name: + type: string + type: object + duration: + description: Duration is the default time range to use when getting + data to fill the dashboard + format: duration + pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$ + type: string + layouts: + items: + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + type: array + panels: + additionalProperties: + properties: + kind: + type: string + spec: + properties: + display: + properties: + description: + type: string + name: + type: string + required: + - name + type: object + links: + items: + properties: + name: + type: string + renderVariables: + type: boolean + targetBlank: + type: boolean + tooltip: + type: string + url: + type: string + required: + - url + type: object + type: array + plugin: + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + queries: + items: + properties: + kind: + type: string + spec: + properties: + plugin: + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + required: + - plugin + type: object + required: + - kind + - spec + type: object + type: array + required: + - display + - plugin + type: object + required: + - kind + - spec + type: object + type: object + refreshInterval: + description: RefreshInterval is the default refresh interval to use + when landing on the dashboard + format: duration + pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$ + type: string + variables: + items: + properties: + kind: + description: Kind is the type of the variable. Depending on + the value of Kind, it will change the content of Spec. + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + type: array + required: + - duration + - layouts + - panels + type: object + status: + description: PersesDashboardStatus defines the observed state of PersesDashboard + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/tests/func-tests/assets/persesdatasources.crd.yaml b/tests/func-tests/assets/persesdatasources.crd.yaml new file mode 100644 index 0000000000..e3659be331 --- /dev/null +++ b/tests/func-tests/assets/persesdatasources.crd.yaml @@ -0,0 +1,302 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.2 + observability.openshift.io/api-support: DevPreview + operatorframework.io/installed-alongside-36062ba172442a9a: openshift-cluster-observability-operator/cluster-observability-operator.v1.3.1 + labels: + operators.coreos.com/cluster-observability-operator.openshift-cluster-observability: "" + name: persesdatasources.perses.dev +spec: + conversion: + strategy: None + group: perses.dev + names: + kind: PersesDatasource + listKind: PersesDatasourceList + plural: persesdatasources + shortNames: + - perds + singular: persesdatasource + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: PersesDatasource is the Schema for the PersesDatasources API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + client: + properties: + basicAuth: + description: BasicAuth basic auth config for perses client + properties: + name: + description: Name of basic auth k8s resource (when type is + secret or configmap) + type: string + namespace: + description: Namsespace of certificate k8s resource (when + type is secret or configmap) + type: string + password_path: + description: Path to password + type: string + type: + description: Type source type of secret + enum: + - secret + - configmap + - file + type: string + username: + description: Username for basic auth + type: string + required: + - password_path + - type + - username + type: object + kubernetesAuth: + description: KubernetesAuth configuration for perses client + properties: + enable: + description: Enable kubernetes auth for perses client + type: boolean + required: + - enable + type: object + oauth: + description: OAuth configuration for perses client + properties: + authStyle: + description: |- + AuthStyle optionally specifies how the endpoint wants the + client ID & client secret sent. The zero value means to + auto-detect. + type: integer + clientIDPath: + description: Path to client id + type: string + clientSecretPath: + description: Path to client secret + type: string + endpointParams: + additionalProperties: + items: + type: string + type: array + description: EndpointParams specifies additional parameters + for requests to the token endpoint. + type: object + name: + description: Name of basic auth k8s resource (when type is + secret or configmap) + type: string + namespace: + description: Namsespace of certificate k8s resource (when + type is secret or configmap) + type: string + scopes: + description: Scope specifies optional requested permissions. + items: + type: string + type: array + tokenURL: + description: |- + TokenURL is the resource server's token endpoint + URL. This is a constant specific to each server. + type: string + type: + description: Type source type of secret + enum: + - secret + - configmap + - file + type: string + required: + - tokenURL + - type + type: object + tls: + description: TLS the equivalent to the tls_config for perses client + properties: + caCert: + description: CaCert to verify the perses certificate + properties: + certPath: + description: Path to Certificate + type: string + name: + description: Name of basic auth k8s resource (when type + is secret or configmap) + type: string + namespace: + description: Namsespace of certificate k8s resource (when + type is secret or configmap) + type: string + privateKeyPath: + description: Path to Private key certificate + type: string + type: + description: Type source type of secret + enum: + - secret + - configmap + - file + type: string + required: + - certPath + - type + type: object + enable: + description: Enable TLS connection to perses + type: boolean + insecureSkipVerify: + description: InsecureSkipVerify skip verify of perses certificate + type: boolean + userCert: + description: UserCert client cert/key for mTLS + properties: + certPath: + description: Path to Certificate + type: string + name: + description: Name of basic auth k8s resource (when type + is secret or configmap) + type: string + namespace: + description: Namsespace of certificate k8s resource (when + type is secret or configmap) + type: string + privateKeyPath: + description: Path to Private key certificate + type: string + type: + description: Type source type of secret + enum: + - secret + - configmap + - file + type: string + required: + - certPath + - type + type: object + required: + - enable + type: object + type: object + config: + properties: + default: + type: boolean + display: + properties: + description: + type: string + name: + type: string + type: object + plugin: + description: |- + Plugin will contain the datasource configuration. + The data typed is available in Cue. + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + required: + - default + - plugin + type: object + type: object + status: + description: PersesDatasourceStatus defines the observed state of PersesDatasource + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/tests/func-tests/client.go b/tests/func-tests/client.go index 9fe18212b2..668a6b8b45 100644 --- a/tests/func-tests/client.go +++ b/tests/func-tests/client.go @@ -9,6 +9,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + persesv1alpha1 "github.com/rhobs/perses-operator/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -84,6 +85,7 @@ func setScheme(cli client.Client) { monitoringv1.AddToScheme, deschedulerv1.AddToScheme, apiextensionsv1.AddToScheme, + persesv1alpha1.AddToScheme, } for _, f := range funcs { diff --git a/tests/func-tests/logs.go b/tests/func-tests/logs.go new file mode 100644 index 0000000000..8e0cd3cb0e --- /dev/null +++ b/tests/func-tests/logs.go @@ -0,0 +1,103 @@ +package tests + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/onsi/ginkgo/v2" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +const ( + hcoOperatorLabelValue = "hyperconverged-cluster-operator" + hcoWebhookLabelValue = "hyperconverged-cluster-webhook" +) + +// LogCaptureOptions controls how pod logs are collected for debugging. +type LogCaptureOptions struct { + // Since limits logs to those generated after the given time. + Since *time.Time + // IncludePrevious also dumps logs from the previous container instance. + IncludePrevious bool +} + +// DumpHCOPodLogs prints HCO operator and webhook pod logs to the Ginkgo output. +// This is intended for temporary debugging when tests trigger pod restarts. +func DumpHCOPodLogs(ctx context.Context, stage string, options LogCaptureOptions) { + ginkgo.GinkgoHelper() + + if InstallNamespace == "" { + ginkgo.GinkgoLogr.Info("Skipping HCO log capture: install namespace is empty", "stage", stage) + return + } + + cli := GetK8sClientSet() + dumpPodsLogs(ctx, cli, hcoOperatorLabelValue, stage, options) + dumpPodsLogs(ctx, cli, hcoWebhookLabelValue, stage, options) +} + +func dumpPodsLogs(ctx context.Context, cli *kubernetes.Clientset, labelValue, stage string, options LogCaptureOptions) { + ginkgo.GinkgoHelper() + + selector := fmt.Sprintf("name=%s", labelValue) + pods, err := cli.CoreV1().Pods(InstallNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: selector, + }) + if err != nil { + ginkgo.GinkgoLogr.Error(err, "Failed to list pods for log capture", "stage", stage, "selector", selector) + return + } + if len(pods.Items) == 0 { + ginkgo.GinkgoLogr.Info("No pods found for log capture", "stage", stage, "selector", selector) + return + } + + for _, pod := range pods.Items { + for _, container := range pod.Spec.Containers { + streamLogs(ctx, cli, pod, container.Name, stage, options) + } + } +} + +func streamLogs(ctx context.Context, cli *kubernetes.Clientset, pod v1.Pod, containerName, stage string, options LogCaptureOptions) { + ginkgo.GinkgoHelper() + + logOpts := &v1.PodLogOptions{ + Container: containerName, + Timestamps: true, + } + if options.Since != nil { + logOpts.SinceTime = &metav1.Time{Time: *options.Since} + } + + writeLogStream(ctx, cli, pod, containerName, stage, logOpts, false) + if options.IncludePrevious { + previousOpts := *logOpts + previousOpts.Previous = true + writeLogStream(ctx, cli, pod, containerName, stage, &previousOpts, true) + } +} + +func writeLogStream(ctx context.Context, cli *kubernetes.Clientset, pod v1.Pod, containerName, stage string, logOpts *v1.PodLogOptions, previous bool) { + stream, err := cli.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, logOpts).Stream(ctx) + if err != nil { + ginkgo.GinkgoLogr.Error(err, "Failed to stream pod logs", "stage", stage, "pod", pod.Name, "container", containerName, "previous", previous) + return + } + defer func() { + if err := stream.Close(); err != nil { + ginkgo.GinkgoLogr.Error(err, "Failed to close pod log stream", "stage", stage, "pod", pod.Name, "container", containerName, "previous", previous) + } + }() + + fmt.Fprintf(ginkgo.GinkgoWriter, "\n===== HCO logs (%s) pod=%s container=%s previous=%t =====\n", stage, pod.Name, containerName, previous) + if _, err := io.Copy(ginkgo.GinkgoWriter, stream); err != nil { + ginkgo.GinkgoLogr.Error(err, "Failed to copy pod logs", "stage", stage, "pod", pod.Name, "container", containerName, "previous", previous) + return + } + fmt.Fprintln(ginkgo.GinkgoWriter, "\n===== HCO logs end =====") +} diff --git a/tests/func-tests/observability_controller_test.go b/tests/func-tests/observability_controller_test.go index 326c2585d3..ab459017c5 100644 --- a/tests/func-tests/observability_controller_test.go +++ b/tests/func-tests/observability_controller_test.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "fmt" "net/http" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -47,6 +48,13 @@ var _ = Describe("Observability Controller", Label(tests.OpenshiftLabel, testNam Context("PodDisruptionBudgetAtLimit", func() { It("should be silenced", func(ctx context.Context) { + start := time.Now() + before := start.Add(-10 * time.Minute) + tests.DumpHCOPodLogs(ctx, "before observability controller test", tests.LogCaptureOptions{Since: &before}) + DeferCleanup(func(ctx context.Context) { + tests.DumpHCOPodLogs(ctx, "after observability controller test", tests.LogCaptureOptions{Since: &start, IncludePrevious: true}) + }) + amAPI := alertmanager.NewAPI(httpClient, alertmanagerURL, cliConfig.BearerToken) amSilences, err := amAPI.ListSilences() diff --git a/tests/func-tests/perses_dynamic_crd_test.go b/tests/func-tests/perses_dynamic_crd_test.go new file mode 100644 index 0000000000..c732e5ed26 --- /dev/null +++ b/tests/func-tests/perses_dynamic_crd_test.go @@ -0,0 +1,130 @@ +package tests_test + +import ( + "bytes" + "context" + _ "embed" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/yaml" + "sigs.k8s.io/controller-runtime/pkg/client" + + persesv1alpha1 "github.com/rhobs/perses-operator/api/v1alpha1" + + hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util" + tests "github.com/kubevirt/hyperconverged-cluster-operator/tests/func-tests" +) + +//go:embed assets/persesdashboards.crd.yaml +var dashboardsCRD []byte + +//go:embed assets/persesdatasources.crd.yaml +var datasourcesCRD []byte + +var _ = Describe("Perses dynamic CRD gating", Label(tests.OpenshiftLabel, "perses"), func() { + var cli client.Client + var crdsPreExisted bool + var testStart time.Time + + BeforeEach(func(ctx context.Context) { + testStart = time.Now() + before := testStart.Add(-10 * time.Minute) + tests.DumpHCOPodLogs(ctx, "before perses dynamic CRD gating", tests.LogCaptureOptions{Since: &before}) + DeferCleanup(func(ctx context.Context) { + tests.DumpHCOPodLogs(ctx, "after perses dynamic CRD gating", tests.LogCaptureOptions{Since: &testStart, IncludePrevious: true}) + }) + + cli = tests.GetControllerRuntimeClient() + // Check if both dashboards and datasources CRDs already exist + crdsPreExisted = areCRDsDeployed(ctx, cli) + + if !crdsPreExisted { + // Apply required CRDs from embedded YAMLs: dashboards and datasources + Expect(applyCRDFromBytes(ctx, cli, dashboardsCRD)).To(Succeed()) + Expect(applyCRDFromBytes(ctx, cli, datasourcesCRD)).To(Succeed()) + tests.WaitForHCOOperatorRollout(ctx) + } + + DeferCleanup(func(ctx context.Context) { + // Only clean up if we created the CRDs in this test run + if !crdsPreExisted { + Expect(deleteCRD(ctx, cli, hcoutil.PersesDashboardsCRDName)).To(Succeed()) + Expect(deleteCRD(ctx, cli, hcoutil.PersesDatasourcesCRDName)).To(Succeed()) + } + }) + }) + + It("creates dashboards and datasources when CRDs are available", func(ctx context.Context) { + // Eventually, HCO should create at least one PersesDashboard and one PersesDatasource in its namespace + Eventually(func(g Gomega, gctx context.Context) []persesv1alpha1.PersesDashboard { + var list persesv1alpha1.PersesDashboardList + g.Expect(cli.List(gctx, &list, &client.ListOptions{Namespace: tests.InstallNamespace})).To(Succeed()) + return list.Items + }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).WithContext(ctx).ShouldNot(BeEmpty()) + + Eventually(func(g Gomega, gctx context.Context) []persesv1alpha1.PersesDatasource { + var list persesv1alpha1.PersesDatasourceList + g.Expect(cli.List(gctx, &list, &client.ListOptions{Namespace: tests.InstallNamespace})).To(Succeed()) + return list.Items + }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).WithContext(ctx).ShouldNot(BeEmpty()) + + // And specifically, the expected names should be present + Eventually(func(gctx context.Context) error { + return cli.Get( + gctx, + client.ObjectKey{Namespace: tests.InstallNamespace, Name: "perses-dashboard-node-memory-overview"}, + &persesv1alpha1.PersesDashboard{}, + ) + }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).WithContext(ctx).Should(Succeed()) + + Eventually(func(gctx context.Context) error { + return cli.Get( + gctx, + client.ObjectKey{Namespace: tests.InstallNamespace, Name: "perses-thanos-datasource"}, + &persesv1alpha1.PersesDatasource{}, + ) + }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).WithContext(ctx).Should(Succeed()) + }) +}) + +// areCRDsDeployed returns true if both dashboards and datasources CRDs exist. +func areCRDsDeployed(ctx context.Context, cli client.Client) bool { + if err := cli.Get(ctx, client.ObjectKey{Name: hcoutil.PersesDashboardsCRDName}, &apiextensionsv1.CustomResourceDefinition{}); err != nil { + return false + } + if err := cli.Get(ctx, client.ObjectKey{Name: hcoutil.PersesDatasourcesCRDName}, &apiextensionsv1.CustomResourceDefinition{}); err != nil { + return false + } + return true +} + +// applyCRDFromBytes applies the CRD contained in the given YAML bytes. +func applyCRDFromBytes(ctx context.Context, cli client.Client, data []byte) error { + dec := yaml.NewYAMLOrJSONDecoder(bytes.NewReader(data), 4096) + var crd apiextensionsv1.CustomResourceDefinition + if err := dec.Decode(&crd); err != nil { + return err + } + if err := cli.Create(ctx, &crd); err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + return nil +} + +// deleteCRD deletes the CRD by name, ignoring NotFound errors. +func deleteCRD(ctx context.Context, cli client.Client, name string) error { + crd := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + if err := cli.Delete(ctx, crd); err != nil && !apierrors.IsNotFound(err) { + return err + } + return nil +} diff --git a/tests/func-tests/utils.go b/tests/func-tests/utils.go index 2cf8ec0ee2..b40780979d 100644 --- a/tests/func-tests/utils.go +++ b/tests/func-tests/utils.go @@ -37,6 +37,11 @@ const ( TestNamespace = "hco-test-default" ) +const ( + hcoRolloutTimeout = 5 * time.Minute + hcoRolloutPolling = 5 * time.Second +) + func init() { flag.StringVar(&KubeVirtStorageClassLocal, "storage-class-local", "local", "Storage provider to use for tests which want local storage") flag.StringVar(&InstallNamespace, "installed-namespace", "", "Set the namespace KubeVirt is installed in") @@ -55,6 +60,29 @@ func BeforeEach(ctx context.Context) { deleteAllResources(ctx, cli, "persistentvolumeclaims") } +// WaitForHCOOperatorRollout waits until the HCO operator deployment is ready. +func WaitForHCOOperatorRollout(ctx context.Context) { + ginkgo.GinkgoHelper() + + cli := GetK8sClientSet() + key := types.NamespacedName{Namespace: InstallNamespace, Name: hcoutil.HCOOperatorName} + + Eventually(func(g Gomega, gctx context.Context) { + deployment, err := cli.AppsV1().Deployments(key.Namespace).Get(gctx, key.Name, metav1.GetOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + + desired := int32(1) + if deployment.Spec.Replicas != nil { + desired = *deployment.Spec.Replicas + } + + g.Expect(deployment.Status.ObservedGeneration).To(BeNumerically(">=", deployment.Generation)) + g.Expect(deployment.Status.UpdatedReplicas).To(BeNumerically(">=", desired)) + g.Expect(deployment.Status.ReadyReplicas).To(BeNumerically(">=", desired)) + g.Expect(deployment.Status.AvailableReplicas).To(BeNumerically(">=", desired)) + }).WithTimeout(hcoRolloutTimeout).WithPolling(hcoRolloutPolling).WithContext(ctx).Should(Succeed()) +} + func FailIfNotOpenShift(ctx context.Context, cli client.Client, testName string) { isOpenShift := false Eventually(func(ctx context.Context) error {