Skip to content

Commit be0cb63

Browse files
committed
Add telemetry for Memgraph storage info, add k8s events, improve logs
1 parent 3b26d7b commit be0cb63

File tree

9 files changed

+473
-23
lines changed

9 files changed

+473
-23
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,22 @@ The operator exposes Prometheus metrics for monitoring cluster health and operat
140140
| `memgraph_validation_last_run_timestamp_seconds` | Gauge | Unix timestamp of the last validation run |
141141
| `memgraph_validation_passed` | Gauge | Whether the last validation passed (1) or not (0) |
142142

143+
### Storage Metrics (from SHOW STORAGE INFO)
144+
145+
The operator collects storage statistics from each Memgraph instance:
146+
147+
| Metric | Type | Description |
148+
|--------|------|-------------|
149+
| `memgraph_storage_vertex_count` | Gauge | Number of vertices in the database |
150+
| `memgraph_storage_edge_count` | Gauge | Number of edges in the database |
151+
| `memgraph_storage_average_degree` | Gauge | Average degree of vertices |
152+
| `memgraph_storage_memory_resident_bytes` | Gauge | Current resident memory usage |
153+
| `memgraph_storage_memory_peak_bytes` | Gauge | Peak resident memory usage |
154+
| `memgraph_storage_disk_usage_bytes` | Gauge | Disk space consumed |
155+
| `memgraph_storage_memory_tracked_bytes` | Gauge | Actively tracked memory allocation |
156+
| `memgraph_storage_allocation_limit_bytes` | Gauge | Maximum memory allocation limit |
157+
| `memgraph_storage_unreleased_delta_objects` | Gauge | Delta objects awaiting cleanup |
158+
143159
All metrics include `cluster` and `namespace` labels. Instance-level metrics also include `instance` and `role` labels.
144160

145161
## Contributing

internal/controller/events.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Copyright 2025 Base14. See LICENSE file for details.
2+
3+
package controller
4+
5+
// Event reason constants for Kubernetes events
6+
const (
7+
// Cluster lifecycle events
8+
EventReasonClusterCreated = "ClusterCreated"
9+
EventReasonClusterReady = "ClusterReady"
10+
EventReasonClusterDegraded = "ClusterDegraded"
11+
12+
// StatefulSet events
13+
EventReasonCreatingStatefulSet = "CreatingStatefulSet"
14+
EventReasonScalingStatefulSet = "ScalingStatefulSet"
15+
16+
// Replication events
17+
EventReasonMainInstanceConfigured = "MainInstanceConfigured"
18+
EventReasonReplicaRegistered = "ReplicaRegistered"
19+
EventReasonReplicaUnregistered = "ReplicaUnregistered"
20+
EventReasonReplicationHealthy = "ReplicationHealthy"
21+
EventReasonReplicationError = "ReplicationError"
22+
EventReasonReplicaUnhealthy = "ReplicaUnhealthy"
23+
EventReasonReplicationLagHigh = "ReplicationLagHigh"
24+
25+
// Failover events
26+
EventReasonMainInstanceFailed = "MainInstanceFailed"
27+
EventReasonFailoverStarted = "FailoverStarted"
28+
EventReasonFailoverCompleted = "FailoverCompleted"
29+
EventReasonFailoverFailed = "FailoverFailed"
30+
31+
// Snapshot events
32+
EventReasonSnapshotCronJobCreated = "SnapshotCronJobCreated"
33+
EventReasonSnapshotCronJobUpdated = "SnapshotCronJobUpdated"
34+
EventReasonSnapshotSucceeded = "SnapshotSucceeded"
35+
EventReasonSnapshotFailed = "SnapshotFailed"
36+
37+
// S3 backup events
38+
EventReasonS3BackupSucceeded = "S3BackupSucceeded"
39+
EventReasonS3BackupFailed = "S3BackupFailed"
40+
41+
// Health check events
42+
EventReasonHealthCheckPassed = "HealthCheckPassed"
43+
EventReasonHealthCheckFailed = "HealthCheckFailed"
44+
45+
// Write service events
46+
EventReasonUpdatedWriteService = "UpdatedWriteService"
47+
48+
// Reconcile events
49+
EventReasonReconcileError = "ReconcileError"
50+
)

internal/controller/memgraphcluster_controller.go

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,17 @@ func (r *MemgraphClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ
100100
// Initialize status if needed
101101
if cluster.Status.Phase == "" {
102102
cluster.Status.Phase = memgraphv1alpha1.ClusterPhasePending
103+
r.Recorder.Event(cluster, corev1.EventTypeNormal, EventReasonClusterCreated,
104+
"Cluster created, starting initialization")
103105
if err := r.Status().Update(ctx, cluster); err != nil {
104106
return ctrl.Result{}, err
105107
}
106108
return ctrl.Result{Requeue: true}, nil
107109
}
108110

111+
// Track previous phase for event emission
112+
previousPhase := cluster.Status.Phase
113+
109114
// Reconcile resources
110115
result, err := r.reconcileResources(ctx, cluster, log)
111116

@@ -114,7 +119,7 @@ func (r *MemgraphClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ
114119
r.metrics.RecordReconcileDuration(cluster.Name, cluster.Namespace, duration)
115120

116121
if err != nil {
117-
r.Recorder.Event(cluster, corev1.EventTypeWarning, "ReconcileError", err.Error())
122+
r.Recorder.Event(cluster, corev1.EventTypeWarning, EventReasonReconcileError, err.Error())
118123
r.metrics.RecordReconcileOperation(cluster.Name, cluster.Namespace, "error")
119124
return result, err
120125
}
@@ -123,6 +128,9 @@ func (r *MemgraphClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ
123128
r.metrics.RecordReconcileOperation(cluster.Name, cluster.Namespace, "success")
124129
r.metrics.RecordClusterPhase(cluster.Name, cluster.Namespace, string(cluster.Status.Phase))
125130

131+
// Emit phase transition events
132+
r.emitPhaseTransitionEvents(cluster, previousPhase)
133+
126134
replicas := cluster.Spec.Replicas
127135
if replicas == 0 {
128136
replicas = 3
@@ -210,7 +218,7 @@ func (r *MemgraphClusterReconciler) reconcileResources(ctx context.Context, clus
210218
} else {
211219
if err := r.replicationManager.ConfigureReplication(ctx, cluster, pods, writeInstance, log); err != nil {
212220
log.Error("failed to configure replication", zap.Error(err))
213-
r.Recorder.Event(cluster, corev1.EventTypeWarning, "ReplicationError",
221+
r.Recorder.Event(cluster, corev1.EventTypeWarning, EventReasonReplicationError,
214222
fmt.Sprintf("Failed to configure replication: %v", err))
215223
} else {
216224
health, err := r.replicationManager.CheckReplicationHealth(ctx, cluster, writeInstance, log)
@@ -240,7 +248,10 @@ func (r *MemgraphClusterReconciler) reconcileResources(ctx context.Context, clus
240248
}
241249
}
242250

243-
// 11. Update status
251+
// 11. Collect storage metrics from all running pods
252+
r.collectStorageMetrics(ctx, cluster, pods, writeInstance, log)
253+
254+
// 12. Update status
244255
if err := r.updateStatus(ctx, cluster, pods, writeInstance, registeredReplicas); err != nil {
245256
return ctrl.Result{}, err
246257
}
@@ -272,7 +283,7 @@ func (r *MemgraphClusterReconciler) ensureReplicationManager() error {
272283
return fmt.Errorf("failed to create memgraph client: %w", err)
273284
}
274285

275-
r.replicationManager = NewReplicationManager(mgClient)
286+
r.replicationManager = NewReplicationManager(mgClient, r.Recorder)
276287
return nil
277288
}
278289

@@ -311,7 +322,7 @@ func (r *MemgraphClusterReconciler) reconcileStatefulSet(ctx context.Context, cl
311322
log.Info("creating StatefulSet",
312323
zap.String("statefulset", desired.Name),
313324
zap.Int32("replicas", *desired.Spec.Replicas))
314-
r.Recorder.Event(cluster, corev1.EventTypeNormal, "CreatingStatefulSet",
325+
r.Recorder.Event(cluster, corev1.EventTypeNormal, EventReasonCreatingStatefulSet,
315326
fmt.Sprintf("Creating StatefulSet %s with %d replicas", desired.Name, *desired.Spec.Replicas))
316327
return r.Create(ctx, desired)
317328
}
@@ -325,7 +336,7 @@ func (r *MemgraphClusterReconciler) reconcileStatefulSet(ctx context.Context, cl
325336
zap.Int32("currentReplicas", *existing.Spec.Replicas),
326337
zap.Int32("desiredReplicas", *desired.Spec.Replicas))
327338
existing.Spec.Replicas = desired.Spec.Replicas
328-
r.Recorder.Event(cluster, corev1.EventTypeNormal, "ScalingStatefulSet",
339+
r.Recorder.Event(cluster, corev1.EventTypeNormal, EventReasonScalingStatefulSet,
329340
fmt.Sprintf("Scaling StatefulSet %s to %d replicas", existing.Name, *desired.Spec.Replicas))
330341
return r.Update(ctx, existing)
331342
}
@@ -360,7 +371,7 @@ func (r *MemgraphClusterReconciler) reconcileWriteService(ctx context.Context, c
360371
zap.String("previousInstance", currentWriteInstance),
361372
zap.String("newInstance", writeInstance))
362373
existing.Spec.Selector = desired.Spec.Selector
363-
r.Recorder.Event(cluster, corev1.EventTypeNormal, "UpdatedWriteService",
374+
r.Recorder.Event(cluster, corev1.EventTypeNormal, EventReasonUpdatedWriteService,
364375
fmt.Sprintf("Write service now pointing to %s", writeInstance))
365376
return r.Update(ctx, existing)
366377
}
@@ -558,6 +569,62 @@ func conditionMessage(ok bool, trueMsg, falseMsg string) string {
558569
return falseMsg
559570
}
560571

572+
// collectStorageMetrics collects storage metrics from all running pods
573+
func (r *MemgraphClusterReconciler) collectStorageMetrics(
574+
ctx context.Context,
575+
cluster *memgraphv1alpha1.MemgraphCluster,
576+
pods []corev1.Pod,
577+
writeInstance string,
578+
log *zap.Logger,
579+
) {
580+
if r.replicationManager == nil || r.replicationManager.Client() == nil {
581+
return
582+
}
583+
584+
mgClient := r.replicationManager.Client()
585+
for _, pod := range pods {
586+
if !isPodReady(&pod) {
587+
continue
588+
}
589+
590+
role := "replica"
591+
if pod.Name == writeInstance {
592+
role = "main"
593+
}
594+
595+
info, err := mgClient.GetStorageInfo(ctx, cluster.Namespace, pod.Name)
596+
if err != nil {
597+
log.Debug("failed to collect storage metrics",
598+
zap.String("pod", pod.Name),
599+
zap.Error(err))
600+
continue
601+
}
602+
603+
r.metrics.RecordStorageInfo(cluster.Name, cluster.Namespace, pod.Name, role, info)
604+
}
605+
}
606+
607+
// emitPhaseTransitionEvents emits events when cluster phase changes
608+
func (r *MemgraphClusterReconciler) emitPhaseTransitionEvents(
609+
cluster *memgraphv1alpha1.MemgraphCluster,
610+
previousPhase memgraphv1alpha1.ClusterPhase,
611+
) {
612+
currentPhase := cluster.Status.Phase
613+
if currentPhase == previousPhase {
614+
return
615+
}
616+
617+
switch {
618+
case currentPhase == memgraphv1alpha1.ClusterPhaseRunning:
619+
r.Recorder.Event(cluster, corev1.EventTypeNormal, EventReasonClusterReady,
620+
fmt.Sprintf("Cluster is ready with %d instances", cluster.Status.ReadyInstances))
621+
case previousPhase == memgraphv1alpha1.ClusterPhaseRunning &&
622+
currentPhase == memgraphv1alpha1.ClusterPhaseInitializing:
623+
r.Recorder.Event(cluster, corev1.EventTypeWarning, EventReasonClusterDegraded,
624+
fmt.Sprintf("Cluster degraded: %d instances ready", cluster.Status.ReadyInstances))
625+
}
626+
}
627+
561628
// SetupWithManager sets up the controller with the Manager.
562629
func (r *MemgraphClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
563630
return ctrl.NewControllerManagedBy(mgr).

internal/controller/metrics.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
package controller
44

55
import (
6+
"github.com/base14/memgraph-operator/internal/memgraph"
67
"github.com/prometheus/client_golang/prometheus"
78
"sigs.k8s.io/controller-runtime/pkg/metrics"
89
)
@@ -127,6 +128,79 @@ var (
127128
},
128129
[]string{"cluster", "namespace"},
129130
)
131+
132+
// Memgraph storage metrics (from SHOW STORAGE INFO)
133+
storageVertexCountGauge = prometheus.NewGaugeVec(
134+
prometheus.GaugeOpts{
135+
Name: "memgraph_storage_vertex_count",
136+
Help: "Number of vertices in the database",
137+
},
138+
[]string{"cluster", "namespace", "instance", "role"},
139+
)
140+
141+
storageEdgeCountGauge = prometheus.NewGaugeVec(
142+
prometheus.GaugeOpts{
143+
Name: "memgraph_storage_edge_count",
144+
Help: "Number of edges in the database",
145+
},
146+
[]string{"cluster", "namespace", "instance", "role"},
147+
)
148+
149+
storageAverageDegreeGauge = prometheus.NewGaugeVec(
150+
prometheus.GaugeOpts{
151+
Name: "memgraph_storage_average_degree",
152+
Help: "Average degree of vertices in the database",
153+
},
154+
[]string{"cluster", "namespace", "instance", "role"},
155+
)
156+
157+
storageMemoryResGauge = prometheus.NewGaugeVec(
158+
prometheus.GaugeOpts{
159+
Name: "memgraph_storage_memory_resident_bytes",
160+
Help: "Current resident memory usage in bytes",
161+
},
162+
[]string{"cluster", "namespace", "instance", "role"},
163+
)
164+
165+
storagePeakMemoryResGauge = prometheus.NewGaugeVec(
166+
prometheus.GaugeOpts{
167+
Name: "memgraph_storage_memory_peak_bytes",
168+
Help: "Peak resident memory usage in bytes",
169+
},
170+
[]string{"cluster", "namespace", "instance", "role"},
171+
)
172+
173+
storageDiskUsageGauge = prometheus.NewGaugeVec(
174+
prometheus.GaugeOpts{
175+
Name: "memgraph_storage_disk_usage_bytes",
176+
Help: "Disk space consumed in bytes",
177+
},
178+
[]string{"cluster", "namespace", "instance", "role"},
179+
)
180+
181+
storageMemoryTrackedGauge = prometheus.NewGaugeVec(
182+
prometheus.GaugeOpts{
183+
Name: "memgraph_storage_memory_tracked_bytes",
184+
Help: "Actively tracked memory allocation in bytes",
185+
},
186+
[]string{"cluster", "namespace", "instance", "role"},
187+
)
188+
189+
storageAllocationLimitGauge = prometheus.NewGaugeVec(
190+
prometheus.GaugeOpts{
191+
Name: "memgraph_storage_allocation_limit_bytes",
192+
Help: "Maximum memory allocation limit in bytes",
193+
},
194+
[]string{"cluster", "namespace", "instance", "role"},
195+
)
196+
197+
storageUnreleasedDeltaObjectsGauge = prometheus.NewGaugeVec(
198+
prometheus.GaugeOpts{
199+
Name: "memgraph_storage_unreleased_delta_objects",
200+
Help: "Count of delta objects awaiting cleanup",
201+
},
202+
[]string{"cluster", "namespace", "instance", "role"},
203+
)
130204
)
131205

132206
func init() {
@@ -146,6 +220,16 @@ func init() {
146220
failoverEventsTotal,
147221
validationLastRunTimestamp,
148222
validationPassedGauge,
223+
// Storage metrics
224+
storageVertexCountGauge,
225+
storageEdgeCountGauge,
226+
storageAverageDegreeGauge,
227+
storageMemoryResGauge,
228+
storagePeakMemoryResGauge,
229+
storageDiskUsageGauge,
230+
storageMemoryTrackedGauge,
231+
storageAllocationLimitGauge,
232+
storageUnreleasedDeltaObjectsGauge,
149233
)
150234
}
151235

@@ -235,6 +319,36 @@ func (m *MetricsRecorder) RecordValidation(cluster, namespace string, timestamp
235319
validationPassedGauge.WithLabelValues(cluster, namespace).Set(passedValue)
236320
}
237321

322+
// RecordStorageInfo records storage metrics from SHOW STORAGE INFO
323+
func (m *MetricsRecorder) RecordStorageInfo(cluster, namespace, instance, role string, info *memgraph.StorageInfo) {
324+
if info == nil {
325+
return
326+
}
327+
storageVertexCountGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.VertexCount))
328+
storageEdgeCountGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.EdgeCount))
329+
storageAverageDegreeGauge.WithLabelValues(cluster, namespace, instance, role).Set(info.AverageDegree)
330+
storageMemoryResGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.MemoryRes))
331+
storagePeakMemoryResGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.PeakMemoryRes))
332+
storageDiskUsageGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.DiskUsage))
333+
storageMemoryTrackedGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.MemoryTracked))
334+
storageAllocationLimitGauge.WithLabelValues(cluster, namespace, instance, role).Set(float64(info.AllocationLimit))
335+
storageUnreleasedDeltaObjectsGauge.WithLabelValues(cluster, namespace, instance, role).
336+
Set(float64(info.UnreleasedDeltaObjects))
337+
}
338+
339+
// DeleteInstanceStorageMetrics removes storage metrics for a specific instance
340+
func (m *MetricsRecorder) DeleteInstanceStorageMetrics(cluster, namespace, instance, role string) {
341+
storageVertexCountGauge.DeleteLabelValues(cluster, namespace, instance, role)
342+
storageEdgeCountGauge.DeleteLabelValues(cluster, namespace, instance, role)
343+
storageAverageDegreeGauge.DeleteLabelValues(cluster, namespace, instance, role)
344+
storageMemoryResGauge.DeleteLabelValues(cluster, namespace, instance, role)
345+
storagePeakMemoryResGauge.DeleteLabelValues(cluster, namespace, instance, role)
346+
storageDiskUsageGauge.DeleteLabelValues(cluster, namespace, instance, role)
347+
storageMemoryTrackedGauge.DeleteLabelValues(cluster, namespace, instance, role)
348+
storageAllocationLimitGauge.DeleteLabelValues(cluster, namespace, instance, role)
349+
storageUnreleasedDeltaObjectsGauge.DeleteLabelValues(cluster, namespace, instance, role)
350+
}
351+
238352
// DeleteClusterMetrics removes metrics for a deleted cluster
239353
func (m *MetricsRecorder) DeleteClusterMetrics(cluster, namespace string) {
240354
clusterPhaseGauge.DeleteLabelValues(cluster, namespace)

0 commit comments

Comments
 (0)