@@ -79,19 +79,29 @@ func (r *Reconciler) Reconcile(
7979 log := logging .FromContext (ctx )
8080 defer span .End ()
8181
82+ log .Info ("Starting reconciliation" , "cluster" , request .Name )
83+
8284 // get the postgrescluster from the cache
8385 cluster := & v1beta1.PostgresCluster {}
8486 if err := r .Client .Get (ctx , request .NamespacedName , cluster ); err != nil {
8587 // NotFound cannot be fixed by requeuing so ignore it. During background
8688 // deletion, we receive delete events from cluster's dependents after
8789 // cluster is deleted.
8890 if err = client .IgnoreNotFound (err ); err != nil {
89- log .Error (err , "unable to fetch PostgresCluster" )
91+ log .Error (err , "unable to fetch PostgresCluster" , "cluster" , request . Name )
9092 span .RecordError (err )
93+ } else {
94+ log .Info ("PostgresCluster not found, likely deleted" , "cluster" , request .Name )
9195 }
9296 return runtime .ErrorWithBackoff (err )
9397 }
9498
99+ log .Info ("Retrieved PostgresCluster" ,
100+ "cluster" , cluster .Name ,
101+ "generation" , cluster .GetGeneration (),
102+ "observedGeneration" , cluster .Status .ObservedGeneration ,
103+ "resourceVersion" , cluster .GetResourceVersion ())
104+
95105 // Set any defaults that may not have been stored in the API. No DeepCopy
96106 // is necessary because controller-runtime makes a copy before returning
97107 // from its cache.
@@ -114,12 +124,12 @@ func (r *Reconciler) Reconcile(
114124 // deleted or there was an error.
115125 if result , err := r .handleDelete (ctx , cluster ); err != nil {
116126 span .RecordError (err )
117- log .Error (err , "deleting" )
127+ log .Error (err , "deleting" , "cluster" , cluster . Name )
118128 return runtime .ErrorWithBackoff (err )
119129
120130 } else if result != nil {
121131 if log := log .V (1 ); log .Enabled () {
122- log .Info ("deleting" , "result" , fmt .Sprintf ("%+v" , * result ))
132+ log .Info ("deleting" , "cluster" , cluster . Name , " result" , fmt .Sprintf ("%+v" , * result ))
123133 }
124134 return * result , nil
125135 }
@@ -180,10 +190,10 @@ func (r *Reconciler) Reconcile(
180190 // managed fields on the status subresource: https://issue.k8s.io/88901
181191 if err := r .Client .Status ().Patch (
182192 ctx , cluster , client .MergeFrom (before ), r .Owner ); err != nil {
183- log .Error (err , "patching cluster status" )
193+ log .Error (err , "patching cluster status" , "cluster" , cluster . Name )
184194 return err
185195 }
186- log .V (1 ).Info ("patched cluster status" )
196+ log .V (1 ).Info ("patched cluster status" , "cluster" , cluster . Name )
187197 }
188198 return nil
189199 }
@@ -210,7 +220,16 @@ func (r *Reconciler) Reconcile(
210220 }
211221
212222 if err == nil {
223+ log .V (1 ).Info ("Starting backups reconciliation check" , "cluster" , cluster .Name )
213224 backupsSpecFound , backupsReconciliationAllowed , err = r .BackupsEnabled (ctx , cluster )
225+ if err != nil {
226+ log .Error (err , "Failed to check backups status" , "cluster" , cluster .Name )
227+ } else {
228+ log .V (1 ).Info ("Backups status" ,
229+ "cluster" , cluster .Name ,
230+ "backupsSpecFound" , backupsSpecFound ,
231+ "backupsReconciliationAllowed" , backupsReconciliationAllowed )
232+ }
214233
215234 // If we cannot reconcile because the backup reconciliation is paused, set a condition and exit
216235 if ! backupsReconciliationAllowed {
@@ -264,7 +283,11 @@ func (r *Reconciler) Reconcile(
264283 postgres .SetHugePages (cluster , & pgParameters )
265284
266285 if err == nil {
286+ log .V (1 ).Info ("Starting root certificate reconciliation" , "cluster" , cluster .Name )
267287 rootCA , err = r .reconcileRootCertificate (ctx , cluster )
288+ if err != nil {
289+ log .Error (err , "Failed to reconcile root certificate" , "cluster" , cluster .Name )
290+ }
268291 }
269292
270293 if err == nil {
@@ -274,45 +297,92 @@ func (r *Reconciler) Reconcile(
274297 // return a bool indicating that the controller should return early while any
275298 // required Jobs are running, after which it will indicate that an early
276299 // return is no longer needed, and reconciliation can proceed normally.
300+ log .V (1 ).Info ("Starting directory move jobs reconciliation" , "cluster" , cluster .Name )
301+
277302 returnEarly , err := r .reconcileDirMoveJobs (ctx , cluster )
303+ if err != nil {
304+ log .Error (err , "Failed to reconcile directory move jobs" , "cluster" , cluster .Name )
305+ } else if returnEarly {
306+ log .Info ("Directory move jobs in progress, returning early" , "cluster" , cluster .Name )
307+ }
278308 if err != nil || returnEarly {
279309 return runtime .ErrorWithBackoff (errors .Join (err , patchClusterStatus ()))
280310 }
281311 }
282312 if err == nil {
313+ log .V (1 ).Info ("Starting PVC observation" , "cluster" , cluster .Name )
283314 clusterVolumes , err = r .observePersistentVolumeClaims (ctx , cluster )
315+ if err != nil {
316+ log .Error (err , "Failed to observe PVCs" , "cluster" , cluster .Name )
317+ }
284318 }
285319 if err == nil {
320+ log .V (1 ).Info ("Starting PVC configuration" , "cluster" , cluster .Name )
286321 clusterVolumes , err = r .configureExistingPVCs (ctx , cluster , clusterVolumes )
322+ if err != nil {
323+ log .Error (err , "Failed to configure PVCs" , "cluster" , cluster .Name )
324+ }
287325 }
288326 if err == nil {
327+ log .V (1 ).Info ("Starting instance observation" , "cluster" , cluster .Name )
289328 instances , err = r .observeInstances (ctx , cluster )
329+ if err != nil {
330+ log .Error (err , "Failed to observe instances" , "cluster" , cluster .Name )
331+ } else {
332+ // Count writable instances
333+ writableCount := 0
334+ for _ , instance := range instances .forCluster {
335+ if writable , known := instance .IsWritable (); writable && known {
336+ writableCount ++
337+ }
338+ }
339+ log .V (1 ).Info ("Instance observation complete" ,
340+ "cluster" , cluster .Name ,
341+ "instanceCount" , len (instances .forCluster ),
342+ "writableInstanceCount" , writableCount )
343+ }
290344 }
291345
292346 result := reconcile.Result {}
293347
294348 if err == nil {
349+ log .V (1 ).Info ("Starting Patroni status reconciliation" , "cluster" , cluster .Name )
295350 var requeue time.Duration
296351 if requeue , err = r .reconcilePatroniStatus (ctx , cluster , instances ); err == nil && requeue > 0 {
297352 result .RequeueAfter = requeue
353+ log .V (1 ).Info ("Patroni status reconciliation requires requeue" ,
354+ "cluster" , cluster .Name ,
355+ "requeueAfter" , requeue )
298356 }
299357 }
300358 if err == nil {
359+ log .V (1 ).Info ("Starting Patroni switchover reconciliation" , "cluster" , cluster .Name )
301360 err = r .reconcilePatroniSwitchover (ctx , cluster , instances )
361+ if err != nil {
362+ log .Error (err , "Failed to reconcile Patroni switchover" , "cluster" , cluster .Name )
363+ }
302364 }
303365 // reconcile the Pod service before reconciling any data source in case it is necessary
304366 // to start Pods during data source reconciliation that require network connections (e.g.
305367 // if it is necessary to start a dedicated repo host to bootstrap a new cluster using its
306368 // own existing backups).
307369 if err == nil {
370+ log .V (1 ).Info ("Starting cluster pod service reconciliation" , "cluster" , cluster .Name )
308371 clusterPodService , err = r .reconcileClusterPodService (ctx , cluster )
372+ if err != nil {
373+ log .Error (err , "Failed to reconcile cluster pod service" , "cluster" , cluster .Name )
374+ }
309375 }
310376 // reconcile the RBAC resources before reconciling any data source in case
311377 // restore/move Job pods require the ServiceAccount to access any data source.
312378 // e.g., we are restoring from an S3 source using an IAM for access
313379 // - https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-technical-overview.html
314380 if err == nil {
381+ log .V (1 ).Info ("Starting RBAC resources reconciliation" , "cluster" , cluster .Name )
315382 instanceServiceAccount , err = r .reconcileRBACResources (ctx , cluster )
383+ if err != nil {
384+ log .Error (err , "Failed to reconcile RBAC resources" , "cluster" , cluster .Name )
385+ }
316386 }
317387 // First handle reconciling any data source configured for the PostgresCluster. This includes
318388 // reconciling the data source defined to bootstrap a new cluster, as well as a reconciling
@@ -330,93 +400,188 @@ func (r *Reconciler) Reconcile(
330400 }
331401 }
332402 if err == nil {
403+ log .V (1 ).Info ("Starting cluster config map reconciliation" , "cluster" , cluster .Name )
333404 clusterConfigMap , err = r .reconcileClusterConfigMap (ctx , cluster , pgHBAs , pgParameters )
405+ if err != nil {
406+ log .Error (err , "Failed to reconcile cluster config map" , "cluster" , cluster .Name )
407+ }
334408 }
335409 if err == nil {
410+ log .V (1 ).Info ("Starting replication secret reconciliation" , "cluster" , cluster .Name )
336411 clusterReplicationSecret , err = r .reconcileReplicationSecret (ctx , cluster , rootCA )
412+ if err != nil {
413+ log .Error (err , "Failed to reconcile replication secret" , "cluster" , cluster .Name )
414+ }
337415 }
338416 if err == nil {
417+ log .V (1 ).Info ("Starting Patroni leader lease reconciliation" , "cluster" , cluster .Name )
339418 patroniLeaderService , err = r .reconcilePatroniLeaderLease (ctx , cluster )
419+ if err != nil {
420+ log .Error (err , "Failed to reconcile Patroni leader lease" , "cluster" , cluster .Name )
421+ }
340422 }
341423 if err == nil {
424+ log .V (1 ).Info ("Starting primary service reconciliation" , "cluster" , cluster .Name )
342425 primaryService , err = r .reconcileClusterPrimaryService (ctx , cluster , patroniLeaderService )
426+ if err != nil {
427+ log .Error (err , "Failed to reconcile primary service" , "cluster" , cluster .Name )
428+ }
343429 }
344430 if err == nil {
431+ log .V (1 ).Info ("Starting replica service reconciliation" , "cluster" , cluster .Name )
345432 replicaService , err = r .reconcileClusterReplicaService (ctx , cluster )
433+ if err != nil {
434+ log .Error (err , "Failed to reconcile replica service" , "cluster" , cluster .Name )
435+ }
346436 }
347437 if err == nil {
438+ log .V (1 ).Info ("Starting cluster certificate reconciliation" , "cluster" , cluster .Name )
348439 primaryCertificate , err = r .reconcileClusterCertificate (ctx , rootCA , cluster , primaryService , replicaService )
440+ if err != nil {
441+ log .Error (err , "Failed to reconcile cluster certificate" , "cluster" , cluster .Name )
442+ }
349443 }
350444 if err == nil {
445+ log .V (1 ).Info ("Starting Patroni distributed configuration reconciliation" , "cluster" , cluster .Name )
351446 err = r .reconcilePatroniDistributedConfiguration (ctx , cluster )
447+ if err != nil {
448+ log .Error (err , "Failed to reconcile Patroni distributed configuration" , "cluster" , cluster .Name )
449+ }
352450 }
353451 if err == nil {
452+ log .V (1 ).Info ("Starting Patroni dynamic configuration reconciliation" , "cluster" , cluster .Name )
354453 err = r .reconcilePatroniDynamicConfiguration (ctx , cluster , instances , pgHBAs , pgParameters )
454+ if err != nil {
455+ log .Error (err , "Failed to reconcile Patroni dynamic configuration" , "cluster" , cluster .Name )
456+ }
355457 }
356458 if err == nil {
459+ log .V (1 ).Info ("Starting monitoring secret reconciliation" , "cluster" , cluster .Name )
357460 monitoringSecret , err = r .reconcileMonitoringSecret (ctx , cluster )
461+ if err != nil {
462+ log .Error (err , "Failed to reconcile monitoring secret" , "cluster" , cluster .Name )
463+ }
358464 }
359465 if err == nil {
466+ log .V (1 ).Info ("Starting exporter queries config reconciliation" , "cluster" , cluster .Name )
360467 exporterQueriesConfig , err = r .reconcileExporterQueriesConfig (ctx , cluster )
468+ if err != nil {
469+ log .Error (err , "Failed to reconcile exporter queries config" , "cluster" , cluster .Name )
470+ }
361471 }
362472 if err == nil {
473+ log .V (1 ).Info ("Starting exporter web config reconciliation" , "cluster" , cluster .Name )
363474 exporterWebConfig , err = r .reconcileExporterWebConfig (ctx , cluster )
475+ if err != nil {
476+ log .Error (err , "Failed to reconcile exporter web config" , "cluster" , cluster .Name )
477+ }
364478 }
365479 if err == nil {
480+ log .V (1 ).Info ("Starting instance sets reconciliation" , "cluster" , cluster .Name )
366481 err = r .reconcileInstanceSets (
367482 ctx , cluster , clusterConfigMap , clusterReplicationSecret , rootCA ,
368483 clusterPodService , instanceServiceAccount , instances , patroniLeaderService ,
369484 primaryCertificate , clusterVolumes , exporterQueriesConfig , exporterWebConfig ,
370485 backupsSpecFound ,
371486 )
487+ if err != nil {
488+ log .Error (err , "Failed to reconcile instance sets" , "cluster" , cluster .Name )
489+ }
372490 }
373491
374492 if err == nil {
493+ log .V (1 ).Info ("Starting Postgres databases reconciliation" , "cluster" , cluster .Name )
375494 err = r .reconcilePostgresDatabases (ctx , cluster , instances )
495+ if err != nil {
496+ log .Error (err , "Failed to reconcile Postgres databases" , "cluster" , cluster .Name )
497+ }
376498 }
377499 if err == nil {
500+ log .V (1 ).Info ("Starting Postgres users reconciliation" , "cluster" , cluster .Name )
378501 err = r .reconcilePostgresUsers (ctx , cluster , instances )
502+ if err != nil {
503+ log .Error (err , "Failed to reconcile Postgres users" , "cluster" , cluster .Name )
504+ }
379505 }
380506
381507 if err == nil {
508+ log .V (1 ).Info ("Starting PGBackRest reconciliation" , "cluster" , cluster .Name )
382509 var next reconcile.Result
383510 if next , err = r .reconcilePGBackRest (ctx , cluster ,
384511 instances , rootCA , backupsSpecFound ); err == nil && ! next .IsZero () {
385512 result .Requeue = result .Requeue || next .Requeue
386513 if next .RequeueAfter > 0 {
514+ log .V (1 ).Info ("PGBackRest reconciliation requires requeue" ,
515+ "cluster" , cluster .Name ,
516+ "requeueAfter" , next .RequeueAfter )
387517 result .RequeueAfter = next .RequeueAfter
388518 }
389519 }
520+ if err != nil {
521+ log .Error (err , "Failed to reconcile PGBackRest" , "cluster" , cluster .Name )
522+ }
390523 }
391524 if err == nil {
525+ log .V (1 ).Info ("Starting dedicated snapshot volume reconciliation" , "cluster" , cluster .Name )
392526 dedicatedSnapshotPVC , err = r .reconcileDedicatedSnapshotVolume (ctx , cluster , clusterVolumes )
527+ if err != nil {
528+ log .Error (err , "Failed to reconcile dedicated snapshot volume" , "cluster" , cluster .Name )
529+ }
393530 }
394531 if err == nil {
532+ log .V (1 ).Info ("Starting volume snapshots reconciliation" , "cluster" , cluster .Name )
395533 err = r .reconcileVolumeSnapshots (ctx , cluster , dedicatedSnapshotPVC )
534+ if err != nil {
535+ log .Error (err , "Failed to reconcile volume snapshots" , "cluster" , cluster .Name )
536+ }
396537 }
397538 if err == nil {
539+ log .V (1 ).Info ("Starting PGBouncer reconciliation" , "cluster" , cluster .Name )
398540 err = r .reconcilePGBouncer (ctx , cluster , instances , primaryCertificate , rootCA )
541+ if err != nil {
542+ log .Error (err , "Failed to reconcile PGBouncer" , "cluster" , cluster .Name )
543+ }
399544 }
400545 if err == nil {
546+ log .V (1 ).Info ("Starting PGMonitor reconciliation" , "cluster" , cluster .Name )
401547 err = r .reconcilePGMonitor (ctx , cluster , instances , monitoringSecret )
548+ if err != nil {
549+ log .Error (err , "Failed to reconcile PGMonitor" , "cluster" , cluster .Name )
550+ }
402551 }
403552 if err == nil {
553+ log .V (1 ).Info ("Starting database init SQL reconciliation" , "cluster" , cluster .Name )
404554 err = r .reconcileDatabaseInitSQL (ctx , cluster , instances )
555+ if err != nil {
556+ log .Error (err , "Failed to reconcile database init SQL" , "cluster" , cluster .Name )
557+ }
405558 }
406559 if err == nil {
560+ log .V (1 ).Info ("Starting PGAdmin reconciliation" , "cluster" , cluster .Name )
407561 err = r .reconcilePGAdmin (ctx , cluster )
562+ if err != nil {
563+ log .Error (err , "Failed to reconcile PGAdmin" , "cluster" , cluster .Name )
564+ }
408565 }
409566 if err == nil {
410567 // This is after [Reconciler.rolloutInstances] to ensure that recreating
411568 // Pods takes precedence.
569+ log .V (1 ).Info ("Starting Patroni restarts reconciliation" , "cluster" , cluster .Name )
412570 err = r .handlePatroniRestarts (ctx , cluster , instances )
571+ if err != nil {
572+ log .Error (err , "Failed to reconcile Patroni restarts" , "cluster" , cluster .Name )
573+ }
413574 }
414575
415576 // at this point everything reconciled successfully, and we can update the
416577 // observedGeneration
417578 cluster .Status .ObservedGeneration = cluster .GetGeneration ()
418579
419- log .V (1 ).Info ("reconciled cluster" )
580+ log .Info ("Completed reconciliation" ,
581+ "cluster" , cluster .Name ,
582+ "generation" , cluster .GetGeneration (),
583+ "observedGeneration" , cluster .Status .ObservedGeneration ,
584+ "result" , fmt .Sprintf ("%+v" , result ))
420585
421586 return result , errors .Join (err , patchClusterStatus ())
422587}
0 commit comments