Skip to content

Commit 5c8a6ec

Browse files
jsilvelaarmruleonardoce
committed
fix(roles): preserve password when the referenced Secret is missing (cloudnative-pg#10053)
Previously, when a role's passwordSecret could not be fetched, the reconciler would clear the password on the PostgreSQL role. The role is now left untouched until the Secret becomes available. Role reconciliation no longer stops at the first failing action: every action is attempted and the resulting errors are aggregated, giving better visibility into partial failures. Closes cloudnative-pg#9677 Signed-off-by: Jaime Silvela <jaime.silvela@mailfence.com> Signed-off-by: Armando Ruocco <armando.ruocco@enterprisedb.com> Signed-off-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com> Co-authored-by: Armando Ruocco <armando.ruocco@enterprisedb.com> Co-authored-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com> (cherry picked from commit 1854d20)
1 parent 8c85072 commit 5c8a6ec

8 files changed

Lines changed: 318 additions & 94 deletions

File tree

api/v1/cluster_types.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -681,8 +681,10 @@ type ManagedRoles struct {
681681
// +optional
682682
ByStatus map[RoleStatus][]string `json:"byStatus,omitempty"`
683683

684-
// CannotReconcile lists roles that cannot be reconciled in PostgreSQL,
685-
// with an explanation of the cause
684+
// CannotReconcile lists roles that cannot be reconciled, with an
685+
// explanation of the cause. Failures may originate in PostgreSQL
686+
// (e.g. dropping a role that owns objects) or in Kubernetes (e.g.
687+
// the referenced password Secret cannot be fetched).
686688
// +optional
687689
CannotReconcile map[string][]string `json:"cannotReconcile,omitempty"`
688690

@@ -2260,8 +2262,10 @@ type RoleConfiguration struct {
22602262
// +optional
22612263
Ensure EnsureOption `json:"ensure,omitempty"`
22622264

2263-
// Secret containing the password of the role (if present)
2264-
// If null, the password will be ignored unless DisablePassword is set
2265+
// Secret containing the password of the role (if present).
2266+
// If null, the password will be ignored unless DisablePassword is set.
2267+
// When set, the secret must follow the `kubernetes.io/basic-auth` format
2268+
// and contain both a `username` and a `password` field.
22652269
// +optional
22662270
PasswordSecret *LocalObjectReference `json:"passwordSecret,omitempty"`
22672271

config/crd/bases/postgresql.cnpg.io_clusters.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3284,8 +3284,10 @@ spec:
32843284
type: string
32853285
passwordSecret:
32863286
description: |-
3287-
Secret containing the password of the role (if present)
3288-
If null, the password will be ignored unless DisablePassword is set
3287+
Secret containing the password of the role (if present).
3288+
If null, the password will be ignored unless DisablePassword is set.
3289+
When set, the secret must follow the `kubernetes.io/basic-auth` format
3290+
and contain both a `username` and a `password` field.
32893291
properties:
32903292
name:
32913293
description: Name of the referent.
@@ -6342,8 +6344,10 @@ spec:
63426344
type: string
63436345
type: array
63446346
description: |-
6345-
CannotReconcile lists roles that cannot be reconciled in PostgreSQL,
6346-
with an explanation of the cause
6347+
CannotReconcile lists roles that cannot be reconciled, with an
6348+
explanation of the cause. Failures may originate in PostgreSQL
6349+
(e.g. dropping a role that owns objects) or in Kubernetes (e.g.
6350+
the referenced password Secret cannot be fetched).
63476351
type: object
63486352
passwordStatus:
63496353
additionalProperties:

docs/src/cloudnative-pg.v1.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,7 +1208,7 @@ _Appears in:_
12081208
| Field | Description | Required | Default | Validation |
12091209
| --- | --- | --- | --- | --- |
12101210
| `byStatus` _object (keys:[RoleStatus](#rolestatus), values:string array)_ | ByStatus gives the list of roles in each state | | | |
1211-
| `cannotReconcile` _object (keys:string, values:string array)_ | CannotReconcile lists roles that cannot be reconciled in PostgreSQL,<br />with an explanation of the cause | | | |
1211+
| `cannotReconcile` _object (keys:string, values:string array)_ | CannotReconcile lists roles that cannot be reconciled, with an<br />explanation of the cause. Failures may originate in PostgreSQL<br />(e.g. dropping a role that owns objects) or in Kubernetes (e.g.<br />the referenced password Secret cannot be fetched). | | | |
12121212
| `passwordStatus` _object (keys:string, values:[PasswordState](#passwordstate))_ | PasswordStatus gives the last transaction id and password secret version for each managed role | | | |
12131213

12141214

@@ -2042,7 +2042,7 @@ _Appears in:_
20422042
| `name` _string_ | Name of the role | True | | |
20432043
| `comment` _string_ | Description of the role | | | |
20442044
| `ensure` _[EnsureOption](#ensureoption)_ | Ensure the role is `present` or `absent` - defaults to "present" | | present | Enum: [present absent] <br /> |
2045-
| `passwordSecret` _[LocalObjectReference](https://pkg.go.dev/github.com/cloudnative-pg/machinery/pkg/api#LocalObjectReference)_ | Secret containing the password of the role (if present)<br />If null, the password will be ignored unless DisablePassword is set | | | |
2045+
| `passwordSecret` _[LocalObjectReference](https://pkg.go.dev/github.com/cloudnative-pg/machinery/pkg/api#LocalObjectReference)_ | Secret containing the password of the role (if present).<br />If null, the password will be ignored unless DisablePassword is set.<br />When set, the secret must follow the `kubernetes.io/basic-auth` format<br />and contain both a `username` and a `password` field. | | | |
20462046
| `connectionLimit` _integer_ | If the role can log in, this specifies how many concurrent<br />connections the role can make. `-1` (the default) means no limit. | | -1 | |
20472047
| `validUntil` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.34/#time-v1-meta)_ | Date and time after which the role's password is no longer valid.<br />When omitted, the password will never expire (default). | | | |
20482048
| `inRoles` _string array_ | List of one or more existing roles to which this role will be<br />immediately added as a new member. Default empty. | | | |

internal/management/controller/roles/reconciler.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,8 @@ func Reconcile(
5252
}
5353

5454
// get current passwords from spec/secrets
55-
latestPasswordResourceVersion, err := getPasswordSecretResourceVersion(
55+
latestPasswordResourceVersion := getPasswordSecretResourceVersion(
5656
ctx, c, cluster.Spec.Managed.Roles, cluster.Namespace)
57-
if err != nil {
58-
return reconcile.Result{}, err
59-
}
6057

6158
contextLogger.Debug("getting the managed roles status")
6259
rolesInDB, err := List(ctx, db)
@@ -79,7 +76,8 @@ func Reconcile(
7976

8077
if len(rolesByStatus[apiv1.RoleStatusPendingReconciliation]) != 0 {
8178
instance.TriggerRoleSynchronizer(cluster.Spec.Managed)
82-
contextLogger.Info("Triggered a managed role reconciliation")
79+
contextLogger.Info("Triggered a managed role reconciliation",
80+
"pending-roles", roleNamesByStatus[apiv1.RoleStatusPendingReconciliation])
8381
}
8482

8583
updatedCluster := cluster.DeepCopy()

internal/management/controller/roles/roles.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,22 @@ func (r rolesByAction) convertToRolesByStatus() rolesByStatus {
114114
return rolesByStatus
115115
}
116116

117+
// passwordSecretIsRequiredButMissing reports whether the spec asks for a
118+
// password secret that getPasswordSecretResourceVersion was unable to fetch.
119+
// Such roles must be routed through the update path so the synchronizer can
120+
// surface the failure in CannotReconcile rather than silently classifying
121+
// them as reconciled.
122+
func passwordSecretIsRequiredButMissing(
123+
inSpec apiv1.RoleConfiguration,
124+
latestSecretResourceVersion map[string]string,
125+
) bool {
126+
if inSpec.PasswordSecret == nil || inSpec.DisablePassword {
127+
return false
128+
}
129+
_, ok := latestSecretResourceVersion[inSpec.Name]
130+
return !ok
131+
}
132+
117133
// evaluateNextRoleActions evaluates the action needed for each role in the DB and/or the Spec.
118134
// It has no side effects
119135
func evaluateNextRoleActions(
@@ -148,7 +164,8 @@ func evaluateNextRoleActions(
148164
roleAdapterFromName(role.Name))
149165
case isInSpec &&
150166
(!role.isEquivalentTo(inSpec) ||
151-
role.passwordNeedsUpdating(lastPasswordState, latestSecretResourceVersion)):
167+
role.passwordNeedsUpdating(lastPasswordState, latestSecretResourceVersion) ||
168+
passwordSecretIsRequiredButMissing(inSpec, latestSecretResourceVersion)):
152169
internalRole := roleConfigurationAdapter{
153170
RoleConfiguration: inSpec,
154171
validUntilNullIsInfinity: role.ValidUntil.Valid,

internal/management/controller/roles/runnable.go

Lines changed: 43 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828

2929
"github.com/cloudnative-pg/machinery/pkg/log"
3030
corev1 "k8s.io/api/core/v1"
31-
apierrs "k8s.io/apimachinery/pkg/api/errors"
3231
"k8s.io/apimachinery/pkg/types"
3332
"sigs.k8s.io/controller-runtime/pkg/client"
3433

@@ -132,13 +131,6 @@ func (sr *RoleSynchronizer) Start(ctx context.Context) error {
132131
// with the spec. It also updates the cluster Status with the latest applied changes
133132
func (sr *RoleSynchronizer) reconcile(ctx context.Context, config *apiv1.ManagedConfiguration) error {
134133
var err error
135-
136-
defer func() {
137-
if r := recover(); r != nil {
138-
err = fmt.Errorf("recovered from a panic: %s", r)
139-
}
140-
}()
141-
142134
contextLog := log.FromContext(ctx).WithName("roles_reconciler")
143135
contextLog.Debug("reconciling managed roles")
144136

@@ -163,7 +155,7 @@ func (sr *RoleSynchronizer) reconcile(ctx context.Context, config *apiv1.Managed
163155
if err != nil {
164156
return fmt.Errorf("while getting superuser connection: %w", err)
165157
}
166-
appliedState, irreconcilableRoles, err := sr.synchronizeRoles(ctx, superUserDB, config, rolePasswords)
158+
appliedState, unreconciledRoles, err := sr.synchronizeRoles(ctx, superUserDB, config, rolePasswords)
167159
if err != nil {
168160
return fmt.Errorf("while syncrhonizing managed roles: %w", err)
169161
}
@@ -176,7 +168,7 @@ func (sr *RoleSynchronizer) reconcile(ctx context.Context, config *apiv1.Managed
176168
}
177169
updatedCluster := remoteCluster.DeepCopy()
178170
updatedCluster.Status.ManagedRolesStatus.PasswordStatus = appliedState
179-
updatedCluster.Status.ManagedRolesStatus.CannotReconcile = irreconcilableRoles
171+
updatedCluster.Status.ManagedRolesStatus.CannotReconcile = unreconciledRoles
180172
return sr.client.Status().Patch(ctx, updatedCluster, client.MergeFrom(&remoteCluster))
181173
}
182174

@@ -191,70 +183,61 @@ func getRoleNames(roles []roleConfigurationAdapter) []string {
191183
// synchronizeRoles aligns roles in the database to the spec
192184
// It returns
193185
// - the PasswordState for any updated roles
194-
// - any roles that had expectable postgres errors
195-
// - any unexpected error
186+
// - the per-role errors encountered while applying role actions (e.g.
187+
// unrealizable postgres operations, missing password secret)
188+
// - any error that prevents the whole synchronization from running
189+
// (e.g. listing the roles in the database failed)
196190
func (sr *RoleSynchronizer) synchronizeRoles(
197191
ctx context.Context,
198192
db *sql.DB,
199193
config *apiv1.ManagedConfiguration,
200194
storedPasswordState map[string]apiv1.PasswordState,
201195
) (map[string]apiv1.PasswordState, map[string][]string, error) {
202-
latestSecretResourceVersion, err := getPasswordSecretResourceVersion(
196+
latestSecretResourceVersion := getPasswordSecretResourceVersion(
203197
ctx, sr.client, config.Roles, sr.instance.GetNamespaceName())
204-
if err != nil {
205-
return nil, nil, err
206-
}
207198
rolesInDB, err := List(ctx, db)
208199
if err != nil {
209200
return nil, nil, err
210201
}
211202
rolesByAction := evaluateNextRoleActions(
212203
ctx, config, rolesInDB, storedPasswordState, latestSecretResourceVersion)
213204

214-
passwordStates, irreconcilableRoles, err := sr.applyRoleActions(ctx, db, rolesByAction)
215-
if err != nil {
216-
return nil, nil, err
217-
}
205+
passwordStates, unreconciledRoles := sr.applyRoleActions(ctx, db, rolesByAction)
218206

219207
// Merge the status from database into spec. We should keep all the status
220208
// otherwise in the next loop the user without status will be marked as need update
221209
for role, stateInDatabase := range passwordStates {
222210
storedPasswordState[role] = stateInDatabase
223211
}
224-
return storedPasswordState, irreconcilableRoles, nil
212+
return storedPasswordState, unreconciledRoles, nil
225213
}
226214

227-
// applyRoleActions applies the actions to reconcile roles in the DB with the Spec
228-
// It returns the apiv1.PasswordState for each role, as well as a map of roles that
229-
// cannot be reconciled for expectable errors, e.g. dropping a role owning content
230-
//
231-
// NOTE: applyRoleActions will carry on after an expectable error, i.e. an error
232-
// due to an invalid request for postgres. This is so that other actions will not
233-
// be blocked by a user error.
234-
// It will, however, error out on unexpected errors.
215+
// applyRoleActions applies the actions to reconcile roles in the DB with the Spec.
216+
// It returns the apiv1.PasswordState for each successfully applied role, and a
217+
// map collecting the errors encountered per role. Both expectable errors
218+
// (e.g. dropping a role that owns content) and unexpected errors are recorded
219+
// in the map, so that an error on one role does not block the reconciliation
220+
// of the others.
235221
func (sr *RoleSynchronizer) applyRoleActions(
236222
ctx context.Context,
237223
db *sql.DB,
238224
rolesByAction rolesByAction,
239-
) (map[string]apiv1.PasswordState, map[string][]string, error) {
225+
) (map[string]apiv1.PasswordState, map[string][]string) {
240226
contextLog := log.FromContext(ctx).WithName("roles_reconciler")
241227
contextLog.Debug("applying role actions")
242228

243-
irreconcilableRoles := make(map[string][]string)
229+
unreconciledRoles := make(map[string][]string)
244230
appliedChanges := make(map[string]apiv1.PasswordState)
245-
handleRoleError := func(errToEvaluate error, roleName string, action roleAction) error {
246-
// log unexpected errors, collect expectable PostgreSQL errors
231+
handleRoleError := func(errToEvaluate error, roleName string, action roleAction) {
247232
if errToEvaluate == nil {
248-
return nil
233+
return
249234
}
250235
roleError, err := parseRoleError(errToEvaluate, roleName, action)
251-
if err != nil {
252-
contextLog.Error(err, "while performing "+string(action), "role", roleName)
253-
return err
236+
if err == nil {
237+
unreconciledRoles[roleName] = append(unreconciledRoles[roleName], roleError.Error())
238+
} else {
239+
unreconciledRoles[roleName] = append(unreconciledRoles[roleName], errToEvaluate.Error())
254240
}
255-
256-
irreconcilableRoles[roleName] = append(irreconcilableRoles[roleName], roleError.Error())
257-
return nil
258241
}
259242

260243
actionsCreateUpdate := []roleAction{roleCreate, roleUpdate}
@@ -264,42 +247,32 @@ func (sr *RoleSynchronizer) applyRoleActions(
264247
if err == nil {
265248
appliedChanges[role.Name] = appliedState
266249
}
267-
if unhandledErr := handleRoleError(err, role.Name, action); unhandledErr != nil {
268-
return nil, nil, unhandledErr
269-
}
250+
handleRoleError(err, role.Name, action)
270251
}
271252
}
272253

273254
for _, role := range rolesByAction[roleSetComment] {
274255
// NOTE: adding/updating a comment on a role does not alter its TransactionID
275256
err := UpdateComment(ctx, db, role.toDatabaseRole())
276-
if unhandledErr := handleRoleError(err, role.Name, roleSetComment); unhandledErr != nil {
277-
return nil, nil, unhandledErr
278-
}
257+
handleRoleError(err, role.Name, roleSetComment)
279258
}
280259

281260
for _, role := range rolesByAction[roleUpdateMemberships] {
282261
// NOTE: revoking / granting to a role does not alter its TransactionID
283262
dbRole := role.toDatabaseRole()
284263
grants, revokes, err := getRoleMembershipDiff(ctx, db, role, dbRole)
285-
if unhandledErr := handleRoleError(err, role.Name, roleUpdateMemberships); unhandledErr != nil {
286-
return nil, nil, unhandledErr
287-
}
264+
handleRoleError(err, role.Name, roleUpdateMemberships)
288265

289266
err = UpdateMembership(ctx, db, dbRole, grants, revokes)
290-
if unhandledErr := handleRoleError(err, role.Name, roleUpdateMemberships); unhandledErr != nil {
291-
return nil, nil, unhandledErr
292-
}
267+
handleRoleError(err, role.Name, roleUpdateMemberships)
293268
}
294269

295270
for _, role := range rolesByAction[roleDelete] {
296271
err := Delete(ctx, db, role.toDatabaseRole())
297-
if unhandledErr := handleRoleError(err, role.Name, roleDelete); unhandledErr != nil {
298-
return nil, nil, unhandledErr
299-
}
272+
handleRoleError(err, role.Name, roleDelete)
300273
}
301274

302-
return appliedChanges, irreconcilableRoles, nil
275+
return appliedChanges, unreconciledRoles
303276
}
304277

305278
func getRoleMembershipDiff(
@@ -397,10 +370,8 @@ func getPassword(
397370
client.ObjectKey{Namespace: namespace, Name: secretName},
398371
&secret)
399372
if err != nil {
400-
if apierrs.IsNotFound(err) {
401-
return passwordSecret{}, nil
402-
}
403-
return passwordSecret{}, err
373+
return passwordSecret{},
374+
fmt.Errorf("failed to get password secret %s: %w", secretName, err)
404375
}
405376
usernameFromSecret, passwordFromSecret, err := utils.GetUserPasswordFromSecret(&secret)
406377
if err != nil {
@@ -418,26 +389,31 @@ func getPassword(
418389
nil
419390
}
420391

421-
// getPasswordSecretResourceVersion returns a list of resource version of the passwords secrets for managed roles
422-
// stored as Kubernetes secrets
392+
// getPasswordSecretResourceVersion returns a list of resource version of the password secrets for managed roles
393+
// stored as Kubernetes secrets. Roles whose secret cannot be fetched are
394+
// omitted from the result; the action evaluator will route them through the
395+
// update path so the failure surfaces in CannotReconcile.
423396
func getPasswordSecretResourceVersion(
424397
ctx context.Context,
425-
client client.Client,
398+
cl client.Client,
426399
rolesInSpec []apiv1.RoleConfiguration,
427400
namespace string,
428-
) (map[string]string, error) {
401+
) map[string]string {
402+
contextLog := log.FromContext(ctx).WithName("roles_reconciler")
429403
re := make(map[string]string)
430404
for _, role := range rolesInSpec {
431405
if role.PasswordSecret == nil || role.DisablePassword {
432406
continue
433407
}
434-
passwordSecret, err := getPassword(ctx, client, roleConfigurationAdapter{RoleConfiguration: role}, namespace)
408+
passwordSecret, err := getPassword(ctx, cl, roleConfigurationAdapter{RoleConfiguration: role}, namespace)
435409
if err != nil {
436-
return nil, err
410+
contextLog.Debug("could not fetch password secret for role; will be flagged for reconciliation",
411+
"role", role.Name, "secret", role.PasswordSecret.Name, "err", err.Error())
412+
continue
437413
}
438414
re[role.Name] = passwordSecret.version
439415
}
440-
return re, nil
416+
return re
441417
}
442418

443419
func getRolesToGrant(inRoleInDB, inRoleInSpec []string) []string {

0 commit comments

Comments
 (0)