Skip to content

Commit 787bbac

Browse files
devantlerCopilot
andauthored
fix: ensure cluster-autoscaler-config secret during cluster update (#4607)
* fix: ensure cluster-autoscaler-config secret during cluster update When `ksail cluster update` installs the cluster-autoscaler Helm chart, the autoscaler pod requires the `cluster-autoscaler-config` Secret in kube-system. This secret was only created during `cluster create` by the Talos provisioner's bootstrapAndFinalize, but never during update. Add `ensureAutoscalerSecretIfNeeded` to the Talos provisioner's Update() method. It reuses the existing `ensureSnapshotImage` and `ensureAutoscalerSecret` methods to create or update the secret before the component reconciler installs the Helm chart. The method is a no-op when: - The provider is not Hetzner (hetznerOpts is nil) - Node autoscaling is disabled - Talos configs are not available Fixes #4606 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: add nil-bundle guard and improve docstring for autoscaler secret - Add nil guard for p.talosConfigs.Bundle() to prevent panic - Move Bundle() call before ensureSnapshotImage() to avoid unnecessary snapshot lookups when the bundle is nil - Fix docstring to accurately describe error propagation behavior - Add test for non-nil Configs with nil bundle - Add WithTalosConfigsForTest seam for setting talosConfigs in tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * chore: apply golangci-lint fixes --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: devantler <26203420+devantler@users.noreply.github.com>
1 parent 63007e7 commit 787bbac

3 files changed

Lines changed: 129 additions & 0 deletions

File tree

pkg/svc/provisioner/cluster/talos/export_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"net/netip"
77

88
"github.com/devantler-tech/ksail/v7/pkg/apis/cluster/v1alpha1"
9+
talosconfigmanager "github.com/devantler-tech/ksail/v7/pkg/fsutil/configmanager/talos"
910
"github.com/devantler-tech/ksail/v7/pkg/k8s"
1011
omniprovider "github.com/devantler-tech/ksail/v7/pkg/svc/provider/omni"
1112
"github.com/devantler-tech/ksail/v7/pkg/svc/provisioner/cluster/clusterupdate"
@@ -297,3 +298,20 @@ func (p *Provisioner) IsDockerProviderForTest() bool {
297298
func (p *Provisioner) ClusterReadinessChecksCountForTest() int {
298299
return len(p.clusterReadinessChecks())
299300
}
301+
302+
// EnsureAutoscalerSecretIfNeededForTest exposes ensureAutoscalerSecretIfNeeded for unit testing.
303+
func (p *Provisioner) EnsureAutoscalerSecretIfNeededForTest(
304+
ctx context.Context,
305+
clusterName string,
306+
) error {
307+
return p.ensureAutoscalerSecretIfNeeded(ctx, clusterName)
308+
}
309+
310+
// WithTalosConfigsForTest sets talosConfigs on the provisioner for unit testing.
311+
func (p *Provisioner) WithTalosConfigsForTest(
312+
configs *talosconfigmanager.Configs,
313+
) *Provisioner {
314+
p.talosConfigs = configs
315+
316+
return p
317+
}

pkg/svc/provisioner/cluster/talos/update.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ func (p *Provisioner) Update(
101101
// actually running (e.g., booted from a Hetzner ISO at a different version).
102102
// See: https://github.com/devantler-tech/ksail/issues/4260
103103

104+
// Ensure the cluster-autoscaler-config Secret exists when the node autoscaler
105+
// is enabled. During cluster create, this secret is created by
106+
// bootstrapAndFinalize. During update the component reconciler installs the
107+
// Helm chart but did not previously create the prerequisite secret, causing
108+
// the autoscaler pod to enter CreateContainerConfigError.
109+
// See: https://github.com/devantler-tech/ksail/issues/4606
110+
secretErr = p.ensureAutoscalerSecretIfNeeded(ctx, clusterName)
111+
if secretErr != nil {
112+
return result, fmt.Errorf("failed to ensure autoscaler config secret: %w", secretErr)
113+
}
114+
104115
return result, nil
105116
}
106117

@@ -805,3 +816,30 @@ func (p *Provisioner) syncHetznerFirewallRules(
805816

806817
return nil
807818
}
819+
820+
// ensureAutoscalerSecretIfNeeded creates or updates the cluster-autoscaler-config
821+
// Secret when the node autoscaler is enabled on Hetzner. It is a no-op when
822+
// autoscaling is disabled, the provider is not Hetzner, or the config bundle
823+
// is unavailable. Snapshot image lookup errors are propagated because they
824+
// indicate a misconfigured cluster. This mirrors the create-time call in
825+
// bootstrapAndFinalize.
826+
func (p *Provisioner) ensureAutoscalerSecretIfNeeded(
827+
ctx context.Context,
828+
clusterName string,
829+
) error {
830+
if p.hetznerOpts == nil || !p.hetznerOpts.NodeAutoscalerEnabled || p.talosConfigs == nil {
831+
return nil
832+
}
833+
834+
configBundle := p.talosConfigs.Bundle()
835+
if configBundle == nil {
836+
return nil
837+
}
838+
839+
snapshotImageID, err := p.ensureSnapshotImage(ctx, clusterName)
840+
if err != nil {
841+
return fmt.Errorf("looking up snapshot image for autoscaler secret: %w", err)
842+
}
843+
844+
return p.ensureAutoscalerSecret(ctx, configBundle, snapshotImageID)
845+
}

pkg/svc/provisioner/cluster/talos/update_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,3 +481,76 @@ func TestNeedsSecretSync_ScaleDown(t *testing.T) {
481481
diff,
482482
))
483483
}
484+
485+
// TestEnsureAutoscalerSecretIfNeeded_NoopWhenNotHetzner verifies that
486+
// ensureAutoscalerSecretIfNeeded is a no-op when hetznerOpts is nil
487+
// (non-Hetzner provider).
488+
func TestEnsureAutoscalerSecretIfNeeded_NoopWhenNotHetzner(t *testing.T) {
489+
t.Parallel()
490+
491+
provisioner := talosprovisioner.NewProvisioner(nil, nil).WithLogWriter(io.Discard)
492+
493+
err := provisioner.EnsureAutoscalerSecretIfNeededForTest(
494+
context.Background(),
495+
"test-cluster",
496+
)
497+
require.NoError(t, err)
498+
}
499+
500+
// TestEnsureAutoscalerSecretIfNeeded_NoopWhenAutoscalerDisabled verifies that
501+
// ensureAutoscalerSecretIfNeeded is a no-op when NodeAutoscalerEnabled is false.
502+
func TestEnsureAutoscalerSecretIfNeeded_NoopWhenAutoscalerDisabled(t *testing.T) {
503+
t.Parallel()
504+
505+
provisioner := talosprovisioner.NewProvisioner(nil, nil).
506+
WithHetznerOptions(v1alpha1.OptionsHetzner{
507+
NodeAutoscalerEnabled: false,
508+
}).
509+
WithLogWriter(io.Discard)
510+
511+
err := provisioner.EnsureAutoscalerSecretIfNeededForTest(
512+
context.Background(),
513+
"test-cluster",
514+
)
515+
require.NoError(t, err)
516+
}
517+
518+
// TestEnsureAutoscalerSecretIfNeeded_NoopWhenNilTalosConfigs verifies that
519+
// ensureAutoscalerSecretIfNeeded is a no-op when talosConfigs is nil, even
520+
// when autoscaler is enabled on Hetzner.
521+
func TestEnsureAutoscalerSecretIfNeeded_NoopWhenNilTalosConfigs(t *testing.T) {
522+
t.Parallel()
523+
524+
provisioner := talosprovisioner.NewProvisioner(nil, nil).
525+
WithHetznerOptions(v1alpha1.OptionsHetzner{
526+
NodeAutoscalerEnabled: true,
527+
}).
528+
WithLogWriter(io.Discard)
529+
530+
err := provisioner.EnsureAutoscalerSecretIfNeededForTest(
531+
context.Background(),
532+
"test-cluster",
533+
)
534+
require.NoError(t, err)
535+
}
536+
537+
// TestEnsureAutoscalerSecretIfNeeded_NoopWhenNilBundle verifies that
538+
// ensureAutoscalerSecretIfNeeded is a no-op when talosConfigs is non-nil but
539+
// Bundle() returns nil, preventing a nil-dereference panic.
540+
func TestEnsureAutoscalerSecretIfNeeded_NoopWhenNilBundle(t *testing.T) {
541+
t.Parallel()
542+
543+
configs := &talosconfigmanager.Configs{}
544+
provisioner := talosprovisioner.NewProvisioner(nil, nil).
545+
WithHetznerOptions(v1alpha1.OptionsHetzner{
546+
NodeAutoscalerEnabled: true,
547+
}).
548+
WithTalosConfigsForTest(configs).
549+
WithLogWriter(io.Discard)
550+
551+
err := provisioner.EnsureAutoscalerSecretIfNeededForTest(
552+
context.Background(),
553+
"test-cluster",
554+
)
555+
require.NoError(t, err)
556+
}

0 commit comments

Comments
 (0)