From 97064306cde64490013256fd3df2ba12d7d8e6c9 Mon Sep 17 00:00:00 2001 From: Brandon Schurman Date: Tue, 26 May 2026 16:21:15 -0400 Subject: [PATCH] hub: optional startupProbe for slow-startup migration windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an optional hub.startupProbe (disabled by default to preserve existing behaviour). When enabled, kubelet gives Hub the full failureThreshold × periodSeconds window to become Ready before the liveness and readiness probes start counting failures. Default tuning when enabled is 30 × 5s = 150s. Useful on a cold start where Hub takes longer than the existing liveness probe's tolerance (default 15s, periodSeconds=5 × failureThreshold=3) — for example when Hub has to run a substantial Postgres schema migration as part of an upgrade. Without this probe, large migrations risk being interrupted mid-flight by kubelet restarting the pod. Backward compatible: existing installs see no change unless they explicitly set hub.startupProbe.enabled=true. --- charts/lunar/CHANGELOG.md | 13 +++++++++++++ charts/lunar/templates/hub-deployment.yaml | 11 +++++++++++ charts/lunar/values.yaml | 16 ++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/charts/lunar/CHANGELOG.md b/charts/lunar/CHANGELOG.md index 954d502..aae12d8 100644 --- a/charts/lunar/CHANGELOG.md +++ b/charts/lunar/CHANGELOG.md @@ -9,6 +9,19 @@ History starts at 1.0.0 (the snippet→script rename and ghcr.io switchover); earlier 0.x versions had no production users. For 0.x history see `git log -- charts/lunar/`. +## [Unreleased] + +### Added + +- **Optional `hub.startupProbe`.** New optional probe (disabled by default + to preserve current behaviour) that gives Hub a longer window to come + Ready before liveness/readiness kick in. Useful when Hub takes longer + than `livenessProbe.failureThreshold × periodSeconds` to start — e.g. + on a cold start with a substantial Postgres schema migration. 
+ Default tuning when enabled: `30 × 5s = 150s` startup window. Set + `hub.startupProbe.enabled: true` and tune `failureThreshold` / + `periodSeconds` to taste. + ## [2.0.0] - 2026-05-20 ### Breaking diff --git a/charts/lunar/templates/hub-deployment.yaml b/charts/lunar/templates/hub-deployment.yaml index fba8863..94277eb 100644 --- a/charts/lunar/templates/hub-deployment.yaml +++ b/charts/lunar/templates/hub-deployment.yaml @@ -183,6 +183,17 @@ spec: failureThreshold: {{ .failureThreshold }} {{- end }} {{- end }} + {{- if .Values.hub.startupProbe.enabled }} + {{- with .Values.hub.startupProbe }} + startupProbe: + httpGet: + path: /health + port: hub-health + initialDelaySeconds: {{ .initialDelaySeconds }} + periodSeconds: {{ .periodSeconds }} + failureThreshold: {{ .failureThreshold }} + {{- end }} + {{- end }} ports: - name: hub-grpc containerPort: 8000 diff --git a/charts/lunar/values.yaml b/charts/lunar/values.yaml index da8ba50..4e80c1b 100644 --- a/charts/lunar/values.yaml +++ b/charts/lunar/values.yaml @@ -265,6 +265,22 @@ hub: periodSeconds: 5 failureThreshold: 3 + # Optional startup probe (disabled by default to preserve existing + # behaviour). Enable when Hub takes longer to become Ready than the + # liveness probe's failureThreshold × periodSeconds tolerates — e.g. + # on a cold start with a substantial Postgres schema migration. Until + # the startupProbe succeeds, kubelet does not run the liveness or + # readiness probes, so Hub gets `failureThreshold × periodSeconds` + # seconds to pass /health; after that kubelet restarts the container. + # + # Default tuning when enabled is 30 × 5s = 150s. Raise + # `failureThreshold` for larger databases with longer migrations. + startupProbe: + enabled: false + initialDelaySeconds: 0 + periodSeconds: 5 + failureThreshold: 30 + labels: {} annotations: {} podAnnotations: {}