diff --git a/charts/lunar/CHANGELOG.md b/charts/lunar/CHANGELOG.md index ee2aec0..93f2ef7 100644 --- a/charts/lunar/CHANGELOG.md +++ b/charts/lunar/CHANGELOG.md @@ -9,6 +9,19 @@ History starts at 1.0.0 (the snippet→script rename and ghcr.io switchover); earlier 0.x versions had no production users. For 0.x history see `git log -- charts/lunar/`. +## [Unreleased] + +### Added + +- **Optional `hub.startupProbe`.** New optional probe (disabled by default + to preserve current behaviour) that gives Hub a longer window to come + Ready before liveness/readiness kick in. Useful when Hub takes longer + than `livenessProbe.failureThreshold × periodSeconds` to start — e.g. + on a cold start with a substantial Postgres schema migration. + Default tuning when enabled: `30 × 5s = 150s` startup window. Set + `hub.startupProbe.enabled: true` and tune `failureThreshold` / + `periodSeconds` to taste. + ## [2.1.0] - 2026-05-25 ### Added diff --git a/charts/lunar/templates/hub-deployment.yaml b/charts/lunar/templates/hub-deployment.yaml index b0b14a7..7c1ab42 100644 --- a/charts/lunar/templates/hub-deployment.yaml +++ b/charts/lunar/templates/hub-deployment.yaml @@ -201,6 +201,17 @@ spec: failureThreshold: {{ .failureThreshold }} {{- end }} {{- end }} + {{- if .Values.hub.startupProbe.enabled }} + {{- with .Values.hub.startupProbe }} + startupProbe: + httpGet: + path: /health + port: hub-health + initialDelaySeconds: {{ .initialDelaySeconds }} + periodSeconds: {{ .periodSeconds }} + failureThreshold: {{ .failureThreshold }} + {{- end }} + {{- end }} ports: - name: hub-grpc containerPort: 8000 diff --git a/charts/lunar/values.yaml b/charts/lunar/values.yaml index e4539b7..88de23e 100644 --- a/charts/lunar/values.yaml +++ b/charts/lunar/values.yaml @@ -279,6 +279,22 @@ hub: periodSeconds: 5 failureThreshold: 3 + # Optional startup probe (disabled by default to preserve existing + # behaviour). 
Enable when Hub takes longer to become Ready than the + # liveness probe's failureThreshold × periodSeconds tolerates — e.g. + # on a cold start with a substantial Postgres schema migration. Until + # the startupProbe succeeds, kubelet runs no liveness/readiness probes, + # so Hub gets `failureThreshold × periodSeconds` seconds (after any + # `initialDelaySeconds`) to start before kubelet restarts the container. + # + # Default tuning when enabled is 30 × 5s = 150s. Raise + # `failureThreshold` for larger databases with longer migrations. + startupProbe: + enabled: false + initialDelaySeconds: 0 + periodSeconds: 5 + failureThreshold: 30 + labels: {} annotations: {} podAnnotations: {}