Skip to content
32 changes: 32 additions & 0 deletions .github/workflows/check-secret-checksum.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: Check secret checksum

# Fails when a password-type config field is not part of secretsChecksum, so a
# new secret can't be added without wiring it into the pod-restart checksum.
# Runs on PRs and on push to main (like the chart-version checks) so drift is
# surfaced after merge.
#
# Note: this only blocks a merge if it is also added to the branch's required
# status checks (ideally strict / require up to date). Be careful when doing
# so: the triggers below are path-filtered, and GitHub leaves a required check
# "Pending" (blocking the merge) on PRs where the workflow was skipped by a
# path filter. Drop the pull_request `paths` filter before making this check
# required.
on:
  pull_request:
    paths:
      - "replicated/config.yaml"
      - "replicated/openhands.yaml"
      - "scripts/check_secret_checksum.py"
      # Re-run when the workflow itself changes so edits to it are exercised.
      - ".github/workflows/check-secret-checksum.yml"
  push:
    branches:
      - main
    paths:
      - "replicated/config.yaml"
      - "replicated/openhands.yaml"
      - "scripts/check_secret_checksum.py"
      - ".github/workflows/check-secret-checksum.yml"

# Least privilege: the job only checks out and reads the repository.
permissions:
  contents: read

jobs:
  check-secret-checksum:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.x"
      # PyYAML is the script's only third-party dependency.
      - run: pip install --quiet pyyaml
      - name: Every password config field must be in secretsChecksum
        run: python3 scripts/check_secret_checksum.py
2 changes: 1 addition & 1 deletion charts/openhands/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
description: OpenHands is an AI-driven autonomous software engineer
name: openhands
appVersion: cloud-1.29.1
version: 0.7.23
version: 0.7.24
maintainers:
- name: rbren
- name: xingyao
Expand Down
3 changes: 3 additions & 0 deletions charts/openhands/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ spec:
{{- if .Values.allowedUsers }}
checksum/user-waitlist: {{ include (print $.Template.BasePath "/user-waitlist-configmap.yaml") . | sha256sum }}
{{- end }}
{{- if .Values.secretsChecksum }}
checksum/config-secrets: {{ .Values.secretsChecksum | quote }}
{{- end }}
{{- include "openhands.caBundle.checksumAnnotation" . | nindent 8 }}
spec:
terminationGracePeriodSeconds: 60
Expand Down
3 changes: 3 additions & 0 deletions charts/openhands/templates/integration-events-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ spec:
{{- if .Values.allowedUsers }}
checksum/user-waitlist: {{ include (print $.Template.BasePath "/user-waitlist-configmap.yaml") . | sha256sum }}
{{- end }}
{{- if .Values.secretsChecksum }}
checksum/config-secrets: {{ .Values.secretsChecksum | quote }}
{{- end }}
{{- include "openhands.caBundle.checksumAnnotation" . | nindent 8 }}
spec:
terminationGracePeriodSeconds: 60
Expand Down
3 changes: 3 additions & 0 deletions charts/openhands/templates/mcp-events-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ spec:
{{- if .Values.allowedUsers }}
checksum/user-waitlist: {{ include (print $.Template.BasePath "/user-waitlist-configmap.yaml") . | sha256sum }}
{{- end }}
{{- if .Values.secretsChecksum }}
checksum/config-secrets: {{ .Values.secretsChecksum | quote }}
{{- end }}
{{- include "openhands.caBundle.checksumAnnotation" . | nindent 8 }}
spec:
terminationGracePeriodSeconds: 60
Expand Down
8 changes: 8 additions & 0 deletions charts/openhands/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ bitbucket:
enterpriseSSO:
enabled: false

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: The comment explains the mechanism well, but consider adding a note about the restart scope: 'Changing any secret in the KOTS config restarts the openhands pod, even if that secret only affects other components (e.g., LiteLLM keys).' This helps operators understand the blast radius when rotating credentials.

# sha256 of all secret (password) config values, injected by KOTS. Changing any
# secret-backed config changes this value, which changes every pod template
# that carries the checksum/config-secrets annotation (the openhands,
# integration-events and mcp-events deployments) and forces a rollout so the
# new secret is picked up (secret-sourced env vars only load at pod start).
# When left empty, the annotation is omitted entirely.
# The value is computed in replicated/openhands.yaml.
# Blast radius: this restarts those pods on ANY secret change, including
# secrets that only affect other components (e.g. an LLM key used only by LiteLLM).
secretsChecksum: ""

bitbucketDataCenter:
enabled: false
host: ""
Expand Down
9 changes: 9 additions & 0 deletions replicated/openhands.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,15 @@ spec:
repository: 'images.r9.all-hands.dev/proxy/{{repl LicenseFieldValue "appSlug"}}/quay.io/minio/minio'
imagePullSecrets:
- name: '{{repl ImagePullSecretName }}'
# sha256 of every secret (password) config value. Changing any secret in the
# KOTS config changes this hash, which changes the checksum/config-secrets
# pod-template annotation on the openhands, integration-events and mcp-events
# deployments and forces a rollout so secret-backed env vars are reloaded.
# Keep this in sync with the password fields in config.yaml; CI
# (check-secret-checksum) fails if a password field is missing here.
# Grouped in order: LLM provider keys; app/infra secrets (admin, postgres,
# redis, jwt, keycloak, litellm, sandbox, keycloak SMTP, plugin-directory,
# automation); auth/integration secrets (bitbucket DC, github, gitlab, slack);
# then external postgres, custom sandbox image registry, and laminar.
secretsChecksum: 'repl{{ sha256sum (printf "%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s" (ConfigOption "anthropic_api_key") (ConfigOption "openai_api_key") (ConfigOption "google_gemini_api_key") (ConfigOption "deepseek_api_key") (ConfigOption "mistral_api_key") (ConfigOption "azure_api_key") (ConfigOption "azure_client_secret") (ConfigOption "groq_api_key") (ConfigOption "openrouter_api_key") (ConfigOption "aws_secret_access_key") (ConfigOption "custom_api_key") (ConfigOption "admin_password") (ConfigOption "postgres_password") (ConfigOption "redis_password") (ConfigOption "jwt_secret") (ConfigOption "keycloak_admin_password") (ConfigOption "keycloak_client_secret") (ConfigOption "litellm_api_key") (ConfigOption "default_api_key") (ConfigOption "sandbox_api_key") (ConfigOption "keycloak_smtp_password") (ConfigOption "plugin_directory_identity_shared_secret") (ConfigOption "plugin_directory_session_secret") (ConfigOption "automation_service_key") (ConfigOption "automation_webhook_secret") (ConfigOption "bitbucket_data_center_client_secret") (ConfigOption "bitbucket_data_center_bot_token") (ConfigOption "github_oauth_client_secret") (ConfigOption "github_app_webhook_secret") (ConfigOption "gitlab_oauth_client_secret") (ConfigOption "slack_client_secret") (ConfigOption "slack_signing_secret") (ConfigOption "external_postgres_password") (ConfigOption "custom_sandbox_image_registry_password") (ConfigOption "laminar_project_api_key")) }}'
bitbucketDataCenter:
enabled: repl{{ ConfigOptionEquals "bitbucket_data_center_auth_enabled" "1" }}
host: 'repl{{ ConfigOption "bitbucket_data_center_domain" }}'
Expand Down
73 changes: 73 additions & 0 deletions scripts/check_secret_checksum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""Fail if a password-type KOTS config field is missing from secretsChecksum.

Background: secret-backed env vars on the openhands deployment (sourced via
secretKeyRef) are read only at pod start, and Kubernetes does not restart a pod
when a referenced Secret changes. The openhands pod template carries a
checksum/config-secrets annotation whose value (secretsChecksum in
replicated/openhands.yaml) is a KOTS-rendered sha256 of every secret config
value. When a secret changes, the hash changes, the pod template changes, and
the pod rolls so the new secret is picked up.

That only works if every secret (password) field in config.yaml is part of the
hash. If someone adds a new password field but forgets to add it to the hash,
changing it in the admin console silently has no effect until a manual restart.
This check keeps the two in sync, the same way the chart-version check keeps
chart changes and version bumps in sync.

NOTE: this check only blocks a merge if it is configured as a required status
check on the protected branch (ideally strict / require-up-to-date). Otherwise
it is advisory. It also runs on push to main so drift is surfaced immediately.
"""

import pathlib
import re
import sys

import yaml

ROOT = pathlib.Path(__file__).resolve().parents[1]

# Matches `ConfigOption "<name>"` inside the KOTS template expression. The
# mandatory whitespace after `ConfigOption` keeps `ConfigOptionEquals "..."`
# from being counted as coverage.
_CONFIG_OPTION_RE = re.compile(r'ConfigOption\s+"([^"]+)"')


def collect_password_fields(items):
    """Return the names of all password-type fields under ``items``, in order.

    ``items`` is the ``items`` list of a config group (or of a nested item).
    Entries that are not mappings are ignored, and anything that is not a list
    (including ``None`` for an empty ``items:`` key) yields no fields.
    """
    if not isinstance(items, list):
        return []
    names = []
    for item in items:
        if not isinstance(item, dict):
            continue
        if item.get("type") == "password" and "name" in item:
            names.append(item["name"])
        # Items may nest further items; walk them structurally as well.
        names.extend(collect_password_fields(item.get("items")))
    return names


def load_password_fields(config_path):
    """Parse the KOTS config file and return every password field name in it."""
    config = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    spec = config.get("spec") if isinstance(config, dict) else None
    groups = spec.get("groups") if isinstance(spec, dict) else None
    names = []
    for group in groups or []:
        if isinstance(group, dict):
            names.extend(collect_password_fields(group.get("items")))
    return names


def find_checksum_expr(docs):
    """Return the first non-empty ``spec.values.secretsChecksum`` among ``docs``.

    ``docs`` is an iterable of parsed YAML documents. Returns ``""`` when no
    document defines a non-empty value, tolerating null/missing ``spec`` and
    ``values`` mappings along the way.
    """
    for doc in docs:
        spec = doc.get("spec") if isinstance(doc, dict) else None
        values = spec.get("values") if isinstance(spec, dict) else None
        value = values.get("secretsChecksum") if isinstance(values, dict) else None
        if value:
            return str(value)
    return ""


def covered_options(checksum_expr):
    """Return the set of ConfigOption names referenced in ``checksum_expr``."""
    return set(_CONFIG_OPTION_RE.findall(checksum_expr))


def evaluate(password_fields, checksum_expr):
    """Decide the check outcome; return ``(exit_code, message)``.

    The check fails (exit code 1) when:
      * no password fields were found at all -- otherwise a change in the
        layout of config.yaml would make the check pass vacuously;
      * the secretsChecksum expression is missing or empty;
      * any password field is not referenced by the expression.
    """
    if not password_fields:
        return 1, (
            "ERROR: no password config fields were found in replicated/config.yaml. "
            "The check would pass vacuously; verify that spec.groups[].items still "
            "holds the config fields."
        )
    if not checksum_expr:
        return 1, (
            "ERROR: spec.values.secretsChecksum is missing or empty in "
            "replicated/openhands.yaml."
        )

    covered = covered_options(checksum_expr)
    missing = [f for f in password_fields if f not in covered]
    if missing:
        lines = [
            "ERROR: these password config fields are not included in secretsChecksum "
            "(replicated/openhands.yaml):"
        ]
        lines.extend(f" - {field}" for field in missing)
        lines.append(
            "\nAdd each one to the secretsChecksum sha256 so changing it in the admin "
            "console restarts the openhands pod. See scripts/check_secret_checksum.py."
        )
        return 1, "\n".join(lines)

    return 0, (
        f"OK: all {len(password_fields)} password config fields are covered by secretsChecksum."
    )


def main():
    """Run the check against the repository files and return the exit code."""
    # 1. Password (secret) field names declared in config.yaml, parsed
    #    structurally so reformatting the YAML can't make the check miss a field.
    password_fields = load_password_fields(ROOT / "replicated" / "config.yaml")

    # 2. The secretsChecksum template expression from the HelmChart values.
    chart_text = (ROOT / "replicated" / "openhands.yaml").read_text(encoding="utf-8")
    checksum_expr = find_checksum_expr(yaml.safe_load_all(chart_text))

    # 3. Every secret field must be covered by the checksum.
    exit_code, message = evaluate(password_fields, checksum_expr)
    print(message)
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
Loading