fix(usersecrets): redact secret DDL anywhere in a multi-statement query (#769)

EDsCODE · claude · web-flow · commit 941882a03b0f · 2026-06-11T17:13:32.000-07:00
* fix(usersecrets): redact secret DDL anywhere in a multi-statement query

RedactForLog only classified the statement head, so a client could prepend
any statement (e.g. "SELECT 1; CREATE PERSISTENT SECRET foo (...,
SECRET 'realKey')") and the head would no longer classify as secret DDL — the
query, including credential literals, was returned verbatim and written to
logs, OTel spans, the persisted query log, and pg_stat_activity. This
violated the "Never log/store secret statement text" invariant.

RedactForLog now splits the query on top-level semicolons (reusing the
existing literal/identifier/comment-aware tokenization, not a naive
strings.Split) and, if ANY top-level segment classifies as CREATE SECRET,
replaces the whole query with the fixed redaction placeholder. The
single-statement fast path is unchanged. Over-redaction is intentional:
false positives only cost log fidelity, never credential exposure.

Also fixes the same head-only blind spot in the Flight SQL ingress
RejectPersistentSecretDDL check via a new ContainsPersistentSecretDDL
helper that scans every top-level statement.

Generated-By: PostHog Code
Task-Id: 96557416-3af0-4633-bcf0-164d1e64cf22

* fix(usersecrets): redact secret literals echoed in error messages

RedactForLog scrubs the query attribute, but engine error messages echo
the offending SQL verbatim (DuckDB: `LINE 1: ... SECRET 'literal'`), so a
failed CREATE SECRET leaked the credential through the unredacted error
sinks even though the query text was redacted:

  msg="Query execution errored." query="CREATE TEMPORARY SECRET badt (…redacted)"
    error="...Parser Error: ... SECRET 'realkey' BOGUS)"

This violated the same "never log/store secret statement text" invariant
the PR set out to enforce, just via the error channel instead of the
query channel. Reproduced live against the multi-tenant cluster.

Add usersecrets.RedactErrorForLog(query, errMsg): when the originating
query carries CREATE SECRET DDL (head or any top-level statement, shared
tokenizer with RedactForLog so they can't drift), the whole error is
replaced with a fixed placeholder. Over-redaction only costs diagnostic
detail, never credential exposure; non-secret errors pass through.

Apply it at every sink that logs a query's error: logQueryError and
logQueryFinished (slog `error` attr) and logQuery (query-log `Exception`
column), each classifying against the original query before it is
replaced with the redacted form.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -272,7 +272,13 @@ Invariants for anyone touching this path:
   false "DROP succeeded" is fatal for a credential revocation.
 - **Never log/store secret statement text.** `usersecrets.RedactForLog` guards
   logQueryStarted/Finished/Error, the query log, spans, and pg_stat_activity
-  (`currentQuery`); keep new logging of query text behind it.
+  (`currentQuery`); keep new logging of query text behind it. Engine **error
+  messages echo the offending SQL** (DuckDB emits `LINE 1: ... SECRET '...'`),
+  so a failed CREATE SECRET leaks the credential via the `error` attribute /
+  query-log `Exception` even when the query attribute is redacted —
+  `usersecrets.RedactErrorForLog(query, errMsg)` guards those error sinks
+  (logQueryError/logQueryFinished, `logQuery`); keep new error logging behind it
+  too, and pass the original (un-redacted) query so it can classify.
 - Touching the interception, wipe/replay, or payload shape → update
   `server/conn_user_secrets_test.go`, `duckdbservice/user_secrets_test.go`,
   and the `persistent_user_secret`(+`_isolation`) assertions in
diff --git a/server/conn.go b/server/conn.go
@@ -419,15 +419,16 @@ func (c *clientConn) logQueryStarted(query string) {
 // "started" and a "finished" line, and can look at the separate error
 // line for severity context.
 func (c *clientConn) logQueryFinished(query string, start time.Time, rows int64, err error) {
-	query = usersecrets.RedactForLog(query)
 	attrs := []any{
-		"query", query,
+		"query", usersecrets.RedactForLog(query),
 		"duration_ms", time.Since(start).Milliseconds(),
 		"rows", rows,
 		"trace_id", observe.TraceIDFromContext(c.ctx),
 	}
 	if err != nil {
-		attrs = append(attrs, "error", err.Error())
+		// Engine errors echo the offending SQL, so redact the error as well;
+		// query is still the original text here, which classification needs.
+		attrs = append(attrs, "error", usersecrets.RedactErrorForLog(query, err.Error()))
 	}
 	c.logger().Info("Query finished.", attrs...)
 }
@@ -439,8 +440,13 @@ func (c *clientConn) logQueryFinished(query string, start time.Time, rows int64,
 // (worker crash, IO failure, internal panic, infra unreachable), not
 // "user typo'd a column name."
 func (c *clientConn) logQueryError(query string, err error) {
-	query = usersecrets.RedactForLog(query)
-	attrs := []any{"query", query, "error", err}
+	// Engine errors echo the offending SQL, so a failed CREATE SECRET leaks the
+	// credential via the error attribute unless it is redacted too. Classify
+	// against the original query before it is replaced with the redacted form.
+	attrs := []any{
+		"query", usersecrets.RedactForLog(query),
+		"error", usersecrets.RedactErrorForLog(query, err.Error()),
+	}
 	if isDuckLakeTransactionConflict(err) {
 		c.logger().Warn("DuckLake transaction conflict.", attrs...)
 		return
diff --git a/server/flightsqlingress/ingress.go b/server/flightsqlingress/ingress.go
@@ -331,8 +331,7 @@ func (h *ControlPlaneFlightSQLHandler) checkUserSecretDDL(query string) error {
 	if !h.rejectPersistentSecretDDL {
 		return nil
 	}
-	st := usersecrets.Classify(query)
-	if st.Kind != usersecrets.KindNone && st.Persistent {
+	if usersecrets.ContainsPersistentSecretDDL(query) {
 		return status.Error(codes.InvalidArgument,
 			"persistent secrets are managed via the PostgreSQL protocol on this deployment; CREATE/DROP PERSISTENT SECRET is not supported over Flight SQL (a secret created here would not survive the session)")
 	}
diff --git a/server/querylog.go b/server/querylog.go
@@ -496,7 +496,10 @@ func (c *clientConn) logQuery(start time.Time, query, transpiledQuery, cmdType s
 	}
 
 	// CREATE SECRET option lists carry credential material; never persist
-	// them to the query log.
+	// them to the query log. The engine's error text echoes the offending SQL,
+	// so a failed CREATE SECRET leaks the credential via Exception unless the
+	// error is redacted too — classify against the original query first.
+	errMsg = usersecrets.RedactErrorForLog(query, errMsg)
 	query = usersecrets.RedactForLog(query)
 	transpiledQuery = usersecrets.RedactForLog(transpiledQuery)
 
diff --git a/server/usersecrets/classify.go b/server/usersecrets/classify.go
@@ -89,6 +89,27 @@ func Classify(query string) Statement {
 	return st
 }
 
+// ContainsPersistentSecretDDL reports whether any top-level statement in query
+// is persistent-secret DDL (CREATE/DROP PERSISTENT SECRET). Classify inspects
+// only the statement head; this checks the head first and, when
+// hasTrailingStatement reports more statements, every splitTopLevel segment, so
+// "SELECT 1; CREATE PERSISTENT SECRET ..." is still caught. The Flight SQL
+// ingress uses it to reject DDL that would execute there but never persist.
+func ContainsPersistentSecretDDL(query string) bool {
+	if st, _, ok := parseSecretDDLHead(query); ok && st.Persistent {
+		return true
+	}
+	if !hasTrailingStatement(query) { // single statement: the head check was exhaustive
+		return false
+	}
+	for _, seg := range splitTopLevel(query) {
+		if st, _, ok := parseSecretDDLHead(seg); ok && st.Persistent {
+			return true
+		}
+	}
+	return false
+}
+
 // parseSecretDDLHead parses the statement head (through the optional secret
 // name) and returns the classification plus the byte offset just past the
 // head. The fast path for non-secret statements is two short case-folded
@@ -234,6 +255,58 @@ func hasTrailingStatement(query string) bool {
 	return false
 }
 
+// splitTopLevel splits query at semicolons outside string literals, quoted
+// identifiers, and "--" / nested "/* */" comments; the scan is meant to mirror
+// hasTrailingStatement so the two cannot disagree. Segments are untrimmed
+// substrings of query without the separating semicolons; empty segments between
+// adjacent semicolons are kept, but no segment follows a final ';'.
+func splitTopLevel(query string) []string {
+	var segments []string
+	start := 0
+	i := 0
+	for i < len(query) {
+		c := query[i]
+		switch {
+		case c == '\'':
+			i = skipQuoted(query, i, '\'')
+		case c == '"':
+			i = skipQuoted(query, i, '"')
+		case c == '-' && strings.HasPrefix(query[i:], "--"):
+			idx := strings.IndexByte(query[i:], '\n')
+			if idx < 0 { // line comment runs to the end of the input
+				i = len(query)
+			} else {
+				i += idx + 1
+			}
+		case c == '/' && strings.HasPrefix(query[i:], "/*"):
+			depth := 1 // block comments nest; track the open count
+			i += 2
+			for i < len(query) && depth > 0 {
+				switch {
+				case strings.HasPrefix(query[i:], "/*"):
+					depth++
+					i += 2
+				case strings.HasPrefix(query[i:], "*/"):
+					depth--
+					i += 2
+				default:
+					i++
+				}
+			}
+		case c == ';':
+			segments = append(segments, query[start:i])
+			i++
+			start = i
+		default:
+			i++
+		}
+	}
+	if start < len(query) { // text after the last top-level ';' (or the whole query)
+		segments = append(segments, query[start:])
+	}
+	return segments
+}
+
 // skipQuoted returns the index just past a quoted region starting at start
 // (where query[start] == quote). Doubled quotes are escapes.
 func skipQuoted(query string, start int, quote byte) int {
diff --git a/server/usersecrets/redact.go b/server/usersecrets/redact.go
@@ -2,22 +2,94 @@ package usersecrets
 
 import "strings"
 
+// redactedPlaceholder is the fixed redaction marker: it replaces the option
+// list after a CREATE SECRET head, and the whole query when the CREATE SECRET
+// is not the head and its credential material cannot be stripped in place.
+const redactedPlaceholder = "(…redacted)"
+
 // RedactForLog returns a version of query safe to write to logs, traces,
-// query logs, and pg_stat_activity. For CREATE SECRET statements (any
-// persistence, including multi-statement strings whose head is secret DDL)
-// everything after the statement head is dropped — the option list carries
-// credential material, and on a multi-statement string the trailing
-// statements are dropped along with it. DROP variants carry only a name and
-// pass through unchanged, as does every non-secret statement.
+// query logs, and pg_stat_activity. CREATE SECRET option lists carry
+// credential material and must never reach a log sink.
+//
+// The fast path: when the statement head is a CREATE SECRET, everything after
+// the head is dropped (the option list and, for a multi-statement string, the
+// trailing statements). DROP variants carry only a name and pass through, as
+// does every non-secret single statement.
+//
+// The hardened path guards against secret DDL that is NOT the statement head,
+// e.g. "SELECT 1; CREATE PERSISTENT SECRET foo (...)" or
+// "BEGIN; CREATE SECRET ...". Such a string does not classify at its head, so
+// the head-only check would leak it verbatim. We therefore split on top-level
+// semicolons and, if ANY segment classifies as a CREATE SECRET, replace the
+// entire query with a fixed placeholder. Over-redaction is harmless here —
+// false positives only cost log fidelity, never credential exposure.
 //
 // This is driven by the same tokenizer as Classify, so the redactor can never
 // be out of sync with the interceptor: any whitespace/comment arrangement
 // Classify accepts is redacted, and the non-matching fast path is two short
 // case-folded keyword comparisons with no allocation.
 func RedactForLog(query string) string {
-	st, headEnd, ok := parseSecretDDLHead(query)
-	if !ok || st.Kind != KindCreate {
-		return query
+	if st, headEnd, ok := parseSecretDDLHead(query); ok && st.Kind == KindCreate {
+		return strings.TrimSpace(query[:headEnd]) + " " + redactedPlaceholder
+	}
+
+	// Head is not a CREATE SECRET. queryHasCreateSecret re-checks the head
+	// (redundant but harmless; it keeps this path in lock-step with
+	// RedactErrorForLog) and runs the per-segment scan only when
+	// hasTrailingStatement reports further top-level statements, so single
+	// statements (DROP, non-secret) fall through to the verbatim return.
+	if queryHasCreateSecret(query) {
+		return redactedPlaceholder
+	}
+	return query
+}
+
+// redactedErrorPlaceholder replaces an error message that may echo the text of
+// a CREATE SECRET statement. Engines surface parser/binder/execution errors
+// with the offending SQL inlined (DuckDB emits e.g. `LINE 1: ... SECRET
+// 'literal'`), so an error raised by secret DDL can carry the credential even
+// though the query attribute itself was already redacted by RedactForLog.
+// Logging that error verbatim leaks the secret on every failed CREATE SECRET.
+const redactedErrorPlaceholder = "(error redacted: statement carries secret DDL)"
+
+// RedactErrorForLog returns an error message safe to log/store alongside query.
+// When query carries CREATE SECRET DDL anywhere (head or a later top-level
+// statement, as decided by queryHasCreateSecret), the engine's error text may
+// echo the secret literal, so the whole message is replaced with
+// redactedErrorPlaceholder. Over-redaction only costs diagnostic detail, never
+// credential exposure. An empty errMsg and errors from non-secret queries are
+// returned unchanged.
+//
+// Callers MUST pass the original, un-redacted query: classification runs on
+// the real statement text. Pair this with RedactForLog at every log site that
+// emits both: RedactForLog scrubs the query, RedactErrorForLog the error.
+func RedactErrorForLog(query, errMsg string) string {
+	if errMsg == "" { // empty stays empty, even when query is secret DDL
+		return errMsg
+	}
+	if queryHasCreateSecret(query) {
+		return redactedErrorPlaceholder
+	}
+	return errMsg
+}
+
+// queryHasCreateSecret reports whether query has a CREATE SECRET at its head
+// or in any top-level statement. RedactForLog and RedactErrorForLog both call
+// it, and it reuses parseSecretDDLHead / splitTopLevel, so the query and error
+// redactors cannot drift apart. DROP SECRET (name only) is not a match.
+func queryHasCreateSecret(query string) bool {
+	if st, _, ok := parseSecretDDLHead(query); ok && st.Kind == KindCreate {
+		return true
+	}
+	// A single top-level statement whose head is not CREATE SECRET cannot hide
+	// secret DDL; only multi-statement strings need the per-segment scan.
+	if !hasTrailingStatement(query) {
+		return false
+	}
+	for _, seg := range splitTopLevel(query) {
+		if st, _, ok := parseSecretDDLHead(seg); ok && st.Kind == KindCreate {
+			return true
+		}
 	}
-	return strings.TrimSpace(query[:headEnd]) + " (…redacted)"
+	return false
 }
diff --git a/server/usersecrets/redact_test.go b/server/usersecrets/redact_test.go

Original file line number	Diff line number	Diff line change
`@@ -331,8 +331,7 @@ func (h *ControlPlaneFlightSQLHandler) checkUserSecretDDL(query string) error {`
`331`	`331`	`if !h.rejectPersistentSecretDDL {`
`332`	`332`	`return nil`
`333`	`333`	`}`
`334`		`- st := usersecrets.Classify(query)`
`335`		`- if st.Kind != usersecrets.KindNone && st.Persistent {`
	`334`	`+ if usersecrets.ContainsPersistentSecretDDL(query) {`
`336`	`335`	`return status.Error(codes.InvalidArgument,`
`337`	`336`	`"persistent secrets are managed via the PostgreSQL protocol on this deployment; CREATE/DROP PERSISTENT SECRET is not supported over Flight SQL (a secret created here would not survive the session)")`
`338`	`337`	`}`