Skip to content

Commit 0e78dd3

Browse files
bill-phclaude
andcommitted
fix(server): drop iceberg credential_chain fallback, fail loudly instead
Removes the credential_chain branch introduced in the previous commit on this PR. The fallback was hedging — it dressed up the broken DuckDB chain path (the very thing this PR exists to fix) as forward-compat. In the multitenant deploy the worker pod has no AWS identity, so a silent fallback would just defer the same failure to attach time and obscure the real misconfiguration. Explicit STS credentials minted by the control plane are the only supported auth model. If iceberg.Enabled is true and the activation payload lacks credentials, that is a configuration bug (broken STS broker, missing per-tenant IAM role, race in the activator) and should surface as a clear error at attach time, not a credential_chain SQL that will fail in DuckDB with an opaque "Secret Validation Failure". Changes: - BuildIcebergSecretStmt: no more empty-keyID branch; always emit PROVIDER config. - AttachIcebergCatalog: validate keyID/secret are non-empty when iceberg is enabled; return an error naming the table_bucket so logs identify which tenant is affected. - Tests: drop the credential_chain-fallback case; keep the with/without session-token, region default, and quote-escaping coverage. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 8213934 commit 0e78dd3

3 files changed

Lines changed: 25 additions & 34 deletions

File tree

server/iceberg/migration.go

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,32 +25,25 @@ const CatalogName = "iceberg"
2525
// the iceberg extension internally signs with SigV4 and pulls credentials
2626
// from any TYPE S3 secret in scope.
2727
//
28-
// When keyID is non-empty the secret is built with PROVIDER config and the
29-
// supplied short-lived credentials inlined directly. This is the multitenant
30-
// production path: the control plane assumes the per-tenant IAM role via
31-
// STS and ships the resulting temporary credentials in the worker
32-
// activation payload, identical to how the DuckLake S3 secret is built (see
28+
// The secret is always built with PROVIDER config and the supplied
29+
// short-lived credentials inlined directly. This is the only supported
30+
// auth model: the control plane assumes the per-tenant IAM role via STS
31+
// and ships the resulting temporary credentials in the worker activation
32+
// payload, identical to how the DuckLake S3 secret is built (see
3333
// buildConfigSecret in server/server.go). The same role has both s3:* and
34-
// s3tables:* permissions on the tenant's data and table buckets, so reusing
35-
// the credentials here is correct.
34+
// s3tables:* permissions on the tenant's data and table buckets, so
35+
// reusing the credentials here is correct.
3636
//
37-
// When keyID is empty the secret falls back to PROVIDER credential_chain.
38-
// This preserves the legacy/standalone path where the host running the
39-
// worker has its own AWS identity reachable via env vars, ~/.aws, or IMDS.
40-
// On EKS deployments without IRSA/Pod Identity on the worker pod (the
41-
// PostHog production topology) this fallback will fail at create time —
42-
// callers in that topology must supply explicit credentials.
37+
// keyID and secret are required — callers must validate upstream and emit
38+
// a clear error if the activation payload is missing them when iceberg is
39+
// enabled. sessionToken is optional (omitted from the DDL when empty) to
40+
// support the rare static-IAM-user case, though STS:AssumeRole always
41+
// returns one in production.
4342
func BuildIcebergSecretStmt(cfg Config, keyID, secret, sessionToken string) string {
4443
region := cfg.Region
4544
if region == "" {
4645
region = DefaultRegion
4746
}
48-
if keyID == "" {
49-
return fmt.Sprintf(
50-
"CREATE OR REPLACE SECRET iceberg_sigv4 (TYPE S3, PROVIDER credential_chain, REGION '%s')",
51-
escapeSQLStringLiteral(region),
52-
)
53-
}
5447
stmt := fmt.Sprintf(
5548
"CREATE OR REPLACE SECRET iceberg_sigv4 (TYPE S3, PROVIDER config, KEY_ID '%s', SECRET '%s', REGION '%s'",
5649
escapeSQLStringLiteral(keyID),

server/iceberg/migration_test.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,9 @@ func TestBuildIcebergSecretStmtWithoutSessionToken(t *testing.T) {
3939
}
4040
}
4141

42-
func TestBuildIcebergSecretStmtFallsBackToCredentialChain(t *testing.T) {
43-
got := BuildIcebergSecretStmt(Config{Region: "us-west-2"}, "", "", "")
44-
want := "CREATE OR REPLACE SECRET iceberg_sigv4 (TYPE S3, PROVIDER credential_chain, REGION 'us-west-2')"
45-
if got != want {
46-
t.Fatalf("BuildIcebergSecretStmt should fall back to credential_chain when keyID is empty:\n got: %s\nwant: %s", got, want)
47-
}
48-
}
49-
5042
func TestBuildIcebergSecretStmtDefaultsRegion(t *testing.T) {
51-
got := BuildIcebergSecretStmt(Config{}, "", "", "")
52-
want := "CREATE OR REPLACE SECRET iceberg_sigv4 (TYPE S3, PROVIDER credential_chain, REGION 'us-east-1')"
43+
got := BuildIcebergSecretStmt(Config{}, "AKIA_TEST", "shh", "tok")
44+
want := "CREATE OR REPLACE SECRET iceberg_sigv4 (TYPE S3, PROVIDER config, KEY_ID 'AKIA_TEST', SECRET 'shh', REGION 'us-east-1', SESSION_TOKEN 'tok')"
5345
if got != want {
5446
t.Fatalf("BuildIcebergSecretStmt should default to us-east-1:\n got: %s\nwant: %s", got, want)
5547
}

server/server.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,16 +1605,22 @@ func isDeltaCatalogEmptyError(err error) bool {
16051605
// fail-soft for the "fresh tenant, no namespaces yet" case so a worker
16061606
// activation isn't blocked by an empty catalog.
16071607
//
1608-
// keyID/secret/sessionToken are short-lived AWS credentials produced by the
1609-
// control plane's STS AssumeRole on the per-tenant IAM role. They are the
1610-
// same credentials DuckLake uses (see buildConfigSecret) — the per-tenant
1608+
// keyID/secret/sessionToken are short-lived AWS credentials produced by
1609+
// the control plane's STS AssumeRole on the per-tenant IAM role the
1610+
// same credentials DuckLake uses (see buildConfigSecret). The per-tenant
16111611
// role has both s3:* and s3tables:* permissions, so reusing them here is
1612-
// correct. When empty, BuildIcebergSecretStmt falls back to
1613-
// PROVIDER credential_chain (for standalone/host-AWS-identity deployments).
1612+
// correct. This is the only supported auth model for iceberg: we do NOT
1613+
// fall back to DuckDB's credential_chain because the worker pod has no
1614+
// AWS identity of its own on the multitenant production topology. Missing
1615+
// credentials when iceberg is enabled is a configuration bug and surfaces
1616+
// as an explicit error rather than a silent fallback.
16141617
func AttachIcebergCatalog(db *sql.DB, ic IcebergConfig, sem chan struct{}, keyID, secret, sessionToken string) error {
16151618
if !ic.Enabled || ic.TableBucket == "" {
16161619
return nil
16171620
}
1621+
if keyID == "" || secret == "" {
1622+
return fmt.Errorf("iceberg catalog enabled (table_bucket=%q) but no AWS credentials in activation payload — control plane STS broker did not populate DuckLake S3 credentials", ic.TableBucket)
1623+
}
16181624

16191625
select {
16201626
case sem <- struct{}{}:

0 commit comments

Comments
 (0)