Skip to content

Commit e8898ac

Browse files
fuziontechclaude
andauthored
feat: enable Delta Lake catalog attachment by default (#553)
* feat: enable Delta Lake catalog attachment by default Flips delta_catalog_enabled to true everywhere: CLI/env/YAML defaults in DefaultServerConfig and the duckgres-worker flag, plus column-level gorm:"default:true" on the configstore ManagedWarehouseS3 and DuckLakeConfig fields. A one-shot migration tracked in duckgres_schema_migrations backfills existing configstore rows so upgraded clusters pick up the new default without admin intervention; admins can still opt out per-warehouse via the admin API afterward without being re-flipped on subsequent restarts. To keep plain standalone deployments (no DuckLake metadata_store / object_store / data_path) bootable under the new default, AttachDeltaCatalog now silently no-ops when no catalog path is derivable instead of erroring. The catalog is still attached as 'delta' (ATTACH '...' AS delta (TYPE delta), unchanged). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix(server): treat empty Delta location as benign skip Under the always-on Delta default, a fresh DuckLake tenant has no Delta data at the sibling delta/ prefix yet, and ATTACH returns "No files in log segment" from DeltaKernel. That's the expected state, not a failure — log it and continue without the catalog attached. Fixes the k8s tenant isolation test where billing's worker activation rejected client queries before the tenant ever wrote Delta data. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix(server): probe-and-detach when Delta path has no transaction log ATTACH '...' (TYPE delta) is lazy — it succeeds against an empty path, then every subsequent query against DuckLake fails at prepare time because the planner walks all attached catalogs and Delta blows up trying to read a missing _delta_log/. Earlier the empty-path check was only at ATTACH time; that wasn't enough because ATTACH never errored. Now we probe with SHOW TABLES FROM delta immediately after ATTACH and DETACH if the catalog is unusable, leaving the connection clean for ducklake-only queries until Delta data lands. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 42246bd commit e8898ac

7 files changed

Lines changed: 134 additions & 8 deletions

File tree

cmd/duckgres-worker/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func main() {
7575
sessionInitTimeout := flag.String("session-init-timeout", "", "Session startup metadata/probe timeout (e.g., '10s', '30s') (env: DUCKGRES_SESSION_INIT_TIMEOUT)")
7676

7777
// DuckLake (workers attach DuckLake)
78-
duckLakeDeltaCatalogEnabled := flag.Bool("ducklake-delta-catalog-enabled", false, "Attach a Delta Lake catalog during DuckLake worker boot (env: DUCKGRES_DUCKLAKE_DELTA_CATALOG_ENABLED)")
78+
duckLakeDeltaCatalogEnabled := flag.Bool("ducklake-delta-catalog-enabled", true, "Attach a Delta Lake catalog during DuckLake worker boot (default true; use --ducklake-delta-catalog-enabled=false to disable; env: DUCKGRES_DUCKLAKE_DELTA_CATALOG_ENABLED)")
7979
duckLakeDeltaCatalogPath := flag.String("ducklake-delta-catalog-path", "", "Delta Lake catalog/table path to attach (env: DUCKGRES_DUCKLAKE_DELTA_CATALOG_PATH)")
8080
duckLakeDefaultSpecVersion := flag.String("ducklake-default-spec-version", "", "Default DuckLake spec version for migration checks (env: DUCKGRES_DUCKLAKE_DEFAULT_SPEC_VERSION)")
8181

configresolve/cliflags.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func RegisterCLIInputsFlags(fs *flag.FlagSet) func() CLIInputs {
4040
threads := fs.Int("threads", 0, "DuckDB threads per session (env: DUCKGRES_THREADS)")
4141
memoryBudget := fs.String("memory-budget", "", "Total memory for all DuckDB sessions (e.g., '24GB') (env: DUCKGRES_MEMORY_BUDGET)")
4242
memoryRebalance := fs.Bool("memory-rebalance", false, "Enable dynamic per-connection memory reallocation (control-plane mode) (env: DUCKGRES_MEMORY_REBALANCE)")
43-
duckLakeDeltaCatalogEnabled := fs.Bool("ducklake-delta-catalog-enabled", false, "Attach a Delta Lake catalog during DuckLake worker boot (env: DUCKGRES_DUCKLAKE_DELTA_CATALOG_ENABLED)")
43+
duckLakeDeltaCatalogEnabled := fs.Bool("ducklake-delta-catalog-enabled", true, "Attach a Delta Lake catalog during DuckLake worker boot (default true; use --ducklake-delta-catalog-enabled=false to disable; env: DUCKGRES_DUCKLAKE_DELTA_CATALOG_ENABLED)")
4444
duckLakeDeltaCatalogPath := fs.String("ducklake-delta-catalog-path", "", "Delta Lake catalog/table path to attach, defaults to sibling delta/ prefix at DuckLake object-store root (env: DUCKGRES_DUCKLAKE_DELTA_CATALOG_PATH)")
4545
duckLakeDefaultSpecVersion := fs.String("ducklake-default-spec-version", "", "Default DuckLake spec version for migration checks (env: DUCKGRES_DUCKLAKE_DEFAULT_SPEC_VERSION)")
4646
processMinWorkers := fs.Int("process-min-workers", 0, "Pre-warm worker count at startup for process workers (control-plane mode) (env: DUCKGRES_PROCESS_MIN_WORKERS)")

configresolve/resolve.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ func DefaultServerConfig() server.Config {
145145
CheckpointInterval: 24 * time.Hour,
146146
DataInliningRowLimit: intPtr(0),
147147
DisableMetadataThreadLocalCache: boolPtr(true),
148+
DeltaCatalogEnabled: true,
148149
},
149150
QueryLog: server.QueryLogConfig{
150151
Enabled: true,
@@ -1091,9 +1092,6 @@ func ResolveEffective(fileCfg *configloader.FileConfig, cli CLIInputs, getenv fu
10911092
}
10921093
if cfg.DuckLake.DeltaCatalogEnabled && cfg.DuckLake.DeltaCatalogPath == "" {
10931094
cfg.DuckLake.DeltaCatalogPath = ducklake.DefaultDeltaCatalogPath(cfg.DuckLake)
1094-
if cfg.DuckLake.DeltaCatalogPath == "" {
1095-
warn("ducklake.delta_catalog_enabled requires ducklake.delta_catalog_path when no ducklake.object_store or ducklake.data_path is configured")
1096-
}
10971095
}
10981096

10991097
return Resolved{

controlplane/configstore/models.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ type ManagedWarehouseS3 struct {
104104
Endpoint string `gorm:"size:512" json:"endpoint"`
105105
UseSSL bool `json:"use_ssl"`
106106
URLStyle string `gorm:"size:16" json:"url_style"`
107-
DeltaCatalogEnabled bool `json:"delta_catalog_enabled"`
107+
DeltaCatalogEnabled bool `gorm:"default:true" json:"delta_catalog_enabled"`
108108
DeltaCatalogPath string `gorm:"size:1024" json:"delta_catalog_path"`
109109
}
110110

@@ -187,7 +187,7 @@ type DuckLakeConfig struct {
187187
S3URLStyle string `gorm:"size:16" json:"s3_url_style"`
188188
S3Chain string `gorm:"size:255" json:"s3_chain"`
189189
S3Profile string `gorm:"size:255" json:"s3_profile"`
190-
DeltaCatalogEnabled bool `json:"delta_catalog_enabled"`
190+
DeltaCatalogEnabled bool `gorm:"default:true" json:"delta_catalog_enabled"`
191191
DeltaCatalogPath string `gorm:"size:1024" json:"delta_catalog_path"`
192192
UpdatedAt time.Time `json:"updated_at"`
193193
}
@@ -219,6 +219,16 @@ type QueryLogConfig struct {
219219

220220
func (QueryLogConfig) TableName() string { return "duckgres_query_log_config" }
221221

222+
// SchemaMigration tracks one-shot data migrations that aren't expressible
223+
// through GORM's AutoMigrate (e.g., backfills of new column defaults onto
224+
// existing rows). One row per migration name, inserted exactly once.
225+
type SchemaMigration struct {
226+
Name string `gorm:"primaryKey;size:128" json:"name"`
227+
AppliedAt time.Time `json:"applied_at"`
228+
}
229+
230+
func (SchemaMigration) TableName() string { return "duckgres_schema_migrations" }
231+
222232
// ControlPlaneInstanceState describes the liveness state of a control-plane instance.
223233
type ControlPlaneInstanceState string
224234

controlplane/configstore/store.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,16 @@ func NewConfigStore(connStr string, pollInterval time.Duration) (*ConfigStore, e
7979
&DuckLakeConfig{},
8080
&RateLimitConfig{},
8181
&QueryLogConfig{},
82+
&SchemaMigration{},
8283
); err != nil {
8384
return nil, fmt.Errorf("auto-migrate config store: %w", err)
8485
}
8586

87+
// One-shot data migrations (idempotent — tracked in duckgres_schema_migrations).
88+
if err := migrateDeltaCatalogDefaultEnabled(db); err != nil {
89+
return nil, fmt.Errorf("migrate delta catalog default: %w", err)
90+
}
91+
8692
runtimeSchema, err := resolveRuntimeSchema(db)
8793
if err != nil {
8894
return nil, fmt.Errorf("resolve runtime schema: %w", err)
@@ -415,6 +421,46 @@ func (cs *ConfigStore) UpdateWarehouseState(orgID string, expectedState ManagedW
415421
return nil
416422
}
417423

424+
// migrateDeltaCatalogDefaultEnabled is a one-shot backfill that flips existing
425+
// rows where delta_catalog_enabled was stored as false (the old default) to
426+
// true. New rows get true automatically via the gorm:"default:true" column
427+
// default. Tracked in duckgres_schema_migrations so it runs exactly once;
428+
// admins can disable per-warehouse via the admin API after the backfill
429+
// without it being re-flipped on subsequent restarts.
430+
const deltaCatalogDefaultMigrationName = "2026_05_delta_catalog_default_enabled"
431+
432+
func migrateDeltaCatalogDefaultEnabled(db *gorm.DB) error {
433+
var existing SchemaMigration
434+
err := db.Where("name = ?", deltaCatalogDefaultMigrationName).First(&existing).Error
435+
if err == nil {
436+
return nil // already applied
437+
}
438+
if !errors.Is(err, gorm.ErrRecordNotFound) {
439+
return fmt.Errorf("check schema migration: %w", err)
440+
}
441+
442+
return db.Transaction(func(tx *gorm.DB) error {
443+
if err := tx.Exec(
444+
`UPDATE duckgres_ducklake_config SET delta_catalog_enabled = TRUE WHERE delta_catalog_enabled IS DISTINCT FROM TRUE`,
445+
).Error; err != nil {
446+
return fmt.Errorf("backfill ducklake config: %w", err)
447+
}
448+
if err := tx.Exec(
449+
`UPDATE duckgres_managed_warehouses SET s3_delta_catalog_enabled = TRUE WHERE s3_delta_catalog_enabled IS DISTINCT FROM TRUE`,
450+
).Error; err != nil {
451+
return fmt.Errorf("backfill managed warehouses: %w", err)
452+
}
453+
if err := tx.Create(&SchemaMigration{
454+
Name: deltaCatalogDefaultMigrationName,
455+
AppliedAt: time.Now().UTC(),
456+
}).Error; err != nil {
457+
return fmt.Errorf("record schema migration: %w", err)
458+
}
459+
slog.Info("Backfilled delta_catalog_enabled=true on existing config rows.")
460+
return nil
461+
})
462+
}
463+
418464
func migrateOrgUserPK(db *gorm.DB) error {
419465
// Check if the PK already has 2 columns (idempotent)
420466
var count int64

main_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,37 @@ func TestResolveEffectiveConfigDuckLakeDisableMetadataThreadLocalCacheDefaultsTr
214214
}
215215

216216
func TestResolveEffectiveConfigDuckLakeDeltaCatalog(t *testing.T) {
217+
// Default-on: with no file/env/CLI overrides, Delta is enabled by default.
218+
resolvedDefault := configresolve.ResolveEffective(nil, configresolve.CLIInputs{}, envFromMap(nil), nil)
219+
if !resolvedDefault.Server.DuckLake.DeltaCatalogEnabled {
220+
t.Fatal("expected DeltaCatalogEnabled to default to true")
221+
}
222+
223+
// Default-on with DuckLake configured: path auto-derives to a sibling delta/.
224+
resolvedDuckLake := configresolve.ResolveEffective(&FileConfig{
225+
DuckLake: DuckLakeFileConfig{
226+
ObjectStore: "s3://warehouse/ducklake/",
227+
},
228+
}, configresolve.CLIInputs{}, envFromMap(nil), nil)
229+
if !resolvedDuckLake.Server.DuckLake.DeltaCatalogEnabled {
230+
t.Fatal("expected default DeltaCatalogEnabled with DuckLake configured")
231+
}
232+
if got, want := resolvedDuckLake.Server.DuckLake.DeltaCatalogPath, "s3://warehouse/delta/"; got != want {
233+
t.Fatalf("expected derived Delta catalog path %q, got %q", want, got)
234+
}
235+
236+
// Explicit YAML opt-out wins over the new default.
237+
fileDisabled := false
238+
resolvedOptOut := configresolve.ResolveEffective(&FileConfig{
239+
DuckLake: DuckLakeFileConfig{
240+
ObjectStore: "s3://warehouse/ducklake/",
241+
DeltaCatalogEnabled: &fileDisabled,
242+
},
243+
}, configresolve.CLIInputs{}, envFromMap(nil), nil)
244+
if resolvedOptOut.Server.DuckLake.DeltaCatalogEnabled {
245+
t.Fatal("expected YAML ducklake.delta_catalog_enabled=false to disable Delta catalog")
246+
}
247+
217248
fileEnabled := true
218249
resolved := configresolve.ResolveEffective(&FileConfig{
219250
DuckLake: DuckLakeFileConfig{

server/server.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1486,13 +1486,17 @@ func AttachDuckLake(db *sql.DB, dlCfg DuckLakeConfig, sem chan struct{}, dataDir
14861486
// AttachDeltaCatalog attaches the configured Delta Lake catalog/table alongside
14871487
// DuckLake. It reuses the DuckLake S3 secret settings so Delta scans can access
14881488
// the same object store credentials.
1489+
//
1490+
// Delta is enabled by default. When no path is derivable (e.g. a plain
1491+
// standalone DuckDB instance with no DuckLake object_store/data_path), this is
1492+
// a benign no-op: there's no Delta sibling to attach.
14891493
func AttachDeltaCatalog(db *sql.DB, dlCfg DuckLakeConfig, sem chan struct{}) error {
14901494
if !dlCfg.DeltaCatalogEnabled {
14911495
return nil
14921496
}
14931497
catalogPath := ducklake.DeltaCatalogPath(dlCfg)
14941498
if catalogPath == "" {
1495-
return fmt.Errorf("delta catalog path is empty")
1499+
return nil
14961500
}
14971501

14981502
select {
@@ -1521,12 +1525,49 @@ func AttachDeltaCatalog(db *sql.DB, dlCfg DuckLakeConfig, sem chan struct{}) err
15211525
attachStmt := ducklake.BuildDeltaAttachStmt(dlCfg)
15221526
slog.Info("Attaching Delta catalog.", "path", catalogPath)
15231527
if _, err := db.Exec(attachStmt); err != nil {
1528+
// Delta is enabled by default. A fresh DuckLake tenant won't have any
1529+
// Delta data at the sibling delta/ prefix yet, so DeltaKernel returns
1530+
// "No files in log segment". That's expected — the catalog will start
1531+
// resolving once Delta data lands at the path. Treat as a benign skip
1532+
// instead of failing connection setup.
1533+
if isDeltaCatalogEmptyError(err) {
1534+
slog.Info("Skipping Delta catalog attach: no Delta data at path yet.", "path", catalogPath)
1535+
return nil
1536+
}
15241537
return fmt.Errorf("failed to attach Delta catalog: %w", err)
15251538
}
1539+
// ATTACH '...' (TYPE delta) is lazy — it doesn't read the transaction log
1540+
// until something forces resolution. With Delta attached but the path
1541+
// empty, every unqualified-table query the user runs against DuckLake will
1542+
// fail at prepare time when the planner walks all attached catalogs and
1543+
// Delta tries to read a missing _delta_log/. Probe immediately and detach
1544+
// if there's no Delta data here yet, so the catalog only sticks around
1545+
// once it's actually queryable.
1546+
if _, err := db.Exec("SHOW TABLES FROM delta"); err != nil {
1547+
if isDeltaCatalogEmptyError(err) {
1548+
if _, derr := db.Exec("DETACH delta"); derr != nil {
1549+
slog.Warn("Failed to detach empty Delta catalog after attach probe.", "error", derr)
1550+
}
1551+
slog.Info("Detached Delta catalog: no Delta data at path yet.", "path", catalogPath)
1552+
return nil
1553+
}
1554+
return fmt.Errorf("failed to probe Delta catalog: %w", err)
1555+
}
15261556
slog.Info("Attached Delta catalog successfully.", "path", catalogPath)
15271557
return nil
15281558
}
15291559

1560+
// isDeltaCatalogEmptyError reports whether err indicates the Delta location
1561+
// exists but contains no Delta transaction log yet (the common case for a
1562+
// fresh DuckLake tenant under the always-on Delta default). The DuckDB Delta
1563+
// extension surfaces this as "No files in log segment" via DeltaKernel.
1564+
func isDeltaCatalogEmptyError(err error) bool {
1565+
if err == nil {
1566+
return false
1567+
}
1568+
return strings.Contains(err.Error(), "No files in log segment")
1569+
}
1570+
15301571
func deltaCatalogNeedsS3Secret(catalogPath string, dlCfg DuckLakeConfig) bool {
15311572
if !strings.Contains(catalogPath, "://") {
15321573
return false

0 commit comments

Comments
 (0)