Skip to content

Commit 2acc015

Browse files
bill-ph and claude authored
feat: bundle PostHog ducklake fork (v1.0-posthog.1) (#556)
* feat: bundle PostHog ducklake fork (v1.0-posthog.1)

  Switches the ducklake extension shipped in the duckgres and duckgres-worker images from the upstream extensions.duckdb.org build to PostHog's fork at https://github.com/PostHog/ducklake/releases. Mirrors the existing httpfs override pattern.

  Also adds ducklake.duckdb_extension to shouldRefreshBundledExtension so the cached copy on a long-lived data-dir PVC is replaced when the image ships a newer fork build — same rationale as postgres_scanner.

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* test: drop redundant ducklake refresh test

  Already covered by TestSeedBundledExtensionsReplacesExistingFilesWithUpdatedContents exercising the same code path via postgres_scanner.

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* test(k8s): assert worker loads PostHog ducklake fork

  After bundling the v1.0-posthog.1 fork in Dockerfile.worker, add a kind-cluster integration test that queries duckdb_extensions().extension_version on a worker and asserts it matches the fork's commit short SHA (90dc1f24) rather than the upstream build's. The expected value lives next to the test with a comment pointing at DUCKLAKE_EXTENSION_TAG in the Dockerfiles so the two move together.

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 833f140 commit 2acc015

4 files changed

Lines changed: 51 additions & 9 deletions

File tree

Dockerfile

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,16 +12,17 @@ ARG BUILD_TAGS=""
1212
ARG TARGETARCH
1313
ARG DUCKDB_EXTENSION_VERSION=1.5.2
1414
ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix
15+
ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.1
1516
ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org
1617
ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org
1718
RUN CGO_ENABLED=1 go build -tags "${BUILD_TAGS}" -ldflags "-X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" -o duckgres .
1819
RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \
1920
&& curl -fsSL "https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \
2021
-o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \
21-
&& for ext in ducklake json; do \
22-
curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \
23-
| gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \
24-
done \
22+
&& curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \
23+
-o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/ducklake.duckdb_extension" \
24+
&& curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \
25+
| gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \
2526
&& curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \
2627
| gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension"
2728

Dockerfile.worker

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ RUN go mod download
5050

5151
ARG DUCKDB_EXTENSION_VERSION=1.5.2
5252
ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix
53+
ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.1
5354
ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org
5455
ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org
5556

@@ -78,10 +79,10 @@ RUN if [ -n "$DUCKDB_BINDINGS_VERSION" ]; then \
7879
RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \
7980
&& curl -fsSL "https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \
8081
-o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \
81-
&& for ext in ducklake json; do \
82-
curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \
83-
| gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \
84-
done \
82+
&& curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \
83+
-o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/ducklake.duckdb_extension" \
84+
&& curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \
85+
| gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension" \
8586
&& curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \
8687
| gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension"
8788

server/server.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -983,7 +983,11 @@ func seedBundledExtensions(srcRoot, dstRoot string) error {
983983
}
984984

985985
func shouldRefreshBundledExtension(srcPath string) bool {
986-
return filepath.Base(srcPath) == "postgres_scanner.duckdb_extension"
986+
switch filepath.Base(srcPath) {
987+
case "postgres_scanner.duckdb_extension", "ducklake.duckdb_extension":
988+
return true
989+
}
990+
return false
987991
}
988992

989993
func copyFile(srcPath, dstPath string, mode os.FileMode) error {

tests/k8s/k8s_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,36 @@ func TestK8sSharedWarmWorkerActivation(t *testing.T) {
188188
}
189189
}
190190

191+
// expectedDucklakeExtensionVersion is the short SHA of the commit
192+
// PostHog/ducklake's v1.0-posthog.1 tag points at. DuckDB's
193+
// EXT_VERSION_DUCKLAKE macro embeds this string at build time and exposes
194+
// it via duckdb_extensions().extension_version. Bump this in lock-step
195+
// with DUCKLAKE_EXTENSION_TAG in Dockerfile / Dockerfile.worker.
196+
const expectedDucklakeExtensionVersion = "90dc1f24"
197+
198+
// TestK8sDucklakeExtensionIsBundledFork asserts the worker pods load the
199+
// PostHog ducklake fork bundled by Dockerfile.worker, not the upstream
200+
// build that DuckDB would otherwise fetch from extensions.duckdb.org.
201+
// The version string is the short SHA of the fork's tagged commit.
202+
func TestK8sDucklakeExtensionIsBundledFork(t *testing.T) {
203+
if err := retryQueryWithReconnect("SELECT 1", 30*time.Second); err != nil {
204+
t.Fatalf("warm-up query failed: %v", err)
205+
}
206+
207+
var version string
208+
if err := retryScanStringWithReconnect(
209+
"SELECT extension_version FROM duckdb_extensions() WHERE extension_name = 'ducklake' AND loaded",
210+
60*time.Second, &version,
211+
); err != nil {
212+
t.Fatalf("query ducklake extension_version: %v", err)
213+
}
214+
if version != expectedDucklakeExtensionVersion {
215+
t.Fatalf("ducklake extension_version = %q, want %q (PostHog fork v1.0-posthog.1). "+
216+
"If the bundled fork was upgraded, update expectedDucklakeExtensionVersion alongside DUCKLAKE_EXTENSION_TAG.",
217+
version, expectedDucklakeExtensionVersion)
218+
}
219+
}
220+
191221
func TestK8sWorkerCrashRecovery(t *testing.T) {
192222
// Run a query to ensure a worker exists
193223
if err := retryQueryWithReconnect("SELECT 1", 30*time.Second); err != nil {
@@ -739,6 +769,12 @@ func retryScanIntWithReconnect(query string, timeout time.Duration, dest *int) e
739769
})
740770
}
741771

772+
func retryScanStringWithReconnect(query string, timeout time.Duration, dest *string) error {
773+
return retryDBOperationWithReconnectAs("postgres", "postgres", timeout, fmt.Sprintf("query %q", query), func(ctx context.Context, db *sql.DB) error {
774+
return db.QueryRowContext(ctx, query).Scan(dest)
775+
})
776+
}
777+
742778
func retryQueryWithReconnectAs(username, password, query string, timeout time.Duration) error {
743779
return retryDBOperationWithReconnectAs(username, password, timeout, fmt.Sprintf("query %q", query), func(ctx context.Context, db *sql.DB) error {
744780
var result int

0 commit comments

Comments
 (0)