Hi thank you for working on this, its a well-needed feature. Just wanted to point out that a very good use case for this extension is a Google Cloud hosted DuckLake, but that integration is currently not working. My repro:
Repro of Ducklake/GCS Extension Issue
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "duckdb>=1.4.3",
# "polars",
# "gcsfs",
# "pyarrow"
# ]
# ///
"""
Minimal reproducible example for ducklake + GCS extension failing to work together.
This demonstrates that:
1. Writing to ducklake on GCS via fsspec works
2. Reading the same data via GCS extension with credential_chain fails
Setup:
1. Set GCS_BUCKET and GCS_METADATA_PATH below
2. Authenticate: gcloud auth application-default login
3. Run: uv run --script test_ducklake_gcs_integration.py
"""
import duckdb
import polars as pl
# ==================== CONFIGURATION ====================
GCS_BUCKET = "gs://your-bucket-name/ducklake-test" # CHANGE ME
GCS_METADATA_PATH = "/tmp/ducklake_metadata.db"
# =======================================================
def setup_ducklake_with_fsspec():
"""
Step 1: Create ducklake table on GCS using fsspec backend (this works).
"""
print("=" * 70)
print("STEP 1: Creating ducklake table on GCS using fsspec backend")
print("=" * 70)
# Use fsspec backend
conn = duckdb.connect()
from fsspec import filesystem
fs = filesystem("gcs")
conn.register_filesystem(fs)
# Load extensions
conn.execute("INSTALL ducklake;")
conn.execute("LOAD ducklake;")
print("✓ Loaded ducklake extension")
# Attach ducklake with GCS data path
print(f"\nAttaching ducklake with metadata: {GCS_METADATA_PATH}")
print(f" data path: {GCS_BUCKET}")
conn.execute(f"""
ATTACH 'ducklake:{GCS_METADATA_PATH}' AS dl_catalog (
DATA_PATH '{GCS_BUCKET}'
);
""")
conn.execute("USE dl_catalog.main;")
print("✓ Attached ducklake")
# Create test table
print("\nCreating test table with sample data...")
df = pl.DataFrame(
{
"id": [1, 2, 3, 4, 5],
"name": ["alice", "bob", "charlie", "david", "eve"],
"value": [10.5, 20.3, 30.1, 40.8, 50.2],
}
)
# Drop if exists to allow multiple runs
conn.execute("DROP TABLE IF EXISTS test_table;")
conn.execute("CREATE TABLE test_table (id INTEGER, name VARCHAR, value DOUBLE);")
conn.execute("INSERT INTO test_table SELECT * FROM df")
result = conn.execute("SELECT COUNT(*) FROM test_table").fetchone()
print(f"✓ Created table with {result[0]} rows")
# Verify files were written to GCS
files = conn.execute(
"SELECT COUNT(*) FROM ducklake_list_files('dl_catalog', 'test_table')"
).fetchone()
print(f"✓ Written {files[0]} file(s) to GCS\n")
conn.close()
return df
def read_ducklake_with_gcs_extension():
"""
Step 2: Try to read the same ducklake table using GCS extension (expected to fail).
"""
print("=" * 70)
print("STEP 2: Reading ducklake table using GCS extension with credential_chain")
print("=" * 70)
conn = duckdb.connect()
# Install GCS extension instead of using fsspec
print("Installing GCS community extension...")
conn.execute("INSTALL gcs FROM community;")
conn.execute("LOAD gcs;")
print("✓ GCS extension loaded")
# Create secret with credential_chain
print("\nCreating GCS secret with credential_chain...")
conn.execute("""
CREATE SECRET gcs_secret (
TYPE GCP,
PROVIDER credential_chain
);""")
print("✓ Secret created")
# Load ducklake
conn.execute("INSTALL ducklake;")
conn.execute("LOAD ducklake;")
print("✓ Loaded ducklake extension")
# Try to attach and read
print(f" data path: {GCS_BUCKET}")
try:
conn.execute(f"""
ATTACH 'ducklake:{GCS_METADATA_PATH}' AS dl_catalog (
DATA_PATH '{GCS_BUCKET}'
);
""")
conn.execute("USE dl_catalog.main;")
print("✓ Attached ducklake")
print("\nAttempting: SELECT * FROM test_table")
result = conn.execute("SELECT * FROM test_table").fetchall()
print("✓ SUCCESS - Read {len(result)} rows!")
print("First row:", result[0])
return True
except Exception as e:
print("✗ FAILED")
print(f"Error type: {type(e).__name__}")
print(f"Error message: {e}")
print("\nThis demonstrates that credential_chain does not work")
print("for reading ducklake tables from GCS.")
return False
finally:
conn.close()
def verify_direct_parquet_read():
"""
Step 3: Verify that direct parquet reads work (sanity check).
"""
print("\n" + "=" * 70)
print("STEP 3: Sanity check - direct parquet read with GCS extension")
print("=" * 70)
conn = duckdb.connect()
conn.execute("INSTALL gcs FROM community;")
conn.execute("LOAD gcs;")
secret_sql = """
CREATE SECRET gcs_secret (
TYPE GCP,
PROVIDER credential_chain
);
"""
conn.execute(secret_sql)
print(f"\nAttempting to list files in: {GCS_BUCKET}/*.parquet")
try:
# Try to read any parquet file from the bucket
result = conn.execute(
f"SELECT COUNT(*) FROM parquet_scan('{GCS_BUCKET}/**/*.parquet')"
).fetchone()
print(f"✓ Can read parquet files directly: found {result[0]} rows")
return True
except Exception as e:
print(f"✗ Direct parquet read also failed: {e}")
print("This might indicate a broader credential issue.")
return False
finally:
conn.close()
def main():
if GCS_BUCKET == "gs://your-bucket-name/ducklake-test":
print("\n⚠️ ERROR: Please configure GCS_BUCKET first!")
print("Edit the GCS_BUCKET variable at the top of this file.\n")
return
print("\nDucklake + GCS Extension Credential Test\n")
setup_ducklake_with_fsspec()
gcs_ext_success = read_ducklake_with_gcs_extension()
direct_read_success = verify_direct_parquet_read()
# Summary
print("\n" + "=" * 70)
print("SUMMARY")
print("=" * 70)
print("✓ fsspec write to ducklake: SUCCESS")
print(
f"{'✓' if gcs_ext_success else '✗'} GCS extension read ducklake: {'SUCCESS' if gcs_ext_success else 'FAILED'}"
)
print(
f"{'✓' if direct_read_success else '✗'} GCS extension read parquet: {'SUCCESS' if direct_read_success else 'FAILED'}"
)
if not gcs_ext_success and direct_read_success:
print("\n→ Issue confirmed: credential_chain works for direct reads")
print(" but not for ducklake table reads.")
elif not gcs_ext_success and not direct_read_success:
print("\n→ Both failed: might be a credential configuration issue.")
# Cleanup suggestion
print(f"\nCleanup: You may want to delete {GCS_METADATA_PATH} and {GCS_BUCKET}")
if __name__ == "__main__":
main()
For me the repro gives
Result
❯ uv run --script test_ducklake_gcs_issue.py
Ducklake + GCS Extension Credential Test
======================================================================
STEP 1: Creating ducklake table on GCS using fsspec backend
======================================================================
✓ Loaded ducklake extension
Attaching ducklake with metadata: /tmp/ducklake_metadata.db
data path: gs://silurian-temporary/ducklake-test
✓ Attached ducklake
Creating test table with sample data...
✓ Created table with 5 rows
✓ Written 1 file(s) to GCS
======================================================================
STEP 2: Reading ducklake table using GCS extension with credential_chain
======================================================================
Installing GCS community extension...
✓ GCS extension loaded
Creating GCS secret with credential_chain...
✓ Secret created
✓ Loaded ducklake extension
data path: gs://silurian-temporary/ducklake-test
✓ Attached ducklake
Attempting: SELECT * FROM test_table
✗ FAILED
Error type: HTTPException
Error message: HTTP Error: HTTP GET error on 'https://storage.googleapis.com/silurian-temporary/ducklake-test/main/test_table/ducklake-019b3028-38b0-7bb9-9e36-4b708ae8cd36.parquet' (HTTP 403)
This demonstrates that credential_chain does not work
for reading ducklake tables from GCS.
======================================================================
STEP 3: Sanity check - direct parquet read with GCS extension
======================================================================
Attempting to list files in: gs://silurian-temporary/ducklake-test/*.parquet
✓ Can read parquet files directly: found 20 rows
======================================================================
SUMMARY
======================================================================
✓ fsspec write to ducklake: SUCCESS
✗ GCS extension read ducklake: FAILED
✓ GCS extension read parquet: SUCCESS
→ Issue confirmed: credential_chain works for direct reads
but not for ducklake table reads.
Cleanup: You may want to delete /tmp/ducklake_metadata.db and gs://silurian-temporary/ducklake-test
Hi thank you for working on this, its a well-needed feature. Just wanted to point out that a very good use case for this extension is a Google Cloud hosted DuckLake, but that integration is currently not working. My repro:
Repro of Ducklake/GCS Extension Issue
For me the repro gives
Result
❯ uv run --script test_ducklake_gcs_issue.py Ducklake + GCS Extension Credential Test ====================================================================== STEP 1: Creating ducklake table on GCS using fsspec backend ====================================================================== ✓ Loaded ducklake extension Attaching ducklake with metadata: /tmp/ducklake_metadata.db data path: gs://silurian-temporary/ducklake-test ✓ Attached ducklake Creating test table with sample data... ✓ Created table with 5 rows ✓ Written 1 file(s) to GCS ====================================================================== STEP 2: Reading ducklake table using GCS extension with credential_chain ====================================================================== Installing GCS community extension... ✓ GCS extension loaded Creating GCS secret with credential_chain... ✓ Secret created ✓ Loaded ducklake extension data path: gs://silurian-temporary/ducklake-test ✓ Attached ducklake Attempting: SELECT * FROM test_table ✗ FAILED Error type: HTTPException Error message: HTTP Error: HTTP GET error on 'https://storage.googleapis.com/silurian-temporary/ducklake-test/main/test_table/ducklake-019b3028-38b0-7bb9-9e36-4b708ae8cd36.parquet' (HTTP 403) This demonstrates that credential_chain does not work for reading ducklake tables from GCS. ====================================================================== STEP 3: Sanity check - direct parquet read with GCS extension ====================================================================== Attempting to list files in: gs://silurian-temporary/ducklake-test/*.parquet ✓ Can read parquet files directly: found 20 rows ====================================================================== SUMMARY ====================================================================== ✓ fsspec write to ducklake: SUCCESS ✗ GCS extension read ducklake: FAILED ✓ GCS extension read parquet: SUCCESS → Issue confirmed: credential_chain works for direct reads but not for ducklake table reads. Cleanup: You may want to delete /tmp/ducklake_metadata.db and gs://silurian-temporary/ducklake-test