Skip to content

Commit eeb18fb

Browse files
authored
feat(duckdb): add file systems configuration option (#4778)
1 parent c6d7309 commit eeb18fb

File tree

6 files changed

+66
-5
lines changed

6 files changed

+66
-5
lines changed

docs/integrations/engines/duckdb.md

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
| `extensions` | Extension to load into duckdb. Only autoloadable extensions are supported. | list | N |
1919
| `connector_config` | Configuration to pass into the duckdb connector. | dict | N |
2020
| `secrets` | Configuration for authenticating external sources (e.g., S3) using DuckDB secrets. | dict | N |
21+
| `filesystems` | Configuration for registering `fsspec` filesystems with the DuckDB connection. | list | N |
2122

2223
#### DuckDB Catalogs Example
2324

@@ -256,4 +257,36 @@ After configuring the secrets, you can directly reference S3 paths in your catal
256257

257258
Refer to the official DuckDB documentation for the full list of [supported S3 secret parameters](https://duckdb.org/docs/stable/extensions/httpfs/s3api.html#overview-of-s3-secret-parameters) and for more information on the [Secrets Manager configuration](https://duckdb.org/docs/configuration/secrets_manager.html).
258259

259-
> Note: Loading credentials at runtime using `load_aws_credentials()` or similar deprecated functions may fail when using SQLMesh.
260+
> Note: Loading credentials at runtime using `load_aws_credentials()` or similar deprecated functions may fail when using SQLMesh.
261+
262+
##### File system configuration example for Microsoft OneLake
263+
264+
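The `filesystems` option accepts a list of `fsspec` file systems to register with the DuckDB connection. In each entry, the `fs` key names the `fsspec` protocol (e.g., `abfs`) and all remaining keys are passed to `fsspec.filesystem` as storage options. This is especially useful for Azure Storage Accounts, because it adds write support, which DuckDB does not (yet) provide natively.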
265+
266+
267+
=== "YAML"
268+
269+
```yaml linenums="1"
270+
gateways:
271+
ducklake:
272+
connection:
273+
type: duckdb
274+
catalogs:
275+
ducklake:
276+
type: ducklake
277+
path: myducklakecatalog.duckdb
278+
data_path: abfs://MyFabricWorkspace/MyFabricLakehouse.Lakehouse/Files/DuckLake.Files
279+
extensions:
280+
- ducklake
281+
filesystems:
282+
- fs: abfs
283+
account_name: onelake
284+
account_host: onelake.blob.fabric.microsoft.com
285+
client_id: {{ env_var('AZURE_CLIENT_ID') }}
286+
client_secret: {{ env_var('AZURE_CLIENT_SECRET') }}
287+
tenant_id: {{ env_var('AZURE_TENANT_ID') }}
288+
# anon: False # To use azure.identity.DefaultAzureCredential authentication
289+
```
290+
291+
292+
Refer to the `fsspec` documentation for [fsspec.filesystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem) and the `adlfs` documentation for [adlfs.AzureBlobFileSystem](https://fsspec.github.io/adlfs/api/#api-reference) for the full list of supported storage options.

sqlmesh/core/config/connection.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ class BaseDuckDBConnectionConfig(ConnectionConfig):
266266
extensions: A list of autoloadable extensions to load.
267267
connector_config: A dictionary of configuration to pass into the duckdb connector.
268268
secrets: A list of dictionaries used to generate DuckDB secrets for authenticating with external services (e.g. S3).
269+
filesystems: A list of dictionaries used to register `fsspec` filesystems to the DuckDB cursor.
269270
concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
270271
register_comments: Whether or not to register model comments with the SQL engine.
271272
pre_ping: Whether or not to pre-ping the connection before starting a new transaction to ensure it is still alive.
@@ -277,6 +278,7 @@ class BaseDuckDBConnectionConfig(ConnectionConfig):
277278
extensions: t.List[t.Union[str, t.Dict[str, t.Any]]] = []
278279
connector_config: t.Dict[str, t.Any] = {}
279280
secrets: t.List[t.Dict[str, t.Any]] = []
281+
filesystems: t.List[t.Dict[str, t.Any]] = []
280282

281283
concurrent_tasks: int = 1
282284
register_comments: bool = True
@@ -371,6 +373,15 @@ def init(cursor: duckdb.DuckDBPyConnection) -> None:
371373
except Exception as e:
372374
raise ConfigError(f"Failed to create secret: {e}")
373375

376+
if self.filesystems:
377+
from fsspec import filesystem # type: ignore
378+
379+
for file_system in self.filesystems:
380+
options = file_system.copy()
381+
fs = options.pop("fs")
382+
fs = filesystem(fs, **options)
383+
cursor.register_filesystem(fs)
384+
374385
for i, (alias, path_options) in enumerate(
375386
(getattr(self, "catalogs", None) or {}).items()
376387
):

sqlmesh/dbt/target.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ class DuckDbConfig(TargetConfig):
138138
extensions: A list of autoloadable extensions to load.
139139
settings: A dictionary of settings to pass into the duckdb connector.
140140
secrets: A list of secrets to pass to the secret manager in the duckdb connector.
141+
filesystems: A list of `fsspec` filesystems to register in the duckdb connection.
141142
"""
142143

143144
type: t.Literal["duckdb"] = "duckdb"
@@ -147,6 +148,7 @@ class DuckDbConfig(TargetConfig):
147148
extensions: t.Optional[t.List[str]] = None
148149
settings: t.Optional[t.Dict[str, t.Any]] = None
149150
secrets: t.Optional[t.List[t.Dict[str, t.Any]]] = None
151+
filesystems: t.Optional[t.List[t.Dict[str, t.Any]]] = None
150152

151153
@model_validator(mode="before")
152154
def validate_authentication(cls, data: t.Any) -> t.Any:
@@ -182,6 +184,8 @@ def to_sqlmesh(self, **kwargs: t.Any) -> ConnectionConfig:
182184
kwargs["connector_config"] = self.settings
183185
if self.secrets is not None:
184186
kwargs["secrets"] = self.secrets
187+
if self.filesystems is not None:
188+
kwargs["filesystems"] = self.filesystems
185189
return DuckDBConnectionConfig(
186190
database=self.path,
187191
concurrent_tasks=1,

tests/core/test_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ def test_connection_config_serialization():
562562
"pretty_sql": False,
563563
"connector_config": {},
564564
"secrets": [],
565+
"filesystems": [],
565566
"database": "my_db",
566567
}
567568
assert serialized["default_test_connection"] == {
@@ -573,6 +574,7 @@ def test_connection_config_serialization():
573574
"pretty_sql": False,
574575
"connector_config": {},
575576
"secrets": [],
577+
"filesystems": [],
576578
"database": "my_test_db",
577579
}
578580

tests/core/test_connection_config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,9 +437,20 @@ def test_duckdb(make_config):
437437
"secret": "aws_secret",
438438
}
439439
],
440+
filesystems=[
441+
{
442+
"protocol": "abfs",
443+
"storage_options": {
444+
"account_name": "onelake",
445+
"account_host": "onelake.blob.fabric.microsoft.com",
446+
"anon": False,
447+
},
448+
}
449+
],
440450
)
441451
assert config.connector_config
442452
assert config.secrets
453+
assert config.filesystems
443454
assert isinstance(config, DuckDBConnectionConfig)
444455
assert not config.is_recommended_for_state_sync
445456

tests/integrations/jupyter/test_magics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -745,16 +745,16 @@ def test_info(notebook, sushi_context, convert_all_html_output_to_text, get_all_
745745
"Models: 20",
746746
"Macros: 8",
747747
"",
748-
"Connection:\n type: duckdb\n concurrent_tasks: 1\n register_comments: true\n pre_ping: false\n pretty_sql: false\n extensions: []\n connector_config: {}\n secrets: None",
749-
"Test Connection:\n type: duckdb\n concurrent_tasks: 1\n register_comments: true\n pre_ping: false\n pretty_sql: false\n extensions: []\n connector_config: {}\n secrets: None",
748+
"Connection:\n type: duckdb\n concurrent_tasks: 1\n register_comments: true\n pre_ping: false\n pretty_sql: false\n extensions: []\n connector_config: {}\n secrets: None\n filesystems: []",
749+
"Test Connection:\n type: duckdb\n concurrent_tasks: 1\n register_comments: true\n pre_ping: false\n pretty_sql: false\n extensions: []\n connector_config: {}\n secrets: None\n filesystems: []",
750750
"Data warehouse connection succeeded",
751751
]
752752
assert get_all_html_output(output) == [
753753
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Models: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">20</span></pre>",
754754
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Macros: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span></pre>",
755755
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>",
756-
'<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,\'DejaVu Sans Mono\',consolas,\'Courier New\',monospace">Connection: type: duckdb concurrent_tasks: <span style="color: #008080; text-decoration-color: #008080; font-weight: bold">1</span> register_comments: true pre_ping: false pretty_sql: false extensions: <span style="font-weight: bold">[]</span> connector_config: <span style="font-weight: bold">{}</span> secrets: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span></pre>',
757-
'<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,\'DejaVu Sans Mono\',consolas,\'Courier New\',monospace">Test Connection: type: duckdb concurrent_tasks: <span style="color: #008080; text-decoration-color: #008080; font-weight: bold">1</span> register_comments: true pre_ping: false pretty_sql: false extensions: <span style="font-weight: bold">[]</span> connector_config: <span style="font-weight: bold">{}</span> secrets: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span></pre>',
756+
'<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,\'DejaVu Sans Mono\',consolas,\'Courier New\',monospace">Connection: type: duckdb concurrent_tasks: <span style="color: #008080; text-decoration-color: #008080; font-weight: bold">1</span> register_comments: true pre_ping: false pretty_sql: false extensions: <span style="font-weight: bold">[]</span> connector_config: <span style="font-weight: bold">{}</span> secrets: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span> filesystems: <span style="font-weight: bold">[]</span></pre>',
757+
'<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,\'DejaVu Sans Mono\',consolas,\'Courier New\',monospace">Test Connection: type: duckdb concurrent_tasks: <span style="color: #008080; text-decoration-color: #008080; font-weight: bold">1</span> register_comments: true pre_ping: false pretty_sql: false extensions: <span style="font-weight: bold">[]</span> connector_config: <span style="font-weight: bold">{}</span> secrets: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span> filesystems: <span style="font-weight: bold">[]</span></pre>',
758758
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Data warehouse connection <span style=\"color: #008000; text-decoration-color: #008000\">succeeded</span></pre>",
759759
]
760760

0 commit comments

Comments
 (0)