diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ddcb32f5..ff8e1beb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,7 +43,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -81,7 +81,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -129,7 +129,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -177,7 +177,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -240,7 +240,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -285,7 +285,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -330,7 +330,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -376,7 +376,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -412,7 +412,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false @@ -466,7 +466,7 @@ jobs: run: | pip install --upgrade wheel pip --version - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: dist path: dist/ diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 60b861c9..8d8fbe18 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 40cc0c24..8cc6f91d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python uses: actions/setup-python@v5 with: @@ -45,7 +45,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v5 with: name: python-package-distributions path: dist/ @@ -66,12 +66,12 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v5 with: name: python-package-distributions path: dist/ - name: Sign the dists with Sigstore - uses: sigstore/gh-action-sigstore-python@v3.0.0 + uses: sigstore/gh-action-sigstore-python@v3.0.1 with: inputs: >- ./dist/*.tar.gz diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..64fadd8c --- /dev/null +++ b/AUTHORS @@ -0,0 +1,58 @@ +Aleksandar Milicevic +Alex-Monahan +Alexander VR <6877992+AlexanderVR@users.noreply.github.com> +Alexander VR +Amaral Vieira <8823335+amaralvieira@users.noreply.github.com> +Andrei +Arno Roos +Benoit Perigaud <8754100+b-per@users.noreply.github.com> +Brian Gold +Claude +Damir Vandic +Dave +David Roher +David Roher +Doug Beatty +Dumky de Wilde <14962728+dumkydewilde@users.noreply.github.com> +Edgar Ramírez-Mondragón +Elliana May +Felippe F. 
Caso +Florian +Gabor Szarnyas +Gabriel Montañola +Guen Prawiroatmodjo +Guen Prawiroatmodjo +Hidde Stokvis +Jacob Matson +Jeremy Cohen +Jesse Cotton +Jesse Cotton +Josh Wills +Josh Wills +Josh Wills +Keith Thompson +Kshitij Aranke +Leonardo Horta +Lieven +Louisa H <54686345+hrl20@users.noreply.github.com> +Louisa Huang +Michelle Ark +Michelle Ark +Nathan +Nintorac Dev +Olivier Agudo-Perez +Radek Tomšej +Radek Tomšej +Thom van Engelenburg +Thomas Boles +Thomas H +Tobias Hoffmann <130311884+thfmn@users.noreply.github.com> +W. Aaron Morris +dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> +dwreeves +firewall413 <60637730+firewall413@users.noreply.github.com> +gregwdata <79663385+gregwdata@users.noreply.github.com> +mehd-io +mrjsj +oagudoperez +wideltann diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..27123288 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,869 @@ +CHANGES +======= + +* added decimal +* added check if arrary type to dbt2glue +* fixed test: > cursor.execute("CREATE TABLE tt1 (id int, name text)") E sqlite3.OperationalError: table tt1 already exists +* fixed CreatePartition operation: Partition already exists +* resolve issue where multiple glue updates of the same schema would result in multiple schema versions (instead of just adding a partition) +* set default delimiter +* adding a partition delimiter for more generic partitioning +* added partition\_columns new argument for external\_read\_location. This will update the Glue schema to the schema of the latest partition (which in turn allows for append-only schema evolution in later partitions, this is supported by Glue) +* fixed CreatePartition operation: Partition already exists +* Update excel plugin to filter all null rows (#615) +* Change logger.exception to logger.warning to fix orchestrator compatibility (#612) +* Bump actions/checkout from 4 to 5 (#610) +* Fix secrets map/list type handling in DuckDB SQL generation (#609) +* Bump freezegun from 1.5.3 to 1.5.5 (#607) +* Bump actions/download-artifact from 4 to 5 (#606) +* Bump mypy from 1.17.0 to 1.17.1 (#604) +* Bump dbt-tests-adapter from 1.16.0 to 1.17.0 (#602) +* Bump mypy from 1.16.1 to 1.17.0 (#597) +* Add struct column support with flatten functionality (#594) +* Bump pyarrow from 18.1.0 to 21.0.0 (#598) +* Bump freezegun from 1.5.2 to 1.5.3 (#595) +* Update incremental.sql (#591) +* Make the adapter.commit call inside of upstream.sql dependent on the existence of upstream\_nodes (#588) +* Bump mypy from 1.16.0 to 1.16.1 (#582) +* Bump sigstore/gh-action-sigstore-python from 3.0.0 to 3.0.1 (#583) + +1.9.4 +----- + +* fix write conflict when first creating dbt\_temp schema (#584) +* bump duckdb to 1.3.1 (#585) +* Fix error swallowing that hides DuckDB exception details (#579) +* Fix bug where schema's database not specified (#577) +* Assume native comment support in DuckDB catalog queries (#571) +* Bump dbt-tests-adapter from 1.15.1 to 1.16.0 (#570) +* Fix attachment option quoting to prevent SQL errors with path-like values (#566) +* FIX: Fixes issue caused by external materializations with column name contains spaces (#563) +* Bump mypy from 1.15.0 to 1.16.0 (#561) +* Bump dbt-tests-adapter from 1.15.0 to 1.15.1 (#562) +* Update macros for dropping schema and relations when using ducklake (#557) +* Support using attached database aliases as primary database name +* lint stuff +* Add support for arbitrary key-value pairs in Attachment ATTACH options +* Bump freezegun from 1.5.1 to 1.5.2 +* See if this is related to the 
mysterious doc test failures that popped up all of a sudden +* Read .duckdbrc file on DuckDBT CLI startup +* Add named secret parameter to attachment class +* Bump dbt-tests-adapter from 1.11.0 to 1.15.0 +* add tests and quoting - reset to single commit + +1.9.3 +----- + +* Add info on the interactive shell for the DuckDB UI +* bump duckdb version, patch test (#542) +* remove duckdbt entrypoint for now +* Fix: turn transactions back on by default for MotherDuck (#540) +* Fix: override \`run\_hooks\` macro so we can run a post hook outside a txn without dangling \`commit;\` (#539) +* - trailing whitespace +* Update README.md to clarify DuckDB file path behavior and automatic database creation +* Add support for scoped secrets/credentials by storage prefix (#530) +* Update test\_basic.py +* Support dbt\_valid\_to\_current for snapshots +* Test alter table add column works without recreate +* formatting fixes +* rename to table\_function from parameterized\_view +* Formatting fixes. Missed the pre-commit hook! +* parameterized\_view custom materialization to avoid view recreation on schema change +* Tweak this impl a bit +* Add model completion with fzf integration +* Add manifest tracking to duckdbt shell +* resolve issue where multiple glue updates of the same schema would result in multiple schema versions (instead of just adding a partition) +* linting etc +* Vibe-claudeing a duckdbt CLI +* remove print statement, use empty string instead (#517) + +1.9.2 +----- + +* Bump dbt-tests-adapter from 1.10.4 to 1.11.0 (#509) +* Bump mypy from 1.14.1 to 1.15.0 (#508) +* skip s3 tests if the secrets are not available (#511) +* [Fix] External materialization to S3 does not work if table is empty (#510) +* Update deprecated v3 -> v4 for upload-artifact and download-artifact (#507) +* set default delimiter +* adding a partition delimiter for more generic partitioning +* added partition\_columns new argument for external\_read\_location. 
This will update the Glue schema to the schema of the latest partition (which in turn allows for append-only schema evolution in later partitions, this is supported by Glue) +* Make keep\_open: true the default setting for dbt-duckdb going forward (#502) +* remove package build/test build steps (#498) +* run on schedule +* add nightly tox env that installs duckdb nightly release, add github action for nightly tests +* Bump mypy from 1.13.0 to 1.14.1 +* add support for per\_thread\_output in external materialization +* Fix out of date README comment about version support for dbt-core and duckdb (#487) +* Drop 3.8 support, add 3.12 (#490) +* download-artifact should also be bumped to v4 (#489) +* bump github actions versions (#486) +* use session scoped fixture +* use tmp\_path instead of tmp\_dir\_factory.getbasetemp + +1.9.1 +----- + +* Add unique UUID to temp table name (#482) +* fix(postgres-plugin #478): Correct attribute access, plugin initialization, and test assertions; gitignore for a wsl development workflow +* autodiscovery for pytest unit tests +* adding default open dir to the dev container +* Bump dbt-tests-adapter from 1.10.3 to 1.10.4 +* Bump mypy from 1.11.2 to 1.13.0 +* Update README.md to reflect postgres attachments +* Bump dbt-tests-adapter from 1.10.2 to 1.10.3 +* Add safety check for node in graph +* Remove mention of outdated MotherDuck constraints +* fix +* Move the secret creation to a connection-level operation instead of a cursor-level (aka per-thread) operation +* resolved indentation & format issues +* infer type from location, default to parquet if not set +* added glue client fix (local variable 'client' referenced before assignment) +* removed empty line +* auto\_detect=true default values for json and csv +* set default values for csv/json/parquet read options +* adding transaction support in md +* removed double inport +* fixed ruff issues +* added parquet & json read options + added struct field support for glue +* Update to Buena Vista 0.5.0 +* Move this message to a one-time warning function +* Just send this to the dbt.log instead of to stdout to make it less noisy +* Include the relation in question in the log message for the grant no-op +* Linting fixes for PR #456 +* Make grant configs a no-op for DuckDB +* Support HTTP secret extra\_http\_headers + +1.9.0 +----- + +* Bump dbt-tests-adapter from 1.9.2 to 1.10.2 (#454) +* (fix) generate\_database\_name macro should be case insensitive (#453) +* Add support for configuring community/nightly extensions in the profile directly +* check if config has model attr (#451) +* Test with dbt-core prereleases + +1.8.4 +----- + +* Put the dev-requirements back the way they were +* Try out this hack +* Update the versions of Python we run tests against from 3.9 through 3.12 +* (fix): MotherDuck config should set SaaS mode at the end (#446) +* (fix) Add support for attaching a MotherDuck database with token in settings or config (#444) +* A version-independent fix for the datediff week handling in duckdb +* Add explicit register/unregister operations for python models +* remove deprecated functions +* consistently use kwargs +* support preleases +* formatting +* not never, never +* add type hint for secrets +* formatting +* pass credentials to Glue plugin constructor, secrets should always have a value +* Disable Python models on MotherDuck when SaaS mode is on (#435) +* Bump mypy from 1.11.1 to 1.11.2 +* author-email -> author\_email (#428) + +1.8.3 +----- + +* (chore) Use pbr for packaging for automated versioning 
with git tag, move to setup.cfg (#427) +* Bump mypy from 1.11.0 to 1.11.1 +* fix(secrets): value should be enclosed in quotes (#421) + +1.8.2 +----- + +* Prep for 1.8.2 release (#418) +* Bump DuckDB version to 1.0.0 +* Bump mypy from 1.10.1 to 1.11.0 +* formatting and typing +* dataclasses don't like extra kwargs so let's move List[Secret] to a private attribute +* Let duckdb handle the validation logic +* add scope to test +* Bump dbt-tests-adapter from 1.9.1 to 1.9.2 +* Fixes #412 by not requiring aliases for limit 0 clauses in dbt-duckdb +* Bump mypy from 1.10.0 to 1.10.1 +* type fixes +* Formatting and types +* Bump dbt-tests-adapter from 1.8.0 to 1.9.1 +* pass credentials to plugin, get creds in glue plugin +* formatting, mypy fix +* Deprecate using settings for secrets, bump duckdb version requirement to 0.10.0 +* clean up code, add docstrings +* add HF secret +* add scope +* fix typo +* add Azure secret +* make test more useful +* fix typo in test +* Update readme +* prepared statements doesn't work for secrets +* mypy fixes +* pre-commit +* remove add\_secret method +* create or replace secret if a name is specified +* run create secret on cursor init +* convert secrets dict to secrets in post constructor +* rename test\_connections to test\_credentials +* add .env to gitignore +* put secrets stuff in own module +* need this for Python 3.12 +* Add add\_secret method to creds +* Bump duckdb from 0.10.2 to 1.0.0 + +1.8.1 +----- + +* Prep for 1.8.1 +* Fix version constraints +* Skip duckdb v0.10.3 +* Bump freezegun from 1.5.0 to 1.5.1 + +1.8.0 +----- + +* Adjust this since we won't need to change it going forward anymore +* Prep for 1.8.0 + +1.7.5 +----- + +* Prep for the 1.7.5 release +* Update the README with examples of using either meta or config for setting up external sources +* Bump dbt-tests-adapter from 1.7.11 to 1.7.14 +* Use duckdb 0.10.2 +* Use different MD token +* Bump freezegun from 1.4.0 to 1.5.0 +* Bump mypy from 1.9.0 to 1.10.0 +* Setup python dep as well +* Update the devcontainer/pre-commit to use Python 3.11 +* May just have to skip it in memory cause I think maybe it works in file-mode +* Try whistling this +* Make this test not flaky +* precommit etc +* Skip BV for TestUnitTestingTypesDuckDB +* address comments +* query cancellation +* Added profile\_name to boto3 Session + +1.7.4 +----- + +* Prepare a 1.7.4 release +* Skip this test in BV for the moment +* Readd dbt-core to setup.py for install back-compat +* Check if token exists before running MotherDuck CI/CD job +* run tests on master not main +* Add pre-model hook for cleaning up remote temporary table (MotherDuck) +* Readd dbt-tests-adapter +* Revert dev-requirements +* Bump dbt-tests-adapter from 1.7.10 to 1.7.11 +* Skip persist\_docs for MD +* Fix unit tests +* Fix semver comparison logic +* run ruff +* lazy load agate +* fix newline +* Add support for persist\_docs-related functionality now that comments are fully supported in DuckDB 0.10.1 +* formatting + initial mypy fixes +* Bump dbt-tests-adapter from 1.7.9 to 1.7.10 +* use RelationConfig attributes in create\_from\_source +* Bump sigstore/gh-action-sigstore-python from 1.2.3 to 2.1.1 +* Bump actions/setup-python from 4 to 5 +* Bump mypy from 1.8.0 to 1.9.0 + +1.7.3 +----- + +* fix that too +* Fix module field reference on PluginConfig for MD +* Prepare a 1.7.3 release +* Update release.yml +* use --generate-notes in gh release +* only run release pipeline on semver tag pushes +* reorder imports +* fix: custom\_user\_agent does not work if 
motherduck plugin is not specified, add dbt-duckdb version +* add release pipeline +* Bump dbt-tests-adapter from 1.7.8 to 1.7.9 +* implement DuckDbRelation.create\_from, fix TestExternalSources::test\_external\_sources +* bump dbt-common and dbt-adapters to 1.0.0b1 +* mypy fix +* mypy fix +* mypy fix +* pop custom\_user\_agent from settings, add unit test +* Add test for date\_spine use case +* Update dateadd to work with new datediff + +1.7.2 +----- + +* README updates +* Prep for 1.7.2 release +* Fixing this b/c it bothers me +* Bonus: functional tests for dbt unit testing +* Fix unit tests +* Try different install reqs +* Migrate to dbt-common + dbt-adapters +* Bump dbt-tests-adapter from 1.7.7 to 1.7.8 +* Clarify docstring +* move token\_from\_config to plugin +* missed one +* use Dict instead of dict to be backward compatible with older Python versions +* Move config update logic into MotherDuck Plugin +* Rename motherduck\_token property to token\_from\_config and add docstr +* add md to extras\_require and constrain duckdb version, update in tox.ini +* address mypy issues +* fix: both lower and upper caps not defined +* address mypy error +* MD also supports lowercase for config options +* formatting +* set upper limit for duckdb version +* pass plugin config token via duckdb connect config +* Switch to using DuckDB's built-in date\_diff function for the datediff utils macro +* fixed p\_column error +* partition\_column type info & parse function +* reorder lines in UT +* remove redundant test, use creds.is\_motherduck to determine if user agent should have been passed or not +* add trailign whitespace +* Add test that shows no user agent when connecting to non-md path +* remove redundant fixture, use \_\_version\_\_ instead of hardcoded +* Add custom\_user\_agent to config, add UT +* Bump dbt-tests-adapter from 1.7.6 to 1.7.7 +* remove superfluous need\_drop\_temp variable and add temp\_relation to to\_drop within if-statement +* Update dbt/adapters/duckdb/impl.py +* Add plugin test to MotherDuck tox environment +* update docstring +* add \_temp\_schema\_name attribute to adapter +* address mypy issues +* Add test for temp schema name config +* formatting +* make temp schema name configurable, fix bugs for local in-memory tests +* Reverse change to tox.ini for CI +* use credentials.is\_motherduck in LocalEnvironment +* Set md profile path back to md:test, make database\_name fixture +* use adapter.is\_motherduck +* Don't drop temp schema after test ends +* Don't use MD\_CONNECT, instead use SET motherduck\_token +* Create remote temporary tables in a separate schema dbt\_temp +* create temp schema if needed +* add more cleanup to UT, add schema temp to target database for temp table workaround +* add some helpful inline comments +* clean up temp tables for incrementals, add db creation and cleanup for md tests +* use py311 for md tox env +* clarify docstring +* clarify docstsring +* consolidate MotherDuck plugin tests +* Add UT to test incremental model on MotherDuck +* add is\_motherduck property to credentials +* added support to dynamically registering/adding partitions (not just specifying the partition columns in the glue table but for each external s3 write,we want to register those partitions and their s3 locations in glue too) +* clarify inline comment +* add workaround for temporary tables in remote databases +* Bump dbt-tests-adapter from 1.7.5 to 1.7.6 +* passed the typekey test +* fixed partition\_columns hint +* Fixed parameter hint for partition\_columns +* set 
s3\_parent path to original if partitioning is used +* fixed assertion, won't add partitionkeys if no partition\_columns are present +* removed old argument +* added partition support for Glue +* Bump dbt-tests-adapter from 1.7.4 to 1.7.5 +* Add note about incremental materialization strategies + +1.7.1 +----- + +* Bump mypy from 1.7.1 to 1.8.0 +* Bump freezegun from 1.3.1 to 1.4.0 +* Testing other types of exceptions being retryable +* Revert "Provide support for Postgres-specific ATTACH options" +* Test updates for the new retry settings/config +* Make the query tests pass again +* generalize this for connect retries as well as query retries +* Generalize this a bit while we figure out what exactly is getting thrown +* Add support for retrying certain types of exceptions we see when running models in dbt-duckdb +* Bump dbt-tests-adapter from 1.7.3 to 1.7.4 +* Provide support for Postgres-specific ATTACH options +* point to duckdb supported version doc for motherduck +* bump version supported by MotherDuck +* Bump actions/setup-python from 4 to 5 +* Bump freezegun from 1.3.0 to 1.3.1 +* That turned out to be more work than I thought +* why are you like this +* Fixes #304 +* Adjust line lengths everywhere +* Move from black/flake to ruff and fix the stuff ruff found +* Bump dbt-tests-adapter from 1.7.2 to 1.7.3 +* Bump freezegun from 1.2.2 to 1.3.0 +* sqlite write issue fixed +* tests reproduce the issue +* Bump mypy from 1.7.0 to 1.7.1 +* Bump dbt-tests-adapter from 1.7.1 to 1.7.2 +* Bump dbt-tests-adapter from 1.7.0 to 1.7.1 +* Bump black from 23.10.1 to 23.11.0 +* Bump mypy from 1.6.1 to 1.7.0 +* Update sqlalchemy.py +* Update sqlalchemy.py +* Add support for providing additional kwargs for sqlalchemy connection +* Update README.md + +1.7.0 +----- + +* Update deps for the real dbt-core 1.7.0 stuff +* Remove unused hologram-dependent unit tests +* Working test adapter version +* Add in all of the seed delimiter tests +* Support delimiter options inside of the dbt-duckdb fast seed loader +* Mark these tests as skipped for the moment +* Placeholders for the not-yet-working date spine tests +* Version bumps for 1.7.0 +* Bump black from 23.10.0 to 23.10.1 + +1.6.2 +----- + +* Prep and cut a 1.6.2 release +* update delta readme +* format fixes +* adapt readme; add default materialization config +* Bump mypy from 1.6.0 to 1.6.1 +* Bump black from 23.9.1 to 23.10.0 +* Nit +* Bump MotherDuck's version requirement +* See if that makes the precommit checks happier +* With linting enabled and fixes for it +* refactor df registration for more general approach +* Update README with latest target DuckDB version +* Update Excel plugin with output support +* Bump dbt-tests-adapter from 1.6.5 to 1.6.6 +* Bump mypy from 1.5.1 to 1.6.0 +* use tempfile +* change pyarrow\_table to pyarrow\_dataset; use tempfile +* re-enable ability to return a DuckDbPyRelation that holds a reference to temp tables in a dbt python model + +1.6.1 +----- + +* Prep for 1.6.1 release +* fix readme +* update readme +* add delta\_table3\_expected in test +* addapt test; add create schema; fix storage +* delete redudant configure call +* delete test delta +* refactor registered df; delete launch +* add df registration +* try to register df to localsession +* simplify test\_delta +* make duplicate connection example simple +* add notebook with delta+conn testing +* add simple test showcase +* add time travel; remote storage (should test) +* add delta read plugin +* Fix cursor isolation on Python model +* Add creds as arg +* 
Initialize new cursor +* Add batch example +* Add separate cursor for batch reads/writes +* Bump dbt-tests-adapter from 1.6.4 to 1.6.5 +* Bump dbt-tests-adapter from 1.6.3 to 1.6.4 +* put the threads settings in there too +* Add a sample\_profiles.yml file for dbt-duckdb to make an initial 'dbt init' even more seamless +* pydoc'ing some things and relaxing the Pandas dev requirement now that DuckDB 0.9.0 is out the door +* Bump dbt-tests-adapter from 1.6.2 to 1.6.3 +* Allow access to the RuntimeConfigObject from the TargetConfig used by the store method of plugins +* Refactor render\_write\_options for direct option rendering +* Fix typos and formatting issues in dbt-duckdb README documentation +* Add a more helpful error message in the case that the options argument to an external materialization is not a dictionary +* Add a maxfail arg to the MD pytest runs so they fail quickly +* Re-enable all of the core tests for MD +* need to adjust the workflow too +* Require the MOTHERDUCK\_TOKEN to be present in the environment; cut back on BV test runs to make things go faster +* See if I can isolate whatever issue is springing up with the MotherDuck integration tests +* Bump dbt-tests-adapter from 1.6.1 to 1.6.2 +* Bump black from 23.7.0 to 23.9.1 +* Pin pandas depenedency to 2.0.0 to get around dtypes error in integration tests +* Bump actions/checkout from 3 to 4 +* Bump dbt-tests-adapter from 1.6.0 to 1.6.1 +* Bump mypy from 1.5.0 to 1.5.1 +* Update README.md +* Bump mypy from 1.4.1 to 1.5.0 +* Add option to fetch Gsheet data using range +* fix path +* Update README.md +* Update excel.py +* fix: change quoting character +* feat: add s3 support to excel plugin through env +* separate out motherduck plugin test from the others, and skip when using a non-motherduck profile +* fix skip\_by\_profile\_type fixture to apply to class-level tests and fixtures, especially since is only used at the class level +* add source tags to SourceConfig + +1.6.0 +----- + +* Update README for 1.6.0 +* Update for mypy fixes +* Simplify and fix bug in the split\_part utils macro +* Prep for dbt 1.6.0 update +* Add an option to the profile for specifying additional entries for sys.path and revamp how we lookup builtin plugin modules +* Make fast seed loading the default +* try whistling this +* Fix that bit too +* See if this makes the MD integration test stuff work properly +* Bump dbt-tests-adapter from 1.5.2 to 1.5.4 +* Bump black from 23.3.0 to 23.7.0 +* fix: remove parenthesis from md\_connect call +* Require duckdb >= 0.7.0 for dbt-duckdb going forward and remove some dead code +* Add a keep\_open option to the profile +* Some config args to make the updated plugins test run on GH +* Add in plugins for the motherduck and postgres extensions so we can do a little bit of profile-safe initialization work +* Updates to support the revamped dbt debug command +* Simplify some setup stuff now that Python 3.7 is EOL'd +* Make it nicer to run the functional adapter tests on OS X +* picky picky +* Check and reset the environment for a DuckDB connection if the credentials have changed +* Disable these tests on MD and add a note to the README +* Try to figure out what is going on with the MD integration tests +* Bump dbt-tests-adapter from 1.5.1 to 1.5.2 +* Bump mypy from 1.3.0 to 1.4.1 +* Handle the remote case clean-er +* Add more detailed checks to ensure consistent settings between the connection path and database name in the profile + +1.5.2 +----- + +* Document the Plugin.store functionality +* Add in MotherDuck 
docs and do a version bump +* Disable this test for now while DuckDB issue 7934 gets worked out +* Support alternate string formatting strategies for external sources +* ack don't need that now +* Add an example of doing a write-side operation in the sqlalchemy plugin +* Automatically disable transactions on motherduck paths +* just ignore this typecheck'd bit then +* Fix whitespace +* Wire up MotherDuck CI tests +* Some cleanup items +* Version that has all tests passing with the md test profile except for the ones that can't pass b/c the functionality isn't supported +* A more-working version +* Keep any MD connections open in local mode, just like we do for the in-memory connections +* Add an alias here for the update target to get around whatever is broken when running against MD +* disable\_transactions test +* Start working on a transaction-less mode for dbt-duckdb to see if I can get the db files to be smaller after dbt-duckdb runs +* Prep for Python 3.7 EOL +* add missing newline +* Updates to support a broader class of incremental functionality +* Simplify extension loading in dbt-duckdb +* refrain from loading data in Relation.create\_from\_source unless we have an active environment +* Some more tweaks to this interface +* Bump dbt-tests-adapter from 1.5.0 to 1.5.1 +* Bump dbt-tests-adapter from 1.5.0 to 1.5.1 +* fix unit test +* and now with a \*working\* glue test, hooray +* A functional plugin test for the glue stuff +* Add a write-side plugin hook and port the glue stuff over to be its first implementation +* Step 1: Cleanup the SourceConfig class and make it more pythonic +* Some fixes for catalog lookups in a multi-database, multi-schema world of DuckDB queries: ensure that we are always looking for the relations we need to find in as specific a way as possible (so ideally, database + schema + identifier whenever all 3 fields are available to use and supported by DuckDB) +* Update README with details on plugin stuff +* spec out plugin config and writing your own stuff +* revamp the python support section +* docstrings for the BasePlugin class +* Add tests and the stuff I needed to add to actually make this thing work +* Revamp the plugin structure and add in the ability to do some initialization of the DuckDB connection itself +* put a pin in this +* now with tests +* Little bit more on this +* Create a fast-loading mechanism for seed files using DuckDB's COPY command +* Bump mypy from 1.2.0 to 1.3.0 +* For the remote.password stuff to work we need to actually pass it to psycopg2.connecct +* Prepping the dbt-duckdb tests to pass under the next release of DuckDB + +1.5.1 +----- + +* Prep a 1.5.1 release to fix the myriad bugs in the 1.5.0 release +* pyarrow should not be required to run dbt-duckdb python models +* Have the local environment release the connection when it's not in use and not necessary for in-memory runs +* Set a default value for the conn attribute in the case an exception is thrown during init +* oh yep that also +* more tests that need updating +* update the attach test +* Support profiles that are simply 'type: duckdb' by correctly configuring the in-memory database in dbt-duckdb +* Use config info from the source.config and source.table.config entries as well in plugins and external location configuration + +1.5.0 +----- + +* yep that one is important too; I really should have a process for this +* oh yeah that bit is important too +* Do the 1.5.0 release +* still kind of a thrill whenever the tests save my ass +* fix newline +* Handle URI-style paths 
correctly in dbt-duckdb +* add in tz-aware timestamp test + +1.4.2 +----- + +* Doing a 1.4.2 release to pickup bug fixes ahead of the 1.5.0 cutover +* simplify this back down +* let's try whistling this +* now with format fixes +* Renaming/refactoring some things +* Bump dbt-tests-adapter from 1.4.5 to 1.4.6 +* stopping here for the night; need to do most of the localenv stuff tmrw +* fix that +* Just trying to see where this leads me +* First up: this is a bit nicer +* Reorder logic, fix tests +* Add back the test imports I foolishly removed +* Fixup create\_table\_as logic +* Add references alias for foreign\_key +* Add support for model contracts in v1.5 +* Update the snapshot merge SQL to sync it up with the Postgres impl; re-enable the tests +* Add in some of the new test functionality and restructure the directory a bit to prep for some extensions +* Various updates so tests will work against 1.5.0 +* Version bumps +* Enable additional iceberg config options + and add a Spark-style save mode argument for plugin-based sources +* Add the option to materialize a plugin source as a view instead of a table +* Add in iceberg tests and some additional functionality for scan filtering +* Add in the sheet\_name parameter and some more test checks +* Apparently you can't have multiple tests in a suite that both override profiles\_config\_update, or something +* rm unused import +* try to get some more detail on what is going wrong here since this works locally +* really shouldn't ever let gpt3.5 write code, lesson learned +* Add in sqlalchemy dev dep +* this is why gpt3.5 isn't allowed to write code anymore +* Add in a SQLAlchemy plugin +* Fixing bugs and adding a test case (that only runs on my machine) for the gsheet plugin +* Bump mypy from 1.1.1 to 1.2.0 +* Only install pyiceberg in the plugins test until we stop support for 3.7 +* Okay I think that's all of the basics +* Simplify the sources test a bit +* commit those bits too +* WIP: source plugins +* Conditional branching on self.remote +* Set unique\_field in credentials.py +* plugins WIP +* Fix this bit up +* only return non-empty creds to prevent code 400 when session token is empty +* Bump black from 22.12.0 to 23.3.0 +* Wire up GH integration tests to run against a Buena Vista server +* Ensure an event loop exists before we run a python module +* Adding in a simple BV server for testing purposes +* really looking forward to when gpt4 does this stuff for me +* didn't do flake for some reason, huh +* black etc +* Adds a remote environment for DuckDB databases running in a Buena Vista server using the postgres protocol +* Wire up GH Actions to always run file-based tests as well as in-memory tests +* add format to fix glue registration bug +* Change profile-type to 'file' +* Inject any key-value pairs from the meta dictionary into the f-string context we use for rendering the location of an external source +* Simplify environment and connection +* Update .gitignore +* Fix tests so they can run against a file db +* Run every functional test in multithreaded mode by default +* fix comma vs spaces causing empty test. 
Use the upstream config to generate the upstream options +* pre-commit things +* Refactor the code in the connections module into the credentials and environments modules + +1.4.1 +----- + +* More doc fixes and a version bump for the release +* Test for the external json formatting +* Oh yeah I think JSON works too now, yay +* Okay have mostly got this together now +* Some incremental progress here +* update the historical changelog +* Add a test that shows that pyarrow.dataset.Dataset is supported as a return type for Python models +* Bump mypy from 1.0.1 to 1.1.1 +* Bump dbt-tests-adapter from 1.4.4 to 1.4.5 +* Bump dbt-tests-adapter from 1.4.3 to 1.4.4 +* fix: handle multithreaded writes of pyarrow models +* fix: clear connections before tests +* Some more targeted unit tests for the new adapter functions +* case shouldn't matter here +* Test some slightly different things here +* loadbearing config change that +* Fix mypy thingy +* Expand the set of quoted chars here +* fix some things the tests found +* Refactor/generalize the external materialization's write options since there are a lot of them +* WIP for this, add in the infra I need +* Bump dbt-tests-adapter from 1.4.1 to 1.4.3 +* fix test dbt selection to run both models, not their empty intersection. Fix load\_upstream macro to commit the newly created views +* Bump mypy from 1.0.0 to 1.0.1 +* ah tweaking this so as not to trigger GH rate limit issues +* loadbearing whitespace +* Try moving the filesystem tests into their own workflow given the extra deps +* Add filesystems integration tests + fix up some of the other integration test fixtures +* Update dbt/adapters/duckdb/connections.py +* Update dbt/adapters/duckdb/connections.py +* WIP: Add support for reading/writing with fsspec-compatible filesystems + +1.4.0 +----- + +* README updates for attaching databases +* Create attach-specific functional tests +* That's pretty much why we write tests +* Start experimenting with ATTACH configs and full-namespace database references for DuckDB 0.7.0 +* Define the attachment configuration options on the DuckDBCredentials class +* black +* Getting the code ready for DuckDB 0.7.0 and dbt 1.4.0 +* Bump mypy from 0.991 to 1.0.0 +* remove extra import +* cleanup test +* use pyarrow instead of pandas where possible. Ensure import before isinstance checks. 
Add pyarrow as dev dependency +* add usage of register\_upstream\_external\_models macro to README +* replace model-level macro with an on-run-start hook that materializes all models upstream of the selected ones +* Add in some lru\_cache to limit the hit from these creds lookups on small runs +* fix bug +* Use sts to verify the credentials are correct/work +* fix that precommit error +* Add in support for getting AWS creds using the AWS credentials chain +* Bump tests +* Keep at 3.8 for mypy +* Move back to 3.10 for code quality check +* Change main python version +* Stringify Python versions +* add new python versions to actiosn +* Add 3.10 and 3.11 support +* Match refactored class names from DBT 1.4.0 +* Register views for upstream external models +* use the cursor with the stored credentials for python models rather than the bare connection +* Bump dbt-tests-adapter from 1.3.1 to 1.3.2 +* fix that +* Update/modernize the duckdb\_\_load\_csv\_rows macro impl +* Also format the ext location string with the schema for further help with boilerplate +* and that +* see if this magically fixes things +* Bump black from 22.10.0 to 22.12.0 +* Use better, non-regex based syntax for handling the limit case inside of the listagg macro +* 1.3.3 release +* Work around pandas dependency on parameter bindings that feature datetime instances +* precommit stuff +* Oops not what I meant +* Use an importlib-based approach to loading and executing dbt Python models +* Bump mypy from 0.990 to 0.991 +* Bump dbt-tests-adapter from 1.3.0 to 1.3.1 +* well thats just head-slappingly stupid of me +* okay fine i'll default to pandas instead +* make this CI friendly +* Support DuckDBPyRelation as a return type from Python models +* huh not sure how that got there +* Allow dbt-duckdb to work with versions after 0.5.0, including the new 0.6.0 +* Bump mypy from 0.982 to 0.990 +* Add render support for all of the string config options for external materializations + +v1.3.1 +------ + +* Bump to 1.3.1 and release that +* fix: run external everytime (#51) +* Fix upgrading pip (#53) +* fix: disable upgrading pip in github action (#52) +* bit more polish +* typos and formatting +* trailing ws +* Docs update for the 1.3.0 release +* Now with tests for sources with an external location +* try whistling this +* fix not in +* import ordering +* Another rev at this, adjusting things somewhat to allow for templating external locations at the source level +* Redo this as a jinja templated var +* checkpoint this for now +* Minimal amount of effort required to make external locations work for dbt-duckdb sources +* feat: add support for glue catalog (#47) +* okay finally have my black settings right +* Add an external\_root setting for the DuckDB profile as a location to write external files to when no location is specified explicitly +* fix: default delimiter changed to comma +* fix: remove unused macro create\_view\_loacation (#45) +* Add external location macro (#44) +* feat: add support for external materialization (#35) +* Bump freezegun from 0.3.12 to 1.2.2 (#41) +* Bump actions/download-artifact from 2 to 3 (#40) +* fix: development env requirements +* feat: add dependabot (#39) +* patch the COLUMNS\_EQUAL\_SQL template to not use the same identifiers as the tested relations in the TestConcurrency test cases +* style: fix missing import ordering (#36) +* feat: add support for python models (#28) +* feat: add new functional tests from 1.3.0 (#32) + +v1.2.3 +------ + +* Black wit longer lines +* Ensure that we always keep a 
connection open when we're running dbt-duckdb in-memory +* fix: black formatting +* fix: running action +* fix: ignore type of \_\_path\_\_ +* feat: adapt to scaffolding template +* Generalize the extensions/settings config for DuckDB and update the README to reflect +* Reference count the open connections and close the parent connection when they are all closed +* Move creds fetch outside of the lock +* black + README updates +* Make multiple threads work with dbt-duckdb +* Update README +* Update README +* Update to 1.2.0 as the release version +* Okay now S3 stuff works correctly, yay +* Use the cursor (aka connection) associated with the connection.handle instead of the parent one from duckdb.connect +* add in extension loading support +* format the tests +* Update to target real dbt 1.2.0 +* Add in support for the cross-db util impls for DuckDB and tests for the same +* WIP for 1.2.0rc1; may be basis of dbt-duckdb 1.2.0 +* update CHANGELOG +* Version bump to explicitly check that the profile is only using a single thread for dbt-duckdb +* Enforce the single-threaded profile config for dbt-duckdb and note it in the README +* Updated CHANGELOG +* Fix a bunch of stuff and get ready for the next version +* Remove old test spec +* Now with adapter specific naming +* Use the new dbt adapter test library +* Prep version bump for 1.1.2 release +* Update CHANGELOG +* Exclude duckdb 0.4.0 from compatible versions +* Align with minor version of dbt-core +* and minor version bump +* Fix that typo +* Define a CHANGELOG +* Upgrade to duckdb 0.3.2; refactor connections now that we can support dbt threads correctly +* Be more explicit about erors in model queries/give more helpful debug info +* Less code == good +* tiny version bump +* Fix this and that +* Fix this so that duckdb shows up as a supported plugin when you run dbt --version +* black all the things +* Add Apache License; fixes #4 +* Simplify DuckDBConnectionWrapper logic +* Support for dbt 0.20.1 +* Support for duckdb 0.2.8. Fixes #2 +* Update for PyPI usage with released version of duckdb 0.2.2. Fixes #1 +* now everything works, yay +* enable snapshot tests now that update..from is supported +* No longer need to worry about closing these cursors explicitly +* how bad i am at stuff +* Update README.md +* Instructions for the creds config +* Add default values for all of the credential fields so you don't usually have to specify anything besides the path +* Give useful install instructions for local dev +* \_\_init\_\_.py infra so pip install . 
will work +* Re-order the operations in the snapshot\_merge impl +* Enable more of the standard tests by default +* hack in a version of snapshot merge that I can (eventually) make work with duckdb +* Make the tests get a little bit further +* Add a block to close any open cursors returned by the add\_query method +* Some more macros we needed +* acknowledge passed in schemas +* first cut at the duckdb catalog macro +* much less awful +* Latest and greatest +* Checkpointing some more work here +* closer to working +* Weaken version reqs for the time being +* Initial commit of dbt-duckdb diff --git a/README.md b/README.md index bbddffee..6d3084d8 100644 --- a/README.md +++ b/README.md @@ -168,8 +168,8 @@ When fetching a secret for a path, the secret scopes are compared to the path, r #### Attaching Additional Databases -DuckDB version `0.7.0` added support for [attaching additional databases](https://duckdb.org/docs/sql/statements/attach.html) to your dbt-duckdb run so that you can read -and write from multiple databases. Additional databases may be configured using [dbt run hooks](https://docs.getdbt.com/docs/build/hooks-operations) or via the `attach` argument +DuckDB supports [attaching additional databases](https://duckdb.org/docs/sql/statements/attach.html) to your dbt-duckdb run so that you can read +and write from multiple databases. Additional databases may be configured via the `attach` argument in your profile that was added in dbt-duckdb `1.4.0`: ``` @@ -188,14 +188,54 @@ default: type: sqlite - path: postgresql://username@hostname/dbname type: postgres + # Using the options dict for arbitrary ATTACH options + - path: /tmp/special.duckdb + options: + cache_size: 1GB + threads: 4 + enable_fsst: true ``` The attached databases may be referred to in your dbt sources and models by either the basename of the database file minus its suffix (e.g., `/tmp/other.duckdb` is the `other` database and `s3://yep/even/this/works.duckdb` is the `works` database) or by an alias that you specify (so the `./yet/another.duckdb` database in the above configuration is referred to as `yet_another` instead of `another`.) Note that these additional databases do not necessarily have to be DuckDB files: DuckDB's storage and catalog engines are pluggable, and -DuckDB `0.7.0` ships with support for reading and writing from attached databases. You can indicate the type of the database you are connecting to via the `type` argument, +DuckDB ships with support for reading and writing from attached databases. You can indicate the type of the database you are connecting to via the `type` argument, which currently supports `duckdb`, `sqlite` and `postgres`. +##### Arbitrary ATTACH Options + +As DuckDB continues to add new attachment options, you can use the `options` dictionary to specify any additional key-value pairs that will be passed to the `ATTACH` statement. 
This allows you to take advantage of new DuckDB features without waiting for explicit support in dbt-duckdb: + +``` +attach: + # Standard way using direct fields + - path: /tmp/db1.duckdb + type: sqlite + read_only: true + + # New way using options dict (equivalent to above) + - path: /tmp/db2.duckdb + options: + type: sqlite + read_only: true + + # Mix of both (no conflicts allowed) + - path: /tmp/db3.duckdb + type: sqlite + options: + block_size: 16384 + + # Using options dict for future DuckDB attachment options + - path: /tmp/db4.duckdb + options: + type: duckdb + # Example: hypothetical future options DuckDB might add + compression: lz4 + memory_limit: 2GB +``` + +Note: If you specify the same option in both a direct field (`type`, `secret`, `read_only`) and in the `options` dict, dbt-duckdb will raise an error to prevent conflicts. + #### Configuring dbt-duckdb Plugins dbt-duckdb has its own [plugin](dbt/adapters/duckdb/plugins/__init__.py) system to enable advanced users to extend @@ -495,6 +535,53 @@ an external database, respectively. dbt-duckdb ships with a number of [built-in plugins](dbt/adapters/duckdb/plugins/) that can be used as examples for implementing your own. +### Interactive Shell + +As of version 1.9.3, dbt-duckdb includes an interactive shell that allows you to run dbt commands and query the DuckDB database in an integrated CLI environment. The shell automatically launches the [DuckDB UI](https://duckdb.org/2025/03/12/duckdb-ui.html), providing a visual interface to explore your data while working with your dbt models. + +To start the interactive shell, use: + +``` +python -m dbt.adapters.duckdb.cli +``` + +You can specify a profile to use with the `--profile` flag: + +``` +python -m dbt.adapters.duckdb.cli --profile my_profile +``` + +The shell provides access to all standard dbt commands: +- `run` - Run dbt models +- `test` - Run tests on dbt models +- `build` - Build and test dbt models +- `seed` - Load seed files +- `snapshot` - Run snapshots +- `compile` - Compile models without running them +- `parse` - Parse the project +- `debug` - Debug connection +- `deps` - Install dependencies +- `list` - List resources + +When you launch the shell, it automatically: +1. Runs `dbt debug` to test your connection +2. Parses your dbt project +3. Launches the DuckDB UI for visual data exploration + +The shell supports model name autocompletion if you install the optional `iterfzf` package: + +``` +pip install iterfzf +``` + +Example workflow: +1. Start the interactive shell +2. View your project's models in the launched DuckDB UI +3. Run `build` to build your models +4. Immediately see the results in the UI and continue iterating + +This interactive environment makes it easier to develop and test dbt models while simultaneously exploring the data in a visual interface. 
+ ### Roadmap Things that we would like to add in the near future: diff --git a/dbt/adapters/duckdb/cli.py b/dbt/adapters/duckdb/cli.py index 8079a794..19f76483 100644 --- a/dbt/adapters/duckdb/cli.py +++ b/dbt/adapters/duckdb/cli.py @@ -40,6 +40,21 @@ def __init__(self, profile: Optional[str] = None): if DuckDBConnectionManager._ENV: env = DuckDBConnectionManager._ENV cursor = env.handle().cursor() + + # Check if .duckdbrc file exists + # and execute the SQL commands if it does + duckdbrc_path = os.path.expanduser("~/.duckdbrc") + if os.path.exists(duckdbrc_path): + print(f"Executing commands from {duckdbrc_path}...") + try: + with open(duckdbrc_path, "r") as f: + sql_script = f.read() + if sql_script.strip(): + cursor.execute(sql_script) + print(f"Successfully executed {duckdbrc_path}") + except Exception as e: + print(f"Error executing {duckdbrc_path}: {e}") + print("Launching DuckDB UI...") cursor.execute("CALL start_ui()") cursor.close() diff --git a/dbt/adapters/duckdb/column.py b/dbt/adapters/duckdb/column.py index 8dc72ea2..0fcccb1a 100644 --- a/dbt/adapters/duckdb/column.py +++ b/dbt/adapters/duckdb/column.py @@ -1,10 +1,55 @@ +import re from dataclasses import dataclass +from dataclasses import field +from typing import List from dbt.adapters.base.column import Column @dataclass class DuckDBColumn(Column): + fields: List["DuckDBColumn"] = field(default_factory=list) + + def __post_init__(self): + if self.is_struct(): + self._parse_struct_fields() + + def _parse_struct_fields(self): + # In DuckDB, structs are defined as STRUCT(key1 type1, key2 type2, ...) + # We need to extract the key-type pairs from the struct definition + # e.g., STRUCT(a VARCHAR, b INTEGER) -> ["a VARCHAR", "b INTEGER"] + # We can't just split by comma, because types can contain commas + # e.g. 
DECIMAL(10, 2) + # The following logic will handle nested structs and complex types + match = re.match(r"STRUCT\((.*)\)", self.dtype, re.IGNORECASE) + if not match: + return + + content = match.group(1) + + fields = [] + paren_level = 0 + current_field = "" + for char in content: + if char == "(": + paren_level += 1 + elif char == ")": + paren_level -= 1 + + if char == "," and paren_level == 0: + fields.append(current_field.strip()) + current_field = "" + else: + current_field += char + fields.append(current_field.strip()) + + for f in fields: + # Split on the first space to separate the name from the type + parts = f.split(" ", 1) + col_name = parts[0] + col_type = parts[1] + self.fields.append(DuckDBColumn(column=col_name, dtype=col_type)) + def is_float(self): return self.dtype.lower() in { # floats @@ -38,3 +83,30 @@ def is_integer(self) -> bool: "signed", "long", } + + def is_struct(self) -> bool: + return self.dtype.lower().startswith("struct") + + def flatten(self) -> List["DuckDBColumn"]: + if not self.is_struct(): + return [self] + + flat_columns: List["DuckDBColumn"] = [] + for column_field in self.fields: + if column_field.is_struct(): + # Recursively flatten nested structs + for nested_field in column_field.flatten(): + flat_columns.append( + DuckDBColumn( + column=f"{self.column}.{nested_field.column}", + dtype=nested_field.dtype, + ) + ) + else: + flat_columns.append( + DuckDBColumn( + column=f"{self.column}.{column_field.column}", + dtype=column_field.dtype, + ) + ) + return flat_columns diff --git a/dbt/adapters/duckdb/connections.py b/dbt/adapters/duckdb/connections.py index 359e153e..e68aace9 100644 --- a/dbt/adapters/duckdb/connections.py +++ b/dbt/adapters/duckdb/connections.py @@ -97,6 +97,9 @@ def exception_handler(self, sql: str, connection_name="master"): raise except RuntimeError as e: logger.debug("duckdb error: {}".format(str(e))) + logger.debug("Error running SQL: {}".format(sql)) + # Preserve original RuntimeError with full context instead of swallowing + raise dbt.exceptions.DbtRuntimeError(str(e)) from e except Exception as exc: logger.debug("Error running SQL: {}".format(sql)) logger.debug("Rolling back transaction.") diff --git a/dbt/adapters/duckdb/constants.py b/dbt/adapters/duckdb/constants.py new file mode 100644 index 00000000..30c38fdf --- /dev/null +++ b/dbt/adapters/duckdb/constants.py @@ -0,0 +1,2 @@ +TEMP_SCHEMA_NAME = "temp_schema_name" +DEFAULT_TEMP_SCHEMA_NAME = "dbt_temp" diff --git a/dbt/adapters/duckdb/credentials.py b/dbt/adapters/duckdb/credentials.py index 1294ef06..d30e779a 100644 --- a/dbt/adapters/duckdb/credentials.py +++ b/dbt/adapters/duckdb/credentials.py @@ -27,9 +27,15 @@ class Attachment(dbtClassMixin): # An optional alias for the attached database alias: Optional[str] = None + # An optional reference to a secret name from the secret manager + secret: Optional[str] = None + # Whether the attached database is read-only or read/write read_only: bool = False + # Arbitrary key-value pairs for additional ATTACH options + options: Optional[Dict[str, Any]] = None + def to_sql(self) -> str: # remove query parameters (not supported in ATTACH) parsed = urlparse(self.path) @@ -37,13 +43,69 @@ def to_sql(self) -> str: base = f"ATTACH '{path}'" if self.alias: base += f" AS {self.alias}" - options = [] + + # Check for conflicts between legacy fields and options dict + if self.options: + conflicts = [] + if self.type and "type" in self.options: + conflicts.append("type") + if self.secret and "secret" in self.options: + 
conflicts.append("secret") + if self.read_only and "read_only" in self.options: + conflicts.append("read_only") + + if conflicts: + raise DbtRuntimeError( + f"Attachment option(s) {conflicts} specified in both direct fields and options dict. " + f"Please specify each option in only one location." + ) + + # Collect all options, prioritizing direct fields over options dict + all_options = [] + + # Add legacy options for backward compatibility if self.type: - options.append(f"TYPE {self.type}") + all_options.append(f"TYPE {self.type}") + elif self.options and "type" in self.options: + all_options.append(f"TYPE {self.options['type']}") + + if self.secret: + all_options.append(f"SECRET {self.secret}") + elif self.options and "secret" in self.options: + all_options.append(f"SECRET {self.options['secret']}") + if self.read_only: - options.append("READ_ONLY") - if options: - joined = ", ".join(options) + all_options.append("READ_ONLY") + elif self.options and "read_only" in self.options and self.options["read_only"]: + all_options.append("READ_ONLY") + + # Add arbitrary options from the options dict (excluding handled ones) + if self.options: + handled_keys = {"type", "secret", "read_only"} + for key, value in self.options.items(): + if key in handled_keys: + continue + + # Format the option appropriately + if isinstance(value, bool): + if value: # Only add boolean options if they're True + all_options.append(key.upper()) + elif value is not None: + # Quote string values for DuckDB SQL compatibility + if isinstance(value, str): + # Only quote if not already quoted (single or double quotes) + stripped_value = value.strip() + if (stripped_value.startswith("'") and stripped_value.endswith("'")) or ( + stripped_value.startswith('"') and stripped_value.endswith('"') + ): + all_options.append(f"{key.upper()} {value}") + else: + all_options.append(f"{key.upper()} '{value}'") + else: + all_options.append(f"{key.upper()} {value}") + + if all_options: + joined = ", ".join(all_options) base += f" ({joined})" return base @@ -163,6 +225,19 @@ def __post_init__(self): self.secrets = self.secrets or [] self._secrets = [] + # Build set of ducklake database names for efficient lookup + self._ducklake_dbs = set() + if self.attach: + for attachment in self.attach: + if ( + hasattr(attachment, "alias") + and attachment.alias + and hasattr(attachment, "path") + and attachment.path + and "ducklake:" in attachment.path + ): + self._ducklake_dbs.add(attachment.alias) + # Add MotherDuck plugin if the path is a MotherDuck database # and plugin was not specified in profile.yml if self.is_motherduck: @@ -228,18 +303,25 @@ def __pre_deserialize__(cls, data: Dict[Any, Any]) -> Dict[Any, Any]: parsed = urlparse(path) base_file = os.path.basename(parsed.path) path_db = os.path.splitext(base_file)[0] - # For MotherDuck, turn on disable_transactions unless - # it's explicitly set already by the user if cls._is_motherduck(parsed.scheme): - if "disable_transactions" not in data: - data["disable_transactions"] = True if path_db == "": path_db = "my_db" + # Check if the database field matches any attach alias + attach_aliases = [] + if data.get("attach"): + for attach_data in data["attach"]: + if isinstance(attach_data, dict) and attach_data.get("alias"): + attach_aliases.append(attach_data["alias"]) + + database_from_data = data.get("database") + database_matches_attach_alias = database_from_data in attach_aliases + if path_db and "database" not in data: data["database"] = path_db elif path_db and data["database"] != path_db: - if 
not data.get("remote"): + # Allow database name to differ from path_db if it matches an attach alias + if not data.get("remote") and not database_matches_attach_alias: raise DbtRuntimeError( "Inconsistency detected between 'path' and 'database' fields in profile; " f"the 'database' property must be set to '{path_db}' to match the 'path'" diff --git a/dbt/adapters/duckdb/environments/__init__.py b/dbt/adapters/duckdb/environments/__init__.py index 1bd8a264..904ae04d 100644 --- a/dbt/adapters/duckdb/environments/__init__.py +++ b/dbt/adapters/duckdb/environments/__init__.py @@ -11,6 +11,7 @@ import duckdb from dbt_common.exceptions import DbtRuntimeError +from ..constants import DEFAULT_TEMP_SCHEMA_NAME from ..credentials import DuckDBCredentials from ..credentials import Extension from ..plugins import BasePlugin @@ -88,12 +89,6 @@ def __init__(self, creds: DuckDBCredentials): if path not in sys.path: sys.path.append(path) - major, minor, patch = [int(x) for x in duckdb.__version__.split("-")[0].split(".")] - if major == 0 and (minor < 10 or (minor == 10 and patch == 0)): - self._supports_comments = False - else: - self._supports_comments = True - @property def creds(self) -> DuckDBCredentials: return self._creds @@ -117,9 +112,6 @@ def store_relation(self, plugin_name: str, target_config: TargetConfig) -> None: def get_binding_char(self) -> str: return "?" - def supports_comments(self) -> bool: - return self._supports_comments - @classmethod @abc.abstractmethod def is_cancelable(cls) -> bool: @@ -204,6 +196,14 @@ def initialize_db( for attachment in creds.attach: conn.execute(attachment.to_sql()) + if creds.is_motherduck: + # Each incremental model will try to create a temporary schema, usually the + # DEFAULT_TEMP_SCHEMA_NAME, in its own transaction, which will result in all + # except the first-run model to fail with a write-write conflict. By creating + # the schema here, we make the CREATE SCHEMA statement in the incremental models + # a no-op, which will prevent the write-write conflict. 
+                conn.execute("CREATE SCHEMA IF NOT EXISTS {}".format(DEFAULT_TEMP_SCHEMA_NAME))
+
         return conn
 
     @classmethod
diff --git a/dbt/adapters/duckdb/environments/local.py b/dbt/adapters/duckdb/environments/local.py
index 0b1a4e5b..27cb197c 100644
--- a/dbt/adapters/duckdb/environments/local.py
+++ b/dbt/adapters/duckdb/environments/local.py
@@ -24,7 +24,8 @@ def execute(self, sql, bindings=None):
             else:
                 return self._cursor.execute(sql, bindings)
         except RuntimeError as e:
-            raise DbtRuntimeError(str(e))
+            # Preserve original error with full context including potential transaction state info
+            raise DbtRuntimeError(str(e)) from e
 
 
 class DuckDBConnectionWrapper:
diff --git a/dbt/adapters/duckdb/impl.py b/dbt/adapters/duckdb/impl.py
index 02c70031..1ce6d525 100644
--- a/dbt/adapters/duckdb/impl.py
+++ b/dbt/adapters/duckdb/impl.py
@@ -1,4 +1,5 @@
 import os
+import traceback
 from collections import defaultdict
 from typing import Any
 from typing import List
@@ -12,6 +13,8 @@
 from dbt_common.exceptions import DbtInternalError
 from dbt_common.exceptions import DbtRuntimeError
 
+from .constants import DEFAULT_TEMP_SCHEMA_NAME
+from .constants import TEMP_SCHEMA_NAME
 from dbt.adapters.base import BaseRelation
 from dbt.adapters.base.column import Column as BaseColumn
 from dbt.adapters.base.impl import ConstraintSupport
@@ -28,9 +31,6 @@
 from dbt.adapters.sql import SQLAdapter
 
-TEMP_SCHEMA_NAME = "temp_schema_name"
-DEFAULT_TEMP_SCHEMA_NAME = "dbt_temp"
-
 if TYPE_CHECKING:
     import agate
 
@@ -69,6 +69,14 @@ def debug_query(self):
     def is_motherduck(self):
         return self.config.credentials.is_motherduck
 
+    @available
+    def is_ducklake(self, relation: DuckDBRelation) -> bool:
+        """Check if a relation's database is backed by a ducklake attachment."""
+        if not relation or not relation.database:
+            return False
+
+        return relation.database in self.config.credentials._ducklake_dbs
+
     @available
     def convert_datetimes_to_strs(self, table: "agate.Table") -> "agate.Table":
         import agate
@@ -128,13 +136,6 @@ def external_root(self) -> str:
     def get_binding_char(self):
         return DuckDBConnectionManager.env().get_binding_char()
 
-    @available
-    def catalog_comment(self, prefix):
-        if DuckDBConnectionManager.env().supports_comments():
-            return f"{prefix}.comment"
-        else:
-            return "''"
-
     @available
     def external_write_options(self, write_location: str, rendered_options: dict) -> str:
         if "format" not in rendered_options:
@@ -169,14 +170,29 @@ def external_write_options(self, write_location: str, rendered_options: dict) ->
         return ", ".join(ret)
 
     @available
-    def external_read_location(self, write_location: str, rendered_options: dict) -> str:
-        if rendered_options.get("partition_by") or rendered_options.get("per_thread_output"):
-            globs = [write_location, "*"]
-            if rendered_options.get("partition_by"):
-                partition_by = str(rendered_options.get("partition_by"))
-                globs.extend(["*"] * len(partition_by.split(",")))
-            return ".".join(["/".join(globs), str(rendered_options.get("format", "parquet"))])
-        return write_location
+    def external_read_location(
+        self,
+        write_location: str,
+        rendered_options: dict,
+        partition_columns: list = [],  # default keeps two-argument callers (e.g. upstream.sql) working
+        partition_delimiter: str = "=",
+    ) -> str:
+        """
+        :param partition_columns: A list of dictionaries describing partition columns and values.
+            e.g.: [{'Name': 'import_day', 'Value': '2'}, ...]
+        :param partition_delimiter: String used to join the partition name and value.
+            Defaults to "=" (Hive-style).
+ Examples: "_", "-", "" + """ + if rendered_options.get("partition_by"): + parts = [write_location] + for col in partition_columns: + # Use the delimiter to form the partition path + parts.append(f"{col['Name']}{partition_delimiter}{col['Value']}") + parts.append("*") + return "/".join(parts) + else: + return write_location @available def warn_once(self, msg: str): @@ -191,7 +207,10 @@ def commit_if_has_connection(self) -> None: """This is just a quick-fix. Python models do not execute begin function so the transaction_open is always false.""" try: self.connections.commit_if_has_connection() - except DbtInternalError: + except DbtInternalError as e: + # Log commit errors instead of silently swallowing them to aid debugging + logger.warning(f"Commit failed with DbtInternalError: {e}\n{traceback.format_exc()}") + # Still pass to maintain backward compatibility, but now with visibility pass def submit_python_job(self, parsed_model: dict, compiled_code: str) -> AdapterResponse: @@ -231,18 +250,22 @@ def get_rows_different_sql( return sql @available.parse(lambda *a, **k: []) - def get_column_schema_from_query(self, sql: str) -> List[BaseColumn]: - """Get a list of the Columns with names and data types from the given sql.""" + def get_column_schema_from_query(self, sql: str) -> List[DuckDBColumn]: + """Get a list of the column names and data types from the given sql. + :param str sql: The sql to execute. + :return: List[DuckDBColumn] + """ # Taking advantage of yet another amazing DuckDB SQL feature right here: the # ability to DESCRIBE a query instead of a relation describe_sql = f"DESCRIBE ({sql})" _, cursor = self.connections.add_select_query(describe_sql) - ret = [] + flattened_columns = [] for row in cursor.fetchall(): name, dtype = row[0], row[1] - ret.append(DuckDBColumn.create(name, dtype)) - return ret + column = DuckDBColumn(column=name, dtype=dtype) + flattened_columns.extend(column.flatten()) + return flattened_columns @classmethod def render_column_constraint(cls, constraint: ColumnLevelConstraint) -> Optional[str]: diff --git a/dbt/adapters/duckdb/plugins/excel.py b/dbt/adapters/duckdb/plugins/excel.py index 4a4e7ed5..e96bba8e 100644 --- a/dbt/adapters/duckdb/plugins/excel.py +++ b/dbt/adapters/duckdb/plugins/excel.py @@ -76,6 +76,7 @@ def store(self, target_config: TargetConfig): target_output_config["sheet_name"] = sheet_name df = pd_utils.target_to_df(target_config) + df = df[df.notna().any(axis=1)] if target_output_config.get("skip_empty_sheet", False) and df.shape[0] == 0: return try: diff --git a/dbt/adapters/duckdb/plugins/glue.py b/dbt/adapters/duckdb/plugins/glue.py index 1735bf1b..0ac68448 100644 --- a/dbt/adapters/duckdb/plugins/glue.py +++ b/dbt/adapters/duckdb/plugins/glue.py @@ -34,6 +34,12 @@ class UndetectedType(Exception): def _dbt2glue(dtype: str, ignore_null: bool = False) -> str: # pragma: no cover """DuckDB to Glue data types conversion.""" + # Check if it's an array type + if dtype.strip().endswith("[]"): + base_type = dtype.strip()[:-2] # Remove the [] suffix + base_glue_type = _dbt2glue(base_type, ignore_null) + return f"array<{base_glue_type}>" + data_type = dtype.split("(")[0] if data_type.lower() in ["int1", "tinyint"]: return "tinyint" @@ -49,7 +55,9 @@ def _dbt2glue(dtype: str, ignore_null: bool = False) -> str: # pragma: no cover ) if data_type.lower() in ["float4", "float", "real"]: return "float" - if data_type.lower() in ["float8", "numeric", "decimal", "double"]: + if data_type.lower() in ["decimal"]: + return "decimal" + if data_type.lower() in 
["float8", "numeric", "double"]: return "double" if data_type.lower() in ["boolean", "bool", "logical"]: return "boolean" @@ -161,6 +169,28 @@ def _create_table( ) +def _add_partition( + client: "GlueClient", + database: str, + table_def: "TableInputTypeDef", + partition_columns: List[Dict[str, str]], +) -> None: + + if partition_columns != []: + partition_input, partition_values = _parse_partition_columns(partition_columns, table_def) + + try: + client.create_partition( + DatabaseName=database, TableName=table_def["Name"], PartitionInput=partition_input + ) + except client.exceptions.AlreadyExistsException: + client.update_partition( + DatabaseName=database, + TableName=table_def["Name"], + PartitionValueList=partition_values, + PartitionInput=partition_input, + ) + def _update_table( client: "GlueClient", database: str, @@ -178,6 +208,7 @@ def _update_table( TableName=table_def["Name"], PartitionValues=partition_values, ) + client.update_partition( DatabaseName=database, TableName=table_def["Name"], @@ -337,14 +368,29 @@ def create_or_update_table( if glue_table: # Existing columns in AWS Glue catalog glue_columns = _get_column_type_def(glue_table) - # Create new version only if columns are changed - if glue_columns != columns: + partition_names = {col["Name"] for col in partition_columns} + + # Filter out dicts in model_columns that have a 'Name' in partition_names + columns = [col for col in columns if col["Name"] not in partition_names] + + # Convert both column lists to lowercase for case-insensitive comparison + glue_columns_lower = [{k: v.lower() if isinstance(v, str) else v for k, v in col.items()} for col in glue_columns] if glue_columns else [] + columns_lower = [{k: v.lower() if isinstance(v, str) else v for k, v in col.items()} for col in columns] + + if glue_columns_lower != columns_lower: _update_table( client=client, database=database, table_def=table_def, partition_columns=partition_columns, ) + else: + _add_partition( + client=client, + database=database, + table_def=table_def, + partition_columns=partition_columns, + ) else: _create_table( client=client, diff --git a/dbt/adapters/duckdb/secrets.py b/dbt/adapters/duckdb/secrets.py index a744c082..c26222ac 100644 --- a/dbt/adapters/duckdb/secrets.py +++ b/dbt/adapters/duckdb/secrets.py @@ -40,6 +40,24 @@ def create( secret_kwargs=kwargs, ) + def _format_value(self, key: str, value: Any) -> str: + """Format a value for DuckDB SQL based on its type and key.""" + # Keys that should not be quoted + unquoted_keys = ["type", "provider", "extra_http_headers"] + + if isinstance(value, dict): + # Format as DuckDB map: map {'key1': 'value1', 'key2': 'value2'} + items = [f"'{k}': '{v}'" for k, v in value.items()] + return f"{key} map {{{', '.join(items)}}}" + elif isinstance(value, list): + # Format as DuckDB array: array ['item1', 'item2'] + items = [f"'{item}'" for item in value] + return f"{key} array [{', '.join(items)}]" + elif key in unquoted_keys: + return f"{key} {value}" + else: + return f"{key} '{value}'" + def to_sql(self) -> str: name = f" {self.name}" if self.name else "" or_replace = " OR REPLACE" if name else "" @@ -60,19 +78,14 @@ def to_sql(self) -> str: params_sql: List[str] = [] for key, value in params.items(): if value is not None and key not in ["name", "persistent"]: - if key not in ["type", "provider", "extra_http_headers"]: - params_sql.append(f"{key} '{value}'") - else: - params_sql.append(f"{key} {value}") + params_sql.append(self._format_value(key, value)) for s in scope_value: params_sql.append(f"scope 
'{s}'") params_sql_str = f",\n{tab}".join(params_sql) else: params_sql_list = [ - f"{key} '{value}'" - if key not in ["type", "provider", "extra_http_headers"] - else f"{key} {value}" + self._format_value(key, value) for key, value in params.items() if value is not None and key not in ["name", "persistent"] ] diff --git a/dbt/include/duckdb/macros/adapters.sql b/dbt/include/duckdb/macros/adapters.sql index 29d2d570..4116d825 100644 --- a/dbt/include/duckdb/macros/adapters.sql +++ b/dbt/include/duckdb/macros/adapters.sql @@ -164,7 +164,11 @@ def materialize(df, con): {% macro duckdb__drop_relation(relation) -%} {% call statement('drop_relation', auto_begin=False) -%} - drop {{ relation.type }} if exists {{ relation }} cascade + {% if adapter.is_ducklake(relation) %} + drop {{ relation.type }} if exists {{ relation }} + {% else %} + drop {{ relation.type }} if exists {{ relation }} cascade + {% endif %} {%- endcall %} {% endmacro %} diff --git a/dbt/include/duckdb/macros/catalog.sql b/dbt/include/duckdb/macros/catalog.sql index bd94f1dc..339a6568 100644 --- a/dbt/include/duckdb/macros/catalog.sql +++ b/dbt/include/duckdb/macros/catalog.sql @@ -7,7 +7,7 @@ , t.database_name , t.schema_name , 'BASE TABLE' as table_type - , {{ adapter.catalog_comment('t') }} as table_comment + , t.comment as table_comment from duckdb_tables() t WHERE t.database_name = '{{ database }}' UNION ALL @@ -15,7 +15,7 @@ , v.database_name , v.schema_name , 'VIEW' as table_type - , {{ adapter.catalog_comment('v') }} as table_comment + , v.comment as table_comment from duckdb_views() v WHERE v.database_name = '{{ database }}' ) @@ -28,8 +28,8 @@ c.column_name, c.column_index as column_index, c.data_type as column_type, - {{ adapter.catalog_comment('c') }} as column_comment, - '' as table_owner + c.comment as column_comment, + NULL as table_owner FROM relations r JOIN duckdb_columns() c ON r.schema_name = c.schema_name AND r.table_name = c.table_name WHERE ( {%- for schema in schemas -%} diff --git a/dbt/include/duckdb/macros/columns.sql b/dbt/include/duckdb/macros/columns.sql index f0d5a19e..da8d414d 100644 --- a/dbt/include/duckdb/macros/columns.sql +++ b/dbt/include/duckdb/macros/columns.sql @@ -4,7 +4,7 @@ {% for column in add_columns %} {% set sql -%} alter {{ relation.type }} {{ relation }} add column - {{ column.name }} {{ column.data_type }} + {{ api.Relation.create(identifier=column.name) }} {{ column.data_type }} {%- endset -%} {% do run_query(sql) %} {% endfor %} @@ -14,7 +14,7 @@ {% for column in remove_columns %} {% set sql -%} alter {{ relation.type }} {{ relation }} drop column - {{ column.name }} + {{ api.Relation.create(identifier=column.name) }} {%- endset -%} {% do run_query(sql) %} {% endfor %} diff --git a/dbt/include/duckdb/macros/materializations/external.sql b/dbt/include/duckdb/macros/materializations/external.sql index fa15758e..8158261b 100644 --- a/dbt/include/duckdb/macros/materializations/external.sql +++ b/dbt/include/duckdb/macros/materializations/external.sql @@ -15,7 +15,7 @@ {%- endif -%} {%- set write_options = adapter.external_write_options(location, rendered_options) -%} - {%- set read_location = adapter.external_read_location(location, rendered_options) -%} + {%- set read_location = adapter.external_read_location(location, rendered_options, config.get('partition_columns', [])) -%} {%- set parquet_read_options = config.get('parquet_read_options', {'union_by_name': False}) -%} {%- set json_read_options = config.get('json_read_options', {'auto_detect': True}) -%} {%- set csv_read_options 
= config.get('csv_read_options', {'auto_detect': True}) -%} @@ -97,7 +97,7 @@ {% if row_count[0][0] == 0 %} where 1 {%- for col in get_columns_in_relation(temp_relation) -%} - {{ '' }} AND {{ col.column }} is not NULL + {{ '' }} AND "{{ col.column }}" is not NULL {%- endfor -%} {% endif %} ); @@ -117,7 +117,7 @@ {% if row_count[0][0] == 0 %} where 1 {%- for col in get_columns_in_relation(temp_relation) -%} - {{ '' }} AND {{ col.column }} is not NULL + {{ '' }} AND "{{ col.column }}" is not NULL {%- endfor -%} {% endif %} ); @@ -137,7 +137,7 @@ {% if row_count[0][0] == 0 %} where 1 {%- for col in get_columns_in_relation(temp_relation) -%} - {{ '' }} AND {{ col.column }} is not NULL + {{ '' }} AND "{{ col.column }}" is not NULL {%- endfor -%} {% endif %} ); diff --git a/dbt/include/duckdb/macros/materializations/incremental.sql b/dbt/include/duckdb/macros/materializations/incremental.sql index f5797391..268f84a0 100644 --- a/dbt/include/duckdb/macros/materializations/incremental.sql +++ b/dbt/include/duckdb/macros/materializations/incremental.sql @@ -49,7 +49,7 @@ {% do to_drop.append(temp_relation) %} {% endif %} {% if language == 'python' %} - {% set build_python = create_table_as(False, temp_relation, compiled_code, language) %} + {% set build_python = create_table_as(temporary, temp_relation, compiled_code, language) %} {% call statement("pre", language=language) %} {{- build_python }} {% endcall %} diff --git a/dbt/include/duckdb/macros/utils/upstream.sql b/dbt/include/duckdb/macros/utils/upstream.sql index e5fad1e9..81077981 100644 --- a/dbt/include/duckdb/macros/utils/upstream.sql +++ b/dbt/include/duckdb/macros/utils/upstream.sql @@ -22,7 +22,7 @@ {%- set upstream_location = adapter.external_read_location(location, rendered_options) -%} {% if upstream_rel.schema not in upstream_schemas %} {% call statement('main', language='sql') -%} - create schema if not exists {{ upstream_rel.schema }} + create schema if not exists {{ upstream_rel.without_identifier() }} {%- endcall %} {% do upstream_schemas.update({upstream_rel.schema: None}) %} {% endif %} @@ -35,6 +35,8 @@ {% endif %} {% endfor %} {% endfor %} -{% do adapter.commit() %} +{% if upstream_nodes %} + {% do adapter.commit() %} +{% endif %} {% endif %} {%- endmacro -%} diff --git a/dev-requirements.txt b/dev-requirements.txt index c5f08449..312dfb8d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,20 +2,20 @@ # git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core # git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter -dbt-tests-adapter==1.11.0 +dbt-tests-adapter==1.17.0 boto3 mypy-boto3-glue pandas -pyarrow==18.1.0 +pyarrow==21.0.0 buenavista==0.5.0 bumpversion flaky -freezegun==1.5.1 +freezegun==1.5.5 fsspec gspread ipdb -mypy==1.15.0 +mypy==1.17.1 openpyxl pip-tools pre-commit diff --git a/setup.cfg b/setup.cfg index ca0289f2..215254e8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,10 +37,6 @@ include = dbt dbt.* -[options.entry_points] -console_scripts = - duckdbt = dbt.adapters.duckdb.cli:main - [build-system] requires = ["setuptools >= 61.2", "pbr>=1.9"] @@ -49,7 +45,7 @@ glue = boto3 mypy-boto3-glue md = - duckdb==1.1.1 + duckdb==1.3.1 [files] packages = diff --git a/tests/functional/adapter/test_incremental.py b/tests/functional/adapter/test_incremental.py index c34d7626..7a9f4622 100644 --- a/tests/functional/adapter/test_incremental.py +++ b/tests/functional/adapter/test_incremental.py @@ -6,16 +6,139 @@ ) from 
dbt.tests.adapter.incremental.test_incremental_on_schema_change import ( BaseIncrementalOnSchemaChange, + BaseIncrementalOnSchemaChangeSetup ) +from dbt.artifacts.schemas.results import RunStatus +from dbt.tests.util import run_dbt +import pytest class TestIncrementalUniqueKey(BaseIncrementalUniqueKey): - pass + def test__bad_unique_key_list(self, project): + """expect compilation error from unique key not being a column""" + + (status, exc) = self.fail_to_build_inc_missing_unique_key_column( + incremental_model_name="not_found_unique_key_list" + ) + + assert status == RunStatus.Error + # MotherDuck has a `dbt_temp` workaround for incremental runs which causes this test to fail + # because the error message is being truncated with DuckDB >= 1.2.0 + if not project.adapter.config.credentials.is_motherduck: + assert "thisisnotacolumn" in exc.lower() class TestIncrementalPredicates(BaseIncrementalPredicates): pass -class TestIncrementalOnSchemaChange(BaseIncrementalOnSchemaChange): +class TestIncrementalOnSchemaChange(BaseIncrementalOnSchemaChange): pass + + +models__incremental_append_new_columns_with_space = """ +{{ + config( + materialized='incremental', + unique_key='id', + on_schema_change='append_new_columns' + ) +}} + +{% set string_type = dbt.type_string() %} + +WITH source_data AS (SELECT * FROM {{ ref('model_a') }} ) + +{% if is_incremental() %} + +SELECT id, + cast(field1 as {{string_type}}) as field1, + cast(field2 as {{string_type}}) as field2, + cast(field3 as {{string_type}}) as "field 3", + cast(field4 as {{string_type}}) as "field 4" +FROM source_data WHERE id NOT IN (SELECT id from {{ this }} ) + +{% else %} + +SELECT id, + cast(field1 as {{string_type}}) as field1, + cast(field2 as {{string_type}}) as field2 +FROM source_data where id <= 3 + +{% endif %} +""" + +class TestIncrementalOnSchemaChangeQuotingFalse(BaseIncrementalOnSchemaChangeSetup): + """ We need a new class based on the _Setup base class to allow project config change without repeating all other tests""" + @pytest.fixture(scope="class") + def models(self): + """ Override the models test fixture with the custom one injected """ + # Get the original models dict + mods = dict(BaseIncrementalOnSchemaChange.models.__wrapped__(self)) + # Add the custom model + mods["incremental_append_new_columns_with_space.sql"] = models__incremental_append_new_columns_with_space + return mods + + + def run_twice_and_return_status(self, select, expect_pass_2nd_run): + """Two runs of the specified models - return the status and message from the second""" + run_dbt( + ["run", "--select", select, "--full-refresh"], + expect_pass=True, + ) + run_result = run_dbt( + ["run", "--select", select], expect_pass=expect_pass_2nd_run + ).results[-1] + return run_result.status, run_result.message + + + @pytest.fixture(scope="class") + def project_config_update(self): + return {"quoting": {"identifier": False}} + + + def test__handle_identifier_quoting_config_false(self, project): + # it should fail if quoting is set to false + (status, exc) = self.run_twice_and_return_status( + select="model_a incremental_append_new_columns_with_space", + expect_pass_2nd_run=False + ) + assert status == RunStatus.Error + +class TestIncrementalOnSchemaChangeQuotingTrue(BaseIncrementalOnSchemaChangeSetup): + """ We need a new class based on the _Setup base class to allow project config change without repeating all other tests""" + @pytest.fixture(scope="class") + def models(self): + """ Override the models test fixture with the custom one injected """ + # Get the 
original models dict + mods = dict(BaseIncrementalOnSchemaChange.models.__wrapped__(self)) + # Add the custom model + mods["incremental_append_new_columns_with_space.sql"] = models__incremental_append_new_columns_with_space + return mods + + + def run_twice_and_return_status(self, select, expect_pass_2nd_run): + """Two runs of the specified models - return the status and message from the second""" + run_dbt( + ["run", "--select", select, "--full-refresh"], + expect_pass=True, + ) + run_result = run_dbt( + ["run", "--select", select], expect_pass=expect_pass_2nd_run + ).results[-1] + return run_result.status, run_result.message + + + @pytest.fixture(scope="class") + def project_config_update(self): + return {"quoting": {"identifier": True}} + + + def test__handle_identifier_quoting_config_false(self, project): + # it should fail if quoting is set to false + (status, exc) = self.run_twice_and_return_status( + select="model_a incremental_append_new_columns_with_space", + expect_pass_2nd_run=True + ) + assert status == RunStatus.Success + diff --git a/tests/functional/plugins/motherduck/test_motherduck_write_conflict.py b/tests/functional/plugins/motherduck/test_motherduck_write_conflict.py new file mode 100644 index 00000000..bfcbedf9 --- /dev/null +++ b/tests/functional/plugins/motherduck/test_motherduck_write_conflict.py @@ -0,0 +1,83 @@ +import pytest +from dbt.tests.util import run_dbt +from dbt.exceptions import DbtRuntimeError + + +incremental_model_1_sql = """ +{{ config(materialized='incremental') }} + +select + generate_series as id, + 'model_1_data_' || generate_series::varchar as data, + current_timestamp as created_at +from generate_series(1, 100) + +{% if is_incremental() %} + where generate_series > (select coalesce(max(id), 0) from {{ this }}) +{% endif %} +""" + +incremental_model_2_sql = """ +{{ config(materialized='incremental') }} + +select + generate_series as id, + 'model_2_data_' || generate_series::varchar as data, + current_timestamp as created_at +from generate_series(1, 50) + +{% if is_incremental() %} + where generate_series > (select coalesce(max(id), 0) from {{ this }}) +{% endif %} +""" + + +@pytest.mark.skip_profile("buenavista", "file", "memory") +class TestMDWriteConflict: + """Test to reproduce the write-write conflict with multiple models trying to create the dbt_temp schema concurrently.""" + + @pytest.fixture(scope="class") + def profiles_config_update(self, dbt_profile_target): + """Configure with 2 threads to trigger write conflict.""" + return { + "test": { + "outputs": { + "dev": { + "type": "duckdb", + "path": "test_write_conflict.duckdb", + "attach": [ + { + "path": "md:", + } # Attach MotherDuck + ], + "threads": 2, # Enable threading to trigger conflict + } + }, + "target": "dev", + } + } + + @pytest.fixture(scope="class") + def models(self): + return { + "incremental_model_1.sql": incremental_model_1_sql, + "incremental_model_2.sql": incremental_model_2_sql, + } + + def test_write_conflict_on_second_run(self, project): + """ + Test that reproduces the write-write conflict: + 1. First run always succeeds (initializes both incremental models) + 2. Second run, which is the first true incremental run, should succeed, + while it previously failed with a write-write conflict due to + both models trying to create the dbt_temp schema simultaneously. 
+ """ + results = run_dbt(expect_pass=True) + + res1 = project.run_sql("SELECT count(*) FROM incremental_model_1", fetch="one") + assert res1[0] == 100 + + res2 = project.run_sql("SELECT count(*) FROM incremental_model_2", fetch="one") + assert res2[0] == 50 + + run_dbt(expect_pass=True) diff --git a/tests/functional/plugins/test_plugins.py b/tests/functional/plugins/test_plugins.py index 124ee178..609a1c96 100644 --- a/tests/functional/plugins/test_plugins.py +++ b/tests/functional/plugins/test_plugins.py @@ -46,6 +46,8 @@ class TestPlugins: @pytest.fixture(scope="class") def sqlite_test_db(self): path = "/tmp/satest.db" + if os.path.exists(path): + os.unlink(path) db = sqlite3.connect(path) cursor = db.cursor() cursor.execute("CREATE TABLE tt1 (id int, name text)") diff --git a/tests/unit/test_column.py b/tests/unit/test_column.py index 1f690fa4..a1243fe6 100644 --- a/tests/unit/test_column.py +++ b/tests/unit/test_column.py @@ -43,3 +43,43 @@ def test_is_float(dtype, expected): def test_is_integer(dtype, expected): column = DuckDBColumn(column="integer_test", dtype=dtype) assert column.is_integer() == expected + +# Test cases for is_struct method +@pytest.mark.parametrize("dtype, expected", [ + ("struct(a integer, b varchar)", True), + ("struct(a integer)", True), + ("STRUCT(a integer, b varchar)", True), + ("integer", False), + ("varchar", False), +]) +def test_is_struct(dtype, expected): + column = DuckDBColumn(column="struct_test", dtype=dtype) + assert column.is_struct() == expected + +# Test cases for flatten method +def test_flatten_simple_struct(): + column = DuckDBColumn(column="struct_test", dtype="struct(a integer, b varchar)") + flattened = column.flatten() + assert len(flattened) == 2 + assert flattened[0].column == "struct_test.a" + assert flattened[0].dtype == "integer" + assert flattened[1].column == "struct_test.b" + assert flattened[1].dtype == "varchar" + +def test_flatten_nested_struct(): + column = DuckDBColumn(column="struct_test", dtype="struct(a integer, b struct(c integer, d varchar))") + flattened = column.flatten() + assert len(flattened) == 3 + assert flattened[0].column == "struct_test.a" + assert flattened[0].dtype == "integer" + assert flattened[1].column == "struct_test.b.c" + assert flattened[1].dtype == "integer" + assert flattened[2].column == "struct_test.b.d" + assert flattened[2].dtype == "varchar" + +def test_flatten_non_struct(): + column = DuckDBColumn(column="integer_test", dtype="integer") + flattened = column.flatten() + assert len(flattened) == 1 + assert flattened[0].column == "integer_test" + assert flattened[0].dtype == "integer" \ No newline at end of file diff --git a/tests/unit/test_credentials.py b/tests/unit/test_credentials.py index 09b2d638..1c674ccd 100644 --- a/tests/unit/test_credentials.py +++ b/tests/unit/test_credentials.py @@ -237,6 +237,73 @@ def test_attachments(): assert expected_sql[i] == attachment.to_sql() +def test_attachments_with_options(): + # Test arbitrary options in options dict + attachment = Attachment( + path="/tmp/test.db", + alias="test_db", + options={ + "cache_size": "1GB", + "threads": 4, + "enable_fsst": True, + "disabled_feature": False + } + ) + sql = attachment.to_sql() + assert "ATTACH '/tmp/test.db' AS test_db (CACHE_SIZE '1GB', THREADS 4, ENABLE_FSST)" == sql + + # Test options dict with legacy options (no conflicts) + attachment = Attachment( + path="/tmp/test.db", + options={"type": "sqlite", "secret": "my_secret", "read_only": True} + ) + sql = attachment.to_sql() + assert "ATTACH '/tmp/test.db' 
(TYPE sqlite, SECRET my_secret, READ_ONLY)" == sql + + # Test mixed legacy and options dict (no conflicts) + attachment = Attachment( + path="/tmp/test.db", + type="sqlite", + options={"cache_size": "512MB", "enable_fsst": True} + ) + sql = attachment.to_sql() + assert "ATTACH '/tmp/test.db' (TYPE sqlite, CACHE_SIZE '512MB', ENABLE_FSST)" == sql + + +def test_attachment_option_conflicts(): + from dbt_common.exceptions import DbtRuntimeError + + # Test conflict with type + with pytest.raises(DbtRuntimeError) as exc: + attachment = Attachment( + path="/tmp/test.db", + type="sqlite", + options={"type": "postgres"} + ) + attachment.to_sql() + assert "type" in str(exc.value) + + # Test conflict with secret + with pytest.raises(DbtRuntimeError) as exc: + attachment = Attachment( + path="/tmp/test.db", + secret="secret1", + options={"secret": "secret2"} + ) + attachment.to_sql() + assert "secret" in str(exc.value) + + # Test conflict with read_only + with pytest.raises(DbtRuntimeError) as exc: + attachment = Attachment( + path="/tmp/test.db", + read_only=True, + options={"read_only": True} + ) + attachment.to_sql() + assert "read_only" in str(exc.value) + + def test_infer_database_name_from_path(): payload = {} creds = DuckDBCredentials.from_dict(payload) @@ -268,3 +335,93 @@ def test_infer_database_name_from_path(): } creds = DuckDBCredentials.from_dict(payload) assert creds.database == "remote" + + +def test_database_matches_attach_alias(): + """Test that database field can match an attach alias instead of path-derived name""" + payload = { + "path": "/tmp/primary.db", # This would normally set database to "primary" + "database": "my_alias", # But we want to use an attach alias instead + "attach": [ + {"path": "/tmp/secondary.db", "alias": "my_alias"}, + {"path": "/tmp/tertiary.db", "alias": "other_alias"} + ] + } + creds = DuckDBCredentials.from_dict(payload) + assert creds.database == "my_alias" + + +def test_database_mismatch_without_attach_alias(): + """Test that mismatched database still raises error when not in attach aliases""" + from dbt_common.exceptions import DbtRuntimeError + + payload = { + "path": "/tmp/primary.db", + "database": "nonexistent_alias", + "attach": [ + {"path": "/tmp/secondary.db", "alias": "my_alias"} + ] + } + with pytest.raises(DbtRuntimeError) as exc: + DuckDBCredentials.from_dict(payload) + assert "Inconsistency detected between 'path' and 'database' fields" in str(exc.value) + + +def test_database_matches_attach_alias_no_alias(): + """Test that attach entries without aliases don't affect validation""" + from dbt_common.exceptions import DbtRuntimeError + + payload = { + "path": "/tmp/primary.db", + "database": "nonexistent_alias", + "attach": [ + {"path": "/tmp/secondary.db"} # No alias + ] + } + with pytest.raises(DbtRuntimeError) as exc: + DuckDBCredentials.from_dict(payload) + assert "Inconsistency detected between 'path' and 'database' fields" in str(exc.value) + + +def test_add_ducklake_secret_with_map(): + """Test ducklake secret with metadata_parameters as a map.""" + creds = DuckDBCredentials( + secrets=[ + dict( + type="ducklake", + name="sdp_catalog", + metadata_path="", + metadata_schema="oxy_main", + metadata_parameters={"TYPE": "postgres", "SECRET": "sdp_metadata"} + ) + ] + ) + + sql = creds.secrets_sql()[0] + expected = """CREATE OR REPLACE SECRET sdp_catalog ( + type ducklake, + metadata_path '', + metadata_schema 'oxy_main', + metadata_parameters map {'TYPE': 'postgres', 'SECRET': 'sdp_metadata'} +)""" + assert sql == expected + + +def 
test_add_secret_with_list(): + """Test secret with list parameter.""" + creds = DuckDBCredentials( + secrets=[ + dict( + type="custom", + name="test_secret", + allowed_hosts=["host1", "host2", "host3"] + ) + ] + ) + + sql = creds.secrets_sql()[0] + expected = """CREATE OR REPLACE SECRET test_secret ( + type custom, + allowed_hosts array ['host1', 'host2', 'host3'] +)""" + assert sql == expected diff --git a/tests/unit/test_data_path_quoting.py b/tests/unit/test_data_path_quoting.py new file mode 100644 index 00000000..fcb1c31e --- /dev/null +++ b/tests/unit/test_data_path_quoting.py @@ -0,0 +1,127 @@ +import pytest +from dbt.adapters.duckdb.credentials import Attachment + + +class TestDataPathQuoting: + """Test that data_path options are properly quoted in SQL generation.""" + + def test_data_path_s3_url_should_be_quoted(self): + """Test that S3 URLs in data_path are properly quoted.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": "s3://my-bucket/path"} + ) + sql = attachment.to_sql() + # Should generate: ATTACH '/tmp/test.db' (DATA_PATH 's3://my-bucket/path') + assert "DATA_PATH 's3://my-bucket/path'" in sql + + def test_data_path_windows_path_should_be_quoted(self): + """Test that Windows paths in data_path are properly quoted.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": "C:\\Users\\test\\data"} + ) + sql = attachment.to_sql() + # Should generate: ATTACH '/tmp/test.db' (DATA_PATH 'C:\Users\test\data') + assert "DATA_PATH 'C:\\Users\\test\\data'" in sql + + def test_data_path_unix_path_should_be_quoted(self): + """Test that Unix paths in data_path are properly quoted.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": "/home/user/data"} + ) + sql = attachment.to_sql() + # Should generate: ATTACH '/tmp/test.db' (DATA_PATH '/home/user/data') + assert "DATA_PATH '/home/user/data'" in sql + + def test_data_path_url_with_spaces_should_be_quoted(self): + """Test that paths with spaces are properly quoted.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": "/path/with spaces/data"} + ) + sql = attachment.to_sql() + # Should generate: ATTACH '/tmp/test.db' (DATA_PATH '/path/with spaces/data') + assert "DATA_PATH '/path/with spaces/data'" in sql + + def test_numeric_options_should_not_be_quoted(self): + """Test that numeric options are not quoted.""" + attachment = Attachment( + path="/tmp/test.db", + options={"timeout": 30000} + ) + sql = attachment.to_sql() + # Should generate: ATTACH '/tmp/test.db' (TIMEOUT 30000) + assert "TIMEOUT 30000" in sql + assert "TIMEOUT '30000'" not in sql + + def test_boolean_options_work_correctly(self): + """Test that boolean options work as expected.""" + attachment = Attachment( + path="/tmp/test.db", + options={"use_cache": True, "skip_validation": False} + ) + sql = attachment.to_sql() + # True booleans should appear as flag, False booleans should be omitted + assert "USE_CACHE" in sql + assert "SKIP_VALIDATION" not in sql + + def test_multiple_options_with_data_path(self): + """Test multiple options including data_path.""" + attachment = Attachment( + path="/tmp/test.db", + options={ + "data_path": "s3://bucket/path", + "timeout": 5000, + "use_cache": True + } + ) + sql = attachment.to_sql() + assert "DATA_PATH 's3://bucket/path'" in sql + assert "TIMEOUT 5000" in sql + assert "USE_CACHE" in sql + + def test_already_single_quoted_strings_not_double_quoted(self): + """Test that already single-quoted strings are not double-quoted.""" + attachment = 
Attachment( + path="/tmp/test.db", + options={"data_path": "'s3://my-bucket/path'"} + ) + sql = attachment.to_sql() + # Should keep existing single quotes, not add more + assert "DATA_PATH 's3://my-bucket/path'" in sql + assert "DATA_PATH ''s3://my-bucket/path''" not in sql + + def test_already_double_quoted_strings_preserved(self): + """Test that already double-quoted strings are preserved.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": '"s3://my-bucket/path"'} + ) + sql = attachment.to_sql() + # Should keep existing double quotes + assert 'DATA_PATH "s3://my-bucket/path"' in sql + assert 'DATA_PATH \'"s3://my-bucket/path"\'' not in sql + + def test_quoted_strings_with_whitespace_preserved(self): + """Test that quoted strings with surrounding whitespace are preserved.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": " 's3://my-bucket/path' "} + ) + sql = attachment.to_sql() + # Should detect quotes despite whitespace and preserve original value + assert "DATA_PATH 's3://my-bucket/path' " in sql + assert "DATA_PATH ' 's3://my-bucket/path' '" not in sql + + def test_quoted_strings_with_whitespace_double_quotes(self): + """Test that double quoted strings with surrounding whitespace are preserved.""" + attachment = Attachment( + path="/tmp/test.db", + options={"data_path": ' "s3://my-bucket/path" '} + ) + sql = attachment.to_sql() + # Should detect quotes despite whitespace and preserve original value + assert 'DATA_PATH "s3://my-bucket/path" ' in sql + assert 'DATA_PATH \' "s3://my-bucket/path" \'' not in sql \ No newline at end of file diff --git a/tests/unit/test_duckdb_adapter.py b/tests/unit/test_duckdb_adapter.py index 692fb99f..d70f5004 100644 --- a/tests/unit/test_duckdb_adapter.py +++ b/tests/unit/test_duckdb_adapter.py @@ -5,6 +5,7 @@ from dbt.flags import set_from_args from dbt.adapters.duckdb import DuckDBAdapter from dbt.adapters.duckdb.connections import DuckDBConnectionManager +from dbt.adapters.duckdb.relation import DuckDBRelation from tests.unit.utils import config_from_parts_or_dicts, mock_connection @@ -122,3 +123,183 @@ def test_create_secret(self, connector): secret 'xyz', region 'us-west-2' )""") + + +class TestDuckDBAdapterIsDucklake(unittest.TestCase): + def setUp(self): + set_from_args(Namespace(STRICT_MODE=True), {}) + + self.base_profile_cfg = { + "outputs": { + "test": { + "type": "duckdb", + "path": ":memory:", + } + }, + "target": "test", + } + + project_cfg = { + "name": "X", + "version": "0.1", + "profile": "test", + "project-root": "/tmp/dbt/does-not-exist", + "quoting": { + "identifier": False, + "schema": True, + }, + "config-version": 2, + } + + self.project_cfg = project_cfg + self.mock_mp_context = mock.MagicMock() + + def _get_adapter(self, profile_cfg): + config = config_from_parts_or_dicts(self.project_cfg, profile_cfg, cli_vars={}) + return DuckDBAdapter(config, self.mock_mp_context) + + def test_is_ducklake_no_attach_config(self): + """Test is_ducklake returns False when no attach configuration exists.""" + adapter = self._get_adapter(self.base_profile_cfg) + relation = DuckDBRelation.create(database="test_db", schema="test_schema", identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertFalse(result) + + def test_is_ducklake_empty_attach_config(self): + """Test is_ducklake returns False when attach configuration is empty.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [] + + adapter = self._get_adapter(profile_cfg) + 
relation = DuckDBRelation.create(database="test_db", schema="test_schema", identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertFalse(result) + + def test_is_ducklake_with_ducklake_attachment(self): + """Test is_ducklake returns True when relation database matches ducklake attachment.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [ + { + "alias": "ducklake_db", + "path": "ducklake:sqlite:storage/metadata.sqlite" + } + ] + + adapter = self._get_adapter(profile_cfg) + relation = DuckDBRelation.create(database="ducklake_db", schema="test_schema", identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertTrue(result) + + def test_is_ducklake_with_regular_attachment(self): + """Test is_ducklake returns False when relation database matches non-ducklake attachment.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [ + { + "alias": "regular_db", + "path": "/path/to/regular.db" + } + ] + + adapter = self._get_adapter(profile_cfg) + relation = DuckDBRelation.create(database="regular_db", schema="test_schema", identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertFalse(result) + + def test_is_ducklake_with_mixed_attachments(self): + """Test is_ducklake correctly identifies ducklake among mixed attachments.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [ + { + "alias": "regular_db", + "path": "/path/to/regular.db" + }, + { + "alias": "ducklake_db", + "path": "ducklake:sqlite:storage/metadata.sqlite" + }, + { + "alias": "another_db", + "path": "s3://another-bucket/data" + } + ] + + adapter = self._get_adapter(profile_cfg) + + # Test ducklake database + ducklake_relation = DuckDBRelation.create(database="ducklake_db", schema="test_schema", identifier="test_table") + self.assertTrue(adapter.is_ducklake(ducklake_relation)) + + # Test regular database + regular_relation = DuckDBRelation.create(database="regular_db", schema="test_schema", identifier="test_table") + self.assertFalse(adapter.is_ducklake(regular_relation)) + + # Test another non-ducklake database + another_relation = DuckDBRelation.create(database="another_db", schema="test_schema", identifier="test_table") + self.assertFalse(adapter.is_ducklake(another_relation)) + + def test_is_ducklake_no_database_on_relation(self): + """Test is_ducklake returns False when relation has no database.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [ + { + "alias": "ducklake_db", + "path": "ducklake:sqlite:storage/metadata.sqlite" + } + ] + + adapter = self._get_adapter(profile_cfg) + relation = DuckDBRelation.create(database=None, schema="test_schema", identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertFalse(result) + + def test_is_ducklake_none_relation(self): + """Test is_ducklake returns False when relation is None.""" + adapter = self._get_adapter(self.base_profile_cfg) + + result = adapter.is_ducklake(None) + + self.assertFalse(result) + + def test_is_ducklake_attachment_missing_alias(self): + """Test is_ducklake handles attachments missing alias gracefully.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [ + { + "path": "ducklake:sqlite:storage/metadata.sqlite" + # Missing alias + } + ] + + adapter = self._get_adapter(profile_cfg) + relation = DuckDBRelation.create(database="test_db", schema="test_schema", 
identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertFalse(result) + + def test_is_ducklake_attachment_with_empty_path(self): + """Test is_ducklake handles attachments with empty path gracefully.""" + profile_cfg = self.base_profile_cfg.copy() + profile_cfg["outputs"]["test"]["attach"] = [ + { + "alias": "test_db", + "path": "" # Empty path instead of missing path + } + ] + + adapter = self._get_adapter(profile_cfg) + relation = DuckDBRelation.create(database="test_db", schema="test_schema", identifier="test_table") + + result = adapter.is_ducklake(relation) + + self.assertFalse(result) diff --git a/tests/unit/test_get_column_schema.py b/tests/unit/test_get_column_schema.py new file mode 100644 index 00000000..c971383b --- /dev/null +++ b/tests/unit/test_get_column_schema.py @@ -0,0 +1,61 @@ +import unittest +from argparse import Namespace +from unittest import mock + +from dbt.flags import set_from_args +from dbt.adapters.duckdb import DuckDBAdapter +from tests.unit.utils import config_from_parts_or_dicts + + +class TestDuckDBAdapterGetColumnSchemaFromQuery(unittest.TestCase): + def setUp(self): + set_from_args(Namespace(STRICT_MODE=True), {}) + + profile_cfg = { + "outputs": { + "test": { + "type": "duckdb", + "path": ":memory:", + } + }, + "target": "test", + } + + project_cfg = { + "name": "X", + "version": "0.1", + "profile": "test", + "project-root": "/tmp/dbt/does-not-exist", + "quoting": { + "identifier": False, + "schema": True, + }, + "config-version": 2, + } + + self.config = config_from_parts_or_dicts(project_cfg, profile_cfg, cli_vars={}) + self.mock_mp_context = mock.MagicMock() + self._adapter = None + + @property + def adapter(self): + if self._adapter is None: + self._adapter = DuckDBAdapter(self.config, self.mock_mp_context) + return self._adapter + + def test_get_column_schema_from_query_with_struct(self): + """Test get_column_schema_from_query flattens struct columns.""" + mock_cursor = mock.MagicMock() + mock_cursor.fetchall.return_value = [ + ("id", "INTEGER"), + ("user_data", "STRUCT(name VARCHAR, age INTEGER)") + ] + + with mock.patch.object(self.adapter.connections, 'add_select_query', return_value=(None, mock_cursor)): + result = self.adapter.get_column_schema_from_query("SELECT * FROM test_table") + + # Verify result contains flattened columns (1 simple + 2 from struct) + self.assertEqual(len(result), 3) + self.assertEqual(result[0].column, "id") + self.assertEqual(result[1].column, "user_data.name") + self.assertEqual(result[2].column, "user_data.age")