diff --git a/Cargo.toml b/Cargo.toml index 6366f6f4..0a660dce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,28 @@ -[package] -name = "datafusion-table-providers" +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[workspace] +members = [ + "core", + "python", +] +resolver = "2" + +[workspace.package] version = "0.2.3" readme = "README.md" edition = "2021" @@ -7,126 +30,8 @@ repository = "https://github.com/datafusion-contrib/datafusion-table-providers" license = "Apache-2.0" description = "Extend the capabilities of DataFusion to support additional data sources via implementations of the `TableProvider` trait." 
-[dependencies] -arrow-flight = { version = "53", optional = true, features = [ - "flight-sql-experimental", - "tls", -] } -arrow-odbc = { version = "14.0", optional = true } -async-stream = { version = "0.3.6", optional = true } -async-trait = "0.1" -bb8 = { version = "0.8", optional = true } -bb8-postgres = { version = "0.8", optional = true } -bigdecimal = "0.4.6" -byteorder = "1.5.0" -chrono = "0.4.38" -dashmap = "6.1.0" +[workspace.dependencies] datafusion = { version = "43", default-features = false } -datafusion-federation = { version = "0.3.1", features = [ - "sql", -], optional = true } -datafusion-proto = { version = "43", optional = true } -duckdb = { version = "1.1.1", features = [ - "bundled", - "r2d2", - "vtab", - "vtab-arrow", - "appender-arrow", -], optional = true } -dyn-clone = { version = "1.0", optional = true } -fallible-iterator = "0.3.0" -fundu = "2.0.1" -futures = "0.3" -geo-types = "0.7" -itertools = "0.13.0" -mysql_async = { version = "0.34", features = [ - "native-tls-tls", - "chrono", -], optional = true } -native-tls = { version = "0.2.12", optional = true } -num-bigint = "0.4" -odbc-api = { version = "10.0.0", optional = true } -pem = { version = "3.0.4", optional = true } -postgres-native-tls = { version = "0.5.0", optional = true } -prost = { version = "0.13", optional = true } -r2d2 = { version = "0.8.10", optional = true } -rusqlite = { version = "0.32.1", optional = true } -sea-query = { version = "0.32.0", features = [ - "backend-sqlite", - "backend-postgres", - "postgres-array", - "with-rust_decimal", - "with-bigdecimal", - "with-time", - "with-chrono", -] } -secrecy = "0.8.0" -serde = { version = "1.0", optional = true } -serde_json = "1.0" -sha2 = "0.10.8" -snafu = "0.8.5" -time = "0.3.36" -tokio = { version = "1.41", features = ["macros", "fs"] } -tokio-postgres = { version = "0.7.12", features = [ - "with-chrono-0_4", - "with-uuid-1", - "with-serde_json-1", - "with-geo-types-0_7", -], optional = true } -tokio-rusqlite = { 
version = "0.6.0", optional = true } -tonic = { version = "0.12", optional = true, features = [ - "tls-native-roots", - "tls-webpki-roots", -] } -tracing = "0.1.40" -trust-dns-resolver = "0.23.2" -url = "2.5.4" -uuid = { version = "1.11.0", optional = true } - -[dev-dependencies] -anyhow = "1.0" -bollard = "0.18.1" -geozero = { version = "0.14.0", features = ["with-wkb"] } -insta = { version = "1.41.1", features = ["filters"] } -prost = { version = "0.13" } -rand = "0.8.5" -reqwest = "0.12.9" -rstest = "0.23.0" -test-log = { version = "0.2.16", features = ["trace"] } -tokio-stream = { version = "0.1.16", features = ["net"] } -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } - -[features] -duckdb = [ - "dep:duckdb", - "dep:r2d2", - "dep:uuid", - "dep:dyn-clone", - "dep:async-stream", -] -duckdb-federation = ["duckdb", "federation"] -federation = ["dep:datafusion-federation"] -flight = [ - "dep:arrow-flight", - "datafusion/serde", - "dep:datafusion-proto", - "dep:serde", - "dep:tonic", -] -mysql = ["dep:mysql_async", "dep:async-stream"] -mysql-federation = ["mysql", "federation"] -odbc = ["dep:odbc-api", "dep:arrow-odbc", "dep:async-stream", "dep:dyn-clone"] -odbc-federation = ["odbc", "federation"] -postgres = [ - "dep:tokio-postgres", - "dep:uuid", - "dep:postgres-native-tls", - "dep:bb8", - "dep:bb8-postgres", - "dep:native-tls", - "dep:pem", - "dep:async-stream", -] -postgres-federation = ["postgres", "federation"] -sqlite = ["dep:rusqlite", "dep:tokio-rusqlite"] -sqlite-federation = ["sqlite", "federation"] +datafusion-ffi = { version = "43" } +datafusion-proto = { version = "43" } +datafusion-table-providers = { path = "core" } diff --git a/core/Cargo.toml b/core/Cargo.toml new file mode 100644 index 00000000..c661dee3 --- /dev/null +++ b/core/Cargo.toml @@ -0,0 +1,132 @@ +[package] +name = "datafusion-table-providers" +version = { workspace = true } +readme = { workspace = true } +edition = { workspace = true } +repository = { workspace = 
true } +license = { workspace = true } +description = { workspace = true } + +[dependencies] +arrow-flight = { version = "53", optional = true, features = [ + "flight-sql-experimental", + "tls", +] } +arrow-odbc = { version = "14.0", optional = true } +async-stream = { version = "0.3.6", optional = true } +async-trait = "0.1" +bb8 = { version = "0.8", optional = true } +bb8-postgres = { version = "0.8", optional = true } +bigdecimal = "0.4.6" +byteorder = "1.5.0" +chrono = "0.4.38" +dashmap = "6.1.0" +datafusion = { workspace = true } +datafusion-federation = { version = "0.3.1", features = [ + "sql", +], optional = true } +datafusion-proto = { workspace = true, optional = true } +duckdb = { version = "1.1.1", features = [ + "bundled", + "r2d2", + "vtab", + "vtab-arrow", + "appender-arrow", +], optional = true } +dyn-clone = { version = "1.0", optional = true } +fallible-iterator = "0.3.0" +fundu = "2.0.1" +futures = "0.3" +geo-types = "0.7" +itertools = "0.13.0" +mysql_async = { version = "0.34", features = [ + "native-tls-tls", + "chrono", +], optional = true } +native-tls = { version = "0.2.12", optional = true } +num-bigint = "0.4" +odbc-api = { version = "10.0.0", optional = true } +pem = { version = "3.0.4", optional = true } +postgres-native-tls = { version = "0.5.0", optional = true } +prost = { version = "0.13", optional = true } +r2d2 = { version = "0.8.10", optional = true } +rusqlite = { version = "0.32.1", optional = true } +sea-query = { version = "0.32.0", features = [ + "backend-sqlite", + "backend-postgres", + "postgres-array", + "with-rust_decimal", + "with-bigdecimal", + "with-time", + "with-chrono", +] } +secrecy = "0.8.0" +serde = { version = "1.0", optional = true } +serde_json = "1.0" +sha2 = "0.10.8" +snafu = "0.8.5" +time = "0.3.36" +tokio = { version = "1.41", features = ["macros", "fs"] } +tokio-postgres = { version = "0.7.12", features = [ + "with-chrono-0_4", + "with-uuid-1", + "with-serde_json-1", + "with-geo-types-0_7", +], optional = 
true } +tokio-rusqlite = { version = "0.6.0", optional = true } +tonic = { version = "0.12", optional = true, features = [ + "tls-native-roots", + "tls-webpki-roots", +] } +tracing = "0.1.40" +trust-dns-resolver = "0.23.2" +url = "2.5.4" +uuid = { version = "1.11.0", optional = true } + +[dev-dependencies] +anyhow = "1.0" +bollard = "0.18.1" +geozero = { version = "0.14.0", features = ["with-wkb"] } +insta = { version = "1.41.1", features = ["filters"] } +prost = { version = "0.13" } +rand = "0.8.5" +reqwest = "0.12.9" +rstest = "0.23.0" +test-log = { version = "0.2.16", features = ["trace"] } +tokio-stream = { version = "0.1.16", features = ["net"] } +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } + +[features] +duckdb = [ + "dep:duckdb", + "dep:r2d2", + "dep:uuid", + "dep:dyn-clone", + "dep:async-stream", +] +duckdb-federation = ["duckdb", "federation"] +federation = ["dep:datafusion-federation"] +flight = [ + "dep:arrow-flight", + "datafusion/serde", + "dep:datafusion-proto", + "dep:serde", + "dep:tonic", +] +mysql = ["dep:mysql_async", "dep:async-stream"] +mysql-federation = ["mysql", "federation"] +odbc = ["dep:odbc-api", "dep:arrow-odbc", "dep:async-stream", "dep:dyn-clone"] +odbc-federation = ["odbc", "federation"] +postgres = [ + "dep:tokio-postgres", + "dep:uuid", + "dep:postgres-native-tls", + "dep:bb8", + "dep:bb8-postgres", + "dep:native-tls", + "dep:pem", + "dep:async-stream", +] +postgres-federation = ["postgres", "federation"] +sqlite = ["dep:rusqlite", "dep:tokio-rusqlite"] +sqlite-federation = ["sqlite", "federation"] diff --git a/examples/duckdb.rs b/core/examples/duckdb.rs similarity index 100% rename from examples/duckdb.rs rename to core/examples/duckdb.rs diff --git a/examples/duckdb_example.db b/core/examples/duckdb_example.db similarity index 100% rename from examples/duckdb_example.db rename to core/examples/duckdb_example.db diff --git a/examples/duckdb_external_table.rs b/core/examples/duckdb_external_table.rs 
similarity index 100% rename from examples/duckdb_external_table.rs rename to core/examples/duckdb_external_table.rs diff --git a/examples/duckdb_function.rs b/core/examples/duckdb_function.rs similarity index 100% rename from examples/duckdb_function.rs rename to core/examples/duckdb_function.rs diff --git a/examples/flight-sql.rs b/core/examples/flight-sql.rs similarity index 100% rename from examples/flight-sql.rs rename to core/examples/flight-sql.rs diff --git a/examples/mysql.rs b/core/examples/mysql.rs similarity index 100% rename from examples/mysql.rs rename to core/examples/mysql.rs diff --git a/examples/odbc_sqlite.rs b/core/examples/odbc_sqlite.rs similarity index 100% rename from examples/odbc_sqlite.rs rename to core/examples/odbc_sqlite.rs diff --git a/examples/postgres.rs b/core/examples/postgres.rs similarity index 100% rename from examples/postgres.rs rename to core/examples/postgres.rs diff --git a/examples/sqlite.rs b/core/examples/sqlite.rs similarity index 100% rename from examples/sqlite.rs rename to core/examples/sqlite.rs diff --git a/examples/sqlite_example.db b/core/examples/sqlite_example.db similarity index 100% rename from examples/sqlite_example.db rename to core/examples/sqlite_example.db diff --git a/src/common.rs b/core/src/common.rs similarity index 100% rename from src/common.rs rename to core/src/common.rs diff --git a/src/duckdb.rs b/core/src/duckdb.rs similarity index 100% rename from src/duckdb.rs rename to core/src/duckdb.rs diff --git a/src/duckdb/creator.rs b/core/src/duckdb/creator.rs similarity index 100% rename from src/duckdb/creator.rs rename to core/src/duckdb/creator.rs diff --git a/src/duckdb/federation.rs b/core/src/duckdb/federation.rs similarity index 100% rename from src/duckdb/federation.rs rename to core/src/duckdb/federation.rs diff --git a/src/duckdb/sql_table.rs b/core/src/duckdb/sql_table.rs similarity index 100% rename from src/duckdb/sql_table.rs rename to core/src/duckdb/sql_table.rs diff --git 
a/src/duckdb/write.rs b/core/src/duckdb/write.rs similarity index 100% rename from src/duckdb/write.rs rename to core/src/duckdb/write.rs diff --git a/src/flight.rs b/core/src/flight.rs similarity index 100% rename from src/flight.rs rename to core/src/flight.rs diff --git a/src/flight/codec.rs b/core/src/flight/codec.rs similarity index 100% rename from src/flight/codec.rs rename to core/src/flight/codec.rs diff --git a/src/flight/exec.rs b/core/src/flight/exec.rs similarity index 100% rename from src/flight/exec.rs rename to core/src/flight/exec.rs diff --git a/src/flight/sql.rs b/core/src/flight/sql.rs similarity index 100% rename from src/flight/sql.rs rename to core/src/flight/sql.rs diff --git a/src/lib.rs b/core/src/lib.rs similarity index 100% rename from src/lib.rs rename to core/src/lib.rs diff --git a/src/mysql.rs b/core/src/mysql.rs similarity index 100% rename from src/mysql.rs rename to core/src/mysql.rs diff --git a/src/mysql/write.rs b/core/src/mysql/write.rs similarity index 100% rename from src/mysql/write.rs rename to core/src/mysql/write.rs diff --git a/src/odbc.rs b/core/src/odbc.rs similarity index 100% rename from src/odbc.rs rename to core/src/odbc.rs diff --git a/src/postgres.rs b/core/src/postgres.rs similarity index 100% rename from src/postgres.rs rename to core/src/postgres.rs diff --git a/src/postgres/write.rs b/core/src/postgres/write.rs similarity index 100% rename from src/postgres/write.rs rename to core/src/postgres/write.rs diff --git a/src/sql/arrow_sql_gen/arrow.rs b/core/src/sql/arrow_sql_gen/arrow.rs similarity index 100% rename from src/sql/arrow_sql_gen/arrow.rs rename to core/src/sql/arrow_sql_gen/arrow.rs diff --git a/src/sql/arrow_sql_gen/mod.rs b/core/src/sql/arrow_sql_gen/mod.rs similarity index 100% rename from src/sql/arrow_sql_gen/mod.rs rename to core/src/sql/arrow_sql_gen/mod.rs diff --git a/src/sql/arrow_sql_gen/mysql.rs b/core/src/sql/arrow_sql_gen/mysql.rs similarity index 100% rename from 
src/sql/arrow_sql_gen/mysql.rs rename to core/src/sql/arrow_sql_gen/mysql.rs diff --git a/src/sql/arrow_sql_gen/postgres.rs b/core/src/sql/arrow_sql_gen/postgres.rs similarity index 100% rename from src/sql/arrow_sql_gen/postgres.rs rename to core/src/sql/arrow_sql_gen/postgres.rs diff --git a/src/sql/arrow_sql_gen/postgres/builder.rs b/core/src/sql/arrow_sql_gen/postgres/builder.rs similarity index 100% rename from src/sql/arrow_sql_gen/postgres/builder.rs rename to core/src/sql/arrow_sql_gen/postgres/builder.rs diff --git a/src/sql/arrow_sql_gen/postgres/composite.rs b/core/src/sql/arrow_sql_gen/postgres/composite.rs similarity index 100% rename from src/sql/arrow_sql_gen/postgres/composite.rs rename to core/src/sql/arrow_sql_gen/postgres/composite.rs diff --git a/src/sql/arrow_sql_gen/postgres/schema.rs b/core/src/sql/arrow_sql_gen/postgres/schema.rs similarity index 100% rename from src/sql/arrow_sql_gen/postgres/schema.rs rename to core/src/sql/arrow_sql_gen/postgres/schema.rs diff --git a/src/sql/arrow_sql_gen/sqlite.rs b/core/src/sql/arrow_sql_gen/sqlite.rs similarity index 100% rename from src/sql/arrow_sql_gen/sqlite.rs rename to core/src/sql/arrow_sql_gen/sqlite.rs diff --git a/src/sql/arrow_sql_gen/statement.rs b/core/src/sql/arrow_sql_gen/statement.rs similarity index 100% rename from src/sql/arrow_sql_gen/statement.rs rename to core/src/sql/arrow_sql_gen/statement.rs diff --git a/src/sql/db_connection_pool/dbconnection.rs b/core/src/sql/db_connection_pool/dbconnection.rs similarity index 100% rename from src/sql/db_connection_pool/dbconnection.rs rename to core/src/sql/db_connection_pool/dbconnection.rs diff --git a/src/sql/db_connection_pool/dbconnection/duckdbconn.rs b/core/src/sql/db_connection_pool/dbconnection/duckdbconn.rs similarity index 100% rename from src/sql/db_connection_pool/dbconnection/duckdbconn.rs rename to core/src/sql/db_connection_pool/dbconnection/duckdbconn.rs diff --git a/src/sql/db_connection_pool/dbconnection/mysqlconn.rs 
b/core/src/sql/db_connection_pool/dbconnection/mysqlconn.rs similarity index 100% rename from src/sql/db_connection_pool/dbconnection/mysqlconn.rs rename to core/src/sql/db_connection_pool/dbconnection/mysqlconn.rs diff --git a/src/sql/db_connection_pool/dbconnection/odbcconn.rs b/core/src/sql/db_connection_pool/dbconnection/odbcconn.rs similarity index 100% rename from src/sql/db_connection_pool/dbconnection/odbcconn.rs rename to core/src/sql/db_connection_pool/dbconnection/odbcconn.rs diff --git a/src/sql/db_connection_pool/dbconnection/postgresconn.rs b/core/src/sql/db_connection_pool/dbconnection/postgresconn.rs similarity index 100% rename from src/sql/db_connection_pool/dbconnection/postgresconn.rs rename to core/src/sql/db_connection_pool/dbconnection/postgresconn.rs diff --git a/src/sql/db_connection_pool/dbconnection/sqliteconn.rs b/core/src/sql/db_connection_pool/dbconnection/sqliteconn.rs similarity index 100% rename from src/sql/db_connection_pool/dbconnection/sqliteconn.rs rename to core/src/sql/db_connection_pool/dbconnection/sqliteconn.rs diff --git a/src/sql/db_connection_pool/duckdbpool.rs b/core/src/sql/db_connection_pool/duckdbpool.rs similarity index 100% rename from src/sql/db_connection_pool/duckdbpool.rs rename to core/src/sql/db_connection_pool/duckdbpool.rs diff --git a/src/sql/db_connection_pool/mod.rs b/core/src/sql/db_connection_pool/mod.rs similarity index 100% rename from src/sql/db_connection_pool/mod.rs rename to core/src/sql/db_connection_pool/mod.rs diff --git a/src/sql/db_connection_pool/mysqlpool.rs b/core/src/sql/db_connection_pool/mysqlpool.rs similarity index 100% rename from src/sql/db_connection_pool/mysqlpool.rs rename to core/src/sql/db_connection_pool/mysqlpool.rs diff --git a/src/sql/db_connection_pool/odbcpool.rs b/core/src/sql/db_connection_pool/odbcpool.rs similarity index 100% rename from src/sql/db_connection_pool/odbcpool.rs rename to core/src/sql/db_connection_pool/odbcpool.rs diff --git 
a/src/sql/db_connection_pool/postgrespool.rs b/core/src/sql/db_connection_pool/postgrespool.rs similarity index 100% rename from src/sql/db_connection_pool/postgrespool.rs rename to core/src/sql/db_connection_pool/postgrespool.rs diff --git a/src/sql/db_connection_pool/sqlitepool.rs b/core/src/sql/db_connection_pool/sqlitepool.rs similarity index 100% rename from src/sql/db_connection_pool/sqlitepool.rs rename to core/src/sql/db_connection_pool/sqlitepool.rs diff --git a/src/sql/mod.rs b/core/src/sql/mod.rs similarity index 100% rename from src/sql/mod.rs rename to core/src/sql/mod.rs diff --git a/src/sql/sql_provider_datafusion/federation.rs b/core/src/sql/sql_provider_datafusion/federation.rs similarity index 100% rename from src/sql/sql_provider_datafusion/federation.rs rename to core/src/sql/sql_provider_datafusion/federation.rs diff --git a/src/sql/sql_provider_datafusion/mod.rs b/core/src/sql/sql_provider_datafusion/mod.rs similarity index 100% rename from src/sql/sql_provider_datafusion/mod.rs rename to core/src/sql/sql_provider_datafusion/mod.rs diff --git a/src/sqlite.rs b/core/src/sqlite.rs similarity index 100% rename from src/sqlite.rs rename to core/src/sqlite.rs diff --git a/src/sqlite/federation.rs b/core/src/sqlite/federation.rs similarity index 100% rename from src/sqlite/federation.rs rename to core/src/sqlite/federation.rs diff --git a/src/sqlite/sql_table.rs b/core/src/sqlite/sql_table.rs similarity index 100% rename from src/sqlite/sql_table.rs rename to core/src/sqlite/sql_table.rs diff --git a/src/sqlite/sqlite_interval.rs b/core/src/sqlite/sqlite_interval.rs similarity index 100% rename from src/sqlite/sqlite_interval.rs rename to core/src/sqlite/sqlite_interval.rs diff --git a/src/sqlite/write.rs b/core/src/sqlite/write.rs similarity index 100% rename from src/sqlite/write.rs rename to core/src/sqlite/write.rs diff --git a/src/util/column_reference.rs b/core/src/util/column_reference.rs similarity index 100% rename from 
src/util/column_reference.rs rename to core/src/util/column_reference.rs diff --git a/src/util/constraints.rs b/core/src/util/constraints.rs similarity index 100% rename from src/util/constraints.rs rename to core/src/util/constraints.rs diff --git a/src/util/indexes.rs b/core/src/util/indexes.rs similarity index 100% rename from src/util/indexes.rs rename to core/src/util/indexes.rs diff --git a/src/util/mod.rs b/core/src/util/mod.rs similarity index 100% rename from src/util/mod.rs rename to core/src/util/mod.rs diff --git a/src/util/ns_lookup.rs b/core/src/util/ns_lookup.rs similarity index 100% rename from src/util/ns_lookup.rs rename to core/src/util/ns_lookup.rs diff --git a/src/util/on_conflict.rs b/core/src/util/on_conflict.rs similarity index 100% rename from src/util/on_conflict.rs rename to core/src/util/on_conflict.rs diff --git a/src/util/retriable_error.rs b/core/src/util/retriable_error.rs similarity index 100% rename from src/util/retriable_error.rs rename to core/src/util/retriable_error.rs diff --git a/src/util/secrets.rs b/core/src/util/secrets.rs similarity index 100% rename from src/util/secrets.rs rename to core/src/util/secrets.rs diff --git a/src/util/test.rs b/core/src/util/test.rs similarity index 100% rename from src/util/test.rs rename to core/src/util/test.rs diff --git a/tests/arrow_record_batch_gen/mod.rs b/core/tests/arrow_record_batch_gen/mod.rs similarity index 100% rename from tests/arrow_record_batch_gen/mod.rs rename to core/tests/arrow_record_batch_gen/mod.rs diff --git a/tests/docker/mod.rs b/core/tests/docker/mod.rs similarity index 100% rename from tests/docker/mod.rs rename to core/tests/docker/mod.rs diff --git a/tests/duckdb/mod.rs b/core/tests/duckdb/mod.rs similarity index 100% rename from tests/duckdb/mod.rs rename to core/tests/duckdb/mod.rs diff --git a/tests/flight/mod.rs b/core/tests/flight/mod.rs similarity index 100% rename from tests/flight/mod.rs rename to core/tests/flight/mod.rs diff --git 
a/tests/integration.rs b/core/tests/integration.rs similarity index 100% rename from tests/integration.rs rename to core/tests/integration.rs diff --git a/tests/mysql/common.rs b/core/tests/mysql/common.rs similarity index 100% rename from tests/mysql/common.rs rename to core/tests/mysql/common.rs diff --git a/tests/mysql/mod.rs b/core/tests/mysql/mod.rs similarity index 100% rename from tests/mysql/mod.rs rename to core/tests/mysql/mod.rs diff --git a/tests/postgres/common.rs b/core/tests/postgres/common.rs similarity index 100% rename from tests/postgres/common.rs rename to core/tests/postgres/common.rs diff --git a/tests/postgres/mod.rs b/core/tests/postgres/mod.rs similarity index 100% rename from tests/postgres/mod.rs rename to core/tests/postgres/mod.rs diff --git a/tests/postgres/schema.rs b/core/tests/postgres/schema.rs similarity index 100% rename from tests/postgres/schema.rs rename to core/tests/postgres/schema.rs diff --git a/tests/postgres/scripts/complex_table.sql b/core/tests/postgres/scripts/complex_table.sql similarity index 100% rename from tests/postgres/scripts/complex_table.sql rename to core/tests/postgres/scripts/complex_table.sql diff --git a/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap b/core/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap similarity index 100% rename from tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap rename to core/tests/postgres/snapshots/integration__postgres__schema__postgres_schema_inference_complex_types.snap diff --git a/tests/sqlite/mod.rs b/core/tests/sqlite/mod.rs similarity index 100% rename from tests/sqlite/mod.rs rename to core/tests/sqlite/mod.rs diff --git a/python/.cargo/config.toml b/python/.cargo/config.toml new file mode 100644 index 00000000..91a099a6 --- /dev/null +++ b/python/.cargo/config.toml @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] 
+rustflags = [
+    "-C", "link-arg=-undefined",
+    "-C", "link-arg=dynamic_lookup",
+]
+
+[target.aarch64-apple-darwin]
+rustflags = [
+    "-C", "link-arg=-undefined",
+    "-C", "link-arg=dynamic_lookup",
+]
+
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 00000000..9ea5de3e
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,26 @@
+/venv
+.idea
+.DS_Store
+.vscode
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Python dist ignore
+dist
+
+# C extensions
+*.so
+
+# Python dist
+dist
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+.python-version
+venv
+.venv
+
diff --git a/python/Cargo.toml b/python/Cargo.toml
new file mode 100644
index 00000000..97fae5cc
--- /dev/null
+++ b/python/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "datafusion-table-providers-python"
+version = { workspace = true }
+readme = { workspace = true }
+edition = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+description = { workspace = true }
+
+[lib]
+name = "datafusion_table_providers"
+crate-type = ["cdylib"]
+doc = false
+
+[dependencies]
+arrow = { version = "53", features = ["pyarrow"] }
+datafusion = { workspace = true, features = ["pyarrow"] }
+datafusion-ffi = { workspace = true }
+datafusion-table-providers = { workspace = true, features = ["sqlite"] }
+pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py39"] }
+tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] }
diff --git a/python/examples/sqlite_demo.py b/python/examples/sqlite_demo.py
new file mode 100644
index 00000000..fd1b376b
--- /dev/null
+++ b/python/examples/sqlite_demo.py
@@ -0,0 +1,22 @@
+"""Query every table of the bundled SQLite example database with DataFusion."""
+
+from pathlib import Path
+
+from datafusion import SessionContext
+from datafusion_table_providers import sqlite
+
+# Resolve the database relative to this file so the demo works from any
+# working directory (this file lives in <repo>/python/examples/).
+DB_PATH = (
+    Path(__file__).resolve().parents[2] / "core" / "examples" / "sqlite_example.db"
+)
+
+ctx = SessionContext()
+# Arguments: database path, mode, busy timeout in seconds, databases to attach.
+pool = sqlite.SqliteTableFactory(str(DB_PATH), "file", 3.0, None)
+tables = pool.tables()
+
+for t in tables:
+    ctx.register_table_provider(t, pool.get_table(t))
+    print("Checking table:", t)
+    ctx.table(t).show()
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 00000000..c721341a
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,59 @@
+[build-system]
+requires = ["maturin>=1.5.1,<1.6.0"]
+build-backend = "maturin"
+
+[project]
+name = "datafusion_table_providers"
+description = "Build and run queries against data"
+readme = "../README.md"
+license = { file = "../LICENSE" }
+requires-python = ">=3.9"
+keywords = ["datafusion", "dataframe", "rust", "query-engine"]
+classifiers = [
+    "Development Status :: 2 - Pre-Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "License :: OSI Approved",
+    "Operating System :: MacOS",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX :: Linux",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python",
+    "Programming Language :: Rust",
+]
+# TODO update this to 43.1.0 as soon as release is complete
+dependencies = ["datafusion>=42.0.0"]
+
+[project.urls]
+repository = "https://github.com/datafusion-contrib/datafusion-table-providers"
+
+[tool.isort]
+profile = "black"
+
+[tool.maturin]
+python-source = "python"
+module-name = "datafusion_table_providers._internal"
+include = [{ path = "../Cargo.lock", format = "sdist" }]
+exclude = [".github/**", "ci/**", ".asf.yaml"]
+# Require Cargo.lock is up to date
+locked = true
+
+# Enable docstring linting using the google style guide
+[tool.ruff.lint]
+select = ["E4", "E7", "E9", "F", "D", "W"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.pycodestyle]
+max-doc-length = 88
+
+# Disable docstring checking for these directories
+[tool.ruff.lint.per-file-ignores]
+"python/tests/*" = ["D"]
+"examples/*" = ["D", "W505"]
+"dev/*" = ["D"]
+"benchmarks/*" = ["D", "F"]
+"docs/*" = ["D"]
diff --git a/python/python/datafusion_table_providers/__init__.py b/python/python/datafusion_table_providers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/python/python/datafusion_table_providers/sqlite.py b/python/python/datafusion_table_providers/sqlite.py
new file mode 100644
index 00000000..252bcf2d
--- /dev/null
+++ b/python/python/datafusion_table_providers/sqlite.py
@@ -0,0 +1,44 @@
+"""SQLite table provider bindings for DataFusion."""
+
+from typing import Any, List, Optional
+
+from . import _internal
+
+
+class SqliteTableFactory:
+    """Factory exposing the tables of a SQLite database as table providers."""
+
+    def __init__(
+        self,
+        path: str,
+        mode: str,
+        busy_timeout_s: float,
+        attach_databases: Optional[List[str]] = None,
+    ) -> None:
+        """Create a SQLite table factory.
+
+        Args:
+            path: Path of the SQLite database.
+            mode: Connection mode, e.g. ``"file"``.
+            busy_timeout_s: Busy timeout in seconds.
+            attach_databases: Optional list of databases to attach.
+        """
+        self._raw = _internal.sqlite.RawSqliteTableFactory(
+            path, mode, busy_timeout_s, attach_databases
+        )
+
+    def tables(self) -> List[str]:
+        """Return the names of the tables found in every schema."""
+        return self._raw.tables()
+
+    def get_table(self, table_reference: str) -> Any:
+        """Return the table provider for ``table_reference``.
+
+        Args:
+            table_reference: Name of the table to look up.
+
+        Returns:
+            A table provider object that can be registered with a DataFusion
+            ``SessionContext`` via ``register_table_provider``.
+        """
+        return self._raw.get_table(table_reference)
diff --git a/python/src/duckdb.rs b/python/src/duckdb.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/python/src/duckdb.rs
@@ -0,0 +1 @@
+
diff --git a/python/src/flight.rs b/python/src/flight.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/python/src/flight.rs
@@ -0,0 +1 @@
+
diff --git a/python/src/lib.rs b/python/src/lib.rs
new file mode 100644
index 00000000..a5f25538
--- /dev/null
+++ b/python/src/lib.rs
@@ -0,0 +1,56 @@
+//! Python bindings for the `datafusion-table-providers` crate.
+
+use std::{ffi::CString, sync::Arc};
+
+use datafusion::catalog::TableProvider;
+use datafusion_ffi::table_provider::FFI_TableProvider;
+use pyo3::{prelude::*, types::PyCapsule};
+
+/// Table provider handed over to Python inside a `PyCapsule`.
+#[pyclass(module = "datafusion_table_providers._internal")]
+struct RawTableProvider {
+    // NOTE(review): the `+ Send` bound is reconstructed; confirm against
+    // the parameter type of `FFI_TableProvider::new`.
+    pub(crate) table: Arc<dyn TableProvider + Send>,
+    pub(crate) supports_pushdown_filters: bool,
+}
+
+#[pymethods]
+impl RawTableProvider {
+    /// Wraps the provider in an [`FFI_TableProvider`] and returns it in a
+    /// capsule named `datafusion_table_provider`.
+    fn __datafusion_table_provider__<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> PyResult<Bound<'py, PyCapsule>> {
+        let name = CString::new("datafusion_table_provider")
+            .expect("capsule name contains no interior NUL byte");
+
+        let provider =
+            FFI_TableProvider::new(Arc::clone(&self.table), self.supports_pushdown_filters);
+
+        // `name` is not needed afterwards, so move it instead of cloning.
+        PyCapsule::new_bound(py, provider, Some(name))
+    }
+}
+
+pub mod duckdb;
+pub mod flight;
+pub mod mysql;
+pub mod odbc;
+pub mod postgres;
+pub mod sqlite;
+pub mod utils;
+
+#[pymodule]
+// The module name needs to match the last component of `module-name`
+// in pyproject.toml.
+fn _internal(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<RawTableProvider>()?;
+
+    let sqlite = PyModule::new_bound(py, "sqlite")?;
+    sqlite::init_module(&sqlite)?;
+    m.add_submodule(&sqlite)?;
+
+    Ok(())
+}
diff --git a/python/src/mysql.rs b/python/src/mysql.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/python/src/mysql.rs
@@ -0,0 +1 @@
+
diff --git a/python/src/odbc.rs b/python/src/odbc.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/python/src/odbc.rs
@@ -0,0 +1 @@
+
diff --git a/python/src/postgres.rs b/python/src/postgres.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/python/src/postgres.rs
@@ -0,0 +1 @@
+
diff --git a/python/src/sqlite.rs b/python/src/sqlite.rs
new file mode 100644
index 00000000..3da272d2
--- /dev/null
+++ b/python/src/sqlite.rs
@@ -0,0 +1,94 @@
+//! PyO3 wrapper around the SQLite connection pool and table factory.
+
+use std::{sync::Arc, time::Duration};
+
+use datafusion_table_providers::{
+    sql::db_connection_pool::{
+        sqlitepool::{SqliteConnectionPool, SqliteConnectionPoolFactory},
+        DbConnectionPool,
+    },
+    sqlite::SqliteTableFactory,
+};
+use pyo3::prelude::*;
+
+use crate::{
+    utils::{to_pyerr, wait_for_future},
+    RawTableProvider,
+};
+
+/// SQLite connection pool together with the table factory built on top of it.
+#[pyclass(module = "datafusion_table_providers._internal.sqlite")]
+struct RawSqliteTableFactory {
+    pool: Arc<SqliteConnectionPool>,
+    factory: SqliteTableFactory,
+}
+
+#[pymethods]
+impl RawSqliteTableFactory {
+    /// Builds the connection pool for `path` and a table factory that uses it.
+    ///
+    /// Raises an exception when `busy_timeout_s` cannot be represented as a
+    /// `Duration` (negative, NaN or too large) or when building the pool fails.
+    #[new]
+    #[pyo3(signature = (path, mode, busy_timeout_s, attach_databases = None))]
+    pub fn new(
+        py: Python,
+        path: &str,
+        mode: String,
+        busy_timeout_s: f64,
+        attach_databases: Option<Vec<String>>,
+    ) -> PyResult<Self> {
+        let mode = mode.as_str().into();
+        // `Duration::from_secs_f64` panics on negative, NaN or overflowing input.
+        // The value comes from Python, so convert fallibly and raise instead.
+        let busy_timeout = Duration::try_from_secs_f64(busy_timeout_s).map_err(to_pyerr)?;
+        let attach_databases =
+            attach_databases.map(|d| d.into_iter().map(Arc::from).collect());
+        let factory = SqliteConnectionPoolFactory::new(path, mode, busy_timeout)
+            .with_databases(attach_databases);
+        let pool = Arc::new(wait_for_future(py, factory.build()).map_err(to_pyerr)?);
+
+        Ok(Self {
+            factory: SqliteTableFactory::new(Arc::clone(&pool)),
+            pool,
+        })
+    }
+
+    /// Returns the names of the tables of every schema the connection reports.
+    pub fn tables(&self, py: Python) -> PyResult<Vec<String>> {
+        wait_for_future(py, async {
+            let conn = self.pool.connect().await.map_err(to_pyerr)?;
+            // Build the error lazily; it is only needed on the failure path.
+            let conn_async = conn
+                .as_async()
+                .ok_or_else(|| to_pyerr("Unable to create connection to sqlite db"))?;
+            let schemas = conn_async.schemas().await.map_err(to_pyerr)?;
+
+            let mut tables = Vec::default();
+            for schema in schemas {
+                let schema_tables = conn_async.tables(&schema).await.map_err(to_pyerr)?;
+                tables.extend(schema_tables);
+            }
+
+            Ok(tables)
+        })
+    }
+
+    /// Looks up `table_reference` and wraps the resulting provider for Python.
+    pub fn get_table(&self, py: Python, table_reference: &str) -> PyResult<RawTableProvider> {
+        let table = wait_for_future(py, self.factory.table_provider(table_reference.into()))
+            .map_err(to_pyerr)?;
+
+        Ok(RawTableProvider {
+            table,
+            supports_pushdown_filters: true,
+        })
+    }
+}
+
+/// Registers the classes of the `sqlite` submodule.
+pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<RawSqliteTableFactory>()?;
+
+    Ok(())
+}
diff --git a/python/src/utils.rs b/python/src/utils.rs
new file mode 100644
index 00000000..48c6893a
--- /dev/null
+++ b/python/src/utils.rs
@@ -0,0 +1,30 @@
+use pyo3::{exceptions::PyException, prelude::*};
+use std::{future::Future, sync::OnceLock};
+
+/// Newtype wrapper around the Tokio runtime used to drive futures.
+pub(crate) struct TokioRuntime(tokio::runtime::Runtime);
+
+/// Returns the shared Tokio runtime, creating it on first use.
+#[inline]
+pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime {
+    static RUNTIME: OnceLock<TokioRuntime> = OnceLock::new();
+    RUNTIME.get_or_init(|| {
+        let runtime = tokio::runtime::Runtime::new().expect("failed to create Tokio runtime");
+        TokioRuntime(runtime)
+    })
+}
+
+/// Blocks on `f` using the shared Tokio runtime while the GIL is released.
+pub fn wait_for_future<F>(py: Python, f: F) -> F::Output
+where
+    F: Future + Send,
+    F::Output: Send,
+{
+    let runtime: &tokio::runtime::Runtime = &get_tokio_runtime().0;
+    py.allow_threads(|| runtime.block_on(f))
+}
+
+/// Converts any value with a string representation into a Python `Exception`.
+pub fn to_pyerr<T: ToString>(err: T) -> PyErr {
+    PyException::new_err(err.to_string())
+}