diff --git a/examples/adbc_example.py b/examples/adbc_example.py
new file mode 100644
index 0000000..530f9f3
--- /dev/null
+++ b/examples/adbc_example.py
@@ -0,0 +1,67 @@
+# Install pyarrow, adbc-driver-manager and duckdb before running this example.
+# This example currently runs only with duckdb<=1.1.3; later versions of duckdb
+# no longer support substrait in adbc.
+# /// script
+# dependencies = [
+#   "pyarrow==20.0.0",
+#   "adbc-driver-manager==1.5.0",
+#   "duckdb==1.1.3",
+#   "substrait[extensions] @ file:///${PROJECT_ROOT}/"
+# ]
+# ///
+
+
+import adbc_driver_duckdb.dbapi
+import pyarrow
+from substrait.builders.plan import read_named_table, filter
+from substrait.builders.extended_expression import scalar_function, column, literal
+from substrait.builders.type import i64
+from substrait.extension_registry import ExtensionRegistry
+import pyarrow.substrait as pa_substrait
+
+# Request the default extensions explicitly, matching examples/duckdb_example.py;
+# the filter below references functions_comparison.yaml.
+registry = ExtensionRegistry(load_default_extensions=True)
+
+# Sample rows that get ingested into the in-memory database below.
+data = pyarrow.record_batch(
+    [[1, 2, 3, 4], ["a", "b", "c", "d"]],
+    names=["ints", "strs"],
+)
+
+
+def read_adbc_named_table(name: str, conn):
+    """Build a Substrait named-table read for the table `name` reachable via `conn`.
+
+    The table's Arrow schema is fetched over ADBC and converted to a Substrait
+    schema with pyarrow, so it does not have to be written out by hand.
+    """
+    pa_schema = conn.adbc_get_table_schema(name)
+    substrait_schema = (
+        pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema
+    )
+    return read_named_table(name, substrait_schema)
+
+
+with adbc_driver_duckdb.dbapi.connect(":memory:") as conn:
+    with conn.cursor() as cur:
+        cur.adbc_ingest("AnswerToEverything", data)
+
+        cur.executescript("INSTALL substrait;")
+        cur.executescript("LOAD substrait;")
+
+        table = read_adbc_named_table("AnswerToEverything", conn)
+        # Keep only the rows where ints >= 3.
+        table = filter(
+            table,
+            expression=scalar_function(
+                "functions_comparison.yaml",
+                "gte",
+                column("ints"),
+                literal(3, i64()),
+            ),
+        )
+
+        # The serialized plan bytes are executed in place of a SQL string.
+        cur.execute(table(registry).SerializeToString())
+        print(cur.fetch_arrow_table())
diff --git a/examples/duckdb_example.py b/examples/duckdb_example.py
new file mode 100644
index 0000000..bfdde7d
--- /dev/null
+++ b/examples/duckdb_example.py
@@ -0,0 +1,72 @@
+# Install duckdb and pyarrow before running this example
+# /// script
+# dependencies = [
+#   "pyarrow==20.0.0",
+#   "duckdb==1.2.1",
+#   "substrait[extensions] @ file:///${PROJECT_ROOT}/"
+# ]
+# ///
+
+
+import duckdb
+from substrait.builders.plan import read_named_table, project, filter
+from substrait.builders.extended_expression import column, scalar_function, literal
+from substrait.builders.type import i32
+from substrait.extension_registry import ExtensionRegistry
+from substrait.json import dump_json
+import pyarrow.substrait as pa_substrait
+
+# Try the default extension repository first and fall back to the community
+# repository. Catch Exception rather than using a bare except so that
+# KeyboardInterrupt and SystemExit are not swallowed by the fallback.
+try:
+    duckdb.install_extension("substrait")
+except Exception:
+    duckdb.install_extension("substrait", repository="community")
+duckdb.load_extension("substrait")
+
+duckdb.install_extension("tpch")
+duckdb.load_extension("tpch")
+
+# Populate the database with TPC-H data at scale factor 1.
+duckdb.sql("CALL dbgen(sf = 1);")
+
+registry = ExtensionRegistry(load_default_extensions=True)
+
+
+def read_duckdb_named_table(name: str, conn):
+    """Build a Substrait named-table read for the DuckDB table `name`.
+
+    The schema is discovered by running a zero-row SELECT through `conn` and
+    converting the resulting Arrow schema to a Substrait schema with pyarrow.
+    `name` is interpolated into the SQL text verbatim, so pass trusted names only.
+    """
+    pa_schema = conn.sql(f"SELECT * FROM {name} LIMIT 0").arrow().schema
+    substrait_schema = (
+        pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema
+    )
+    return read_named_table(name, substrait_schema)
+
+
+table = read_duckdb_named_table("customer", duckdb)
+# Keep the customers whose c_nationkey equals 3 ...
+table = filter(
+    table,
+    expression=scalar_function(
+        "functions_comparison.yaml",
+        "equal",
+        column("c_nationkey"),
+        literal(3, i32()),
+    ),
+)
+# ... and return only three of their columns.
+table = project(
+    table,
+    expressions=[column("c_name"), column("c_address"), column("c_nationkey")],
+)
+
+# The plan is embedded in a single-quoted SQL string literal, so double any single
+# quote in the JSON to keep the statement well-formed.
+plan_json = dump_json(table(registry)).replace("'", "''")
+sql = f"CALL from_substrait_json('{plan_json}')"
+print(duckdb.sql(sql))