From eefb5c4204e7f4f2d56288153cd6db98c59bf09c Mon Sep 17 00:00:00 2001 From: tokoko Date: Sat, 3 May 2025 10:34:21 +0000 Subject: [PATCH 1/4] chore: add adbc, duckdb examples --- examples/adbc_example.py | 38 ++++++++++++++++++++++++++++++++++++++ examples/duckdb_example.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 examples/adbc_example.py create mode 100644 examples/duckdb_example.py diff --git a/examples/adbc_example.py b/examples/adbc_example.py new file mode 100644 index 0000000..b36810e --- /dev/null +++ b/examples/adbc_example.py @@ -0,0 +1,38 @@ +""" +Install pyarrow, adbc_driver_duckdb and duckdb before running this example +This example currently can be run only with duckdb<=1.1.3, later vesions of duckdb no longer support substrait in adbc +""" + + +import adbc_driver_duckdb.dbapi +import pyarrow +from substrait.builders.plan import read_named_table, filter +from substrait.builders.extended_expression import scalar_function, column, literal +from substrait.builders.type import i64 +from substrait.extension_registry import ExtensionRegistry +import pyarrow.substrait as pa_substrait + +registry = ExtensionRegistry() + +data = pyarrow.record_batch( + [[1, 2, 3, 4], ["a", "b", "c", "d"]], + names=["ints", "strs"], +) + +def read_adbc_named_table(name: str, conn): + pa_schema = conn.adbc_get_table_schema(name) + substrait_schema = pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema + return read_named_table(name, substrait_schema) + +with adbc_driver_duckdb.dbapi.connect(":memory:") as conn: + with conn.cursor() as cur: + cur.adbc_ingest("AnswerToEverything", data) + + cur.executescript("INSTALL substrait;") + cur.executescript("LOAD substrait;") + + table = read_adbc_named_table("AnswerToEverything", conn) + table = filter(table, expression=scalar_function('functions_comparison.yaml', 'gte', column('ints'), literal(3, i64()))) + + cur.execute(table(registry).SerializeToString()) + print(cur.fetch_arrow_table()) \ No newline at end of file diff --git a/examples/duckdb_example.py b/examples/duckdb_example.py new file mode 100644 index 0000000..c593684 --- /dev/null +++ b/examples/duckdb_example.py @@ -0,0 +1,36 @@ +""" +Install duckdb and pyarrow before running this example +""" + +import duckdb +from substrait.builders.plan import read_named_table, project, filter +from substrait.builders.extended_expression import column, scalar_function, literal +from substrait.builders.type import i32 +from substrait.extension_registry import ExtensionRegistry +from substrait.json import dump_json +import pyarrow.substrait as pa_substrait + +try: + duckdb.install_extension("substrait") +except: + duckdb.install_extension("substrait", repository="community") +duckdb.load_extension("substrait") + +duckdb.install_extension("tpch") +duckdb.load_extension("tpch") + +duckdb.sql("CALL dbgen(sf = 1);") + +registry = ExtensionRegistry(load_default_extensions=True) + +def read_duckdb_named_table(name: str, conn): + pa_schema = conn.sql(f"SELECT * FROM {name} LIMIT 0").arrow().schema + substrait_schema = pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema + return read_named_table(name, substrait_schema) + +table = read_duckdb_named_table("customer", duckdb) +table = filter(table, expression=scalar_function('functions_comparison.yaml', 'equal', column('c_nationkey'), literal(3, i32()))) +table = project(table, expressions=[column('c_name'), column('c_address'), column('c_nationkey')]) + +sql = f"CALL from_substrait_json('{dump_json(table(registry))}')" +print(duckdb.sql(sql)) From a9a52df7c3f3e213f2ac0c106a6dd8c0e2bbe7e1 Mon Sep 17 00:00:00 2001 From: tokoko Date: Sat, 3 May 2025 10:41:42 +0000 Subject: [PATCH 2/4] chore: add adbc, duckdb examples --- examples/adbc_example.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/adbc_example.py b/examples/adbc_example.py index b36810e..0e4d17e 100644 --- a/examples/adbc_example.py +++ b/examples/adbc_example.py @@ -1,9 +1,8 @@ """ -Install pyarrow, adbc_driver_duckdb and duckdb before running this example +Install pyarrow, adbc-driver-manager and duckdb before running this example This example currently can be run only with duckdb<=1.1.3, later vesions of duckdb no longer support substrait in adbc """ - import adbc_driver_duckdb.dbapi import pyarrow from substrait.builders.plan import read_named_table, filter From 86d828a92ac4d2354a49081b45303e65d0b7c64e Mon Sep 17 00:00:00 2001 From: tokoko Date: Sat, 3 May 2025 17:34:10 +0000 Subject: [PATCH 3/4] chore: add dependency versions to examples --- examples/adbc_example.py | 15 +++++++++++---- examples/duckdb_example.py | 12 +++++++++--- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/examples/adbc_example.py b/examples/adbc_example.py index 0e4d17e..3480bb1 100644 --- a/examples/adbc_example.py +++ b/examples/adbc_example.py @@ -1,7 +1,14 @@ -""" -Install pyarrow, adbc-driver-manager and duckdb before running this example -This example currently can be run only with duckdb<=1.1.3, later vesions of duckdb no longer support substrait in adbc -""" +# Install pyarrow, adbc-driver-manager and duckdb before running this example +# This example currently can be run only with duckdb<=1.1.3, later vesions of duckdb no longer support substrait in adbc +# /// script +# dependencies = [ +# "pyarrow==20.0.0", +# "adbc-driver-manager==1.5.0", +# "duckdb==1.1.3", +# "substrait[extensions] @ file:///${PROJECT_ROOT}/" +# ] +# /// + import adbc_driver_duckdb.dbapi import pyarrow diff --git a/examples/duckdb_example.py b/examples/duckdb_example.py index c593684..bfdde7d 100644 --- a/examples/duckdb_example.py +++ b/examples/duckdb_example.py @@ -1,6 +1,12 @@ -""" -Install duckdb and pyarrow before running this example -""" +# Install duckdb and pyarrow before running this example +# /// script +# dependencies = [ +# "pyarrow==20.0.0", +# "duckdb==1.2.1", +# "substrait[extensions] @ file:///${PROJECT_ROOT}/" +# ] +# /// + import duckdb from substrait.builders.plan import read_named_table, project, filter From 5268f633fe13f821ff7e1602531a6a9524975178 Mon Sep 17 00:00:00 2001 From: tokoko Date: Sat, 3 May 2025 17:35:48 +0000 Subject: [PATCH 4/4] fix: typo in adbc_example --- examples/adbc_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/adbc_example.py b/examples/adbc_example.py index 3480bb1..530f9f3 100644 --- a/examples/adbc_example.py +++ b/examples/adbc_example.py @@ -1,5 +1,5 @@ # Install pyarrow, adbc-driver-manager and duckdb before running this example -# This example currently can be run only with duckdb<=1.1.3, later vesions of duckdb no longer support substrait in adbc +# This example currently can be run only with duckdb<=1.1.3, later versions of duckdb no longer support substrait in adbc # /// script # dependencies = [ # "pyarrow==20.0.0",