Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 50 additions & 19 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,10 +1342,33 @@ def _to_duckdb_relation(
"""
self._run_pre_execute_hooks(expr)
table_expr = expr.as_table()
sql = self.compile(table_expr, limit=limit, params=params, **kwargs)
if table_expr.schema().geospatial:
geocols = table_expr.schema().geospatial

if geocols:
self._load_extensions(["spatial"])
return self.con.sql(sql)

# DuckDB 1.4.x: to_arrow_table() returns DuckDB's internal binary
# format for GEOMETRY columns, not WKB. Wrap geometry columns with
# ST_ASWKB so downstream Arrow → Shapely conversion gets valid WKB.
# DuckDB 1.5.x: new method to_arrow_reader() returns geoarrow.wkb natively,
# so no wrapping is needed (and is handled in to_pyarrow / execute).
if not hasattr(duckdb.DuckDBPyRelation, "to_arrow_reader"):
quoted = self.compiler.quoted
geocols_set = set(geocols)
inner = self.compiler.to_sqlglot(
table_expr, limit=limit, params=params
).subquery("t")
cols = [
self.compiler.f.st_aswkb(
sg.column(col, quoted=quoted)
).as_(col, quoted=quoted)
if col in geocols_set
else sg.column(col, quoted=quoted)
for col in table_expr.schema().keys()
]
return self.con.sql(sg.select(*cols).from_(inner).sql("duckdb"))

return self.con.sql(self.compile(table_expr, limit=limit, params=params, **kwargs))

def to_pyarrow_batches(
self,
Expand Down Expand Up @@ -1375,20 +1398,15 @@ def to_pyarrow_batches(
chunk_size
The number of rows to fetch per batch
"""
import pyarrow as pa
import pyarrow_hotfix # noqa: F401

self._run_pre_execute_hooks(expr)
table = expr.as_table()
sql = self.compile(table, limit=limit, params=params)

def batch_producer(cur):
yield from cur.fetch_record_batch(rows_per_batch=chunk_size)

result = self.raw_sql(sql)
return pa.ipc.RecordBatchReader.from_batches(
expr.as_table().schema().to_pyarrow(), batch_producer(result)
)
rel = self._to_duckdb_relation(expr, params=params, limit=limit)
if hasattr(rel, "to_arrow_reader"):
# DuckDB 1.5.x: fetch_record_batch is deprecated and causes ABI errors
return rel.to_arrow_reader(batch_size=chunk_size)
else:
# DuckDB 1.4.x: fetch_arrow_reader accepts a positional batch_size
return rel.fetch_arrow_reader(chunk_size)

def to_pyarrow(
self,
Expand All @@ -1401,9 +1419,15 @@ def to_pyarrow(
) -> pa.Table:
from ibis.backends.duckdb.converter import DuckDBPyArrowData

table = self._to_duckdb_relation(
expr, params=params, limit=limit, **kwargs
).to_arrow_table()
rel = self._to_duckdb_relation(expr, params=params, limit=limit, **kwargs)
if hasattr(rel, "to_arrow_reader"):
# DuckDB 1.5.x: to_arrow_reader avoids InternalException when result
# contains GEOMETRY('EPSG:xxx') columns.
# See https://github.com/duckdb/duckdb-python/issues/475
table = rel.to_arrow_reader().read_all()
else:
# DuckDB 1.4.x
table = rel.to_arrow_table()
return expr.__pyarrow_result__(table, data_mapper=DuckDBPyArrowData)

def execute(
Expand All @@ -1423,7 +1447,14 @@ def execute(
from ibis.backends.duckdb.converter import DuckDBPandasData

rel = self._to_duckdb_relation(expr, params=params, limit=limit, **kwargs)
table = rel.to_arrow_table()
if hasattr(rel, "to_arrow_reader"):
# DuckDB 1.5.x: to_arrow_reader avoids InternalException when result
# contains GEOMETRY('EPSG:xxx') columns.
# See https://github.com/duckdb/duckdb-python/issues/475
table = rel.to_arrow_reader().read_all()
else:
# DuckDB 1.4.x
table = rel.to_arrow_table()

df = pd.DataFrame(
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,2 @@
SELECT
*
REPLACE (ST_ASWKB("p") AS "p")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
)
ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
Original file line number Diff line number Diff line change
@@ -1,7 +1,2 @@
SELECT
*
REPLACE (ST_ASWKB("p") AS "p")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
)
ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
3 changes: 1 addition & 2 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,8 @@ def test_pyarrow_batches_chunk_size(con): # 10443
assert len(next(batches)) == 800
assert len(next(batches)) == 800

batches = con.to_pyarrow_batches(t, chunk_size=-1)
with pytest.raises(TypeError):
next(batches)
con.to_pyarrow_batches(t, chunk_size=-1)


@pytest.mark.parametrize(
Expand Down
29 changes: 25 additions & 4 deletions ibis/backends/duckdb/tests/test_geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,18 @@ def test_geospatial_buffer_point(zones, zones_gdf):
gtm.assert_geoseries_equal(buffer.to_pandas(), gp_buffer, check_crs=False)


@pytest.mark.xfail_version(
duckdb=["shapely>=2.1.0"], raises=AssertionError, reason="numerics are different"
@pytest.mark.xfail(
condition=(
vparse(duckdb.__version__) < vparse("1.5")
and vparse(shapely.__version__) >= vparse("2.1.0")
),
raises=AssertionError,
reason=(
"DuckDB <1.5 spatial uses an older geometry engine for ST_BUFFER "
"that produces slightly different polygon vertex coordinates; "
"shapely>=2.1.0 tightened geometry equality comparisons, making "
"the mismatch visible in assert_geoseries_equal"
),
)
def test_geospatial_buffer(zones, zones_gdf):
buffer = zones.geom.buffer(100.0)
Expand Down Expand Up @@ -463,7 +473,10 @@ def no_roundtrip(
None,
{},
None,
marks=no_roundtrip(reason="duckdb wkb doesn't preserve the geometry type"),
marks=no_roundtrip(
reason="duckdb wkb doesn't preserve the geometry type",
raises=duckdb.Error,
),
id="selafin",
),
param("JML", None, {}, None, id="jml"),
Expand All @@ -476,6 +489,9 @@ def no_roundtrip(
marks=no_roundtrip(reason="can't read the written file"),
id="mvt",
),
# NOTE: Writing MapML fails in DuckDB 1.5.3 with
# "basic_string: construction from null is not valid"
# https://github.com/duckdb/duckdb-spatial/issues/818
param("MapML", None, {}, None, id="mapml"),
param(
"PMTiles",
Expand Down Expand Up @@ -527,7 +543,12 @@ def test_to_geo(con, driver, canonical_extension, kwargs, preproc, tmp_path):
reason="GDAL_DATA not set",
),
),
"GEORSS",
param(
# NOTE: Writing GeoRSS fails in DuckDB 1.5.3 with
# "basic_string: construction from null is not valid"
# https://github.com/duckdb/duckdb-spatial/issues/818
"GEORSS",
),
],
)
def test_to_geo_geom_only(con, driver, tmp_path):
Expand Down
25 changes: 6 additions & 19 deletions ibis/backends/sql/compilers/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,25 +115,7 @@ def to_sqlglot(
limit: str | None = None,
params: Mapping[ir.Expr, Any] | None = None,
):
sql = super().to_sqlglot(expr, limit=limit, params=params)

table_expr = expr.as_table()
geocols = table_expr.schema().geospatial

if not geocols:
return sql

quoted = self.quoted
return sg.select(
sge.Star(
replace=[
self.f.st_aswkb(sg.column(col, quoted=quoted)).as_(
col, quoted=quoted
)
for col in geocols
]
)
).from_(sql.subquery())
return super().to_sqlglot(expr, limit=limit, params=params)

def visit_StructColumn(self, op, *, names, values):
return sge.Struct.from_arg_list(
Expand Down Expand Up @@ -263,6 +245,11 @@ def visit_ArrayZip(self, op, *, arg):
any_arg_null = sg.or_(*(arr.is_(NULL) for arr in arg))
return self.if_(any_arg_null, NULL, zipped_arrays)

def visit_ArrayStringJoin(self, op, *, arg, sep):
return self.if_(
self.f.len(arg) > 0, self.f.array_to_string(arg, sep), NULL
)

def visit_Array(self, op, *, exprs):
return self.cast(self.f.array(*exprs), op.dtype)

Expand Down
22 changes: 20 additions & 2 deletions ibis/backends/sql/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,17 @@ def _from_sqlglot_GEOMETRY(
nullable: bool | None = None,
) -> sge.DataType:
if arg is not None:
typeclass = _geotypes[arg.this.this]
arg_value = arg.this.this
if arg_value in _geotypes:
typeclass = _geotypes[arg_value]
else:
# DuckDB 1.5.3+ reports SRID as the first positional arg,
# e.g. GEOMETRY('EPSG:2263')
typeclass = dt.GeoSpatial
if srid is None and arg_value.startswith("EPSG:"):
srid = sge.DataTypeParam(
this=sge.Literal.number(arg_value.split(":")[1])
)
else:
typeclass = dt.GeoSpatial
if srid is not None:
Expand All @@ -372,7 +382,15 @@ def _from_sqlglot_GEOGRAPHY(
nullable: bool | None = None,
) -> sge.DataType:
if arg is not None:
typeclass = _geotypes[arg.this.this]
arg_value = arg.this.this
if arg_value in _geotypes:
typeclass = _geotypes[arg_value]
else:
typeclass = dt.GeoSpatial
if srid is None and arg_value.startswith("EPSG:"):
srid = sge.DataTypeParam(
this=sge.Literal.number(arg_value.split(":")[1])
)
else:
typeclass = dt.GeoSpatial
if srid is not None:
Expand Down
3 changes: 2 additions & 1 deletion ibis/backends/tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ibis import literal as L
from ibis.backends.tests.errors import (
DatabricksServerOperationError,
DuckDBBinderException,
DuckDBParserException,
ExaQueryError,
GoogleBadRequest,
Expand Down Expand Up @@ -410,7 +411,7 @@ def test_numeric_literal(con, backend, expr, expected_types):
pytest.mark.notyet(
["duckdb"],
reason="Unsupported precision.",
raises=DuckDBParserException,
raises=(DuckDBParserException, DuckDBBinderException),
),
pytest.mark.notyet(
["pyspark"],
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading