ibis-project · JonAnCla · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py
@@ -1342,10 +1342,33 @@ def _to_duckdb_relation(
         """
         self._run_pre_execute_hooks(expr)
         table_expr = expr.as_table()
-        sql = self.compile(table_expr, limit=limit, params=params, **kwargs)
-        if table_expr.schema().geospatial:
+        geocols = table_expr.schema().geospatial
+
+        if geocols:
             self._load_extensions(["spatial"])
-        return self.con.sql(sql)
+
+            # DuckDB 1.4.x: to_arrow_table() returns DuckDB's internal binary
+            # format for GEOMETRY columns, not WKB. Wrap geometry columns with
+            # ST_ASWKB so downstream Arrow → Shapely conversion gets valid WKB.
+            # DuckDB 1.5.x: new method to_arrow_reader() returns geoarrow.wkb natively,
+            # so no wrapping is needed (and is handled in to_pyarrow / execute).
+            if not hasattr(duckdb.DuckDBPyRelation, "to_arrow_reader"):
+                quoted = self.compiler.quoted
+                geocols_set = set(geocols)
+                inner = self.compiler.to_sqlglot(
+                    table_expr, limit=limit, params=params
+                ).subquery("t")
+                cols = [
+                    self.compiler.f.st_aswkb(
+                        sg.column(col, quoted=quoted)
+                    ).as_(col, quoted=quoted)
+                    if col in geocols_set
+                    else sg.column(col, quoted=quoted)
+                    for col in table_expr.schema().keys()
+                ]
+                return self.con.sql(sg.select(*cols).from_(inner).sql("duckdb"))
+
+        return self.con.sql(self.compile(table_expr, limit=limit, params=params, **kwargs))
 
     def to_pyarrow_batches(
         self,
@@ -1375,20 +1398,15 @@ def to_pyarrow_batches(
         chunk_size
             The number of rows to fetch per batch
         """
-        import pyarrow as pa
         import pyarrow_hotfix  # noqa: F401
 
-        self._run_pre_execute_hooks(expr)
-        table = expr.as_table()
-        sql = self.compile(table, limit=limit, params=params)
-
-        def batch_producer(cur):
-            yield from cur.fetch_record_batch(rows_per_batch=chunk_size)
-
-        result = self.raw_sql(sql)
-        return pa.ipc.RecordBatchReader.from_batches(
-            expr.as_table().schema().to_pyarrow(), batch_producer(result)
-        )
+        rel = self._to_duckdb_relation(expr, params=params, limit=limit)
+        if hasattr(rel, "to_arrow_reader"):
+            # DuckDB 1.5.x: fetch_record_batch is deprecated and causes ABI errors
+            return rel.to_arrow_reader(batch_size=chunk_size)
+        else:
+            # DuckDB 1.4.x: fetch_arrow_reader accepts a positional batch_size
+            return rel.fetch_arrow_reader(chunk_size)
 
     def to_pyarrow(
         self,
@@ -1401,9 +1419,15 @@ def to_pyarrow(
     ) -> pa.Table:
         from ibis.backends.duckdb.converter import DuckDBPyArrowData
 
-        table = self._to_duckdb_relation(
-            expr, params=params, limit=limit, **kwargs
-        ).to_arrow_table()
+        rel = self._to_duckdb_relation(expr, params=params, limit=limit, **kwargs)
+        if hasattr(rel, "to_arrow_reader"):
+            # DuckDB 1.5.x: to_arrow_reader avoids InternalException when result
+            # contains GEOMETRY('EPSG:xxx') columns.
+            # See https://github.com/duckdb/duckdb-python/issues/475
+            table = rel.to_arrow_reader().read_all()
+        else:
+            # DuckDB 1.4.x
+            table = rel.to_arrow_table()
         return expr.__pyarrow_result__(table, data_mapper=DuckDBPyArrowData)
 
     def execute(
@@ -1423,7 +1447,14 @@ def execute(
         from ibis.backends.duckdb.converter import DuckDBPandasData
 
         rel = self._to_duckdb_relation(expr, params=params, limit=limit, **kwargs)
-        table = rel.to_arrow_table()
+        if hasattr(rel, "to_arrow_reader"):
+            # DuckDB 1.5.x: to_arrow_reader avoids InternalException when result
+            # contains GEOMETRY('EPSG:xxx') columns.
+            # See https://github.com/duckdb/duckdb-python/issues/475
+            table = rel.to_arrow_reader().read_all()
+        else:
+            # DuckDB 1.4.x
+            table = rel.to_arrow_table()
 
         df = pd.DataFrame(
             {

diff --git a/...nds/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql b/...nds/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr0/out.sql
@@ -1,7 +1,2 @@
 SELECT
-  *
-  REPLACE (ST_ASWKB("p") AS "p")
-FROM (
-  SELECT
-    ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
-)
+  ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
diff --git a/...nds/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql b/...nds/duckdb/tests/snapshots/test_geospatial/test_literal_geospatial_explicit/expr1/out.sql
@@ -1,7 +1,2 @@
 SELECT
-  *
-  REPLACE (ST_ASWKB("p") AS "p")
-FROM (
-  SELECT
-    ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
-)
+  ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py
@@ -434,9 +434,8 @@ def test_pyarrow_batches_chunk_size(con):  # 10443
     assert len(next(batches)) == 800
     assert len(next(batches)) == 800
 
-    batches = con.to_pyarrow_batches(t, chunk_size=-1)
     with pytest.raises(TypeError):
-        next(batches)
+        con.to_pyarrow_batches(t, chunk_size=-1)
 
 
 @pytest.mark.parametrize(

diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py
@@ -200,8 +200,18 @@ def test_geospatial_buffer_point(zones, zones_gdf):
     gtm.assert_geoseries_equal(buffer.to_pandas(), gp_buffer, check_crs=False)
 
 
-@pytest.mark.xfail_version(
-    duckdb=["shapely>=2.1.0"], raises=AssertionError, reason="numerics are different"
+@pytest.mark.xfail(
+    condition=(
+        vparse(duckdb.__version__) < vparse("1.5")
+        and vparse(shapely.__version__) >= vparse("2.1.0")
+    ),
+    raises=AssertionError,
+    reason=(
+        "DuckDB <1.5 spatial uses an older geometry engine for ST_BUFFER "
+        "that produces slightly different polygon vertex coordinates; "
+        "shapely>=2.1.0 tightened geometry equality comparisons, making "
+        "the mismatch visible in assert_geoseries_equal"
+    ),
 )
 def test_geospatial_buffer(zones, zones_gdf):
     buffer = zones.geom.buffer(100.0)
@@ -463,7 +473,10 @@ def no_roundtrip(
             None,
             {},
             None,
-            marks=no_roundtrip(reason="duckdb wkb doesn't preserve the geometry type"),
+            marks=no_roundtrip(
+                reason="duckdb wkb doesn't preserve the geometry type",
+                raises=duckdb.Error,
+            ),
             id="selafin",
         ),
         param("JML", None, {}, None, id="jml"),
@@ -476,6 +489,9 @@ def no_roundtrip(
             marks=no_roundtrip(reason="can't read the written file"),
             id="mvt",
         ),
+        # NOTE: Writing MapML fails in DuckDB 1.5.3 with
+        # "basic_string: construction from null is not valid"
+        # https://github.com/duckdb/duckdb-spatial/issues/818
         param("MapML", None, {}, None, id="mapml"),
         param(
             "PMTiles",
@@ -527,7 +543,12 @@ def test_to_geo(con, driver, canonical_extension, kwargs, preproc, tmp_path):
                 reason="GDAL_DATA not set",
             ),
         ),
-        "GEORSS",
+        param(
+            # NOTE: Writing GeoRSS fails in DuckDB 1.5.3 with
+            # "basic_string: construction from null is not valid"
+            # https://github.com/duckdb/duckdb-spatial/issues/818
+            "GEORSS",
+        ),
     ],
 )
 def test_to_geo_geom_only(con, driver, tmp_path):

diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py
@@ -115,25 +115,7 @@ def to_sqlglot(
         limit: str | None = None,
         params: Mapping[ir.Expr, Any] | None = None,
     ):
-        sql = super().to_sqlglot(expr, limit=limit, params=params)
-
-        table_expr = expr.as_table()
-        geocols = table_expr.schema().geospatial
-
-        if not geocols:
-            return sql
-
-        quoted = self.quoted
-        return sg.select(
-            sge.Star(
-                replace=[
-                    self.f.st_aswkb(sg.column(col, quoted=quoted)).as_(
-                        col, quoted=quoted
-                    )
-                    for col in geocols
-                ]
-            )
-        ).from_(sql.subquery())
+        return super().to_sqlglot(expr, limit=limit, params=params)
 
     def visit_StructColumn(self, op, *, names, values):
         return sge.Struct.from_arg_list(
@@ -263,6 +245,11 @@ def visit_ArrayZip(self, op, *, arg):
         any_arg_null = sg.or_(*(arr.is_(NULL) for arr in arg))
         return self.if_(any_arg_null, NULL, zipped_arrays)
 
+    def visit_ArrayStringJoin(self, op, *, arg, sep):
+        return self.if_(
+            self.f.len(arg) > 0, self.f.array_to_string(arg, sep), NULL
+        )
+
     def visit_Array(self, op, *, exprs):
         return self.cast(self.f.array(*exprs), op.dtype)
 

diff --git a/ibis/backends/sql/datatypes.py b/ibis/backends/sql/datatypes.py
@@ -357,7 +357,17 @@ def _from_sqlglot_GEOMETRY(
         nullable: bool | None = None,
     ) -> sge.DataType:
         if arg is not None:
-            typeclass = _geotypes[arg.this.this]
+            arg_value = arg.this.this
+            if arg_value in _geotypes:
+                typeclass = _geotypes[arg_value]
+            else:
+                # DuckDB 1.5.3+ reports SRID as the first positional arg,
+                # e.g. GEOMETRY('EPSG:2263')
+                typeclass = dt.GeoSpatial
+                if srid is None and arg_value.startswith("EPSG:"):
+                    srid = sge.DataTypeParam(
+                        this=sge.Literal.number(arg_value.split(":")[1])
+                    )
         else:
             typeclass = dt.GeoSpatial
         if srid is not None:
@@ -372,7 +382,15 @@ def _from_sqlglot_GEOGRAPHY(
         nullable: bool | None = None,
     ) -> sge.DataType:
         if arg is not None:
-            typeclass = _geotypes[arg.this.this]
+            arg_value = arg.this.this
+            if arg_value in _geotypes:
+                typeclass = _geotypes[arg_value]
+            else:
+                typeclass = dt.GeoSpatial
+                if srid is None and arg_value.startswith("EPSG:"):
+                    srid = sge.DataTypeParam(
+                        this=sge.Literal.number(arg_value.split(":")[1])
+                    )
         else:
             typeclass = dt.GeoSpatial
         if srid is not None:

diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py
@@ -16,6 +16,7 @@
 from ibis import literal as L
 from ibis.backends.tests.errors import (
     DatabricksServerOperationError,
+    DuckDBBinderException,
     DuckDBParserException,
     ExaQueryError,
     GoogleBadRequest,
@@ -410,7 +411,7 @@ def test_numeric_literal(con, backend, expr, expected_types):
                 pytest.mark.notyet(
                     ["duckdb"],
                     reason="Unsupported precision.",
-                    raises=DuckDBParserException,
+                    raises=(DuckDBParserException, DuckDBBinderException),
                 ),
                 pytest.mark.notyet(
                     ["pyspark"],

diff --git a/requirements-dev.txt b/requirements-dev.txt