Skip to content

Commit fa2b37c

Browse files
committed
Support subscripting on duckdb.map (#773)
In #689 the subscript operator was not implemented for the `duckdb.map` type. This worked fine for MAPs returned by `duckdb.query()`, because those are returned as `duckdb.unresolved_type`, but it did not work when reading a MAP from a `duckdb` table column, i.e. a MotherDuck table. This commit implements the subscript operator directly on the `duckdb.map` type. Fixes #732
1 parent e7d0112 commit fa2b37c

File tree

3 files changed

+117
-57
lines changed

3 files changed

+117
-57
lines changed

sql/pg_duckdb--0.3.0--1.0.0.sql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -686,10 +686,12 @@ LANGUAGE C AS 'MODULE_PATHNAME', 'pgduckdb_enable_motherduck';
686686
CREATE TYPE duckdb.map;
687687
CREATE FUNCTION duckdb.map_in(cstring) RETURNS duckdb.map AS 'MODULE_PATHNAME', 'duckdb_map_in' LANGUAGE C IMMUTABLE STRICT;
688688
CREATE FUNCTION duckdb.map_out(duckdb.map) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_map_out' LANGUAGE C IMMUTABLE STRICT;
689+
CREATE FUNCTION duckdb.map_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_map_subscript' LANGUAGE C IMMUTABLE STRICT;
689690
CREATE TYPE duckdb.map(
690691
INTERNALLENGTH = VARIABLE,
691692
INPUT = duckdb.map_in,
692-
OUTPUT = duckdb.map_out
693+
OUTPUT = duckdb.map_out,
694+
SUBSCRIPT = duckdb.map_subscript
693695
);
694696

695697
-- Drop legacy secret objects

src/pgduckdb_options.cpp

Lines changed: 73 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -422,17 +422,6 @@ CoerceSubscriptToText(struct ParseState *pstate, A_Indices *subscript, const cha
422422
return coerced_expr;
423423
}
424424

425-
Node *
426-
CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
427-
return CoerceSubscriptToText(pstate, subscript, "duckdb.row");
428-
}
429-
430-
// Cloned implementation from CoerceRowSubscriptToText
431-
Node *
432-
CoerceStructSubscriptToText(struct ParseState *pstate, A_Indices *subscript) {
433-
return CoerceSubscriptToText(pstate, subscript, "duckdb.struct");
434-
}
435-
436425
/*
437426
* In Postgres all index operations in a row are all slices or all plain
438427
* index operations. If you mix them, all are converted to slices.
@@ -473,9 +462,9 @@ AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_In
473462

474463
/*
475464
* DuckdbSubscriptTransform is called by the parser when a subscripting
476-
* operation is performed on a duckdb.row. It has two main puprposes:
477-
* 1. Ensure that the row is being indexed using a string literal
478-
* 2. Ensure that the return type of this index operation is duckdb.unresolved_type
465+
* operation is performed on a duckdb type that can be indexed by arbitrary
466+
* expressions. All this does is parse those expressions and make sure the
467+
* subscript returns a duckdb.unresolved_type again.
479468
*/
480469
void
481470
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
@@ -497,6 +486,46 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
497486
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
498487
}
499488

489+
// Transform each subscript expression
490+
foreach_node(A_Indices, subscript, indirection) {
491+
AddSubscriptExpressions(sbsref, pstate, subscript, isSlice);
492+
}
493+
494+
// Set the result type of the subscripting operation
495+
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
496+
sbsref->reftypmod = -1;
497+
}
498+
499+
/*
500+
* DuckdbTextSubscriptTransform is called by the parser when a subscripting
501+
* operation is performed on a type that can only be indexed by string literals.
502+
* It has two main purposes:
503+
* 1. Ensure that the row is being indexed using a string literal
504+
* 2. Ensure that the return type of this index operation is
505+
* duckdb.unresolved_type
506+
*
507+
* Currently this is used for duckdb.row and duckdb.struct types.
508+
*/
509+
void
510+
DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
511+
bool isAssignment, const char *type_name) {
512+
/*
513+
* We need to populate our cache for some of the code below. Normally this
514+
* cache is populated at the start of our planner hook, but this function
515+
* is being called from the parser.
516+
*/
517+
if (!pgduckdb::IsExtensionRegistered()) {
518+
elog(ERROR, "BUG: Using %s but the pg_duckdb extension is not installed", type_name);
519+
}
520+
521+
if (isAssignment) {
522+
elog(ERROR, "Assignment to %s is not supported", type_name);
523+
}
524+
525+
if (indirection == NIL) {
526+
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
527+
}
528+
500529
bool first = true;
501530

502531
// Transform each subscript expression
@@ -505,7 +534,8 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
505534
* a column reference. But the subscripts after that can be anything,
506535
* DuckDB should interpret those. */
507536
if (first) {
508-
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, CoerceRowSubscriptToText(pstate, subscript));
537+
sbsref->refupperindexpr =
538+
lappend(sbsref->refupperindexpr, CoerceSubscriptToText(pstate, subscript, type_name));
509539
if (isSlice) {
510540
sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, NULL);
511541
}
@@ -524,13 +554,13 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
524554
void
525555
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
526556
bool isAssignment) {
527-
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.row");
557+
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.row");
528558
}
529559

530560
void
531561
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
532562
bool isAssignment) {
533-
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.struct");
563+
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.struct");
534564
}
535565

536566
static bool
@@ -617,52 +647,40 @@ DECLARE_PG_FUNCTION(duckdb_struct_subscript) {
617647
PG_RETURN_POINTER(&duckdb_struct_subscript_routines);
618648
}
619649

620-
/*
621-
* DuckdbUnresolvedTypeSubscriptTransform is called by the parser when a
622-
* subscripting operation is performed on a duckdb.unresolved_type. All this
623-
* does is parse ensre that any subscript on duckdb.unresolved_type returns an
624-
* unrsolved type again.
625-
*/
626650
void
627-
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate,
628-
bool isSlice, bool isAssignment) {
629-
/*
630-
* We need to populate our cache for some of the code below. Normally this
631-
* cache is populated at the start of our planner hook, but this function
632-
* is being called from the parser.
633-
*/
634-
if (!pgduckdb::IsExtensionRegistered()) {
635-
elog(ERROR, "BUG: Using duckdb.unresolved_type but the pg_duckdb extension is not installed");
636-
}
651+
DuckdbMapSubscriptExecSetup(const SubscriptingRef *sbsref, SubscriptingRefState *sbsrefstate,
652+
SubscriptExecSteps *methods) {
653+
DuckdbSubscriptExecSetup(sbsref, sbsrefstate, methods, "duckdb.map");
654+
}
637655

638-
if (isAssignment) {
639-
elog(ERROR, "Assignment to duckdb.unresolved_type is not supported");
640-
}
656+
void
657+
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
658+
bool isAssignment) {
659+
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.map");
660+
}
641661

642-
if (indirection == NIL) {
643-
elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported");
644-
}
662+
static SubscriptRoutines duckdb_map_subscript_routines = {
663+
.transform = DuckdbMapSubscriptTransform,
664+
.exec_setup = DuckdbMapSubscriptExecSetup,
665+
.fetch_strict = false,
666+
.fetch_leakproof = true,
667+
.store_leakproof = true,
668+
};
645669

646-
// Transform each subscript expression
647-
foreach_node(A_Indices, subscript, indirection) {
648-
AddSubscriptExpressions(sbsref, pstate, subscript, isSlice);
649-
}
670+
DECLARE_PG_FUNCTION(duckdb_map_subscript) {
671+
PG_RETURN_POINTER(&duckdb_map_subscript_routines);
672+
}
650673

651-
// Set the result type of the subscripting operation
652-
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
653-
sbsref->reftypmod = -1;
674+
void
675+
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate,
676+
bool isSlice, bool isAssignment) {
677+
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.unresolved_type");
654678
}
655679

656-
/*
657-
* DuckdbUnresolvedTypeSubscriptExecSetup is called by the executor when a
658-
* subscripting operation is performed on a duckdb.unresolved_type. This should
659-
* never happen, because any query that contains a duckdb.unresolved_type should
660-
* automatically be use DuckDB execution.
661-
*/
662680
void
663-
DuckdbUnresolvedTypeSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/,
664-
SubscriptExecSteps * /*exprstate*/) {
665-
elog(ERROR, "Subscripting duckdb.unresolved_type is not supported in the Postgres Executor");
681+
DuckdbUnresolvedTypeSubscriptExecSetup(const SubscriptingRef *sbsref, SubscriptingRefState *sbsrefstate,
682+
SubscriptExecSteps *methods) {
683+
DuckdbSubscriptExecSetup(sbsref, sbsrefstate, methods, "duckdb.unresolved_type");
666684
}
667685

668686
static SubscriptRoutines duckdb_unresolved_type_subscript_routines = {

test/pycheck/motherduck_test.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
library for that purpose.
77
"""
88

9-
from .utils import Cursor, Postgres, PG_MAJOR_VERSION
9+
import datetime
10+
11+
from .utils import Cursor, Postgres, Duckdb, PG_MAJOR_VERSION
1012
from .motherduck_token_helper import (
1113
can_run_md_multi_user_tests,
1214
can_run_md_tests,
@@ -196,3 +198,41 @@ def test_md_alter_table(md_cur: Cursor):
196198
# duckdb after the table is created
197199
with pytest.raises(psycopg.errors.FeatureNotSupported):
198200
md_cur.sql("ALTER TABLE t SET ACCESS METHOD duckdb")
201+
202+
203+
def test_md_duckdb_only_types(md_cur: Cursor, ddb: Duckdb):
204+
ddb.sql("""
205+
CREATE TABLE t1(
206+
m MAP(INT, VARCHAR),
207+
s STRUCT(v VARCHAR, i INTEGER),
208+
u UNION(t time, d date),
209+
)""")
210+
ddb.sql("""
211+
INSERT INTO t1 VALUES (
212+
MAP{1: 'abc'},
213+
{'v': 'struct abc', 'i': 123},
214+
'12:00'::time,
215+
), (
216+
MAP{2: 'def'},
217+
{'v': 'struct def', 'i': 456},
218+
'2023-10-01'::date,
219+
)
220+
""")
221+
md_cur.wait_until_table_exists("t1")
222+
assert md_cur.sql("""select * from t1""") == [
223+
("{1=abc}", "{'v': struct abc, 'i': 123}", "12:00:00"),
224+
("{2=def}", "{'v': struct def, 'i': 456}", "2023-10-01"),
225+
]
226+
227+
assert md_cur.sql("""select m[1] from t1""") == ["abc", None]
228+
assert md_cur.sql("""select s['v'] from t1""") == ["struct abc", "struct def"]
229+
assert md_cur.sql("""select s['i'] from t1""") == [123, 456]
230+
assert md_cur.sql("""select union_extract(u,'t') from t1""") == [
231+
datetime.time(12, 0),
232+
None,
233+
]
234+
assert md_cur.sql("""select union_extract(u, 'd') from t1""") == [
235+
None,
236+
datetime.date(2023, 10, 1),
237+
]
238+
assert md_cur.sql("""select union_tag(u) from t1""") == ["t", "d"]

0 commit comments

Comments
 (0)