Skip to content

Commit 2c1fdab

Browse files
committed
Move subscript code to dedicated file (#773)
The `pgduckdb_options.cpp` file got quite big, and it's also good to separate PG-only code from DuckDB code.
1 parent fa2b37c commit 2c1fdab

File tree

4 files changed

+341
-335
lines changed

4 files changed

+341
-335
lines changed

include/pgduckdb/pgduckdb_background_worker.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include "pgduckdb/pg/declarations.hpp"
4+
35
namespace pgduckdb {
46

57
void InitBackgroundWorkersShmem(void);

src/pg/pgduckdb_subscript.cpp

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
#include "pgduckdb/pgduckdb_metadata_cache.hpp"
2+
#include "pgduckdb/utility/cpp_wrapper.hpp"
3+
4+
extern "C" {
5+
#include "postgres.h"
6+
#include "executor/execExpr.h"
7+
#include "parser/parse_coerce.h"
8+
#include "parser/parse_node.h"
9+
#include "parser/parse_expr.h"
10+
#include "nodes/subscripting.h"
11+
#include "nodes/nodeFuncs.h"
12+
#include "pgduckdb/vendor/pg_list.hpp"
13+
}
14+
15+
namespace pgduckdb {
16+
17+
namespace pg {
18+
19+
/*
 * CoerceSubscriptToText transforms the upper bound of a single subscript and
 * coerces it to TEXT.
 *
 * An error mentioning type_name is raised when:
 *  - the subscript has no upper bound or has a lower bound (i.e. it is a
 *    slice),
 *  - the transformed expression cannot be implicitly coerced to TEXT,
 *  - the transformed expression is not a Const node,
 *  - the constant is NULL.
 *
 * Returns the coerced expression.
 */
Node *
CoerceSubscriptToText(struct ParseState *pstate, A_Indices *subscript, const char *type_name) {
	Node *raw_upper = subscript->uidx;
	if (raw_upper == nullptr) {
		elog(ERROR, "Creating a slice out of %s is not supported", type_name);
	}

	Node *transformed = transformExpr(pstate, raw_upper, pstate->p_expr_kind);
	const int location = exprLocation(raw_upper);
	const Oid source_type = exprType(transformed);

	if (subscript->lidx != nullptr) {
		elog(ERROR, "Creating a slice out of %s is not supported", type_name);
	}

	Node *as_text = coerce_to_target_type(pstate, transformed, source_type, TEXTOID, -1, COERCION_IMPLICIT,
	                                      COERCE_IMPLICIT_CAST, location);
	if (as_text == nullptr) {
		ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("%s subscript must have text type", type_name),
		                parser_errposition(pstate, location)));
	}

	/* The constant checks look at the expression as it was before coercion. */
	if (!IsA(transformed, Const)) {
		ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("%s subscript must be a constant", type_name),
		                parser_errposition(pstate, location)));
	}

	if (castNode(Const, transformed)->constisnull) {
		ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("%s subscript cannot be NULL", type_name),
		                parser_errposition(pstate, location)));
	}

	return as_text;
}
53+
54+
/*
55+
* In Postgres all index operations in a row ar all slices or all plain
56+
* index operations. If you mix them, all are converted to slices.
57+
* There's no difference in representation possible between
58+
* "col[1:2][1]" and "col[1:2][1:]". If you want this seperation you
59+
* need to use parenthesis to seperate: "(col[1:2])[1]"
60+
* This might seem like fairly strange behaviour, but Postgres uses
61+
* this to be able to slice in multi-dimensional arrays and thtis
62+
* behaviour is documented here:
63+
* https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING
64+
*
65+
* This is different from DuckDB, but there's not much we can do about
66+
* that. So we'll have this same behaviour by, which means we need to always
67+
* add the lower subscript to the slice. The lower subscript will be NULL in
68+
* that case.
69+
*
70+
* See also comments on SubscriptingRef in nodes/subscripting.h
71+
*/
72+
void
73+
AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_Indices *subscript, bool isSlice) {
74+
Assert(isSlice || subscript->uidx);
75+
76+
Node *upper_subscript_expr = NULL;
77+
if (subscript->uidx) {
78+
upper_subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind);
79+
}
80+
81+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, upper_subscript_expr);
82+
83+
if (isSlice) {
84+
Node *lower_subscript_expr = NULL;
85+
if (subscript->uidx) {
86+
lower_subscript_expr = transformExpr(pstate, subscript->lidx, pstate->p_expr_kind);
87+
}
88+
sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, lower_subscript_expr);
89+
}
90+
}
91+
92+
/*
93+
* DuckdbSubscriptTransform is called by the parser when a subscripting
94+
* operation is performed on a duckdb type that can be indexed by arbitrary
95+
* expressions. All this does is parse those expressions and make sure the
96+
* subscript returns an an duckdb.unresolved_type again.
97+
*/
98+
void
99+
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
100+
bool isAssignment, const char *type_name) {
101+
/*
102+
* We need to populate our cache for some of the code below. Normally this
103+
* cache is populated at the start of our planner hook, but this function
104+
* is being called from the parser.
105+
*/
106+
if (!pgduckdb::IsExtensionRegistered()) {
107+
elog(ERROR, "BUG: Using %s but the pg_duckdb extension is not installed", type_name);
108+
}
109+
110+
if (isAssignment) {
111+
elog(ERROR, "Assignment to %s is not supported", type_name);
112+
}
113+
114+
if (indirection == NIL) {
115+
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
116+
}
117+
118+
// Transform each subscript expression
119+
foreach_node(A_Indices, subscript, indirection) {
120+
AddSubscriptExpressions(sbsref, pstate, subscript, isSlice);
121+
}
122+
123+
// Set the result type of the subscripting operation
124+
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
125+
sbsref->reftypmod = -1;
126+
}
127+
128+
/*
129+
* DuckdbTextSubscriptTransform is called by the parser when a subscripting
130+
* operation is performed on type that can only be indexed by string literals.
131+
* It has two main puprposes:
132+
* 1. Ensure that the row is being indexed using a string literal
133+
* 2. Ensure that the return type of this index operation is
134+
* duckdb.unresolved_type
135+
*
136+
* Currently this is used for duckdb.row and duckdb.struct types.
137+
*/
138+
void
139+
DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
140+
bool isAssignment, const char *type_name) {
141+
/*
142+
* We need to populate our cache for some of the code below. Normally this
143+
* cache is populated at the start of our planner hook, but this function
144+
* is being called from the parser.
145+
*/
146+
if (!pgduckdb::IsExtensionRegistered()) {
147+
elog(ERROR, "BUG: Using %s but the pg_duckdb extension is not installed", type_name);
148+
}
149+
150+
if (isAssignment) {
151+
elog(ERROR, "Assignment to %s is not supported", type_name);
152+
}
153+
154+
if (indirection == NIL) {
155+
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
156+
}
157+
158+
bool first = true;
159+
160+
// Transform each subscript expression
161+
foreach_node(A_Indices, subscript, indirection) {
162+
/* The first subscript needs to be a TEXT constant, since it should be
163+
* a column reference. But the subscripts after that can be anything,
164+
* DuckDB should interpret those. */
165+
if (first) {
166+
sbsref->refupperindexpr =
167+
lappend(sbsref->refupperindexpr, CoerceSubscriptToText(pstate, subscript, type_name));
168+
if (isSlice) {
169+
sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, NULL);
170+
}
171+
first = false;
172+
continue;
173+
}
174+
175+
AddSubscriptExpressions(sbsref, pstate, subscript, isSlice);
176+
}
177+
178+
// Set the result type of the subscripting operation
179+
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
180+
sbsref->reftypmod = -1;
181+
}
182+
183+
static bool
184+
DuckdbSubscriptCheckSubscripts(ExprState * /*state*/, ExprEvalStep *op, ExprContext * /*econtext*/) {
185+
SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state;
186+
char *type_name = strVal(sbsrefstate->workspace);
187+
elog(ERROR, "Subscripting %s is not supported in the Postgres Executor", type_name);
188+
}
189+
190+
static void
191+
DuckdbSubscriptFetch(ExprState * /*state*/, ExprEvalStep *op, ExprContext * /*econtext*/) {
192+
SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state;
193+
char *type_name = strVal(sbsrefstate->workspace);
194+
elog(ERROR, "Subscripting %s is not supported in the Postgres Executor", type_name);
195+
}
196+
197+
static void
198+
DuckdbSubscriptAssign(ExprState * /*state*/, ExprEvalStep *op, ExprContext * /*econtext*/) {
199+
SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state;
200+
char *type_name = strVal(sbsrefstate->workspace);
201+
elog(ERROR, "Subscripting %s is not supported in the Postgres Executor", type_name);
202+
}
203+
204+
static void
205+
DuckdbSubscriptFetchOld(ExprState * /*state*/, ExprEvalStep *op, ExprContext * /*econtext*/) {
206+
SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state;
207+
char *type_name = strVal(sbsrefstate->workspace);
208+
elog(ERROR, "Subscripting %s is not supported in the Postgres Executor", type_name);
209+
}
210+
211+
/*
212+
* DuckdbSubscriptExecSetup stores a bunch of functions in the methods
213+
* structure. These functions are called by the Postgres executor when a
214+
* subscripting is executed. We need to implement this function, because it is
215+
* called for materialized CTEs. Even in that case the actual functions that
216+
* are stored in methods are never supposed to be called, because pg_duckdb
217+
* shouldn't force usage of DuckDB execution when duckdb types are present in
218+
* the query. So these methods are just stubs that throw an error when called.
219+
*/
220+
void
221+
DuckdbSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState *sbsrefstate,
222+
SubscriptExecSteps *methods, const char *type_name) {
223+
224+
sbsrefstate->workspace = makeString(pstrdup(type_name));
225+
methods->sbs_check_subscripts = DuckdbSubscriptCheckSubscripts;
226+
methods->sbs_fetch = DuckdbSubscriptFetch;
227+
methods->sbs_assign = DuckdbSubscriptAssign;
228+
methods->sbs_fetch_old = DuckdbSubscriptFetchOld;
229+
}
230+
231+
void
232+
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
233+
bool isAssignment) {
234+
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.row");
235+
}
236+
237+
void
238+
DuckdbRowSubscriptExecSetup(const SubscriptingRef *sbsref, SubscriptingRefState *sbsrefstate,
239+
SubscriptExecSteps *methods) {
240+
DuckdbSubscriptExecSetup(sbsref, sbsrefstate, methods, "duckdb.row");
241+
}
242+
243+
static SubscriptRoutines duckdb_row_subscript_routines = {
244+
.transform = DuckdbRowSubscriptTransform,
245+
.exec_setup = DuckdbRowSubscriptExecSetup,
246+
.fetch_strict = false,
247+
.fetch_leakproof = true,
248+
.store_leakproof = true,
249+
};
250+
251+
void
252+
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate,
253+
bool isSlice, bool isAssignment) {
254+
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.unresolved_type");
255+
}
256+
257+
void
258+
DuckdbUnresolvedTypeSubscriptExecSetup(const SubscriptingRef *sbsref, SubscriptingRefState *sbsrefstate,
259+
SubscriptExecSteps *methods) {
260+
DuckdbSubscriptExecSetup(sbsref, sbsrefstate, methods, "duckdb.unresolved_type");
261+
}
262+
263+
static SubscriptRoutines duckdb_unresolved_type_subscript_routines = {
264+
.transform = DuckdbUnresolvedTypeSubscriptTransform,
265+
.exec_setup = DuckdbUnresolvedTypeSubscriptExecSetup,
266+
.fetch_strict = false,
267+
.fetch_leakproof = true,
268+
.store_leakproof = true,
269+
};
270+
271+
void
272+
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
273+
bool isAssignment) {
274+
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.struct");
275+
}
276+
277+
void
278+
DuckdbStructSubscriptExecSetup(const SubscriptingRef *sbsref, SubscriptingRefState *sbsrefstate,
279+
SubscriptExecSteps *methods) {
280+
DuckdbSubscriptExecSetup(sbsref, sbsrefstate, methods, "duckdb.struct");
281+
}
282+
283+
static SubscriptRoutines duckdb_struct_subscript_routines = {
284+
.transform = DuckdbStructSubscriptTransform,
285+
.exec_setup = DuckdbStructSubscriptExecSetup,
286+
.fetch_strict = false,
287+
.fetch_leakproof = true,
288+
.store_leakproof = true,
289+
};
290+
291+
void
292+
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
293+
bool isAssignment) {
294+
DuckdbSubscriptTransform(sbsref, indirection, pstate, isSlice, isAssignment, "duckdb.map");
295+
}
296+
297+
void
298+
DuckdbMapSubscriptExecSetup(const SubscriptingRef *sbsref, SubscriptingRefState *sbsrefstate,
299+
SubscriptExecSteps *methods) {
300+
DuckdbSubscriptExecSetup(sbsref, sbsrefstate, methods, "duckdb.map");
301+
}
302+
303+
static SubscriptRoutines duckdb_map_subscript_routines = {
304+
.transform = DuckdbMapSubscriptTransform,
305+
.exec_setup = DuckdbMapSubscriptExecSetup,
306+
.fetch_strict = false,
307+
.fetch_leakproof = true,
308+
.store_leakproof = true,
309+
};
310+
311+
} // namespace pg
312+
313+
} // namespace pgduckdb
314+
315+
extern "C" {
316+
317+
DECLARE_PG_FUNCTION(duckdb_row_subscript) {
318+
PG_RETURN_POINTER(&pgduckdb::pg::duckdb_row_subscript_routines);
319+
}
320+
321+
DECLARE_PG_FUNCTION(duckdb_unresolved_type_subscript) {
322+
PG_RETURN_POINTER(&pgduckdb::pg::duckdb_unresolved_type_subscript_routines);
323+
}
324+
325+
DECLARE_PG_FUNCTION(duckdb_struct_subscript) {
326+
PG_RETURN_POINTER(&pgduckdb::pg::duckdb_struct_subscript_routines);
327+
}
328+
329+
DECLARE_PG_FUNCTION(duckdb_map_subscript) {
330+
PG_RETURN_POINTER(&pgduckdb::pg::duckdb_map_subscript_routines);
331+
}
332+
}

0 commit comments

Comments
 (0)