Skip to content

Commit 969d1a5

Browse files
Pushdown uuid functions
Signed-off-by: Aykut Bozkurt <aykut.bozkurt@snowflake.com>
1 parent 2f3c6e8 commit 969d1a5

6 files changed

Lines changed: 389 additions & 1 deletion

File tree

duckdb_pglake/src/duckdb_pglake_extension.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,123 @@ inline void AtanhPG(DataChunk &args, ExpressionState &state, Vector &result)
110110
}
111111

112112

113+
/*
114+
* Extract timestamp from UUID, mimicking Postgres behavior.
115+
* Postgres supports UUID v1 and v7 (above 18), returning NULL for other versions.
116+
* DuckDB's uuid_extract_timestamp only supports v7 and throws an error for others.
117+
*/
118+
inline void UUIDExtractTimestampPG(DataChunk &args, ExpressionState &state, Vector &result)
119+
{
120+
D_ASSERT(args.ColumnCount() == 2);
121+
auto &input_vector = args.data[0];
122+
auto &version_vector = args.data[1];
123+
auto count = args.size();
124+
125+
// Prepare input in unified format
126+
UnifiedVectorFormat vdata;
127+
input_vector.ToUnifiedFormat(count, vdata);
128+
129+
UnifiedVectorFormat vdata_version;
130+
version_vector.ToUnifiedFormat(count, vdata_version);
131+
132+
auto input_data = UnifiedVectorFormat::GetData<hugeint_t>(vdata);
133+
auto pg_version_data = UnifiedVectorFormat::GetData<int32_t>(vdata_version);
134+
auto result_data = FlatVector::GetData<timestamp_t>(result);
135+
auto &result_validity = FlatVector::Validity(result);
136+
137+
for (idx_t i = 0; i < count; i++) {
138+
auto idx = vdata.sel->get_index(i);
139+
140+
// Propagate input NULL
141+
if (!vdata.validity.RowIsValid(idx)) {
142+
result_validity.SetInvalid(i);
143+
continue;
144+
}
145+
146+
auto uuid_val = input_data[idx];
147+
auto pg_version = pg_version_data[idx];
148+
149+
// Check if RFC 4122 variant (bits 10xxxxxx in the variant field)
150+
// The variant field is in byte 8 (counting from 0)
151+
uint8_t variant_byte = static_cast<uint8_t>((uuid_val.lower >> 56) & 0xFF);
152+
if ((variant_byte & 0xc0) != 0x80) {
153+
// Not RFC 4122 variant, return NULL
154+
result_validity.SetInvalid(i);
155+
continue;
156+
}
157+
158+
// Extract version (first 4 bits of byte 6)
159+
uint8_t version = (static_cast<uint8_t>((uuid_val.upper) >> 8) & 0xf0) >> 4;
160+
161+
if (version == 1) {
162+
// UUID v1: Extract timestamp from time_low, time_mid, and time_hi_and_version fields
163+
// Mimic PostgreSQL's uuid_extract_timestamp implementation
164+
165+
// DuckDB stores UUID with XOR flip on the upper 64 bits, so we need to undo it
166+
// to get the original byte values
167+
uint64_t unsigned_upper = static_cast<uint64_t>(uuid_val.upper) ^ (uint64_t(1) << 63);
168+
169+
// Extract individual bytes from the UUID (bytes 0-7 are in upper)
170+
uint8_t data[8];
171+
for (int i = 0; i < 8; i++) {
172+
data[i] = static_cast<uint8_t>((unsigned_upper >> (56 - i * 8)) & 0xFF);
173+
}
174+
175+
// Extract timestamp following PostgreSQL's exact logic
176+
// See: src/backend/utils/adt/uuid.c:uuid_extract_timestamp()
177+
uint64_t tms = ((uint64_t) data[0] << 24)
178+
+ ((uint64_t) data[1] << 16)
179+
+ ((uint64_t) data[2] << 8)
180+
+ ((uint64_t) data[3])
181+
+ ((uint64_t) data[4] << 40)
182+
+ ((uint64_t) data[5] << 32)
183+
+ (((uint64_t) data[6] & 0xf) << 56)
184+
+ ((uint64_t) data[7] << 48);
185+
186+
// Convert 100-ns intervals to microseconds
187+
int64_t timestamp_us = static_cast<int64_t>(tms / 10);
188+
189+
// Adjust from UUID epoch (1582-10-15) to Postgres epoch (2000-01-01)
190+
// This matches PostgreSQL's calculation exactly
191+
constexpr int64_t POSTGRES_EPOCH_JDATE = 2451545; // date2j(2000, 1, 1)
192+
constexpr int64_t UUIDV1_EPOCH_JDATE = 2299161; // date2j(1582, 10, 15)
193+
constexpr int64_t SECS_PER_DAY = 86400;
194+
constexpr int64_t USECS_PER_SEC = 1000000;
195+
constexpr int64_t UUID_TO_PG_EPOCH_US =
196+
(POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
197+
198+
timestamp_us -= UUID_TO_PG_EPOCH_US;
199+
200+
// Convert from Postgres epoch (2000-01-01) to Unix epoch (1970-01-01)
201+
// Unix epoch is 946684800 seconds (30 years) before Postgres epoch
202+
// So we ADD this offset to convert from Postgres timestamp to Unix timestamp
203+
constexpr int64_t PG_TO_UNIX_EPOCH_US = 946684800LL * USECS_PER_SEC;
204+
timestamp_us += PG_TO_UNIX_EPOCH_US;
205+
206+
result_data[i] = timestamp_t{timestamp_us};
207+
}
208+
// UUID v7 is supported in Postgres 18 and above
209+
else if (version == 7 && pg_version >= 180000) {
210+
// UUID v7: Extract timestamp from first 48 bits (Unix timestamp in milliseconds)
211+
int64_t upper = uuid_val.upper;
212+
// Flip the top byte to handle signed representation
213+
upper ^= NumericLimits<int64_t>::Minimum();
214+
int64_t unix_ts_milli = upper >> 16;
215+
216+
// Convert milliseconds to microseconds
217+
constexpr int64_t kMilliToMicro = 1000;
218+
int64_t unix_ts_us = kMilliToMicro * unix_ts_milli;
219+
220+
result_data[i] = timestamp_t{unix_ts_us};
221+
}
222+
else {
223+
// Not a timestamp-containing UUID version, return NULL
224+
result_validity.SetInvalid(i);
225+
}
226+
}
227+
}
228+
229+
113230
/*
114231
* Postgres and DuckDB have different behavior for the SUBSTRING function when
115232
* the length or offset is negative. This function implements the Postgres
@@ -307,6 +424,9 @@ static void LoadInternal(ExtensionLoader &loader) {
307424
substr.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::VARCHAR, SubstringPG));
308425
loader.RegisterFunction(substr);
309426

427+
auto uuid_extract_timestamp_pg = ScalarFunction("uuid_extract_timestamp_pg", {LogicalType::UUID, LogicalType::INTEGER}, LogicalType::TIMESTAMP_TZ, UUIDExtractTimestampPG);
428+
loader.RegisterFunction(uuid_extract_timestamp_pg);
429+
310430
PgLakeUtilityFunctions::RegisterFunctions(loader);
311431
PgLakeFileSystemFunctions::RegisterFunctions(loader);
312432

pg_lake_engine/pg_lake_engine--3.0--3.1.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,10 @@ WITH text_text_map_name AS
4141
(SELECT map_type.create('TEXT','TEXT') AS name)
4242
SELECT map_type.create('TEXT', name) AS text_map_of_text
4343
FROM text_text_map_name;
44+
45+
46+
-- Postgres-side stub for uuid_extract_timestamp_pg(uuid, pg_version_num).
-- It only has to exist in the catalog so that rewritten queries can reference
-- it by name; the body is the shared internal dummy function.
CREATE FUNCTION __lake__internal__nsp__.uuid_extract_timestamp_pg(uuid, integer)
    RETURNS timestamp with time zone
    LANGUAGE C
    STRICT IMMUTABLE PARALLEL SAFE
    AS 'MODULE_PATHNAME', $function$pg_lake_internal_dummy_function$function$;

pg_lake_engine/src/pgduck/rewrite_query.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "catalog/pg_operator.h"
2424
#include "catalog/pg_operator_d.h"
2525
#include "catalog/pg_proc.h"
26+
#include "catalog/pg_type.h"
2627
#include "common/hashfn.h"
2728
#include "pg_lake/extensions/pg_lake_spatial.h"
2829
#include "pg_lake/extensions/postgis.h"
@@ -145,6 +146,9 @@ static ExpressionRewriter GetOperatorRewriter(Oid functionId);
145146
static char *GetOperatorRewriteFunctionName(Oid functionId);
146147

147148
static Node *RewriteFuncExprBtrim(Node *node, void *context);
149+
#if PG_VERSION_NUM >= 170000
150+
static Node *RewriteFuncExprUuidExtractTimestamp(Node *node, void *context);
151+
#endif
148152
static Node *RewriteFunctionCallExpression(Oid functionId, Node *node, void *context);
149153
static Node *RewriteFuncExprCast(Node *node, void *context);
150154
static Node *RewriteFuncExprExtract(Node *node, void *context);
@@ -343,6 +347,13 @@ static FunctionCallRewriteRuleByName BuiltinFunctionCallRewriteRulesByName[] =
343347
"pg_catalog", "decode", RewriteFuncExprDecode, 0
344348
},
345349

350+
#if PG_VERSION_NUM >= 170000
351+
/* uuid functions */
352+
{
353+
"pg_catalog", "uuid_extract_timestamp", RewriteFuncExprUuidExtractTimestamp, 0
354+
},
355+
#endif
356+
346357
};
347358

348359
static OperatorRewriteRuleByName BuiltinOperatorRewriteRulesByName[] =
@@ -1638,6 +1649,45 @@ RewriteFuncExprDecode(Node *node, void *context)
16381649
}
16391650

16401651

1652+
#if PG_VERSION_NUM >= 170000
1653+
/*
1654+
* RewriteFuncExprUuidExtractTimestamp rewrites uuid_extract_timestamp(..) function calls
1655+
* to use the custom uuid_extract_timestamp_pg(..) function that mimics Postgres behavior.
1656+
*
1657+
* Postgres returns NULL for UUID versions that don't contain timestamps (v1 and v7 (>=pg18 ) only).
1658+
* DuckDB's uuid_extract_timestamp throws an error for non-v7 UUIDs.
1659+
*/
1660+
static Node *
1661+
RewriteFuncExprUuidExtractTimestamp(Node *node, void *context)
1662+
{
1663+
FuncExpr *funcExpr = castNode(FuncExpr, node);
1664+
int argCount = list_length(funcExpr->args);
1665+
1666+
/* uuid_extract_timestamp should have exactly 1 argument */
1667+
if (argCount != 1)
1668+
return node;
1669+
1670+
Node *firstArg = linitial(funcExpr->args);
1671+
1672+
if (exprType(firstArg) != UUIDOID)
1673+
return node;
1674+
1675+
/* add the second argument, the Postgres version number */
1676+
funcExpr->args = list_make2(firstArg, MakeIntConst(PG_VERSION_NUM));
1677+
argCount = 2;
1678+
1679+
/* Rewrite to uuid_extract_timestamp_pg(uuid) */
1680+
List *funcName = list_make2(makeString(PG_LAKE_INTERNAL_NSP),
1681+
makeString("uuid_extract_timestamp_pg"));
1682+
Oid argTypes[] = {UUIDOID, INT4OID};
1683+
1684+
funcExpr->funcid = LookupFuncName(funcName, argCount, argTypes, false);
1685+
1686+
return (Node *) funcExpr;
1687+
}
1688+
#endif
1689+
1690+
16411691
/*
16421692
* RewriteFuncExprMapExtract rewrites map_type.extract(..) function calls
16431693
* into remote map_extract(map,key)[1] expressions

pg_lake_engine/src/pgduck/shippable_builtin_functions.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,14 @@ static const PGDuckShippableFunction ShippableBuiltinProcs[] =
443443
{"encode", 'f', 2, {"bytea", "text"}, IsEncodeShippable},
444444
{"decode", 'f', 2, {"text", "text"}, IsDecodeShippable},
445445

446+
#if PG_VERSION_NUM >= 170000
447+
{"uuid_extract_version", 'f', 1, {"uuid"}, NULL},
448+
{"uuid_extract_timestamp", 'f', 1, {"uuid"}, NULL},
449+
#endif
450+
#if PG_VERSION_NUM >= 180000
451+
{"uuidv7", 'f', 0, {}, NULL},
452+
#endif
453+
446454
/* trim() */
447455
};
448456

pg_lake_spatial/tests/pytests/test_internal_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,6 @@ def test_internal_schema(
2121
""",
2222
pg_conn,
2323
)[0][0]
24-
assert result == 63
24+
assert result == 64
2525

2626
pg_conn.rollback()

0 commit comments

Comments
 (0)