Skip to content

Commit bee896e

Browse files
Pushdown uuid functions
Signed-off-by: Aykut Bozkurt <aykut.bozkurt@snowflake.com>
1 parent ed75a48 commit bee896e

6 files changed

Lines changed: 388 additions & 1 deletion

File tree

duckdb_pglake/src/duckdb_pglake_extension.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,123 @@ inline void AtanhPG(DataChunk &args, ExpressionState &state, Vector &result)
109109
}
110110

111111

112+
/*
113+
* Extract timestamp from UUID, mimicking Postgres behavior.
114+
* Postgres supports UUID v1 and v7 (above 18), returning NULL for other versions.
115+
* DuckDB's uuid_extract_timestamp only supports v7 and throws an error for others.
116+
*/
117+
inline void UUIDExtractTimestampPG(DataChunk &args, ExpressionState &state, Vector &result)
118+
{
119+
D_ASSERT(args.ColumnCount() == 2);
120+
auto &input_vector = args.data[0];
121+
auto &version_vector = args.data[1];
122+
auto count = args.size();
123+
124+
// Prepare input in unified format
125+
UnifiedVectorFormat vdata;
126+
input_vector.ToUnifiedFormat(count, vdata);
127+
128+
UnifiedVectorFormat vdata_version;
129+
version_vector.ToUnifiedFormat(count, vdata_version);
130+
131+
auto input_data = UnifiedVectorFormat::GetData<hugeint_t>(vdata);
132+
auto pg_version_data = UnifiedVectorFormat::GetData<int32_t>(vdata_version);
133+
auto result_data = FlatVector::GetData<timestamp_t>(result);
134+
auto &result_validity = FlatVector::Validity(result);
135+
136+
for (idx_t i = 0; i < count; i++) {
137+
auto idx = vdata.sel->get_index(i);
138+
auto version_idx = vdata_version.sel->get_index(i);
139+
140+
// Propagate input NULL
141+
if (!vdata.validity.RowIsValid(idx)) {
142+
result_validity.SetInvalid(i);
143+
continue;
144+
}
145+
146+
auto uuid_val = input_data[idx];
147+
auto pg_version = pg_version_data[version_idx];
148+
149+
// Check if RFC 4122 variant (bits 10xxxxxx in the variant field)
150+
// The variant field is in byte 8 (counting from 0)
151+
uint8_t variant_byte = static_cast<uint8_t>((uuid_val.lower >> 56) & 0xFF);
152+
if ((variant_byte & 0xc0) != 0x80) {
153+
// Not RFC 4122 variant, return NULL
154+
result_validity.SetInvalid(i);
155+
continue;
156+
}
157+
158+
// Extract version (first 4 bits of byte 6)
159+
// DuckDB stores UUID with XOR flip on the upper 64 bits, so we need to undo it
160+
uint64_t unsigned_upper = static_cast<uint64_t>(uuid_val.upper) ^ (uint64_t(1) << 63);
161+
uint8_t version = (static_cast<uint8_t>((unsigned_upper) >> 8) & 0xf0) >> 4;
162+
163+
if (version == 1) {
164+
// UUID v1: Extract timestamp from time_low, time_mid, and time_hi_and_version fields
165+
// Mimic PostgreSQL's uuid_extract_timestamp implementation
166+
167+
// Extract individual bytes from the UUID (bytes 0-7 are in upper)
168+
// We already have unsigned_upper computed above
169+
uint8_t data[8];
170+
for (int j = 0; j < 8; j++) {
171+
data[j] = static_cast<uint8_t>((unsigned_upper >> (56 - j * 8)) & 0xFF);
172+
}
173+
174+
// Extract timestamp following PostgreSQL's exact logic
175+
// See: src/backend/utils/adt/uuid.c:uuid_extract_timestamp()
176+
uint64_t tms = ((uint64_t) data[0] << 24)
177+
+ ((uint64_t) data[1] << 16)
178+
+ ((uint64_t) data[2] << 8)
179+
+ ((uint64_t) data[3])
180+
+ ((uint64_t) data[4] << 40)
181+
+ ((uint64_t) data[5] << 32)
182+
+ (((uint64_t) data[6] & 0xf) << 56)
183+
+ ((uint64_t) data[7] << 48);
184+
185+
// Convert 100-ns intervals to microseconds
186+
int64_t timestamp_us = static_cast<int64_t>(tms / 10);
187+
188+
// Adjust from UUID epoch (1582-10-15) to Postgres epoch (2000-01-01)
189+
// This matches PostgreSQL's calculation exactly
190+
constexpr int64_t POSTGRES_EPOCH_JDATE = 2451545; // date2j(2000, 1, 1)
191+
constexpr int64_t UUIDV1_EPOCH_JDATE = 2299161; // date2j(1582, 10, 15)
192+
constexpr int64_t SECS_PER_DAY = 86400;
193+
constexpr int64_t USECS_PER_SEC = 1000000;
194+
constexpr int64_t UUID_TO_PG_EPOCH_US =
195+
(POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
196+
197+
timestamp_us -= UUID_TO_PG_EPOCH_US;
198+
199+
// Convert from Postgres epoch (2000-01-01) to Unix epoch (1970-01-01)
200+
// Unix epoch is 946684800 seconds (30 years) before Postgres epoch
201+
// So we ADD this offset to convert from Postgres timestamp to Unix timestamp
202+
constexpr int64_t PG_TO_UNIX_EPOCH_US = 946684800LL * USECS_PER_SEC;
203+
timestamp_us += PG_TO_UNIX_EPOCH_US;
204+
205+
result_data[i] = timestamp_t{timestamp_us};
206+
}
207+
// UUID v7 is supported in Postgres 18 and above
208+
else if (version == 7 && pg_version >= 180000) {
209+
// UUID v7: Extract timestamp from first 48 bits (Unix timestamp in milliseconds)
210+
int64_t upper = uuid_val.upper;
211+
// Flip the top byte to handle signed representation
212+
upper ^= NumericLimits<int64_t>::Minimum();
213+
int64_t unix_ts_milli = upper >> 16;
214+
215+
// Convert milliseconds to microseconds
216+
constexpr int64_t kMilliToMicro = 1000;
217+
int64_t unix_ts_us = kMilliToMicro * unix_ts_milli;
218+
219+
result_data[i] = timestamp_t{unix_ts_us};
220+
}
221+
else {
222+
// Not a timestamp-containing UUID version, return NULL
223+
result_validity.SetInvalid(i);
224+
}
225+
}
226+
}
227+
228+
112229
/*
113230
* InitcapPG implements the Postgres initcap(text) function for the
114231
* C collation.
@@ -348,6 +465,9 @@ static void LoadInternal(ExtensionLoader &loader) {
348465
substr.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::VARCHAR, SubstringPG));
349466
loader.RegisterFunction(substr);
350467

468+
auto uuid_extract_timestamp_pg = ScalarFunction("uuid_extract_timestamp_pg", {LogicalType::UUID, LogicalType::INTEGER}, LogicalType::TIMESTAMP_TZ, UUIDExtractTimestampPG);
469+
loader.RegisterFunction(uuid_extract_timestamp_pg);
470+
351471
PgLakeUtilityFunctions::RegisterFunctions(loader);
352472
PgLakeFileSystemFunctions::RegisterFunctions(loader);
353473

pg_lake_engine/pg_lake_engine--3.2--3.3.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,9 @@ CREATE FUNCTION __lake__internal__nsp__.initcap_pg(text)
1010
LANGUAGE C
1111
IMMUTABLE PARALLEL SAFE STRICT
1212
AS 'MODULE_PATHNAME', $function$pg_lake_internal_dummy_function$function$;
13+
14+
-- Declares uuid_extract_timestamp_pg(uuid, integer) returning timestamptz in
-- the internal namespace. It is bound to the C symbol
-- pg_lake_internal_dummy_function and marked IMMUTABLE PARALLEL SAFE STRICT.
-- NOTE(review): the symbol name suggests a placeholder body, i.e. this
-- declaration presumably exists only so the name resolves on the Postgres
-- side -- confirm.
CREATE FUNCTION __lake__internal__nsp__.uuid_extract_timestamp_pg(uuid, integer)
15+
RETURNS timestamp with time zone
16+
LANGUAGE C
17+
IMMUTABLE PARALLEL SAFE STRICT
18+
AS 'MODULE_PATHNAME', $function$pg_lake_internal_dummy_function$function$;

pg_lake_engine/src/pgduck/rewrite_query.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "catalog/pg_operator.h"
2525
#include "catalog/pg_operator_d.h"
2626
#include "catalog/pg_proc.h"
27+
#include "catalog/pg_type.h"
2728
#include "common/hashfn.h"
2829
#include "pg_lake/extensions/pg_lake_spatial.h"
2930
#include "pg_lake/extensions/postgis.h"
@@ -146,6 +147,9 @@ static ExpressionRewriter GetOperatorRewriter(Oid functionId);
146147
static char *GetOperatorRewriteFunctionName(Oid functionId);
147148

148149
static Node *RewriteFuncExprBtrim(Node *node, void *context);
150+
#if PG_VERSION_NUM >= 170000
151+
static Node *RewriteFuncExprUuidExtractTimestamp(Node *node, void *context);
152+
#endif
149153
static Node *RewriteFunctionCallExpression(Oid functionId, Node *node, void *context);
150154
static Node *RewriteFuncExprCast(Node *node, void *context);
151155
static Node *RewriteFuncExprExtract(Node *node, void *context);
@@ -350,6 +354,13 @@ static FunctionCallRewriteRuleByName BuiltinFunctionCallRewriteRulesByName[] =
350354
"pg_catalog", "decode", RewriteFuncExprDecode, 0
351355
},
352356

357+
#if PG_VERSION_NUM >= 170000
358+
/* uuid functions */
359+
{
360+
"pg_catalog", "uuid_extract_timestamp", RewriteFuncExprUuidExtractTimestamp, 0
361+
},
362+
#endif
363+
353364
};
354365

355366
static OperatorRewriteRuleByName BuiltinOperatorRewriteRulesByName[] =
@@ -1689,6 +1700,45 @@ RewriteFuncExprDecode(Node *node, void *context)
16891700
}
16901701

16911702

1703+
#if PG_VERSION_NUM >= 170000
1704+
/*
1705+
* RewriteFuncExprUuidExtractTimestamp rewrites uuid_extract_timestamp(..) function calls
1706+
* to use the custom uuid_extract_timestamp_pg(..) function that mimics Postgres behavior.
1707+
*
1708+
* Postgres returns NULL for UUID versions that don't contain timestamps (v1 and v7 (>=pg18 ) only).
1709+
* DuckDB's uuid_extract_timestamp throws an error for non-v7 UUIDs.
1710+
*/
1711+
static Node *
1712+
RewriteFuncExprUuidExtractTimestamp(Node *node, void *context)
1713+
{
1714+
FuncExpr *funcExpr = castNode(FuncExpr, node);
1715+
int argCount = list_length(funcExpr->args);
1716+
1717+
/* uuid_extract_timestamp should have exactly 1 argument */
1718+
if (argCount != 1)
1719+
return node;
1720+
1721+
Node *firstArg = linitial(funcExpr->args);
1722+
1723+
if (exprType(firstArg) != UUIDOID)
1724+
return node;
1725+
1726+
/* add the second argument, the Postgres version number */
1727+
funcExpr->args = list_make2(firstArg, MakeIntConst(PG_VERSION_NUM));
1728+
argCount = 2;
1729+
1730+
/* Rewrite to uuid_extract_timestamp_pg(uuid) */
1731+
List *funcName = list_make2(makeString(PG_LAKE_INTERNAL_NSP),
1732+
makeString("uuid_extract_timestamp_pg"));
1733+
Oid argTypes[] = {UUIDOID, INT4OID};
1734+
1735+
funcExpr->funcid = LookupFuncName(funcName, argCount, argTypes, false);
1736+
1737+
return (Node *) funcExpr;
1738+
}
1739+
#endif
1740+
1741+
16921742
/*
16931743
* RewriteFuncExprMapExtract rewrites map_type.extract(..) function calls
16941744
* into remote map_extract(map,key)[1] expressions

pg_lake_engine/src/pgduck/shippable_builtin_functions.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,14 @@ static const PGDuckShippableFunction ShippableBuiltinProcs[] =
444444
{"encode", 'f', 2, {"bytea", "text"}, IsEncodeShippable},
445445
{"decode", 'f', 2, {"text", "text"}, IsDecodeShippable},
446446

447+
#if PG_VERSION_NUM >= 170000
448+
{"uuid_extract_version", 'f', 1, {"uuid"}, NULL},
449+
{"uuid_extract_timestamp", 'f', 1, {"uuid"}, NULL},
450+
#endif
451+
#if PG_VERSION_NUM >= 180000
452+
{"uuidv7", 'f', 0, {}, NULL},
453+
#endif
454+
447455
/* trim() */
448456
};
449457

pg_lake_spatial/tests/pytests/test_internal_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,6 @@ def test_internal_schema(
2121
""",
2222
pg_conn,
2323
)[0][0]
24-
assert result == 64
24+
assert result == 65
2525

2626
pg_conn.rollback()

0 commit comments

Comments
 (0)