Skip to content

Commit d94b00d

Browse files
Pushdown uuid functions
Signed-off-by: Aykut Bozkurt <aykut.bozkurt@snowflake.com>
1 parent 62cb274 commit d94b00d

6 files changed

Lines changed: 388 additions & 1 deletion

File tree

duckdb_pglake/src/duckdb_pglake_extension.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,123 @@ inline void AtanhPG(DataChunk &args, ExpressionState &state, Vector &result)
110110
}
111111

112112

113+
/*
114+
* Extract timestamp from UUID, mimicking Postgres behavior.
115+
* Postgres supports UUID v1 and v7 (above 18), returning NULL for other versions.
116+
* DuckDB's uuid_extract_timestamp only supports v7 and throws an error for others.
117+
*/
118+
inline void UUIDExtractTimestampPG(DataChunk &args, ExpressionState &state, Vector &result)
119+
{
120+
D_ASSERT(args.ColumnCount() == 2);
121+
auto &input_vector = args.data[0];
122+
auto &version_vector = args.data[1];
123+
auto count = args.size();
124+
125+
// Prepare input in unified format
126+
UnifiedVectorFormat vdata;
127+
input_vector.ToUnifiedFormat(count, vdata);
128+
129+
UnifiedVectorFormat vdata_version;
130+
version_vector.ToUnifiedFormat(count, vdata_version);
131+
132+
auto input_data = UnifiedVectorFormat::GetData<hugeint_t>(vdata);
133+
auto pg_version_data = UnifiedVectorFormat::GetData<int32_t>(vdata_version);
134+
auto result_data = FlatVector::GetData<timestamp_t>(result);
135+
auto &result_validity = FlatVector::Validity(result);
136+
137+
for (idx_t i = 0; i < count; i++) {
138+
auto idx = vdata.sel->get_index(i);
139+
auto version_idx = vdata_version.sel->get_index(i);
140+
141+
// Propagate input NULL
142+
if (!vdata.validity.RowIsValid(idx)) {
143+
result_validity.SetInvalid(i);
144+
continue;
145+
}
146+
147+
auto uuid_val = input_data[idx];
148+
auto pg_version = pg_version_data[version_idx];
149+
150+
// Check if RFC 4122 variant (bits 10xxxxxx in the variant field)
151+
// The variant field is in byte 8 (counting from 0)
152+
uint8_t variant_byte = static_cast<uint8_t>((uuid_val.lower >> 56) & 0xFF);
153+
if ((variant_byte & 0xc0) != 0x80) {
154+
// Not RFC 4122 variant, return NULL
155+
result_validity.SetInvalid(i);
156+
continue;
157+
}
158+
159+
// Extract version (first 4 bits of byte 6)
160+
// DuckDB stores UUID with XOR flip on the upper 64 bits, so we need to undo it
161+
uint64_t unsigned_upper = static_cast<uint64_t>(uuid_val.upper) ^ (uint64_t(1) << 63);
162+
uint8_t version = (static_cast<uint8_t>((unsigned_upper) >> 8) & 0xf0) >> 4;
163+
164+
if (version == 1) {
165+
// UUID v1: Extract timestamp from time_low, time_mid, and time_hi_and_version fields
166+
// Mimic PostgreSQL's uuid_extract_timestamp implementation
167+
168+
// Extract individual bytes from the UUID (bytes 0-7 are in upper)
169+
// We already have unsigned_upper computed above
170+
uint8_t data[8];
171+
for (int i = 0; i < 8; i++) {
172+
data[i] = static_cast<uint8_t>((unsigned_upper >> (56 - i * 8)) & 0xFF);
173+
}
174+
175+
// Extract timestamp following PostgreSQL's exact logic
176+
// See: src/backend/utils/adt/uuid.c:uuid_extract_timestamp()
177+
uint64_t tms = ((uint64_t) data[0] << 24)
178+
+ ((uint64_t) data[1] << 16)
179+
+ ((uint64_t) data[2] << 8)
180+
+ ((uint64_t) data[3])
181+
+ ((uint64_t) data[4] << 40)
182+
+ ((uint64_t) data[5] << 32)
183+
+ (((uint64_t) data[6] & 0xf) << 56)
184+
+ ((uint64_t) data[7] << 48);
185+
186+
// Convert 100-ns intervals to microseconds
187+
int64_t timestamp_us = static_cast<int64_t>(tms / 10);
188+
189+
// Adjust from UUID epoch (1582-10-15) to Postgres epoch (2000-01-01)
190+
// This matches PostgreSQL's calculation exactly
191+
constexpr int64_t POSTGRES_EPOCH_JDATE = 2451545; // date2j(2000, 1, 1)
192+
constexpr int64_t UUIDV1_EPOCH_JDATE = 2299161; // date2j(1582, 10, 15)
193+
constexpr int64_t SECS_PER_DAY = 86400;
194+
constexpr int64_t USECS_PER_SEC = 1000000;
195+
constexpr int64_t UUID_TO_PG_EPOCH_US =
196+
(POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
197+
198+
timestamp_us -= UUID_TO_PG_EPOCH_US;
199+
200+
// Convert from Postgres epoch (2000-01-01) to Unix epoch (1970-01-01)
201+
// Unix epoch is 946684800 seconds (30 years) before Postgres epoch
202+
// So we ADD this offset to convert from Postgres timestamp to Unix timestamp
203+
constexpr int64_t PG_TO_UNIX_EPOCH_US = 946684800LL * USECS_PER_SEC;
204+
timestamp_us += PG_TO_UNIX_EPOCH_US;
205+
206+
result_data[i] = timestamp_t{timestamp_us};
207+
}
208+
// UUID v7 is supported in Postgres 18 and above
209+
else if (version == 7 && pg_version >= 180000) {
210+
// UUID v7: Extract timestamp from first 48 bits (Unix timestamp in milliseconds)
211+
int64_t upper = uuid_val.upper;
212+
// Flip the top byte to handle signed representation
213+
upper ^= NumericLimits<int64_t>::Minimum();
214+
int64_t unix_ts_milli = upper >> 16;
215+
216+
// Convert milliseconds to microseconds
217+
constexpr int64_t kMilliToMicro = 1000;
218+
int64_t unix_ts_us = kMilliToMicro * unix_ts_milli;
219+
220+
result_data[i] = timestamp_t{unix_ts_us};
221+
}
222+
else {
223+
// Not a timestamp-containing UUID version, return NULL
224+
result_validity.SetInvalid(i);
225+
}
226+
}
227+
}
228+
229+
113230
/*
114231
* InitcapPG implements the Postgres initcap(text) function for the
115232
* C collation.
@@ -350,6 +467,9 @@ static void LoadInternal(ExtensionLoader &loader) {
350467
substr.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::VARCHAR, SubstringPG));
351468
loader.RegisterFunction(substr);
352469

470+
auto uuid_extract_timestamp_pg = ScalarFunction("uuid_extract_timestamp_pg", {LogicalType::UUID, LogicalType::INTEGER}, LogicalType::TIMESTAMP_TZ, UUIDExtractTimestampPG);
471+
loader.RegisterFunction(uuid_extract_timestamp_pg);
472+
353473
PgLakeUtilityFunctions::RegisterFunctions(loader);
354474
PgLakeFileSystemFunctions::RegisterFunctions(loader);
355475

pg_lake_engine/pg_lake_engine--3.2--3.3.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,9 @@ CREATE FUNCTION __lake__internal__nsp__.initcap_pg(text)
1010
LANGUAGE C
1111
IMMUTABLE PARALLEL SAFE STRICT
1212
AS 'MODULE_PATHNAME', $function$pg_lake_internal_dummy_function$function$;
13+
14+
-- Catalog entry for the two-argument uuid_extract_timestamp_pg(uuid, integer)
-- function in the internal schema. It is bound to the
-- pg_lake_internal_dummy_function symbol, like the other declarations in this
-- script.
CREATE FUNCTION __lake__internal__nsp__.uuid_extract_timestamp_pg(uuid, integer)
    RETURNS timestamptz
    LANGUAGE C
    IMMUTABLE
    PARALLEL SAFE
    STRICT
AS 'MODULE_PATHNAME', $function$pg_lake_internal_dummy_function$function$;

pg_lake_engine/src/pgduck/rewrite_query.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "catalog/pg_operator.h"
2525
#include "catalog/pg_operator_d.h"
2626
#include "catalog/pg_proc.h"
27+
#include "catalog/pg_type.h"
2728
#include "common/hashfn.h"
2829
#include "pg_lake/extensions/pg_lake_spatial.h"
2930
#include "pg_lake/extensions/postgis.h"
@@ -146,6 +147,9 @@ static ExpressionRewriter GetOperatorRewriter(Oid functionId);
146147
static char *GetOperatorRewriteFunctionName(Oid functionId);
147148

148149
static Node *RewriteFuncExprBtrim(Node *node, void *context);
150+
#if PG_VERSION_NUM >= 170000
151+
static Node *RewriteFuncExprUuidExtractTimestamp(Node *node, void *context);
152+
#endif
149153
static Node *RewriteFunctionCallExpression(Oid functionId, Node *node, void *context);
150154
static Node *RewriteFuncExprCast(Node *node, void *context);
151155
static Node *RewriteFuncExprExtract(Node *node, void *context);
@@ -350,6 +354,13 @@ static FunctionCallRewriteRuleByName BuiltinFunctionCallRewriteRulesByName[] =
350354
"pg_catalog", "decode", RewriteFuncExprDecode, 0
351355
},
352356

357+
#if PG_VERSION_NUM >= 170000
358+
/* uuid functions */
359+
{
360+
"pg_catalog", "uuid_extract_timestamp", RewriteFuncExprUuidExtractTimestamp, 0
361+
},
362+
#endif
363+
353364
};
354365

355366
static OperatorRewriteRuleByName BuiltinOperatorRewriteRulesByName[] =
@@ -1689,6 +1700,45 @@ RewriteFuncExprDecode(Node *node, void *context)
16891700
}
16901701

16911702

1703+
#if PG_VERSION_NUM >= 170000
1704+
/*
1705+
* RewriteFuncExprUuidExtractTimestamp rewrites uuid_extract_timestamp(..) function calls
1706+
* to use the custom uuid_extract_timestamp_pg(..) function that mimics Postgres behavior.
1707+
*
1708+
* Postgres returns NULL for UUID versions that don't contain timestamps (v1 and v7 (>=pg18 ) only).
1709+
* DuckDB's uuid_extract_timestamp throws an error for non-v7 UUIDs.
1710+
*/
1711+
static Node *
1712+
RewriteFuncExprUuidExtractTimestamp(Node *node, void *context)
1713+
{
1714+
FuncExpr *funcExpr = castNode(FuncExpr, node);
1715+
int argCount = list_length(funcExpr->args);
1716+
1717+
/* uuid_extract_timestamp should have exactly 1 argument */
1718+
if (argCount != 1)
1719+
return node;
1720+
1721+
Node *firstArg = linitial(funcExpr->args);
1722+
1723+
if (exprType(firstArg) != UUIDOID)
1724+
return node;
1725+
1726+
/* add the second argument, the Postgres version number */
1727+
funcExpr->args = list_make2(firstArg, MakeIntConst(PG_VERSION_NUM));
1728+
argCount = 2;
1729+
1730+
/* Rewrite to uuid_extract_timestamp_pg(uuid) */
1731+
List *funcName = list_make2(makeString(PG_LAKE_INTERNAL_NSP),
1732+
makeString("uuid_extract_timestamp_pg"));
1733+
Oid argTypes[] = {UUIDOID, INT4OID};
1734+
1735+
funcExpr->funcid = LookupFuncName(funcName, argCount, argTypes, false);
1736+
1737+
return (Node *) funcExpr;
1738+
}
1739+
#endif
1740+
1741+
16921742
/*
16931743
* RewriteFuncExprMapExtract rewrites map_type.extract(..) function calls
16941744
* into remote map_extract(map,key)[1] expressions

pg_lake_engine/src/pgduck/shippable_builtin_functions.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,14 @@ static const PGDuckShippableFunction ShippableBuiltinProcs[] =
444444
{"encode", 'f', 2, {"bytea", "text"}, IsEncodeShippable},
445445
{"decode", 'f', 2, {"text", "text"}, IsDecodeShippable},
446446

447+
#if PG_VERSION_NUM >= 170000
448+
{"uuid_extract_version", 'f', 1, {"uuid"}, NULL},
449+
{"uuid_extract_timestamp", 'f', 1, {"uuid"}, NULL},
450+
#endif
451+
#if PG_VERSION_NUM >= 180000
452+
{"uuidv7", 'f', 0, {}, NULL},
453+
#endif
454+
447455
/* trim() */
448456
};
449457

pg_lake_spatial/tests/pytests/test_internal_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,6 @@ def test_internal_schema(
2121
""",
2222
pg_conn,
2323
)[0][0]
24-
assert result == 64
24+
assert result == 65
2525

2626
pg_conn.rollback()

0 commit comments

Comments
 (0)