@@ -110,6 +110,123 @@ inline void AtanhPG(DataChunk &args, ExpressionState &state, Vector &result)
110110}
111111
112112
113+ /*
114+ * Extract timestamp from UUID, mimicking Postgres behavior.
115+ * Postgres supports UUID v1 and v7 (above 18), returning NULL for other versions.
116+ * DuckDB's uuid_extract_timestamp only supports v7 and throws an error for others.
117+ */
118+ inline void UUIDExtractTimestampPG (DataChunk &args, ExpressionState &state, Vector &result)
119+ {
120+ D_ASSERT (args.ColumnCount () == 2 );
121+ auto &input_vector = args.data [0 ];
122+ auto &version_vector = args.data [1 ];
123+ auto count = args.size ();
124+
125+ // Prepare input in unified format
126+ UnifiedVectorFormat vdata;
127+ input_vector.ToUnifiedFormat (count, vdata);
128+
129+ UnifiedVectorFormat vdata_version;
130+ version_vector.ToUnifiedFormat (count, vdata_version);
131+
132+ auto input_data = UnifiedVectorFormat::GetData<hugeint_t >(vdata);
133+ auto pg_version_data = UnifiedVectorFormat::GetData<int32_t >(vdata_version);
134+ auto result_data = FlatVector::GetData<timestamp_t >(result);
135+ auto &result_validity = FlatVector::Validity (result);
136+
137+ for (idx_t i = 0 ; i < count; i++) {
138+ auto idx = vdata.sel ->get_index (i);
139+ auto version_idx = vdata_version.sel ->get_index (i);
140+
141+ // Propagate input NULL
142+ if (!vdata.validity .RowIsValid (idx)) {
143+ result_validity.SetInvalid (i);
144+ continue ;
145+ }
146+
147+ auto uuid_val = input_data[idx];
148+ auto pg_version = pg_version_data[version_idx];
149+
150+ // Check if RFC 4122 variant (bits 10xxxxxx in the variant field)
151+ // The variant field is in byte 8 (counting from 0)
152+ uint8_t variant_byte = static_cast <uint8_t >((uuid_val.lower >> 56 ) & 0xFF );
153+ if ((variant_byte & 0xc0 ) != 0x80 ) {
154+ // Not RFC 4122 variant, return NULL
155+ result_validity.SetInvalid (i);
156+ continue ;
157+ }
158+
159+ // Extract version (first 4 bits of byte 6)
160+ // DuckDB stores UUID with XOR flip on the upper 64 bits, so we need to undo it
161+ uint64_t unsigned_upper = static_cast <uint64_t >(uuid_val.upper ) ^ (uint64_t (1 ) << 63 );
162+ uint8_t version = (static_cast <uint8_t >((unsigned_upper) >> 8 ) & 0xf0 ) >> 4 ;
163+
164+ if (version == 1 ) {
165+ // UUID v1: Extract timestamp from time_low, time_mid, and time_hi_and_version fields
166+ // Mimic PostgreSQL's uuid_extract_timestamp implementation
167+
168+ // Extract individual bytes from the UUID (bytes 0-7 are in upper)
169+ // We already have unsigned_upper computed above
170+ uint8_t data[8 ];
171+ for (int i = 0 ; i < 8 ; i++) {
172+ data[i] = static_cast <uint8_t >((unsigned_upper >> (56 - i * 8 )) & 0xFF );
173+ }
174+
175+ // Extract timestamp following PostgreSQL's exact logic
176+ // See: src/backend/utils/adt/uuid.c:uuid_extract_timestamp()
177+ uint64_t tms = ((uint64_t ) data[0 ] << 24 )
178+ + ((uint64_t ) data[1 ] << 16 )
179+ + ((uint64_t ) data[2 ] << 8 )
180+ + ((uint64_t ) data[3 ])
181+ + ((uint64_t ) data[4 ] << 40 )
182+ + ((uint64_t ) data[5 ] << 32 )
183+ + (((uint64_t ) data[6 ] & 0xf ) << 56 )
184+ + ((uint64_t ) data[7 ] << 48 );
185+
186+ // Convert 100-ns intervals to microseconds
187+ int64_t timestamp_us = static_cast <int64_t >(tms / 10 );
188+
189+ // Adjust from UUID epoch (1582-10-15) to Postgres epoch (2000-01-01)
190+ // This matches PostgreSQL's calculation exactly
191+ constexpr int64_t POSTGRES_EPOCH_JDATE = 2451545 ; // date2j(2000, 1, 1)
192+ constexpr int64_t UUIDV1_EPOCH_JDATE = 2299161 ; // date2j(1582, 10, 15)
193+ constexpr int64_t SECS_PER_DAY = 86400 ;
194+ constexpr int64_t USECS_PER_SEC = 1000000 ;
195+ constexpr int64_t UUID_TO_PG_EPOCH_US =
196+ (POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
197+
198+ timestamp_us -= UUID_TO_PG_EPOCH_US;
199+
200+ // Convert from Postgres epoch (2000-01-01) to Unix epoch (1970-01-01)
201+ // Unix epoch is 946684800 seconds (30 years) before Postgres epoch
202+ // So we ADD this offset to convert from Postgres timestamp to Unix timestamp
203+ constexpr int64_t PG_TO_UNIX_EPOCH_US = 946684800LL * USECS_PER_SEC;
204+ timestamp_us += PG_TO_UNIX_EPOCH_US;
205+
206+ result_data[i] = timestamp_t {timestamp_us};
207+ }
208+ // UUID v7 is supported in Postgres 18 and above
209+ else if (version == 7 && pg_version >= 180000 ) {
210+ // UUID v7: Extract timestamp from first 48 bits (Unix timestamp in milliseconds)
211+ int64_t upper = uuid_val.upper ;
212+ // Flip the top byte to handle signed representation
213+ upper ^= NumericLimits<int64_t >::Minimum ();
214+ int64_t unix_ts_milli = upper >> 16 ;
215+
216+ // Convert milliseconds to microseconds
217+ constexpr int64_t kMilliToMicro = 1000 ;
218+ int64_t unix_ts_us = kMilliToMicro * unix_ts_milli;
219+
220+ result_data[i] = timestamp_t {unix_ts_us};
221+ }
222+ else {
223+ // Not a timestamp-containing UUID version, return NULL
224+ result_validity.SetInvalid (i);
225+ }
226+ }
227+ }
228+
229+
113230/*
114231* Postgres and DuckDB have different behavior for the SUBSTRING function when
115232* the length or offset is negative. This function implements the Postgres
@@ -307,6 +424,9 @@ static void LoadInternal(ExtensionLoader &loader) {
307424 substr.AddFunction (ScalarFunction ({LogicalType::VARCHAR, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::VARCHAR, SubstringPG));
308425 loader.RegisterFunction (substr);
309426
427+ auto uuid_extract_timestamp_pg = ScalarFunction (" uuid_extract_timestamp_pg" , {LogicalType::UUID, LogicalType::INTEGER}, LogicalType::TIMESTAMP_TZ, UUIDExtractTimestampPG);
428+ loader.RegisterFunction (uuid_extract_timestamp_pg);
429+
310430 PgLakeUtilityFunctions::RegisterFunctions (loader);
311431 PgLakeFileSystemFunctions::RegisterFunctions (loader);
312432
0 commit comments