@@ -109,6 +109,123 @@ inline void AtanhPG(DataChunk &args, ExpressionState &state, Vector &result)
109109}
110110
111111
112+ /*
113+ * Extract timestamp from UUID, mimicking Postgres behavior.
114+ * Postgres supports UUID v1 and v7 (above 18), returning NULL for other versions.
115+ * DuckDB's uuid_extract_timestamp only supports v7 and throws an error for others.
116+ */
117+ inline void UUIDExtractTimestampPG (DataChunk &args, ExpressionState &state, Vector &result)
118+ {
119+ D_ASSERT (args.ColumnCount () == 2 );
120+ auto &input_vector = args.data [0 ];
121+ auto &version_vector = args.data [1 ];
122+ auto count = args.size ();
123+
124+ // Prepare input in unified format
125+ UnifiedVectorFormat vdata;
126+ input_vector.ToUnifiedFormat (count, vdata);
127+
128+ UnifiedVectorFormat vdata_version;
129+ version_vector.ToUnifiedFormat (count, vdata_version);
130+
131+ auto input_data = UnifiedVectorFormat::GetData<hugeint_t >(vdata);
132+ auto pg_version_data = UnifiedVectorFormat::GetData<int32_t >(vdata_version);
133+ auto result_data = FlatVector::GetData<timestamp_t >(result);
134+ auto &result_validity = FlatVector::Validity (result);
135+
136+ for (idx_t i = 0 ; i < count; i++) {
137+ auto idx = vdata.sel ->get_index (i);
138+ auto version_idx = vdata_version.sel ->get_index (i);
139+
140+ // Propagate input NULL
141+ if (!vdata.validity .RowIsValid (idx)) {
142+ result_validity.SetInvalid (i);
143+ continue ;
144+ }
145+
146+ auto uuid_val = input_data[idx];
147+ auto pg_version = pg_version_data[version_idx];
148+
149+ // Check if RFC 4122 variant (bits 10xxxxxx in the variant field)
150+ // The variant field is in byte 8 (counting from 0)
151+ uint8_t variant_byte = static_cast <uint8_t >((uuid_val.lower >> 56 ) & 0xFF );
152+ if ((variant_byte & 0xc0 ) != 0x80 ) {
153+ // Not RFC 4122 variant, return NULL
154+ result_validity.SetInvalid (i);
155+ continue ;
156+ }
157+
158+ // Extract version (first 4 bits of byte 6)
159+ // DuckDB stores UUID with XOR flip on the upper 64 bits, so we need to undo it
160+ uint64_t unsigned_upper = static_cast <uint64_t >(uuid_val.upper ) ^ (uint64_t (1 ) << 63 );
161+ uint8_t version = (static_cast <uint8_t >((unsigned_upper) >> 8 ) & 0xf0 ) >> 4 ;
162+
163+ if (version == 1 ) {
164+ // UUID v1: Extract timestamp from time_low, time_mid, and time_hi_and_version fields
165+ // Mimic PostgreSQL's uuid_extract_timestamp implementation
166+
167+ // Extract individual bytes from the UUID (bytes 0-7 are in upper)
168+ // We already have unsigned_upper computed above
169+ uint8_t data[8 ];
170+ for (int j = 0 ; j < 8 ; j++) {
171+ data[j] = static_cast <uint8_t >((unsigned_upper >> (56 - j * 8 )) & 0xFF );
172+ }
173+
174+ // Extract timestamp following PostgreSQL's exact logic
175+ // See: src/backend/utils/adt/uuid.c:uuid_extract_timestamp()
176+ uint64_t tms = ((uint64_t ) data[0 ] << 24 )
177+ + ((uint64_t ) data[1 ] << 16 )
178+ + ((uint64_t ) data[2 ] << 8 )
179+ + ((uint64_t ) data[3 ])
180+ + ((uint64_t ) data[4 ] << 40 )
181+ + ((uint64_t ) data[5 ] << 32 )
182+ + (((uint64_t ) data[6 ] & 0xf ) << 56 )
183+ + ((uint64_t ) data[7 ] << 48 );
184+
185+ // Convert 100-ns intervals to microseconds
186+ int64_t timestamp_us = static_cast <int64_t >(tms / 10 );
187+
188+ // Adjust from UUID epoch (1582-10-15) to Postgres epoch (2000-01-01)
189+ // This matches PostgreSQL's calculation exactly
190+ constexpr int64_t POSTGRES_EPOCH_JDATE = 2451545 ; // date2j(2000, 1, 1)
191+ constexpr int64_t UUIDV1_EPOCH_JDATE = 2299161 ; // date2j(1582, 10, 15)
192+ constexpr int64_t SECS_PER_DAY = 86400 ;
193+ constexpr int64_t USECS_PER_SEC = 1000000 ;
194+ constexpr int64_t UUID_TO_PG_EPOCH_US =
195+ (POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
196+
197+ timestamp_us -= UUID_TO_PG_EPOCH_US;
198+
199+ // Convert from Postgres epoch (2000-01-01) to Unix epoch (1970-01-01)
200+ // Unix epoch is 946684800 seconds (30 years) before Postgres epoch
201+ // So we ADD this offset to convert from Postgres timestamp to Unix timestamp
202+ constexpr int64_t PG_TO_UNIX_EPOCH_US = 946684800LL * USECS_PER_SEC;
203+ timestamp_us += PG_TO_UNIX_EPOCH_US;
204+
205+ result_data[i] = timestamp_t {timestamp_us};
206+ }
207+ // UUID v7 is supported in Postgres 18 and above
208+ else if (version == 7 && pg_version >= 180000 ) {
209+ // UUID v7: Extract timestamp from first 48 bits (Unix timestamp in milliseconds)
210+ int64_t upper = uuid_val.upper ;
211+ // Flip the top byte to handle signed representation
212+ upper ^= NumericLimits<int64_t >::Minimum ();
213+ int64_t unix_ts_milli = upper >> 16 ;
214+
215+ // Convert milliseconds to microseconds
216+ constexpr int64_t kMilliToMicro = 1000 ;
217+ int64_t unix_ts_us = kMilliToMicro * unix_ts_milli;
218+
219+ result_data[i] = timestamp_t {unix_ts_us};
220+ }
221+ else {
222+ // Not a timestamp-containing UUID version, return NULL
223+ result_validity.SetInvalid (i);
224+ }
225+ }
226+ }
227+
228+
112229/*
113230 * InitcapPG implements the Postgres initcap(text) function for the
114231 * C collation.
@@ -348,6 +465,9 @@ static void LoadInternal(ExtensionLoader &loader) {
348465 substr.AddFunction (ScalarFunction ({LogicalType::VARCHAR, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::VARCHAR, SubstringPG));
349466 loader.RegisterFunction (substr);
350467
468+ auto uuid_extract_timestamp_pg = ScalarFunction (" uuid_extract_timestamp_pg" , {LogicalType::UUID, LogicalType::INTEGER}, LogicalType::TIMESTAMP_TZ, UUIDExtractTimestampPG);
469+ loader.RegisterFunction (uuid_extract_timestamp_pg);
470+
351471 PgLakeUtilityFunctions::RegisterFunctions (loader);
352472 PgLakeFileSystemFunctions::RegisterFunctions (loader);
353473
0 commit comments