2525#include "postgres.h"
2626
2727#include "utils/builtins.h"
28+ #include "utils/lsyscache.h"
2829
2930#include "pg_lake/fdw/partition_pushdown.h"
3031#include "pg_lake/fdw/partition_transform.h"
3132#include "pg_lake/iceberg/api/partitioning.h"
3233#include "pg_lake/iceberg/manifest_spec.h"
3334
34- /* prefix for synthetic partition columns in COPY TO queries */
35- #define PARTITION_COLUMN_PREFIX "__part_"
36-
3735
3836static char * PartitionTransformToDuckDBExpression (IcebergPartitionTransform * transform );
3937
@@ -87,13 +85,21 @@ PartitionTransformToDuckDBExpression(IcebergPartitionTransform * transform)
8785 case PARTITION_TRANSFORM_IDENTITY :
8886 {
8987 /*
90- * Identity partitions use the column value directly. DuckDB
91- * writes the value in its default text format into the
92- * Hive-style path (e.g. __part_0=2025-01-15), and
93- * DeserializePartitionValueFromPGText parses it back using
94- * the transform's result PG type.
88+ * Identity partitions use the column value directly for
89+ * non-temporal types. For date/timestamp types, we produce
90+ * epoch integers to avoid DuckDB text formatting issues (e.g.
91+ * BC dates formatted as "4713-01-01 (BC)").
92+ *
93+ * ParsePartitionValuesFromPath uses
94+ * DeserializePartitionValueFromEpochInteger to convert epoch
95+ * integers back to Iceberg binary.
9596 */
96- return psprintf ("%s" , col );
97+ if (typeOid == DATEOID )
98+ return psprintf ("datediff('day', date '1970-01-01', %s::date)" , col );
99+ else if (typeOid == TIMESTAMPOID || typeOid == TIMESTAMPTZOID )
100+ return psprintf ("epoch_us(%s)" , col );
101+ else
102+ return psprintf ("%s" , col );
97103 }
98104
99105 case PARTITION_TRANSFORM_YEAR :
@@ -144,6 +150,85 @@ GetPartitionByExpressions(List *transforms)
144150}
145151
146152
153+ /*
154+ * HexDigitToInt converts a hex character ('0'-'9', 'A'-'F', 'a'-'f') to its
155+ * integer value (0-15). Returns -1 for invalid characters.
156+ */
157+ static int
158+ HexDigitToInt (char c )
159+ {
160+ if (c >= '0' && c <= '9' )
161+ return c - '0' ;
162+ if (c >= 'A' && c <= 'F' )
163+ return c - 'A' + 10 ;
164+ if (c >= 'a' && c <= 'f' )
165+ return c - 'a' + 10 ;
166+ return -1 ;
167+ }
168+
169+
170+ /*
171+ * UrlDecodePartitionValue decodes percent-encoded characters in a Hive-style
172+ * partition value (e.g. "1e%2B20" -> "1e+20").
173+ *
174+ * DuckDB percent-encodes special characters when writing partition directory
175+ * names. We must decode them before parsing the value.
176+ */
177+ static char *
178+ UrlDecodePartitionValue (const char * encoded )
179+ {
180+ int len = strlen (encoded );
181+ char * decoded = palloc (len + 1 );
182+ int j = 0 ;
183+
184+ for (int i = 0 ; i < len ; i ++ )
185+ {
186+ if (encoded [i ] == '%' && i + 2 < len )
187+ {
188+ int hi = HexDigitToInt (encoded [i + 1 ]);
189+ int lo = HexDigitToInt (encoded [i + 2 ]);
190+
191+ if (hi >= 0 && lo >= 0 )
192+ {
193+ decoded [j ++ ] = (char ) (hi * 16 + lo );
194+ i += 2 ;
195+ continue ;
196+ }
197+ }
198+ decoded [j ++ ] = encoded [i ];
199+ }
200+
201+ decoded [j ] = '\0' ;
202+ return decoded ;
203+ }
204+
205+
206+ /*
207+ * NormalizeDuckDBTextToPGText converts a DuckDB text representation of a value
208+ * to PostgreSQL's canonical text format by roundtripping through PG's type I/O.
209+ *
210+ * DuckDB may format values differently from PG (e.g. "1.0" vs "1" for numeric,
211+ * "-0.0" vs "-0" for float8). This normalization ensures the text matches what
212+ * DeserializePartitionValueFromPGText expects for its roundtrip assertion.
213+ */
214+ static char *
215+ NormalizeDuckDBTextToPGText (const char * duckdbText , Oid resultTypeOid ,
216+ int32 resultTypeMod )
217+ {
218+ Oid typoinput ;
219+ Oid typioparam ;
220+ Oid typoutput ;
221+ bool typIsVarlena ;
222+
223+ getTypeInputInfo (resultTypeOid , & typoinput , & typioparam );
224+ Datum d = OidInputFunctionCall (typoinput , (char * ) duckdbText ,
225+ typioparam , resultTypeMod );
226+
227+ getTypeOutputInfo (resultTypeOid , & typoutput , & typIsVarlena );
228+ return OidOutputFunctionCall (typoutput , d );
229+ }
230+
231+
147232/*
148233 * ParsePartitionValuesFromPath extracts partition values from the Hive-style
149234 * directory path produced by DuckDB COPY TO with PARTITION_BY.
@@ -168,7 +253,7 @@ ParsePartitionValuesFromPath(char *filePath, List *transforms)
168253 IcebergPartitionTransform * transform = list_nth (transforms , partIndex );
169254
170255 /* build the search key: "__part_N=" */
171- char * searchKey = psprintf (PARTITION_COLUMN_PREFIX " %d=" , partIndex );
256+ char * searchKey = psprintf ("__part_ %d=" , partIndex );
172257 int searchKeyLen = strlen (searchKey );
173258
174259 /* find this key in the path */
@@ -190,6 +275,9 @@ ParsePartitionValuesFromPath(char *filePath, List *transforms)
190275
191276 char * valueText = pnstrdup (valueStart , valueLen );
192277
278+ /* URL-decode (DuckDB percent-encodes special chars in Hive paths) */
279+ valueText = UrlDecodePartitionValue (valueText );
280+
193281 /* populate the partition field */
194282 PartitionField * field = & partition -> fields [partIndex ];
195283
@@ -203,12 +291,29 @@ ParsePartitionValuesFromPath(char *filePath, List *transforms)
203291 field -> value = NULL ;
204292 field -> value_length = 0 ;
205293 }
294+ else if (transform -> type == PARTITION_TRANSFORM_IDENTITY &&
295+ (transform -> pgType .postgresTypeOid == DATEOID ||
296+ transform -> pgType .postgresTypeOid == TIMESTAMPOID ||
297+ transform -> pgType .postgresTypeOid == TIMESTAMPTZOID ))
298+ {
299+ /*
300+ * Identity temporal types use epoch integers in the path (days
301+ * for date, microseconds for timestamp).
302+ */
303+ field -> value = DeserializePartitionValueFromEpochInteger (
304+ transform , valueText , & field -> value_length );
305+ }
206306 else
207307 {
208308 /*
209- * Convert the text value to Iceberg binary format. The text
210- * representation matches the transform's result PG type.
309+ * Normalize DuckDB text to PG canonical format (e.g. "1.0" -> "1"
310+ * for numeric) so the roundtrip assertion in
311+ * DeserializePartitionValueFromPGText passes.
211312 */
313+ valueText = NormalizeDuckDBTextToPGText (valueText ,
314+ transform -> resultPgType .postgresTypeOid ,
315+ transform -> resultPgType .postgresTypeMod );
316+
212317 field -> value = DeserializePartitionValueFromPGText (
213318 transform , valueText , & field -> value_length );
214319 }
0 commit comments