Snowflake-Labs
diff --git a/pg_lake_engine/include/pg_lake/pgduck/iceberg_datum_validation.h b/pg_lake_engine/include/pg_lake/pgduck/iceberg_datum_validation.h
Lines changed: 24 additions & 0 deletions
diff --git a/pg_lake_engine/include/pg_lake/pgduck/iceberg_validation.h b/pg_lake_engine/include/pg_lake/pgduck/iceberg_validation.h
Lines changed: 19 additions & 0 deletions
diff --git a/pg_lake_engine/src/init.c b/pg_lake_engine/src/init.c
Lines changed: 28 additions & 0 deletions
@@ -41,3 +41,27 @@ extern PGDLLEXPORT Datum IcebergErrorOrClampDatum(Datum value, Oid typeOid,
 												  int32 typmod,
 												  IcebergOutOfRangePolicy policy,
 												  bool *isNull);
+
+/*
+ * IcebergSizeClampDatum truncates or NULLs a Datum so that string and
+ * binary values fit the byte limits expressed by
+ * pg_lake_engine.iceberg_max_string_bytes and
+ * pg_lake_engine.iceberg_max_binary_bytes (0 = no limit).
+ *
+ * Plain string and binary types are truncated to fit:
+ *   - text/varchar/bpchar  -> trimmed at a UTF-8 character boundary to
+ *                             iceberg_max_string_bytes.
+ *   - bytea                -> byte-truncated to iceberg_max_binary_bytes.
+ *
+ * Structured-string types exceeding iceberg_max_string_bytes are replaced
+ * with NULL via *isNull = true, since truncation would corrupt them:
+ *   - jsonb/json
+ *
+ * Recurses through arrays, composites, maps, and domains.  Nested values
+ * that would be NULLed are absorbed as NULL within the reconstructed
+ * container.
+ *
+ * If both GUCs are 0, the value is returned unchanged regardless of type.
+ */
+extern PGDLLEXPORT Datum IcebergSizeClampDatum(Datum value, Oid typeOid,
+											   int32 typmod, bool *isNull);
@@ -72,3 +72,22 @@ extern PGDLLEXPORT bool TypeNeedsIcebergValidation(Oid typeOid, int32 typmod,
 #define TEMPORAL_DATE_MIN_YEAR		(-4712)
 #define TEMPORAL_TIMESTAMP_MIN_YEAR	1
 #define TEMPORAL_MAX_YEAR			9999
+
+/*
+ * Downstream byte limits for values written to Iceberg tables, set via the
+ * pg_lake_engine.iceberg_max_string_bytes and
+ * pg_lake_engine.iceberg_max_binary_bytes GUCs.  0 means no limit.  These
+ * caps are imposed by some downstream consumers (e.g. Snowflake VARCHAR
+ * 16 MiB / BINARY 8 MiB) and applied via IcebergSizeClampDatum.
+ */
+extern PGDLLEXPORT int IcebergMaxStringBytes;
+extern PGDLLEXPORT int IcebergMaxBinaryBytes;
+
+/*
+ * TypeNeedsIcebergSizeClamping returns true if typeOid is, or contains, a
+ * type that IcebergSizeClampDatum may truncate or replace with NULL:
+ * text/varchar/bpchar, json/jsonb, or bytea.  Recurses through arrays,
+ * composites, maps, and domains.  The result depends only on the type,
+ * not on the current GUC values.
+ */
+extern PGDLLEXPORT bool TypeNeedsIcebergSizeClamping(Oid typeOid);
@@ -44,6 +44,7 @@
 #include "pg_extension_base/pg_extension_base_ids.h"
 #include "pg_lake/pgduck/cache_worker.h"
 #include "pg_lake/pgduck/client.h"
+#include "pg_lake/pgduck/iceberg_validation.h"
 #include "pg_lake/util/s3_writer_utils.h"
 #include "utils/guc.h"
 
@@ -186,6 +187,33 @@ _PG_init(void)
 							0,
 							NULL, NULL, NULL);
 
+	DefineCustomIntVariable("pg_lake_engine.iceberg_max_string_bytes",
+							gettext_noop("Maximum bytes for string values written to "
+										 "Iceberg tables. Values of text/varchar/bpchar "
+										 "exceeding this size are truncated at a UTF-8 "
+										 "character boundary; values of jsonb/json are "
+										 "replaced with NULL since truncation would "
+										 "corrupt the structure. 0 disables the limit. "
+										 "Intended for downstream consumers (e.g. "
+										 "Snowflake) that impose per-column byte caps."),
+							NULL,
+							&IcebergMaxStringBytes,
+							0, 0, INT_MAX,
+							PGC_USERSET,
+							GUC_UNIT_BYTE,
+							NULL, NULL, NULL);
+
+	DefineCustomIntVariable("pg_lake_engine.iceberg_max_binary_bytes",
+							gettext_noop("Maximum bytes for bytea values written to "
+										 "Iceberg tables. Values exceeding this size are "
+										 "byte-truncated. 0 disables the limit."),
+							NULL,
+							&IcebergMaxBinaryBytes,
+							0, 0, INT_MAX,
+							PGC_USERSET,
+							GUC_UNIT_BYTE,
+							NULL, NULL, NULL);
+
 	DefineCustomStringVariable(
 							   "pg_lake.stage_location",
 							   gettext_noop("Base URL for @STAGE/ resolution in paths"),