Skip to content

Commit 7ee6596

Browse files
pg_lake_iceberg: refuse compatibility_mode rewrite of type-bound columns
A compatibility_mode rewrite swaps only a column's typeName. There is no implicit or assignment cast from uuid to text, so a nested-uuid column that also carries a DEFAULT / GENERATED / IDENTITY expression cannot be silently re-coerced the way numeric->double can. Rather than emit a broken column (or let core fail later with a confusing message), ErrorIfColumnRewriteUnsafe fails fast with an actionable error when a column we would actually rewrite carries such a clause. This is reachable only from user-authored DDL: callers that build Iceberg tables programmatically construct columns from type/typmod/collation alone and never attach these clauses, so for them it is a defensive invariant, not a reachable error path. Top-level uuid columns and uuid-free columns are never rewritten, so a clause on them is left untouched. Add CREATE and ALTER ADD COLUMN tests covering DEFAULT/GENERATED on a rewritten column (incl. a nested composite), plus the allowed cases. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 199782d commit 7ee6596

2 files changed

Lines changed: 191 additions & 0 deletions

File tree

pg_lake_iceberg/src/iceberg/compatibility_mode.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@
3535
* supplies the per-mode leaf rule. Out-of-range value handling, timetz
3636
* normalization, and numeric->double live elsewhere and compose on top of
3737
* this pass (each leaves the fields it does not touch alone).
38+
*
39+
* Rewriting a column means swapping only its typeName, which is sound only if
40+
* nothing else on the column is bound to the original type. Any column we
41+
* actually rewrite is therefore screened by ErrorIfColumnRewriteUnsafe, which
42+
* refuses columns carrying a DEFAULT / GENERATED / IDENTITY expression (there
43+
* is no implicit uuid->text cast for core to re-coerce them through). This is
44+
* reachable only from user-authored DDL: callers that build Iceberg tables
45+
* programmatically construct each column from type/typmod/collation alone and
46+
* never attach these clauses, so the check there is a defensive invariant, not
47+
* an error path users of those callers can hit.
3848
*/
3949

4050
#include "postgres.h"
@@ -136,6 +146,71 @@ NestedUuidToText(Oid typeOid, int32 typeMod, int level, void *context,
136146
}
137147

138148

149+
/*
150+
* ErrorIfColumnRewriteUnsafe refuses to rewrite a column whose declared type
151+
* carries a clause whose meaning is bound to that type.
152+
*
153+
* Changing a column's type underneath a DEFAULT or GENERATED expression is only
154+
* safe if the expression's result can be coerced to the new type the way the
155+
* core CREATE/ALTER path expects. numeric->double gets away with it because
156+
* numeric->float8 is an implicit cast; but there is NO implicit or assignment
157+
* cast from uuid to text, so a uuid-typed DEFAULT/GENERATED on a column we
158+
* rewrite to text would either change the stored semantics or (more often) fail
159+
* later with a confusing "default expression is of type uuid[]" error from
160+
* core. We fail fast with an actionable message instead.
161+
*
162+
* Identity is impossible here (it requires an integer type, never a column that
163+
* contains a uuid) but is covered defensively so future leaf rules stay safe.
164+
*
165+
* Only user-authored DDL can reach this: callers that generate Iceberg tables
166+
* programmatically build columns from type/typmod/collation alone and never
167+
* attach a DEFAULT / GENERATED / IDENTITY, so for them this is unreachable.
168+
*
169+
* At this pre-transform stage these clauses live as Constraint nodes in
170+
* columnDef->constraints; transformColumnDefinition() only fills the cooked
171+
* raw_default/generated/identity fields later, but we check those too in case
172+
* the pass is ever fed an already-transformed ColumnDef.
173+
*/
174+
static void
175+
ErrorIfColumnRewriteUnsafe(ColumnDef *columnDef)
176+
{
177+
const char *clause = NULL;
178+
ListCell *lc;
179+
180+
if (columnDef->raw_default != NULL || columnDef->cooked_default != NULL)
181+
clause = "a DEFAULT";
182+
else if (columnDef->generated != '\0')
183+
clause = "a GENERATED";
184+
else if (columnDef->identity != '\0')
185+
clause = "an IDENTITY";
186+
187+
foreach(lc, columnDef->constraints)
188+
{
189+
Constraint *con = lfirst_node(Constraint, lc);
190+
191+
if (con->contype == CONSTR_DEFAULT)
192+
clause = "a DEFAULT";
193+
else if (con->contype == CONSTR_GENERATED)
194+
clause = "a GENERATED";
195+
else if (con->contype == CONSTR_IDENTITY)
196+
clause = "an IDENTITY";
197+
}
198+
199+
if (clause == NULL)
200+
return;
201+
202+
ereport(ERROR,
203+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
204+
errmsg("compatibility_mode cannot rewrite column \"%s\"",
205+
columnDef->colname),
206+
errdetail("The column has %s expression bound to its original type, "
207+
"which would no longer match once a nested uuid is "
208+
"rewritten to text.", clause),
209+
errhint("Drop the expression, or keep the uuid at the top level "
210+
"of the column (top-level uuid columns are left unchanged).")));
211+
}
212+
213+
139214
/*
140215
* ApplyIcebergTableCompatibilityModeForSchema rewrites the column TYPES of a
141216
* ColumnDef list in place according to the compatibility mode. Top-level
@@ -146,6 +221,10 @@ NestedUuidToText(Oid typeOid, int32 typeMod, int level, void *context,
146221
* ConvertTypeTree; this pass only supplies the per-mode leaf rule. The two run
147222
* as independent passes on the same list and compose (each leaves untouched the
148223
* fields the other rewrote).
224+
*
225+
* Swapping only typeName is sound only for columns that carry nothing else
226+
* bound to the original type, so any column we actually rewrite is first run
227+
* through ErrorIfColumnRewriteUnsafe.
149228
*/
150229
void
151230
ApplyIcebergTableCompatibilityModeForSchema(List *columnDefList,
@@ -196,6 +275,9 @@ ApplyIcebergTableCompatibilityModeForSchema(List *columnDefList,
196275

197276
if (ConvertTypeTree(typeOid, typmod, 0, leafConv, NULL,
198277
&convertedOid, &convertedMod))
278+
{
279+
ErrorIfColumnRewriteUnsafe(columnDef);
199280
columnDef->typeName = makeTypeNameFromOid(convertedOid, convertedMod);
281+
}
200282
}
201283
}

pg_lake_table/tests/pytests/test_compatibility_mode.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,115 @@ def test_alter_add_without_option_keeps_uuid(
579579
pg_conn.rollback()
580580

581581

582+
# ---------------------------------------------------------------------------
583+
# type-bound clauses on a rewritten column are refused
584+
#
585+
# Rewriting a column swaps only its type; there is no implicit uuid->text cast,
586+
# so a DEFAULT/GENERATED expression bound to the original (uuid) type cannot be
587+
# silently re-coerced. We fail fast instead of producing a broken column.
588+
# This is reachable only from user-authored DDL (programmatic Iceberg table
589+
# creation builds columns from type/typmod/collation alone, never these
590+
# clauses). A clause on a column we DON'T rewrite (top-level uuid, or a
591+
# uuid-free column) is fine.
592+
# ---------------------------------------------------------------------------
593+
594+
595+
def test_default_on_rewritten_column_rejected(
596+
s3, pg_conn, extension, with_default_location
597+
):
598+
error = run_command(
599+
"CREATE TABLE compat_def (id int, us uuid[] DEFAULT '{}'::uuid[]) "
600+
"USING iceberg WITH (compatibility_mode = 'snowflake');",
601+
pg_conn,
602+
raise_error=False,
603+
)
604+
assert "cannot rewrite column" in error
605+
assert "us" in error
606+
pg_conn.rollback()
607+
608+
609+
def test_generated_on_rewritten_column_rejected(
610+
s3, pg_conn, extension, with_default_location
611+
):
612+
error = run_command(
613+
"CREATE TABLE compat_gen "
614+
"(u uuid, g uuid[] GENERATED ALWAYS AS (ARRAY[u]) STORED) "
615+
"USING iceberg WITH (compatibility_mode = 'snowflake');",
616+
pg_conn,
617+
raise_error=False,
618+
)
619+
assert "cannot rewrite column" in error
620+
assert "g" in error
621+
pg_conn.rollback()
622+
623+
624+
def test_default_on_nested_composite_column_rejected(
625+
s3, pg_conn, extension, with_default_location
626+
):
627+
"""The clause is refused even when the uuid is buried inside a composite."""
628+
error = run_command(
629+
"""
630+
CREATE TYPE def_comp AS (cid uuid, note text);
631+
CREATE TABLE compat_def_comp
632+
(id int, c def_comp DEFAULT ROW(NULL, 'x')::def_comp)
633+
USING iceberg WITH (compatibility_mode = 'snowflake');
634+
""",
635+
pg_conn,
636+
raise_error=False,
637+
)
638+
assert "cannot rewrite column" in error
639+
pg_conn.rollback()
640+
641+
642+
def test_alter_add_default_on_rewritten_column_rejected(
643+
s3, pg_conn, extension, with_default_location
644+
):
645+
run_command(
646+
"CREATE TABLE compat_alter_def (id int) USING iceberg "
647+
"WITH (compatibility_mode = 'snowflake');",
648+
pg_conn,
649+
)
650+
pg_conn.commit()
651+
try:
652+
error = run_command(
653+
"ALTER TABLE compat_alter_def ADD COLUMN us uuid[] DEFAULT '{}'::uuid[];",
654+
pg_conn,
655+
raise_error=False,
656+
)
657+
assert "cannot rewrite column" in error
658+
assert "us" in error
659+
pg_conn.rollback()
660+
finally:
661+
run_command("DROP TABLE compat_alter_def", pg_conn)
662+
pg_conn.commit()
663+
664+
665+
def test_default_on_top_level_uuid_allowed(
666+
s3, pg_conn, extension, with_default_location
667+
):
668+
"""A top-level uuid is never rewritten, so a DEFAULT on it is fine."""
669+
run_command(
670+
"CREATE TABLE compat_top_def (id int, u uuid DEFAULT gen_random_uuid()) "
671+
"USING iceberg WITH (compatibility_mode = 'snowflake');",
672+
pg_conn,
673+
)
674+
assert _col_format_type(pg_conn, "compat_top_def", "u") == "uuid"
675+
pg_conn.rollback()
676+
677+
678+
def test_default_on_uuid_free_column_allowed(
679+
s3, pg_conn, extension, with_default_location
680+
):
681+
"""A DEFAULT on a column we don't rewrite is untouched by the guard."""
682+
run_command(
683+
"CREATE TABLE compat_clean_def (id int DEFAULT 7, note text DEFAULT 'x') "
684+
"USING iceberg WITH (compatibility_mode = 'snowflake');",
685+
pg_conn,
686+
)
687+
assert _col_format_type(pg_conn, "compat_clean_def", "id") == "integer"
688+
pg_conn.rollback()
689+
690+
582691
# ---------------------------------------------------------------------------
583692
# dropped-column lifecycle (data written on both sides of the drop)
584693
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)