Skip to content

Commit d4c7638

Browse files
committed
Fix NOT NULL bypass when clamp mode converts values to NULL (#316)
ExecConstraints was running before IcebergErrorOrClampSlotInPlace, so values like bounded numeric NaN and multidimensional arrays passed the NOT NULL check in their original (non-null) form, then got clamped to NULL and silently stored in NOT NULL columns. Move clamping before ExecConstraints in both postgresExecForeignInsert and postgresExecForeignUpdate so constraint checks see post-clamp values. Factor the clamping + ExecConstraints sequence into a shared ClampAndCheckConstraints helper used by both INSERT and UPDATE paths. --------- Signed-off-by: sfc-gh-npuka <naisila.puka@snowflake.com>
1 parent f59754e commit d4c7638

2 files changed

Lines changed: 273 additions & 21 deletions

File tree

pg_lake_table/src/fdw/pg_lake_table.c

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,11 @@ static void merge_fdw_options(PgLakeRelationInfo * fpinfo,
502502
const PgLakeRelationInfo * fpinfo_o,
503503
const PgLakeRelationInfo * fpinfo_i);
504504

505+
static void IcebergErrorOrClampSlotInPlace(TupleTableSlot *slot, TupleDesc tupleDesc,
506+
IcebergOutOfRangePolicy policy);
507+
static void ClampAndCheckConstraints(PgLakeModifyState * fmstate,
508+
ResultRelInfo *resultRelInfo,
509+
TupleTableSlot *slot, EState *estate);
505510
static void WriteInsertRecord(PgLakeModifyState * modifyState, TupleTableSlot *slot);
506511
static void PrepareDeletionSlot(PgLakeFileModifyState * fileModifyState,
507512
uint64 fileRowNumber,
@@ -2220,14 +2225,7 @@ postgresExecForeignInsert(EState *estate,
22202225
{
22212226
PgLakeModifyState *fmstate = (PgLakeModifyState *) resultRelInfo->ri_FdwState;
22222227

2223-
/*
2224-
* Constraint checks are skipped by PostgreSQL itself, since it assumes
2225-
* them to be unenforceable as data can change underneath.
2226-
*/
2227-
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
2228-
2229-
if (resultRelationDesc->rd_att->constr)
2230-
ExecConstraints(resultRelInfo, slot, estate);
2228+
ClampAndCheckConstraints(fmstate, resultRelInfo, slot, estate);
22312229

22322230
WriteInsertRecord(fmstate, slot);
22332231

@@ -2268,12 +2266,7 @@ postgresExecForeignUpdate(EState *estate,
22682266
return NULL;
22692267
}
22702268

2271-
/*
2272-
* Constraint checks are skipped by PostgreSQL itself, since it assumes
2273-
* them to be unenforceable as data can change underneath.
2274-
*/
2275-
if (resultRelation->rd_att->constr)
2276-
ExecConstraints(resultRelInfo, slot, estate);
2269+
ClampAndCheckConstraints(fmstate, resultRelInfo, slot, estate);
22772270

22782271
/*
22792272
* Also check the tuple against the partition constraint, since PostgreSQL
@@ -2643,19 +2636,40 @@ IcebergErrorOrClampSlotInPlace(TupleTableSlot *slot, TupleDesc tupleDesc,
26432636

26442637

26452638
/*
2646-
* WriteInsertRecord validates the tuple against Iceberg write constraints
2647-
* (when applicable) and forwards it to the insert destination.
2639+
* ClampAndCheckConstraints normalizes the slot for Iceberg write and then
2640+
* runs PostgreSQL constraint checks (NOT NULL, CHECK, etc.).
2641+
*
2642+
* Clamping must happen first so that ExecConstraints sees post-clamp values,
2643+
* e.g. bounded numeric NaN clamped to NULL is caught by NOT NULL.
2644+
*
2645+
* PostgreSQL skips constraint checks on foreign tables, so we run them
2646+
* ourselves.
2647+
*/
2648+
static void
2649+
ClampAndCheckConstraints(PgLakeModifyState * fmstate,
2650+
ResultRelInfo *resultRelInfo,
2651+
TupleTableSlot *slot, EState *estate)
2652+
{
2653+
if (fmstate->outOfRangePolicy != ICEBERG_OOR_NONE &&
2654+
fmstate->needsOutOfRangeValidation)
2655+
IcebergErrorOrClampSlotInPlace(slot, fmstate->tupleDesc,
2656+
fmstate->outOfRangePolicy);
2657+
2658+
Relation rel = resultRelInfo->ri_RelationDesc;
2659+
2660+
if (rel->rd_att->constr)
2661+
ExecConstraints(resultRelInfo, slot, estate);
2662+
}
2663+
2664+
2665+
/*
2666+
* WriteInsertRecord forwards the tuple to the insert destination.
26482667
*/
26492668
static void
26502669
WriteInsertRecord(PgLakeModifyState * modifyState, TupleTableSlot *slot)
26512670
{
26522671
DestReceiver *insertDest = modifyState->insertDest;
26532672

2654-
if (modifyState->outOfRangePolicy != ICEBERG_OOR_NONE &&
2655-
modifyState->needsOutOfRangeValidation)
2656-
IcebergErrorOrClampSlotInPlace(slot, modifyState->tupleDesc,
2657-
modifyState->outOfRangePolicy);
2658-
26592673
if (modifyState->insertedRowCount == 0)
26602674
{
26612675
/* incoming inserts have the tuple descriptor of the table */

pg_lake_table/tests/pytests/test_iceberg_validation.py

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2844,3 +2844,241 @@ def test_explain_shows_pg_nullify_nested_list_in_list_transform(
28442844
run_command("RESET search_path;", pg_conn)
28452845
run_command(f"DROP SCHEMA IF EXISTS {schema} CASCADE;", pg_conn)
28462846
pg_conn.commit()
2847+
2848+
2849+
# =====================================================================
2850+
# NOT NULL constraint enforcement after clamping
2851+
#
2852+
# When out_of_range_values = 'clamp', values like bounded numeric NaN
2853+
# and multidimensional arrays are clamped to NULL. The NOT NULL
2854+
# constraint must still be enforced on the post-clamp value.
2855+
# =====================================================================
2856+
2857+
2858+
def test_clamp_to_null_enforces_not_null(pg_conn, extension, s3, with_default_location):
2859+
"""NOT NULL constraint is enforced on values clamped to NULL.
2860+
2861+
Covers both value types that clamp to NULL (bounded numeric NaN and
2862+
multidimensional arrays) and both DML paths (INSERT and UPDATE).
2863+
2864+
+-------+---------------------+--------+----------------------------+
2865+
| case | value | op | expected |
2866+
+-------+---------------------+--------+----------------------------+
2867+
| NaN | 'NaN'::numeric | INSERT | not-null constraint error |
2868+
| NaN | 'NaN'::numeric | UPDATE | not-null constraint error |
2869+
| mdim | ARRAY[ARRAY[1,2,3]] | INSERT | not-null constraint error |
2870+
| mdim | ARRAY[ARRAY[1,2,3]] | UPDATE | not-null constraint error |
2871+
+-------+---------------------+--------+----------------------------+
2872+
"""
2873+
schema = "test_clamp_null_nn"
2874+
2875+
run_command(f"CREATE SCHEMA {schema};", pg_conn)
2876+
run_command(f"SET search_path TO {schema};", pg_conn)
2877+
2878+
try:
2879+
run_command(
2880+
"CREATE TABLE num_target ("
2881+
" id int,"
2882+
" n numeric(18,6) NOT NULL"
2883+
") USING iceberg WITH (out_of_range_values = 'clamp');",
2884+
pg_conn,
2885+
)
2886+
run_command(
2887+
"CREATE TABLE arr_target ("
2888+
" id int,"
2889+
" vals int[] NOT NULL"
2890+
") USING iceberg WITH (out_of_range_values = 'clamp');",
2891+
pg_conn,
2892+
)
2893+
pg_conn.commit()
2894+
2895+
# -- NaN INSERT into NOT NULL numeric --
2896+
err = run_command(
2897+
"INSERT INTO num_target VALUES (1, 'NaN'::numeric(18,6));",
2898+
pg_conn,
2899+
raise_error=False,
2900+
)
2901+
assert "not-null constraint" in str(err)
2902+
pg_conn.rollback()
2903+
2904+
# -- NaN UPDATE on NOT NULL numeric --
2905+
run_command("INSERT INTO num_target VALUES (1, 42.0);", pg_conn)
2906+
pg_conn.commit()
2907+
2908+
err = run_command(
2909+
"UPDATE num_target SET n = 'NaN'::numeric(18,6) WHERE id = 1;",
2910+
pg_conn,
2911+
raise_error=False,
2912+
)
2913+
assert "not-null constraint" in str(err)
2914+
pg_conn.rollback()
2915+
2916+
result = run_query("SELECT n FROM num_target WHERE id = 1;", pg_conn)
2917+
assert result[0][0] is not None
2918+
2919+
# -- Multidimensional array INSERT into NOT NULL column --
2920+
err = run_command(
2921+
"INSERT INTO arr_target VALUES (1, ARRAY[ARRAY[1,2,3]]);",
2922+
pg_conn,
2923+
raise_error=False,
2924+
)
2925+
assert "not-null constraint" in str(err)
2926+
pg_conn.rollback()
2927+
2928+
# -- Multidimensional array UPDATE on NOT NULL column --
2929+
run_command("INSERT INTO arr_target VALUES (1, ARRAY[10,20,30]);", pg_conn)
2930+
pg_conn.commit()
2931+
2932+
err = run_command(
2933+
"UPDATE arr_target SET vals = ARRAY[ARRAY[1,2],ARRAY[3,4]] WHERE id = 1;",
2934+
pg_conn,
2935+
raise_error=False,
2936+
)
2937+
assert "not-null constraint" in str(err)
2938+
pg_conn.rollback()
2939+
2940+
result = run_query("SELECT vals FROM arr_target WHERE id = 1;", pg_conn)
2941+
assert result[0][0] == [10, 20, 30]
2942+
finally:
2943+
pg_conn.rollback()
2944+
run_command("RESET search_path;", pg_conn)
2945+
run_command(f"DROP SCHEMA IF EXISTS {schema} CASCADE;", pg_conn)
2946+
pg_conn.commit()
2947+
2948+
2949+
# =====================================================================
2950+
# CHECK constraint interaction with clamping
2951+
#
2952+
# When clamping runs before constraint checks, CHECK constraints
2953+
# evaluate the post-clamp value. This means clamping can "rescue"
2954+
# values that would otherwise violate a CHECK. These tests document
2955+
# the expected behavior: the CHECK validates what is actually stored.
2956+
# =====================================================================
2957+
2958+
2959+
def test_temporal_clamp_rescues_check_constraint(
2960+
pg_conn, extension, s3, with_default_location
2961+
):
2962+
"""Temporal clamping can satisfy a CHECK that the original value would violate.
2963+
2964+
Year 10000 violates CHECK (d <= '9999-12-31'), but clamping brings it
2965+
to exactly 9999-12-31, which satisfies the CHECK.
2966+
"""
2967+
schema = "test_clamp_check_temporal"
2968+
2969+
run_command(f"CREATE SCHEMA {schema};", pg_conn)
2970+
run_command(f"SET search_path TO {schema};", pg_conn)
2971+
run_command("SET TIME ZONE 'UTC';", pg_conn)
2972+
2973+
try:
2974+
run_command(
2975+
"CREATE TABLE target (d date CHECK (d <= '9999-12-31'::date))"
2976+
" USING iceberg WITH (out_of_range_values = 'clamp');",
2977+
pg_conn,
2978+
)
2979+
pg_conn.commit()
2980+
2981+
run_command(
2982+
"INSERT INTO target VALUES ('infinity'::date);",
2983+
pg_conn,
2984+
)
2985+
pg_conn.commit()
2986+
2987+
result = run_query("SELECT d::text FROM target;", pg_conn)
2988+
assert result[0][0] == "9999-12-31"
2989+
finally:
2990+
pg_conn.rollback()
2991+
run_command("RESET TIME ZONE;", pg_conn)
2992+
run_command("RESET search_path;", pg_conn)
2993+
run_command(f"DROP SCHEMA IF EXISTS {schema} CASCADE;", pg_conn)
2994+
pg_conn.commit()
2995+
2996+
2997+
def test_temporal_clamp_still_fails_strict_check_constraint(
2998+
pg_conn, extension, s3, with_default_location
2999+
):
3000+
"""Temporal clamping cannot rescue a CHECK that the clamped value still violates.
3001+
3002+
infinity is clamped to 9999-12-31, but CHECK (d < '9999-12-31') uses
3003+
strict less-than, so the clamped value still fails.
3004+
"""
3005+
schema = "test_clamp_check_strict"
3006+
3007+
run_command(f"CREATE SCHEMA {schema};", pg_conn)
3008+
run_command(f"SET search_path TO {schema};", pg_conn)
3009+
run_command("SET TIME ZONE 'UTC';", pg_conn)
3010+
3011+
try:
3012+
run_command(
3013+
"CREATE TABLE target (d date CHECK (d < '9999-12-31'::date))"
3014+
" USING iceberg WITH (out_of_range_values = 'clamp');",
3015+
pg_conn,
3016+
)
3017+
pg_conn.commit()
3018+
3019+
err = run_command(
3020+
"INSERT INTO target VALUES ('infinity'::date);",
3021+
pg_conn,
3022+
raise_error=False,
3023+
)
3024+
assert "check constraint" in str(err).lower()
3025+
pg_conn.rollback()
3026+
finally:
3027+
pg_conn.rollback()
3028+
run_command("RESET TIME ZONE;", pg_conn)
3029+
run_command("RESET search_path;", pg_conn)
3030+
run_command(f"DROP SCHEMA IF EXISTS {schema} CASCADE;", pg_conn)
3031+
pg_conn.commit()
3032+
3033+
3034+
def test_numeric_nan_clamp_succeeds_on_nullable_column(
3035+
pg_conn, extension, s3, with_default_location
3036+
):
3037+
"""Bounded numeric NaN is clamped to NULL and stored when the column is nullable.
3038+
3039+
This is the happy-path counterpart of the NOT NULL tests: clamping
3040+
converts NaN to NULL, which is valid for a nullable column. Covers
3041+
both INSERT and UPDATE.
3042+
"""
3043+
schema = "test_clamp_nan_nullable"
3044+
3045+
run_command(f"CREATE SCHEMA {schema};", pg_conn)
3046+
run_command(f"SET search_path TO {schema};", pg_conn)
3047+
3048+
try:
3049+
run_command(
3050+
"CREATE TABLE target ("
3051+
" id int,"
3052+
" n numeric(18,6)"
3053+
") USING iceberg WITH (out_of_range_values = 'clamp');",
3054+
pg_conn,
3055+
)
3056+
pg_conn.commit()
3057+
3058+
# INSERT: NaN is clamped to NULL and succeeds
3059+
run_command(
3060+
"INSERT INTO target VALUES (1, 'NaN'::numeric(18,6));",
3061+
pg_conn,
3062+
)
3063+
pg_conn.commit()
3064+
3065+
result = run_query("SELECT n FROM target WHERE id = 1;", pg_conn)
3066+
assert result[0][0] is None
3067+
3068+
# UPDATE: NaN is clamped to NULL and succeeds
3069+
run_command("INSERT INTO target VALUES (2, 99.0);", pg_conn)
3070+
pg_conn.commit()
3071+
3072+
run_command(
3073+
"UPDATE target SET n = 'NaN'::numeric(18,6) WHERE id = 2;",
3074+
pg_conn,
3075+
)
3076+
pg_conn.commit()
3077+
3078+
result = run_query("SELECT n FROM target WHERE id = 2;", pg_conn)
3079+
assert result[0][0] is None
3080+
finally:
3081+
pg_conn.rollback()
3082+
run_command("RESET search_path;", pg_conn)
3083+
run_command(f"DROP SCHEMA IF EXISTS {schema} CASCADE;", pg_conn)
3084+
pg_conn.commit()

0 commit comments

Comments
 (0)