Skip to content

Commit 31f2f96

Browse files
sfc-gh-mslot and claude committed
Update tests and reindent for native typed cursor results after OID fix
The previous commit makes pgduck_server send real PostgreSQL type OIDs in its row description instead of InvalidOid. With proper OIDs, psycopg2 correctly parses each column into its native Python type (int, bool, datetime, Decimal, memoryview) instead of silently coercing everything to str via TEXTOID fallback.

Tests written against the old buggy behavior compared expected values to strings. Update the affected assertions to expect native types — or, where the test compares a cursor result against a CSV-parsed transmit result, stringify the cursor side via the existing converters.

Also run pgindent on type_conversion.{c,h} and duckdb.c to fix whitespace nits introduced by the previous commit.

Notable test changes:

- test_common/helpers/db.py: run_simple_command now expects [(1,)] not [("1",)]; this single helper was responsible for ~17 failures across test_pgbench, test_server_protocol_failures, test_psql, test_cancellations, and test_simple_server.
- test_types.py: fix the long-standing latent interval-arithmetic mismatch — the existing normalizer assumes a 360-day year, but psycopg2 converts PG `interval` directly to timedelta using a 365-day year (and PG normalizes "16 mons 15 days" to "1 yr 4 mons 15 days"), so PG cursor results need their own expected list.
- test_identity_partitioned_writes.py / test_date_partitioned_writes.py / test_truncate_partitioned_writes.py: lake_table.data_file_partition_values.value is text, but res[0][0] from pgduck via SELECT DISTINCT col is now native — branch on col_type and cast appropriately (BOOLEAN partition values are stored as "t"/"f", BYTEA as "\\x...", others compare via str()).

Signed-off-by: Marco Slot <marco.slot@snowflake.com>
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent f071bc7 commit 31f2f96

17 files changed

Lines changed: 247 additions & 206 deletions

pg_lake_table/tests/pytests/test_date_partitioned_writes.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def test_calendar_partition_write(
164164
f"SELECT count(DISTINCT {part_expr}), count(*) FROM '{path}'",
165165
pgduck_conn,
166166
)
167-
assert res[0][0] == "1"
167+
assert res[0][0] == 1
168168
assert int(res[0][1]) == row_count
169169

170170
res = run_query(
@@ -177,7 +177,9 @@ def test_calendar_partition_write(
177177
pg_conn,
178178
)
179179

180-
assert res_partition[0][0] == res[0][0]
180+
# res_partition[0][0] is text; res[0][0] is now native (int for
181+
# year/month/day partitions).
182+
assert res_partition[0][0] == str(res[0][0])
181183

182184
# register the same table to spark
183185
# and make sure it can understand our partitioning

pg_lake_table/tests/pytests/test_iceberg_remove_unreferenced_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,7 @@ def test_remove_unreferenced_files_13(
900900
res = run_query(
901901
f"SELECT count(*) FROM read_parquet('{file_paths_q[0][0]}/*')", pgduck_conn
902902
)
903-
assert res[0][0] == "0"
903+
assert res[0][0] == 0
904904

905905
# no files inserted
906906
file_paths = run_query(

pg_lake_table/tests/pytests/test_identity_partitioned_writes.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -260,21 +260,30 @@ def test_identity_partition_write(
260260
pg_conn,
261261
)
262262

263+
# res_partition is the partition-value text from
264+
# lake_table.data_file_partition_values; res[0][0] now comes back as a
265+
# native Python type from pgduck_server (the type depends on col_type).
266+
partition_text = res_partition[0][0]
267+
actual_value = res[0][0]
268+
263269
if col_type == "BOOLEAN":
264-
assert bool(res_partition[0][0]) == bool(res[0][0])
270+
# partition value is PG bool short form ("t" / "f")
271+
expected_bool = partition_text == "t"
272+
assert expected_bool == actual_value
265273
elif col_type in ("TIMESTAMP", "TIMESTAMPTZ"):
266-
expected = datetime.datetime.fromisoformat(res_partition[0][0])
267-
actual = datetime.datetime.fromisoformat(res[0][0])
268-
269-
if actual.tzinfo is not None:
274+
expected = datetime.datetime.fromisoformat(partition_text)
275+
if actual_value.tzinfo is not None and expected.tzinfo is None:
270276
expected = expected.replace(tzinfo=datetime.timezone.utc)
271-
assert expected == actual
277+
assert expected == actual_value
272278
elif col_type == "DATE":
273-
expected = datetime.date.fromisoformat(res_partition[0][0])
274-
actual = datetime.date.fromisoformat(res[0][0])
275-
assert expected == actual
279+
expected = datetime.date.fromisoformat(partition_text)
280+
assert expected == actual_value
281+
elif col_type == "BYTEA":
282+
# partition value is "\\x..." text; cursor returns memoryview
283+
assert partition_text == "\\x" + bytes(actual_value).hex()
276284
else:
277-
assert res_partition[0][0] == res[0][0]
285+
# All other types compare cleanly via stringification.
286+
assert partition_text == str(actual_value)
278287

279288
res = run_query(f"SELECT count(*) FROM '{file_path}'", pgduck_conn)
280289
# each file should have a distinct value

pg_lake_table/tests/pytests/test_multi_expr_partition_writes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def test_multi_expression_partition_write(
105105
""",
106106
pgduck_conn,
107107
)[0][0]
108-
assert part_key_cnt == "1"
108+
assert part_key_cnt == 1
109109

110110
# 5) tidy up session GUC
111111
run_command("RESET pg_lake_table.enable_insert_select_pushdown;", pg_conn)

pg_lake_table/tests/pytests/test_partition_evolution.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def test_partition_evolution(
104104

105105
for p in files_stage1:
106106
rows = run_query(f"SELECT count(*) FROM '{p}'", pgduck_conn)[0][0]
107-
assert rows == "2"
107+
assert rows == 2
108108
_file_holds_single_key(p, KEY_STAGE1, pgduck_conn)
109109

110110
# ─── stage 2 ── ALTER SET … + second insert (8 new partitions) ────────
@@ -135,7 +135,7 @@ def test_partition_evolution(
135135
assert len(files_stage2) == 8
136136

137137
for p in files_stage2:
138-
assert run_query(f"SELECT count(*) FROM '{p}'", pgduck_conn)[0][0] == "2"
138+
assert run_query(f"SELECT count(*) FROM '{p}'", pgduck_conn)[0][0] == 2
139139
_file_holds_single_key(p, KEY_STAGE2, pgduck_conn)
140140

141141
# ─── stage 3 ── ALTER DROP partition_by + final insert (un-partitioned) ─
@@ -153,7 +153,7 @@ def test_partition_evolution(
153153
assert len(files_stage3) == 1
154154

155155
path = next(iter(files_stage3))
156-
assert run_query(f"SELECT count(*) FROM '{path}'", pgduck_conn)[0][0] == "4"
156+
assert run_query(f"SELECT count(*) FROM '{path}'", pgduck_conn)[0][0] == 4
157157

158158
# ─── cleanup session GUC ────────────────────────────────────────────────
159159
pg_conn.rollback()

pg_lake_table/tests/pytests/test_truncate_partitioned_writes.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def test_truncate_partition_write(
154154
res = run_query(
155155
f"SELECT count(DISTINCT {part_expr}), count(*) FROM '{path}'", pgduck_conn
156156
)
157-
assert res[0][0] == "1"
157+
assert res[0][0] == 1
158158
assert int(res[0][1]) == row_count
159159

160160
res = run_query(f"SELECT DISTINCT {part_expr} FROM '{path}'", pgduck_conn)
@@ -163,7 +163,9 @@ def test_truncate_partition_write(
163163
f"SELECT value FROM lake_table.data_file_partition_values WHERE id='{id}'",
164164
pg_conn,
165165
)
166-
assert res_partition[0][0].strip() == res[0][0].strip()
166+
# res_partition[0][0] is text; res[0][0] is now native (int for
167+
# numeric truncations, string for varchar/char/text).
168+
assert res_partition[0][0].strip() == str(res[0][0]).strip()
167169

168170
# register the same table to spark
169171
# and make sure it can understand our partitioning

pgduck_server/include/duckdb/type_conversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,6 @@ typedef struct DuckDBTypeInfo
5959

6060
extern DuckDBTypeInfo * find_duck_type_info(duckdb_type duckType);
6161

62-
extern Oid duckdb_type_to_pg_oid(duckdb_type duckType);
62+
extern Oid duckdb_type_to_pg_oid(duckdb_type duckType);
6363

6464
#endif

pgduck_server/src/duckdb/duckdb.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,9 +1148,9 @@ duckdb_query_result_send_column_metadata(DuckDBQueryResult * duckdb_query_result
11481148
pq_writeint16(buf, originalColumnNumber);
11491149

11501150
/*
1151-
* We convert each DuckDB type to the closest PostgreSQL OID equivalent.
1152-
* columnLength and columnTypeMod are left as -1, see TypeInfo struct
1153-
* comment for the reasoning.
1151+
* We convert each DuckDB type to the closest PostgreSQL OID
1152+
* equivalent. columnLength and columnTypeMod are left as -1, see
1153+
* TypeInfo struct comment for the reasoning.
11541154
*/
11551155
pq_writeint32(buf, duckdb_type_to_pg_oid(duckType));
11561156
pq_writeint16(buf, -1);

pgduck_server/src/duckdb/type_conversion.c

Lines changed: 91 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -348,103 +348,105 @@ find_duck_type_info(duckdb_type duckType)
348348
* For types with no direct PG equivalent we use the closest match (UBIGINT ->
349349
* INT8OID, HUGEINT/UHUGEINT -> NUMERICOID); ENUM and nested logical types fall
350350
* back to TEXTOID so the client can still consume the text-serialised value we always send.
351-
*
351+
*
352352
* Note: consider falling back to InvalidOid (0).
353353
*
354354
* For DECIMAL we always return NUMERICOID regardless of width/scale;
355355
* callers that need the typmod should handle that separately.
356356
*/
357-
Oid duckdb_type_to_pg_oid(duckdb_type duckType)
357+
Oid
358+
duckdb_type_to_pg_oid(duckdb_type duckType)
358359
{
359360
switch (duckType)
360361
{
361-
/* ---- booleans ---- */
362-
case DUCKDB_TYPE_BOOLEAN:
363-
return BOOLOID;
364-
365-
/* ---- integers ---- */
366-
case DUCKDB_TYPE_TINYINT:
367-
return INT2OID; /* no INT1 in PG */
368-
case DUCKDB_TYPE_SMALLINT:
369-
return INT2OID;
370-
case DUCKDB_TYPE_INTEGER:
371-
return INT4OID;
372-
case DUCKDB_TYPE_BIGINT:
373-
return INT8OID;
374-
375-
/* unsigned — promote to next signed size that fits */
376-
case DUCKDB_TYPE_UTINYINT:
377-
return INT2OID;
378-
case DUCKDB_TYPE_USMALLINT:
379-
return INT4OID;
380-
case DUCKDB_TYPE_UINTEGER:
381-
return INT8OID;
382-
case DUCKDB_TYPE_UBIGINT:
383-
return INT8OID; /* no u64 in PG, but this is the closest consistent choice */
384-
case DUCKDB_TYPE_HUGEINT:
385-
return NUMERICOID;
386-
case DUCKDB_TYPE_UHUGEINT:
387-
return NUMERICOID;
388-
389-
/* ---- floats ---- */
390-
case DUCKDB_TYPE_FLOAT:
391-
return FLOAT4OID;
392-
case DUCKDB_TYPE_DOUBLE:
393-
return FLOAT8OID;
394-
395-
/* ---- arbitrary precision ---- */
396-
case DUCKDB_TYPE_DECIMAL:
397-
return NUMERICOID;
398-
399-
/* ---- text / bytes ---- */
400-
case DUCKDB_TYPE_VARCHAR:
401-
return TEXTOID;
402-
case DUCKDB_TYPE_BLOB:
403-
return BYTEAOID;
404-
case DUCKDB_TYPE_BIT:
405-
return BITOID;
406-
case DUCKDB_TYPE_ENUM:
407-
return TEXTOID; /* sent as label text */
408-
409-
/* ---- date / time ---- */
410-
case DUCKDB_TYPE_DATE:
411-
return DATEOID;
412-
case DUCKDB_TYPE_TIME:
413-
return TIMEOID;
414-
case DUCKDB_TYPE_TIME_TZ:
415-
return TIMETZOID;
416-
case DUCKDB_TYPE_TIMESTAMP:
417-
return TIMESTAMPOID;
418-
case DUCKDB_TYPE_TIMESTAMP_S:
419-
return TIMESTAMPOID;
420-
case DUCKDB_TYPE_TIMESTAMP_MS:
421-
return TIMESTAMPOID;
422-
case DUCKDB_TYPE_TIMESTAMP_NS:
423-
return TIMESTAMPOID;
424-
case DUCKDB_TYPE_TIMESTAMP_TZ:
425-
return TIMESTAMPTZOID;
426-
case DUCKDB_TYPE_INTERVAL:
427-
return INTERVALOID;
428-
429-
/* ---- uuid ---- */
430-
case DUCKDB_TYPE_UUID:
431-
return UUIDOID;
432-
433-
/* ---- complex / nested — serialised as text ---- */
434-
/* TODO: Handle this case */
435-
case DUCKDB_TYPE_LIST:
436-
return TEXTOID;
437-
case DUCKDB_TYPE_ARRAY:
438-
return TEXTOID;
439-
case DUCKDB_TYPE_STRUCT:
440-
return TEXTOID;
441-
case DUCKDB_TYPE_MAP:
442-
return TEXTOID;
443-
case DUCKDB_TYPE_UNION:
444-
return TEXTOID;
445-
446-
default:
447-
return InvalidOid;
362+
/* ---- booleans ---- */
363+
case DUCKDB_TYPE_BOOLEAN:
364+
return BOOLOID;
365+
366+
/* ---- integers ---- */
367+
case DUCKDB_TYPE_TINYINT:
368+
return INT2OID; /* no INT1 in PG */
369+
case DUCKDB_TYPE_SMALLINT:
370+
return INT2OID;
371+
case DUCKDB_TYPE_INTEGER:
372+
return INT4OID;
373+
case DUCKDB_TYPE_BIGINT:
374+
return INT8OID;
375+
376+
/* unsigned — promote to next signed size that fits */
377+
case DUCKDB_TYPE_UTINYINT:
378+
return INT2OID;
379+
case DUCKDB_TYPE_USMALLINT:
380+
return INT4OID;
381+
case DUCKDB_TYPE_UINTEGER:
382+
return INT8OID;
383+
case DUCKDB_TYPE_UBIGINT:
384+
return INT8OID; /* no u64 in PG, but this is the closest
385+
* consistent choice */
386+
case DUCKDB_TYPE_HUGEINT:
387+
return NUMERICOID;
388+
case DUCKDB_TYPE_UHUGEINT:
389+
return NUMERICOID;
390+
391+
/* ---- floats ---- */
392+
case DUCKDB_TYPE_FLOAT:
393+
return FLOAT4OID;
394+
case DUCKDB_TYPE_DOUBLE:
395+
return FLOAT8OID;
396+
397+
/* ---- arbitrary precision ---- */
398+
case DUCKDB_TYPE_DECIMAL:
399+
return NUMERICOID;
400+
401+
/* ---- text / bytes ---- */
402+
case DUCKDB_TYPE_VARCHAR:
403+
return TEXTOID;
404+
case DUCKDB_TYPE_BLOB:
405+
return BYTEAOID;
406+
case DUCKDB_TYPE_BIT:
407+
return BITOID;
408+
case DUCKDB_TYPE_ENUM:
409+
return TEXTOID; /* sent as label text */
410+
411+
/* ---- date / time ---- */
412+
case DUCKDB_TYPE_DATE:
413+
return DATEOID;
414+
case DUCKDB_TYPE_TIME:
415+
return TIMEOID;
416+
case DUCKDB_TYPE_TIME_TZ:
417+
return TIMETZOID;
418+
case DUCKDB_TYPE_TIMESTAMP:
419+
return TIMESTAMPOID;
420+
case DUCKDB_TYPE_TIMESTAMP_S:
421+
return TIMESTAMPOID;
422+
case DUCKDB_TYPE_TIMESTAMP_MS:
423+
return TIMESTAMPOID;
424+
case DUCKDB_TYPE_TIMESTAMP_NS:
425+
return TIMESTAMPOID;
426+
case DUCKDB_TYPE_TIMESTAMP_TZ:
427+
return TIMESTAMPTZOID;
428+
case DUCKDB_TYPE_INTERVAL:
429+
return INTERVALOID;
430+
431+
/* ---- uuid ---- */
432+
case DUCKDB_TYPE_UUID:
433+
return UUIDOID;
434+
435+
/* ---- complex / nested — serialised as text ---- */
436+
/* TODO: Handle this case */
437+
case DUCKDB_TYPE_LIST:
438+
return TEXTOID;
439+
case DUCKDB_TYPE_ARRAY:
440+
return TEXTOID;
441+
case DUCKDB_TYPE_STRUCT:
442+
return TEXTOID;
443+
case DUCKDB_TYPE_MAP:
444+
return TEXTOID;
445+
case DUCKDB_TYPE_UNION:
446+
return TEXTOID;
447+
448+
default:
449+
return InvalidOid;
448450
}
449451
}
450452

0 commit comments

Comments
 (0)