Snowflake-Labs
diff --git a/.github/actions/run-test/action.yml b/.github/actions/run-test/action.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
Lines changed: 4 additions & 0 deletions
diff --git a/Makefile b/Makefile
Lines changed: 12 additions & 1 deletion
diff --git a/pg_lake_copy/tests/pytests/test_duckdb_reserved_keyword_copy.py b/pg_lake_copy/tests/pytests/test_duckdb_reserved_keyword_copy.py
Lines changed: 339 additions & 0 deletions
diff --git a/pg_lake_engine/include/pg_lake/pgduck/keywords.h b/pg_lake_engine/include/pg_lake/pgduck/keywords.h
Lines changed: 11 additions & 1 deletion
@@ -140,7 +140,7 @@ runs:
       uses: actions/upload-artifact@v4
       if: always()
       with:
-        name: postgres-logs-${{ inputs.pg_version }}-${{ inputs.test_make_target }}
+        name: postgres-logs-${{ inputs.pg_version }}-${{ inputs.test_make_target }}${{ inputs.from_pg_version != '' && format('-from{0}', inputs.from_pg_version) || '' }}${{ inputs.worker_id != '' && format('-{0}', inputs.worker_id) || '' }}
         path: |
           /tmp/pg_lake_tests/logfile
           /tmp/pg_installcheck_tests/logfile
 
@@ -53,3 +53,7 @@ jobs:
       - name: Check formatting
         run: |
           make check-indent
+
+      # Runs the Makefile target of the same name, i.e.
+      # `python3 tools/generate_duckdb_kwlist.py --check`.  Per the Makefile
+      # comment, this is meant to catch a stale checked-in keyword table after
+      # a DuckDB version bump.
+      - name: Check DuckDB keyword table is in sync
+        run: |
+          make check-duckdb-kwlist
@@ -14,7 +14,7 @@ CUSTOM_TARGETS = check-pg_lake_engine installcheck-pg_lake_engine check-pg_exten
 DUCKDB_BUILD_USE_CACHE ?= 0
 
 # other phony targets go here
-.PHONY: all fast install install-fast installcheck clean check submodules uninstall check-indent reindent installcheck-postgres installcheck-postgres-with_extensions_created
+.PHONY: all fast install install-fast installcheck clean check submodules uninstall check-indent reindent installcheck-postgres installcheck-postgres-with_extensions_created generate-duckdb-kwlist check-duckdb-kwlist
 .PHONY: $(ALL_TARGETS)
 .PHONY: $(PHONY_TARGETS)
 
@@ -222,6 +222,17 @@ uninstall-avro:
 	rm -f $(PG_LIBDIR)/libavro.*
 	rm -rf $(PG_INCLUDEDIR)/avro*
 
+## DuckDB keyword list maintenance
+# Regenerate the checked-in keyword table from the vendored DuckDB kwlist.hpp.
+# Re-run whenever duckdb_pglake/duckdb is updated to a new DuckDB release.
+# The script path is relative, so make must be run from the directory that
+# contains tools/ (NOTE(review): presumably the repository root — confirm).
+generate-duckdb-kwlist:
+	python3 tools/generate_duckdb_kwlist.py
+
+# Verify that the checked-in keyword table matches the current kwlist.hpp.
+# Run in CI to catch stale keyword tables after a DuckDB version bump
+# (invoked as `make check-duckdb-kwlist` from .github/workflows/lint.yml).
+# NOTE(review): CI only fails if the script exits non-zero on a mismatch in
+# --check mode — confirm that in tools/generate_duckdb_kwlist.py.
+check-duckdb-kwlist:
+	python3 tools/generate_duckdb_kwlist.py --check
+
 ## Other targets
 check-isolation_pg_lake_table:
 	$(MAKE) -C pg_lake_table check-isolation
 
@@ -0,0 +1,339 @@
+"""
+Tests for correct identifier quoting during COPY TO / COPY FROM when table
+columns are named after DuckDB reserved keywords.
+
+DuckDB reserves several keywords (PIVOT, QUALIFY, LAMBDA, etc.) that
+PostgreSQL does not.  These are legal unquoted column names in PostgreSQL but
+require quoting in the SQL that pg_lake generates for pgduck_server.
+
+See: https://github.com/Snowflake-Labs/pg_lake/issues/277
+"""
+
+import pytest
+from utils_pytest import *
+
+# DuckDB-only reserved keywords that are safe as unquoted PostgreSQL
+# identifiers.  Each entry parametrizes the single-column round-trip tests
+# (test_copy_roundtrip_parquet / _csv / _json), where it is interpolated
+# unquoted into the column name, the table names and the S3 path.
+DUCKDB_ONLY_RESERVED = [
+    "describe",
+    "lambda",
+    "pivot",
+    "pivot_longer",
+    "pivot_wider",
+    "qualify",
+    "show",
+    "summarize",
+    "unpivot",
+]
+
+# Smaller set used in multi-column tests for conciseness.
+# NOTE(review): no test visible in this file references TEST_KEYWORDS; the
+# multi-column tests spell out these same four column names literally.
+# Either wire it in or drop it — confirm nothing imports it from elsewhere.
+TEST_KEYWORDS = ["pivot", "qualify", "lambda", "show"]
+
+
+def test_copy_to_parquet_with_reserved_keyword_columns(pg_conn, s3):
+    """
+    COPY ... TO parquet must succeed when the table has DuckDB-reserved
+    keyword column names, and COPY ... FROM must read the file back correctly.
+    """
+    url = f"s3://{TEST_BUCKET}/test_kw_copy_parquet/kw_cols.parquet"
+
+    try:
+        run_command(
+            """
+            CREATE TABLE test_kw_copy_src (
+                pivot    int,
+                qualify  int,
+                lambda   int,
+                show     int
+            );
+            INSERT INTO test_kw_copy_src VALUES (1, 2, 3, 4), (10, 20, 30, 40);
+            """,
+            pg_conn,
+        )
+
+        run_command(
+            f"COPY test_kw_copy_src TO '{url}' WITH (format 'parquet')",
+            pg_conn,
+        )
+
+        run_command(
+            f"""
+            CREATE TABLE test_kw_copy_dst (
+                pivot    int,
+                qualify  int,
+                lambda   int,
+                show     int
+            );
+            COPY test_kw_copy_dst FROM '{url}' WITH (format 'parquet');
+            """,
+            pg_conn,
+        )
+
+        result = run_query(
+            "SELECT pivot, qualify, lambda, show FROM test_kw_copy_dst ORDER BY pivot",
+            pg_conn,
+        )
+        assert result == [[1, 2, 3, 4], [10, 20, 30, 40]]
+    finally:
+        pg_conn.rollback()
+
+
+def test_copy_to_csv_with_reserved_keyword_columns(pg_conn, s3):
+    """
+    COPY ... TO CSV and back must work with DuckDB-reserved keyword columns.
+    """
+    url = f"s3://{TEST_BUCKET}/test_kw_copy_csv/kw_cols.csv"
+
+    try:
+        run_command(
+            f"""
+            CREATE TABLE test_kw_csv_src (pivot int, qualify int, lambda int, show int);
+            INSERT INTO test_kw_csv_src VALUES (3, 6, 9, 12), (30, 60, 90, 120);
+            COPY test_kw_csv_src TO '{url}' WITH (format 'csv', header on);
+
+            CREATE TABLE test_kw_csv_dst (pivot int, qualify int, lambda int, show int);
+            COPY test_kw_csv_dst FROM '{url}' WITH (format 'csv', header on);
+            """,
+            pg_conn,
+        )
+
+        result = run_query(
+            "SELECT pivot, qualify, lambda, show FROM test_kw_csv_dst ORDER BY pivot",
+            pg_conn,
+        )
+        assert result == [[3, 6, 9, 12], [30, 60, 90, 120]]
+    finally:
+        pg_conn.rollback()
+
+
+def test_copy_to_json_with_reserved_keyword_columns(pg_conn, s3):
+    """
+    COPY ... TO JSON and back must work with DuckDB-reserved keyword columns.
+    """
+    url = f"s3://{TEST_BUCKET}/test_kw_copy_json/kw_cols.json"
+
+    try:
+        run_command(
+            f"""
+            CREATE TABLE test_kw_json_src (pivot int, qualify int, lambda int, show int);
+            INSERT INTO test_kw_json_src VALUES (7, 14, 21, 28), (70, 140, 210, 280);
+            COPY test_kw_json_src TO '{url}' WITH (format 'json');
+
+            CREATE TABLE test_kw_json_dst (pivot int, qualify int, lambda int, show int);
+            COPY test_kw_json_dst FROM '{url}' WITH (format 'json');
+            """,
+            pg_conn,
+        )
+
+        result = run_query(
+            "SELECT pivot, qualify, lambda, show FROM test_kw_json_dst ORDER BY pivot",
+            pg_conn,
+        )
+        assert result == [[7, 14, 21, 28], [70, 140, 210, 280]]
+    finally:
+        pg_conn.rollback()
+
+
+# ---------------------------------------------------------------------------
+# Composite type field names with embedded double-quotes
+# ---------------------------------------------------------------------------
+
+
+def test_copy_roundtrip_composite_with_embedded_quote(pg_conn, s3):
+    """
+    COPY TO/FROM must handle composite types whose field names contain
+    double-quote characters.  The STRUCT type definition sent to DuckDB must
+    properly escape the embedded quotes.
+
+    Reproduces: https://github.com/snowflake-eng/sfpg-extension-pg_lake_replication/issues/361
+    """
+    url = f"s3://{TEST_BUCKET}/test_kw_copy_composite_quote/data.parquet"
+
+    try:
+        run_command(
+            """
+            CREATE TYPE test_kw_composite_quote AS (
+                U&"has\\0022quote" text,
+                normal int,
+                "has'single" int,
+                U&"has\\005Cback" int
+            )
+            """,
+            pg_conn,
+        )
+
+        run_command(
+            """
+            CREATE TABLE test_kw_cq_src (id int, s test_kw_composite_quote);
+            INSERT INTO test_kw_cq_src VALUES (1, ROW('hello', 42, 7, 8));
+            """,
+            pg_conn,
+        )
+
+        run_command(
+            f"COPY test_kw_cq_src TO '{url}' WITH (format 'parquet')",
+            pg_conn,
+        )
+
+        run_command(
+            f"""
+            CREATE TABLE test_kw_cq_dst (id int, s test_kw_composite_quote);
+            COPY test_kw_cq_dst FROM '{url}' WITH (format 'parquet');
+            """,
+            pg_conn,
+        )
+
+        result = run_query(
+            "SELECT id, (s).normal FROM test_kw_cq_dst",
+            pg_conn,
+        )
+        assert result == [[1, 42]]
+
+        result = run_query(
+            'SELECT (s).U&"has\\0022quote" FROM test_kw_cq_dst',
+            pg_conn,
+        )
+        assert result == [["hello"]]
+
+        result = run_query(
+            """SELECT (s)."has'single", (s).U&"has\\005Cback" FROM test_kw_cq_dst""",
+            pg_conn,
+        )
+        assert result == [[7, 8]]
+    finally:
+        pg_conn.rollback()
+
+
+def test_copy_roundtrip_csv_composite_with_embedded_quote(pg_conn, s3):
+    """
+    COPY TO/FROM CSV with composite types containing double-quote field names.
+    This exercises the read_csv columns= type string path.
+
+    Reproduces: https://github.com/snowflake-eng/sfpg-extension-pg_lake_replication/issues/361
+    """
+    url = f"s3://{TEST_BUCKET}/test_kw_copy_csv_composite_quote/data.csv"
+
+    try:
+        run_command(
+            """
+            CREATE TYPE test_kw_csv_cq AS (
+                U&"has\\0022quote" text,
+                normal int,
+                "has'single" int,
+                U&"has\\005Cback" int
+            )
+            """,
+            pg_conn,
+        )
+
+        run_command(
+            """
+            CREATE TABLE test_kw_csv_cq_src (id int, s test_kw_csv_cq);
+            INSERT INTO test_kw_csv_cq_src VALUES (1, ROW('world', 99, 11, 12));
+            """,
+            pg_conn,
+        )
+
+        run_command(
+            f"COPY test_kw_csv_cq_src TO '{url}' WITH (format 'csv', header on)",
+            pg_conn,
+        )
+
+        run_command(
+            f"""
+            CREATE TABLE test_kw_csv_cq_dst (id int, s test_kw_csv_cq);
+            COPY test_kw_csv_cq_dst FROM '{url}' WITH (format 'csv', header on);
+            """,
+            pg_conn,
+        )
+
+        result = run_query(
+            "SELECT id, (s).normal FROM test_kw_csv_cq_dst",
+            pg_conn,
+        )
+        assert result == [[1, 99]]
+
+        result = run_query(
+            """SELECT (s)."has'single", (s).U&"has\\005Cback" FROM test_kw_csv_cq_dst""",
+            pg_conn,
+        )
+        assert result == [[11, 12]]
+    finally:
+        pg_conn.rollback()
+
+
+# ---------------------------------------------------------------------------
+# Parameterised round-trip tests — each keyword, each format
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("keyword", DUCKDB_ONLY_RESERVED)
+def test_copy_roundtrip_parquet(pg_conn, s3, keyword):
+    """Each DuckDB-only reserved keyword survives a parquet round-trip."""
+    url = f"s3://{TEST_BUCKET}/test_kw_roundtrip_parquet/{keyword}/data.parquet"
+
+    try:
+        run_command(
+            f"""
+            CREATE TABLE test_kw_pq_src_{keyword} ({keyword} int);
+            INSERT INTO test_kw_pq_src_{keyword} VALUES (42);
+            COPY test_kw_pq_src_{keyword} TO '{url}' WITH (format 'parquet');
+
+            CREATE TABLE test_kw_pq_dst_{keyword} ({keyword} int);
+            COPY test_kw_pq_dst_{keyword} FROM '{url}' WITH (format 'parquet');
+            """,
+            pg_conn,
+        )
+
+        result = run_query(f"SELECT {keyword} FROM test_kw_pq_dst_{keyword}", pg_conn)
+        assert result == [[42]]
+    finally:
+        pg_conn.rollback()
+
+
+@pytest.mark.parametrize("keyword", DUCKDB_ONLY_RESERVED)
+def test_copy_roundtrip_csv(pg_conn, s3, keyword):
+    """Each DuckDB-only reserved keyword survives a CSV round-trip."""
+    url = f"s3://{TEST_BUCKET}/test_kw_roundtrip_csv/{keyword}/data.csv"
+
+    try:
+        run_command(
+            f"""
+            CREATE TABLE test_kw_csv_src_{keyword} ({keyword} int);
+            INSERT INTO test_kw_csv_src_{keyword} VALUES (42);
+            COPY test_kw_csv_src_{keyword} TO '{url}' WITH (format 'csv', header on);
+
+            CREATE TABLE test_kw_csv_dst_{keyword} ({keyword} int);
+            COPY test_kw_csv_dst_{keyword} FROM '{url}' WITH (format 'csv', header on);
+            """,
+            pg_conn,
+        )
+
+        result = run_query(f"SELECT {keyword} FROM test_kw_csv_dst_{keyword}", pg_conn)
+        assert result == [[42]]
+    finally:
+        pg_conn.rollback()
+
+
+@pytest.mark.parametrize("keyword", DUCKDB_ONLY_RESERVED)
+def test_copy_roundtrip_json(pg_conn, s3, keyword):
+    """Each DuckDB-only reserved keyword survives a JSON round-trip."""
+    url = f"s3://{TEST_BUCKET}/test_kw_roundtrip_json/{keyword}/data.json"
+
+    try:
+        run_command(
+            f"""
+            CREATE TABLE test_kw_json_src_{keyword} ({keyword} int);
+            INSERT INTO test_kw_json_src_{keyword} VALUES (42);
+            COPY test_kw_json_src_{keyword} TO '{url}' WITH (format 'json');
+
+            CREATE TABLE test_kw_json_dst_{keyword} ({keyword} int);
+            COPY test_kw_json_dst_{keyword} FROM '{url}' WITH (format 'json');
+            """,
+            pg_conn,
+        )
+
+        result = run_query(f"SELECT {keyword} FROM test_kw_json_dst_{keyword}", pg_conn)
+        assert result == [[42]]
+    finally:
+        pg_conn.rollback()
@@ -17,4 +17,14 @@
 
 #pragma once
 
-PGDLLEXPORT bool IsDuckDBReservedWord(char *candidateWord);
+/*
+ * duckdb_quote_identifier — like quote_identifier() but also quotes
+ * identifiers that are reserved (in any non-UNRESERVED category) in DuckDB
+ * but not in PostgreSQL — e.g. LAMBDA, PIVOT, QUALIFY, SUMMARIZE, DESCRIBE,
+ * SHOW, UNPIVOT (RESERVED_KEYWORD) as well as ASOF, ANTI, GLOB
+ * (COL_NAME_KEYWORD / TYPE_FUNC_NAME_KEYWORD).
+ *
+ * Use this for all identifiers (column names, field names, relation names)
+ * that will appear in SQL sent to pgduck_server.
+ *
+ * ident: the identifier to quote; declared const char *, so the caller's
+ * string is not modified through this pointer.
+ *
+ * Returns the text to splice into the generated SQL.
+ * NOTE(review): presumably follows quote_identifier()'s convention of
+ * returning `ident` itself when no quoting is needed and a palloc'd copy
+ * otherwise — confirm in the implementation before relying on ownership.
+ */
+PGDLLEXPORT const char *duckdb_quote_identifier(const char *ident);