Snowflake-Labs
diff --git a/‎pg_lake_copy/include/pg_lake/copy/copy_io.h‎
Lines changed: 3 additions & 0 deletions b/‎pg_lake_copy/include/pg_lake/copy/copy_io.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎pg_lake_copy/src/copy/copy.c‎
Lines changed: 82 additions & 15 deletions b/‎pg_lake_copy/src/copy/copy.c‎
Lines changed: 82 additions & 15 deletions
diff --git a/‎pg_lake_copy/src/copy/copy_io.c‎
Lines changed: 50 additions & 0 deletions b/‎pg_lake_copy/src/copy/copy_io.c‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎pg_lake_engine/include/pg_lake/csv/csv_writer.h‎
Lines changed: 18 additions & 0 deletions b/‎pg_lake_engine/include/pg_lake/csv/csv_writer.h‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎pg_lake_engine/include/pg_lake/pgduck/client.h‎
Lines changed: 23 additions & 0 deletions b/‎pg_lake_engine/include/pg_lake/pgduck/client.h‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎pg_lake_engine/include/pg_lake/pgduck/write_data.h‎
Lines changed: 49 additions & 0 deletions b/‎pg_lake_engine/include/pg_lake/pgduck/write_data.h‎
Lines changed: 49 additions & 0 deletions
@@ -18,8 +18,11 @@
 #ifndef PG_LAKE_COPY_IO_H
 #define PG_LAKE_COPY_IO_H
 
+#include "libpq-fe.h"
+
 
 void		CopyInputToFile(char *filePath, int columnCount, bool isBinary);
+void		CopyInputToStream(PGconn *streamConn, int columnCount, bool isBinary);
 void		CopyFileToOutput(char *filePath, int columnCount, bool isBinary);
 
 
 
@@ -96,6 +96,8 @@ PgLakeCopyValidityCheckHookType PgLakeCopyValidityCheckHook = NULL;
 static bool IsPgLakeCopy(CopyStmt *copyStmt);
 static bool IsCopyFromStdin(CopyStmt *copyStmt);
 static bool IsCopyToStdout(CopyStmt *copyStmt);
+static int64 StreamingCopyFromStdinPushdown(Oid relationId, char *readQuery,
+											TupleDesc tupleDesc);
 static void ProcessPgLakeCopyFrom(CopyStmt *copyStmt, ParseState *pstate,
 								  Relation relation, Node *whereClause,
 								  uint64 *rowsProcessed);
@@ -449,23 +451,52 @@ ProcessPgLakeCopyFrom(CopyStmt *copyStmt, ParseState *pstate, Relation relation,
 	 */
 	TupleDesc	tupleDesc = BuildTupleDescriptorForRelation(relation, copyStmt->attlist);
 
+	/*
+	 * Compute doCopyPushdown up front so the STDIN block can choose between
+	 * the streaming-write path (when GUC is on AND we'll push the COPY down
+	 * to pgduck) and the file-based path (everything else). For non-pushdown
+	 * COPYs the COPY query is wrapped in TRANSMIT and rows round-trip back
+	 * through PG; that's a different shape than the deferred-INSERT we use
+	 * for streaming, so streaming + non-pushdown still uses the file-based
+	 * path. This is a known gap in streaming-write coverage.
+	 */
+	bool		doCopyPushdown = IsCopyFromPushdownable(relation, copyStmt->attlist,
+														whereClause, sourceFormat);
+	bool		useStreamingStdin = StreamingWritesEnabled && IsCopyFromStdin(copyStmt) &&
+		doCopyPushdown;
+
 	if (IsCopyFromStdin(copyStmt))
 	{
-		sourcePath = GenerateTempFileName(TEMP_FILE_PATTERN, ensureCleanup);
+		if (useStreamingStdin)
+		{
+			/*
+			 * Streaming-write path: don't park bytes locally. Use the
+			 * pgduck_server RECEIVE sink-path placeholder; the deferred COPY
+			 * query that AddQueryResultToTableStream builds will pick this up
+			 * and substitute it with the server-local sink path before
+			 * running read_csv() on it.
+			 */
+			sourcePath = pstrdup(PG_LAKE_RECV_PATH_PLACEHOLDER);
+		}
+		else
+		{
+			sourcePath = GenerateTempFileName(TEMP_FILE_PATTERN, ensureCleanup);
 
-		/*
-		 * we send the expected column count to make pedantic clients happy
-		 */
-		int			columnCount = tupleDesc->natts;
+			/*
+			 * we send the expected column count to make pedantic clients
+			 * happy
+			 */
+			int			columnCount = tupleDesc->natts;
 
-		bool		isBinary = true;
+			bool		isBinary = true;
 
-		/*
-		 * We copy the incoming bytes to a file first and then try to convert
-		 * that file. We could perhaps optimize this in the future by copying
-		 * via a named pipe.
-		 */
-		CopyInputToFile(sourcePath, columnCount, isBinary);
+			/*
+			 * We copy the incoming bytes to a file first and then try to
+			 * convert that file. We could perhaps optimize this in the future
+			 * by copying via a named pipe.
+			 */
+			CopyInputToFile(sourcePath, columnCount, isBinary);
+		}
 	}
 
 	/*
@@ -483,9 +514,6 @@ ProcessPgLakeCopyFrom(CopyStmt *copyStmt, ParseState *pstate, Relation relation,
 	 * case. However, we do want explicit casts to avoid writing incorrect
 	 * types.
 	 */
-	bool		doCopyPushdown = IsCopyFromPushdownable(relation, copyStmt->attlist,
-														whereClause, sourceFormat);
-
 	if (!doCopyPushdown)
 		readFlags |= READ_DATA_TRANSMIT;
 	else
@@ -535,6 +563,13 @@ ProcessPgLakeCopyFrom(CopyStmt *copyStmt, ParseState *pstate, Relation relation,
 	 */
 	if (doCopyPushdown)
 	{
+		if (useStreamingStdin)
+		{
+			*rowsProcessed = StreamingCopyFromStdinPushdown(relationId, readQuery,
+															tupleDesc);
+			return;
+		}
+
 		*rowsProcessed = AddQueryResultToTable(relationId, readQuery, tupleDesc, true);
 		return;
 	}
@@ -660,6 +695,38 @@ IsCopyFromPushdownable(Relation relation, List *columnNameList,
 }
 
 
+/*
+ * StreamingCopyFromStdinPushdown drives the COPY tablename FROM STDIN
+ * pushdown path when pg_lake_engine.streaming_writes=on.
+ *
+ * Pulled out of ProcessPgLakeCopyFrom so the caller stays small enough
+ * for gcc's clobbered-variable analysis (-Wclobbered) to keep its
+ * pre-streaming behavior on the unrelated TupleDesc helpers below;
+ * inlining a multi-step PG-call sequence into ProcessPgLakeCopyFrom
+ * triggered false-positive clobber warnings on attributeDescriptor /
+ * cleanTupleDesc that don't fire when this branch is its own function.
+ *
+ * No PG_TRY/CATCH: errors here trigger transaction abort; pgduck's
+ * PGDuckClientTransactionCallback recycles the libpq connection and
+ * the in-progress-file record registered inside Start... is cleaned
+ * up by the pre-commit hook on abort.
+ */
+static int64
+StreamingCopyFromStdinPushdown(Oid relationId, char *readQuery, TupleDesc tupleDesc)
+{
+	AddQueryResultStreamHandle *stream;
+
+	/* open the stream for readQuery before any client bytes are consumed */
+	stream = StartAddQueryResultToTableStream(relationId, readQuery, tupleDesc, /* wrapNativeTypes */ true);
+
+	/* relay the client's COPY input (requested as binary) over the stream's connection */
+	CopyInputToStream(AddQueryResultStreamConnection(stream), tupleDesc->natts, /* isBinary */ true);
+
+	/* finish the stream; its result is what the caller stores as rows processed */
+	return FinishAddQueryResultToTableStream(stream);
+}
+
+
 /*
 * GenerateReadDataSourceQuery generates the pgduck query to read the given
 * source format/path.
 
@@ -30,6 +30,7 @@
 #include "storage/fd.h"
 #include "utils/memutils.h"
 
+#include "libpq-fe.h"
 #include "libpq/libpq.h"
 
 #define MAX_READ_SIZE (65536)
@@ -55,6 +56,55 @@ static void SendCopyEnd(void);
 static void SendCopyData(char *sendBuffer, int sendBufferLength);
 
 
+/*
+ * CopyInputToStream is the streaming counterpart of CopyInputToFile.
+ *
+ * Instead of writing the postgres client's COPY-IN bytes to a local file,
+ * forwards them via PQputCopyData on `streamConn`. The caller must have
+ * already opened a libpq COPY-IN stream against pgduck_server (via
+ * OpenCopyInStreamToPGDuck) so streamConn is in COPY-IN-active state.
+ * Uses the same SendCopyInResponseToClient(columnCount, isBinary) the
+ * file-based path uses to tell the client we're ready.
+ *
+ * Caller is responsible for finalizing the libpq stream via
+ * FinishCopyInStreamToPGDuck (or moral equivalent) afterwards.
+ */
+void
+CopyInputToStream(PGconn *streamConn, int columnCount, bool isBinary)
+{
+	CopyFromStdinState copyState = {
+		.fe_msgbuf = makeStringInfo(),
+		.raw_reached_eof = false
+	};
+
+	/* announce to the client that COPY data may now be sent */
+	SendCopyInResponseToClient(columnCount, isBinary);
+
+	char	   *chunk = palloc(MAX_READ_SIZE);
+
+	while (!copyState.raw_reached_eof)
+	{
+		unsigned long chunkLength = ReceiveDataFromClient(&copyState, chunk);
+
+		/* no bytes came back from the client read: stop relaying */
+		if (chunkLength == 0)
+			break;
+
+		/* hand the chunk to libpq; any result other than 1 is treated as failure */
+		int			putResult = PQputCopyData(streamConn, chunk, (int) chunkLength);
+
+		if (putResult != 1)
+		{
+			ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE),
+							errmsg("failed to forward COPY data to pgduck_server: %s",
+								   PQerrorMessage(streamConn))));
+		}
+	}
+
+	pfree(chunk);
+}
+
+
 /*
  * CopyInputToFile copies data from the socket to the given file.
  * We request the client send a specific column count.
 
@@ -18,6 +18,8 @@
 #ifndef CSV_WRITER_H
 #define CSV_WRITER_H
 
+#include "libpq-fe.h"
+
 #include "pg_lake/copy/copy_format.h"
 #include "tcop/dest.h"
 #include "nodes/pg_list.h"
@@ -28,6 +30,22 @@ extern PGDLLEXPORT DestReceiver *CreateCSVDestReceiverExtended(char *filename,
 															   List *copyOptions,
 															   CopyDataFormat targetFormat,
 															   bool sessionLifetime);
+
+/*
+ * Streaming variant: bytes go to a libpq COPY-IN already opened on
+ * `streamConn` (use OpenCopyInStreamToPGDuck() to get there). The caller
+ * is responsible for finalizing the stream via FinishCopyInStreamToPGDuck()
+ * AFTER calling rShutdown on the returned DestReceiver — rShutdown only
+ * flushes the per-row buffer and emits any binary trailer; it deliberately
+ * does NOT call PQputCopyEnd, so callers can still emit additional
+ * CopyData (e.g. for multi-segment writes) before closing. The deferred
+ * query's PGresult is returned by FinishCopyInStreamToPGDuck so callers
+ * can extract row counts / column statistics.
+ */
+extern PGDLLEXPORT DestReceiver *CreateCSVStreamDestReceiver(PGconn *streamConn,
+															 List *copyOptions,
+															 CopyDataFormat targetFormat);
+
 extern PGDLLEXPORT int GetCSVDestReceiverMaxLineSize(DestReceiver *dest);
 extern PGDLLEXPORT uint64 GetCSVDestReceiverFileSize(DestReceiver *dest);
 
 
@@ -52,4 +52,27 @@ extern PGDLLEXPORT char *GetSingleValueFromPGDuck(char *query);
 extern PGDLLEXPORT void SendQueryWithParams(PGDuckConnection * pgduckConn, char *queryString,
 											int numParams, const char **parameterValues);
 
+/*
+ * Streaming-write helpers for the RECEIVE protocol prefix on pgduck_server.
+ *
+ * OpenCopyInStreamToPGDuck sends `queryString` (which must begin with
+ * "RECEIVE " and contain '@@PG_LAKE_RECV@@' as the read_csv path
+ * placeholder) and waits for pgduck_server's CopyInResponse. After this
+ * returns, the caller may emit CSV bytes via PQputCopyData on
+ * pgDuckConnection->conn (typically by passing it to
+ * CreateCSVStreamDestReceiver and driving a producer query through it).
+ *
+ * FinishCopyInStreamToPGDuck calls PQputCopyEnd(NULL), waits for the
+ * deferred query's first PGresult, drains the trailing NULL terminator,
+ * and returns the PGresult to the caller. The caller owns it and must
+ * PQclear it. Errors raised by the deferred query (e.g. INSERT failures)
+ * surface here as ereport(ERROR) before returning.
+ *
+ * Both throw on protocol or query errors; the connection is left in an
+ * idle state on success, in an error state on failure.
+ */
+extern PGDLLEXPORT void OpenCopyInStreamToPGDuck(PGDuckConnection * pgDuckConnection,
+												 const char *queryString);
+extern PGDLLEXPORT PGresult *FinishCopyInStreamToPGDuck(PGDuckConnection * pgDuckConnection);
+
 #endif
@@ -23,11 +23,25 @@
 #include "pg_lake/parquet/field.h"
 #include "pg_lake/pgduck/iceberg_validation.h"
 #include "nodes/pg_list.h"
+#include "tcop/dest.h"
 
 /* pg_lake_table.target_row_group_size_mb */
 #define DEFAULT_TARGET_ROW_GROUP_SIZE_MB 512
 extern PGDLLEXPORT int TargetRowGroupSizeMB;
 
+/* pg_lake_engine.streaming_writes */
+extern PGDLLEXPORT bool StreamingWritesEnabled;
+
+/*
+ * Sink-path placeholder substituted server-side by pgduck_server's
+ * RECEIVE handler. Must match SINK_PLACEHOLDER in pgduck_server's
+ * pgsession.c. Streaming-write callers pass this where the file-based
+ * path would pass the local CSV file path; the placeholder is then
+ * embedded inside read_csv('<placeholder>', ...) and pgduck swaps in
+ * the server-local sink path before running the deferred query.
+ */
+#define PG_LAKE_RECV_PATH_PLACEHOLDER "@@PG_LAKE_RECV@@"
+
 typedef enum ParquetVersion
 {
 	PARQUET_VERSION_V1 = 1,
@@ -57,7 +71,42 @@ extern PGDLLEXPORT StatsCollector * WriteQueryResultTo(char *query,
 													   List *leafFields,
 													   IcebergOutOfRangePolicy outOfRangePolicy,
 													   bool wrapNativeTypes);
+/*
+ * BuildCopyToCommandString assembles `COPY (query) TO 'destinationPath'
+ * WITH (format ..., compression ..., return_stats, ...)` for the given
+ * destination format / compression / format options. The same string
+ * builder is used by WriteQueryResultTo (file path), OpenCSVStreamWriter
+ * (streaming), and the streaming AddQueryResultToTable variant in
+ * writable_table.c.
+ */
+extern PGDLLEXPORT char *BuildCopyToCommandString(char *query, char *destinationPath,
+												  CopyDataFormat destinationFormat,
+												  CopyDataCompression destinationCompression,
+												  List *formatOptions,
+												  bool queryHasRowId,
+												  DataFileSchema * schema,
+												  TupleDesc queryTupleDesc,
+												  IcebergOutOfRangePolicy outOfRangePolicy,
+												  bool wrapNativeTypes);
+
 extern PGDLLEXPORT void AppendFields(StringInfo map, DataFileSchema * schema);
 extern PGDLLEXPORT char *TupleDescToColumnMapForWrite(TupleDesc tupleDesc, CopyDataFormat destinationFormat);
 extern PGDLLEXPORT char *TupleDescToProjectionListForWrite(TupleDesc tupleDesc,
 														   CopyDataFormat destinationFormat);
+
+/*
+ * Streaming counterpart of ConvertCSVFileTo. See OpenCSVStreamWriter in
+ * write_data.c for the contract. The struct is opaque to callers.
+ */
+typedef struct CSVStreamWriter CSVStreamWriter;
+
+extern PGDLLEXPORT CSVStreamWriter * OpenCSVStreamWriter(TupleDesc csvTupleDesc,
+														 int maxLineSize,
+														 char *destinationPath,
+														 CopyDataFormat destinationFormat,
+														 CopyDataCompression destinationCompression,
+														 List *formatOptions,
+														 DataFileSchema * schema,
+														 List *leafFields);
+extern PGDLLEXPORT DestReceiver *CSVStreamWriterDestReceiver(CSVStreamWriter * writer);
+extern PGDLLEXPORT StatsCollector * FinishCSVStreamWriter(CSVStreamWriter * writer);