Skip to content

Commit 7b919df

Browse files
Cache TupleDesc per composite column in copy_dest_receive
For INSERT..SELECT involving composite-type columns, each row called lookup_rowtype_tupdesc (via StructOutForPGDuck) triggering the expensive insert_rel_type_cache_if_needed path on every row. Add a composite_tupdescs cache array (one slot per physical attribute) to CopyToStateData. In CopyOneRowTo, validate and reuse the cached TupleDesc for composite columns, falling back to lookup_rowtype_tupdesc only on the first row or a type change. The pin is held for the duration of the COPY and released in EndCopy. PGDuckSerialize and StructOutForPGDuck gain a cachedTupleDesc parameter; when non-NULL the tupdesc is used directly, skipping both the lookup and the matching ReleaseTupleDesc. All other call sites pass NULL. Signed-off-by: David Christensen <david.christensen@snowflake.com>
1 parent 3d19097 commit 7b919df

File tree

15 files changed

+486
-21
lines changed

15 files changed

+486
-21
lines changed

pg_lake_engine/include/pg_lake/pgduck/array_conversion.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717

1818
#pragma once
1919

20+
#include "access/tupdesc.h"
2021
#include "utils/array.h"
2122
#include "pg_lake/copy/copy_format.h"
2223

2324
#define ARRAY_OUT_OID (751)
2425

25-
extern PGDLLEXPORT char *ArrayOutForPGDuck(ArrayType *array, CopyDataFormat format);
26+
extern PGDLLEXPORT char *ArrayOutForPGDuck(ArrayType *array, CopyDataFormat format,
27+
TupleDesc cachedElemTupleDesc);

pg_lake_engine/include/pg_lake/pgduck/serialize.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@
2020
#include "postgres.h"
2121
#include "fmgr.h"
2222

23+
#include "access/tupdesc.h"
2324
#include "pg_lake/parquet/field.h"
2425
#include "pg_lake/pgduck/type.h"
2526

2627
#define BYTEA_OUT_OID 31
2728

2829
extern PGDLLEXPORT char *PGDuckSerialize(FmgrInfo *flinfo, Oid typeOid, Datum value,
29-
CopyDataFormat format);
30+
CopyDataFormat format, TupleDesc cachedTupleDesc);
3031
extern PGDLLEXPORT char *PGDuckOnlySerialize(Oid typeOid, Datum value);
3132
extern PGDLLEXPORT bool IsPGDuckSerializeRequired(PGType postgresType);
3233
extern PGDLLEXPORT char *IntervalOutForPGDuck(Datum value);

pg_lake_engine/include/pg_lake/pgduck/struct_conversion.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
#pragma once
1919

20+
#include "access/tupdesc.h"
2021
#include "pg_lake/copy/copy_format.h"
2122

2223
#define RECORD_OUT_OID 2291
2324

24-
extern PGDLLEXPORT char *StructOutForPGDuck(Datum myStruct, CopyDataFormat format);
25+
extern PGDLLEXPORT char *StructOutForPGDuck(Datum myStruct, CopyDataFormat format,
26+
TupleDesc cachedTupleDesc);

pg_lake_engine/src/csv/csv_writer.c

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include "fmgr.h"
3030
#include "miscadmin.h"
3131

32+
#include "access/htup_details.h"
33+
#include "catalog/pg_type.h"
3234
#include "catalog/pg_type_d.h"
3335
#include "commands/copy.h"
3436
#include "commands/defrem.h"
@@ -45,6 +47,7 @@
4547
#include "utils/builtins.h"
4648
#include "utils/lsyscache.h"
4749
#include "utils/memutils.h"
50+
#include "utils/typcache.h"
4851
#include "utils/numeric.h"
4952
#include "utils/rel.h"
5053

@@ -101,6 +104,13 @@ typedef struct CopyToStateData
101104
MemoryContext copycontext; /* per-copy execution context */
102105

103106
FmgrInfo *out_functions; /* lookup info for output functions */
107+
108+
/*
109+
* Per-column TupleDesc cache for composite-type columns. NULL means the
110+
* column is not composite (or is an anonymous RECORD column). A non-NULL
111+
* value is a pinned TupleDesc looked up once during StartCopyTo.
112+
*/
113+
TupleDesc *composite_tupdescs;
104114
MemoryContext rowcontext; /* per-row evaluation context */
105115
uint64 bytes_processed; /* number of bytes processed so far */
106116

@@ -287,6 +297,20 @@ EndCopy(CopyToState cstate)
287297
}
288298
}
289299

300+
/* Release pinned TupleDescs from the composite type cache */
301+
{
302+
ListCell *cur;
303+
304+
foreach(cur, cstate->attnumlist)
305+
{
306+
int attnum = lfirst_int(cur);
307+
TupleDesc cached = cstate->composite_tupdescs[attnum - 1];
308+
309+
if (cached != NULL)
310+
ReleaseTupleDesc(cached);
311+
}
312+
}
313+
290314
MemoryContextDelete(cstate->rowcontext);
291315
MemoryContextDelete(cstate->copycontext);
292316
}
@@ -453,6 +477,7 @@ StartCopyTo(CopyToState cstate, TupleDesc tupDesc)
453477

454478
/* Get info about the columns we need to process. */
455479
cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
480+
cstate->composite_tupdescs = (TupleDesc *) palloc0(num_phys_attrs * sizeof(TupleDesc));
456481
foreach(cur, cstate->attnumlist)
457482
{
458483
int attnum = lfirst_int(cur);
@@ -469,6 +494,22 @@ StartCopyTo(CopyToState cstate, TupleDesc tupDesc)
469494
&out_func_oid,
470495
&isvarlena);
471496
fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
497+
498+
/*
499+
* Pre-populate the TupleDesc cache for composite columns and
500+
* arrays of composite elements. Anonymous RECORD types are skipped
501+
* because their structure is not known until row data is inspected.
502+
*/
503+
{
504+
Oid elemTypeOid = get_element_type(attr->atttypid);
505+
Oid baseTypeOid = OidIsValid(elemTypeOid) ? elemTypeOid : attr->atttypid;
506+
int32 baseTypmod = OidIsValid(elemTypeOid) ? -1 : attr->atttypmod;
507+
508+
if (get_typtype(baseTypeOid) == TYPTYPE_COMPOSITE &&
509+
baseTypeOid != RECORDOID)
510+
cstate->composite_tupdescs[attnum - 1] =
511+
lookup_rowtype_tupdesc(baseTypeOid, baseTypmod);
512+
}
472513
}
473514

474515
/*
@@ -775,12 +816,25 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
775816
* Since we are at the top-level when emitting an
776817
* attribute in CopyOneRowTo(), we are not inside a
777818
* composite type.
819+
*
820+
* For composite-type columns, cache the TupleDesc so
821+
* we avoid repeating the expensive lookup_rowtype_tupdesc
822+
* call on every row.
823+
*/
824+
/*
825+
* Pass the pre-populated TupleDesc (non-NULL for
826+
* composite columns and arrays of composite elements)
827+
* so PGDuckSerialize can skip the per-row
828+
* lookup_rowtype_tupdesc call.
778829
*/
830+
TupleDesc cachedTupleDesc =
831+
cstate->composite_tupdescs[attnum - 1];
779832

780833
string = PGDuckSerialize(&out_functions[attnum - 1],
781834
attr->atttypid,
782835
value,
783-
cstate->targetFormat);
836+
cstate->targetFormat,
837+
cachedTupleDesc);
784838
}
785839
else
786840
string = OutputFunctionCall(&out_functions[attnum - 1],

pg_lake_engine/src/data_file/data_file_stats.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ SerializeTextArrayTypeToPgDuck(ArrayType *array)
772772
getTypeOutputInfo(TEXTARRAYOID, &outFuncId, &isvarlena);
773773
fmgr_info(outFuncId, &outFunc);
774774

775-
return PGDuckSerialize(&outFunc, TEXTARRAYOID, arrayDatum, DATA_FORMAT_INVALID);
775+
return PGDuckSerialize(&outFunc, TEXTARRAYOID, arrayDatum, DATA_FORMAT_INVALID, NULL);
776776
}
777777

778778

pg_lake_engine/src/pgduck/array_conversion.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "postgres.h"
1919

20+
#include "access/tupdesc.h"
2021
#include "pg_lake/pgduck/array_conversion.h"
2122
#include "pg_lake/pgduck/serialize.h"
2223
#include "utils/arrayaccess.h"
@@ -34,7 +35,8 @@
3435
* - we do not use fcinfo (not in a UDF) or AnyArrayType
3536
*/
3637
char *
37-
ArrayOutForPGDuck(ArrayType *array, CopyDataFormat format)
38+
ArrayOutForPGDuck(ArrayType *array, CopyDataFormat format,
39+
TupleDesc cachedElemTupleDesc)
3840
{
3941
Oid element_type = ARR_ELEMTYPE(array);
4042
char *p,
@@ -113,7 +115,7 @@ ArrayOutForPGDuck(ArrayType *array, CopyDataFormat format)
113115
else
114116
{
115117
values[i] = PGDuckSerialize(&my_extra->proc, element_type, itemvalue,
116-
format);
118+
format, cachedElemTupleDesc);
117119

118120
/* count data plus backslashes; detect chars needing quotes */
119121
needquote = !IsSerializedAsContainer(element_type, format);

pg_lake_engine/src/pgduck/map_conversion.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ MapOutForPGDuck(Datum myMap, CopyDataFormat format)
127127
ereport(ERROR, (errmsg("cannot have NULL for map key entry")));;
128128

129129
serializedKey = PGDuckSerialize(&keysExtra->proc, keysElementType, pairValues[0],
130-
format);
130+
format, NULL);
131131
if (!keyIsContainer)
132132
serializedKey = (char *) quote_literal_cstr(serializedKey);
133133

@@ -136,7 +136,7 @@ MapOutForPGDuck(Datum myMap, CopyDataFormat format)
136136
else
137137
{
138138
serializedValue = PGDuckSerialize(&valuesExtra->proc, valuesElementType, pairValues[1],
139-
format);
139+
format, NULL);
140140
if (!valIsContainer)
141141
serializedValue = (char *) quote_literal_cstr(serializedValue);
142142
}

pg_lake_engine/src/pgduck/rewrite_query.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -789,7 +789,7 @@ RewriteConst(Const *constExpr)
789789

790790
/* serialize Const in the DuckDB format */
791791
char *pgduckText = PGDuckSerialize(&outFunc, constTypeId, constExpr->constvalue,
792-
DATA_FORMAT_INVALID);
792+
DATA_FORMAT_INVALID, NULL);
793793

794794
/* construct a text constant with the rewritten text */
795795
Const *textConst = makeNode(Const);

pg_lake_engine/src/pgduck/serialize.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,19 +125,19 @@ ConvertISOYearToBCIfNeeded(const char *dateTimestampString)
125125
*/
126126
char *
127127
PGDuckSerialize(FmgrInfo *flinfo, Oid columnType, Datum value,
128-
CopyDataFormat format)
128+
CopyDataFormat format, TupleDesc cachedTupleDesc)
129129
{
130130
if (flinfo->fn_oid == ARRAY_OUT_OID)
131131
{
132132
/* maps are a type of array */
133133
if (IsMapTypeOid(columnType))
134134
return MapOutForPGDuck(value, format);
135135

136-
return ArrayOutForPGDuck(DatumGetArrayTypeP(value), format);
136+
return ArrayOutForPGDuck(DatumGetArrayTypeP(value), format, cachedTupleDesc);
137137
}
138138

139139
if (flinfo->fn_oid == RECORD_OUT_OID)
140-
return StructOutForPGDuck(value, format);
140+
return StructOutForPGDuck(value, format, cachedTupleDesc);
141141

142142
/*
143143
* For Iceberg, intervals are serialized as struct(months, days,

pg_lake_engine/src/pgduck/struct_conversion.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ typedef struct RecordIOData
5252
*/
5353

5454
char *
55-
StructOutForPGDuck(Datum myStruct, CopyDataFormat format)
55+
StructOutForPGDuck(Datum myStruct, CopyDataFormat format, TupleDesc cachedTupleDesc)
5656
{
5757
HeapTupleHeader rec = DatumGetHeapTupleHeader(myStruct);
5858
Oid tupType;
@@ -72,7 +72,10 @@ StructOutForPGDuck(Datum myStruct, CopyDataFormat format)
7272
/* Extract type info from the tuple itself */
7373
tupType = HeapTupleHeaderGetTypeId(rec);
7474
tupTypmod = HeapTupleHeaderGetTypMod(rec);
75-
tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
75+
if (cachedTupleDesc != NULL)
76+
tupdesc = cachedTupleDesc;
77+
else
78+
tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
7679
ncolumns = tupdesc->natts;
7780

7881
/* Build a temporary HeapTuple control structure */
@@ -152,7 +155,7 @@ StructOutForPGDuck(Datum myStruct, CopyDataFormat format)
152155
}
153156

154157
attr = values[i];
155-
value = PGDuckSerialize(&column_info->proc, column_type, attr, format);
158+
value = PGDuckSerialize(&column_info->proc, column_type, attr, format, NULL);
156159

157160
/* Detect whether we need double quotes for this value */
158161
bool needQuotes = !IsContainerType(column_type);
@@ -176,7 +179,10 @@ StructOutForPGDuck(Datum myStruct, CopyDataFormat format)
176179

177180
pfree(values);
178181
pfree(nulls);
179-
ReleaseTupleDesc(tupdesc);
182+
183+
/* Only release if we looked it up ourselves */
184+
if (cachedTupleDesc == NULL)
185+
ReleaseTupleDesc(tupdesc);
180186

181187
return buf.data;
182188
}

0 commit comments

Comments
 (0)