2222#include "datatype/timestamp.h"
2323
2424#include "pg_lake/parquet/leaf_field.h"
25+ #include "pg_lake/pgduck/client.h"
26+
27+
28+ /*
29+ * ColumnStatsMode selects how column stats are handled.
30+ * - Truncate mode (the default): column stats are collected and truncated
31+ * to the length given by ColumnStatsConfig.truncateLen.
32+ * - None mode: column stats are not collected at all.
33+ */
34+ typedef enum ColumnStatsMode
35+ {
36+ COLUMN_STATS_MODE_TRUNCATE = 0 , /* collect stats, truncated to a configured length */
37+ COLUMN_STATS_MODE_NONE = 1 , /* do not collect column stats */
38+ } ColumnStatsMode ;
39+
40+ /*
41+ * ColumnStatsConfig bundles the settings that control column stats.
42+ * - mode: which ColumnStatsMode applies (truncate or none).
43+ * - truncateLen: the length to which column stats are truncated; only used in truncate mode.
44+ */
45+ typedef struct ColumnStatsConfig
46+ {
47+ ColumnStatsMode mode ;
48+
49+ /* used for truncate mode only; NOTE(review): unit (bytes vs. characters) is not visible here, confirm */
50+ size_t truncateLen ;
51+ } ColumnStatsConfig ;
52+
53+
54+
2555
2656 /*
2757 * DataFileColumnStats stores column statistics for a data file.
@@ -43,6 +73,8 @@ typedef struct DataFileColumnStats
4373 */
4474typedef struct DataFileStats
4575{
76+ char * dataFilePath ;
77+
4678 /* number of bytes in the file */
4779 int64 fileSize ;
4880
@@ -61,3 +93,26 @@ typedef struct DataFileStats
6193 /* for a new data file with row IDs, the start of the range */
6294 int64 rowIdStart ;
6395} DataFileStats ;
96+
97+ typedef struct StatsCollector
98+ { /* returned by GetDataFileStatsListFromPGResult() and ExecuteCopyToCommandOnPGDuckConnection() */
99+ int64 totalRowCount ; /* presumably the row count summed over all collected files; TODO confirm */
100+ List * dataFileStats ; /* presumably a list of DataFileStats pointers; TODO confirm element type */
101+ } StatsCollector ;
102+
103+ extern PGDLLEXPORT DataFileStats * DeepCopyDataFileStats (const DataFileStats * stats ); /* takes a const source and returns a DataFileStats pointer; presumably a freshly allocated deep copy, TODO confirm ownership */
104+ extern PGDLLEXPORT StatsCollector * GetDataFileStatsListFromPGResult (PGresult * result ,
105+ List * leafFields ,
106+ DataFileSchema * schema ); /* returns a StatsCollector; presumably built by reading per-file stats out of the given PGresult, TODO confirm */
107+ extern PGDLLEXPORT StatsCollector * ExecuteCopyToCommandOnPGDuckConnection (char * copyCommand ,
108+ List * leafFields ,
109+ DataFileSchema * schema ,
110+ bool disablePreserveInsertionOrder ,
111+ char * destinationPath ,
112+ CopyDataFormat destinationFormat ); /* returns a StatsCollector; presumably runs copyCommand on a pgduck connection and gathers stats for the written file(s), TODO confirm */
113+ extern PGDLLEXPORT bool ShouldSkipStatistics (LeafField * leafField ); /* presumably true when no column stats should be kept for this leaf field; skip criteria are not visible here, TODO confirm */
114+ extern PGDLLEXPORT DataFileStats * CreateDataFileStatsForDataFile (char * dataFilePath ,
115+ int64 rowCount , int64 deletedRowCount ,
116+ List * leafFields ); /* returns a DataFileStats pointer for the file at dataFilePath; how the counts and leafFields are used is not visible here, TODO confirm */
117+ extern PGDLLEXPORT void ApplyColumnStatsModeForAllFileStats (Oid relationId , List * dataFileStats ); /* returns nothing; presumably applies the relation's column stats mode to each entry of dataFileStats in place, TODO confirm */
118+ extern PGDLLEXPORT List * GetRemoteParquetColumnStats (char * path , List * leafFields ); /* returns a List; presumably per-column stats read from the Parquet file at path, TODO confirm element type */
0 commit comments