@@ -45,14 +45,6 @@ struct VectorOfValuesEquality {
4545template <class T >
4646using vector_of_value_map_t = unordered_map<vector<Value>, T, VectorOfValuesHashFunction, VectorOfValuesEquality>;
4747
48- struct CopyToFileInfo {
49- explicit CopyToFileInfo (string file_path_p) : file_path(std::move(file_path_p)) {
50- }
51-
52- string file_path;
53- unique_ptr<CopyFunctionFileStatistics> file_stats;
54- };
55-
5648class CopyToFunctionGlobalState : public GlobalSinkState {
5749public:
5850 explicit CopyToFunctionGlobalState (ClientContext &context, unique_ptr<GlobalFunctionData> global_state)
@@ -67,8 +59,8 @@ class CopyToFunctionGlobalState : public GlobalSinkState {
6759 unordered_set<string> created_directories;
6860 // ! shared state for HivePartitionedColumnData
6961 shared_ptr<GlobalHivePartitionState> partition_state;
70- // ! File names
71- vector<CopyToFileInfo> file_names ;
62+ // ! Written file info and stats
63+ vector<unique_ptr< CopyToFileInfo>> written_files ;
7264 // ! Max open files
7365 idx_t max_open_files;
7466
@@ -99,16 +91,16 @@ class CopyToFunctionGlobalState : public GlobalSinkState {
9991 return path;
10092 }
10193
102- optional_ptr<CopyFunctionFileStatistics > AddFile (const StorageLockKey &l, const string &file_name,
103- CopyFunctionReturnType return_type) {
94+ optional_ptr<CopyToFileInfo > AddFile (const StorageLockKey &l, const string &file_name,
95+ CopyFunctionReturnType return_type) {
10496 D_ASSERT (l.GetType () == StorageLockType::EXCLUSIVE);
105- optional_ptr<CopyFunctionFileStatistics> result ;
106- CopyToFileInfo file_info (file_name) ;
97+ auto file_info = make_uniq<CopyToFileInfo>(file_name) ;
98+ optional_ptr< CopyToFileInfo> result ;
10799 if (return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) {
108- file_info. file_stats = make_uniq<CopyFunctionFileStatistics>();
109- result = file_info.file_stats . get ();
100+ file_info-> file_stats = make_uniq<CopyFunctionFileStatistics>();
101+ result = file_info.get ();
110102 }
111- file_names .push_back (std::move (file_info));
103+ written_files .push_back (std::move (file_info));
112104 return result;
113105 }
114106
@@ -172,15 +164,29 @@ class CopyToFunctionGlobalState : public GlobalSinkState {
172164 full_path = op.filename_pattern .CreateFilename (fs, hive_path, op.file_extension , offset);
173165 }
174166 }
175- optional_ptr<CopyFunctionFileStatistics> file_stats ;
167+ optional_ptr<CopyToFileInfo> written_file_info ;
176168 if (op.return_type != CopyFunctionReturnType::CHANGED_ROWS) {
177- file_stats = AddFile (*global_lock, full_path, op.return_type );
169+ written_file_info = AddFile (*global_lock, full_path, op.return_type );
178170 }
179171 // initialize writes
180172 auto info = make_uniq<PartitionWriteInfo>();
181173 info->global_state = op.function .copy_to_initialize_global (context.client , *op.bind_data , full_path);
182- if (file_stats) {
183- op.function .copy_to_get_written_statistics (context.client , *op.bind_data , *info->global_state , *file_stats);
174+ if (written_file_info) {
175+ // set up the file stats for the copy
176+ op.function .copy_to_get_written_statistics (context.client , *op.bind_data , *info->global_state ,
177+ *written_file_info->file_stats );
178+
179+ // set the partition info
180+ vector<Value> partition_keys;
181+ vector<Value> partition_values;
182+ for (idx_t i = 0 ; i < op.partition_columns .size (); i++) {
183+ const auto &partition_col_name = op.names [op.partition_columns [i]];
184+ const auto &partition_value = values[i];
185+ partition_keys.emplace_back (partition_col_name);
186+ partition_values.push_back (partition_value.DefaultCastAs (LogicalType::VARCHAR));
187+ }
188+ written_file_info->partition_keys = Value::MAP (LogicalType::VARCHAR, LogicalType::VARCHAR,
189+ std::move (partition_keys), std::move (partition_values));
184190 }
185191 auto &result = *info;
186192 info->active_writes = 1 ;
@@ -308,13 +314,13 @@ unique_ptr<GlobalFunctionData> PhysicalCopyToFile::CreateFileState(ClientContext
308314 idx_t this_file_offset = g.last_file_offset ++;
309315 auto &fs = FileSystem::GetFileSystem (context);
310316 string output_path (filename_pattern.CreateFilename (fs, file_path, file_extension, this_file_offset));
311- optional_ptr<CopyFunctionFileStatistics> file_stats ;
317+ optional_ptr<CopyToFileInfo> written_file_info ;
312318 if (return_type != CopyFunctionReturnType::CHANGED_ROWS) {
313- file_stats = g.AddFile (global_lock, output_path, return_type);
319+ written_file_info = g.AddFile (global_lock, output_path, return_type);
314320 }
315321 auto result = function.copy_to_initialize_global (context, *bind_data, output_path);
316- if (file_stats ) {
317- function.copy_to_get_written_statistics (context, *bind_data, *result, *file_stats);
322+ if (written_file_info ) {
323+ function.copy_to_get_written_statistics (context, *bind_data, *result, *written_file_info-> file_stats );
318324 }
319325 return result;
320326}
@@ -410,9 +416,10 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
410416 auto state = make_uniq<CopyToFunctionGlobalState>(
411417 context, function.copy_to_initialize_global (context, *bind_data, file_path));
412418 auto global_lock = state->lock .GetExclusiveLock ();
413- auto file_stats = state->AddFile (*global_lock, file_path, return_type);
414- if (file_stats) {
415- function.copy_to_get_written_statistics (context, *bind_data, *state->global_state , *file_stats);
419+ auto written_file_info = state->AddFile (*global_lock, file_path, return_type);
420+ if (written_file_info) {
421+ function.copy_to_get_written_statistics (context, *bind_data, *state->global_state ,
422+ *written_file_info->file_stats );
416423 }
417424 return std::move (state);
418425}
@@ -576,18 +583,17 @@ unique_ptr<GlobalSourceState> PhysicalCopyToFile::GetGlobalSourceState(ClientCon
576583 return make_uniq<CopyToFileGlobalSourceState>();
577584}
578585
579- void PhysicalCopyToFile::ReturnStatistics (DataChunk &chunk, idx_t row_idx, const string &file_name,
580- CopyFunctionFileStatistics &file_stats) {
586+ void PhysicalCopyToFile::ReturnStatistics (DataChunk &chunk, idx_t row_idx, CopyToFileInfo &info) {
587+ auto &file_stats = *info.file_stats ;
588+
581589 // filename VARCHAR
582- chunk.SetValue (0 , row_idx, file_name );
590+ chunk.SetValue (0 , row_idx, info. file_path );
583591 // count BIGINT
584592 chunk.SetValue (1 , row_idx, Value::UBIGINT (file_stats.row_count ));
585593 // file size bytes BIGINT
586594 chunk.SetValue (2 , row_idx, Value::UBIGINT (file_stats.file_size_bytes ));
587- // footer offset BIGINT
588- chunk.SetValue (3 , row_idx, file_stats.footer_offset );
589- // footer size BIGINT
590- chunk.SetValue (4 , row_idx, file_stats.footer_size );
595+ // footer size bytes BIGINT
596+ chunk.SetValue (3 , row_idx, file_stats.footer_size_bytes );
591597 // column statistics map(varchar, map(varchar, varchar))
592598 map<string, Value> stats;
593599 for (auto &entry : file_stats.column_statistics ) {
@@ -612,25 +618,27 @@ void PhysicalCopyToFile::ReturnStatistics(DataChunk &chunk, idx_t row_idx, const
612618 values.emplace_back (std::move (entry.second ));
613619 }
614620 auto map_val_type = LogicalType::MAP (LogicalType::VARCHAR, LogicalType::VARCHAR);
615- chunk.SetValue (5 , row_idx, Value::MAP (LogicalType::VARCHAR, map_val_type, std::move (keys), std::move (values)));
621+ chunk.SetValue (4 , row_idx, Value::MAP (LogicalType::VARCHAR, map_val_type, std::move (keys), std::move (values)));
622+
623+ // partition_keys map(varchar, varchar)
624+ chunk.SetValue (5 , row_idx, info.partition_keys );
616625}
617626
618627SourceResultType PhysicalCopyToFile::GetData (ExecutionContext &context, DataChunk &chunk,
619628 OperatorSourceInput &input) const {
620629 auto &g = sink_state->Cast <CopyToFunctionGlobalState>();
621630 if (return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) {
622631 auto &source_state = input.global_state .Cast <CopyToFileGlobalSourceState>();
623- idx_t next_end = MinValue<idx_t >(source_state.offset + STANDARD_VECTOR_SIZE, g.file_names .size ());
632+ idx_t next_end = MinValue<idx_t >(source_state.offset + STANDARD_VECTOR_SIZE, g.written_files .size ());
624633 idx_t count = next_end - source_state.offset ;
625634 for (idx_t i = 0 ; i < count; i++) {
626- auto &file_entry = g.file_names [source_state.offset + i];
627- auto &file_stats = *file_entry.file_stats ;
628- ReturnStatistics (chunk, i, file_entry.file_path , file_stats);
635+ auto &file_entry = *g.written_files [source_state.offset + i];
636+ ReturnStatistics (chunk, i, file_entry);
629637 }
630638 chunk.SetCardinality (count);
631639 source_state.offset += count;
632- return source_state.offset < g.file_names .size () ? SourceResultType::HAVE_MORE_OUTPUT
633- : SourceResultType::FINISHED;
640+ return source_state.offset < g.written_files .size () ? SourceResultType::HAVE_MORE_OUTPUT
641+ : SourceResultType::FINISHED;
634642 }
635643
636644 chunk.SetCardinality (1 );
@@ -641,8 +649,8 @@ SourceResultType PhysicalCopyToFile::GetData(ExecutionContext &context, DataChun
641649 case CopyFunctionReturnType::CHANGED_ROWS_AND_FILE_LIST: {
642650 chunk.SetValue (0 , 0 , Value::BIGINT (NumericCast<int64_t >(g.rows_copied .load ())));
643651 vector<Value> file_name_list;
644- for (auto &file_names : g.file_names ) {
645- file_name_list.emplace_back (file_names. file_path );
652+ for (auto &file_info : g.written_files ) {
653+ file_name_list.emplace_back (file_info-> file_path );
646654 }
647655 chunk.SetValue (1 , 0 , Value::LIST (LogicalType::VARCHAR, std::move (file_name_list)));
648656 break ;
0 commit comments