Skip to content

Commit fbba3cb

Browse files
committed
vendor: Update vendored sources to duckdb/duckdb@b32a12c
Rework JSON Reader: use the new MultiFileReaderFunction interface (duckdb/duckdb#16477) Mirror discussions to the internal repository (duckdb/duckdb#16464)
1 parent 1f2bd98 commit fbba3cb

File tree

5 files changed

+39
-38
lines changed

5 files changed

+39
-38
lines changed

src/duckdb/extension/parquet/parquet_extension.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ struct ParquetReadLocalState : public LocalTableFunctionState {
8585
};
8686

8787
struct ParquetMultiFileInfo {
88-
static unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context);
88+
static unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
89+
optional_ptr<TableFunctionInfo> info);
8990
static bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
9091
BaseFileReaderOptions &options, vector<string> &expected_names,
9192
vector<LogicalType> &expected_types);
@@ -103,7 +104,7 @@ struct ParquetMultiFileInfo {
103104
FileExpandResult expand_result);
104105
static unique_ptr<GlobalTableFunctionState>
105106
InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data, MultiFileGlobalState &global_state);
106-
static unique_ptr<LocalTableFunctionState> InitializeLocalState();
107+
static unique_ptr<LocalTableFunctionState> InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &);
107108
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
108109
BaseUnionData &union_data, const MultiFileBindData &bind_data_p);
109110
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
@@ -112,7 +113,7 @@ struct ParquetMultiFileInfo {
112113
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const string &filename,
113114
ParquetOptions &options, const MultiFileReaderOptions &file_options);
114115
static shared_ptr<BaseUnionData> GetUnionData(shared_ptr<BaseFileReader> scan_p, idx_t file_idx);
115-
static void FinalizeReader(ClientContext &context, BaseFileReader &reader);
116+
static void FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &);
116117
static void Scan(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &global_state,
117118
LocalTableFunctionState &local_state, DataChunk &chunk);
118119
static bool TryInitializeScan(ClientContext &context, shared_ptr<BaseFileReader> &reader,
@@ -337,7 +338,8 @@ class ParquetScanFunction {
337338
}
338339
};
339340

340-
unique_ptr<BaseFileReaderOptions> ParquetMultiFileInfo::InitializeOptions(ClientContext &context) {
341+
unique_ptr<BaseFileReaderOptions> ParquetMultiFileInfo::InitializeOptions(ClientContext &context,
342+
optional_ptr<TableFunctionInfo> info) {
341343
return make_uniq<ParquetFileReaderOptions>(context);
342344
}
343345

@@ -543,15 +545,16 @@ shared_ptr<BaseUnionData> ParquetMultiFileInfo::GetUnionData(shared_ptr<BaseFile
543545
return result;
544546
}
545547

546-
void ParquetMultiFileInfo::FinalizeReader(ClientContext &context, BaseFileReader &reader) {
548+
void ParquetMultiFileInfo::FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &) {
547549
}
548550

549551
unique_ptr<GlobalTableFunctionState> ParquetMultiFileInfo::InitializeGlobalState(ClientContext &, MultiFileBindData &,
550552
MultiFileGlobalState &) {
551553
return make_uniq<ParquetReadGlobalState>();
552554
}
553555

554-
unique_ptr<LocalTableFunctionState> ParquetMultiFileInfo::InitializeLocalState() {
556+
unique_ptr<LocalTableFunctionState> ParquetMultiFileInfo::InitializeLocalState(ExecutionContext &,
557+
GlobalTableFunctionState &) {
555558
return make_uniq<ParquetReadLocalState>();
556559
}
557560

src/duckdb/src/execution/operator/csv_scanner/table_function/csv_multi_file_info.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
namespace duckdb {
1010

11-
unique_ptr<BaseFileReaderOptions> CSVMultiFileInfo::InitializeOptions(ClientContext &context) {
11+
unique_ptr<BaseFileReaderOptions> CSVMultiFileInfo::InitializeOptions(ClientContext &context,
12+
optional_ptr<TableFunctionInfo> info) {
1213
return make_uniq<CSVFileReaderOptions>();
1314
}
1415

@@ -255,7 +256,8 @@ struct CSVLocalState : public LocalTableFunctionState {
255256
bool done = false;
256257
};
257258

258-
unique_ptr<LocalTableFunctionState> CSVMultiFileInfo::InitializeLocalState() {
259+
unique_ptr<LocalTableFunctionState> CSVMultiFileInfo::InitializeLocalState(ExecutionContext &,
260+
GlobalTableFunctionState &) {
259261
return make_uniq<CSVLocalState>();
260262
}
261263

@@ -321,7 +323,7 @@ shared_ptr<BaseUnionData> CSVMultiFileInfo::GetUnionData(shared_ptr<BaseFileRead
321323
return data;
322324
}
323325

324-
void CSVMultiFileInfo::FinalizeReader(ClientContext &context, BaseFileReader &reader) {
326+
void CSVMultiFileInfo::FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &) {
325327
auto &csv_file_scan = reader.Cast<CSVFileScan>();
326328
csv_file_scan.InitializeFileNamesTypes();
327329
csv_file_scan.SetStart();

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "0-dev938"
2+
#define DUCKDB_PATCH_VERSION "0-dev1000"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.3.0-dev938"
11+
#define DUCKDB_VERSION "v1.3.0-dev1000"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "052b3dc200"
14+
#define DUCKDB_SOURCE_ID "b32a12cdda"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/common/multi_file_reader_function.hpp

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ class MultiFileReaderFunction : public TableFunction {
202202
auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0]);
203203
MultiFileReaderOptions file_options;
204204

205-
auto options = OP::InitializeOptions(context);
205+
auto options = OP::InitializeOptions(context, input.info);
206206
for (auto &kv : input.named_parameters) {
207207
auto loption = StringUtil::Lower(kv.first);
208208
if (multi_file_reader->ParseOption(loption, kv.second, file_options, context)) {
@@ -220,7 +220,7 @@ class MultiFileReaderFunction : public TableFunction {
220220
static unique_ptr<FunctionData> MultiFileBindCopy(ClientContext &context, CopyInfo &info,
221221
vector<string> &expected_names,
222222
vector<LogicalType> &expected_types) {
223-
auto options = OP::InitializeOptions(context);
223+
auto options = OP::InitializeOptions(context, nullptr);
224224
MultiFileReaderOptions file_options;
225225

226226
for (auto &option : info.options) {
@@ -320,7 +320,7 @@ class MultiFileReaderFunction : public TableFunction {
320320
}
321321
InitializeReader(*reader, bind_data, global_state.column_indexes, global_state.filters, context, i,
322322
global_state.multi_file_reader_state);
323-
OP::FinalizeReader(context, *reader);
323+
OP::FinalizeReader(context, *reader, *global_state.global_state);
324324
} catch (...) {
325325
parallel_lock.lock();
326326
global_state.error_opening_file = true;
@@ -425,7 +425,7 @@ class MultiFileReaderFunction : public TableFunction {
425425
auto result = make_uniq<MultiFileLocalState>();
426426
result->is_parallel = true;
427427
result->batch_index = 0;
428-
result->local_state = OP::InitializeLocalState();
428+
result->local_state = OP::InitializeLocalState(context, *gstate.global_state);
429429

430430
if (gstate.CanRemoveColumns()) {
431431
result->all_columns.Initialize(context.client, gstate.scanned_types);
@@ -613,29 +613,24 @@ class MultiFileReaderFunction : public TableFunction {
613613
continue;
614614
}
615615
auto &reader_data = *reader_data_ptr;
616-
parallel_lock.unlock();
617616
double progress_in_file;
618-
{
619-
lock_guard<mutex> l(*reader_data.file_mutex);
620-
if (reader_data.file_state == MultiFileFileState::OPEN) {
621-
// file is currently open - get the progress within the file
622-
progress_in_file = OP::GetProgressInFile(context, *reader_data.reader);
623-
} else if (reader_data.file_state == MultiFileFileState::CLOSED) {
624-
// file has been closed - check if the reader is still in use
625-
auto reader = reader_data.closed_reader.lock();
626-
if (!reader) {
627-
// reader has been destroyed - we are done with this file
628-
progress_in_file = 100.0;
629-
} else {
630-
// file is still being read
631-
progress_in_file = OP::GetProgressInFile(context, *reader);
632-
}
617+
if (reader_data.file_state == MultiFileFileState::OPEN) {
618+
// file is currently open - get the progress within the file
619+
progress_in_file = OP::GetProgressInFile(context, *reader_data.reader);
620+
} else if (reader_data.file_state == MultiFileFileState::CLOSED) {
621+
// file has been closed - check if the reader is still in use
622+
auto reader = reader_data.closed_reader.lock();
623+
if (!reader) {
624+
// reader has been destroyed - we are done with this file
625+
progress_in_file = 100.0;
633626
} else {
634-
// file has not been opened yet - progress in this file is zero
635-
progress_in_file = 0;
627+
// file is still being read
628+
progress_in_file = OP::GetProgressInFile(context, *reader);
636629
}
630+
} else {
631+
// file has not been opened yet - progress in this file is zero
632+
progress_in_file = 0;
637633
}
638-
parallel_lock.lock();
639634
progress_in_file = MaxValue<double>(0.0, MinValue<double>(100.0, progress_in_file));
640635
total_progress += progress_in_file;
641636
if (i == gstate.completed_file_index && progress_in_file >= 100) {

src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_multi_file_info.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ class CSVFileReaderOptions : public BaseFileReaderOptions {
1919
};
2020

2121
struct CSVMultiFileInfo {
22-
static unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context);
22+
static unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
23+
optional_ptr<TableFunctionInfo> info);
2324
static bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
2425
BaseFileReaderOptions &options, vector<string> &expected_names,
2526
vector<LogicalType> &expected_types);
@@ -37,7 +38,7 @@ struct CSVMultiFileInfo {
3738
FileExpandResult expand_result);
3839
static unique_ptr<GlobalTableFunctionState>
3940
InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data, MultiFileGlobalState &global_state);
40-
static unique_ptr<LocalTableFunctionState> InitializeLocalState();
41+
static unique_ptr<LocalTableFunctionState> InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &);
4142
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
4243
BaseUnionData &union_data, const MultiFileBindData &bind_data_p);
4344
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
@@ -47,7 +48,7 @@ struct CSVMultiFileInfo {
4748
CSVReaderOptions &options,
4849
const MultiFileReaderOptions &file_options);
4950
static shared_ptr<BaseUnionData> GetUnionData(shared_ptr<BaseFileReader> scan_p, idx_t file_idx);
50-
static void FinalizeReader(ClientContext &context, BaseFileReader &reader);
51+
static void FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &);
5152
static bool TryInitializeScan(ClientContext &context, shared_ptr<BaseFileReader> &reader,
5253
GlobalTableFunctionState &gstate, LocalTableFunctionState &lstate);
5354
static void Scan(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &global_state,

0 commit comments

Comments
 (0)