Skip to content

Commit d89b73f

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to fda0ba6a7a
1 parent 9eabdb0 commit d89b73f

31 files changed, with 290 additions and 87 deletions

src/duckdb/extension/json/json_extension.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,13 @@ void JsonExtension::Load(DuckDB &db) {
6868

6969
// JSON copy function
7070
auto copy_fun = JSONFunctions::GetJSONCopyFunction();
71-
ExtensionUtil::RegisterFunction(db_instance, std::move(copy_fun));
71+
ExtensionUtil::RegisterFunction(db_instance, copy_fun);
72+
copy_fun.extension = "ndjson";
73+
copy_fun.name = "ndjson";
74+
ExtensionUtil::RegisterFunction(db_instance, copy_fun);
75+
copy_fun.extension = "jsonl";
76+
copy_fun.name = "jsonl";
77+
ExtensionUtil::RegisterFunction(db_instance, copy_fun);
7278

7379
// JSON macro's
7480
for (idx_t index = 0; json_macros[index].name != nullptr; index++) {

src/duckdb/src/catalog/catalog_set.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -721,6 +721,11 @@ void CatalogSet::Scan(const std::function<void(CatalogEntry &)> &callback) {
721721
}
722722
}
723723

724+
void CatalogSet::SetDefaultGenerator(unique_ptr<DefaultGenerator> defaults_p) {
725+
lock_guard<mutex> lock(catalog_lock);
726+
defaults = std::move(defaults_p);
727+
}
728+
724729
void CatalogSet::Verify(Catalog &catalog_p) {
725730
D_ASSERT(&catalog_p == &catalog);
726731
vector<reference<CatalogEntry>> entries;

src/duckdb/src/catalog/duck_catalog.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ void DuckCatalog::Initialize(bool load_builtin) {
3030
CreateSchemaInfo info;
3131
info.schema = DEFAULT_SCHEMA;
3232
info.internal = true;
33+
info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
3334
CreateSchema(data, info);
3435

3536
if (load_builtin) {

src/duckdb/src/common/enum_util.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1032,19 +1032,20 @@ const StringUtil::EnumStringLiteral *GetDataFileTypeValues() {
10321032
{ static_cast<uint32_t>(DataFileType::FILE_DOES_NOT_EXIST), "FILE_DOES_NOT_EXIST" },
10331033
{ static_cast<uint32_t>(DataFileType::DUCKDB_FILE), "DUCKDB_FILE" },
10341034
{ static_cast<uint32_t>(DataFileType::SQLITE_FILE), "SQLITE_FILE" },
1035-
{ static_cast<uint32_t>(DataFileType::PARQUET_FILE), "PARQUET_FILE" }
1035+
{ static_cast<uint32_t>(DataFileType::PARQUET_FILE), "PARQUET_FILE" },
1036+
{ static_cast<uint32_t>(DataFileType::UNKNOWN_FILE), "UNKNOWN_FILE" }
10361037
};
10371038
return values;
10381039
}
10391040

10401041
template<>
10411042
const char* EnumUtil::ToChars<DataFileType>(DataFileType value) {
1042-
return StringUtil::EnumToString(GetDataFileTypeValues(), 4, "DataFileType", static_cast<uint32_t>(value));
1043+
return StringUtil::EnumToString(GetDataFileTypeValues(), 5, "DataFileType", static_cast<uint32_t>(value));
10431044
}
10441045

10451046
template<>
10461047
DataFileType EnumUtil::FromString<DataFileType>(const char *value) {
1047-
return static_cast<DataFileType>(StringUtil::StringToEnum(GetDataFileTypeValues(), 4, "DataFileType", value));
1048+
return static_cast<DataFileType>(StringUtil::StringToEnum(GetDataFileTypeValues(), 5, "DataFileType", value));
10481049
}
10491050

10501051
const StringUtil::EnumStringLiteral *GetDateCastResultValues() {

src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,7 @@ bool LineError::HandleErrors(StringValueResult &result) {
749749
default:
750750
throw InvalidInputException("CSV Error not allowed when inserting row");
751751
}
752-
result.error_handler.Error(csv_error);
752+
result.error_handler.Error(csv_error, result.try_row);
753753
}
754754
if (is_error_in_line && scan_id != StringValueScanner::LINE_FINDER_ID) {
755755
if (result.sniffing) {
@@ -777,7 +777,7 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() const {
777777
// If we have null_padding set, we found a quoted new line, we are scanning the file in parallel; We error.
778778
LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
779779
auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch, path);
780-
error_handler.Error(csv_error);
780+
error_handler.Error(csv_error, try_row);
781781
}
782782
}
783783

@@ -847,13 +847,13 @@ bool StringValueResult::AddRowInternal() {
847847
state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
848848
current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
849849
last_position.GetGlobalPosition(requested_size, first_nl), path);
850-
error_handler.Error(csv_error);
850+
error_handler.Error(csv_error, try_row);
851851
} else {
852852
auto csv_error = CSVError::IncorrectColumnAmountError(
853853
state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
854854
current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
855855
last_position.GetGlobalPosition(requested_size), path);
856-
error_handler.Error(csv_error);
856+
error_handler.Error(csv_error, try_row);
857857
}
858858
}
859859
// If we are here we ignore_errors, so we delete this line
@@ -966,6 +966,7 @@ StringValueScanner::StringValueScanner(idx_t scanner_idx_p, const shared_ptr<CSV
966966
lines_read += csv_file_scan->skipped_rows;
967967
}
968968
iterator.buffer_size = state_machine->options.buffer_size_option.GetValue();
969+
result.try_row = scanner_idx == LINE_FINDER_ID;
969970
}
970971

971972
StringValueScanner::StringValueScanner(const shared_ptr<CSVBufferManager> &buffer_manager,
@@ -1710,19 +1711,24 @@ bool StringValueScanner::IsRowValid(CSVIterator &current_iterator) const {
17101711
return false;
17111712
}
17121713
constexpr idx_t result_size = 1;
1713-
auto scan_finder = make_uniq<StringValueScanner>(StringValueScanner::LINE_FINDER_ID, buffer_manager,
1714-
state_machine_strict, make_shared_ptr<CSVErrorHandler>(),
1715-
csv_file_scan, false, current_iterator, result_size);
1716-
auto &tuples = scan_finder->ParseChunk();
1717-
current_iterator.pos = scan_finder->GetIteratorPosition();
1718-
bool has_error = false;
1719-
if (tuples.current_errors.HasError()) {
1720-
if (tuples.current_errors.Size() != 1 || !tuples.current_errors.HasErrorType(MAXIMUM_LINE_SIZE)) {
1721-
// We ignore maximum line size errors
1722-
has_error = true;
1723-
}
1724-
}
1725-
return (tuples.number_of_rows == 1 || tuples.first_line_is_comment) && !has_error && tuples.borked_rows.empty();
1714+
auto scan_finder = make_uniq<StringValueScanner>(LINE_FINDER_ID, buffer_manager, state_machine_strict,
1715+
make_shared_ptr<CSVErrorHandler>(), csv_file_scan, false,
1716+
current_iterator, result_size);
1717+
try {
1718+
auto &tuples = scan_finder->ParseChunk();
1719+
current_iterator.pos = scan_finder->GetIteratorPosition();
1720+
bool has_error = false;
1721+
if (tuples.current_errors.HasError()) {
1722+
if (tuples.current_errors.Size() != 1 || !tuples.current_errors.HasErrorType(MAXIMUM_LINE_SIZE)) {
1723+
// We ignore maximum line size errors
1724+
has_error = true;
1725+
}
1726+
}
1727+
return (tuples.number_of_rows == 1 || tuples.first_line_is_comment) && !has_error && tuples.borked_rows.empty();
1728+
} catch (const Exception &e) {
1729+
return false;
1730+
}
1731+
return true;
17261732
}
17271733

17281734
ValidRowInfo StringValueScanner::TryRow(CSVState state, idx_t start_pos, idx_t end_pos) const {

src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ void CSVErrorHandler::ThrowError(const CSVError &csv_error) {
6060

6161
void CSVErrorHandler::Error(const CSVError &csv_error, bool force_error) {
6262
lock_guard<mutex> parallel_lock(main_mutex);
63-
if ((ignore_errors && !force_error) || (PrintLineNumber(csv_error) && !CanGetLine(csv_error.GetBoundaryIndex()))) {
63+
if (!force_error && (ignore_errors || (PrintLineNumber(csv_error) && !CanGetLine(csv_error.GetBoundaryIndex())))) {
6464
// We store this error, we can't throw it now, or we are ignoring it
6565
errors.push_back(csv_error);
6666
return;

src/duckdb/src/execution/operator/schema/physical_attach.cpp

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -62,20 +62,6 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
6262
}
6363
}
6464

65-
string extension = "";
66-
if (FileSystem::IsRemoteFile(path, extension)) {
67-
if (!ExtensionHelper::TryAutoLoadExtension(context.client, extension)) {
68-
throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", path,
69-
extension);
70-
}
71-
if (options.access_mode == AccessMode::AUTOMATIC) {
72-
// Attaching of remote files gets bumped to READ_ONLY
73-
// This is due to the fact that on most (all?) remote files writes to DB are not available
74-
// and having this raised later is not super helpful
75-
options.access_mode = AccessMode::READ_ONLY;
76-
}
77-
}
78-
7965
// Get the database type and attach the database.
8066
db_manager.GetDatabaseType(context.client, *info, config, options);
8167
auto attached_db = db_manager.AttachDatabase(context.client, *info, options);

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "0-dev3309"
2+
#define DUCKDB_PATCH_VERSION "0-dev3365"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.3.0-dev3309"
11+
#define DUCKDB_VERSION "v1.3.0-dev3365"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "027bc16ee8"
14+
#define DUCKDB_SOURCE_ID "fda0ba6a7a"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
//===----------------------------------------------------------------------===//
22
// DuckDB
33
//
4-
// duckdb/catalog/catalog_entry/dschema_catalog_entry.hpp
4+
// duckdb/catalog/catalog_entry/duck_schema_entry.hpp
55
//
66
//
77
//===----------------------------------------------------------------------===//
@@ -70,11 +70,10 @@ class DuckSchemaEntry : public SchemaCatalogEntry {
7070

7171
void Verify(Catalog &catalog) override;
7272

73-
private:
74-
void OnDropEntry(CatalogTransaction transaction, CatalogEntry &entry);
75-
76-
private:
7773
//! Get the catalog set for the specified type
7874
CatalogSet &GetCatalogSet(CatalogType type);
75+
76+
private:
77+
void OnDropEntry(CatalogTransaction transaction, CatalogEntry &entry);
7978
};
8079
} // namespace duckdb

src/duckdb/src/include/duckdb/catalog/catalog_set.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,9 @@ class CatalogSet {
128128

129129
void Verify(Catalog &catalog);
130130

131+
//! Override the default generator - this should not be used after the catalog set has been used
132+
void SetDefaultGenerator(unique_ptr<DefaultGenerator> defaults);
133+
131134
private:
132135
bool DropDependencies(CatalogTransaction transaction, const string &name, bool cascade,
133136
bool allow_drop_internal = false);

src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,9 @@ class StringValueResult : public ScannerResult {
217217
bool added_last_line = false;
218218
bool quoted_new_line = false;
219219

220+
//! If we are trying a row or not when figuring out the next row to start from.
221+
bool try_row = false;
222+
220223
unsafe_unique_array<ParseTypeInfo> parse_types;
221224
vector<string> names;
222225

@@ -376,7 +379,7 @@ class StringValueScanner : public BaseScanner {
376379
idx_t start_pos;
377380
//! Pointer to the previous buffer handle, necessary for over-buffer values
378381
shared_ptr<CSVBufferHandle> previous_buffer_handle;
379-
//! Strict state machine, is basically a state machine with rfc 4180 set to true, used to figure out new line.
382+
//! Strict state machine is basically a state machine with rfc 4180 set to true, used to figure out a new line.
380383
shared_ptr<CSVStateMachine> state_machine_strict;
381384
};
382385

src/duckdb/src/include/duckdb/main/attached_database.hpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,10 @@ class AttachedDatabase : public CatalogEntry {
5656
//! Create the built-in system database (without storage).
5757
explicit AttachedDatabase(DatabaseInstance &db, AttachedDatabaseType type = AttachedDatabaseType::SYSTEM_DATABASE);
5858
//! Create an attached database instance with the specified name and storage.
59-
AttachedDatabase(DatabaseInstance &db, Catalog &catalog, string name, string file_path,
60-
const AttachOptions &options);
59+
AttachedDatabase(DatabaseInstance &db, Catalog &catalog, string name, string file_path, AttachOptions &options);
6160
//! Create an attached database instance with the specified storage extension.
6261
AttachedDatabase(DatabaseInstance &db, Catalog &catalog, StorageExtension &ext, ClientContext &context, string name,
63-
const AttachInfo &info, const AttachOptions &options);
62+
AttachInfo &info, AttachOptions &options);
6463
~AttachedDatabase() override;
6564

6665
//! Initializes the catalog and storage of the attached database.

src/duckdb/src/include/duckdb/main/database.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,8 +75,8 @@ class DatabaseInstance : public enable_shared_from_this<DatabaseInstance> {
7575

7676
DUCKDB_API SettingLookupResult TryGetCurrentSetting(const string &key, Value &result) const;
7777

78-
unique_ptr<AttachedDatabase> CreateAttachedDatabase(ClientContext &context, const AttachInfo &info,
79-
const AttachOptions &options);
78+
unique_ptr<AttachedDatabase> CreateAttachedDatabase(ClientContext &context, AttachInfo &info,
79+
AttachOptions &options);
8080

8181
void AddExtensionInfo(const string &name, const ExtensionLoadedInfo &info);
8282

src/duckdb/src/include/duckdb/main/database_manager.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,7 @@ class DatabaseManager {
4646
//! Get an attached database by its name
4747
optional_ptr<AttachedDatabase> GetDatabase(ClientContext &context, const string &name);
4848
//! Attach a new database
49-
optional_ptr<AttachedDatabase> AttachDatabase(ClientContext &context, const AttachInfo &info,
50-
const AttachOptions &options);
49+
optional_ptr<AttachedDatabase> AttachDatabase(ClientContext &context, AttachInfo &info, AttachOptions &options);
5150
//! Detach an existing database
5251
void DetachDatabase(ClientContext &context, const string &name, OnEntryNotFound if_not_found);
5352
//! Returns a reference to the system catalog

src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "duckdb/common/unordered_map.hpp"
1414
#include "duckdb/common/types/value.hpp"
1515
#include "duckdb/common/case_insensitive_map.hpp"
16+
#include "duckdb/parser/query_node.hpp"
1617

1718
namespace duckdb {
1819

@@ -23,7 +24,7 @@ struct CopyInfo : public ParseInfo {
2324
static constexpr const ParseInfoType TYPE = ParseInfoType::COPY_INFO;
2425

2526
public:
26-
CopyInfo() : ParseInfo(TYPE), catalog(INVALID_CATALOG), schema(DEFAULT_SCHEMA) {
27+
CopyInfo() : ParseInfo(TYPE), catalog(INVALID_CATALOG), schema(DEFAULT_SCHEMA), is_format_auto_detected(true) {
2728
}
2829

2930
//! The catalog name to copy to/from
@@ -38,15 +39,18 @@ struct CopyInfo : public ParseInfo {
3839
bool is_from;
3940
//! The file format of the external file
4041
string format;
42+
//! If the format is manually set (i.e., via the format parameter) or was discovered by inspecting the file path
43+
bool is_format_auto_detected;
4144
//! The file path to copy to/from
4245
string file_path;
4346
//! Set of (key, value) options
4447
case_insensitive_map_t<vector<Value>> options;
45-
// The SQL statement used instead of a table when copying data out to a file
48+
//! The SQL statement used instead of a table when copying data out to a file
4649
unique_ptr<QueryNode> select_statement;
4750

4851
public:
49-
static string CopyOptionsToString(const string &format, const case_insensitive_map_t<vector<Value>> &options);
52+
static string CopyOptionsToString(const string &format, bool is_format_auto_detected,
53+
const case_insensitive_map_t<vector<Value>> &options);
5054

5155
public:
5256
unique_ptr<CopyInfo> Copy() const;

src/duckdb/src/include/duckdb/storage/magic_bytes.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ enum class DataFileType : uint8_t {
1717
FILE_DOES_NOT_EXIST, // file does not exist
1818
DUCKDB_FILE, // duckdb database file
1919
SQLITE_FILE, // sqlite database file
20-
PARQUET_FILE // parquet file
20+
PARQUET_FILE, // parquet file
21+
UNKNOWN_FILE // unknown file type
2122
};
2223

2324
class MagicBytes {

src/duckdb/src/include/duckdb/storage/storage_extension.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,4 +49,8 @@ class StorageExtension {
4949
}
5050
};
5151

52+
struct OpenFileStorageExtension {
53+
static unique_ptr<StorageExtension> Create();
54+
};
55+
5256
} // namespace duckdb

src/duckdb/src/main/attached_database.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ AttachedDatabase::AttachedDatabase(DatabaseInstance &db, AttachedDatabaseType ty
8484
}
8585

8686
AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, string name_p, string file_path_p,
87-
const AttachOptions &options)
87+
AttachOptions &options)
8888
: CatalogEntry(CatalogType::DATABASE_ENTRY, catalog_p, std::move(name_p)), db(db), parent_catalog(&catalog_p) {
8989

9090
if (options.access_mode == AccessMode::READ_ONLY) {
@@ -116,19 +116,17 @@ AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, str
116116
}
117117

118118
AttachedDatabase::AttachedDatabase(DatabaseInstance &db, Catalog &catalog_p, StorageExtension &storage_extension_p,
119-
ClientContext &context, string name_p, const AttachInfo &info,
120-
const AttachOptions &options)
119+
ClientContext &context, string name_p, AttachInfo &info, AttachOptions &options)
121120
: CatalogEntry(CatalogType::DATABASE_ENTRY, catalog_p, std::move(name_p)), db(db), parent_catalog(&catalog_p),
122121
storage_extension(&storage_extension_p) {
122+
StorageExtensionInfo *storage_info = storage_extension->storage_info.get();
123+
catalog = storage_extension->attach(storage_info, context, *this, name, info, options.access_mode);
123124

124125
if (options.access_mode == AccessMode::READ_ONLY) {
125126
type = AttachedDatabaseType::READ_ONLY_DATABASE;
126127
} else {
127128
type = AttachedDatabaseType::READ_WRITE_DATABASE;
128129
}
129-
130-
StorageExtensionInfo *storage_info = storage_extension->storage_info.get();
131-
catalog = storage_extension->attach(storage_info, context, *this, name, *info.Copy(), options.access_mode);
132130
if (!catalog) {
133131
throw InternalException("AttachedDatabase - attach function did not return a catalog");
134132
}

0 commit comments

Comments
 (0)