Skip to content

Commit 197c3c5

Browse files
mach-kernelpeasee
authored andcommitted
Fix duckdb_arrow_scan memory leak from get_schema. (#10)
* via spec: get_schema has a lifetime that is owned by the caller, ergo if the caller wants to drop it, we should let them do that * oops, still need to check for valid schema!
1 parent 2ec6a30 commit 197c3c5

1 file changed

Lines changed: 3 additions & 23 deletions

File tree

src/main/capi/arrow-c.cpp

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -355,12 +355,6 @@ void EmptyStreamRelease(ArrowArrayStream *stream) {
355355

356356
void FactoryGetSchema(ArrowArrayStream *stream, ArrowSchema &schema) {
357357
stream->get_schema(stream, &schema);
358-
359-
// Need to nullify the root schema's release function here, because streams don't allow us to set the release
360-
// function. For the schema's children, we nullify the release functions in `duckdb_arrow_scan`, so we don't need to
361-
// handle them again here. We set this to nullptr and not EmptySchemaRelease to prevent ArrowSchemaWrapper's
362-
// destructor from destroying the schema (it's the caller's responsibility).
363-
schema.release = nullptr;
364358
}
365359

366360
int GetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out) {
@@ -439,30 +433,16 @@ duckdb_state Ingest(duckdb_connection connection, const char *table_name, struct
439433
duckdb_state duckdb_arrow_scan(duckdb_connection connection, const char *table_name, duckdb_arrow_stream arrow) {
440434
auto stream = reinterpret_cast<ArrowArrayStream *>(arrow);
441435

442-
// Backup release functions - we nullify children schema release functions because we don't want to release on
443-
// behalf of the caller, downstream in our code. Note that Arrow releases target immediate children, but aren't
444-
// recursive. So we only back up immediate children here and restore their functions.
445436
ArrowSchema schema;
446437
if (stream->get_schema(stream, &schema) == DuckDBError) {
447438
return DuckDBError;
448439
}
449440

450-
typedef void (*release_fn_t)(ArrowSchema *);
451-
std::vector<release_fn_t> release_fns(duckdb::NumericCast<idx_t>(schema.n_children));
452-
for (idx_t i = 0; i < duckdb::NumericCast<idx_t>(schema.n_children); i++) {
453-
auto child = schema.children[i];
454-
release_fns[i] = child->release;
455-
child->release = arrow_array_stream_wrapper::EmptySchemaRelease;
441+
if (schema.release) {
442+
schema.release(&schema);
456443
}
457444

458-
auto ret = arrow_array_stream_wrapper::Ingest(connection, table_name, stream);
459-
460-
// Restore release functions.
461-
for (idx_t i = 0; i < duckdb::NumericCast<idx_t>(schema.n_children); i++) {
462-
schema.children[i]->release = release_fns[i];
463-
}
464-
465-
return ret;
445+
return arrow_array_stream_wrapper::Ingest(connection, table_name, stream);
466446
}
467447

468448
duckdb_state duckdb_arrow_array_scan(duckdb_connection connection, const char *table_name,

0 commit comments

Comments
 (0)