Skip to content

Commit 7947412

Browse files
mach-kernelsgrebnov
authored andcommitted
Fix duckdb_arrow_scan memory leak from get_schema. (#10)
* via spec: get_schema has a lifetime that is owned by the caller, ergo if the caller wants to drop it, we should let them do that * oops, still need to check for valid schema!
1 parent 5f305ef commit 7947412

1 file changed

Lines changed: 3 additions & 23 deletions

File tree

src/main/capi/arrow-c.cpp

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,6 @@ void EmptyStreamRelease(ArrowArrayStream *stream) {
357357

358358
void FactoryGetSchema(ArrowArrayStream *stream, ArrowSchema &schema) {
359359
stream->get_schema(stream, &schema);
360-
361-
// Need to nullify the root schema's release function here, because streams don't allow us to set the release
362-
// function. For the schema's children, we nullify the release functions in `duckdb_arrow_scan`, so we don't need to
363-
// handle them again here. We set this to nullptr and not EmptySchemaRelease to prevent ArrowSchemaWrapper's
364-
// destructor from destroying the schema (it's the caller's responsibility).
365-
schema.release = nullptr;
366360
}
367361

368362
int GetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out) {
@@ -441,30 +435,16 @@ duckdb_state Ingest(duckdb_connection connection, const char *table_name, struct
441435
duckdb_state duckdb_arrow_scan(duckdb_connection connection, const char *table_name, duckdb_arrow_stream arrow) {
442436
auto stream = reinterpret_cast<ArrowArrayStream *>(arrow);
443437

444-
// Backup release functions - we nullify children schema release functions because we don't want to release on
445-
// behalf of the caller, downstream in our code. Note that Arrow releases target immediate children, but aren't
446-
// recursive. So we only back up immediate children here and restore their functions.
447438
ArrowSchema schema;
448439
if (stream->get_schema(stream, &schema) == DuckDBError) {
449440
return DuckDBError;
450441
}
451442

452-
typedef void (*release_fn_t)(ArrowSchema *);
453-
std::vector<release_fn_t> release_fns(duckdb::NumericCast<idx_t>(schema.n_children));
454-
for (idx_t i = 0; i < duckdb::NumericCast<idx_t>(schema.n_children); i++) {
455-
auto child = schema.children[i];
456-
release_fns[i] = child->release;
457-
child->release = arrow_array_stream_wrapper::EmptySchemaRelease;
443+
if (schema.release) {
444+
schema.release(&schema);
458445
}
459446

460-
auto ret = arrow_array_stream_wrapper::Ingest(connection, table_name, stream);
461-
462-
// Restore release functions.
463-
for (idx_t i = 0; i < duckdb::NumericCast<idx_t>(schema.n_children); i++) {
464-
schema.children[i]->release = release_fns[i];
465-
}
466-
467-
return ret;
447+
return arrow_array_stream_wrapper::Ingest(connection, table_name, stream);
468448
}
469449

470450
duckdb_state duckdb_arrow_array_scan(duckdb_connection connection, const char *table_name,

0 commit comments

Comments
 (0)