API for separate planning and execution #75
Draft
vustef wants to merge 18 commits into main from feature/split-scan-api
Commits (18)
09df555  chore: add .worktrees to gitignore (vustef)
fc098f4  feat: add FFI types and config fields for split-scan API (vustef)
7848ea2  feat: add plan_files/create_reader/next_task/read_task FFI for full scan (vustef)
0e6ab05  feat: add split-scan FFI for incremental scans (vustef)
2b224fb  feat: add Julia API for split-scan (plan/create_reader/next/read) (vustef)
d6ce623  test: add tests for split scan API (full + incremental) (vustef)
30c82a6  feat: expose data_file_path from scan tasks to Julia API (vustef)
dc55dda  feat: expose data_file_path from scan tasks to Julia API (vustef)
005dde8  refactor: use wrapper structs instead of Ptr{Cvoid} aliases for dispatch (vustef)
4857d08  feat: add batch task pull/read API for all scan types (vustef)
3ba9519  Revert "feat: add batch task pull/read API for all scan types" (vustef)
a013f7d  feat: expose task_record_count for all scan task types (vustef)
61d55fe  style: cargo fmt --all (vustef)
e91242e  buffer ordered to preserve order within files (vustef)
ca76405  rust-side prefetching (vustef)
f2d20d4  reader concurrency (vustef)
8880576  prefetch tasks too (vustef)
b3024cd  task prefetch depth + blocking thread config (vustef)
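
The later commits (rust-side prefetching, ordered buffering, task prefetching) add bounded prefetching on the Rust side. As a minimal sketch of the idea only, assuming the prefetch behaves like futures' ordered buffered combinator; the actual behavior is whatever IcebergArrowStream::new and IcebergFileScanTaskStream::new do with their depth arguments in the diff below:

use futures::{stream, StreamExt};

// Illustration only: `buffered(depth)` runs up to `depth` futures at once
// but yields results in their original order, so batches within a file
// stay ordered while later work is prefetched in the background.
async fn ordered_prefetch_demo() {
    let out: Vec<u32> = stream::iter(0u32..8)
        .map(|i| async move { i * 2 }) // stand-in for "fetch/decode batch i"
        .buffered(3)                   // hypothetical prefetch depth of 3
        .collect()
        .await;
    assert_eq!(out, vec![0, 2, 4, 6, 8, 10, 12, 14]);
}
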
@@ -1,22 +1,44 @@
use std::ffi::{c_char, c_void, CStr};
use std::ptr;

use futures::stream;
use futures::{StreamExt, TryStreamExt};
use iceberg::arrow::ArrowReaderBuilder;
use iceberg::io::FileIO;
use iceberg::scan::{TableScan, TableScanBuilder};
use object_store_ffi::{
    export_runtime_op, with_cancellation, CResult, NotifyGuard, ResponseGuard, RT,
};
use tokio::sync::Mutex as AsyncMutex;

use crate::scan_common::*;
use crate::{IcebergArrowStream, IcebergArrowStreamResponse, IcebergTable};
use crate::{
    IcebergArrowStream, IcebergArrowStreamResponse, IcebergFileScanTaskStreamResponse,
    IcebergNextFileScanTaskResponse, IcebergTable,
};

/// Struct for regular (full) scan builder and scan
/// FFI wrapper for a regular (full) table scan.
///
/// In addition to the builder/scan from iceberg-rust, we store copies of
/// file_io, batch_size, and data_file_concurrency_limit so that
/// `create_reader` can build an ArrowReaderBuilder without accessing
/// private fields on TableScan.
#[repr(C)]
pub struct IcebergScan {
    pub builder: Option<TableScanBuilder<'static>>,
    pub scan: Option<TableScan>,
    /// 0 = auto-detect (num_cpus)
    pub serialization_concurrency: usize,
    /// Cloned from Table at scan creation time for use by create_reader
    pub file_io: Option<FileIO>,
    /// 0 = no batch size override (use reader default)
    pub batch_size: usize,
    /// 0 = auto-detect (num_cpus)
    pub data_file_concurrency_limit: usize,
    /// 0 = use DEFAULT_PREFETCH_DEPTH (for batch prefetch)
    pub prefetch_depth: usize,
    /// 0 = use DEFAULT_TASK_PREFETCH_DEPTH (for task planning prefetch)
    pub task_prefetch_depth: usize,
}

unsafe impl Send for IcebergScan {}

@@ -28,23 +50,46 @@ pub extern "C" fn iceberg_new_scan(table: *mut IcebergTable) -> *mut IcebergScan | |
| return ptr::null_mut(); | ||
| } | ||
| let table_ref = unsafe { &*table }; | ||
| // Clone FileIO for later use by create_reader. | ||
| // FileIO is Arc-based internally so this is cheap. | ||
| let file_io = table_ref.table.file_io().clone(); | ||
| let scan_builder = table_ref.table.scan(); | ||
| Box::into_raw(Box::new(IcebergScan { | ||
| builder: Some(scan_builder), | ||
| scan: None, | ||
| serialization_concurrency: 0, | ||
| file_io: Some(file_io), | ||
| batch_size: 0, | ||
| data_file_concurrency_limit: 0, | ||
| prefetch_depth: 0, | ||
| task_prefetch_depth: 0, | ||
| })) | ||
| } | ||
|
|
||
| // Use macros from scan_common for shared functionality | ||
| impl_select_columns!(iceberg_select_columns, IcebergScan); | ||
|
|
||
| impl_scan_builder_method!( | ||
| iceberg_scan_with_data_file_concurrency_limit, | ||
| IcebergScan, | ||
| with_data_file_concurrency_limit, | ||
| n: usize | ||
| ); | ||
| /// Sets data file concurrency and stores the value for create_reader. | ||
| #[no_mangle] | ||
| pub extern "C" fn iceberg_scan_with_data_file_concurrency_limit( | ||
| scan: &mut *mut IcebergScan, | ||
| n: usize, | ||
| ) -> CResult { | ||
| if scan.is_null() || (*scan).is_null() { | ||
| return CResult::Error; | ||
| } | ||
| let mut scan_val = *unsafe { Box::from_raw(*scan) }; | ||
| if scan_val.builder.is_none() { | ||
| *scan = Box::into_raw(Box::new(scan_val)); | ||
| return CResult::Error; | ||
| } | ||
| scan_val.builder = scan_val | ||
| .builder | ||
| .map(|b| b.with_data_file_concurrency_limit(n)); | ||
| scan_val.data_file_concurrency_limit = n; | ||
| *scan = Box::into_raw(Box::new(scan_val)); | ||
| CResult::Ok | ||
| } | ||
|
|
||
| impl_scan_builder_method!( | ||
| iceberg_scan_with_manifest_file_concurrency_limit, | ||
|
|
@@ -73,6 +118,16 @@ impl_with_serialization_concurrency_limit!( | |
| IcebergScan | ||
| ); | ||
|
|
||
| impl_with_prefetch_depth!( | ||
| iceberg_scan_with_prefetch_depth, | ||
| IcebergScan | ||
| ); | ||
|
|
||
| impl_with_task_prefetch_depth!( | ||
| iceberg_scan_with_task_prefetch_depth, | ||
| IcebergScan | ||
| ); | ||
|
|
||
| impl_scan_builder_method!( | ||
| iceberg_scan_with_snapshot_id, | ||
| IcebergScan, | ||
|
|
@@ -104,11 +159,17 @@ export_runtime_op!( | |
| serialization_concurrency | ||
| }; | ||
|
|
||
| Ok((scan_ref.as_ref().unwrap(), serialization_concurrency)) | ||
| let prefetch_depth = if scan_ptr.prefetch_depth == 0 { | ||
| crate::table::DEFAULT_PREFETCH_DEPTH | ||
| } else { | ||
| scan_ptr.prefetch_depth | ||
| }; | ||
|
|
||
| Ok((scan_ref.as_ref().unwrap(), serialization_concurrency, prefetch_depth)) | ||
| }, | ||
| result_tuple, | ||
| async { | ||
| let (scan_ref, serialization_concurrency) = result_tuple; | ||
| let (scan_ref, serialization_concurrency, prefetch_depth) = result_tuple; | ||
|
|
||
| let stream = scan_ref.to_arrow().await?; | ||
|
|
||
|
|
@@ -118,11 +179,189 @@ export_runtime_op!( | |
| serialization_concurrency | ||
| ); | ||
|
|
||
| Ok::<IcebergArrowStream, anyhow::Error>(IcebergArrowStream { | ||
| stream: AsyncMutex::new(serialized_stream), | ||
| }) | ||
| Ok::<IcebergArrowStream, anyhow::Error>(IcebergArrowStream::new( | ||
| serialized_stream, | ||
| prefetch_depth, | ||
| )) | ||
| }, | ||
| scan: *mut IcebergScan | ||
| ); | ||
|
|
||
| impl_scan_free!(iceberg_scan_free, IcebergScan); | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Split-scan API: plan_files → create_reader → next_task → read_task | ||
| // | ||
| // This separates scan planning from reading, allowing multiple consumers | ||
| // to concurrently pull tasks from a shared stream and read them with a | ||
| // shared ArrowReader (which caches loaded delete files via Arc). | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| // Async: plan which files to read. Returns a shared task stream. | ||
| // Multiple consumers can call next_task on the same stream concurrently. | ||
| export_runtime_op!( | ||
| iceberg_plan_files, | ||
| IcebergFileScanTaskStreamResponse, | ||
| || { | ||
| if scan.is_null() { | ||
| return Err(anyhow::anyhow!("Null scan pointer provided")); | ||
| } | ||
| let scan_ptr = unsafe { &*scan }; | ||
| let scan_ref = scan_ptr.scan.as_ref() | ||
| .ok_or_else(|| anyhow::anyhow!("Scan not built — call build! first"))?; | ||
|
|
||
| let task_prefetch_depth = if scan_ptr.task_prefetch_depth == 0 { | ||
| crate::table::DEFAULT_TASK_PREFETCH_DEPTH | ||
| } else { | ||
| scan_ptr.task_prefetch_depth | ||
| }; | ||
|
|
||
| Ok((scan_ref, task_prefetch_depth)) | ||
| }, | ||
| result_tuple, | ||
| async { | ||
| let (scan_ref, task_prefetch_depth) = result_tuple; | ||
| let stream = scan_ref.plan_files().await?; | ||
| Ok::<crate::IcebergFileScanTaskStream, anyhow::Error>( | ||
| crate::IcebergFileScanTaskStream::new(stream, task_prefetch_depth) | ||
| ) | ||
| }, | ||
| scan: *mut IcebergScan | ||
| ); | ||
|
|
||
| /// Sync: create a shared ArrowReader context from the scan's configuration. | ||
| /// | ||
| /// The returned context should be passed to every read_task call. | ||
| /// Cloning the inner ArrowReader is cheap and shares the delete-file cache. | ||
| /// | ||
| /// For full scans, row_group_filtering is enabled and row_selection is | ||
| /// disabled, matching the defaults in TableScan::to_arrow(). | ||
| /// `reader_concurrency`: data-file concurrency for the reader (0 = use scan default). | ||
| /// This overrides the scan-level `data_file_concurrency_limit` when set, | ||
| /// allowing the per-task reader to use a different parallelism than the scan planner. | ||
| #[no_mangle] | ||
| pub extern "C" fn iceberg_create_reader( | ||
| scan: *mut IcebergScan, | ||
| reader_concurrency: usize, | ||
| ) -> *mut crate::IcebergArrowReaderContext { | ||
| if scan.is_null() { | ||
| return std::ptr::null_mut(); | ||
| } | ||
| let scan_ptr = unsafe { &*scan }; | ||
|
|
||
| let Some(file_io) = scan_ptr.file_io.as_ref() else { | ||
| return std::ptr::null_mut(); | ||
| }; | ||
|
|
||
| let mut builder = ArrowReaderBuilder::new(file_io.clone()) | ||
| .with_row_group_filtering_enabled(true) | ||
| .with_row_selection_enabled(false); | ||
|

Collaborator (Author): Why
Collaborator (Author): Actually there's iceberg_create_incremental_reader

    // reader_concurrency > 0 overrides the scan-level setting
    let data_file_concurrency = if reader_concurrency > 0 {
        reader_concurrency
    } else {
        scan_ptr.data_file_concurrency_limit
    };
    if data_file_concurrency > 0 {
        builder = builder.with_data_file_concurrency_limit(data_file_concurrency);
    }
    if scan_ptr.batch_size > 0 {
        builder = builder.with_batch_size(scan_ptr.batch_size);
    }

    let serialization_concurrency = if scan_ptr.serialization_concurrency == 0 {
        std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(1)
    } else {
        scan_ptr.serialization_concurrency
    };

    let prefetch_depth = if scan_ptr.prefetch_depth == 0 {
        crate::table::DEFAULT_PREFETCH_DEPTH
    } else {
        scan_ptr.prefetch_depth
    };

    Box::into_raw(Box::new(crate::IcebergArrowReaderContext {
        reader: builder.build(),
        serialization_concurrency,
        prefetch_depth,
    }))
}

// Async: pull the next file scan task from the stream.
// Returns null payload when the stream is exhausted (end of planning).
// Safe to call concurrently from multiple consumers — the stream is
// behind an AsyncMutex.
export_runtime_op!(
    iceberg_next_file_scan_task,
    IcebergNextFileScanTaskResponse,
    || {
        if task_stream.is_null() {
            return Err(anyhow::anyhow!("Null task stream pointer"));
        }
        let stream_ref = unsafe { &*task_stream };
        Ok(stream_ref)
    },
    stream_ref,
    async {
        let result: Result<Option<crate::IcebergFileScanTask>, anyhow::Error> = match stream_ref.next().await? {
            Some(task) => Ok(Some(crate::IcebergFileScanTask { task })),
            None => Ok(None),
        };
        result
    },
    task_stream: *mut crate::IcebergFileScanTaskStream
);

// Async: read a single file scan task into an Arrow stream.
//
// Clones the ArrowReader from the shared context — this shares the
// CachingDeleteFileLoader cache across all concurrent consumers.
// The task is wrapped in a one-element stream and fed to
// ArrowReader::read().
export_runtime_op!(
    iceberg_read_file_scan_task,
    IcebergArrowStreamResponse,
    || {
        if reader_ctx.is_null() {
            return Err(anyhow::anyhow!("Null reader context pointer"));
        }
        if task.is_null() {
            return Err(anyhow::anyhow!("Null task pointer"));
        }
        let ctx = unsafe { &*reader_ctx };
        // Clone the reader — cheap, shares delete-file cache via Arc
        let reader = ctx.reader.clone();
        let serialization_concurrency = ctx.serialization_concurrency;
        let prefetch_depth = ctx.prefetch_depth;

        // Consume the task — caller must NOT call free_task after this
        let task_ref = unsafe { Box::from_raw(task) };
        let file_scan_task = task_ref.task;

        Ok((reader, serialization_concurrency, prefetch_depth, file_scan_task))
    },
    result_tuple,
    async {
        let (reader, serialization_concurrency, prefetch_depth, file_scan_task) = result_tuple;

        // Wrap single task in a one-element stream for ArrowReader::read()
        let task_stream = stream::once(async { Ok(file_scan_task) }).boxed();
        let record_batch_stream = reader.read(task_stream)?;

        let serialized_stream = crate::transform_stream_with_parallel_serialization(
            record_batch_stream,
            serialization_concurrency,
        );

        Ok::<crate::IcebergArrowStream, anyhow::Error>(crate::IcebergArrowStream::new(
            serialized_stream,
            prefetch_depth,
        ))
    },
    reader_ctx: *mut crate::IcebergArrowReaderContext,
    task: *mut crate::IcebergFileScanTask
);
Claude also mentioned this: in iceberg-rust repo in s3.rs. I haven't noticed this helps though.