1- use std:: ffi:: { c_char, c_void} ;
1+ use std:: ffi:: { c_char, c_void, CStr } ;
22use std:: ptr;
33use std:: sync:: Arc ;
44
@@ -12,12 +12,39 @@ use lance_index::vector::hnsw::builder::HnswBuildParams;
1212use lance_index:: vector:: pq:: PQBuildParams ;
1313use lance_index:: { DatasetIndexExt , IndexType } ;
1414use lance_linalg:: distance:: DistanceType ;
15+ use serde:: { Deserialize , Serialize } ;
1516
1617use crate :: error:: { clear_last_error, set_last_error, ErrorCode } ;
1718use crate :: runtime;
1819
1920use super :: types:: { SchemaHandle , StreamHandle } ;
20- use super :: util:: { cstr_to_str, dataset_handle, FfiError , FfiResult } ;
21+ use super :: util:: { cstr_to_str, dataset_handle, to_c_string, FfiError , FfiResult } ;
22+
23+ #[ derive( Debug , Deserialize ) ]
24+ #[ serde( default , deny_unknown_fields) ]
25+ struct OptimizeIndexOptionsInput {
26+ mode : Option < String > ,
27+ retrain : Option < bool > ,
28+ num_indices_to_merge : Option < usize > ,
29+ }
30+
31+ impl Default for OptimizeIndexOptionsInput {
32+ fn default ( ) -> Self {
33+ Self {
34+ mode : None ,
35+ retrain : None ,
36+ num_indices_to_merge : None ,
37+ }
38+ }
39+ }
40+
41+ #[ derive( Debug , Serialize ) ]
42+ struct OptimizeIndexMetricsOutput {
43+ index_name : String ,
44+ mode : String ,
45+ retrain : bool ,
46+ num_indices_to_merge : Option < usize > ,
47+ }
2148
2249#[ no_mangle]
2350pub unsafe extern "C" fn lance_get_index_list_schema ( dataset : * mut c_void ) -> * mut c_void {
@@ -340,7 +367,13 @@ pub unsafe extern "C" fn lance_dataset_optimize_index(
340367 index_name : * const c_char ,
341368 retrain : u8 ,
342369) -> i32 {
343- match dataset_optimize_index_inner ( dataset, index_name, retrain) {
370+ match dataset_optimize_index_with_options_inner (
371+ dataset,
372+ index_name,
373+ ptr:: null ( ) ,
374+ ptr:: null_mut ( ) ,
375+ retrain != 0 ,
376+ ) {
344377 Ok ( ( ) ) => {
345378 clear_last_error ( ) ;
346379 0
@@ -352,10 +385,112 @@ pub unsafe extern "C" fn lance_dataset_optimize_index(
352385 }
353386}
354387
355- fn dataset_optimize_index_inner (
388+ #[ no_mangle]
389+ pub unsafe extern "C" fn lance_dataset_optimize_index_with_options (
356390 dataset : * mut c_void ,
357391 index_name : * const c_char ,
358- retrain : u8 ,
392+ options_json : * const c_char ,
393+ out_metrics_json : * mut * const c_char ,
394+ ) -> i32 {
395+ if !out_metrics_json. is_null ( ) {
396+ unsafe {
397+ ptr:: write_unaligned ( out_metrics_json, ptr:: null ( ) ) ;
398+ }
399+ }
400+ match dataset_optimize_index_with_options_inner (
401+ dataset,
402+ index_name,
403+ options_json,
404+ out_metrics_json,
405+ false ,
406+ ) {
407+ Ok ( ( ) ) => {
408+ clear_last_error ( ) ;
409+ 0
410+ }
411+ Err ( err) => {
412+ set_last_error ( err. code , err. message ) ;
413+ -1
414+ }
415+ }
416+ }
417+
418+ fn parse_optimize_index_options_json (
419+ options_json : * const c_char ,
420+ legacy_retrain : bool ,
421+ ) -> FfiResult < ( OptimizeOptions , String , bool , Option < usize > ) > {
422+ let input = if options_json. is_null ( ) {
423+ OptimizeIndexOptionsInput :: default ( )
424+ } else {
425+ let text = unsafe { CStr :: from_ptr ( options_json) }
426+ . to_str ( )
427+ . map_err ( |err| FfiError :: new ( ErrorCode :: Utf8 , format ! ( "options_json utf8: {err}" ) ) ) ?;
428+ if text. trim ( ) . is_empty ( ) {
429+ OptimizeIndexOptionsInput :: default ( )
430+ } else {
431+ serde_json:: from_str ( text) . map_err ( |err| {
432+ FfiError :: new (
433+ ErrorCode :: InvalidArgument ,
434+ format ! ( "optimize_index options_json parse: {err}" ) ,
435+ )
436+ } ) ?
437+ }
438+ } ;
439+
440+ let mode = if let Some ( mode) = input. mode . as_ref ( ) {
441+ mode. trim ( ) . to_ascii_lowercase ( )
442+ } else if input. retrain . unwrap_or ( false ) || legacy_retrain {
443+ String :: from ( "retrain" )
444+ } else {
445+ String :: from ( "append" )
446+ } ;
447+
448+ let ( options, retrain, num_indices_to_merge) = match mode. as_str ( ) {
449+ "append" => {
450+ if input. num_indices_to_merge . is_some ( ) {
451+ return Err ( FfiError :: new (
452+ ErrorCode :: InvalidArgument ,
453+ "num_indices_to_merge is only valid for mode='merge'" ,
454+ ) ) ;
455+ }
456+ ( OptimizeOptions :: append ( ) , false , Some ( 0 ) )
457+ }
458+ "merge" => {
459+ let num = input. num_indices_to_merge . unwrap_or ( 1 ) ;
460+ if num == 0 {
461+ return Err ( FfiError :: new (
462+ ErrorCode :: InvalidArgument ,
463+ "num_indices_to_merge must be > 0 for mode='merge'" ,
464+ ) ) ;
465+ }
466+ ( OptimizeOptions :: merge ( num) , false , Some ( num) )
467+ }
468+ "retrain" => {
469+ if input. num_indices_to_merge . is_some ( ) {
470+ return Err ( FfiError :: new (
471+ ErrorCode :: InvalidArgument ,
472+ "num_indices_to_merge is invalid for mode='retrain'" ,
473+ ) ) ;
474+ }
475+ ( OptimizeOptions :: retrain ( ) , true , None )
476+ }
477+ other => {
478+ return Err ( FfiError :: new (
479+ ErrorCode :: InvalidArgument ,
480+ format ! ( "unsupported optimize mode: {other}" ) ,
481+ ) )
482+ }
483+ } ;
484+
485+ Ok ( ( options, mode, retrain, num_indices_to_merge) )
486+ }
487+
488+ fn dataset_optimize_index_with_options_inner (
489+ dataset : * mut c_void ,
490+ index_name : * const c_char ,
491+ options_json : * const c_char ,
492+ out_metrics_json : * mut * const c_char ,
493+ legacy_retrain : bool ,
359494) -> FfiResult < ( ) > {
360495 let handle = unsafe { dataset_handle ( dataset) ? } ;
361496 let index_name = unsafe { cstr_to_str ( index_name, "index_name" ) ? } ;
@@ -365,25 +500,41 @@ fn dataset_optimize_index_inner(
365500 "index_name must be non-empty" ,
366501 ) ) ;
367502 }
503+ let index_name_owned = index_name. to_string ( ) ;
368504
369- let mut options = if retrain != 0 {
370- OptimizeOptions :: retrain ( )
371- } else {
372- OptimizeOptions :: append ( )
373- } ;
374- options = options. index_names ( vec ! [ index_name. to_string( ) ] ) ;
505+ let ( mut options, mode, retrain, num_indices_to_merge) =
506+ parse_optimize_index_options_json ( options_json, legacy_retrain) ?;
507+ options = options. index_names ( vec ! [ index_name_owned. clone( ) ] ) ;
375508
376509 let mut ds: Dataset = handle. dataset . as_ref ( ) . clone ( ) ;
377- run_with_large_stack ( move || {
510+ let metrics = run_with_large_stack ( move || {
378511 match runtime:: block_on ( async { ds. optimize_indices ( & options) . await } ) {
379- Ok ( Ok ( ( ) ) ) => Ok ( ( ) ) ,
512+ Ok ( Ok ( ( ) ) ) => Ok ( OptimizeIndexMetricsOutput {
513+ index_name : index_name_owned,
514+ mode,
515+ retrain,
516+ num_indices_to_merge,
517+ } ) ,
380518 Ok ( Err ( err) ) => Err ( FfiError :: new (
381519 ErrorCode :: DatasetOptimizeIndices ,
382520 format ! ( "dataset optimize_indices: {err}" ) ,
383521 ) ) ,
384522 Err ( err) => Err ( FfiError :: new ( ErrorCode :: Runtime , format ! ( "runtime: {err}" ) ) ) ,
385523 }
386- } ) ?
524+ } ) ??;
525+
526+ if !out_metrics_json. is_null ( ) {
527+ let payload = serde_json:: to_string ( & metrics) . map_err ( |err| {
528+ FfiError :: new (
529+ ErrorCode :: DatasetOptimizeIndices ,
530+ format ! ( "optimize_index metrics_json serialize: {err}" ) ,
531+ )
532+ } ) ?;
533+ unsafe {
534+ ptr:: write_unaligned ( out_metrics_json, to_c_string ( payload) . into_raw ( ) as * const c_char ) ;
535+ }
536+ }
537+ Ok ( ( ) )
387538}
388539
389540fn normalize_index_type ( index_type : & str ) -> String {
0 commit comments