@@ -2572,3 +2572,175 @@ fn test_scanner_nearest_null_safety() {
25722572 unsafe { lance_scanner_close ( scanner) } ;
25732573 unsafe { lance_dataset_close ( ds) } ;
25742574}
2575+
/// End-to-end check of full-text search through the C scanner API.
///
/// Builds an inverted index on the `name` column, configures the scanner with
/// the query "alice", exports the scan as an Arrow C stream, and verifies that
/// the result schema carries a `_score` column and that at least one row is
/// returned.
#[test]
fn test_scanner_full_text_search() {
    // Reads the most recent FFI error text. Only invoked when an assertion
    // fails, i.e. after a call that has just reported an error.
    let last_error = || unsafe {
        std::ffi::CStr::from_ptr(lance_last_error_message())
            .to_string_lossy()
            .into_owned()
    };

    let (_tmp, uri) = create_test_dataset();
    let uri_c = c_str(&uri);
    let ds = unsafe { lance_dataset_open(uri_c.as_ptr(), ptr::null(), 0) };
    assert!(!ds.is_null(), "failed to open dataset: {}", last_error());

    // Build the inverted index on `name` first. The return code is checked so
    // that an index failure is reported here rather than surfacing later as a
    // confusing "no hits" assertion.
    let column = c_str("name");
    let inverted_params = c_str(r#"{"base_tokenizer":"simple","language":"English"}"#);
    let index_rc = unsafe {
        lance_dataset_create_scalar_index(
            ds,
            column.as_ptr(),
            ptr::null(),
            LanceScalarIndexType::Inverted as i32,
            inverted_params.as_ptr(),
            false,
        )
    };
    assert_eq!(index_rc, 0, "inverted index build failed: {}", last_error());

    let scanner = unsafe { lance_scanner_new(ds, ptr::null(), ptr::null()) };
    assert!(!scanner.is_null(), "failed to create scanner: {}", last_error());

    let q = c_str("alice");
    // Column list is passed as a NULL-terminated array of C strings.
    let cols = [column.as_ptr(), ptr::null()];
    // Final argument 0: presumably the maximum fuzzy edit distance (exact
    // match) -- confirm against the FFI signature.
    let rc = unsafe { lance_scanner_full_text_search(scanner, q.as_ptr(), cols.as_ptr(), 0) };
    assert_eq!(rc, 0, "{}", last_error());

    let mut stream = FFI_ArrowArrayStream::empty();
    let export_rc = unsafe { lance_scanner_to_arrow_stream(scanner, &mut stream as *mut _) };
    assert_eq!(export_rc, 0, "stream export failed: {}", last_error());

    let reader = unsafe {
        ArrowArrayStreamReader::from_raw(&mut stream as *mut _)
            .expect("exported Arrow stream should be readable")
    };
    let schema = reader.schema();
    assert!(
        schema.field_with_name("_score").is_ok(),
        "_score column missing from schema"
    );

    let total: usize = reader
        .map(|batch| batch.expect("reading FTS result batch").num_rows())
        .sum();
    assert!(total >= 1, "expected at least 1 hit for 'alice'");

    unsafe { lance_scanner_close(scanner) };
    unsafe { lance_dataset_close(ds) };
}
2621+
/// Verifies fuzzy full-text search through the C scanner API.
///
/// Builds an inverted index on `name`, then searches for the misspelling
/// "alise" with the last argument of `lance_scanner_full_text_search` set to
/// 2 and expects at least one row back.
#[test]
fn test_fts_fuzzy() {
    // Reads the most recent FFI error text. Only invoked when an assertion
    // fails, i.e. after a call that has just reported an error.
    let last_error = || unsafe {
        std::ffi::CStr::from_ptr(lance_last_error_message())
            .to_string_lossy()
            .into_owned()
    };

    let (_tmp, uri) = create_test_dataset();
    let uri_c = c_str(&uri);
    let ds = unsafe { lance_dataset_open(uri_c.as_ptr(), ptr::null(), 0) };
    assert!(!ds.is_null(), "failed to open dataset: {}", last_error());

    // The inverted index must exist before searching; check the return code so
    // a failed build is not mistaken for a fuzzy-matching problem.
    let column = c_str("name");
    let inverted_params = c_str(r#"{"base_tokenizer":"simple","language":"English"}"#);
    let index_rc = unsafe {
        lance_dataset_create_scalar_index(
            ds,
            column.as_ptr(),
            ptr::null(),
            LanceScalarIndexType::Inverted as i32,
            inverted_params.as_ptr(),
            false,
        )
    };
    assert_eq!(index_rc, 0, "inverted index build failed: {}", last_error());

    let scanner = unsafe { lance_scanner_new(ds, ptr::null(), ptr::null()) };
    assert!(!scanner.is_null(), "failed to create scanner: {}", last_error());

    // "alise" is a single substitution away from "alice" (in the test
    // fixture), which is inside the allowed edit distance of 2.
    let q = c_str("alise");
    // Column list is passed as a NULL-terminated array of C strings.
    let cols = [column.as_ptr(), ptr::null()];
    let rc = unsafe { lance_scanner_full_text_search(scanner, q.as_ptr(), cols.as_ptr(), 2) };
    assert_eq!(rc, 0, "{}", last_error());

    let mut stream = FFI_ArrowArrayStream::empty();
    let export_rc = unsafe { lance_scanner_to_arrow_stream(scanner, &mut stream as *mut _) };
    assert_eq!(export_rc, 0, "stream export failed: {}", last_error());

    let reader = unsafe {
        ArrowArrayStreamReader::from_raw(&mut stream as *mut _)
            .expect("exported Arrow stream should be readable")
    };
    let total: usize = reader
        .map(|batch| batch.expect("reading fuzzy FTS result batch").num_rows())
        .sum();
    assert!(total >= 1, "expected fuzzy match for 'alise' → 'alice'");

    unsafe { lance_scanner_close(scanner) };
    unsafe { lance_dataset_close(ds) };
}
2663+
/// Configuring a vector (`nearest`) query on a scanner that already has a
/// full-text-search query must fail with -1 and an error message that
/// mentions the conflict.
#[test]
fn test_nearest_after_fts_is_rejected() {
    // Reads the most recent FFI error text. Only invoked after a call that has
    // just reported an error.
    let last_error = || unsafe {
        std::ffi::CStr::from_ptr(lance_last_error_message())
            .to_string_lossy()
            .into_owned()
    };

    let (_tmp, uri) = create_vector_dataset(64, 8);
    let uri_c = c_str(&uri);
    let ds = unsafe { lance_dataset_open(uri_c.as_ptr(), ptr::null(), 0) };
    assert!(!ds.is_null(), "failed to open dataset: {}", last_error());
    let scanner = unsafe { lance_scanner_new(ds, ptr::null(), ptr::null()) };
    assert!(!scanner.is_null(), "failed to create scanner: {}", last_error());

    // Set FTS first (no inverted index needed for this test — the error is
    // expected at the second call, before any stream materialization). The
    // setup call itself must succeed, otherwise the rejection below would not
    // be exercising the FTS/nearest conflict at all.
    let q = c_str("foo");
    let fts_rc = unsafe { lance_scanner_full_text_search(scanner, q.as_ptr(), ptr::null(), 0) };
    assert_eq!(fts_rc, 0, "FTS setup call failed: {}", last_error());

    let column = c_str("embedding");
    let query: Vec<f32> = vec![0.5; 8];
    let rc = unsafe {
        lance_scanner_nearest(
            scanner,
            column.as_ptr(),
            query.as_ptr() as *const std::ffi::c_void,
            8,
            LanceDataType::Float32 as i32,
            5,
        )
    };
    assert_eq!(rc, -1);

    // The exact wording is not pinned; any of these markers is accepted.
    let msg = last_error();
    let lower = msg.to_lowercase();
    assert!(
        lower.contains("full_text")
            || lower.contains("fts")
            || lower.contains("mutually exclusive"),
        "msg was: {}",
        msg
    );

    unsafe { lance_scanner_close(scanner) };
    unsafe { lance_dataset_close(ds) };
}
2708+
/// Configuring a full-text-search query on a scanner that already has a
/// vector (`nearest`) query must fail with -1 and an error message that
/// mentions the conflict.
#[test]
fn test_fts_after_nearest_is_rejected() {
    // Reads the most recent FFI error text. Only invoked after a call that has
    // just reported an error.
    let last_error = || unsafe {
        std::ffi::CStr::from_ptr(lance_last_error_message())
            .to_string_lossy()
            .into_owned()
    };

    let (_tmp, uri) = create_vector_dataset(64, 8);
    let uri_c = c_str(&uri);
    let ds = unsafe { lance_dataset_open(uri_c.as_ptr(), ptr::null(), 0) };
    assert!(!ds.is_null(), "failed to open dataset: {}", last_error());
    let scanner = unsafe { lance_scanner_new(ds, ptr::null(), ptr::null()) };
    assert!(!scanner.is_null(), "failed to create scanner: {}", last_error());

    // Configure the vector query first. It must succeed, otherwise the
    // rejection below would not be exercising the nearest/FTS conflict.
    let column = c_str("embedding");
    let query: Vec<f32> = vec![0.5; 8];
    let nearest_rc = unsafe {
        lance_scanner_nearest(
            scanner,
            column.as_ptr(),
            query.as_ptr() as *const std::ffi::c_void,
            8,
            LanceDataType::Float32 as i32,
            5,
        )
    };
    assert_eq!(nearest_rc, 0, "nearest setup call failed: {}", last_error());

    let q = c_str("foo");
    let rc = unsafe { lance_scanner_full_text_search(scanner, q.as_ptr(), ptr::null(), 0) };
    assert_eq!(rc, -1);

    // The exact wording is not pinned; any of these markers is accepted.
    let msg = last_error();
    let lower = msg.to_lowercase();
    assert!(
        lower.contains("nearest")
            || lower.contains("vector")
            || lower.contains("mutually exclusive"),
        "msg was: {}",
        msg
    );

    unsafe { lance_scanner_close(scanner) };
    unsafe { lance_dataset_close(ds) };
}
0 commit comments