@@ -63,6 +63,20 @@ pub(crate) fn validated_file_size(file_size_bytes: i64, file_path: &str) -> Data
6363 } )
6464}
6565
66+ /// Validate and convert record_count from i64 (as stored in DuckLake metadata) to u64.
67+ ///
68+ /// DuckLake stores record counts as signed integers in SQL. A negative value indicates
69+ /// corrupt or invalid metadata. Without this check, a negative record_count would cause
70+ /// incorrect behavior (e.g., empty ranges in full-file deletes, or incorrect row filtering).
71+ pub ( crate ) fn validated_record_count ( record_count : i64 , file_path : & str ) -> DataFusionResult < u64 > {
72+ u64:: try_from ( record_count) . map_err ( |_| {
73+ DataFusionError :: Execution ( format ! (
74+ "Invalid record_count ({}) for file '{}': value must be non-negative" ,
75+ record_count, file_path
76+ ) )
77+ } )
78+ }
79+
6680/// Returns the expected schema for DuckLake delete files
6781///
6882/// Delete files have a standard schema: (file_path: VARCHAR, pos: INT64)
@@ -731,4 +745,43 @@ mod tests {
731745 assert ! ( msg. contains( "bad.parquet" ) ) ;
732746 assert ! ( msg. contains( & i64 :: MIN . to_string( ) ) ) ;
733747 }
748+
#[test]
fn test_validated_record_count_positive() {
    // Zero, a typical count, and the largest representable input, each paired
    // with the u64 the conversion is expected to yield.
    let cases: [(i64, u64); 3] = [(0, 0), (100, 100), (i64::MAX, i64::MAX as u64)];
    for &(input, expected) in &cases {
        let converted = validated_record_count(input, "test.parquet").unwrap();
        assert_eq!(converted, expected);
    }
}
758+
#[test]
fn test_validated_record_count_negative() {
    // Render the error once; every check below inspects the same text.
    let rendered = validated_record_count(-1, "data/test.parquet")
        .unwrap_err()
        .to_string();

    // The rejected value must be reported back to the user.
    assert!(
        rendered.contains("-1"),
        "Error should contain the negative value: {}",
        rendered
    );
    // The file whose metadata is bad must be identifiable.
    assert!(
        rendered.contains("data/test.parquet"),
        "Error should contain the file path: {}",
        rendered
    );
    // The message must name the field that failed validation.
    assert!(
        rendered.contains("record_count"),
        "Error should mention record_count: {}",
        rendered
    );
}
779+
#[test]
fn test_validated_record_count_large_negative() {
    // The most negative i64 is the extreme end of the rejected range.
    let smallest = i64::MIN;
    let rendered = validated_record_count(smallest, "bad.parquet")
        .unwrap_err()
        .to_string();
    assert!(rendered.contains("bad.parquet"));
    assert!(rendered.contains(&smallest.to_string()));
}
734787}
0 commit comments