@@ -44,6 +44,7 @@ use async_trait::async_trait;
4444use callisto:: sea_orm_active_enums:: ConvTypeEnum ;
4545use callisto:: { mega_blob, mega_mr, mega_tree, raw_blob} ;
4646use common:: errors:: MegaError ;
47+ use common:: model:: Pagination ;
4748use jupiter:: storage:: base_storage:: StorageConnector ;
4849use jupiter:: storage:: Storage ;
4950use jupiter:: utils:: converter:: generate_git_keep_with_timestamp;
@@ -52,11 +53,12 @@ use mercury::hash::SHA1;
5253use mercury:: internal:: object:: blob:: Blob ;
5354use mercury:: internal:: object:: commit:: Commit ;
5455use mercury:: internal:: object:: tree:: { Tree , TreeItem , TreeItemMode } ;
56+ use neptune:: model:: diff_model:: DiffItem ;
5557use neptune:: neptune_engine:: Diff ;
5658
5759use crate :: api_service:: ApiHandler ;
5860use crate :: model:: git:: CreateFileInfo ;
59- use crate :: model:: mr:: { MrDiff , MrDiffFile , MrPageInfo } ;
61+ use crate :: model:: mr:: MrDiffFile ;
6062
6163#[ derive( Clone ) ]
6264pub struct MonoApiService {
@@ -371,19 +373,42 @@ impl MonoApiService {
371373 Ok ( p_commit_id)
372374 }
373375
376+ pub async fn content_diff ( & self , mr_link : & str ) -> Result < Vec < DiffItem > , GitError > {
377+ let stg = self . storage . mr_storage ( ) ;
378+ let mr =
379+ stg. get_mr ( mr_link) . await . unwrap ( ) . ok_or_else ( || {
380+ GitError :: CustomError ( format ! ( "Merge request not found: {mr_link}" ) )
381+ } ) ?;
382+
383+ let old_blobs = self
384+ . get_commit_blobs ( & mr. from_hash )
385+ . await
386+ . map_err ( |e| GitError :: CustomError ( format ! ( "Failed to get old commit blobs: {e}" ) ) ) ?;
387+ let new_blobs = self
388+ . get_commit_blobs ( & mr. to_hash )
389+ . await
390+ . map_err ( |e| GitError :: CustomError ( format ! ( "Failed to get new commit blobs: {e}" ) ) ) ?;
391+
392+ let diff_output = self . get_diff_by_blobs ( old_blobs, new_blobs) . await ?;
393+
394+ Ok ( diff_output)
395+ }
396+
374397 /// Fetches the content difference for a merge request, paginated by page_id and page_size.
375398 /// # Arguments
376399 /// * `mr_link` - The link to the merge request.
377400 /// * `page_id` - The page number to fetch. (id out of bounds will return empty)
378401 /// * `page_size` - The number of items per page.
379402 /// # Returns
380403 /// a `Result` containing `MrDiff` on success or a `GitError` on failure.
381- pub async fn content_diff (
404+ pub async fn paged_content_diff (
382405 & self ,
383406 mr_link : & str ,
384- page_id : usize ,
385- page_size : usize ,
386- ) -> Result < MrDiff , GitError > {
407+ page : Pagination ,
408+ ) -> Result < ( Vec < DiffItem > , u64 ) , GitError > {
409+ let per_page = page. per_page as usize ;
410+ let page_id = page. page as usize ;
411+
387412 // old and new blobs for comparison
388413 let stg = self . storage . mr_storage ( ) ;
389414 let mr =
@@ -405,8 +430,8 @@ impl MonoApiService {
405430 . await ?;
406431
407432 // ensure page_id is within bounds
408- let start = ( page_id. saturating_sub ( 1 ) ) * page_size ;
409- let end = ( start + page_size ) . min ( sorted_changed_files. len ( ) ) ;
433+ let start = ( page_id. saturating_sub ( 1 ) ) * per_page ;
434+ let end = ( start + per_page ) . min ( sorted_changed_files. len ( ) ) ;
410435
411436 let page_slice: & [ MrDiffFile ] = if start < sorted_changed_files. len ( ) {
412437 let start_idx = start;
@@ -421,6 +446,23 @@ impl MonoApiService {
421446 let mut page_new_blobs = Vec :: new ( ) ;
422447 self . collect_page_blobs ( page_slice, & mut page_old_blobs, & mut page_new_blobs) ;
423448
449+ // get diff output
450+ let diff_output = self
451+ . get_diff_by_blobs ( page_old_blobs, page_new_blobs)
452+ . await
453+ . map_err ( |e| GitError :: CustomError ( format ! ( "Failed to get diff output: {e}" ) ) ) ?;
454+
455+ // calculate total pages
456+ let total = sorted_changed_files. len ( ) . div_ceil ( per_page) ;
457+
458+ Ok ( ( diff_output, total as u64 ) )
459+ }
460+
461+ async fn get_diff_by_blobs (
462+ & self ,
463+ old_blobs : Vec < ( PathBuf , SHA1 ) > ,
464+ new_blobs : Vec < ( PathBuf , SHA1 ) > ,
465+ ) -> Result < Vec < DiffItem > , GitError > {
424466 let mut blob_cache: HashMap < SHA1 , Vec < u8 > > = HashMap :: new ( ) ;
425467
426468 // Collect all unique hashes
@@ -432,41 +474,55 @@ impl MonoApiService {
432474 all_hashes. insert ( * hash) ;
433475 }
434476
435- // Fetch all blobs concurrently
477+ // Fetch all blobs with better error handling and logging
478+ let mut failed_hashes = Vec :: new ( ) ;
436479 for hash in all_hashes {
437480 match self . get_raw_blob_by_hash ( & hash. to_string ( ) ) . await {
438481 Ok ( Some ( blob) ) => {
439482 blob_cache. insert ( hash, blob. data . unwrap_or_default ( ) ) ;
440483 }
441- _ => {
484+ Ok ( None ) => {
485+ tracing:: warn!( "Blob not found for hash: {}" , hash) ;
486+ blob_cache. insert ( hash, Vec :: new ( ) ) ;
487+ }
488+ Err ( e) => {
489+ tracing:: error!( "Failed to fetch blob {}: {}" , hash, e) ;
490+ failed_hashes. push ( hash) ;
442491 blob_cache. insert ( hash, Vec :: new ( ) ) ;
443492 }
444493 }
445494 }
446495
447- // Simple synchronous closure that uses the pre-fetched cache
448- let read_content = |_file : & PathBuf , hash : & SHA1 | -> Vec < u8 > {
449- blob_cache. get ( hash) . cloned ( ) . unwrap_or_default ( )
496+ if !failed_hashes. is_empty ( ) {
497+ tracing:: warn!(
498+ "Failed to fetch {} blob(s): {:?}" ,
499+ failed_hashes. len( ) ,
500+ failed_hashes
501+ ) ;
502+ }
503+
504+ // Enhanced content reader with better error handling
505+ let read_content = |file : & PathBuf , hash : & SHA1 | -> Vec < u8 > {
506+ match blob_cache. get ( hash) {
507+ Some ( content) => content. clone ( ) ,
508+ None => {
509+ tracing:: warn!( "Missing blob content for file: {:?}, hash: {}" , file, hash) ;
510+ Vec :: new ( )
511+ }
512+ }
450513 } ;
451514
452- // Use the unified diff function that returns a single string
515+ // Use the unified diff function with configurable algorithm
453516 let diff_output = Diff :: diff (
454- page_old_blobs ,
455- page_new_blobs ,
517+ old_blobs ,
518+ new_blobs ,
456519 "histogram" . to_string ( ) ,
457520 Vec :: new ( ) ,
458521 read_content,
459522 )
460523 . await ;
461524
462- Ok ( MrDiff {
463- data : diff_output,
464- page_info : Some ( MrPageInfo {
465- total_pages : ( sorted_changed_files. len ( ) - 1 ) . div_ceil ( page_size) ,
466- current_page : page_id,
467- page_size,
468- } ) ,
469- } )
525+ Ok ( diff_output)
470526 }
471527
472528 fn collect_page_blobs (
@@ -573,7 +629,7 @@ impl MonoApiService {
573629#[ cfg( test) ]
574630mod test {
575631 use super :: * ;
576- use crate :: model:: mr:: { MrDiffFile , MrPageInfo } ;
632+ use crate :: model:: mr:: MrDiffFile ;
577633 use mercury:: hash:: SHA1 ;
578634 use std:: path:: PathBuf ;
579635 use std:: str:: FromStr ;
@@ -798,20 +854,18 @@ mod test {
798854 }
799855
800856 #[ test]
801- fn test_paging_page_info_construction ( ) {
857+ fn test_paging_algorithm ( ) {
802858 let total_files = 10usize ;
803859 let current_page = 2u32 ;
804860 let page_size = 3u32 ;
805861
806- let page_info = MrPageInfo {
807- total_pages : ( total_files + page_size as usize - 1 ) / page_size as usize ,
808- current_page : current_page as usize ,
809- page_size : page_size as usize ,
810- } ;
862+ let total_pages = ( total_files + page_size as usize - 1 ) / page_size as usize ;
863+ let current_page = current_page as usize ;
864+ let page_size = page_size as usize ;
811865
812- assert_eq ! ( page_info . total_pages, 4 ) ;
813- assert_eq ! ( page_info . current_page, 2 ) ;
814- assert_eq ! ( page_info . page_size, 3 ) ;
866+ assert_eq ! ( total_pages, 4 ) ;
867+ assert_eq ! ( current_page, 2 ) ;
868+ assert_eq ! ( page_size, 3 ) ;
815869 }
816870
817871 #[ test]
@@ -914,4 +968,100 @@ mod test {
914968 assert_eq ! ( new_blobs[ 0 ] . 0 , PathBuf :: from( "new.txt" ) ) ;
915969 assert_eq ! ( new_blobs[ 1 ] . 0 , PathBuf :: from( "modified.txt" ) ) ;
916970 }
971+
972+ #[ tokio:: test]
973+ async fn test_content_diff_functionality ( ) {
974+ use mercury:: internal:: object:: blob:: Blob ;
975+ use std:: collections:: HashMap ;
976+
977+ // Test basic diff generation with sample data
978+ let old_content = "Hello World\n Line 2\n Line 3" ;
979+ let new_content = "Hello Universe\n Line 2\n Line 3 modified" ;
980+
981+ let old_blob = Blob :: from_content ( old_content) ;
982+ let new_blob = Blob :: from_content ( new_content) ;
983+
984+ let old_blobs = vec ! [ ( PathBuf :: from( "test_file.txt" ) , old_blob. id) ] ;
985+ let new_blobs = vec ! [ ( PathBuf :: from( "test_file.txt" ) , new_blob. id) ] ;
986+
987+ // Create a blob cache for the test
988+ let mut blob_cache: HashMap < SHA1 , Vec < u8 > > = HashMap :: new ( ) ;
989+ blob_cache. insert ( old_blob. id , old_content. as_bytes ( ) . to_vec ( ) ) ;
990+ blob_cache. insert ( new_blob. id , new_content. as_bytes ( ) . to_vec ( ) ) ;
991+
992+ // Test the diff engine directly
993+ let read_content = |_file : & PathBuf , hash : & SHA1 | -> Vec < u8 > {
994+ blob_cache. get ( hash) . cloned ( ) . unwrap_or_default ( )
995+ } ;
996+
997+ let diff_output = Diff :: diff (
998+ old_blobs,
999+ new_blobs,
1000+ "histogram" . to_string ( ) ,
1001+ Vec :: new ( ) ,
1002+ read_content,
1003+ )
1004+ . await ;
1005+
1006+ // Verify diff output contains expected content
1007+ assert ! ( !diff_output. is_empty( ) , "Diff output should not be empty" ) ;
1008+ assert_eq ! ( diff_output. len( ) , 1 , "Should have diff for one file" ) ;
1009+
1010+ let diff_item = & diff_output[ 0 ] ;
1011+ assert_eq ! ( diff_item. path, "test_file.txt" ) ;
1012+ assert ! (
1013+ diff_item. data. contains( "diff --git" ) ,
1014+ "Should contain git diff header"
1015+ ) ;
1016+ assert ! (
1017+ diff_item. data. contains( "-Hello World" ) ,
1018+ "Should show removed line"
1019+ ) ;
1020+ assert ! (
1021+ diff_item. data. contains( "+Hello Universe" ) ,
1022+ "Should show added line"
1023+ ) ;
1024+ assert ! ( diff_item. data. contains( "-Line 3" ) , "Should show old line 3" ) ;
1025+ assert ! (
1026+ diff_item. data. contains( "+Line 3 modified" ) ,
1027+ "Should show new line 3"
1028+ ) ;
1029+ }
1030+
1031+ #[ tokio:: test]
1032+ async fn test_get_diff_by_blobs_with_empty_content ( ) {
1033+ // Test diff generation with empty content (simulating missing blobs)
1034+ let old_hash = SHA1 :: from_str ( "1234567890123456789012345678901234567890" ) . unwrap ( ) ;
1035+ let new_hash = SHA1 :: from_str ( "abcdefabcdefabcdefabcdefabcdefabcdefabcd" ) . unwrap ( ) ;
1036+
1037+ let old_blobs = vec ! [ ( PathBuf :: from( "empty_file.txt" ) , old_hash) ] ;
1038+ let new_blobs = vec ! [ ( PathBuf :: from( "empty_file.txt" ) , new_hash) ] ;
1039+
1040+ // Create empty blob cache to simulate missing blobs
1041+ let blob_cache: HashMap < SHA1 , Vec < u8 > > = HashMap :: new ( ) ;
1042+
1043+ let read_content = |_file : & PathBuf , hash : & SHA1 | -> Vec < u8 > {
1044+ blob_cache. get ( hash) . cloned ( ) . unwrap_or_default ( )
1045+ } ;
1046+
1047+ // Test the diff engine with empty content
1048+ let diff_output = Diff :: diff (
1049+ old_blobs,
1050+ new_blobs,
1051+ "histogram" . to_string ( ) ,
1052+ Vec :: new ( ) ,
1053+ read_content,
1054+ )
1055+ . await ;
1056+
1057+ assert ! (
1058+ !diff_output. is_empty( ) ,
1059+ "Should generate diff even with empty blobs"
1060+ ) ;
1061+ assert_eq ! ( diff_output[ 0 ] . path, "empty_file.txt" ) ;
1062+ assert ! (
1063+ diff_output[ 0 ] . data. contains( "diff --git" ) ,
1064+ "Should contain git diff header"
1065+ ) ;
1066+ }
9171067}
0 commit comments