Skip to content

Commit 6bfd2f0

Browse files
authored
[mono]UPDATE: Alter to CommonPage and Restored content_diff for debug… (#1335)
* [mono]UPDATE: Alter to CommonPage and Restored content_diff for debugging Signed-off-by: AidCheng <cn.aiden.cheng@gmail.com> * [mono]:UPDATE: removed unused service in unit test Signed-off-by: AidCheng <cn.aiden.cheng@gmail.com> * [mono]:CLIPPY FIX Signed-off-by: AidCheng <cn.aiden.cheng@gmail.com> * FIX: delete comments, add docs, use to_string_lossy to replace unwarp Signed-off-by: AidCheng <cn.aiden.cheng@gmail.com> --------- Signed-off-by: AidCheng <cn.aiden.cheng@gmail.com>
1 parent 02607e5 commit 6bfd2f0

File tree

12 files changed

+292
-100
lines changed

12 files changed

+292
-100
lines changed

ceres/src/api_service/mono_api_service.rs

Lines changed: 183 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use async_trait::async_trait;
4444
use callisto::sea_orm_active_enums::ConvTypeEnum;
4545
use callisto::{mega_blob, mega_mr, mega_tree, raw_blob};
4646
use common::errors::MegaError;
47+
use common::model::Pagination;
4748
use jupiter::storage::base_storage::StorageConnector;
4849
use jupiter::storage::Storage;
4950
use jupiter::utils::converter::generate_git_keep_with_timestamp;
@@ -52,11 +53,12 @@ use mercury::hash::SHA1;
5253
use mercury::internal::object::blob::Blob;
5354
use mercury::internal::object::commit::Commit;
5455
use mercury::internal::object::tree::{Tree, TreeItem, TreeItemMode};
56+
use neptune::model::diff_model::DiffItem;
5557
use neptune::neptune_engine::Diff;
5658

5759
use crate::api_service::ApiHandler;
5860
use crate::model::git::CreateFileInfo;
59-
use crate::model::mr::{MrDiff, MrDiffFile, MrPageInfo};
61+
use crate::model::mr::MrDiffFile;
6062

6163
#[derive(Clone)]
6264
pub struct MonoApiService {
@@ -371,19 +373,42 @@ impl MonoApiService {
371373
Ok(p_commit_id)
372374
}
373375

376+
pub async fn content_diff(&self, mr_link: &str) -> Result<Vec<DiffItem>, GitError> {
377+
let stg = self.storage.mr_storage();
378+
let mr =
379+
stg.get_mr(mr_link).await.unwrap().ok_or_else(|| {
380+
GitError::CustomError(format!("Merge request not found: {mr_link}"))
381+
})?;
382+
383+
let old_blobs = self
384+
.get_commit_blobs(&mr.from_hash)
385+
.await
386+
.map_err(|e| GitError::CustomError(format!("Failed to get old commit blobs: {e}")))?;
387+
let new_blobs = self
388+
.get_commit_blobs(&mr.to_hash)
389+
.await
390+
.map_err(|e| GitError::CustomError(format!("Failed to get new commit blobs: {e}")))?;
391+
392+
let diff_output = self.get_diff_by_blobs(old_blobs, new_blobs).await?;
393+
394+
Ok(diff_output)
395+
}
396+
374397
/// Fetches the content difference for a merge request, paginated by page_id and page_size.
375398
/// # Arguments
376399
/// * `mr_link` - The link to the merge request.
377400
/// * `page_id` - The page number to fetch. (id out of bounds will return empty)
378401
/// * `page_size` - The number of items per page.
379402
/// # Returns
380403
/// a `Result` containing `MrDiff` on success or a `GitError` on failure.
381-
pub async fn content_diff(
404+
pub async fn paged_content_diff(
382405
&self,
383406
mr_link: &str,
384-
page_id: usize,
385-
page_size: usize,
386-
) -> Result<MrDiff, GitError> {
407+
page: Pagination,
408+
) -> Result<(Vec<DiffItem>, u64), GitError> {
409+
let per_page = page.per_page as usize;
410+
let page_id = page.page as usize;
411+
387412
// old and new blobs for comparison
388413
let stg = self.storage.mr_storage();
389414
let mr =
@@ -405,8 +430,8 @@ impl MonoApiService {
405430
.await?;
406431

407432
// ensure page_id is within bounds
408-
let start = (page_id.saturating_sub(1)) * page_size;
409-
let end = (start + page_size).min(sorted_changed_files.len());
433+
let start = (page_id.saturating_sub(1)) * per_page;
434+
let end = (start + per_page).min(sorted_changed_files.len());
410435

411436
let page_slice: &[MrDiffFile] = if start < sorted_changed_files.len() {
412437
let start_idx = start;
@@ -421,6 +446,23 @@ impl MonoApiService {
421446
let mut page_new_blobs = Vec::new();
422447
self.collect_page_blobs(page_slice, &mut page_old_blobs, &mut page_new_blobs);
423448

449+
// get diff output
450+
let diff_output = self
451+
.get_diff_by_blobs(page_old_blobs, page_new_blobs)
452+
.await
453+
.map_err(|e| GitError::CustomError(format!("Failed to get diff output: {e}")))?;
454+
455+
// calculate total pages
456+
let total = sorted_changed_files.len().div_ceil(per_page);
457+
458+
Ok((diff_output, total as u64))
459+
}
460+
461+
async fn get_diff_by_blobs(
462+
&self,
463+
old_blobs: Vec<(PathBuf, SHA1)>,
464+
new_blobs: Vec<(PathBuf, SHA1)>,
465+
) -> Result<Vec<DiffItem>, GitError> {
424466
let mut blob_cache: HashMap<SHA1, Vec<u8>> = HashMap::new();
425467

426468
// Collect all unique hashes
@@ -432,41 +474,55 @@ impl MonoApiService {
432474
all_hashes.insert(*hash);
433475
}
434476

435-
// Fetch all blobs concurrently
477+
// Fetch all blobs with better error handling and logging
478+
let mut failed_hashes = Vec::new();
436479
for hash in all_hashes {
437480
match self.get_raw_blob_by_hash(&hash.to_string()).await {
438481
Ok(Some(blob)) => {
439482
blob_cache.insert(hash, blob.data.unwrap_or_default());
440483
}
441-
_ => {
484+
Ok(None) => {
485+
tracing::warn!("Blob not found for hash: {}", hash);
486+
blob_cache.insert(hash, Vec::new());
487+
}
488+
Err(e) => {
489+
tracing::error!("Failed to fetch blob {}: {}", hash, e);
490+
failed_hashes.push(hash);
442491
blob_cache.insert(hash, Vec::new());
443492
}
444493
}
445494
}
446495

447-
// Simple synchronous closure that uses the pre-fetched cache
448-
let read_content = |_file: &PathBuf, hash: &SHA1| -> Vec<u8> {
449-
blob_cache.get(hash).cloned().unwrap_or_default()
496+
if !failed_hashes.is_empty() {
497+
tracing::warn!(
498+
"Failed to fetch {} blob(s): {:?}",
499+
failed_hashes.len(),
500+
failed_hashes
501+
);
502+
}
503+
504+
// Enhanced content reader with better error handling
505+
let read_content = |file: &PathBuf, hash: &SHA1| -> Vec<u8> {
506+
match blob_cache.get(hash) {
507+
Some(content) => content.clone(),
508+
None => {
509+
tracing::warn!("Missing blob content for file: {:?}, hash: {}", file, hash);
510+
Vec::new()
511+
}
512+
}
450513
};
451514

452-
// Use the unified diff function that returns a single string
515+
// Use the unified diff function with configurable algorithm
453516
let diff_output = Diff::diff(
454-
page_old_blobs,
455-
page_new_blobs,
517+
old_blobs,
518+
new_blobs,
456519
"histogram".to_string(),
457520
Vec::new(),
458521
read_content,
459522
)
460523
.await;
461524

462-
Ok(MrDiff {
463-
data: diff_output,
464-
page_info: Some(MrPageInfo {
465-
total_pages: (sorted_changed_files.len() - 1).div_ceil(page_size),
466-
current_page: page_id,
467-
page_size,
468-
}),
469-
})
525+
Ok(diff_output)
470526
}
471527

472528
fn collect_page_blobs(
@@ -573,7 +629,7 @@ impl MonoApiService {
573629
#[cfg(test)]
574630
mod test {
575631
use super::*;
576-
use crate::model::mr::{MrDiffFile, MrPageInfo};
632+
use crate::model::mr::MrDiffFile;
577633
use mercury::hash::SHA1;
578634
use std::path::PathBuf;
579635
use std::str::FromStr;
@@ -798,20 +854,18 @@ mod test {
798854
}
799855

800856
#[test]
801-
fn test_paging_page_info_construction() {
857+
fn test_paging_algorithm() {
802858
let total_files = 10usize;
803859
let current_page = 2u32;
804860
let page_size = 3u32;
805861

806-
let page_info = MrPageInfo {
807-
total_pages: (total_files + page_size as usize - 1) / page_size as usize,
808-
current_page: current_page as usize,
809-
page_size: page_size as usize,
810-
};
862+
let total_pages = (total_files + page_size as usize - 1) / page_size as usize;
863+
let current_page = current_page as usize;
864+
let page_size = page_size as usize;
811865

812-
assert_eq!(page_info.total_pages, 4);
813-
assert_eq!(page_info.current_page, 2);
814-
assert_eq!(page_info.page_size, 3);
866+
assert_eq!(total_pages, 4);
867+
assert_eq!(current_page, 2);
868+
assert_eq!(page_size, 3);
815869
}
816870

817871
#[test]
@@ -914,4 +968,100 @@ mod test {
914968
assert_eq!(new_blobs[0].0, PathBuf::from("new.txt"));
915969
assert_eq!(new_blobs[1].0, PathBuf::from("modified.txt"));
916970
}
971+
972+
#[tokio::test]
973+
async fn test_content_diff_functionality() {
974+
use mercury::internal::object::blob::Blob;
975+
use std::collections::HashMap;
976+
977+
// Test basic diff generation with sample data
978+
let old_content = "Hello World\nLine 2\nLine 3";
979+
let new_content = "Hello Universe\nLine 2\nLine 3 modified";
980+
981+
let old_blob = Blob::from_content(old_content);
982+
let new_blob = Blob::from_content(new_content);
983+
984+
let old_blobs = vec![(PathBuf::from("test_file.txt"), old_blob.id)];
985+
let new_blobs = vec![(PathBuf::from("test_file.txt"), new_blob.id)];
986+
987+
// Create a blob cache for the test
988+
let mut blob_cache: HashMap<SHA1, Vec<u8>> = HashMap::new();
989+
blob_cache.insert(old_blob.id, old_content.as_bytes().to_vec());
990+
blob_cache.insert(new_blob.id, new_content.as_bytes().to_vec());
991+
992+
// Test the diff engine directly
993+
let read_content = |_file: &PathBuf, hash: &SHA1| -> Vec<u8> {
994+
blob_cache.get(hash).cloned().unwrap_or_default()
995+
};
996+
997+
let diff_output = Diff::diff(
998+
old_blobs,
999+
new_blobs,
1000+
"histogram".to_string(),
1001+
Vec::new(),
1002+
read_content,
1003+
)
1004+
.await;
1005+
1006+
// Verify diff output contains expected content
1007+
assert!(!diff_output.is_empty(), "Diff output should not be empty");
1008+
assert_eq!(diff_output.len(), 1, "Should have diff for one file");
1009+
1010+
let diff_item = &diff_output[0];
1011+
assert_eq!(diff_item.path, "test_file.txt");
1012+
assert!(
1013+
diff_item.data.contains("diff --git"),
1014+
"Should contain git diff header"
1015+
);
1016+
assert!(
1017+
diff_item.data.contains("-Hello World"),
1018+
"Should show removed line"
1019+
);
1020+
assert!(
1021+
diff_item.data.contains("+Hello Universe"),
1022+
"Should show added line"
1023+
);
1024+
assert!(diff_item.data.contains("-Line 3"), "Should show old line 3");
1025+
assert!(
1026+
diff_item.data.contains("+Line 3 modified"),
1027+
"Should show new line 3"
1028+
);
1029+
}
1030+
1031+
#[tokio::test]
1032+
async fn test_get_diff_by_blobs_with_empty_content() {
1033+
// Test diff generation with empty content (simulating missing blobs)
1034+
let old_hash = SHA1::from_str("1234567890123456789012345678901234567890").unwrap();
1035+
let new_hash = SHA1::from_str("abcdefabcdefabcdefabcdefabcdefabcdefabcd").unwrap();
1036+
1037+
let old_blobs = vec![(PathBuf::from("empty_file.txt"), old_hash)];
1038+
let new_blobs = vec![(PathBuf::from("empty_file.txt"), new_hash)];
1039+
1040+
// Create empty blob cache to simulate missing blobs
1041+
let blob_cache: HashMap<SHA1, Vec<u8>> = HashMap::new();
1042+
1043+
let read_content = |_file: &PathBuf, hash: &SHA1| -> Vec<u8> {
1044+
blob_cache.get(hash).cloned().unwrap_or_default()
1045+
};
1046+
1047+
// Test the diff engine with empty content
1048+
let diff_output = Diff::diff(
1049+
old_blobs,
1050+
new_blobs,
1051+
"histogram".to_string(),
1052+
Vec::new(),
1053+
read_content,
1054+
)
1055+
.await;
1056+
1057+
assert!(
1058+
!diff_output.is_empty(),
1059+
"Should generate diff even with empty blobs"
1060+
);
1061+
assert_eq!(diff_output[0].path, "empty_file.txt");
1062+
assert!(
1063+
diff_output[0].data.contains("diff --git"),
1064+
"Should contain git diff header"
1065+
);
1066+
}
9171067
}

ceres/src/model/mr.rs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::path::PathBuf;
22

33
use serde::{Deserialize, Serialize};
4-
use utoipa::ToSchema;
54

65
use mercury::hash::SHA1;
76

@@ -31,19 +30,6 @@ impl MrDiffFile {
3130
}
3231
}
3332

34-
#[derive(Debug, ToSchema, Serialize, Deserialize)]
35-
pub struct MrDiff {
36-
pub data: String,
37-
pub page_info: Option<MrPageInfo>,
38-
}
39-
40-
#[derive(Debug, ToSchema, Serialize, Deserialize)]
41-
pub struct MrPageInfo {
42-
pub total_pages: usize,
43-
pub current_page: usize,
44-
pub page_size: usize,
45-
}
46-
4733
#[derive(Serialize)]
4834
pub struct BuckFile {
4935
pub buck: SHA1,

common/src/model.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ pub struct PageParams<T> {
7777
}
7878

7979
#[derive(PartialEq, Eq, Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
80-
8180
pub struct CommonPage<T> {
8281
pub total: u64,
8382
pub items: Vec<T>,

libra/src/command/diff.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,12 @@ pub async fn execute(args: DiffArgs) {
147147
)
148148
.await;
149149

150+
let results: Vec<String> = diff_output.iter().map(|i| i.data.clone()).collect();
151+
150152
// Handle output - libra processes the string according to its needs
151153
match w {
152154
Some(ref mut file) => {
153-
file.write_all(diff_output.as_bytes()).unwrap();
155+
file.write_all(results.join("").as_bytes()).unwrap();
154156
}
155157
None => {
156158
#[cfg(unix)]
@@ -162,7 +164,7 @@ pub async fn execute(args: DiffArgs) {
162164
.spawn()
163165
.expect("failed to execute process");
164166
let stdin = child.stdin.as_mut().unwrap();
165-
stdin.write_all(diff_output.as_bytes()).unwrap();
167+
stdin.write_all(results.join("").as_bytes()).unwrap();
166168
child.wait().unwrap();
167169
}
168170
#[cfg(not(unix))]

mono/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ utoipa = { workspace = true, features = ["axum_extras"] }
5959
utoipa-axum = { workspace = true }
6060
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
6161
uuid = { workspace = true, features = ["v4"] }
62+
neptune = { workspace = true }
6263

6364

6465

0 commit comments

Comments
 (0)