@@ -9,9 +9,9 @@ use lru_mem::{HeapSize, MemSize};
99use serde:: { Deserialize , Serialize } ;
1010use threadpool:: ThreadPool ;
1111
12+ use crate :: internal:: pack:: entry:: Entry ;
1213use crate :: internal:: pack:: utils;
1314use crate :: { hash:: SHA1 , internal:: object:: types:: ObjectType } ;
14- use crate :: internal:: pack:: entry:: Entry ;
1515
1616// /// record heap-size of all CacheObjects, used for memory limit.
1717// static CACHE_OBJS_MEM_SIZE: AtomicUsize = AtomicUsize::new(0);
@@ -56,7 +56,14 @@ pub struct CacheObject {
5656 pub data_decompress : Vec < u8 > ,
5757 pub offset : usize ,
5858 pub hash : SHA1 ,
59- pub mem_recorder : Option < Arc < AtomicUsize > > // record mem-size of all CacheObjects of a Pack
59+ /// If a [`CacheObject`] is an [`ObjectType::HashDelta`] or an [`ObjectType::OffsetDelta`],
60+ /// it will expand to another [`CacheObject`] of other types. To prevent potential OOM,
61+ /// we record the size of the expanded object as well as that of the object itself.
62+ ///
63+ /// See [Comment in PR #755](https://github.com/web3infra-foundation/mega/pull/755#issuecomment-2543100481) for more details.
64+ #[ serde( skip, default = "usize::default" ) ]
65+ pub delta_final_size : usize ,
66+ pub mem_recorder : Option < Arc < AtomicUsize > > , // record mem-size of all CacheObjects of a Pack
6067}
6168
6269impl Clone for CacheObject {
@@ -68,6 +75,7 @@ impl Clone for CacheObject {
6875 data_decompress : self . data_decompress . clone ( ) ,
6976 offset : self . offset ,
7077 hash : self . hash ,
78+ delta_final_size : self . delta_final_size ,
7179 mem_recorder : self . mem_recorder . clone ( ) ,
7280 } ;
7381 obj. record_mem_size ( ) ;
@@ -87,6 +95,7 @@ impl Default for CacheObject {
8795 obj_type : ObjectType :: Blob ,
8896 offset : 0 ,
8997 hash : SHA1 :: default ( ) ,
98+ delta_final_size : 0 ,
9099 mem_recorder : None ,
91100 } ;
92101 obj. record_mem_size ( ) ;
@@ -98,8 +107,21 @@ impl Default for CacheObject {
// NOTE: this `HeapSize` implementation is approximate: it only accounts for the heap usage of
// `data_decompress` (plus `delta_final_size`, which is non-zero only for delta objects).
// Also note that `mem_size == value_size + heap_size`; only `HeapSize` has to be implemented
// because `value_size` is already known.
100109impl HeapSize for CacheObject {
110+ /// For [`ObjectType::OffsetDelta`] and [`ObjectType::HashDelta`],
111+ /// `delta_final_size` is the size of the expanded object;
112+ /// for other types, `delta_final_size` is 0 as they won't expand.
101113 fn heap_size ( & self ) -> usize {
102- self . data_decompress . heap_size ( )
114+ // To those who are concerned about why these two values are added,
115+ // let's consider the lifetime of two `CacheObject`s, say `delta_obj`
116+ // and `final_obj` in the function `Pack::rebuild_delta`.
117+ //
118+ // `delta_obj` is dropped only after `Pack::rebuild_delta` returns,
119+ // but the space for `final_obj` is allocated in that function.
120+ //
121+ // Therefore, during the execution of `Pack::rebuild_delta`, both `delta_obj`
122+ // and `final_obj` coexist. The maximum memory usage is the sum of the memory
123+ // usage of `delta_obj` and `final_obj`.
124+ self . data_decompress . heap_size ( ) + self . delta_final_size
103125 }
104126}
105127
@@ -111,7 +133,6 @@ impl Drop for CacheObject {
111133 if let Some ( mem_recorder) = & self . mem_recorder {
112134 mem_recorder. fetch_sub ( ( * self ) . mem_size ( ) , Ordering :: SeqCst ) ;
113135 }
114-
115136 }
116137}
117138
@@ -146,14 +167,15 @@ impl MemSizeRecorder for CacheObject {
146167}
147168
148169impl CacheObject {
149- /// Create a new CacheObject witch is not offset_delta or hash_delta
170+ /// Create a new CacheObject which is neither [`ObjectType::OffsetDelta`] nor [`ObjectType::HashDelta`].
150171 pub fn new_for_undeltified ( obj_type : ObjectType , data : Vec < u8 > , offset : usize ) -> Self {
151172 let hash = utils:: calculate_object_hash ( obj_type, & data) ;
152173 CacheObject {
153174 data_decompress : data,
154175 obj_type,
155176 offset,
156177 hash,
178+ delta_final_size : 0 , // Only delta objects have `delta_final_size`
157179 mem_recorder : None ,
158180 ..Default :: default ( )
159181 }
@@ -162,13 +184,11 @@ impl CacheObject {
162184 /// transform the CacheObject to Entry
163185 pub fn to_entry ( & self ) -> Entry {
164186 match self . obj_type {
165- ObjectType :: Blob | ObjectType :: Tree | ObjectType :: Commit | ObjectType :: Tag => {
166- Entry {
167- obj_type : self . obj_type ,
168- data : self . data_decompress . clone ( ) ,
169- hash : self . hash ,
170- }
171- }
187+ ObjectType :: Blob | ObjectType :: Tree | ObjectType :: Commit | ObjectType :: Tag => Entry {
188+ obj_type : self . obj_type ,
189+ data : self . data_decompress . clone ( ) ,
190+ hash : self . hash ,
191+ } ,
172192 _ => {
173193 unreachable ! ( "delta object should not persist!" )
174194 }
@@ -177,10 +197,16 @@ impl CacheObject {
177197}
178198
179199/// trait alias for simple use
180- pub trait ArcWrapperBounds : HeapSize + Serialize + for < ' a > Deserialize < ' a > + Send + Sync + ' static { }
200+ pub trait ArcWrapperBounds :
201+ HeapSize + Serialize + for < ' a > Deserialize < ' a > + Send + Sync + ' static
202+ {
203+ }
181204// You must impl `Alias Trait` for all the `T` satisfying Constraints
182205// Or, `T` will not satisfy `Alias Trait` even if it satisfies the Original traits
183- impl < T : HeapSize + Serialize + for < ' a > Deserialize < ' a > + Send + Sync + ' static > ArcWrapperBounds for T { }
206+ impl < T : HeapSize + Serialize + for < ' a > Deserialize < ' a > + Send + Sync + ' static > ArcWrapperBounds
207+ for T
208+ {
209+ }
184210
185211/// Implementing encapsulation of Arc to enable third-party Trait HeapSize implementation for the Arc type
186212/// Because of use Arc in LruCache, the LruCache is not clear whether a pointer will drop the referenced
@@ -300,6 +326,7 @@ mod test {
300326 obj_type : ObjectType :: Blob ,
301327 offset : 0 ,
302328 hash : SHA1 :: new ( & vec ! [ 0 ; 20 ] ) ,
329+ delta_final_size : 0 ,
303330 mem_recorder : None ,
304331 } ;
305332 assert ! ( a. heap_size( ) == 1024 ) ;
@@ -318,6 +345,7 @@ mod test {
318345 obj_type : ObjectType :: Blob ,
319346 offset : 0 ,
320347 hash : SHA1 :: new ( & vec ! [ 0 ; 20 ] ) ,
348+ delta_final_size : 0 ,
321349 mem_recorder : None ,
322350 } ;
323351 println ! ( "a.heap_size() = {}" , a. heap_size( ) ) ;
@@ -329,6 +357,7 @@ mod test {
329357 obj_type : ObjectType :: Blob ,
330358 offset : 0 ,
331359 hash : SHA1 :: new ( & vec ! [ 1 ; 20 ] ) ,
360+ delta_final_size : 0 ,
332361 mem_recorder : None ,
333362 } ;
334363 {
@@ -433,6 +462,7 @@ mod test {
433462 obj_type : ObjectType :: Blob ,
434463 offset : 0 ,
435464 hash : SHA1 :: new ( & vec ! [ 0 ; 20 ] ) ,
465+ delta_final_size : 0 ,
436466 mem_recorder : None ,
437467 } ;
438468 let s = bincode:: serialize ( & a) . unwrap ( ) ;
0 commit comments