@@ -11,6 +11,7 @@ use nydus_storage::device::BlobInfo;
 use rusqlite::{params, Connection};
 use std::fs;
 use std::path::Path;
+use std::result::Result::Ok;
 use std::sync::{Arc, Mutex};

 #[derive(Debug)]
@@ -66,15 +67,11 @@ pub struct SqliteDatabase {

 impl SqliteDatabase {
     pub fn new(database_url: &str) -> Result<Self, rusqlite::Error> {
-        // Delete the database file if it exists.
+        // Connect to a database that already exists
         if let Ok(metadata) = fs::metadata(database_url) {
             if metadata.is_file() {
-                if let Err(err) = fs::remove_file(database_url) {
-                    warn!(
-                        "Warning: Unable to delete existing database file: {:?}.",
-                        err
-                    );
-                }
+            } else {
+                panic!("Warning: Unable to find existing database file.");
             }
         }

@@ -95,24 +92,6 @@ impl SqliteDatabase {
             blob_table,
         })
     }
-
-    pub fn connect(database_url: &str) -> Result<Self, rusqlite::Error> {
-        // Connect to a database that already exists
-        if let Ok(metadata) = fs::metadata(database_url) {
-            if metadata.is_file() {
-            } else {
-                panic!("Warning: Unable to find existing database file.");
-            }
-        }
-
-        let chunk_table = ChunkTable::new(database_url)?;
-        let blob_table = BlobTable::new(database_url)?;
-
-        Ok(Self {
-            chunk_table,
-            blob_table,
-        })
-    }
 }

 impl Database for SqliteDatabase {
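Note on the two hunks above: SqliteDatabase::new absorbs the semantics of the removed connect constructor. It no longer deletes a pre-existing database file, and it panics only when the path exists but is not a regular file; a missing file simply falls through to ChunkTable::new and BlobTable::new, which open the SQLite connection (rusqlite's default open flags create the file if needed). A minimal caller sketch, with an illustrative path not taken from the patch:

    // Opens (or, for a nonexistent path, creates) the chunk/blob tables;
    // panics if database_url exists but is a directory or other non-file.
    let db = SqliteDatabase::new("/tmp/metadata.db")?;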
@@ -163,11 +142,15 @@ impl Deduplicate<SqliteDatabase> {
         Ok(Self { sb, db })
     }

-    pub fn save_metadata(&mut self) -> anyhow::Result<Vec<Arc<BlobInfo>>> {
+    pub fn save_metadata(
+        &mut self,
+        image_name: String,
+        version_name: String,
+    ) -> anyhow::Result<Vec<Arc<BlobInfo>>> {
         self.create_tables()?;
         let blob_infos = self.sb.superblock.get_blob_infos();
         self.insert_blobs(&blob_infos)?;
-        self.insert_chunks(&blob_infos)?;
+        self.insert_chunks(&blob_infos, image_name, version_name)?;
         Ok(blob_infos)
     }

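save_metadata now threads the image and version identity down through insert_chunks to every chunk row. A hypothetical call site (the receiver and the two name strings are illustrative, not from the patch):

    // Tag each chunk from this bootstrap with the image/version it belongs
    // to, so later passes can group and compare chunks across images.
    let blob_infos = dedup.save_metadata("redis".to_string(), "7.0.4".to_string())?;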
@@ -194,14 +177,21 @@ impl Deduplicate<SqliteDatabase> {
         Ok(())
     }

-    fn insert_chunks(&mut self, blob_infos: &[Arc<BlobInfo>]) -> anyhow::Result<()> {
+    fn insert_chunks(
+        &mut self,
+        blob_infos: &[Arc<BlobInfo>],
+        image_name: String,
+        version_name: String,
+    ) -> anyhow::Result<()> {
         let process_chunk = &mut |t: &Tree| -> Result<()> {
             let node = t.lock_node();
             for chunk in &node.chunks {
                 let index = chunk.inner.blob_index();
                 let chunk_blob_id = blob_infos[index as usize].blob_id();
                 self.db
                     .insert_chunk(&Chunk {
+                        image_name: image_name.to_string(),
+                        version_name: version_name.to_string(),
                         chunk_blob_id,
                         chunk_digest: chunk.inner.id().to_string(),
                         chunk_compressed_size: chunk.inner.compressed_size(),
@@ -220,75 +210,14 @@ impl Deduplicate<SqliteDatabase> {
     }
 }

-pub struct Algorithm<D: Database + Send + Sync> {
-    algorithm_name: String,
-    db: D,
-}
-
-impl Algorithm<SqliteDatabase> {
-    pub fn new(algorithm: String, db_url: &str) -> anyhow::Result<Self> {
-        let algorithm_name = algorithm;
-        let db = SqliteDatabase::connect(db_url)?;
-        Ok(Self { algorithm_name, db })
-    }
-
-    pub fn chunkdict_generate(&mut self) -> anyhow::Result<Vec<Chunk>> {
-        let all_chunks = self.db.chunk_table.list_all()?;
-        let chunkdict = match &self.algorithm_name as &str {
-            "exponential_smoothing" => Self::exponential_smoothing(self, all_chunks)?,
-            _ => {
-                bail!("Unsupported algorithm name:, please use a valid algorithm name, such as exponential_smoothing")
-            }
-        };
-        Ok(chunkdict)
-    }
-
-    // Algorithm "exponential_smoothing"
-    // List all chunk and sort them by the order in chunk table
-    // Score each chunk by "exponential_smoothing" formula
-    // Select chunks whose score is greater than threshold and generate chunk dictionary
-    fn exponential_smoothing(&mut self, all_chunks: Vec<Chunk>) -> anyhow::Result<Vec<Chunk>> {
-        let alpha = 0.5;
-        let previou_length = 1000;
-        let threshold = 0.1;
-        let mut smoothed_data = Vec::new();
-        for (chunk_index, chunk) in all_chunks.iter().enumerate() {
-            let mut is_duplicate: f64 = 0.0;
-            let mut temp = 0;
-            if chunk_index > previou_length {
-                temp = chunk_index - previou_length;
-            }
-
-            for previou_index in all_chunks.iter().take(chunk_index).skip(temp) {
-                if chunk.chunk_digest == previou_index.chunk_digest {
-                    is_duplicate = 1.0;
-                    break;
-                }
-            }
-            if chunk_index == 0 {
-                smoothed_data.push(0.0);
-            } else {
-                let smoothed_score: f64 =
-                    alpha * is_duplicate + (1.0 - alpha) * smoothed_data[chunk_index - 1];
-                smoothed_data.push(smoothed_score);
-            }
-        }
-        let mut chunkdict: Vec<Chunk> = Vec::new();
-        for i in 0..smoothed_data.len() {
-            let chunk = Chunk {
-                chunk_blob_id: all_chunks[i].chunk_blob_id.clone(),
-                chunk_digest: all_chunks[i].chunk_digest.clone(),
-                chunk_compressed_offset: all_chunks[i].chunk_compressed_offset,
-                chunk_uncompressed_offset: all_chunks[i].chunk_uncompressed_offset,
-                chunk_compressed_size: all_chunks[i].chunk_compressed_size,
-                chunk_uncompressed_size: all_chunks[i].chunk_uncompressed_size,
-            };
-            if smoothed_data[i] > threshold {
-                chunkdict.push(chunk);
-            }
-        }
-        Ok(chunkdict)
-    }
+#[allow(dead_code)]
+#[derive(Debug)]
+struct DataPoint {
+    image_name: String,
+    chunk_list: Vec<Chunk>,
+    visited: bool,
+    clustered: bool,
+    cluster_id: i32,
 }

 pub trait Table<T, Err>: Sync + Send + Sized + 'static
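For reference, the removed exponential_smoothing pass scored chunk i with the recurrence s_0 = 0 and s_i = alpha * d_i + (1 - alpha) * s_{i-1}, where alpha = 0.5 and d_i is 1 if chunk i's digest already occurs among the previous 1000 chunks and 0 otherwise; a chunk entered the dictionary when s_i exceeded the 0.1 threshold. Any chunk with a recent duplicate therefore scored at least 0.5, and after the last duplicate the score decayed 0.25, 0.125, 0.0625, staying above the threshold for three more chunks — which is why the removed test below expected 198 of its 200 alternating-digest chunks. The DataPoint struct added in its place (visited, clustered, cluster_id) reads like bookkeeping for a clustering-based replacement, but this patch does not yet include that algorithm.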
@@ -311,7 +240,7 @@ where
     fn list_paged(&self, offset: i64, limit: i64) -> Result<Vec<T>, Err>;
 }

-#[derive(Debug)]
+#[derive()]
 pub struct ChunkTable {
     conn: Arc<Mutex<Connection>>,
 }
@@ -332,8 +261,10 @@ impl ChunkTable {
     }
 }

-#[derive(Debug)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct Chunk {
+    image_name: String,
+    version_name: String,
     chunk_blob_id: String,
     chunk_digest: String,
     chunk_compressed_size: u32,
@@ -359,6 +290,8 @@ impl Table<Chunk, DatabaseError> for ChunkTable {
             .execute(
                 "CREATE TABLE IF NOT EXISTS chunk (
                     id INTEGER PRIMARY KEY,
+                    image_name TEXT,
+                    version_name TEXT,
                     chunk_blob_id TEXT NOT NULL,
                     chunk_digest TEXT,
                     chunk_compressed_size INT,
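One caveat with the hunk above: CREATE TABLE IF NOT EXISTS never alters an existing table, and since new() no longer deletes the database file, a chunk table created before this change will lack the two new columns and the widened INSERT below will fail at runtime. A migration sketch, assuming direct access to the underlying connection (not part of the patch):

    // One-off upgrade for pre-existing databases; SQLite returns an error
    // if a column already exists, so run these only against old schemas.
    conn.execute("ALTER TABLE chunk ADD COLUMN image_name TEXT", [])?;
    conn.execute("ALTER TABLE chunk ADD COLUMN version_name TEXT", [])?;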
@@ -378,16 +311,20 @@ impl Table<Chunk, DatabaseError> for ChunkTable {
             .map_err(|e| DatabaseError::PoisonError(e.to_string()))?
             .execute(
                 "INSERT INTO chunk(
+                    image_name,
+                    version_name,
                     chunk_blob_id,
                     chunk_digest,
                     chunk_compressed_size,
                     chunk_uncompressed_size,
                     chunk_compressed_offset,
                     chunk_uncompressed_offset
                 )
-                VALUES (?1, ?2, ?3, ?4, ?5, ?6);
+                VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8);
                 ",
                 rusqlite::params![
+                    chunk.image_name,
+                    chunk.version_name,
                     chunk.chunk_blob_id,
                     chunk.chunk_digest,
                     chunk.chunk_compressed_size,
@@ -425,18 +362,20 @@ impl Table<Chunk, DatabaseError> for ChunkTable {
             .map_err(|e| DatabaseError::PoisonError(e.to_string()))?;
         let mut stmt: rusqlite::Statement<'_> = conn_guard
             .prepare(
-                "SELECT id, chunk_blob_id, chunk_digest, chunk_compressed_size,
+                "SELECT id, image_name, version_name, chunk_blob_id, chunk_digest, chunk_compressed_size,
                 chunk_uncompressed_size, chunk_compressed_offset, chunk_uncompressed_offset from chunk
                 ORDER BY id LIMIT ?1 OFFSET ?2",
             )?;
         let chunk_iterator = stmt.query_map(params![limit, offset], |row| {
             Ok(Chunk {
-                chunk_blob_id: row.get(1)?,
-                chunk_digest: row.get(2)?,
-                chunk_compressed_size: row.get(3)?,
-                chunk_uncompressed_size: row.get(4)?,
-                chunk_compressed_offset: row.get(5)?,
-                chunk_uncompressed_offset: row.get(6)?,
+                image_name: row.get(1)?,
+                version_name: row.get(2)?,
+                chunk_blob_id: row.get(3)?,
+                chunk_digest: row.get(4)?,
+                chunk_compressed_size: row.get(5)?,
+                chunk_uncompressed_size: row.get(6)?,
+                chunk_compressed_offset: row.get(7)?,
+                chunk_uncompressed_offset: row.get(8)?,
             })
         })?;
         let mut chunks = Vec::new();
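The mapper above depends on the SELECT column order: id stays at index 0, the two new columns take indices 1 and 2, and the original fields shift to 3 through 8, mirroring the eight INSERT placeholders. A sketch of reading the new columns back with plain rusqlite, outside the patch:

    // Assumes an open rusqlite::Connection `conn` and an existing row with
    // id = 1; shows the column order the query_map closure relies on.
    let (image, version): (String, String) = conn.query_row(
        "SELECT id, image_name, version_name FROM chunk WHERE id = ?1",
        rusqlite::params![1],
        |row| Ok((row.get(1)?, row.get(2)?)),
    )?;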
@@ -593,6 +532,8 @@ mod tests {
         let chunk_table = ChunkTable::new_in_memory()?;
         chunk_table.create()?;
         let chunk = Chunk {
+            image_name: "REDIS".to_string(),
+            version_name: "1.0.0".to_string(),
             chunk_blob_id: "BLOB123".to_string(),
             chunk_digest: "DIGEST123".to_string(),
             chunk_compressed_size: 512,
@@ -602,6 +543,8 @@ mod tests {
         };
         chunk_table.insert(&chunk)?;
         let chunks = chunk_table.list_all()?;
+        assert_eq!(chunks[0].image_name, chunk.image_name);
+        assert_eq!(chunks[0].version_name, chunk.version_name);
         assert_eq!(chunks.len(), 1);
         assert_eq!(chunks[0].chunk_blob_id, chunk.chunk_blob_id);
         assert_eq!(chunks[0].chunk_digest, chunk.chunk_digest);
@@ -648,6 +591,8 @@ mod tests {
         for i in 0..200 {
             let i64 = i as u64;
             let chunk = Chunk {
+                image_name: format!("REDIS{}", i),
+                version_name: format!("1.0.0{}", i),
                 chunk_blob_id: format!("BLOB{}", i),
                 chunk_digest: format!("DIGEST{}", i),
                 chunk_compressed_size: i,
@@ -659,6 +604,8 @@ mod tests {
         }
         let chunks = chunk_table.list_paged(100, 100)?;
         assert_eq!(chunks.len(), 100);
+        assert_eq!(chunks[0].image_name, "REDIS100");
+        assert_eq!(chunks[0].version_name, "1.0.0100");
         assert_eq!(chunks[0].chunk_blob_id, "BLOB100");
         assert_eq!(chunks[0].chunk_digest, "DIGEST100");
         assert_eq!(chunks[0].chunk_compressed_size, 100);
@@ -668,38 +615,4 @@ mod tests {
         Ok(())
     }

-    #[test]
-    fn test_alalgorithm_exponential_smoothing() -> Result<(), Box<dyn std::error::Error>> {
-        let algorithm = String::from("exponential_smoothing");
-        let db_url = "./metadata.db";
-        let chunk_table = ChunkTable::new(db_url)?;
-        chunk_table.clear()?;
-        chunk_table.create()?;
-        for i in 0..200 {
-            let i64 = i as u64;
-            let chunk = Chunk {
-                chunk_blob_id: format!("BLOB{}", i),
-                chunk_digest: format!("DIGEST{}", (i + 1) % 2),
-                chunk_compressed_size: i,
-                chunk_uncompressed_size: i * 2,
-                chunk_compressed_offset: i64 * 3,
-                chunk_uncompressed_offset: i64 * 4,
-            };
-            chunk_table.insert(&chunk)?;
-        }
-        let mut algorithm = Algorithm::<SqliteDatabase>::new(algorithm, db_url)?;
-        assert_eq!(
-            algorithm.algorithm_name,
-            "exponential_smoothing".to_string()
-        );
-        let chunkdict = algorithm.chunkdict_generate()?;
-        assert_eq!(chunkdict.len(), 198);
-        assert_eq!(chunkdict[0].chunk_blob_id, "BLOB2");
-        assert_eq!(chunkdict[0].chunk_digest, "DIGEST1");
-        assert_eq!(chunkdict[0].chunk_compressed_size, 2);
-        assert_eq!(chunkdict[0].chunk_uncompressed_size, 4);
-        assert_eq!(chunkdict[0].chunk_compressed_offset, 6);
-        assert_eq!(chunkdict[0].chunk_uncompressed_offset, 8);
-        Ok(())
-    }
 }