     bytemuck::{Pod, Zeroable},
     memmap2::MmapMut,
     solana_measure::measure::Measure,
+    solana_sdk::clock::Slot,
     std::{
         collections::HashSet,
         fs::{self, remove_file, File, OpenOptions},
@@ -192,40 +193,59 @@ impl CacheHashDataFile {
 pub(crate) struct CacheHashData {
     cache_dir: PathBuf,
     pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
-    should_delete_old_cache_files_on_drop: bool,
+    /// Decides which old cache files to delete. See `delete_old_cache_files()` for more info.
+    storages_start_slot: Option<Slot>,
     pub stats: Arc<CacheHashDataStats>,
 }
 
 impl Drop for CacheHashData {
     fn drop(&mut self) {
-        if self.should_delete_old_cache_files_on_drop {
-            self.delete_old_cache_files();
-        }
+        self.delete_old_cache_files();
         self.stats.report();
     }
 }
 
 impl CacheHashData {
-    pub(crate) fn new(
-        cache_dir: PathBuf,
-        should_delete_old_cache_files_on_drop: bool,
-    ) -> CacheHashData {
+    pub(crate) fn new(cache_dir: PathBuf, storages_start_slot: Option<Slot>) -> CacheHashData {
         std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
             panic!("error creating cache dir {}: {err}", cache_dir.display())
         });
 
         let result = CacheHashData {
             cache_dir,
             pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
-            should_delete_old_cache_files_on_drop,
+            storages_start_slot,
             stats: Arc::default(),
         };
 
         result.get_cache_files();
         result
     }
+
+    /// delete all pre-existing files that will not be used
     fn delete_old_cache_files(&self) {
-        let old_cache_files = std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());
+        // all the remaining files in `pre_existing_cache_files` were *not* used for this
+        // accounts hash calculation
+        let mut old_cache_files =
+            std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());
+
+        // If `storages_start_slot` is None, we're doing a full accounts hash calculation, and thus
+        // all unused cache files can be deleted.
+        // If `storages_start_slot` is Some, we're doing an incremental accounts hash calculation,
+        // and we only want to delete the unused cache files *that IAH considered*.
+        if let Some(storages_start_slot) = self.storages_start_slot {
+            old_cache_files.retain(|old_cache_file| {
+                let Some(parsed_filename) = parse_filename(old_cache_file) else {
+                    // if parsing the cache filename fails, we *do* want to delete it
+                    return true;
+                };
+
+                // if the old cache file is in the incremental accounts hash calculation range,
+                // then delete it
+                parsed_filename.slot_range_start >= storages_start_slot
+            });
+        }
+
         if !old_cache_files.is_empty() {
             self.stats
                 .unused_cache_files
@@ -356,6 +376,39 @@ impl CacheHashData {
     }
 }
 
+/// The values of each part of a cache hash data filename
+#[derive(Debug)]
+pub struct ParsedFilename {
+    pub slot_range_start: Slot,
+    pub slot_range_end: Slot,
+    pub bin_range_start: u64,
+    pub bin_range_end: u64,
+    pub hash: u64,
+}
+
+/// Parses a cache hash data filename into its parts
+///
+/// Returns None if the filename is invalid
+fn parse_filename(cache_filename: impl AsRef<Path>) -> Option<ParsedFilename> {
+    let filename = cache_filename.as_ref().to_string_lossy().to_string();
+    let parts: Vec<_> = filename.split('.').collect(); // The parts are separated by a `.`
+    if parts.len() != 5 {
+        return None;
+    }
+    let slot_range_start = parts.first()?.parse().ok()?;
+    let slot_range_end = parts.get(1)?.parse().ok()?;
+    let bin_range_start = parts.get(2)?.parse().ok()?;
+    let bin_range_end = parts.get(3)?.parse().ok()?;
+    let hash = u64::from_str_radix(parts.get(4)?, 16).ok()?; // the hash is in hex
+    Some(ParsedFilename {
+        slot_range_start,
+        slot_range_end,
+        bin_range_start,
+        bin_range_end,
+        hash,
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use {super::*, rand::Rng};
@@ -423,7 +476,7 @@ mod tests {
                 data_this_pass.push(this_bin_data);
             }
         }
-        let cache = CacheHashData::new(cache_dir.clone(), true);
+        let cache = CacheHashData::new(cache_dir.clone(), None);
         let file_name = PathBuf::from("test");
         cache.save(&file_name, &data_this_pass).unwrap();
         cache.get_cache_files();
@@ -513,4 +566,39 @@ mod tests {
             ct,
         )
     }
+
+    #[test]
+    fn test_parse_filename() {
+        let good_filename = "123.456.0.65536.537d65697d9b2baa";
+        let parsed_filename = parse_filename(good_filename).unwrap();
+        assert_eq!(parsed_filename.slot_range_start, 123);
+        assert_eq!(parsed_filename.slot_range_end, 456);
+        assert_eq!(parsed_filename.bin_range_start, 0);
+        assert_eq!(parsed_filename.bin_range_end, 65536);
+        assert_eq!(parsed_filename.hash, 0x537d65697d9b2baa);
+
+        let bad_filenames = [
+            // bad separator
+            "123-456-0-65536.537d65697d9b2baa",
+            // bad values
+            "abc.456.0.65536.537d65697d9b2baa",
+            "123.xyz.0.65536.537d65697d9b2baa",
+            "123.456.?.65536.537d65697d9b2baa",
+            "123.456.0.@#$%^.537d65697d9b2baa",
+            "123.456.0.65536.base19shouldfail",
+            "123.456.0.65536.123456789012345678901234567890",
+            // missing values
+            "123.456.0.65536.",
+            "123.456.0.65536",
+            // extra junk
+            "123.456.0.65536.537d65697d9b2baa.42",
+            "123.456.0.65536.537d65697d9b2baa.",
+            "123.456.0.65536.537d65697d9b2baa/",
+            ".123.456.0.65536.537d65697d9b2baa",
+            "/123.456.0.65536.537d65697d9b2baa",
+        ];
+        for bad_filename in bad_filenames {
+            assert!(parse_filename(bad_filename).is_none());
+        }
+    }
 }
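
For reference, a minimal standalone sketch (not part of the patch; the function name and the `Slot = u64` alias are assumptions for illustration) of the retention rule that `delete_old_cache_files()` now applies: with `storages_start_slot` of `None` (a full accounts hash calculation) every unused cache file is deleted, while with `Some(start)` (an incremental accounts hash calculation) only unused files whose slot range starts at or after `start` are deleted.

// Hypothetical illustration only; mirrors the `retain` logic in the diff above.
type Slot = u64; // stand-in for solana_sdk::clock::Slot

/// Should an *unused* cache file whose slot range starts at `slot_range_start`
/// be deleted, given the calculation's `storages_start_slot`?
fn should_delete(slot_range_start: Slot, storages_start_slot: Option<Slot>) -> bool {
    match storages_start_slot {
        // full accounts hash calculation: delete every unused cache file
        None => true,
        // incremental calculation: delete only files the IAH range considered
        Some(start) => slot_range_start >= start,
    }
}

fn main() {
    assert!(should_delete(90, None)); // full calculation deletes everything unused
    assert!(!should_delete(90, Some(100))); // below the IAH range, so it is kept
    assert!(should_delete(150, Some(100))); // inside the IAH range, so it is deleted
}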