|
26 | 26 | atomic::{AtomicU64, AtomicUsize, Ordering},
|
27 | 27 | Arc,
|
28 | 28 | },
|
| 29 | + thread, time, |
29 | 30 | },
|
30 | 31 | tempfile::tempfile_in,
|
31 | 32 | };
|
@@ -87,21 +88,59 @@ impl AccountHashesFile {
|
87 | 88 | if self.writer.is_none() {
|
88 | 89 | // we have hashes to write but no file yet, so create a file that will auto-delete on drop
|
89 | 90 |
|
90 |
| - let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| { |
91 |
| - panic!( |
92 |
| - "Unable to create file within {}: {err}", |
93 |
| - self.dir_for_temp_cache_files.display() |
94 |
| - ) |
95 |
| - }); |
| 91 | + let get_file = || -> Result<_, std::io::Error> { |
| 92 | + let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| { |
| 93 | + panic!( |
| 94 | + "Unable to create file within {}: {err}", |
| 95 | + self.dir_for_temp_cache_files.display() |
| 96 | + ) |
| 97 | + }); |
| 98 | + |
| 99 | + // Theoretical performance optimization: write a zero to the end of |
| 100 | + // the file so that we won't have to resize it later, which may be |
| 101 | + // expensive. |
| 102 | + assert!(self.capacity > 0); |
| 103 | + data.seek(SeekFrom::Start((self.capacity - 1) as u64))?; |
| 104 | + data.write_all(&[0])?; |
| 105 | + data.rewind()?; |
| 106 | + data.flush()?; |
| 107 | + Ok(data) |
| 108 | + }; |
| 109 | + |
| 110 | + // Retry up to 5 times to allocate the AccountHashesFile. Memory might be fragmented, |
| 111 | + // causing the allocation to fail, so retry after a failure in the hope that the |
| 112 | + // kernel gets a chance to defragment memory between attempts and a retry succeeds. |
| 113 | + let mut num_retries = 0; |
| 114 | + let data = loop { |
| 115 | + num_retries += 1; |
| 116 | + |
| 117 | + match get_file() { |
| 118 | + Ok(data) => { |
| 119 | + break data; |
| 120 | + } |
| 121 | + Err(err) => { |
| 122 | + info!( |
| 123 | + "Unable to create account hashes file within {}: {}, retry counter {}", |
| 124 | + self.dir_for_temp_cache_files.display(), |
| 125 | + err, |
| 126 | + num_retries |
| 127 | + ); |
96 | 128 |
|
97 |
| - // Theoretical performance optimization: write a zero to the end of |
98 |
| - // the file so that we won't have to resize it later, which may be |
99 |
| - // expensive. |
100 |
| - data.seek(SeekFrom::Start((self.capacity - 1) as u64)) |
101 |
| - .unwrap(); |
102 |
| - data.write_all(&[0]).unwrap(); |
103 |
| - data.rewind().unwrap(); |
104 |
| - data.flush().unwrap(); |
| 129 | + if num_retries > 5 { |
| 130 | + panic!( |
| 131 | + "Unable to create account hashes file within {}: after {} retries", |
| 132 | + self.dir_for_temp_cache_files.display(), |
| 133 | + num_retries |
| 134 | + ); |
| 135 | + } |
| 136 | + datapoint_info!( |
| 137 | + "retry_account_hashes_file_allocation", |
| 138 | + ("retry", num_retries, i64) |
| 139 | + ); |
| 140 | + thread::sleep(time::Duration::from_millis(num_retries * 100)); |
| 141 | + } |
| 142 | + } |
| 143 | + }; |
105 | 144 |
|
106 | 145 | //UNSAFE: Required to create a Mmap
|
107 | 146 | let map = unsafe { MmapMut::map_mut(&data) };
|
|
0 commit comments