Skip to content
This repository was archived by the owner on Jan 22, 2025. It is now read-only.

Commit 2e5a20f

Browse files
mergify[bot]HaoranYiHaoranYi
authored
v1.17: Retry hash file allocation (backport of #33565) (#33918)
* Retry hash file allocation (#33565) * retry hash file allocation * add sleep * submit a datapoint for retry * typo * more typos * Update accounts-db/src/accounts_hash.rs Co-authored-by: Brooks <[email protected]> * fmt --------- Co-authored-by: HaoranYi <[email protected]> Co-authored-by: Brooks <[email protected]> (cherry picked from commit 167dac2) # Conflicts: # accounts-db/src/accounts_hash.rs * fix conflicts --------- Co-authored-by: HaoranYi <[email protected]> Co-authored-by: HaoranYi <[email protected]>
1 parent b948b2b commit 2e5a20f

File tree

1 file changed

+53
-14
lines changed

1 file changed

+53
-14
lines changed

accounts-db/src/accounts_hash.rs

+53-14
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use {
2626
atomic::{AtomicU64, AtomicUsize, Ordering},
2727
Arc,
2828
},
29+
thread, time,
2930
},
3031
tempfile::tempfile_in,
3132
};
@@ -87,21 +88,59 @@ impl AccountHashesFile {
8788
if self.writer.is_none() {
8889
// we have hashes to write but no file yet, so create a file that will auto-delete on drop
8990

90-
let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
91-
panic!(
92-
"Unable to create file within {}: {err}",
93-
self.dir_for_temp_cache_files.display()
94-
)
95-
});
91+
let get_file = || -> Result<_, std::io::Error> {
92+
let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
93+
panic!(
94+
"Unable to create file within {}: {err}",
95+
self.dir_for_temp_cache_files.display()
96+
)
97+
});
98+
99+
// Theoretical performance optimization: write a zero to the end of
100+
// the file so that we won't have to resize it later, which may be
101+
// expensive.
102+
assert!(self.capacity > 0);
103+
data.seek(SeekFrom::Start((self.capacity - 1) as u64))?;
104+
data.write_all(&[0])?;
105+
data.rewind()?;
106+
data.flush()?;
107+
Ok(data)
108+
};
109+
110+
// Retry up to 5 times to allocate the AccountHashesFile. The memory might be fragmented,
111+
// causing memory allocation failure. Therefore, let's retry after a failure, hoping that the
112+
// kernel has a chance to defragment the memory between retries so that a later attempt succeeds.
113+
let mut num_retries = 0;
114+
let data = loop {
115+
num_retries += 1;
116+
117+
match get_file() {
118+
Ok(data) => {
119+
break data;
120+
}
121+
Err(err) => {
122+
info!(
123+
"Unable to create account hashes file within {}: {}, retry counter {}",
124+
self.dir_for_temp_cache_files.display(),
125+
err,
126+
num_retries
127+
);
96128

97-
// Theoretical performance optimization: write a zero to the end of
98-
// the file so that we won't have to resize it later, which may be
99-
// expensive.
100-
data.seek(SeekFrom::Start((self.capacity - 1) as u64))
101-
.unwrap();
102-
data.write_all(&[0]).unwrap();
103-
data.rewind().unwrap();
104-
data.flush().unwrap();
129+
if num_retries > 5 {
130+
panic!(
131+
"Unable to create account hashes file within {}: after {} retries",
132+
self.dir_for_temp_cache_files.display(),
133+
num_retries
134+
);
135+
}
136+
datapoint_info!(
137+
"retry_account_hashes_file_allocation",
138+
("retry", num_retries, i64)
139+
);
140+
thread::sleep(time::Duration::from_millis(num_retries * 100));
141+
}
142+
}
143+
};
105144

106145
//UNSAFE: Required to create a Mmap
107146
let map = unsafe { MmapMut::map_mut(&data) };

0 commit comments

Comments
 (0)