Skip to content

Commit 56f427d

Browse files
committed
[Breaking] Remove ResumableFileSlot and rely on high ulimits
ResumableFileSlot became to difficult to manage, instead of managing resources this way, we use much higher DEFAULT_OPEN_FILE_PERMITS, set ulimit (when unix) and warn user if the limits are likely too low. Breaking: Removed idle_file_descriptor_timeout_millis from config. closes: #1288, #513, #1298, #527
1 parent 7afe286 commit 56f427d

21 files changed

+555
-961
lines changed

Cargo.lock

+11-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nativelink-config/src/cas_server.rs

-15
Original file line numberDiff line numberDiff line change
@@ -716,21 +716,6 @@ pub struct GlobalConfig {
716716
#[serde(deserialize_with = "convert_numeric_with_shellexpand")]
717717
pub max_open_files: usize,
718718

719-
/// If a file descriptor is idle for this many milliseconds, it will be closed.
720-
/// In the event a client or store takes a long time to send or receive data
721-
/// the file descriptor will be closed, and since `max_open_files` blocks new
722-
/// `open_file` requests until a slot opens up, it will allow new requests to be
723-
/// processed. If a read or write is attempted on a closed file descriptor, the
724-
/// file will be reopened and the operation will continue.
725-
///
726-
/// On services where worker(s) and scheduler(s) live in the same process, this
727-
/// also prevents deadlocks if a file->file copy is happening, but cannot open
728-
/// a new file descriptor because the limit has been reached.
729-
///
730-
/// Default: 1000 (1 second)
731-
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
732-
pub idle_file_descriptor_timeout_millis: u64,
733-
734719
/// This flag can be used to prevent metrics from being collected at runtime.
735720
/// Metrics are still able to be collected, but this flag prevents metrics that
736721
/// are collected at runtime (performance metrics) from being tallied. The

nativelink-macro/src/lib.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,10 @@ pub fn nativelink_test(attr: TokenStream, item: TokenStream) -> TokenStream {
3636
async fn #fn_name(#fn_inputs) #fn_output {
3737
// Error means already initialized, which is ok.
3838
let _ = nativelink_util::init_tracing();
39-
// If already set it's ok.
40-
let _ = nativelink_util::fs::set_idle_file_descriptor_timeout(std::time::Duration::from_millis(100));
4139

4240
#[warn(clippy::disallowed_methods)]
4341
::std::sync::Arc::new(::nativelink_util::origin_context::OriginContext::new()).wrap_async(
44-
::nativelink_util::__tracing::trace_span!("test"), async move {
42+
::nativelink_util::__tracing::error_span!(stringify!(#fn_name)), async move {
4543
#fn_block
4644
}
4745
)

nativelink-store/Cargo.toml

-3
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,6 @@ aws-sdk-s3 = { version = "=1.68.0", features = [
7878
"rt-tokio",
7979
], default-features = false }
8080
aws-smithy-runtime-api = "1.7.3"
81-
serial_test = { version = "3.2.0", features = [
82-
"async",
83-
], default-features = false }
8481
serde_json = "1.0.135"
8582
fred = { version = "10.0.3", default-features = false, features = ["mocks"] }
8683
tracing-subscriber = { version = "0.3.19", default-features = false }

nativelink-store/src/fast_slow_store.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
use std::borrow::BorrowMut;
1616
use std::cmp::{max, min};
17+
use std::ffi::OsString;
1718
use std::ops::Range;
1819
use std::pin::Pin;
1920
use std::sync::atomic::{AtomicU64, Ordering};
@@ -224,9 +225,10 @@ impl StoreDriver for FastSlowStore {
224225
async fn update_with_whole_file(
225226
self: Pin<&Self>,
226227
key: StoreKey<'_>,
227-
mut file: fs::ResumeableFileSlot,
228+
path: OsString,
229+
mut file: fs::FileSlot,
228230
upload_size: UploadSizeInfo,
229-
) -> Result<Option<fs::ResumeableFileSlot>, Error> {
231+
) -> Result<Option<fs::FileSlot>, Error> {
230232
if self
231233
.fast_store
232234
.optimized_for(StoreOptimizations::FileUpdates)
@@ -246,7 +248,7 @@ impl StoreDriver for FastSlowStore {
246248
}
247249
return self
248250
.fast_store
249-
.update_with_whole_file(key, file, upload_size)
251+
.update_with_whole_file(key, path, file, upload_size)
250252
.await;
251253
}
252254

@@ -269,7 +271,7 @@ impl StoreDriver for FastSlowStore {
269271
}
270272
return self
271273
.slow_store
272-
.update_with_whole_file(key, file, upload_size)
274+
.update_with_whole_file(key, path, file, upload_size)
273275
.await;
274276
}
275277

nativelink-store/src/filesystem_store.rs

+34-98
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use std::fmt::{Debug, Formatter};
1818
use std::pin::Pin;
1919
use std::sync::atomic::{AtomicU64, Ordering};
2020
use std::sync::{Arc, Weak};
21-
use std::time::{Duration, SystemTime};
21+
use std::time::SystemTime;
2222

2323
use async_lock::RwLock;
2424
use async_trait::async_trait;
@@ -39,8 +39,7 @@ use nativelink_util::store_trait::{
3939
StoreDriver, StoreKey, StoreKeyBorrow, StoreOptimizations, UploadSizeInfo,
4040
};
4141
use nativelink_util::{background_spawn, spawn_blocking};
42-
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, SeekFrom};
43-
use tokio::time::{sleep, timeout, Sleep};
42+
use tokio::io::{AsyncReadExt, AsyncWriteExt, Take};
4443
use tokio_stream::wrappers::ReadDirStream;
4544
use tracing::{event, Level};
4645

@@ -168,7 +167,7 @@ pub trait FileEntry: LenEntry + Send + Sync + Debug + 'static {
168167
fn make_and_open_file(
169168
block_size: u64,
170169
encoded_file_path: EncodedFilePath,
171-
) -> impl Future<Output = Result<(Self, fs::ResumeableFileSlot, OsString), Error>> + Send
170+
) -> impl Future<Output = Result<(Self, fs::FileSlot, OsString), Error>> + Send
172171
where
173172
Self: Sized;
174173

@@ -186,7 +185,7 @@ pub trait FileEntry: LenEntry + Send + Sync + Debug + 'static {
186185
&self,
187186
offset: u64,
188187
length: u64,
189-
) -> impl Future<Output = Result<fs::ResumeableFileSlot, Error>> + Send;
188+
) -> impl Future<Output = Result<Take<fs::FileSlot>, Error>> + Send;
190189

191190
/// This function is a safe way to extract the file name of the underlying file. To protect users from
192191
/// accidentally creating undefined behavior we encourage users to do the logic they need to do with
@@ -231,7 +230,7 @@ impl FileEntry for FileEntryImpl {
231230
async fn make_and_open_file(
232231
block_size: u64,
233232
encoded_file_path: EncodedFilePath,
234-
) -> Result<(FileEntryImpl, fs::ResumeableFileSlot, OsString), Error> {
233+
) -> Result<(FileEntryImpl, fs::FileSlot, OsString), Error> {
235234
let temp_full_path = encoded_file_path.get_file_path().to_os_string();
236235
let temp_file_result = fs::create_file(temp_full_path.clone())
237236
.or_else(|mut err| async {
@@ -276,30 +275,19 @@ impl FileEntry for FileEntryImpl {
276275
&self.encoded_file_path
277276
}
278277

279-
async fn read_file_part(
278+
fn read_file_part(
280279
&self,
281280
offset: u64,
282281
length: u64,
283-
) -> Result<fs::ResumeableFileSlot, Error> {
284-
let (mut file, full_content_path_for_debug_only) = self
285-
.get_file_path_locked(|full_content_path| async move {
286-
let file = fs::open_file(full_content_path.clone(), length)
287-
.await
288-
.err_tip(|| {
289-
format!("Failed to open file in filesystem store {full_content_path:?}")
290-
})?;
291-
Ok((file, full_content_path))
292-
})
293-
.await?;
294-
295-
file.as_reader()
296-
.await
297-
.err_tip(|| "Could not seek file in read_file_part()")?
298-
.get_mut()
299-
.seek(SeekFrom::Start(offset))
300-
.await
301-
.err_tip(|| format!("Failed to seek file: {full_content_path_for_debug_only:?}"))?;
302-
Ok(file)
282+
) -> impl Future<Output = Result<Take<fs::FileSlot>, Error>> + Send {
283+
self.get_file_path_locked(move |full_content_path| async move {
284+
let file = fs::open_file(&full_content_path, offset, length)
285+
.await
286+
.err_tip(|| {
287+
format!("Failed to open file in filesystem store {full_content_path:?}")
288+
})?;
289+
Ok(file)
290+
})
303291
}
304292

305293
async fn get_file_path_locked<
@@ -524,6 +512,7 @@ async fn add_files_to_cache<Fe: FileEntry>(
524512
);
525513
}
526514
};
515+
527516
Result::<(String, SystemTime, u64, bool), Error>::Ok((
528517
file_name,
529518
atime,
@@ -668,19 +657,16 @@ pub struct FilesystemStore<Fe: FileEntry = FileEntryImpl> {
668657
#[metric(help = "Size of the configured read buffer size")]
669658
read_buffer_size: usize,
670659
weak_self: Weak<Self>,
671-
sleep_fn: fn(Duration) -> Sleep,
672660
rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>,
673661
}
674662

675663
impl<Fe: FileEntry> FilesystemStore<Fe> {
676664
pub async fn new(spec: &FilesystemSpec) -> Result<Arc<Self>, Error> {
677-
Self::new_with_timeout_and_rename_fn(spec, sleep, |from, to| std::fs::rename(from, to))
678-
.await
665+
Self::new_with_timeout_and_rename_fn(spec, |from, to| std::fs::rename(from, to)).await
679666
}
680667

681668
pub async fn new_with_timeout_and_rename_fn(
682669
spec: &FilesystemSpec,
683-
sleep_fn: fn(Duration) -> Sleep,
684670
rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>,
685671
) -> Result<Arc<Self>, Error> {
686672
async fn create_subdirs(path: &str) -> Result<(), Error> {
@@ -735,7 +721,6 @@ impl<Fe: FileEntry> FilesystemStore<Fe> {
735721
block_size,
736722
read_buffer_size,
737723
weak_self: weak_self.clone(),
738-
sleep_fn,
739724
rename_fn,
740725
}))
741726
}
@@ -754,50 +739,34 @@ impl<Fe: FileEntry> FilesystemStore<Fe> {
754739
async fn update_file<'a>(
755740
self: Pin<&'a Self>,
756741
mut entry: Fe,
757-
mut resumeable_temp_file: fs::ResumeableFileSlot,
742+
mut temp_file: fs::FileSlot,
758743
final_key: StoreKey<'static>,
759744
mut reader: DropCloserReadHalf,
760745
) -> Result<(), Error> {
761746
let mut data_size = 0;
762747
loop {
763-
let Ok(data_result) = timeout(fs::idle_file_descriptor_timeout(), reader.recv()).await
764-
else {
765-
// In the event we timeout, we want to close the writing file, to prevent
766-
// the file descriptor left open for long periods of time.
767-
// This is needed because we wrap `fs` so only a fixed number of file
768-
// descriptors may be open at any given time. If we are streaming from
769-
// File -> File, it can cause a deadlock if the Write file is not sending
770-
// data because it is waiting for a file descriotor to open before sending data.
771-
resumeable_temp_file.close_file().await.err_tip(|| {
772-
"Could not close file due to timeout in FileSystemStore::update_file"
773-
})?;
774-
continue;
775-
};
776-
let mut data = data_result.err_tip(|| "Failed to receive data in filesystem store")?;
748+
let mut data = reader
749+
.recv()
750+
.await
751+
.err_tip(|| "Failed to receive data in filesystem store")?;
777752
let data_len = data.len();
778753
if data_len == 0 {
779754
break; // EOF.
780755
}
781-
resumeable_temp_file
782-
.as_writer()
783-
.await
784-
.err_tip(|| "in filesystem_store::update_file")?
756+
temp_file
785757
.write_all_buf(&mut data)
786758
.await
787759
.err_tip(|| "Failed to write data into filesystem store")?;
788760
data_size += data_len as u64;
789761
}
790762

791-
resumeable_temp_file
792-
.as_writer()
793-
.await
794-
.err_tip(|| "in filesystem_store::update_file")?
763+
temp_file
795764
.as_ref()
796765
.sync_all()
797766
.await
798767
.err_tip(|| "Failed to sync_data in filesystem store")?;
799768

800-
drop(resumeable_temp_file);
769+
drop(temp_file);
801770

802771
*entry.data_size_mut() = data_size;
803772
self.emplace_file(final_key, Arc::new(entry)).await
@@ -942,19 +911,13 @@ impl<Fe: FileEntry> StoreDriver for FilesystemStore<Fe> {
942911
async fn update_with_whole_file(
943912
self: Pin<&Self>,
944913
key: StoreKey<'_>,
945-
mut file: fs::ResumeableFileSlot,
914+
path: OsString,
915+
file: fs::FileSlot,
946916
upload_size: UploadSizeInfo,
947-
) -> Result<Option<fs::ResumeableFileSlot>, Error> {
948-
let path = file.get_path().as_os_str().to_os_string();
917+
) -> Result<Option<fs::FileSlot>, Error> {
949918
let file_size = match upload_size {
950919
UploadSizeInfo::ExactSize(size) => size,
951920
UploadSizeInfo::MaxSize(_) => file
952-
.as_reader()
953-
.await
954-
.err_tip(|| {
955-
format!("While getting metadata for {path:?} in update_with_whole_file")
956-
})?
957-
.get_ref()
958921
.as_ref()
959922
.metadata()
960923
.await
@@ -995,7 +958,6 @@ impl<Fe: FileEntry> StoreDriver for FilesystemStore<Fe> {
995958
.err_tip(|| "Failed to send zero EOF in filesystem store get_part")?;
996959
return Ok(());
997960
}
998-
999961
let entry = self.evicting_map.get(&key).await.ok_or_else(|| {
1000962
make_err!(
1001963
Code::NotFound,
@@ -1004,47 +966,21 @@ impl<Fe: FileEntry> StoreDriver for FilesystemStore<Fe> {
1004966
)
1005967
})?;
1006968
let read_limit = length.unwrap_or(u64::MAX);
1007-
let mut resumeable_temp_file = entry.read_file_part(offset, read_limit).await?;
969+
let mut temp_file = entry.read_file_part(offset, read_limit).await?;
1008970

1009971
loop {
1010972
let mut buf = BytesMut::with_capacity(self.read_buffer_size);
1011-
resumeable_temp_file
1012-
.as_reader()
1013-
.await
1014-
.err_tip(|| "In FileSystemStore::get_part()")?
973+
temp_file
1015974
.read_buf(&mut buf)
1016975
.await
1017976
.err_tip(|| "Failed to read data in filesystem store")?;
1018977
if buf.is_empty() {
1019978
break; // EOF.
1020979
}
1021-
// In the event it takes a while to send the data to the client, we want to close the
1022-
// reading file, to prevent the file descriptor left open for long periods of time.
1023-
// Failing to do so might cause deadlocks if the receiver is unable to receive data
1024-
// because it is waiting for a file descriptor to open before receiving data.
1025-
// Using `ResumeableFileSlot` will re-open the file in the event it gets closed on the
1026-
// next iteration.
1027-
let buf_content = buf.freeze();
1028-
loop {
1029-
let sleep_fn = (self.sleep_fn)(fs::idle_file_descriptor_timeout());
1030-
tokio::pin!(sleep_fn);
1031-
tokio::select! {
1032-
() = & mut (sleep_fn) => {
1033-
resumeable_temp_file
1034-
.close_file()
1035-
.await
1036-
.err_tip(|| "Could not close file due to timeout in FileSystemStore::get_part")?;
1037-
}
1038-
res = writer.send(buf_content.clone()) => {
1039-
match res {
1040-
Ok(()) => break,
1041-
Err(err) => {
1042-
return Err(err).err_tip(|| "Failed to send chunk in filesystem store get_part");
1043-
}
1044-
}
1045-
}
1046-
}
1047-
}
980+
writer
981+
.send(buf.freeze())
982+
.await
983+
.err_tip(|| "Failed to send chunk in filesystem store get_part")?;
1048984
}
1049985
writer
1050986
.send_eof()

0 commit comments

Comments
 (0)