Skip to content

Commit 6d31d0d

Browse files
authored
[rust/rqd] Handle finished frames before sanitizing reservations (#1770)
Sanitizing reservations prior to handling finished frames lead to an undesired error message when attempting to release cores.
1 parent f677cb1 commit 6d31d0d

File tree

2 files changed

+25
-15
lines changed

2 files changed

+25
-15
lines changed

rust/crates/rqd/src/system/machine.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -301,18 +301,8 @@ impl MachineMonitor {
301301
}
302302
});
303303

304-
// Sanitize dangling reservations
305-
{
306-
self.core_manager.write().await.sanitize_reservations(
307-
&running_frames
308-
.iter()
309-
.map(|(running_frame, _)| running_frame.request.resource_id())
310-
.collect(),
311-
);
312-
}
313-
314304
// Handle Running frames separately to avoid deadlocks when trying to get a frame state
315-
for (running_frame, running_state) in running_frames {
305+
for (running_frame, running_state) in &running_frames {
316306
// Collect stats about the procs related to this frame
317307
let proc_stats_opt = {
318308
let system_monitor = self.system_manager.lock().await;
@@ -336,7 +326,7 @@ impl MachineMonitor {
336326
);
337327
// Attempt to finish the process
338328
let _ = running_frame.finish(1, Some(19));
339-
finished_frames.push(Arc::clone(&running_frame));
329+
finished_frames.push(Arc::clone(running_frame));
340330
self.running_frames_cache.remove(&running_frame.frame_id);
341331
} else {
342332
// Proc finished but frame is waiting for the lock on `is_finished` to update the status
@@ -346,6 +336,22 @@ impl MachineMonitor {
346336
}
347337

348338
self.handle_finished_frames(finished_frames).await;
339+
340+
// Sanitize dangling reservations
341+
// This mechanism is redundant as handle_finished_frames releases resources reserved to
342+
// finished frames. But leaking core reservations would lead to waste of resoures, so
343+
// having a safety check sounds reasonable even when reduntant.
344+
{
345+
let running_resources: Vec<Uuid> = running_frames
346+
.iter()
347+
.map(|(running_frame, _)| running_frame.request.resource_id())
348+
.collect();
349+
self.core_manager
350+
.write()
351+
.await
352+
.sanitize_reservations(&running_resources);
353+
}
354+
349355
Ok(())
350356
}
351357

rust/crates/rqd/src/system/reservation.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -387,10 +387,14 @@ impl CoreStateManager {
387387
/// # Arguments
388388
///
389389
/// * `active_resource_ids` - A vector of resource IDs that are currently active
390-
pub fn sanitize_reservations(&mut self, active_resource_ids: &Vec<ResourceId>) {
390+
pub fn sanitize_reservations(&mut self, active_resource_ids: &[ResourceId]) {
391391
self.bookings.retain(|resource_id, booking| {
392-
active_resource_ids.contains(&resource_id)
393-
|| !booking.expired(self.reservation_grace_period)
392+
let retain = active_resource_ids.contains(resource_id)
393+
|| !booking.expired(self.reservation_grace_period);
394+
if !retain {
395+
warn!("Cleaning up dangling reservation for {}", resource_id);
396+
}
397+
retain
394398
});
395399
}
396400
}

0 commit comments

Comments
 (0)