Skip to content

Commit 1a24b3d

Browse files
committed
fix(vcpu): bound vCPU thread join on handle drop
VcpuHandle::drop called join() on the vCPU thread unconditionally, so a thread that never observed its Finish event would block teardown forever. Instead, poll for the thread to exit with a 1s timeout and panic if the timeout is exceeded, so teardown fails fast instead of hanging.

Signed-off-by: Riccardo Mancini <mancio@amazon.com>
1 parent c7c2221 commit 1a24b3d

1 file changed

Lines changed: 17 additions & 3 deletions

File tree

src/vmm/src/vstate/vcpu.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::os::fd::AsRawFd;
99
use std::sync::atomic::{Ordering, fence};
1010
use std::sync::mpsc::{Receiver, Sender, TryRecvError, channel};
1111
use std::sync::{Arc, Barrier};
12+
use std::time::{Duration, Instant};
1213
use std::{fmt, io, thread};
1314

1415
use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
@@ -32,6 +33,9 @@ use crate::vstate::vm::KvmVm;
3233
/// Signal number (SIGRTMIN) used to kick Vcpus.
3334
pub const VCPU_RTSIG_OFFSET: i32 = 0;
3435

36+
/// Maximum time to wait for a vCPU thread to exit when dropping its handle.
37+
const VCPU_JOIN_TIMEOUT: Duration = Duration::from_secs(1);
38+
3539
/// Errors associated with the wrappers over KVM ioctls.
3640
#[derive(Debug, thiserror::Error, displaydoc::Display)]
3741
pub enum VcpuError {
@@ -629,9 +633,19 @@ impl Drop for VcpuHandle {
629633
// The strategy of avoiding more complex messaging protocols during the Drop
630634
// helps avoid cycles which were preventing a truly clean shutdown.
631635
//
632-
// If the code hangs at this point, that means that a Finish event was not
633-
// sent by Vmm.
634-
self.vcpu_thread.take().unwrap().join().unwrap();
636+
// If the thread does not exit within VCPU_JOIN_TIMEOUT (e.g. because a Finish
637+
// event was not delivered), panic instead of blocking forever, so teardown
638+
// fails fast rather than hanging.
639+
let thread = self.vcpu_thread.take().unwrap();
640+
let deadline = Instant::now() + VCPU_JOIN_TIMEOUT;
641+
while !thread.is_finished() {
642+
assert!(
643+
Instant::now() < deadline,
644+
"Timed out waiting for vCPU thread to exit"
645+
);
646+
thread::sleep(Duration::from_millis(1));
647+
}
648+
thread.join().unwrap();
635649
}
636650
}
637651

0 commit comments

Comments
 (0)