66// found in the THIRD-PARTY file.
77
88use std:: os:: fd:: AsRawFd ;
9- use std:: sync:: atomic:: { Ordering , fence} ;
9+ use std:: sync:: atomic:: { AtomicI64 , AtomicU8 , AtomicU64 , Ordering , fence} ;
1010use std:: sync:: mpsc:: { Receiver , Sender , TryRecvError , channel} ;
1111use std:: sync:: { Arc , Barrier } ;
1212use std:: { fmt, io, thread} ;
@@ -32,6 +32,48 @@ use crate::vstate::vm::KvmVm;
3232/// Signal number (SIGRTMIN) used to kick Vcpus.
3333pub const VCPU_RTSIG_OFFSET : i32 = 0 ;
3434
35+ /// DEBUG: global monotonic sequence counter for ordering log lines across threads.
36+ static DEBUG_SEQ : std:: sync:: atomic:: AtomicU64 = std:: sync:: atomic:: AtomicU64 :: new ( 0 ) ;
37+
38+ /// DEBUG: whether verbose vCPU tracing is enabled. Off by default so the
39+ /// production vCPU path used by functional/performance tests is not flooded.
40+ /// Enabled explicitly by tests via `debug_enable_vcpu_trace()`.
41+ static DEBUG_TRACE_ENABLED : AtomicU8 = AtomicU8 :: new ( 0 ) ;
42+
43+ /// DEBUG: enable verbose vCPU tracing for the current process. Intended to be
44+ /// called only from the relevant integration test.
45+ pub fn debug_enable_vcpu_trace ( ) {
46+ DEBUG_TRACE_ENABLED . store ( 1 , Ordering :: Relaxed ) ;
47+ }
48+
49+ /// DEBUG: emit an ordered log line with a sequence number.
50+ fn debug_seq_log ( args : std:: fmt:: Arguments ) {
51+ if DEBUG_TRACE_ENABLED . load ( Ordering :: Relaxed ) == 0 {
52+ return ;
53+ }
54+ let seq = DEBUG_SEQ . fetch_add ( 1 , Ordering :: SeqCst ) ;
55+ eprintln ! ( "[DBGSEQ {seq}] {args}" ) ;
56+ }
57+
58+ /// DEBUG: number of times the vCPU kick signal handler has fired (process-wide).
59+ pub static KICK_SIGNAL_COUNT : AtomicU64 = AtomicU64 :: new ( 0 ) ;
60+ /// DEBUG: TID observed inside the kick signal handler on its most recent firing.
61+ pub static KICK_SIGNAL_LAST_TID : AtomicI64 = AtomicI64 :: new ( -1 ) ;
62+
63+ /// DEBUG: returns (handler_fire_count, last_handler_tid) for the kick signal.
64+ pub fn debug_kick_signal_stats ( ) -> ( u64 , i64 ) {
65+ (
66+ KICK_SIGNAL_COUNT . load ( Ordering :: SeqCst ) ,
67+ KICK_SIGNAL_LAST_TID . load ( Ordering :: SeqCst ) ,
68+ )
69+ }
70+
71+ /// DEBUG: async-signal-safe gettid().
72+ fn debug_gettid ( ) -> i64 {
73+ // SAFETY: SYS_gettid is always available and takes no arguments.
74+ unsafe { libc:: syscall ( libc:: SYS_gettid ) }
75+ }
76+
3577/// Errors associated with the wrappers over KVM ioctls.
3678#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
3779pub enum VcpuError {
@@ -109,6 +151,10 @@ impl Vcpu {
109151 /// one.
110152 fn register_kick_signal_handler ( & mut self ) {
111153 extern "C" fn handle_signal ( _: c_int , _: * mut siginfo_t , _: * mut c_void ) {
154+ // DEBUG: record which TID actually received the kick and how many times.
155+ // Atomic stores + the gettid syscall are async-signal-safe.
156+ KICK_SIGNAL_LAST_TID . store ( debug_gettid ( ) , Ordering :: SeqCst ) ;
157+ KICK_SIGNAL_COUNT . fetch_add ( 1 , Ordering :: SeqCst ) ;
112158 // We write to the immediate_exit from other thread, so make sure the read in the
113159 // KVM_RUN sees the up to date value
114160 fence ( Ordering :: Acquire ) ;
@@ -181,6 +227,11 @@ impl Vcpu {
181227 . spawn ( move || {
182228 let filter = & * seccomp_filter;
183229 self . register_kick_signal_handler ( ) ;
230+ debug_seq_log ( format_args ! (
231+ "vcpu{} thread started, tid={}" ,
232+ self . kvm_vcpu. index,
233+ debug_gettid( )
234+ ) ) ;
184235 // Synchronization to make sure thread local data is initialized.
185236 barrier. wait ( ) ;
186237 self . run ( filter) ;
@@ -245,7 +296,12 @@ impl Vcpu {
245296 let mut state = StateMachine :: next ( Self :: running) ;
246297
247298 // Break this emulation loop on any transition request/external event.
248- match self . event_receiver . try_recv ( ) {
299+ let recv_result = self . event_receiver . try_recv ( ) ;
300+ debug_seq_log ( format_args ! (
301+ "vcpu{} running: inner loop broke, try_recv = {:?}" ,
302+ self . kvm_vcpu. index, recv_result
303+ ) ) ;
304+ match recv_result {
249305 // Running ---- Pause ----> Paused
250306 Ok ( VcpuEvent :: Pause ) => {
251307 // Nothing special to do.
@@ -280,7 +336,13 @@ impl Vcpu {
280336 ) ) )
281337 . expect ( "vcpu channel unexpectedly closed" ) ;
282338 }
283- Ok ( VcpuEvent :: Finish ) => return StateMachine :: finish ( ) ,
339+ Ok ( VcpuEvent :: Finish ) => {
340+ debug_seq_log ( format_args ! (
341+ "vcpu{} running: got Finish -> StateMachine::finish" ,
342+ self . kvm_vcpu. index
343+ ) ) ;
344+ return StateMachine :: finish ( ) ;
345+ }
284346 // Unhandled exit of the other end.
285347 Err ( TryRecvError :: Disconnected ) => {
286348 // Move to 'exited' state.
@@ -295,6 +357,10 @@ impl Vcpu {
295357
296358 // This is the main loop of the `Paused` state.
297359 fn paused ( & mut self ) -> StateMachine < Self > {
360+ debug_seq_log ( format_args ! (
361+ "vcpu{} paused: blocking on recv()" ,
362+ self . kvm_vcpu. index
363+ ) ) ;
298364 match self . event_receiver . recv ( ) {
299365 // Paused ---- Resume ----> Running
300366 Ok ( VcpuEvent :: Resume ) => {
@@ -363,6 +429,10 @@ impl Vcpu {
363429 // Note that this function isn't called when the guest asks for a CPU
364430 // reset via the i8042 controller on x86.
365431 fn exit ( & mut self , exit_code : FcExitCode ) -> StateMachine < Self > {
432+ let idx = self . kvm_vcpu . index ;
433+ debug_seq_log ( format_args ! (
434+ "vcpu{idx} exit() entered, exit_code={exit_code:?}"
435+ ) ) ;
366436 if let Err ( err) = self . exit_evt . write ( 1 ) {
367437 METRICS . vcpu . failures . inc ( ) ;
368438 error ! ( "Failed signaling vcpu exit event: {}" , err) ;
@@ -373,7 +443,9 @@ impl Vcpu {
373443 . send ( VcpuResponse :: Exited ( exit_code) )
374444 . expect ( "vcpu channel unexpectedly closed" ) ;
375445 // Wait for and only accept 'VcpuEvent::Finish'.
376- if let Ok ( VcpuEvent :: Finish ) = self . event_receiver . recv ( ) {
446+ let ev = self . event_receiver . recv ( ) ;
447+ debug_seq_log ( format_args ! ( "vcpu{idx} exit() recv = {ev:?}" ) ) ;
448+ if let Ok ( VcpuEvent :: Finish ) = ev {
377449 break ;
378450 }
379451 }
@@ -384,15 +456,29 @@ impl Vcpu {
384456 ///
385457 /// Returns error or enum specifying whether emulation was handled or interrupted.
386458 pub fn run_emulation ( & mut self ) -> Result < VcpuEmulation , VcpuError > {
459+ let idx = self . kvm_vcpu . index ;
387460 if self . kvm_vcpu . fd . get_kvm_run ( ) . immediate_exit == 1u8 {
388461 warn ! ( "Requested a vCPU run with immediate_exit enabled. The operation was skipped" ) ;
389462 self . kvm_vcpu . fd . set_kvm_immediate_exit ( 0 ) ;
463+ debug_seq_log ( format_args ! (
464+ "vcpu{idx} run_emulation: immediate_exit path -> Interrupted"
465+ ) ) ;
390466 return Ok ( VcpuEmulation :: Interrupted ) ;
391467 }
392468
393- match self . kvm_vcpu . fd . run ( ) {
469+ debug_seq_log ( format_args ! ( "vcpu{idx} KVM_RUN enter" ) ) ;
470+ let run_result = self . kvm_vcpu . fd . run ( ) ;
471+ match & run_result {
472+ Ok ( exit) => debug_seq_log ( format_args ! ( "vcpu{idx} KVM_RUN exit: {exit:?}" ) ) ,
473+ Err ( err) => debug_seq_log ( format_args ! ( "vcpu{idx} KVM_RUN err: errno={}" , err. errno( ) ) ) ,
474+ }
475+
476+ match run_result {
394477 Err ( ref err) if err. errno ( ) == libc:: EINTR => {
395478 self . kvm_vcpu . fd . set_kvm_immediate_exit ( 0 ) ;
479+ debug_seq_log ( format_args ! (
480+ "vcpu{idx} run_emulation: EINTR path -> Interrupted"
481+ ) ) ;
396482 // Notify that this KVM_RUN was interrupted.
397483 Ok ( VcpuEmulation :: Interrupted )
398484 }
@@ -406,7 +492,11 @@ impl Vcpu {
406492
407493 Ok ( VcpuEmulation :: Paused )
408494 }
409- emulation_result => handle_kvm_exit ( & mut self . kvm_vcpu . peripherals , emulation_result) ,
495+ emulation_result => {
496+ let res = handle_kvm_exit ( & mut self . kvm_vcpu . peripherals , emulation_result) ;
497+ debug_seq_log ( format_args ! ( "vcpu{idx} run_emulation result: {res:?}" ) ) ;
498+ res
499+ }
410500 }
411501 }
412502}
@@ -599,6 +689,7 @@ impl VcpuHandle {
599689 ///
600690 /// When [`vmm_sys_util::linux::signal::Killable::kill`] errors.
601691 pub fn send_event ( & mut self , event : VcpuEvent ) -> Result < ( ) , VcpuSendEventError > {
692+ debug_seq_log ( format_args ! ( "send_event: sending {:?} to channel" , event) ) ;
602693 // Use expect() to crash if the other thread closed this channel.
603694 self . event_sender
604695 . send ( event)
@@ -607,11 +698,17 @@ impl VcpuHandle {
607698 // Add a fence to ensure the write is visible to the vpu thread
608699 self . vcpu_fd . set_kvm_immediate_exit ( 1 ) ;
609700 fence ( Ordering :: Release ) ;
610- self . vcpu_thread
701+ let kill_res = self
702+ . vcpu_thread
611703 . as_ref ( )
612704 // Safe to unwrap since constructor make this 'Some'.
613705 . unwrap ( )
614- . kill ( sigrtmin ( ) + VCPU_RTSIG_OFFSET ) ?;
706+ . kill ( sigrtmin ( ) + VCPU_RTSIG_OFFSET ) ;
707+ debug_seq_log ( format_args ! (
708+ "send_event: set immediate_exit=1, kill result: {:?}" ,
709+ kill_res
710+ ) ) ;
711+ kill_res?;
615712 Ok ( ( ) )
616713 }
617714
@@ -624,13 +721,6 @@ impl VcpuHandle {
624721// Wait for the Vcpu thread to finish execution
625722impl Drop for VcpuHandle {
626723 fn drop ( & mut self ) {
627- // We assume that by the time a VcpuHandle is dropped, other code has run to
628- // get the state machine loop to finish so the thread is ready to join.
629- // The strategy of avoiding more complex messaging protocols during the Drop
630- // helps avoid cycles which were preventing a truly clean shutdown.
631- //
632- // If the code hangs at this point, that means that a Finish event was not
633- // sent by Vmm.
634724 self . vcpu_thread . take ( ) . unwrap ( ) . join ( ) . unwrap ( ) ;
635725 }
636726}
0 commit comments