@@ -37,10 +37,47 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
3737use std:: sync:: Arc ;
3838use std:: time:: Duration ;
3939
40+ use tokio:: time:: MissedTickBehavior ;
41+
42+ use crate :: observation:: epoch_ms;
43+
44+ /// Tracks real PCIe activity so the keepalive can skip redundant
45+ /// synthetic heartbeats when the device is actively being used.
46+ ///
47+ /// Share an `ActivityTracker` between the keepalive and any code that
48+ /// performs PCIe operations (VFIO opens, config reads, BAR accesses).
49+ #[ derive( Debug , Clone , Default ) ]
50+ pub struct ActivityTracker ( Arc < AtomicU64 > ) ;
51+
52+ impl ActivityTracker {
53+ /// Create a new tracker with no recorded activity.
54+ #[ must_use]
55+ pub fn new ( ) -> Self {
56+ Self ( Arc :: new ( AtomicU64 :: new ( 0 ) ) )
57+ }
58+
59+ /// Record that real PCIe traffic just occurred.
60+ pub fn record ( & self ) {
61+ self . 0 . store ( epoch_ms ( ) , Ordering :: Release ) ;
62+ }
63+
64+ /// Milliseconds since the last recorded activity (`u64::MAX` if none).
65+ #[ must_use]
66+ pub fn ms_since_last ( & self ) -> u64 {
67+ let last = self . 0 . load ( Ordering :: Acquire ) ;
68+ if last == 0 {
69+ return u64:: MAX ;
70+ }
71+ epoch_ms ( ) . saturating_sub ( last)
72+ }
73+ }
74+
4075/// Keepalive state for a PLX-bridged device.
4176///
4277/// Performs periodic config space reads on the device and its bridge
4378/// ancestry to prevent the kernel from putting the PLX fabric into D3cold.
79+ /// Uses `tokio::time::interval` for drift-free timing with immediate first
80+ /// tick, and skips synthetic heartbeats when real PCIe traffic was recent.
4481#[ derive( Debug ) ]
4582pub struct PlxKeepalive {
4683 /// PCI BDF of the device behind the PLX bridge.
@@ -52,6 +89,9 @@ pub struct PlxKeepalive {
5289 /// All BDFs in the bridge hierarchy (device + ancestors).
5390 /// Populated by [`detect_bridge_chain`].
5491 bridge_chain : Vec < String > ,
92+
93+ /// Optional activity tracker for backpressure.
94+ activity : Option < ActivityTracker > ,
5595}
5696
5797/// Handle to a running keepalive task.
@@ -112,6 +152,7 @@ impl PlxKeepalive {
112152 bdf : bdf. to_string ( ) ,
113153 interval,
114154 bridge_chain,
155+ activity : None ,
115156 }
116157 }
117158
@@ -121,6 +162,15 @@ impl PlxKeepalive {
121162 Self :: new ( bdf, Duration :: from_secs ( secs) )
122163 }
123164
165+ /// Attach an [`ActivityTracker`] for backpressure. When real PCIe
166+ /// traffic is recorded via the tracker, the keepalive skips its
167+ /// synthetic heartbeat for that cycle.
168+ #[ must_use]
169+ pub fn with_activity_tracker ( mut self , tracker : ActivityTracker ) -> Self {
170+ self . activity = Some ( tracker) ;
171+ self
172+ }
173+
124174 /// The BDFs that will be read on each heartbeat.
125175 #[ must_use]
126176 pub fn bridge_chain ( & self ) -> & [ String ] {
@@ -138,6 +188,10 @@ impl PlxKeepalive {
138188 /// Returns a [`KeepaliveHandle`] that can be used to stop the task
139189 /// and query heartbeat count. The task runs until stopped or the
140190 /// tokio runtime shuts down.
191+ ///
192+ /// Uses `tokio::time::interval` with `MissedTickBehavior::Skip` for
193+ /// drift-free scheduling. The first tick fires immediately — no
194+ /// initial delay before the first heartbeat.
141195 #[ must_use]
142196 pub fn spawn ( self ) -> KeepaliveHandle {
143197 let running = Arc :: new ( AtomicBool :: new ( true ) ) ;
@@ -152,12 +206,14 @@ impl PlxKeepalive {
152206 let interval = self . interval ;
153207 let chain = self . bridge_chain ;
154208 let bdf = self . bdf ;
209+ let activity = self . activity ;
155210
156211 tokio:: spawn ( async move {
157212 tracing:: info!(
158213 bdf = %bdf,
159214 chain_len = chain. len( ) ,
160215 interval_ms = interval. as_millis( ) as u64 ,
216+ activity_aware = activity. is_some( ) ,
161217 "PLX keepalive started" ,
162218 ) ;
163219
@@ -166,7 +222,20 @@ impl PlxKeepalive {
166222 crate :: sysfs:: pin_power ( bridge_bdf) ;
167223 }
168224
225+ let mut ticker = tokio:: time:: interval ( interval) ;
226+ ticker. set_missed_tick_behavior ( MissedTickBehavior :: Skip ) ;
227+
169228 while running. load ( Ordering :: Acquire ) {
229+ ticker. tick ( ) . await ;
230+
231+ // Skip synthetic heartbeat if real traffic was recent
232+ if let Some ( ref tracker) = activity {
233+ if tracker. ms_since_last ( ) < interval. as_millis ( ) as u64 {
234+ heartbeats. fetch_add ( 1 , Ordering :: Relaxed ) ;
235+ continue ;
236+ }
237+ }
238+
170239 let mut all_alive = true ;
171240
172241 for target_bdf in & chain {
@@ -183,13 +252,10 @@ impl PlxKeepalive {
183252 heartbeats. fetch_add ( 1 , Ordering :: Relaxed ) ;
184253
185254 if !all_alive {
186- // Re-pin power on all bridges — kernel may have overridden
187255 for bridge_bdf in & chain {
188256 crate :: sysfs:: pin_power ( bridge_bdf) ;
189257 }
190258 }
191-
192- tokio:: time:: sleep ( interval) . await ;
193259 }
194260
195261 tracing:: info!(
@@ -237,6 +303,15 @@ fn config_read_heartbeat(bdf: &str) -> bool {
237303 }
238304}
239305
306+ /// Check whether a sysfs directory name looks like a PCI BDF (`DDDD:BB:DD.F`).
307+ ///
308+ /// Rejects PCI domain roots like `pci0000:40` which contain a colon but
309+ /// no dot, and would otherwise pollute bridge chain walks.
310+ #[ must_use]
311+ pub fn is_pci_bdf ( name : & str ) -> bool {
312+ name. contains ( ':' ) && name. contains ( '.' )
313+ }
314+
240315/// Walk sysfs ancestry to find all PCI bridges between a device and the root.
241316///
242317/// Returns a vec starting with the device BDF, followed by each upstream
@@ -256,8 +331,7 @@ fn detect_bridge_chain(bdf: &str) -> Vec<String> {
256331 break ;
257332 } ;
258333
259- // PCI BDFs contain colons (e.g., "0000:4a:08.0")
260- if !name. contains ( ':' ) {
334+ if !is_pci_bdf ( name) {
261335 break ;
262336 }
263337
@@ -307,6 +381,7 @@ mod tests {
307381 assert_eq ! ( ka. bdf, "9999:99:99.9" ) ;
308382 assert_eq ! ( ka. bridge_chain. len( ) , 1 ) ;
309383 assert ! ( !ka. has_bridges( ) ) ;
384+ assert ! ( ka. activity. is_none( ) ) ;
310385 }
311386
312387 #[ test]
@@ -315,12 +390,61 @@ mod tests {
315390 assert_eq ! ( ka. interval, Duration :: from_secs( 3 ) ) ;
316391 }
317392
393+ #[ test]
394+ fn keepalive_with_activity_tracker ( ) {
395+ let tracker = ActivityTracker :: new ( ) ;
396+ let ka = PlxKeepalive :: new ( "9999:99:99.9" , Duration :: from_secs ( 5 ) )
397+ . with_activity_tracker ( tracker) ;
398+ assert ! ( ka. activity. is_some( ) ) ;
399+ }
400+
401+ #[ test]
402+ fn activity_tracker_initial_state ( ) {
403+ let tracker = ActivityTracker :: new ( ) ;
404+ assert_eq ! ( tracker. ms_since_last( ) , u64 :: MAX ) ;
405+ }
406+
407+ #[ test]
408+ fn activity_tracker_record_and_check ( ) {
409+ let tracker = ActivityTracker :: new ( ) ;
410+ tracker. record ( ) ;
411+ assert ! ( tracker. ms_since_last( ) < 1000 ) ;
412+ }
413+
414+ #[ test]
415+ fn activity_tracker_clone_shares_state ( ) {
416+ let tracker = ActivityTracker :: new ( ) ;
417+ let clone = tracker. clone ( ) ;
418+ tracker. record ( ) ;
419+ assert ! ( clone. ms_since_last( ) < 1000 ) ;
420+ }
421+
318422 #[ test]
319423 fn heartbeat_once_nonexistent ( ) {
320424 let ka = PlxKeepalive :: new ( "9999:99:99.9" , Duration :: from_secs ( 5 ) ) ;
321425 assert ! ( !ka. heartbeat_once( ) ) ;
322426 }
323427
428+ #[ test]
429+ fn is_pci_bdf_valid ( ) {
430+ assert ! ( is_pci_bdf( "0000:49:00.0" ) ) ;
431+ assert ! ( is_pci_bdf( "0000:4a:08.0" ) ) ;
432+ assert ! ( is_pci_bdf( "0001:00:00.0" ) ) ;
433+ }
434+
435+ #[ test]
436+ fn is_pci_bdf_rejects_domain_root ( ) {
437+ assert ! ( !is_pci_bdf( "pci0000:40" ) ) ;
438+ assert ! ( !is_pci_bdf( "pci0000:00" ) ) ;
439+ }
440+
441+ #[ test]
442+ fn is_pci_bdf_rejects_garbage ( ) {
443+ assert ! ( !is_pci_bdf( "" ) ) ;
444+ assert ! ( !is_pci_bdf( "not-a-bdf" ) ) ;
445+ assert ! ( !is_pci_bdf( "pci0000" ) ) ;
446+ }
447+
324448 #[ test]
325449 fn detect_plx_bridge_nonexistent ( ) {
326450 assert ! ( detect_plx_bridge( "9999:99:99.9" ) . is_none( ) ) ;
0 commit comments