|
| 1 | +use metrics::gauge; |
| 2 | +use procfs::process::{MemoryMap, MemoryPageFlags, PageInfo, Pfn, Process}; |
| 3 | +use std::collections::HashMap; |
| 4 | +use std::io::{Read, Seek, SeekFrom, Write}; |
| 5 | +use tracing::debug; |
| 6 | + |
/// Address threshold used to tell user mappings from kernel ones: a mapping
/// whose start address is greater than this value is skipped by
/// `Sampler::poll`, because page idle tracking covers user memory only.
///
/// NOTE(review): this looks like the x86-64 direct-mapping base for 4-level
/// paging; confirm it is appropriate for every architecture this runs on.
const PAGE_OFFSET: u64 = 0xffff_8800_0000_0000;
| 8 | + |
/// Errors returned by the working-set-size sampler.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// Wrapper for [`std::io::Error`]
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Wrapper for [`procfs::ProcError`]
    #[error("Unable to read procfs: {0}")]
    Proc(#[from] procfs::ProcError),
}
| 18 | + |
| 19 | +#[derive(Debug)] |
| 20 | +pub(crate) struct Sampler { |
| 21 | + parent_pid: i32, |
| 22 | +} |
| 23 | + |
| 24 | +impl Sampler { |
| 25 | + pub(crate) fn new(parent_pid: i32) -> Result<Self, Error> { |
| 26 | + Ok(Self { parent_pid }) |
| 27 | + } |
| 28 | + |
| 29 | + pub(crate) async fn poll(&mut self) -> Result<(), Error> { |
| 30 | + let page_size = page_size::get(); |
| 31 | + let mut pfn_set = PfnSet::new(); |
| 32 | + |
| 33 | + for process in ProcessDescendentsIterator::new(self.parent_pid) { |
| 34 | + debug!("Process PID: {}", process.pid()); |
| 35 | + let mut pagemap = process.pagemap()?; |
| 36 | + for MemoryMap { |
| 37 | + address: (begin, end), |
| 38 | + .. |
| 39 | + } in process.maps()? |
| 40 | + { |
| 41 | + if begin > PAGE_OFFSET { |
| 42 | + continue; // page idle tracking is user mem only |
| 43 | + } |
| 44 | + debug!("Memory region: {:#x} — {:#x}", begin, end); |
| 45 | + let begin = begin as usize / page_size; |
| 46 | + let end = end as usize / page_size; |
| 47 | + for page in pagemap.get_range_info(begin..end)? { |
| 48 | + if let PageInfo::MemoryPage(memory_page_flags) = page { |
| 49 | + if memory_page_flags.contains(MemoryPageFlags::PRESENT) { |
| 50 | + pfn_set.insert(memory_page_flags.get_page_frame_number()); |
| 51 | + } |
| 52 | + } |
| 53 | + } |
| 54 | + } |
| 55 | + } |
| 56 | + |
| 57 | + let mut nb_pages = 0; |
| 58 | + |
| 59 | + // See https://www.kernel.org/doc/html/latest/admin-guide/mm/idle_page_tracking.html |
| 60 | + let mut page_idle_bitmap = std::fs::OpenOptions::new() |
| 61 | + .read(true) |
| 62 | + .write(true) |
| 63 | + .open("/sys/kernel/mm/page_idle/bitmap")?; |
| 64 | + |
| 65 | + for (pfn_block, pfn_bitset) in pfn_set { |
| 66 | + page_idle_bitmap.seek(SeekFrom::Start(pfn_block * 8))?; |
| 67 | + |
| 68 | + let mut buffer = [0; 8]; |
| 69 | + page_idle_bitmap.read_exact(&mut buffer)?; |
| 70 | + let bitset = u64::from_ne_bytes(buffer); |
| 71 | + |
| 72 | + nb_pages += (!bitset & pfn_bitset).count_ones() as usize; |
| 73 | + |
| 74 | + page_idle_bitmap.seek(SeekFrom::Start(pfn_block * 8))?; |
| 75 | + page_idle_bitmap.write_all(&pfn_bitset.to_ne_bytes())?; |
| 76 | + } |
| 77 | + |
| 78 | + gauge!("total_wss_bytes").set((nb_pages * page_size) as f64); |
| 79 | + |
| 80 | + Ok(()) |
| 81 | + } |
| 82 | +} |
| 83 | + |
| 84 | +struct ProcessDescendentsIterator { |
| 85 | + stack: Vec<Process>, |
| 86 | +} |
| 87 | + |
| 88 | +impl ProcessDescendentsIterator { |
| 89 | + fn new(parent_pid: i32) -> Self { |
| 90 | + Self { |
| 91 | + stack: vec![ |
| 92 | + Process::new(parent_pid).expect(format!("process {parent_pid} not found").as_str()), |
| 93 | + ], |
| 94 | + } |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +impl Iterator for ProcessDescendentsIterator { |
| 99 | + type Item = Process; |
| 100 | + |
| 101 | + fn next(&mut self) -> Option<Self::Item> { |
| 102 | + while let Some(process) = self.stack.pop() { |
| 103 | + if let Ok(tasks) = process.tasks() { |
| 104 | + for task in tasks.flatten() { |
| 105 | + if let Ok(children) = task.children() { |
| 106 | + for child in children { |
| 107 | + if let Ok(c) = Process::new(child as i32) { |
| 108 | + self.stack.push(c); |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | + } |
| 113 | + } |
| 114 | + return Some(process); |
| 115 | + } |
| 116 | + None |
| 117 | + } |
| 118 | +} |
| 119 | + |
| 120 | +#[derive(Debug)] |
| 121 | +struct PfnSet(HashMap<u64, u64>); |
| 122 | + |
| 123 | +impl PfnSet { |
| 124 | + fn new() -> Self { |
| 125 | + Self(HashMap::with_capacity(1024)) |
| 126 | + } |
| 127 | + |
| 128 | + fn insert(&mut self, pfn: Pfn) { |
| 129 | + *self.0.entry(pfn.0 / 64).or_default() |= 1 << (pfn.0 % 64); |
| 130 | + } |
| 131 | +} |
| 132 | + |
| 133 | +impl IntoIterator for PfnSet { |
| 134 | + type Item = (u64, u64); |
| 135 | + type IntoIter = std::collections::hash_map::IntoIter<u64, u64>; |
| 136 | + |
| 137 | + fn into_iter(self) -> Self::IntoIter { |
| 138 | + self.0.into_iter() |
| 139 | + } |
| 140 | +} |
| 141 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::io::{BufRead, BufReader};
    use std::process::{Command, Stdio};

    /// Spawns a process tree with a helper script and checks that
    /// `ProcessDescendentsIterator` yields each of its PIDs exactly once.
    #[test]
    fn process_descendants_iterator() {
        const NB_PROCESSES_PER_LEVEL: usize = 3;
        const NB_LEVELS: u32 = 3;
        // The total number of processes is the sum of the first NB_LEVELS + 1
        // terms of the geometric progression with common ratio
        // NB_PROCESSES_PER_LEVEL; the first term (1) is the root process.
        const NB_PROCESSES: usize =
            (NB_PROCESSES_PER_LEVEL.pow(NB_LEVELS + 1) - 1) / (NB_PROCESSES_PER_LEVEL - 1);

        let mut child = Command::new("src/observer/linux/wss/tests/create_process_tree.py")
            .arg(NB_PROCESSES_PER_LEVEL.to_string())
            .arg(NB_LEVELS.to_string())
            .stdout(Stdio::piped())
            .spawn()
            .expect("Failed to create process tree");
        let root_pid = i32::try_from(child.id()).expect("Root PID does not fit in i32");

        let mut children_pids = HashSet::with_capacity(NB_PROCESSES);
        children_pids.insert(root_pid);

        // The script is expected to print one descendant PID per line.
        let mut reader = BufReader::new(child.stdout.take().unwrap());
        for _ in 0..NB_PROCESSES - 1 {
            let mut line = String::new();
            reader.read_line(&mut line).expect("Failed to read line");
            let pid: i32 = line.trim().parse().expect("Failed to parse PID");
            assert!(children_pids.insert(pid));
        }

        // Walk the tree while it is alive, then tear it down before asserting
        // so that a failed assertion does not skip the cleanup.
        let visited: Vec<i32> = ProcessDescendentsIterator::new(root_pid)
            .map(|process| process.pid())
            .collect();

        nix::sys::signal::kill(
            nix::unistd::Pid::from_raw(root_pid),
            nix::sys::signal::Signal::SIGTERM,
        )
        .expect("Failed to kill process tree");

        child
            .wait()
            .expect("Failed to wait for process tree completion");

        for pid in visited {
            assert!(
                children_pids.remove(&pid),
                "ProcessDescendentsIterator returned unexpected PID {pid}"
            );
        }
        assert!(
            children_pids.is_empty(),
            "ProcessDescendentsIterator didn’t return all PIDs: {children_pids:?}"
        );
    }

    /// Checks that `PfnSet` groups frame numbers into 64-bit blocks.
    #[test]
    fn pfn_set() {
        let input = [0, 64, 65, 66, 128, 136, 255];
        let expected: [(u64, u64); 4] =
            [(0, 0x1), (1, 0x7), (2, 0x101), (3, 0x8000_0000_0000_0000)];

        let mut pfn_set = PfnSet::new();
        for i in input {
            pfn_set.insert(Pfn(i));
        }
        let mut output: Vec<_> = pfn_set.into_iter().collect();
        output.sort_by_key(|(k, _)| *k);

        assert_eq!(output, expected);
    }
}
0 commit comments