Skip to content

Commit 81da5c9

Browse files
committed
Rewrite perf parser to be more robust
This also changes accelerant's perf parsing code to factor in the period of a sample when summing hit counts.
1 parent c19a25d commit 81da5c9

File tree

7 files changed

+212
-45
lines changed

7 files changed

+212
-45
lines changed

perfparser-py/Cargo.lock

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

perfparser-py/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ crate-type = ["cdylib"]
1010

1111
[dependencies]
1212
pyo3 = "0.23.3"
13+
perfparser = { path = "../perfparser" }

perfparser-py/src/perf.rs

Lines changed: 17 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
use std::cmp;
22
use std::collections::HashMap;
3-
use std::io::{self, BufRead, BufReader};
3+
use std::io;
44
use std::path::Path;
55
use std::process::Command;
66

7+
use perfparser::Parser;
78
use pyo3::{pyclass, pymethods};
89

910
use crate::LineLoc;
1011

1112
pub fn run_perf_script(data_path: &Path) -> io::Result<Vec<u8>> {
1213
let output = Command::new("perf")
13-
.args(&["script", "-Fip,srcline", "--full-source-path", "-i"])
14+
.args(&["script", "-F+srcline", "--full-source-path", "-i"])
1415
.arg(data_path)
1516
.output()?;
1617
if output.status.success() {
@@ -27,14 +28,22 @@ pub fn parse_and_attribute<R: io::Read>(r: R, project_root: &Path) -> io::Result
2728
.and_then(Path::to_str)
2829
.map(str::to_owned)
2930
};
30-
let mut lines = BufReader::new(r).lines().filter_map(Result::ok);
31+
let parser = Parser::new(r);
3132
let mut hit_count = HashMap::new();
3233

33-
loop {
34-
match extract_srcline_from_perf_entry(&mut lines, is_srcline_good) {
35-
Err(ExtractError::Done) => break,
36-
Err(ExtractError::Invalid) => continue,
37-
Ok(loc) => *hit_count.entry(loc).or_default() += 1,
34+
for event in parser {
35+
let lineloc = event
36+
.stack
37+
.iter()
38+
.filter_map(|frame| frame.srcline.as_ref())
39+
.find_map(|srcline| {
40+
is_srcline_good(Path::new(&srcline.path)).map(|path| LineLoc {
41+
path,
42+
line: srcline.line as u64,
43+
})
44+
});
45+
if let Some(lineloc) = lineloc {
46+
*hit_count.entry(lineloc).or_insert(0) += event.period.unwrap_or(1) as u64;
3847
}
3948
}
4049

@@ -45,43 +54,6 @@ pub fn parse_and_attribute<R: io::Read>(r: R, project_root: &Path) -> io::Result
4554
})
4655
}
4756

48-
enum ExtractError {
49-
Invalid,
50-
Done,
51-
}
52-
53-
/// Parses a `perf script` entry from the provided iterator,
54-
/// and returns the first "good" line location from the stack trace,
55-
/// based on the provided callback.
56-
///
57-
/// Meant to be run on output from `perf script -Fip,srcline --full-source-path`.
58-
fn extract_srcline_from_perf_entry(
59-
mut lines: impl Iterator<Item = String>,
60-
is_srcline_good: impl Fn(&Path) -> Option<String>,
61-
) -> Result<LineLoc, ExtractError> {
62-
loop {
63-
// ip/sym
64-
if lines.next().ok_or(ExtractError::Done)?.trim().is_empty() {
65-
return Err(ExtractError::Invalid);
66-
}
67-
// srcline
68-
let srcline_raw = lines.next().ok_or(ExtractError::Done)?;
69-
let srcline = srcline_raw.trim();
70-
if srcline.is_empty() {
71-
return Err(ExtractError::Invalid);
72-
}
73-
let (loc, _) = srcline.split_once(" ").unwrap_or((srcline, ""));
74-
if !loc.contains(':') {
75-
return Err(ExtractError::Invalid);
76-
}
77-
let (path, lineno_str) = loc.rsplit_once(":").ok_or(ExtractError::Invalid)?;
78-
let line = lineno_str.parse().map_err(|_| ExtractError::Invalid)?;
79-
if let Some(path) = is_srcline_good(Path::new(path)) {
80-
return Ok(LineLoc { path, line });
81-
}
82-
}
83-
}
84-
8557
#[pyclass]
8658
#[derive(Debug)]
8759
pub struct AttributedPerf {

perfparser/Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

perfparser/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[package]
2+
name = "perfparser"
3+
version = "0.1.0"
4+
edition = "2024"
5+
6+
[dependencies]

perfparser/examples/debug.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
use perfparser::Parser;
2+
3+
fn main() {
4+
let parser = Parser::new(std::io::stdin());
5+
for event in parser {
6+
println!("{:?}", event);
7+
}
8+
}

perfparser/src/lib.rs

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
// TODO: make this more generic rather than specific to accelerant's use case
2+
3+
use std::io::{self, BufRead, BufReader};
4+
use std::mem;
5+
6+
#[derive(Debug, Clone, Default)]
7+
pub struct Event {
8+
pub period: Option<usize>,
9+
pub kind: String,
10+
pub stack: Vec<StackFrame>,
11+
}
12+
13+
#[derive(Debug, Clone, Default)]
14+
pub struct StackFrame {
15+
pub funcname: String,
16+
pub srcline: Option<SourceLine>,
17+
}
18+
19+
#[derive(Debug, Clone)]
20+
pub struct SourceLine {
21+
pub path: String,
22+
pub line: usize,
23+
}
24+
25+
const SPECIAL_UNKNOWN: &str = "[unknown]";
26+
27+
pub struct Parser<R> {
28+
src: BufReader<R>,
29+
state: ParserState,
30+
cur_event: Event,
31+
}
32+
33+
impl<R: io::Read> Parser<R> {
34+
pub fn new(reader: R) -> Self {
35+
Self {
36+
src: BufReader::new(reader),
37+
state: ParserState::Start,
38+
cur_event: Event::default(),
39+
}
40+
}
41+
42+
fn parse_event_line(&mut self, line: &str) -> Result<(), ()> {
43+
let mut chunks = line.trim().split(':').filter(|s| !s.is_empty());
44+
let _ = chunks.next();
45+
let Some(period_and_kind) = chunks.next() else {
46+
return Err(());
47+
};
48+
let Some((period_str, kind)) = period_and_kind.trim().split_once(' ') else {
49+
return Err(());
50+
};
51+
self.cur_event.period = period_str.parse().ok();
52+
self.cur_event.kind = kind.to_owned();
53+
54+
if let Some(single_stack_line) = chunks.next() {
55+
// Combined event/stack line
56+
if self.parse_stack_line(single_stack_line).is_err() {
57+
self.state = ParserState::AfterEventLine;
58+
return Err(());
59+
}
60+
self.state = ParserState::AfterCombinedLine;
61+
} else {
62+
self.state = ParserState::AfterEventLine;
63+
}
64+
Ok(())
65+
}
66+
67+
fn parse_stack_line(&mut self, line: &str) -> Result<(), ()> {
68+
let Some((_addr, rest)) = line.trim().split_once(' ') else {
69+
return Err(());
70+
};
71+
let (funcname, module) = rest
72+
.rsplit_once(" (")
73+
.and_then(|(f, m)| m.strip_suffix(')').map(|m| (f, m)))
74+
.unwrap_or((rest, ""));
75+
let (funcname, _offset) = funcname.rsplit_once('+').unwrap_or((funcname, ""));
76+
77+
self.cur_event.stack.push(StackFrame {
78+
funcname: funcname.to_owned(),
79+
srcline: None,
80+
});
81+
if funcname != SPECIAL_UNKNOWN || module != SPECIAL_UNKNOWN {
82+
self.state = ParserState::AfterStackLine;
83+
}
84+
Ok(())
85+
}
86+
87+
fn parse_src_line(&mut self, line: &str) -> Result<(), ()> {
88+
let line = line.trim();
89+
let (srcinfo, _module) = line.rsplit_once(' ').unwrap_or((line, ""));
90+
let Some((path, lineno_str)) = srcinfo.rsplit_once(':') else {
91+
self.state = ParserState::AfterSrcLine;
92+
return Ok(());
93+
};
94+
let Ok(lineno) = lineno_str.parse::<usize>() else {
95+
return Err(());
96+
};
97+
if let Some(last_frame) = self.cur_event.stack.last_mut() {
98+
last_frame.srcline = Some(SourceLine {
99+
path: path.to_owned(),
100+
line: lineno,
101+
});
102+
}
103+
self.state = ParserState::AfterSrcLine;
104+
Ok(())
105+
}
106+
}
107+
108+
impl<R: io::Read> Iterator for Parser<R> {
109+
type Item = Event;
110+
111+
fn next(&mut self) -> Option<Self::Item> {
112+
let mut line = String::new();
113+
loop {
114+
line.clear();
115+
let bytes_read = self.src.read_line(&mut line).ok()?;
116+
if bytes_read == 0 {
117+
// EOF
118+
if self.state != ParserState::Start {
119+
let event = mem::take(&mut self.cur_event);
120+
self.state = ParserState::Start;
121+
return Some(event);
122+
} else {
123+
return None;
124+
}
125+
}
126+
127+
let line = line.trim();
128+
if line.is_empty() {
129+
self.state = ParserState::Start;
130+
return Some(mem::take(&mut self.cur_event));
131+
}
132+
133+
let result = match self.state {
134+
ParserState::Start => self.parse_event_line(line),
135+
ParserState::AfterEventLine => self.parse_stack_line(line),
136+
ParserState::AfterCombinedLine => {
137+
maybe_handle_weird_line(line, self.parse_src_line(line));
138+
self.state = ParserState::Start;
139+
return Some(mem::take(&mut self.cur_event));
140+
}
141+
ParserState::AfterStackLine => self.parse_src_line(line),
142+
ParserState::AfterSrcLine => self.parse_stack_line(line),
143+
};
144+
maybe_handle_weird_line(line, result);
145+
}
146+
}
147+
}
148+
149+
fn maybe_handle_weird_line(line: &str, result: Result<(), ()>) {
150+
if result.is_err() {
151+
// FIXME: use logging infrastructure instead
152+
eprintln!("Weird line: {}", line);
153+
}
154+
}
155+
156+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
157+
enum ParserState {
158+
/// Ready to parse a new event.
159+
Start,
160+
/// After parsing the first line of the event.
161+
AfterEventLine,
162+
/// After parsing a combined event/stack line.
163+
AfterCombinedLine,
164+
/// After parsing a stack line.
165+
AfterStackLine,
166+
/// After parsing a stack srcline line.
167+
AfterSrcLine,
168+
}

0 commit comments

Comments
 (0)