Skip to content

Commit 39f4b97

Browse files
Merge pull request #30 from darrell-roberts/push-ykqxqkxovvxn
Add normalization for error message UUIDs in analysis top errors
2 parents 21cc4fc + 365bdc5 commit 39f4b97

6 files changed

Lines changed: 112 additions & 116 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

diagnostic-tui/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ ratatui-widgets = "0.3"
1111
crossterm = "0.29"
1212
chrono = { workspace = true }
1313
arboard = "3"
14+
regex = "1"

diagnostic-tui/src/app.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ impl<'a> App<'a> {
8383

8484
let logs = LogsState::new(all_entries);
8585
let crashes = CrashReportsState::new(has_crashes);
86-
let analysis_data = AnalysisData::compute(all_entries, &report.crash_report_entries);
86+
let analysis_data = AnalysisData::analyze(all_entries, &report.crash_report_entries);
8787

8888
Self {
8989
report,

diagnostic-tui/src/app/analysis_state.rs

Lines changed: 85 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77
use super::pane_state::ScrollablePaneState;
88
use chrono::{DateTime, FixedOffset, TimeDelta};
99
use diagnostic_parser::{LogEntryRef, log_entry::LogLevel, model::CrashReportEntry};
10-
use std::collections::HashMap;
10+
use regex::Regex;
11+
use std::{borrow::Cow, collections::HashMap, sync::LazyLock};
12+
13+
#[cfg(test)]
14+
mod test;
1115

1216
/// Persistent state for the Analysis tab.
1317
pub type AnalysisState = ScrollablePaneState;
@@ -26,14 +30,8 @@ pub struct AnalysisData<'a> {
2630
pub top_errors: Vec<ErrorGroup<'a>>,
2731
/// Per-component health statistics, sorted by error count descending.
2832
pub component_health: Vec<ComponentStats<'a>>,
29-
/// Timeline buckets for error/warn frequency.
30-
pub timeline_buckets: Vec<TimeBucket>,
31-
/// Human-readable label for the bucket width (e.g. "5 min", "1 hour").
32-
pub bucket_label: String,
33-
/// Detected bursts (spikes in error/warn rate).
34-
pub bursts: Vec<BurstInfo>,
35-
/// Detected gaps (periods with no log entries).
36-
pub gaps: Vec<GapInfo>,
33+
/// Timeline
34+
pub time_line: TimeLine,
3735
/// Summary of all panic log entries.
3836
pub panics: Vec<PanicSummary<'a>>,
3937
/// Crash-to-panic correlations.
@@ -43,7 +41,7 @@ pub struct AnalysisData<'a> {
4341
/// A group of deduplicated error messages.
4442
pub struct ErrorGroup<'a> {
4543
/// The normalized/representative message (truncated).
46-
pub message: &'a str,
44+
pub message: Cow<'a, str>,
4745
/// How many entries matched this group.
4846
pub count: usize,
4947
/// Components that produced this error.
@@ -96,20 +94,40 @@ pub struct CrashCorrelation<'a> {
9694
pub matched_panic_message: Option<&'a str>,
9795
}
9896

97+
struct ErrorMessageData<'a> {
98+
count: usize,
99+
normalized_message: Cow<'a, str>,
100+
components: Vec<&'a str>,
101+
}
102+
103+
#[derive(Default)]
104+
struct ComponentCounts {
105+
error_count: usize,
106+
warn_count: usize,
107+
total_count: usize,
108+
}
109+
110+
#[derive(Default)]
111+
pub struct TimeLine {
112+
pub buckets: Vec<TimeBucket>,
113+
pub label: &'static str,
114+
pub bursts: Vec<BurstInfo>,
115+
pub gaps: Vec<GapInfo>,
116+
}
117+
99118
// ---------------------------------------------------------------------------
100119
// Computation
101120
// ---------------------------------------------------------------------------
102121

103122
impl<'a> AnalysisData<'a> {
104123
/// Compute all analytics from parsed log entries and crash reports.
105-
pub fn compute(entries: &'a [LogEntryRef<'a>], crashes: &'a [CrashReportEntry]) -> Self {
124+
pub fn analyze(entries: &'a [LogEntryRef<'a>], crashes: &'a [CrashReportEntry]) -> Self {
106125
let total_entries = entries.len();
107126

108127
// -- Level counts & component stats (single pass) --
109-
let mut level_counts = [0usize; 5];
110-
let mut component_map: HashMap<&str, (usize, usize, usize)> = HashMap::new();
111-
// (count, components, first full message)
112-
let mut error_map: HashMap<&str, (usize, Vec<&str>, &str)> = HashMap::new();
128+
let mut level_counts = [0; 5];
129+
let mut component_map: HashMap<&str, ComponentCounts> = HashMap::new();
130+
let mut error_map: HashMap<Cow<'_, str>, ErrorMessageData> = HashMap::new();
113131

114132
for entry in entries {
115133
let idx = match entry.level {
@@ -122,34 +140,39 @@ impl<'a> AnalysisData<'a> {
122140
level_counts[idx] += 1;
123141

124142
let comp = entry.source.component;
125-
let stats = component_map.entry(comp).or_insert((0, 0, 0));
126-
stats.2 += 1; // total
143+
let component_counts = component_map.entry(comp).or_default();
144+
component_counts.total_count += 1;
127145
match entry.level {
128-
LogLevel::Error => stats.0 += 1,
129-
LogLevel::Warn => stats.1 += 1,
146+
LogLevel::Error => component_counts.error_count += 1,
147+
LogLevel::Warn => component_counts.warn_count += 1,
130148
_ => {}
131149
}
132150

133151
// Collect errors for deduplication.
134152
if entry.level == LogLevel::Error {
135-
let key = entry.message;
136-
let group = error_map
137-
.entry(key)
138-
.or_insert_with(|| (0, Vec::new(), entry.message));
139-
group.0 += 1;
140-
if !group.1.contains(&comp) {
141-
group.1.push(comp);
153+
let normalized = normalize(entry.message);
154+
let group =
155+
error_map
156+
.entry(normalized.clone())
157+
.or_insert_with(|| ErrorMessageData {
158+
count: 0,
159+
normalized_message: normalized,
160+
components: Vec::new(),
161+
});
162+
group.count += 1;
163+
if !group.components.contains(&comp) {
164+
group.components.push(comp);
142165
}
143166
}
144167
}
145168

146169
// -- Top errors --
147170
let mut top_errors = error_map
148-
.into_iter()
149-
.map(|(_, (count, components, message))| ErrorGroup {
150-
message,
151-
count,
152-
components,
171+
.into_values()
172+
.map(|error_message_data| ErrorGroup {
173+
message: error_message_data.normalized_message,
174+
count: error_message_data.count,
175+
components: error_message_data.components,
153176
})
154177
.collect::<Vec<_>>();
155178
top_errors.sort_by(|a, b| b.count.cmp(&a.count));
@@ -158,11 +181,11 @@ impl<'a> AnalysisData<'a> {
158181
// -- Component health --
159182
let mut component_health = component_map
160183
.into_iter()
161-
.map(|(component, (e, w, t))| ComponentStats {
184+
.map(|(component, component_counts)| ComponentStats {
162185
component,
163-
error_count: e,
164-
warn_count: w,
165-
total_count: t,
186+
error_count: component_counts.error_count,
187+
warn_count: component_counts.warn_count,
188+
total_count: component_counts.total_count,
166189
})
167190
.collect::<Vec<_>>();
168191
component_health.sort_by(|a, b| {
@@ -172,7 +195,7 @@ impl<'a> AnalysisData<'a> {
172195
});
173196

174197
// -- Timeline --
175-
let (timeline_buckets, bucket_label, bursts, gaps) = compute_timeline(entries);
198+
let time_line = compute_timeline(entries);
176199

177200
// -- Panics --
178201
let panics = entries
@@ -208,10 +231,7 @@ impl<'a> AnalysisData<'a> {
208231
total_entries,
209232
top_errors,
210233
component_health,
211-
timeline_buckets,
212-
bucket_label,
213-
bursts,
214-
gaps,
234+
time_line,
215235
panics,
216236
crash_correlations,
217237
}
@@ -222,19 +242,17 @@ impl<'a> AnalysisData<'a> {
222242
// Timeline computation
223243
// ---------------------------------------------------------------------------
224244

225-
fn compute_timeline(
226-
entries: &[LogEntryRef<'_>],
227-
) -> (Vec<TimeBucket>, String, Vec<BurstInfo>, Vec<GapInfo>) {
245+
fn compute_timeline(entries: &[LogEntryRef<'_>]) -> TimeLine {
228246
if entries.is_empty() {
229-
return (Vec::new(), String::new(), Vec::new(), Vec::new());
247+
return TimeLine::default();
230248
}
231249

232250
let Some((first_ts, last_ts)) = entries
233251
.first()
234252
.map(|first| first.timestamp)
235253
.zip(entries.last().map(|last| last.timestamp))
236254
else {
237-
return (Vec::new(), String::new(), Vec::new(), Vec::new());
255+
return TimeLine::default();
238256
};
239257

240258
let span = last_ts - first_ts;
@@ -282,7 +300,12 @@ fn compute_timeline(
282300
// We track zero-activity by checking if error+warn is 0 in consecutive buckets.
283301
let gaps = detect_gaps(&buckets, bucket_delta, entries, first_ts, bucket_secs);
284302

285-
(buckets, label.to_string(), bursts, gaps)
303+
TimeLine {
304+
buckets,
305+
label,
306+
bursts,
307+
gaps,
308+
}
286309
}
287310

288311
fn detect_bursts(buckets: &[TimeBucket], counts: &[u64]) -> Vec<BurstInfo> {
@@ -384,54 +407,12 @@ fn detect_gaps(
384407
// Helpers
385408
// ---------------------------------------------------------------------------
386409

387-
/// Normalize an error message for deduplication grouping.
388-
/// Takes first 80 chars and replaces long hex sequences and numeric runs.
389-
#[expect(dead_code, reason = "May use this again as an option")]
390-
fn normalize_error_message(msg: &str) -> String {
391-
let truncated = truncate_str(msg, 80);
392-
let mut result = String::with_capacity(truncated.len());
393-
let mut run_buf = String::new();
394-
let mut hex_run = 0;
395-
let mut digit_run = 0;
396-
397-
for ch in truncated.chars() {
398-
if ch.is_ascii_hexdigit() {
399-
hex_run += 1;
400-
if ch.is_ascii_digit() {
401-
digit_run += 1;
402-
}
403-
run_buf.push(ch);
404-
} else {
405-
if hex_run >= 8 {
406-
result.push_str("<id>");
407-
} else if digit_run >= 5 {
408-
result.push_str("<N>");
409-
} else {
410-
result.push_str(&run_buf);
411-
}
412-
run_buf.clear();
413-
hex_run = 0;
414-
digit_run = 0;
415-
result.push(ch);
416-
}
417-
}
418-
// Flush trailing run.
419-
if hex_run >= 8 {
420-
result.push_str("<id>");
421-
} else if digit_run >= 5 {
422-
result.push_str("<N>");
423-
} else {
424-
result.push_str(&run_buf);
425-
}
410+
/// Matcher for a 26-character identifier made up of uppercase ASCII letters and digits.
411+
static ID_MATCHER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[A-Z0-9]{26}").unwrap());
426412

427-
result
428-
}
429-
430-
fn truncate_str(s: &str, max_chars: usize) -> &str {
431-
match s.char_indices().nth(max_chars) {
432-
Some((idx, _)) => &s[..idx],
433-
None => s,
434-
}
413+
/// Add any known IDs to be stripped out for error message normalization.
414+
fn normalize<'a>(msg: &'a str) -> Cow<'a, str> {
415+
ID_MATCHER.replace_all(msg, "<ID>")
435416
}
436417

437418
// ---------------------------------------------------------------------------
@@ -507,24 +488,25 @@ impl AnalysisData<'_> {
507488
// UI: header, blank, time-range label, blank = 4 Lines
508489
// then Sparkline segment (height=3)
509490
// then blank, bursts/gaps lines, trailing blank
510-
if !self.timeline_buckets.is_empty() {
491+
if !self.time_line.buckets.is_empty() {
511492
lines.extend([
512-
format!("Timeline — errors + warns per {}", self.bucket_label),
493+
format!("Timeline — errors + warns per {}", self.time_line.label),
513494
String::new(),
514495
]);
515496

516497
// Time range label (matches UI line).
517498
if let Some((first, last)) = self
518-
.timeline_buckets
499+
.time_line
500+
.buckets
519501
.first()
520-
.zip(self.timeline_buckets.last())
502+
.zip(self.time_line.buckets.last())
521503
{
522504
lines.extend([
523505
format!(
524506
" {} — {} ({} buckets)",
525507
first.start.format("%H:%M"),
526508
last.start.format("%H:%M"),
527-
self.timeline_buckets.len()
509+
self.time_line.buckets.len()
528510
),
529511
String::new(),
530512
]);
@@ -539,23 +521,23 @@ impl AnalysisData<'_> {
539521
]);
540522

541523
// Bursts.
542-
if !self.bursts.is_empty() {
524+
if !self.time_line.bursts.is_empty() {
543525
lines.push(" Bursts detected:".to_string());
544-
lines.extend(self.bursts.iter().map(|burst| {
526+
lines.extend(self.time_line.bursts.iter().map(|burst| {
545527
format!(
546528
" {} — {} errors+warns in {}",
547529
burst.start.format("%H:%M"),
548530
burst.count,
549-
self.bucket_label
531+
self.time_line.label
550532
)
551533
}));
552534
lines.push(String::new());
553535
}
554536

555537
// Gaps.
556-
if !self.gaps.is_empty() {
538+
if !self.time_line.gaps.is_empty() {
557539
lines.push(" Gaps detected:".to_string());
558-
lines.extend(self.gaps.iter().map(|gap| {
540+
lines.extend(self.time_line.gaps.iter().map(|gap| {
559541
let duration = if gap.duration.num_hours() > 0 {
560542
format!(
561543
"{}h {}m",
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#[test]
2+
fn test_normalized() {
3+
let s = "Error creating system vault for account (YFIYEKAI6NGMZOW6AR3CQBCPRE): FetchDataError(FetchError(<unknown reason>, code: HttpStatus(400), Session ID: Some(Session ID: FPQYCQJLX5EDNL4CN3UW65SQOA)))";
4+
let normalized = super::normalize(s);
5+
assert_eq!(
6+
"Error creating system vault for account (<ID>): FetchDataError(FetchError(<unknown reason>, code: HttpStatus(400), Session ID: Some(Session ID: <ID>)))",
7+
normalized,
8+
);
9+
}

0 commit comments

Comments
 (0)