Skip to content

Commit 79fdb5a

Browse files
authored
Merge pull request #11 from sangshuduo/feat/sangshuduo/find-longtime-files
feat(log_processing): add log processing time analysis tool
2 parents 15d7bb5 + 89198fb commit 79fdb5a

File tree

3 files changed

+187
-1
lines changed

3 files changed

+187
-1
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ members = [
99
"cat_xlsx",
1010
"find_files_in_list",
1111
"random_pairs_of_s3file",
12-
"find_longtime_files",
12+
"find_log_processtime",
1313
# Add other tools here
1414
]
1515
resolver = "2" # Use Cargo's feature resolver version 2 for this workspace

find_log_processtime/Cargo.toml

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "find_log_processtime"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
chrono = "0.4"
8+
regex = "1"
9+
indicatif = "0.17.11"
10+
clap = { version = "4.5.30", features = ["derive"] }
11+
anyhow = "1.0.95"

find_log_processtime/src/main.rs

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
use anyhow::{Context, Result};
2+
use chrono::NaiveDateTime;
3+
use clap::{Parser, Subcommand};
4+
use indicatif::{ProgressBar, ProgressStyle};
5+
use regex::Regex;
6+
use std::fs::File;
7+
use std::io::{BufRead, BufReader};
8+
9+
/// One measured entry: how long a single file took to process.
#[derive(Debug)]
struct ProcessingTime {
    /// Elapsed time attributed to the file, in seconds.
    duration: f64,
    /// Name of the file as captured from its log line.
    filename: String,
}
15+
16+
/// Find log processing times from a log file.
17+
#[derive(Parser, Debug)]
18+
#[command(author, version, about, long_about = None)]
19+
struct Args {
20+
#[command(subcommand)]
21+
command: Command,
22+
}
23+
24+
#[derive(Subcommand, Debug)]
25+
enum Command {
26+
/// Display top files with the longest processing times.
27+
Top {
28+
/// Number of top files to display.
29+
num_files: usize,
30+
/// Path to the log file.
31+
log_file: String,
32+
},
33+
/// Calculate and display the average processing time across all files.
34+
Avg {
35+
/// Path to the log file.
36+
log_file: String,
37+
},
38+
}
39+
40+
/// Remove ANSI escape codes from a string.
41+
fn remove_ansi_codes(s: &str) -> String {
42+
// Regex to match ANSI escape sequences.
43+
let ansi_re = Regex::new(r"\x1B\[[0-9;]*[a-zA-Z]").unwrap();
44+
ansi_re.replace_all(s, "").to_string()
45+
}
46+
47+
/// Extract the timestamp from a line (first two whitespace-separated tokens).
///
/// The two tokens are joined with a single space regardless of how much
/// whitespace separated them in the input.
///
/// Returns `None` when the line has fewer than two tokens (including empty
/// or whitespace-only lines).
fn extract_timestamp(line: &str) -> Option<String> {
    // Pull only the two tokens that are needed instead of collecting every
    // token of the line into a temporary `Vec`.
    let mut tokens = line.split_whitespace();
    let date = tokens.next()?;
    let time = tokens.next()?;
    Some(format!("{} {}", date, time))
}
56+
57+
/// Extract the filename from a line using a regex.
58+
/// Captures the filename following "The format of" and before "is <format>".
59+
fn extract_filename(line: &str) -> Option<String> {
60+
let re = Regex::new(r"The format of\s+(\S+)\s+is\s+\S+").unwrap();
61+
re.captures(line)
62+
.and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
63+
}
64+
65+
/// Compute processing time differences from the log file.
66+
/// Returns a vector of ProcessingTime structs containing the duration (in seconds)
67+
/// and the corresponding filename.
68+
fn compute_diffs(log_file: &str) -> Result<Vec<ProcessingTime>> {
69+
// Open the log file.
70+
let file =
71+
File::open(log_file).with_context(|| format!("Error opening log file: {}", log_file))?;
72+
let metadata = file.metadata().context("Failed to get file metadata")?;
73+
let total_size = metadata.len();
74+
let reader = BufReader::new(file);
75+
76+
// Create a progress bar based on the total file size.
77+
let pb = ProgressBar::new(total_size);
78+
pb.set_style(
79+
ProgressStyle::default_bar()
80+
.template("[{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta})")
81+
.unwrap()
82+
.progress_chars("##-"),
83+
);
84+
85+
// Store processing time entries.
86+
let mut diffs: Vec<ProcessingTime> = Vec::new();
87+
88+
// Variables to hold the previous log entry's timestamp and file name.
89+
let mut prev_dt: Option<NaiveDateTime> = None;
90+
let mut prev_file: Option<String> = None;
91+
92+
// Timestamp format: milliseconds (3 digits).
93+
const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S.%3f";
94+
95+
for line in reader.lines() {
96+
let line = line.context("Error reading a line")?;
97+
pb.inc((line.len() + 1) as u64);
98+
99+
// Remove ANSI escape sequences.
100+
let clean_line = remove_ansi_codes(&line);
101+
102+
// Extract and parse the timestamp.
103+
let ts_str = match extract_timestamp(&clean_line) {
104+
Some(ts) => ts,
105+
None => continue,
106+
};
107+
108+
let naive_dt = match NaiveDateTime::parse_from_str(&ts_str, TIMESTAMP_FORMAT) {
109+
Ok(dt) => dt,
110+
Err(e) => {
111+
eprintln!("Error parsing date '{}': {}", ts_str, e);
112+
continue;
113+
}
114+
};
115+
116+
// Extract the filename.
117+
let filename = match extract_filename(&clean_line) {
118+
Some(f) => f,
119+
None => continue,
120+
};
121+
122+
// If we have a previous timestamp, compute the processing duration.
123+
if let (Some(prev), Some(prev_filename)) = (prev_dt, &prev_file) {
124+
let duration = naive_dt.signed_duration_since(prev);
125+
let diff_seconds = duration.num_microseconds().unwrap_or(0) as f64 / 1_000_000.0;
126+
diffs.push(ProcessingTime {
127+
duration: diff_seconds,
128+
filename: prev_filename.clone(),
129+
});
130+
}
131+
132+
prev_dt = Some(naive_dt);
133+
prev_file = Some(filename);
134+
}
135+
136+
pb.finish_with_message("Processing complete");
137+
Ok(diffs)
138+
}
139+
140+
fn main() -> Result<()> {
141+
let args = Args::parse();
142+
143+
match args.command {
144+
Command::Top {
145+
num_files,
146+
log_file,
147+
} => {
148+
let mut diffs = compute_diffs(&log_file)?;
149+
// Sort by processing time in descending order.
150+
diffs.sort_by(|a, b| b.duration.partial_cmp(&a.duration).unwrap());
151+
152+
println!("Top {} files with longest processing times:", num_files);
153+
for (i, entry) in diffs.iter().take(num_files).enumerate() {
154+
println!(
155+
"{}. {} took {:.6} seconds",
156+
i + 1,
157+
entry.filename,
158+
entry.duration
159+
);
160+
}
161+
}
162+
Command::Avg { log_file } => {
163+
let diffs = compute_diffs(&log_file)?;
164+
if diffs.is_empty() {
165+
println!("No processing times found in the log file.");
166+
} else {
167+
let total: f64 = diffs.iter().map(|entry| entry.duration).sum();
168+
let avg = total / (diffs.len() as f64);
169+
println!("Average processing time: {:.6} seconds", avg);
170+
}
171+
}
172+
}
173+
174+
Ok(())
175+
}

0 commit comments

Comments
 (0)