Skip to content

Commit 15d7bb5

Browse files
authored
Merge pull request #10 from sangshuduo/feat/sangshuduo/find-longtime-files
feat(find_longtime_files): add new tool for analyzing log file proces…
2 parents 6effea5 + e60f7d2 commit 15d7bb5

File tree

3 files changed

+140
-1
lines changed

3 files changed

+140
-1
lines changed

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ members = [
88
"hit_rate_converter",
99
"cat_xlsx",
1010
"find_files_in_list",
11-
"random_pairs_of_s3file"
11+
"random_pairs_of_s3file",
12+
"find_longtime_files",
1213
# Add other tools here
1314
]
1415
resolver = "2" # Use Cargo's feature resolver version 2 for this workspace

find_longtime_files/Cargo.toml

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "find_longtime_files"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
chrono = "0.4"
8+
regex = "1"
9+
indicatif = "0.17.11"
10+
clap = { version = "4.5.30", features = ["derive"] }
11+
anyhow = "1.0.95"

find_longtime_files/src/main.rs

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
use anyhow::{Context, Result};
2+
use chrono::NaiveDateTime;
3+
use clap::Parser;
4+
use indicatif::{ProgressBar, ProgressStyle};
5+
use regex::Regex;
6+
use std::fs::File;
7+
use std::io::{BufRead, BufReader};
8+
9+
/// Find files with the longest processing times in a log file.
10+
#[derive(Parser, Debug)]
11+
#[command(author, version, about, long_about = None)]
12+
struct Args {
13+
/// Number of top files to display.
14+
num_files: usize,
15+
16+
/// Path to the log file.
17+
log_file: String,
18+
}
19+
20+
/// Remove ANSI escape codes from a string.
21+
fn remove_ansi_codes(s: &str) -> String {
22+
// Regex to match ANSI escape sequences.
23+
let ansi_re = Regex::new(r"\x1B\[[0-9;]*[a-zA-Z]").unwrap();
24+
ansi_re.replace_all(s, "").to_string()
25+
}
26+
27+
/// Extract the timestamp from a line (first two whitespace-separated tokens).
///
/// Returns the two leading tokens joined by a single space, or `None` when the
/// line contains fewer than two tokens. The tokens are not validated here; the
/// caller decides whether the result parses as a date/time.
fn extract_timestamp(line: &str) -> Option<String> {
    // Only the first two tokens are needed, so take them straight from the lazy
    // iterator instead of collecting every token of the line into a Vec.
    let mut tokens = line.split_whitespace();
    let date = tokens.next()?;
    let time = tokens.next()?;
    Some(format!("{date} {time}"))
}
36+
37+
/// Extract the filename from a line using a regex.
38+
/// Captures the filename following "The format of" and before "is <format>".
39+
fn extract_filename(line: &str) -> Option<String> {
40+
let re = Regex::new(r"The format of\s+(\S+)\s+is\s+\S+").unwrap();
41+
re.captures(line)
42+
.and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
43+
}
44+
45+
fn main() -> Result<()> {
46+
// Parse command-line arguments using clap.
47+
let args = Args::parse();
48+
49+
// Open the log file.
50+
let file = File::open(&args.log_file)
51+
.with_context(|| format!("Error opening log file: {}", args.log_file))?;
52+
let metadata = file.metadata().context("Failed to get file metadata")?;
53+
let total_size = metadata.len();
54+
let reader = BufReader::new(file);
55+
56+
// Create a progress bar based on the total file size.
57+
let pb = ProgressBar::new(total_size);
58+
pb.set_style(
59+
ProgressStyle::default_bar()
60+
.template("[{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta})")
61+
.unwrap()
62+
.progress_chars("##-"),
63+
);
64+
65+
// We'll store (duration_in_seconds, filename) pairs.
66+
let mut diffs: Vec<(f64, String)> = Vec::new();
67+
68+
// Variables to hold the previous log entry's timestamp and file name.
69+
let mut prev_dt: Option<NaiveDateTime> = None;
70+
let mut prev_file: Option<String> = None;
71+
72+
// Timestamp format: milliseconds (3 digits).
73+
const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S.%3f";
74+
75+
for line in reader.lines() {
76+
let line = line.context("Error reading a line")?;
77+
pb.inc((line.len() + 1) as u64);
78+
79+
// Remove ANSI escape sequences.
80+
let clean_line = remove_ansi_codes(&line);
81+
82+
// Extract and parse the timestamp.
83+
let ts_str = match extract_timestamp(&clean_line) {
84+
Some(ts) => ts,
85+
None => continue,
86+
};
87+
88+
let naive_dt = match NaiveDateTime::parse_from_str(&ts_str, TIMESTAMP_FORMAT) {
89+
Ok(dt) => dt,
90+
Err(e) => {
91+
eprintln!("Error parsing date '{}': {}", ts_str, e);
92+
continue;
93+
}
94+
};
95+
96+
// Extract the filename.
97+
let filename = match extract_filename(&clean_line) {
98+
Some(f) => f,
99+
None => continue,
100+
};
101+
102+
// If we have a previous timestamp, compute the processing duration.
103+
if let (Some(prev), Some(prev_filename)) = (prev_dt, &prev_file) {
104+
let duration = naive_dt.signed_duration_since(prev);
105+
let diff_seconds = duration.num_microseconds().unwrap_or(0) as f64 / 1_000_000.0;
106+
diffs.push((diff_seconds, prev_filename.clone()));
107+
}
108+
109+
prev_dt = Some(naive_dt);
110+
prev_file = Some(filename);
111+
}
112+
113+
pb.finish_with_message("Processing complete");
114+
115+
// Sort by processing time in descending order.
116+
diffs.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
117+
118+
println!(
119+
"Top {} files with longest processing times:",
120+
args.num_files
121+
);
122+
for (i, (duration, file)) in diffs.iter().take(args.num_files).enumerate() {
123+
println!("{}. {} took {:.6} seconds", i + 1, file, duration);
124+
}
125+
126+
Ok(())
127+
}

0 commit comments

Comments
 (0)