From de12b8a1e03bab7566768289e306de7beb570321 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Wed, 19 Feb 2025 21:29:55 +0000 Subject: [PATCH 1/2] feat(find_longtime_files): add new tool for analyzing log file processing times - Implemented a new tool to analyze log files and extract processing durations. - Added progress bar to enhance user feedback during file processing. - Included functions to remove ANSI codes and extract timestamps and filenames. - Utilized `chrono`, `regex`, and `indicatif` dependencies for functionality. --- Cargo.toml | 3 +- find_longtime_files/Cargo.toml | 9 +++ find_longtime_files/src/main.rs | 129 ++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 find_longtime_files/Cargo.toml create mode 100644 find_longtime_files/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 1381623..ae779dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,8 @@ members = [ "hit_rate_converter", "cat_xlsx", "find_files_in_list", - "random_pairs_of_s3file" + "random_pairs_of_s3file", + "find_longtime_files", # Add other tools here ] resolver = "2" # Add this line to specify resolver version 2 diff --git a/find_longtime_files/Cargo.toml b/find_longtime_files/Cargo.toml new file mode 100644 index 0000000..933a757 --- /dev/null +++ b/find_longtime_files/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "find_longtime_files" +version = "0.1.0" +edition = "2021" + +[dependencies] +chrono = "0.4" +regex = "1" +indicatif = "0.17.11" diff --git a/find_longtime_files/src/main.rs b/find_longtime_files/src/main.rs new file mode 100644 index 0000000..48dd10b --- /dev/null +++ b/find_longtime_files/src/main.rs @@ -0,0 +1,129 @@ +use chrono::NaiveDateTime; +use indicatif::{ProgressBar, ProgressStyle}; +use regex::Regex; +use std::env; +use std::fs::File; +use std::io::{BufRead, BufReader}; + +/// Remove ANSI escape codes from a string. +fn remove_ansi_codes(s: &str) -> String { + // This regex matches ANSI escape sequences. + let ansi_re = Regex::new(r"\x1B\[[0-9;]*[a-zA-Z]").unwrap(); + ansi_re.replace_all(s, "").to_string() +} + +/// Extract the timestamp from a line (first two whitespace-separated tokens). +fn extract_timestamp(line: &str) -> Option { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() < 2 { + None + } else { + Some(format!("{} {}", parts[0], parts[1])) + } +} + +/// Extract the filename from the line using a regex. +/// The regex captures the filename following "The format of" and before "is ". +fn extract_filename(line: &str) -> Option { + let re = Regex::new(r"The format of\s+(\S+)\s+is\s+\S+").unwrap(); + re.captures(line) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) +} + +fn main() { + // Check command-line arguments. + let args: Vec = env::args().collect(); + if args.len() != 3 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + let num_files: usize = args[1].parse().expect("Invalid number-of-files"); + let log_file = &args[2]; + + // Open the log file. + let file = File::open(log_file).unwrap_or_else(|err| { + eprintln!("Error opening {}: {}", log_file, err); + std::process::exit(1); + }); + // Get file metadata to determine total file size. + let metadata = file.metadata().expect("Failed to get metadata"); + let total_size = metadata.len(); + + let reader = BufReader::new(file); + + // Create a progress bar based on total file size. + let pb = ProgressBar::new(total_size); + pb.set_style( + ProgressStyle::default_bar() + .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta})") + .unwrap() + .progress_chars("##-"), + ); + + // We'll store (duration_in_seconds, filename) pairs. + let mut diffs: Vec<(f64, String)> = Vec::new(); + + // Variables to hold the previous log entry's timestamp and file name. + let mut prev_dt: Option = None; + let mut prev_file: Option = None; + + // Timestamp format for milliseconds (3 digits). + let timestamp_format = "%Y-%m-%d %H:%M:%S.%3f"; + + for line in reader.lines() { + // Update progress bar with the length of the line plus newline. + let line = match line { + Ok(l) => l, + Err(e) => { + eprintln!("Error reading line: {}", e); + continue; + } + }; + pb.inc((line.len() + 1) as u64); + + // Remove ANSI escape sequences. + let clean_line = remove_ansi_codes(&line); + + // Extract the timestamp string. + let ts_str = match extract_timestamp(&clean_line) { + Some(ts) => ts, + None => continue, + }; + + // Parse the timestamp as a NaiveDateTime (without timezone). + let naive_dt = match NaiveDateTime::parse_from_str(&ts_str, timestamp_format) { + Ok(dt) => dt, + Err(e) => { + eprintln!("Error parsing date '{}': {}", ts_str, e); + continue; + } + }; + + // Extract the filename. + let filename = match extract_filename(&clean_line) { + Some(f) => f, + None => continue, + }; + + // If we have a previous timestamp, compute the duration. + if let (Some(prev), Some(prev_filename)) = (prev_dt, &prev_file) { + let duration = naive_dt.signed_duration_since(prev); + let diff_seconds = duration.num_microseconds().unwrap_or(0) as f64 / 1_000_000.0; + diffs.push((diff_seconds, prev_filename.clone())); + } + + // Update the previous values. + prev_dt = Some(naive_dt); + prev_file = Some(filename); + } + + pb.finish_with_message("Processing complete"); + + // Sort by processing time in descending order. + diffs.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + + println!("Top {} files with longest processing times:", num_files); + for (i, (duration, file)) in diffs.iter().take(num_files).enumerate() { + println!("{}. {} took {:.6} seconds", i + 1, file, duration); + } +} From e60f7d2d29f28658b7bf9822dfb7a9bb936244ee Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Wed, 19 Feb 2025 21:40:29 +0000 Subject: [PATCH 2/2] fix: update dependencies and improve error handling - Add `clap` for command-line argument parsing. - Add `anyhow` for better error handling. - Refactor main function to improve readability and error context. - Ensure proper extraction and parsing of timestamps in log files. --- find_longtime_files/Cargo.toml | 2 + find_longtime_files/src/main.rs | 74 ++++++++++++++++----------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/find_longtime_files/Cargo.toml b/find_longtime_files/Cargo.toml index 933a757..0ddc6e9 100644 --- a/find_longtime_files/Cargo.toml +++ b/find_longtime_files/Cargo.toml @@ -7,3 +7,5 @@ edition = "2021" chrono = "0.4" regex = "1" indicatif = "0.17.11" +clap = { version = "4.5.30", features = ["derive"] } +anyhow = "1.0.95" diff --git a/find_longtime_files/src/main.rs b/find_longtime_files/src/main.rs index 48dd10b..96e7c93 100644 --- a/find_longtime_files/src/main.rs +++ b/find_longtime_files/src/main.rs @@ -1,13 +1,25 @@ +use anyhow::{Context, Result}; use chrono::NaiveDateTime; +use clap::Parser; use indicatif::{ProgressBar, ProgressStyle}; use regex::Regex; -use std::env; use std::fs::File; use std::io::{BufRead, BufReader}; +/// Find files with the longest processing times in a log file. +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Number of top files to display. + num_files: usize, + + /// Path to the log file. + log_file: String, +} + /// Remove ANSI escape codes from a string. fn remove_ansi_codes(s: &str) -> String { - // This regex matches ANSI escape sequences. + // Regex to match ANSI escape sequences. let ansi_re = Regex::new(r"\x1B\[[0-9;]*[a-zA-Z]").unwrap(); ansi_re.replace_all(s, "").to_string() } @@ -22,36 +34,26 @@ fn extract_timestamp(line: &str) -> Option { } } -/// Extract the filename from the line using a regex. -/// The regex captures the filename following "The format of" and before "is ". +/// Extract the filename from a line using a regex. +/// Captures the filename following "The format of" and before "is ". fn extract_filename(line: &str) -> Option { let re = Regex::new(r"The format of\s+(\S+)\s+is\s+\S+").unwrap(); re.captures(line) .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) } -fn main() { - // Check command-line arguments. - let args: Vec = env::args().collect(); - if args.len() != 3 { - eprintln!("Usage: {} ", args[0]); - std::process::exit(1); - } - let num_files: usize = args[1].parse().expect("Invalid number-of-files"); - let log_file = &args[2]; +fn main() -> Result<()> { + // Parse command-line arguments using clap. + let args = Args::parse(); // Open the log file. - let file = File::open(log_file).unwrap_or_else(|err| { - eprintln!("Error opening {}: {}", log_file, err); - std::process::exit(1); - }); - // Get file metadata to determine total file size. - let metadata = file.metadata().expect("Failed to get metadata"); + let file = File::open(&args.log_file) + .with_context(|| format!("Error opening log file: {}", args.log_file))?; + let metadata = file.metadata().context("Failed to get file metadata")?; let total_size = metadata.len(); - let reader = BufReader::new(file); - // Create a progress bar based on total file size. + // Create a progress bar based on the total file size. let pb = ProgressBar::new(total_size); pb.set_style( ProgressStyle::default_bar() @@ -67,31 +69,23 @@ fn main() { let mut prev_dt: Option = None; let mut prev_file: Option = None; - // Timestamp format for milliseconds (3 digits). - let timestamp_format = "%Y-%m-%d %H:%M:%S.%3f"; + // Timestamp format: milliseconds (3 digits). + const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S.%3f"; for line in reader.lines() { - // Update progress bar with the length of the line plus newline. - let line = match line { - Ok(l) => l, - Err(e) => { - eprintln!("Error reading line: {}", e); - continue; - } - }; + let line = line.context("Error reading a line")?; pb.inc((line.len() + 1) as u64); // Remove ANSI escape sequences. let clean_line = remove_ansi_codes(&line); - // Extract the timestamp string. + // Extract and parse the timestamp. let ts_str = match extract_timestamp(&clean_line) { Some(ts) => ts, None => continue, }; - // Parse the timestamp as a NaiveDateTime (without timezone). - let naive_dt = match NaiveDateTime::parse_from_str(&ts_str, timestamp_format) { + let naive_dt = match NaiveDateTime::parse_from_str(&ts_str, TIMESTAMP_FORMAT) { Ok(dt) => dt, Err(e) => { eprintln!("Error parsing date '{}': {}", ts_str, e); @@ -105,14 +99,13 @@ fn main() { None => continue, }; - // If we have a previous timestamp, compute the duration. + // If we have a previous timestamp, compute the processing duration. if let (Some(prev), Some(prev_filename)) = (prev_dt, &prev_file) { let duration = naive_dt.signed_duration_since(prev); let diff_seconds = duration.num_microseconds().unwrap_or(0) as f64 / 1_000_000.0; diffs.push((diff_seconds, prev_filename.clone())); } - // Update the previous values. prev_dt = Some(naive_dt); prev_file = Some(filename); } @@ -122,8 +115,13 @@ fn main() { // Sort by processing time in descending order. diffs.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); - println!("Top {} files with longest processing times:", num_files); - for (i, (duration, file)) in diffs.iter().take(num_files).enumerate() { + println!( + "Top {} files with longest processing times:", + args.num_files + ); + for (i, (duration, file)) in diffs.iter().take(args.num_files).enumerate() { println!("{}. {} took {:.6} seconds", i + 1, file, duration); } + + Ok(()) }