Skip to content

Commit 1a55cae

Browse files
pinin4fjords and claude committed
feat(output): emit Log.out, Log.progress.out, _STARpass1/SJ.out.tab
STAR writes three log files (Log.out, Log.progress.out, and Log.final.out) and keeps two-pass intermediates in <prefix>_STARpass1/. rustar was only writing Log.final.out and was emitting the pass-1 SJ tab as <prefix>SJ.pass1.out.tab at the top level.

Add minimal Log.out (parameters dump + per-phase timestamps) and Log.progress.out (timestamp + mapping speed) writers next to the existing Log.final.out writer. Move the pass-1 SJ tab into <prefix>_STARpass1/SJ.out.tab, creating the _STARpass1 directory first.

The Log.out content is intentionally a stub: it ensures the file exists rather than reproducing STAR's full verbosity; matching STAR's output is a follow-up.

Fixes #28

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 70be24d commit 1a55cae

3 files changed

Lines changed: 175 additions & 9 deletions

File tree

src/lib.rs

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,21 @@ fn align_reads(params: &Parameters) -> anyhow::Result<()> {
258258
stats.write_log_final(&log_path, time_start, time_map_start, time_finish)?;
259259
info!("Wrote {}", log_path.display());
260260

261+
let log_out_path = params.out_file_name_prefix.join("Log.out");
262+
crate::stats::write_log_out(
263+
&log_out_path,
264+
&params,
265+
time_start,
266+
time_map_start,
267+
time_finish,
268+
)?;
269+
info!("Wrote {}", log_out_path.display());
270+
271+
let log_progress_path = params.out_file_name_prefix.join("Log.progress.out");
272+
let total_reads = stats.total_reads.load(std::sync::atomic::Ordering::Relaxed);
273+
crate::stats::write_log_progress(&log_progress_path, total_reads, time_map_start, time_finish)?;
274+
info!("Wrote {}", log_progress_path.display());
275+
261276
// Write ReadsPerGene.out.tab if quantMode GeneCounts was requested.
262277
if let Some(ref ctx) = quant_ctx {
263278
let quant_path = params.out_file_name_prefix.join("ReadsPerGene.out.tab");
@@ -457,13 +472,9 @@ fn run_two_pass(
457472
info!("Two-pass mode: Pass 1 - Junction discovery");
458473
let (sj_stats_pass1, novel_junctions) = run_pass1(index, params)?;
459474

460-
// Write SJ.pass1.out.tab
461-
let pass1_path = params.out_file_name_prefix.join("SJ.pass1.out.tab");
462-
463-
// Create output directory if it doesn't exist
464-
if let Some(parent) = pass1_path.parent() {
465-
std::fs::create_dir_all(parent)?;
466-
}
475+
let pass1_dir = params.out_file_name_prefix.join("_STARpass1");
476+
std::fs::create_dir_all(&pass1_dir)?;
477+
let pass1_path = pass1_dir.join("SJ.out.tab");
467478

468479
info!("Writing pass 1 junctions to {}", pass1_path.display());
469480
sj_stats_pass1.write_output(&pass1_path, &index.genome, params)?;

src/stats.rs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,74 @@ use std::sync::atomic::{AtomicU64, Ordering};
55

66
use crate::align::transcript::{CigarOp, Transcript};
77
use crate::junction::encode_motif;
8+
use crate::params::Parameters;
9+
10+
/// Shared timestamp format used across Log.final.out / Log.out / Log.progress.out.
11+
pub const LOG_TIME_FMT: &str = "%b %d %H:%M:%S";
12+
13+
/// Write a minimal STAR-compatible Log.out alongside Log.final.out.
14+
///
15+
/// Carries a parameters dump and per-phase timestamps. Intentionally a stub
16+
/// rather than a full STAR-verbose-log reproduction.
17+
pub fn write_log_out(
18+
path: &Path,
19+
params: &Parameters,
20+
time_start: chrono::DateTime<chrono::Local>,
21+
time_map_start: chrono::DateTime<chrono::Local>,
22+
time_finish: chrono::DateTime<chrono::Local>,
23+
) -> std::io::Result<()> {
24+
use std::io::Write;
25+
26+
let mut f = std::fs::File::create(path)?;
27+
28+
writeln!(f, "##### Run parameters")?;
29+
writeln!(f, "{:#?}", params)?;
30+
writeln!(f)?;
31+
writeln!(f, "##### Run started")?;
32+
writeln!(f, "{}", time_start.format(LOG_TIME_FMT))?;
33+
writeln!(f)?;
34+
writeln!(f, "##### Mapping started")?;
35+
writeln!(f, "{}", time_map_start.format(LOG_TIME_FMT))?;
36+
writeln!(f)?;
37+
writeln!(f, "##### Mapping finished")?;
38+
writeln!(f, "{}", time_finish.format(LOG_TIME_FMT))?;
39+
40+
Ok(())
41+
}
42+
43+
/// Write a minimal STAR-compatible Log.progress.out alongside Log.final.out.
44+
///
45+
/// A header line plus a single "done" line with the final timestamp and
46+
/// mapping speed (million reads per hour). Intentionally a stub.
47+
pub fn write_log_progress(
48+
path: &Path,
49+
total_reads: u64,
50+
time_map_start: chrono::DateTime<chrono::Local>,
51+
time_finish: chrono::DateTime<chrono::Local>,
52+
) -> std::io::Result<()> {
53+
use std::io::Write;
54+
55+
let elapsed_hours = {
56+
let elapsed = time_finish - time_map_start;
57+
elapsed.num_milliseconds() as f64 / 3_600_000.0
58+
};
59+
let mapping_speed = if elapsed_hours > 0.0 {
60+
total_reads as f64 / elapsed_hours / 1_000_000.0
61+
} else {
62+
0.0
63+
};
64+
65+
let mut f = std::fs::File::create(path)?;
66+
writeln!(f, "# completed\tmapping_speed_M_reads_per_hour")?;
67+
writeln!(
68+
f,
69+
"{}\t{:.2}",
70+
time_finish.format(LOG_TIME_FMT),
71+
mapping_speed
72+
)?;
73+
74+
Ok(())
75+
}
876

977
/// Reason a read could not be mapped
1078
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -1018,4 +1086,69 @@ mod tests {
10181086
stats.record_half_mapped();
10191087
assert_eq!(stats.half_mapped_pairs.load(Ordering::Relaxed), 3);
10201088
}
1089+
1090+
#[test]
fn test_write_log_out_minimal() {
    use chrono::TimeZone;
    use clap::Parser;

    // All three timestamps fall in the same minute (2026-05-12 14:14) and
    // differ only in the seconds field.
    let at_second = |sec: u32| {
        chrono::Local
            .with_ymd_and_hms(2026, 5, 12, 14, 14, sec)
            .unwrap()
    };
    let run_started = at_second(23);
    let map_started = at_second(30);
    let map_finished = at_second(58);

    let cli = Parameters::parse_from(["rustar-aligner", "--readFilesIn", "reads.fq"]);

    let scratch = tempfile::tempdir().unwrap();
    let log_path = scratch.path().join("Log.out");
    write_log_out(&log_path, &cli, run_started, map_started, map_finished).unwrap();

    let written = std::fs::read_to_string(&log_path).unwrap();
    assert!(!written.is_empty(), "Log.out should not be empty");

    // Every section header must be present.
    assert!(written.contains("##### Run parameters"));
    assert!(written.contains("##### Run started"));
    assert!(written.contains("##### Mapping started"));
    assert!(written.contains("##### Mapping finished"));

    // Start and finish timestamps appear in the "May 12 14:14:23" shape.
    assert!(written.contains("May 12 14:14:23"));
    assert!(written.contains("May 12 14:14:58"));

    // The parameters dump should mention at least one familiar field.
    assert!(written.contains("read_files_in"));
}
1122+
1123+
#[test]
fn test_write_log_progress_minimal() {
    use chrono::TimeZone;

    // Mapping window of 28 seconds on 2026-05-12, 14:14:30 -> 14:14:58.
    let at_second = |sec: u32| {
        chrono::Local
            .with_ymd_and_hms(2026, 5, 12, 14, 14, sec)
            .unwrap()
    };
    let map_started = at_second(30);
    let map_finished = at_second(58);

    let scratch = tempfile::tempdir().unwrap();
    let progress_path = scratch.path().join("Log.progress.out");
    write_log_progress(&progress_path, 10_000, map_started, map_finished).unwrap();

    let written = std::fs::read_to_string(&progress_path).unwrap();
    let rows: Vec<&str> = written.lines().collect();
    assert!(rows.len() >= 2, "expected header + data line");

    let header_row = rows[0];
    let data_row = rows[1];
    assert!(header_row.starts_with('#'), "first line should be a header");
    assert!(
        data_row.contains("May 12 14:14:58"),
        "second line should contain the finish timestamp, got: {}",
        data_row
    );

    // The data row is "<timestamp>\t<speed>"; the speed must parse as a float.
    let fields: Vec<&str> = data_row.split('\t').collect();
    assert_eq!(fields.len(), 2, "expected 2 tab-separated columns");
    fields[1]
        .parse::<f64>()
        .expect("second column should parse as a float");
}
10211154
}

tests/alignment_features.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -784,10 +784,32 @@ fn test_two_pass_mode() {
784784
.assert()
785785
.success();
786786

787-
let pass1_path = output_dir.join("SJ.pass1.out.tab");
787+
let pass1_path = output_dir.join("_STARpass1").join("SJ.out.tab");
788788
assert!(
789789
pass1_path.exists(),
790-
"SJ.pass1.out.tab not found — two-pass mode did not write pass-1 junctions"
790+
"_STARpass1/SJ.out.tab not found — two-pass mode did not write pass-1 junctions"
791+
);
792+
let top_level_pass1 = output_dir.join("SJ.pass1.out.tab");
793+
assert!(
794+
!top_level_pass1.exists(),
795+
"SJ.pass1.out.tab should no longer be emitted at the top level"
796+
);
797+
798+
let log_out = output_dir.join("Log.out");
799+
assert!(log_out.exists(), "Log.out not found");
800+
let log_out_content = fs::read_to_string(&log_out).unwrap();
801+
assert!(!log_out_content.is_empty(), "Log.out is empty");
802+
assert!(
803+
log_out_content.contains("##### Run parameters"),
804+
"Log.out missing parameters section"
805+
);
806+
807+
let log_progress = output_dir.join("Log.progress.out");
808+
assert!(log_progress.exists(), "Log.progress.out not found");
809+
let log_progress_content = fs::read_to_string(&log_progress).unwrap();
810+
assert!(
811+
log_progress_content.lines().count() >= 2,
812+
"Log.progress.out should have a header and at least one data line"
791813
);
792814

793815
let sam_path = output_dir.join("Aligned.out.sam");

0 commit comments

Comments
 (0)