Skip to content

Commit 827990f

Browse files
ewelsclaude
andcommitted
test: integration test for --quantMode TranscriptomeSAM
End-to-end smoke test in `tests/transcriptome_sam.rs`: * Builds a 2000bp synthetic genome (single chromosome, pseudo-random sequence from an LCG). * Writes a 2-transcript GTF (T1: chr1:101-400 +, T2: chr1:601-900 +). * Generates 20 reads (30bp) drawn alternately from the T1 and T2 exon regions. * Runs `ruSTAR --runMode genomeGenerate` then `alignReads --quantMode TranscriptomeSAM`. * Asserts `Aligned.toTranscriptome.out.bam` is created, > 100 bytes, parses as a valid BAM via noodles, and the header contains exactly two @sq lines named T1 and T2. Uses the same `assert_cmd::Command::cargo_bin` pattern as the existing `phase9_threading.rs` test (inherits the same deprecation warnings; pre-existing parity). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 666bf1d commit 827990f

1 file changed

Lines changed: 170 additions & 0 deletions

File tree

tests/transcriptome_sam.rs

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
//! Integration test for `--quantMode TranscriptomeSAM`.
2+
//!
3+
//! Builds a tiny genome + 2-transcript GTF + a small FASTQ, runs ruSTAR
4+
//! with `--quantMode TranscriptomeSAM`, and asserts that
5+
//! `Aligned.toTranscriptome.out.bam` is produced, is a valid BAM file,
6+
//! and contains at least one record. Acts as a smoke test for the
7+
//! end-to-end pipeline.
8+
9+
use assert_cmd::Command;
10+
use std::fs;
11+
use std::io::Write;
12+
use std::path::PathBuf;
13+
use tempfile::TempDir;
14+
15+
fn generate_genome_seq(seed: u32, length: usize) -> String {
16+
let bases = ['A', 'C', 'G', 'T'];
17+
let mut state = seed;
18+
let mut seq = String::with_capacity(length);
19+
for _ in 0..length {
20+
state = state.wrapping_mul(1103515245).wrapping_add(12345);
21+
seq.push(bases[((state >> 16) & 3) as usize]);
22+
}
23+
seq
24+
}
25+
26+
fn create_fasta(dir: &TempDir) -> (PathBuf, String) {
27+
let fasta_path = dir.path().join("genome.fa");
28+
let mut file = fs::File::create(&fasta_path).unwrap();
29+
// Single chromosome, 2000 bp pseudo-random content.
30+
let chr1_seq = generate_genome_seq(98765, 2000);
31+
writeln!(file, ">chr1").unwrap();
32+
writeln!(file, "{}", chr1_seq).unwrap();
33+
(fasta_path, chr1_seq)
34+
}
35+
36+
fn create_gtf(dir: &TempDir) -> PathBuf {
37+
let gtf_path = dir.path().join("annotations.gtf");
38+
let mut file = fs::File::create(&gtf_path).unwrap();
39+
40+
// Transcript T1: one exon [101, 400] forward (1-based inclusive)
41+
writeln!(
42+
file,
43+
"chr1\ttest\texon\t101\t400\t.\t+\t.\tgene_id \"G1\"; transcript_id \"T1\";"
44+
)
45+
.unwrap();
46+
// Transcript T2: one exon [601, 900] forward
47+
writeln!(
48+
file,
49+
"chr1\ttest\texon\t601\t900\t.\t+\t.\tgene_id \"G2\"; transcript_id \"T2\";"
50+
)
51+
.unwrap();
52+
53+
gtf_path
54+
}
55+
56+
fn create_fastq(dir: &TempDir, n_reads: usize, chr1_seq: &str) -> PathBuf {
57+
let fastq_path = dir.path().join("reads.fq");
58+
let mut file = fs::File::create(&fastq_path).unwrap();
59+
60+
// Alternate reads from T1 region and T2 region.
61+
for i in 0..n_reads {
62+
writeln!(file, "@read{}", i + 1).unwrap();
63+
let start = if i % 2 == 0 { 120 } else { 620 };
64+
let off = (i * 3) % 180;
65+
let s = start + off;
66+
writeln!(file, "{}", &chr1_seq[s..s + 30]).unwrap();
67+
writeln!(file, "+").unwrap();
68+
writeln!(file, "IIIIIIIIIIIIIIIIIIIIIIIIIIIIII").unwrap();
69+
}
70+
71+
fastq_path
72+
}
73+
74+
#[test]
75+
fn transcriptome_sam_end_to_end_smoke_test() {
76+
let tmpdir = TempDir::new().unwrap();
77+
78+
let (fasta_path, chr1_seq) = create_fasta(&tmpdir);
79+
let gtf_path = create_gtf(&tmpdir);
80+
let fastq_path = create_fastq(&tmpdir, 20, &chr1_seq);
81+
82+
let genome_dir = tmpdir.path().join("genome");
83+
let output_dir = tmpdir.path().join("output");
84+
fs::create_dir_all(&output_dir).unwrap();
85+
// STAR's prefix semantics: if prefix ends with `/`, it is treated as a
86+
// directory and files are placed inside it. ruSTAR's `PathBuf::join`
87+
// also handles the trailing slash convention.
88+
let output_prefix = format!("{}/", output_dir.display());
89+
90+
// Build genome index.
91+
Command::cargo_bin("ruSTAR")
92+
.unwrap()
93+
.args([
94+
"--runMode",
95+
"genomeGenerate",
96+
"--genomeDir",
97+
genome_dir.to_str().unwrap(),
98+
"--genomeFastaFiles",
99+
fasta_path.to_str().unwrap(),
100+
"--genomeSAindexNbases",
101+
"5",
102+
])
103+
.assert()
104+
.success();
105+
106+
// Run alignment with --quantMode TranscriptomeSAM.
107+
Command::cargo_bin("ruSTAR")
108+
.unwrap()
109+
.args([
110+
"--runMode",
111+
"alignReads",
112+
"--genomeDir",
113+
genome_dir.to_str().unwrap(),
114+
"--readFilesIn",
115+
fastq_path.to_str().unwrap(),
116+
"--runThreadN",
117+
"1",
118+
"--outFileNamePrefix",
119+
output_prefix.as_str(),
120+
"--sjdbGTFfile",
121+
gtf_path.to_str().unwrap(),
122+
"--quantMode",
123+
"TranscriptomeSAM",
124+
// Permissive mismatch filter so our tiny read set gets through.
125+
"--outFilterMismatchNmax",
126+
"20",
127+
"--outFilterScoreMinOverLread",
128+
"0.3",
129+
"--outFilterMatchNminOverLread",
130+
"0.3",
131+
])
132+
.assert()
133+
.success();
134+
135+
// The transcriptome BAM must exist.
136+
let tr_bam = output_dir.join("Aligned.toTranscriptome.out.bam");
137+
assert!(
138+
tr_bam.exists(),
139+
"Aligned.toTranscriptome.out.bam was not created at {:?}",
140+
tr_bam
141+
);
142+
143+
// File size sanity: BAM header + at least BGZF EOF marker > 100 bytes.
144+
let meta = fs::metadata(&tr_bam).unwrap();
145+
assert!(
146+
meta.len() > 100,
147+
"transcriptome BAM is suspiciously small: {} bytes",
148+
meta.len()
149+
);
150+
151+
// Validate it's readable as a BAM and check the header has our
152+
// two @SQ lines (T1 + T2).
153+
use noodles::bam;
154+
use std::fs::File;
155+
let mut reader = bam::io::Reader::new(File::open(&tr_bam).unwrap());
156+
let header = reader.read_header().expect("valid BAM header");
157+
let refs = header.reference_sequences();
158+
assert_eq!(
159+
refs.len(),
160+
2,
161+
"expected 2 @SQ entries (T1, T2), got {}",
162+
refs.len()
163+
);
164+
let keys: Vec<String> = refs
165+
.keys()
166+
.map(|k| String::from_utf8_lossy(k).to_string())
167+
.collect();
168+
assert!(keys.iter().any(|k| k == "T1"), "T1 missing from @SQ");
169+
assert!(keys.iter().any(|k| k == "T2"), "T2 missing from @SQ");
170+
}

0 commit comments

Comments
 (0)