|
| 1 | +//! Integration test for `--quantMode TranscriptomeSAM`. |
| 2 | +//! |
| 3 | +//! Builds a tiny genome + 2-transcript GTF + a small FASTQ, runs ruSTAR |
| 4 | +//! with `--quantMode TranscriptomeSAM`, and asserts that |
| 5 | +//! `Aligned.toTranscriptome.out.bam` is produced, is a valid BAM file, |
| 6 | +//! and contains at least one record. Acts as a smoke test for the |
| 7 | +//! end-to-end pipeline. |
| 8 | +
|
| 9 | +use assert_cmd::Command; |
| 10 | +use std::fs; |
| 11 | +use std::io::Write; |
| 12 | +use std::path::PathBuf; |
| 13 | +use tempfile::TempDir; |
| 14 | + |
| 15 | +fn generate_genome_seq(seed: u32, length: usize) -> String { |
| 16 | + let bases = ['A', 'C', 'G', 'T']; |
| 17 | + let mut state = seed; |
| 18 | + let mut seq = String::with_capacity(length); |
| 19 | + for _ in 0..length { |
| 20 | + state = state.wrapping_mul(1103515245).wrapping_add(12345); |
| 21 | + seq.push(bases[((state >> 16) & 3) as usize]); |
| 22 | + } |
| 23 | + seq |
| 24 | +} |
| 25 | + |
| 26 | +fn create_fasta(dir: &TempDir) -> (PathBuf, String) { |
| 27 | + let fasta_path = dir.path().join("genome.fa"); |
| 28 | + let mut file = fs::File::create(&fasta_path).unwrap(); |
| 29 | + // Single chromosome, 2000 bp pseudo-random content. |
| 30 | + let chr1_seq = generate_genome_seq(98765, 2000); |
| 31 | + writeln!(file, ">chr1").unwrap(); |
| 32 | + writeln!(file, "{}", chr1_seq).unwrap(); |
| 33 | + (fasta_path, chr1_seq) |
| 34 | +} |
| 35 | + |
| 36 | +fn create_gtf(dir: &TempDir) -> PathBuf { |
| 37 | + let gtf_path = dir.path().join("annotations.gtf"); |
| 38 | + let mut file = fs::File::create(>f_path).unwrap(); |
| 39 | + |
| 40 | + // Transcript T1: one exon [101, 400] forward (1-based inclusive) |
| 41 | + writeln!( |
| 42 | + file, |
| 43 | + "chr1\ttest\texon\t101\t400\t.\t+\t.\tgene_id \"G1\"; transcript_id \"T1\";" |
| 44 | + ) |
| 45 | + .unwrap(); |
| 46 | + // Transcript T2: one exon [601, 900] forward |
| 47 | + writeln!( |
| 48 | + file, |
| 49 | + "chr1\ttest\texon\t601\t900\t.\t+\t.\tgene_id \"G2\"; transcript_id \"T2\";" |
| 50 | + ) |
| 51 | + .unwrap(); |
| 52 | + |
| 53 | + gtf_path |
| 54 | +} |
| 55 | + |
| 56 | +fn create_fastq(dir: &TempDir, n_reads: usize, chr1_seq: &str) -> PathBuf { |
| 57 | + let fastq_path = dir.path().join("reads.fq"); |
| 58 | + let mut file = fs::File::create(&fastq_path).unwrap(); |
| 59 | + |
| 60 | + // Alternate reads from T1 region and T2 region. |
| 61 | + for i in 0..n_reads { |
| 62 | + writeln!(file, "@read{}", i + 1).unwrap(); |
| 63 | + let start = if i % 2 == 0 { 120 } else { 620 }; |
| 64 | + let off = (i * 3) % 180; |
| 65 | + let s = start + off; |
| 66 | + writeln!(file, "{}", &chr1_seq[s..s + 30]).unwrap(); |
| 67 | + writeln!(file, "+").unwrap(); |
| 68 | + writeln!(file, "IIIIIIIIIIIIIIIIIIIIIIIIIIIIII").unwrap(); |
| 69 | + } |
| 70 | + |
| 71 | + fastq_path |
| 72 | +} |
| 73 | + |
| 74 | +#[test] |
| 75 | +fn transcriptome_sam_end_to_end_smoke_test() { |
| 76 | + let tmpdir = TempDir::new().unwrap(); |
| 77 | + |
| 78 | + let (fasta_path, chr1_seq) = create_fasta(&tmpdir); |
| 79 | + let gtf_path = create_gtf(&tmpdir); |
| 80 | + let fastq_path = create_fastq(&tmpdir, 20, &chr1_seq); |
| 81 | + |
| 82 | + let genome_dir = tmpdir.path().join("genome"); |
| 83 | + let output_dir = tmpdir.path().join("output"); |
| 84 | + fs::create_dir_all(&output_dir).unwrap(); |
| 85 | + // STAR's prefix semantics: if prefix ends with `/`, it is treated as a |
| 86 | + // directory and files are placed inside it. ruSTAR's `PathBuf::join` |
| 87 | + // also handles the trailing slash convention. |
| 88 | + let output_prefix = format!("{}/", output_dir.display()); |
| 89 | + |
| 90 | + // Build genome index. |
| 91 | + Command::cargo_bin("ruSTAR") |
| 92 | + .unwrap() |
| 93 | + .args([ |
| 94 | + "--runMode", |
| 95 | + "genomeGenerate", |
| 96 | + "--genomeDir", |
| 97 | + genome_dir.to_str().unwrap(), |
| 98 | + "--genomeFastaFiles", |
| 99 | + fasta_path.to_str().unwrap(), |
| 100 | + "--genomeSAindexNbases", |
| 101 | + "5", |
| 102 | + ]) |
| 103 | + .assert() |
| 104 | + .success(); |
| 105 | + |
| 106 | + // Run alignment with --quantMode TranscriptomeSAM. |
| 107 | + Command::cargo_bin("ruSTAR") |
| 108 | + .unwrap() |
| 109 | + .args([ |
| 110 | + "--runMode", |
| 111 | + "alignReads", |
| 112 | + "--genomeDir", |
| 113 | + genome_dir.to_str().unwrap(), |
| 114 | + "--readFilesIn", |
| 115 | + fastq_path.to_str().unwrap(), |
| 116 | + "--runThreadN", |
| 117 | + "1", |
| 118 | + "--outFileNamePrefix", |
| 119 | + output_prefix.as_str(), |
| 120 | + "--sjdbGTFfile", |
| 121 | + gtf_path.to_str().unwrap(), |
| 122 | + "--quantMode", |
| 123 | + "TranscriptomeSAM", |
| 124 | + // Permissive mismatch filter so our tiny read set gets through. |
| 125 | + "--outFilterMismatchNmax", |
| 126 | + "20", |
| 127 | + "--outFilterScoreMinOverLread", |
| 128 | + "0.3", |
| 129 | + "--outFilterMatchNminOverLread", |
| 130 | + "0.3", |
| 131 | + ]) |
| 132 | + .assert() |
| 133 | + .success(); |
| 134 | + |
| 135 | + // The transcriptome BAM must exist. |
| 136 | + let tr_bam = output_dir.join("Aligned.toTranscriptome.out.bam"); |
| 137 | + assert!( |
| 138 | + tr_bam.exists(), |
| 139 | + "Aligned.toTranscriptome.out.bam was not created at {:?}", |
| 140 | + tr_bam |
| 141 | + ); |
| 142 | + |
| 143 | + // File size sanity: BAM header + at least BGZF EOF marker > 100 bytes. |
| 144 | + let meta = fs::metadata(&tr_bam).unwrap(); |
| 145 | + assert!( |
| 146 | + meta.len() > 100, |
| 147 | + "transcriptome BAM is suspiciously small: {} bytes", |
| 148 | + meta.len() |
| 149 | + ); |
| 150 | + |
| 151 | + // Validate it's readable as a BAM and check the header has our |
| 152 | + // two @SQ lines (T1 + T2). |
| 153 | + use noodles::bam; |
| 154 | + use std::fs::File; |
| 155 | + let mut reader = bam::io::Reader::new(File::open(&tr_bam).unwrap()); |
| 156 | + let header = reader.read_header().expect("valid BAM header"); |
| 157 | + let refs = header.reference_sequences(); |
| 158 | + assert_eq!( |
| 159 | + refs.len(), |
| 160 | + 2, |
| 161 | + "expected 2 @SQ entries (T1, T2), got {}", |
| 162 | + refs.len() |
| 163 | + ); |
| 164 | + let keys: Vec<String> = refs |
| 165 | + .keys() |
| 166 | + .map(|k| String::from_utf8_lossy(k).to_string()) |
| 167 | + .collect(); |
| 168 | + assert!(keys.iter().any(|k| k == "T1"), "T1 missing from @SQ"); |
| 169 | + assert!(keys.iter().any(|k| k == "T2"), "T2 missing from @SQ"); |
| 170 | +} |
0 commit comments