Skip to content

Commit 14e2e5d

Browse files
committed
add rust print and gzip loading
1 parent 01066e1 commit 14e2e5d

File tree

5 files changed

+249
-6
lines changed

5 files changed

+249
-6
lines changed

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod seqspec_upgrade;
2020
pub mod seqspec_insert;
2121
pub mod seqspec_check;
2222
pub mod seqspec_onlist;
23+
pub mod seqspec_print;
2324

2425
// #[cfg(feature = "python-binding")]
2526
// mod py_module; // lives in src/py_module.rs

src/main.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use seqspec::seqspec_upgrade;
1818
use seqspec::seqspec_insert;
1919
use seqspec::seqspec_check;
2020
use seqspec::seqspec_onlist;
21+
use seqspec::seqspec_print;
2122
use seqspec::utils;
2223

2324
use clap::{Parser, Subcommand};
@@ -46,6 +47,7 @@ enum Commands {
4647
Insert(seqspec_insert::InsertArgs),
4748
Check(seqspec_check::CheckArgs),
4849
Onlist(seqspec_onlist::OnlistArgs),
50+
Print(seqspec_print::PrintArgs),
4951
// other subcommands later...
5052
}
5153

@@ -67,5 +69,6 @@ fn main() {
6769
,Commands::Insert(args) => seqspec_insert::run_insert(&args)
6870
,Commands::Check(args) => { seqspec_check::run_check(&args); }
6971
,Commands::Onlist(args) => seqspec_onlist::run_onlist(&args)
72+
,Commands::Print(args) => seqspec_print::run_print(&args)
7073
}
7174
}

src/seqspec_print.rs

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
use crate::models::assay::Assay;
2+
use crate::models::region::{Region, RegionCoordinate};
3+
use crate::utils;
4+
use clap::Args;
5+
use std::fs;
6+
use std::io::Write;
7+
use std::path::PathBuf;
8+
9+
#[derive(Debug, Args)]
10+
pub struct PrintArgs {
11+
#[clap(help = "Sequencing specification yaml file", required = true)]
12+
yaml: PathBuf,
13+
14+
#[clap(short, long, help = "Path to output file", value_name = "OUT")]
15+
output: Option<PathBuf>,
16+
17+
#[clap(
18+
short,
19+
long,
20+
help = "Format",
21+
value_name = "FORMAT",
22+
default_value = "library-ascii",
23+
value_parser = ["library-ascii", "seqspec-ascii", "seqspec-html", "seqspec-png"],
24+
)]
25+
format: String,
26+
}
27+
28+
pub fn run_print(args: &PrintArgs) {
29+
validate_print_args(args);
30+
31+
let spec = utils::load_spec(&args.yaml);
32+
let result = seqspec_print(&spec, &args.format).unwrap_or_else(|err| {
33+
eprintln!("{}", err);
34+
std::process::exit(1);
35+
});
36+
37+
if let Some(out) = &args.output {
38+
let mut fh = fs::File::create(out).unwrap();
39+
writeln!(fh, "{result}").unwrap();
40+
} else {
41+
println!("{result}");
42+
}
43+
}
44+
45+
fn validate_print_args(args: &PrintArgs) {
46+
if !args.yaml.exists() {
47+
eprintln!("Input file does not exist: {}", args.yaml.display());
48+
std::process::exit(1);
49+
}
50+
if let Some(out) = &args.output {
51+
if out.exists() && !out.is_file() {
52+
eprintln!("Output path exists but is not a file: {}", out.display());
53+
std::process::exit(1);
54+
}
55+
}
56+
}
57+
58+
pub fn seqspec_print(spec: &Assay, fmt: &str) -> Result<String, String> {
59+
match fmt {
60+
"library-ascii" => Ok(print_library_ascii(spec)),
61+
"seqspec-ascii" => print_seqspec_ascii(spec),
62+
"seqspec-html" => Err("seqspec-html is not implemented in the Rust CLI yet".to_string()),
63+
"seqspec-png" => Err("seqspec-png is not implemented in the Rust CLI yet".to_string()),
64+
_ => Err(format!("Unsupported format: {}", fmt)),
65+
}
66+
}
67+
68+
fn print_seqspec_ascii(spec: &Assay) -> Result<String, String> {
69+
let mut parts = Vec::new();
70+
for modality in &spec.modalities {
71+
let (p, n) = libseq(spec, modality)?;
72+
parts.push(format_libseq(spec, modality, p, n)?);
73+
}
74+
Ok(parts.join("\n"))
75+
}
76+
77+
fn format_libseq(
78+
spec: &Assay,
79+
modality: &str,
80+
positive: Vec<String>,
81+
negative: Vec<String>,
82+
) -> Result<String, String> {
83+
let libspec = spec
84+
.get_libspec(modality)
85+
.ok_or_else(|| format!("modality '{}' not found", modality))?;
86+
87+
Ok(
88+
[
89+
modality.to_string(),
90+
"---".to_string(),
91+
positive.join("\n"),
92+
libspec.sequence.clone(),
93+
utils::complement_seq(&libspec.sequence),
94+
negative.join("\n"),
95+
]
96+
.join("\n"),
97+
)
98+
}
99+
100+
fn libseq(spec: &Assay, modality: &str) -> Result<(Vec<String>, Vec<String>), String> {
101+
let libspec = spec
102+
.get_libspec(modality)
103+
.ok_or_else(|| format!("modality '{}' not found", modality))?;
104+
let seqspec = spec.get_seqspec(modality);
105+
106+
let mut positive = Vec::new();
107+
let mut negative = Vec::new();
108+
109+
for (idx, read) in seqspec.iter().enumerate() {
110+
let leaves = libspec.get_leaves_with_region_id(&read.primer_id);
111+
let primer_idx = leaves
112+
.iter()
113+
.position(|leaf| leaf.region_id == read.primer_id)
114+
.ok_or_else(|| {
115+
format!(
116+
"primer_id '{}' not found in modality '{}'",
117+
read.primer_id, modality
118+
)
119+
})?;
120+
121+
let cuts: Vec<RegionCoordinate> = utils::project_regions_to_coordinates(leaves);
122+
let primer_pos = &cuts[primer_idx];
123+
let arrow_len = read.max_len.saturating_sub(1) as usize;
124+
let arrow = "-".repeat(arrow_len);
125+
126+
if read.strand == "pos" {
127+
let space_len = primer_pos.stop.saturating_sub(1) as usize;
128+
let ws = " ".repeat(space_len);
129+
positive.push(format!("{}|{}>({}) {}", ws, arrow, idx + 1, read.read_id));
130+
} else {
131+
let space_len = primer_pos.start.saturating_sub(read.max_len) as usize;
132+
let ws = " ".repeat(space_len);
133+
negative.push(format!("{}<{}|({}) {}", ws, arrow, idx + 1, read.read_id));
134+
}
135+
}
136+
137+
Ok((positive, negative))
138+
}
139+
140+
fn print_library_ascii(spec: &Assay) -> String {
141+
spec.library_spec
142+
.iter()
143+
.map(|region| render_region_tree(region, "", true))
144+
.collect::<Vec<_>>()
145+
.join("\n")
146+
}
147+
148+
fn render_region_tree(region: &Region, prefix: &str, is_last: bool) -> String {
149+
let branch = if prefix.is_empty() {
150+
""
151+
} else if is_last {
152+
"└── "
153+
} else {
154+
"├── "
155+
};
156+
let mut lines = vec![format!(
157+
"{}{}{}({},{})",
158+
prefix, branch, region.region_id, region.min_len, region.max_len
159+
)];
160+
161+
let child_prefix = if prefix.is_empty() {
162+
String::new()
163+
} else if is_last {
164+
format!("{} ", prefix)
165+
} else {
166+
format!("{}│ ", prefix)
167+
};
168+
169+
for (idx, child) in region.regions.iter().enumerate() {
170+
lines.push(render_region_tree(
171+
child,
172+
&child_prefix,
173+
idx + 1 == region.regions.len(),
174+
));
175+
}
176+
177+
lines.join("\n")
178+
}
179+
180+
#[cfg(test)]
181+
mod tests {
182+
use super::*;
183+
use crate::utils::load_spec;
184+
185+
fn dogma_spec() -> Assay {
186+
load_spec(&PathBuf::from("tests/fixtures/spec.yaml"))
187+
}
188+
189+
#[test]
190+
fn test_print_library_ascii_contains_modalities_and_regions() {
191+
let rendered = print_library_ascii(&dogma_spec());
192+
assert!(rendered.contains("rna("));
193+
assert!(rendered.contains("rna_cell_bc"));
194+
assert!(rendered.contains("atac"));
195+
}
196+
197+
#[test]
198+
fn test_print_seqspec_ascii_contains_reads_and_sequence() {
199+
let rendered = print_seqspec_ascii(&dogma_spec()).unwrap();
200+
assert!(rendered.contains("rna"));
201+
assert!(rendered.contains("rna_R1"));
202+
assert!(rendered.contains("rna_R2"));
203+
assert!(rendered.contains("---"));
204+
}
205+
206+
#[test]
207+
fn test_print_reports_unimplemented_html() {
208+
let err = seqspec_print(&dogma_spec(), "seqspec-html").unwrap_err();
209+
assert!(err.contains("not implemented"));
210+
}
211+
}

src/utils.rs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,20 @@ pub fn complement_seq(s: &str) -> String {
4242
// }
4343

4444
pub fn load_spec(spec: &std::path::PathBuf) -> Assay {
45-
// read in the spec file
46-
let f: std::fs::File = std::fs::File::open(spec).expect("Could not open file.");
45+
let mut f: std::fs::File = std::fs::File::open(spec).expect("Could not open file.");
46+
let mut magic = [0_u8; 2];
47+
f.read_exact(&mut magic)
48+
.expect("Could not read file header.");
49+
drop(f);
50+
51+
let reader: Box<dyn IoRead> = if magic == [0x1f, 0x8b] {
52+
let gz = GzDecoder::new(std::fs::File::open(spec).expect("Could not open file."));
53+
Box::new(gz)
54+
} else {
55+
Box::new(std::fs::File::open(spec).expect("Could not open file."))
56+
};
4757

48-
// parse through a permissive compatibility layer, then normalize into Assay
49-
let spec: AssayCompat = serde_yaml::from_reader(f).expect("Could not read values.");
58+
let spec: AssayCompat = serde_yaml::from_reader(reader).expect("Could not read values.");
5059

5160
spec.into_assay()
5261
}
@@ -201,6 +210,13 @@ mod tests {
201210
load_spec(&PathBuf::from("tests/fixtures/spec.yaml"))
202211
}
203212

213+
#[test]
214+
fn test_load_spec_reads_gzipped_yaml() {
215+
let spec = load_spec(&PathBuf::from("tests/fixtures/spec.yaml.gz"));
216+
assert_eq!(spec.assay_id, "DOGMAseq-DIG");
217+
assert_eq!(spec.seqspec_version, Some("0.4.0".to_string()));
218+
}
219+
204220
fn leaf(id: &str, len: i64) -> Region {
205221
Region::new(
206222
id.into(),

tests/test_seqspec_check.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,20 @@
1212

1313
def test_seqspec_check(dogmaseq_dig_spec: Assay):
1414
"""Test seqspec_check function"""
15+
spec = dogmaseq_dig_spec.model_copy(deep=True)
16+
17+
def localize_onlists(region):
18+
if region.onlist is not None and region.onlist.urltype in {"http", "https", "ftp"}:
19+
region.onlist.urltype = "local"
20+
region.onlist.url = region.onlist.filename + ".gz"
21+
for child in region.regions:
22+
localize_onlists(child)
23+
24+
for region in spec.library_spec:
25+
localize_onlists(region)
26+
1527
# Test with valid spec
16-
errors = seqspec_check(spec=dogmaseq_dig_spec)
28+
errors = seqspec_check(spec=spec)
1729
assert len(errors) == 0 # No errors for valid spec
1830

1931
# Test with invalid spec (missing required fields)
@@ -35,4 +47,4 @@ def test_seqspec_check(dogmaseq_dig_spec: Assay):
3547
)
3648

3749
errors = seqspec_check(spec=invalid_spec)
38-
assert len(errors) > 0 # Should have errors for invalid spec
50+
assert len(errors) > 0 # Should have errors for invalid spec

0 commit comments

Comments
 (0)