Skip to content

Commit de8a52d

Browse files
authored
Reorganize GFA operations into ops and cli modules (#198)
2 parents ccdd97b + 563a0ba commit de8a52d

File tree

12 files changed

+633
-597
lines changed

12 files changed

+633
-597
lines changed

flatgfa/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ edition = "2021"
55

66
[[bin]]
77
name = "fgfa"
8-
path = "src/main.rs"
8+
path = "src/cli/main.rs"
99

1010
[dependencies]
1111
argh = "0.1.12"

flatgfa/src/cli/cmds.rs

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
use crate::flatgfa::{self, Segment};
2+
use crate::ops;
3+
use crate::pool::Id;
4+
use argh::FromArgs;
5+
use std::collections::HashMap;
6+
7+
// Arguments for the `toc` subcommand. The struct declares no fields, so the
// subcommand carries no options of its own.
/// print the FlatGFA table of contents
8+
#[derive(FromArgs, PartialEq, Debug)]
9+
#[argh(subcommand, name = "toc")]
10+
pub struct Toc {}
11+
12+
pub fn toc(gfa: &flatgfa::FlatGFA) {
13+
eprintln!("header: {}", gfa.header.len());
14+
eprintln!("segs: {}", gfa.segs.len());
15+
eprintln!("paths: {}", gfa.paths.len());
16+
eprintln!("links: {}", gfa.links.len());
17+
eprintln!("steps: {}", gfa.steps.len());
18+
eprintln!("seq_data: {}", gfa.seq_data.len());
19+
eprintln!("overlaps: {}", gfa.overlaps.len());
20+
eprintln!("alignment: {}", gfa.alignment.len());
21+
eprintln!("name_data: {}", gfa.name_data.len());
22+
eprintln!("optional_data: {}", gfa.optional_data.len());
23+
eprintln!("line_order: {}", gfa.line_order.len());
24+
}
25+
26+
// Arguments for the `paths` subcommand. The struct declares no fields, so the
// subcommand carries no options of its own.
/// list the paths
27+
#[derive(FromArgs, PartialEq, Debug)]
28+
#[argh(subcommand, name = "paths")]
29+
pub struct Paths {}
30+
31+
pub fn paths(gfa: &flatgfa::FlatGFA) {
32+
for path in gfa.paths.all().iter() {
33+
println!("{}", gfa.get_path_name(path));
34+
}
35+
}
36+
37+
// Arguments for the `stats` subcommand: two boolean switches, `-S` and `-L`.
// The `stats` function checks `summarize` first and only consults
// `self_loops` when `summarize` is unset.
/// calculate graph statistics
38+
#[derive(FromArgs, PartialEq, Debug)]
39+
#[argh(subcommand, name = "stats")]
40+
pub struct Stats {
41+
/// show basic metrics
42+
#[argh(switch, short = 'S')]
43+
summarize: bool,
44+
45+
/// number of segments with at least one self-loop link
46+
#[argh(switch, short = 'L')]
47+
self_loops: bool,
48+
}
49+
50+
pub fn stats(gfa: &flatgfa::FlatGFA, args: Stats) {
51+
if args.summarize {
52+
println!("#length\tnodes\tedges\tpaths\tsteps");
53+
println!(
54+
"{}\t{}\t{}\t{}\t{}",
55+
gfa.seq_data.len(),
56+
gfa.segs.len(),
57+
gfa.links.len(),
58+
gfa.paths.len(),
59+
gfa.steps.len()
60+
);
61+
} else if args.self_loops {
62+
let mut counts: HashMap<Id<Segment>, usize> = HashMap::new();
63+
let mut total: usize = 0;
64+
for link in gfa.links.all().iter() {
65+
if link.from.segment() == link.to.segment() {
66+
let count = counts.entry(link.from.segment()).or_insert(0);
67+
*count += 1;
68+
total += 1;
69+
}
70+
}
71+
println!("#type\tnum");
72+
println!("total\t{}", total);
73+
println!("unique\t{}", counts.len());
74+
}
75+
}
76+
77+
// Arguments for the `position` subcommand. `path_pos` holds the raw
// `path_name,offset,orientation` string; the `position` function splits it
// on commas and parses the pieces.
/// find a nucleotide position within a path
78+
#[derive(FromArgs, PartialEq, Debug)]
79+
#[argh(subcommand, name = "position")]
80+
pub struct Position {
81+
/// path_name,offset,orientation
82+
#[argh(option, short = 'p')]
83+
path_pos: String,
84+
}
85+
86+
pub fn position(gfa: &flatgfa::FlatGFA, args: Position) -> Result<(), &'static str> {
87+
// Parse the position triple, which looks like `path,42,+`.
88+
let (path_name, offset, orientation) = {
89+
let parts: Vec<_> = args.path_pos.split(',').collect();
90+
if parts.len() != 3 {
91+
return Err("position must be path_name,offset,orientation");
92+
}
93+
let off: usize = parts[1].parse().or(Err("offset must be a number"))?;
94+
let ori: flatgfa::Orientation = parts[2].parse().or(Err("orientation must be + or -"))?;
95+
(parts[0], off, ori)
96+
};
97+
98+
let path_id = gfa.find_path(path_name.into()).ok_or("path not found")?;
99+
let path = &gfa.paths[path_id];
100+
assert_eq!(
101+
orientation,
102+
flatgfa::Orientation::Forward,
103+
"only + is implemented so far"
104+
);
105+
106+
// Print the match.
107+
let found = ops::position::position(gfa, path, offset);
108+
if let Some((handle, seg_off)) = found {
109+
let seg = gfa.get_handle_seg(handle);
110+
let seg_name = seg.name;
111+
println!("#source.path.pos\ttarget.graph.pos");
112+
println!(
113+
"{},{},{}\t{},{},{}",
114+
path_name,
115+
offset,
116+
orientation,
117+
seg_name,
118+
seg_off,
119+
handle.orient()
120+
);
121+
}
122+
123+
Ok(())
124+
}
125+
126+
// Arguments for the `bench` subcommand. `wcl` is optional; the `bench`
// function does nothing when it is absent. `parallel` is passed through to
// the line-counting routine.
/// benchmarks
127+
#[derive(FromArgs, PartialEq, Debug)]
128+
#[argh(subcommand, name = "bench")]
129+
pub struct Bench {
130+
/// count lines in a text file
131+
#[argh(option)]
132+
wcl: Option<String>,
133+
134+
/// enable parallelism when available
135+
#[argh(switch, short = 'p')]
136+
parallel: bool,
137+
}
138+
139+
pub fn bench(args: Bench) {
140+
// TODO: We don't need a GFA for (some of) these? So avoid opening it.
141+
if let Some(filename) = args.wcl {
142+
println!("{}", ops::bench::line_count(&filename, args.parallel));
143+
}
144+
}
145+
146+
// Arguments for the `extract` subcommand. `seg_name` and `link_distance` have
// no default declared here; the two merging-related options default to
// 300000 and 6 respectively. All four values are forwarded by the `extract`
// function to the subgraph builder.
/// create a subset graph
147+
#[derive(FromArgs, PartialEq, Debug)]
148+
#[argh(subcommand, name = "extract")]
149+
pub struct Extract {
150+
/// segment to extract around
151+
#[argh(option, short = 'n')]
152+
seg_name: usize,
153+
154+
/// number of edges "away" from the node to include
155+
#[argh(option, short = 'c')]
156+
link_distance: usize,
157+
158+
/// maximum number of basepairs allowed between subpaths s.t. the subpaths are merged together
159+
#[argh(
160+
option,
161+
short = 'd',
162+
long = "max-distance-subpaths",
163+
default = "300000"
164+
)]
165+
max_distance_subpaths: usize, // TODO: possibly make this bigger
166+
167+
/// maximum number of iterations before we stop merging subpaths
168+
#[argh(option, short = 'e', long = "max-merging-iterations", default = "6")]
169+
num_iterations: usize, // TODO: probably make this smaller
170+
}
171+
172+
pub fn extract(
173+
gfa: &flatgfa::FlatGFA,
174+
args: Extract,
175+
) -> Result<flatgfa::HeapGFAStore, &'static str> {
176+
let origin_seg = gfa.find_seg(args.seg_name).ok_or("segment not found")?;
177+
178+
let mut subgraph = ops::extract::SubgraphBuilder::new(gfa);
179+
subgraph.add_header();
180+
subgraph.extract(
181+
origin_seg,
182+
args.link_distance,
183+
args.max_distance_subpaths,
184+
args.num_iterations,
185+
);
186+
Ok(subgraph.store)
187+
}
188+
189+
// Arguments for the `depth` subcommand. The struct declares no fields, so the
// subcommand carries no options of its own.
/// compute node depth, the number of times paths cross a node
190+
#[derive(FromArgs, PartialEq, Debug)]
191+
#[argh(subcommand, name = "depth")]
192+
pub struct Depth {}
193+
194+
pub fn depth(gfa: &flatgfa::FlatGFA) {
195+
let (depths, uniq_paths) = ops::depth::depth(gfa);
196+
197+
println!("#node.id\tdepth\tdepth.uniq");
198+
for (id, seg) in gfa.segs.items() {
199+
let name: u32 = seg.name as u32;
200+
println!(
201+
"{}\t{}\t{}",
202+
name,
203+
depths[id.index()],
204+
uniq_paths[id.index()].len()
205+
);
206+
}
207+
}
208+
209+
/// chop the segments in a graph into sizes of N or smaller
210+
#[derive(FromArgs, PartialEq, Debug)]
211+
#[argh(subcommand, name = "chop")]
212+
pub struct Chop {
213+
/// maximimum segment size
214+
#[argh(option, short = 'c')]
215+
count: usize,
216+
217+
/// compute new links
218+
#[argh(switch, short = 'l')]
219+
links: bool,
220+
}
221+
222+
/// Chop a graph into segments of size no larger than c
223+
/// By default, compact node ids
224+
/// CIGAR strings, links, and optional Segment data are invalidated by chop
225+
/// Generates a new graph, rather than modifying the old one in place
226+
pub fn chop<'a>(
227+
gfa: &'a flatgfa::FlatGFA<'a>,
228+
args: Chop,
229+
) -> Result<flatgfa::HeapGFAStore, &'static str> {
230+
Ok(ops::chop::chop(gfa, args.count, args.links))
231+
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use flatgfa::flatgfa::FlatGFA;
33
use flatgfa::gaf;
44
use flatgfa::parse::Parser;
55
use flatgfa::pool::Store;
6-
use flatgfa::{cmds, file, memfile, parse}; // TODO: hopefully remove at some point, this breaks a lot of principles
6+
use flatgfa::{cli::cmds, file, memfile, parse};
77

88
#[derive(FromArgs)]
99
/// Convert between GFA text and FlatGFA binary formats.

flatgfa/src/cli/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod cmds;

0 commit comments

Comments
 (0)