Skip to content

Commit dd8de29

Browse files
committed
keep reference paths when trimming the graph
1 parent 92b7d57 commit dd8de29

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

src/correct.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,21 @@ use msbwt2::dynamic_bwt::{create_from_fastx,DynamicBWT};
44
use msbwt2::msbwt_core::BWT;
55
use msbwt2::string_util;
66
use msbwt2::rle_bwt::RleBWT;
7+
use rust_htslib::htslib::fai_load_options_FAI_CREATE;
78

89
use crate::{agg::*};
910
use std::{path::PathBuf, fs::File, io::{self, Write}};
1011
use std::collections::{HashMap, HashSet};
12+
use bio::io::fasta::{Reader, Record};
1113

1214

1315

14-
pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, query_length:usize, min_support_counts: usize) {
16+
pub fn start (graph_file: &PathBuf, bwt_file: &String, reference_file: &PathBuf, output_file: &PathBuf, query_length:usize, min_support_counts: usize) {
1517
println!("Correcting graph based on srWGS data");
18+
// get reference path name
19+
let ref_reader = Reader::from_file(reference_file).unwrap();
20+
let reference_sequence: Vec<Record> = ref_reader.records().map(|r| r.unwrap()).collect();
21+
let ref_header = reference_sequence[0].id().to_string();
1622

1723
// construct msbwt
1824
println!("Loading msbwt");
@@ -26,7 +32,7 @@ pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, qu
2632
// kmerize graph and check sr support counts
2733
println!("Kmerizing graph and checking sr support counts");
2834
let edgelist = graph.edges.keys().collect::<Vec<&String>>();
29-
let mut kmer_graph: HashMap<String, String> = HashMap::new(); // kmer_graph[kmer] = edge_id
35+
3036
for edge in edgelist {
3137
let mut status: bool = false; // not discarded
3238
let edge_data = graph.edges.get(edge).unwrap();
@@ -65,6 +71,15 @@ pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, qu
6571
}
6672
}
6773

74+
//keep reference paths
75+
let reads_list = edge_data.get("reads").unwrap().as_array().unwrap();
76+
for read in reads_list{
77+
if read.as_str().unwrap() == ref_header{
78+
status = false; //keep reference path
79+
break;
80+
}
81+
}
82+
6883
if status == true{
6984
discarded_edges.push(edge.clone());
7085
}
@@ -73,7 +88,7 @@ pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, qu
7388
// remove discarded edges from graph
7489
println!("Removing discarded edges from graph");
7590
println!("Number of discarded edges: {}", discarded_edges.len());
76-
//compute total number of bases in discarded edges
91+
// compute total number of bases in discarded edges
7792
let mut total_bp = 0;
7893
for edge in discarded_edges.clone(){
7994
let edge_data = graph.edges.get(&edge).unwrap();

src/main.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ enum Commands {
8686
/// path for corrected graph gfa file
8787
#[clap(short, long, value_parser)]
8888
outputfile: PathBuf,
89+
/// path for standard linear reference FASTA file
90+
#[clap(short, long, value_parser)]
91+
reference_file: PathBuf,
8992
/// query length for kmerizing graph, should be less than the short read length
9093
#[clap(short, long, value_parser, default_value_t = 99)]
9194
query_length: usize,
@@ -191,10 +194,11 @@ fn main() {
191194
graphfile,
192195
bwt_file,
193196
outputfile,
197+
reference_file,
194198
query_length,
195199
min_support_counts
196200
} => {
197-
let _ = correct::start(&graphfile, &bwt_file, &outputfile, query_length, min_support_counts);
201+
let _ = correct::start(&graphfile, &bwt_file, &reference_file, &outputfile, query_length, min_support_counts);
198202
}
199203

200204
Commands::Call {

0 commit comments

Comments
 (0)