@@ -4,15 +4,21 @@ use msbwt2::dynamic_bwt::{create_from_fastx,DynamicBWT};
44use msbwt2:: msbwt_core:: BWT ;
55use msbwt2:: string_util;
66use msbwt2:: rle_bwt:: RleBWT ;
7+ use rust_htslib:: htslib:: fai_load_options_FAI_CREATE;
78
89use crate :: { agg:: * } ;
910use std:: { path:: PathBuf , fs:: File , io:: { self , Write } } ;
1011use std:: collections:: { HashMap , HashSet } ;
12+ use bio:: io:: fasta:: { Reader , Record } ;
1113
1214
1315
14- pub fn start ( graph_file : & PathBuf , bwt_file : & String , output_file : & PathBuf , query_length : usize , min_support_counts : usize ) {
16+ pub fn start ( graph_file : & PathBuf , bwt_file : & String , reference_file : & PathBuf , output_file : & PathBuf , query_length : usize , min_support_counts : usize ) {
1517 println ! ( "Correcting graph based on srWGS data" ) ;
18+ // get reference path name
19+ let ref_reader = Reader :: from_file ( reference_file) . unwrap ( ) ;
20+ let reference_sequence: Vec < Record > = ref_reader. records ( ) . map ( |r| r. unwrap ( ) ) . collect ( ) ;
21+ let ref_header = reference_sequence[ 0 ] . id ( ) . to_string ( ) ;
1622
1723 // construct msbwt
1824 println ! ( "Loading msbwt" ) ;
@@ -26,7 +32,7 @@ pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, qu
2632 // kmerize graph and check sr support counts
2733 println ! ( "Kmerizing graph and checking sr support counts" ) ;
2834 let edgelist = graph. edges . keys ( ) . collect :: < Vec < & String > > ( ) ;
29- let mut kmer_graph : HashMap < String , String > = HashMap :: new ( ) ; // kmer_graph[kmer] = edge_id
35+
3036 for edge in edgelist {
3137 let mut status: bool = false ; // not discarded
3238 let edge_data = graph. edges . get ( edge) . unwrap ( ) ;
@@ -65,6 +71,15 @@ pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, qu
6571 }
6672 }
6773
74+ //keep reference paths
75+ let reads_list = edge_data. get ( "reads" ) . unwrap ( ) . as_array ( ) . unwrap ( ) ;
76+ for read in reads_list{
77+ if read. as_str ( ) . unwrap ( ) == ref_header{
78+ status = false ; //keep reference path
79+ break ;
80+ }
81+ }
82+
6883 if status == true {
6984 discarded_edges. push ( edge. clone ( ) ) ;
7085 }
@@ -73,7 +88,7 @@ pub fn start (graph_file: &PathBuf, bwt_file: &String, output_file: &PathBuf, qu
7388 // remove discarded edges from graph
7489 println ! ( "Removing discarded edges from graph" ) ;
7590 println ! ( "Number of discarded edges: {}" , discarded_edges. len( ) ) ;
76- //compute total number of bases in discarded edges
91+ // compute total number of bases in discarded edges
7792 let mut total_bp = 0 ;
7893 for edge in discarded_edges. clone ( ) {
7994 let edge_data = graph. edges . get ( & edge) . unwrap ( ) ;
0 commit comments