1- use rust_htslib:: bam:: { Read , IndexedReader , Record } ;
1+ use rust_htslib:: bam:: { Read , IndexedReader } ;
22use rust_htslib:: bam:: ext:: BamRecordExtensions ;
33use core:: panic;
44use std:: collections:: HashMap ;
@@ -9,6 +9,7 @@ use std::fmt;
99use ndarray:: Array1 ;
1010use std:: collections:: HashSet ;
1111use std:: fs:: OpenOptions ;
12+ use crate :: filtering:: Alignmentfilters ;
1213
1314pub fn parse_regions ( region : & str , bam_ifile : Vec < & str > ) -> ( Vec < Region > , HashMap < String , u32 > ) {
1415 // Takes a vector of regions, and a bam reference
@@ -199,15 +200,11 @@ pub fn bam_pileup<'a>(
199200 counts = vec ! [ 0.0 ; 1 ] ;
200201 for record in bam. records ( ) {
201202 let mut record = record. expect ( "Error parsing record." ) ;
202- if filters. manipulate {
203- filters. manipulate_record ( & mut record) ;
204- }
205203 if filters. filter {
206204 if filters. filter_record ( & record) {
207205 continue ;
208206 }
209207 }
210-
211208 if !ignorechr. contains ( & region. 0 ) {
212209 if record. is_unmapped ( ) {
213210 unmapped_reads += 1 ;
@@ -242,9 +239,6 @@ pub fn bam_pileup<'a>(
242239 . expect ( & format ! ( "Error fetching region: {}:{},{}" , regstruct. chrom, exon. 0 , exon. 1 ) ) ;
243240 for record in bam. records ( ) {
244241 let mut record = record. expect ( "Error parsing record." ) ;
245- if filters. manipulate {
246- filters. manipulate_record ( & mut record) ;
247- }
248242 if filters. filter {
249243 if filters. filter_record ( & record) {
250244 continue ;
@@ -280,44 +274,58 @@ pub fn bam_pileup<'a>(
280274 let mut binix: u32 = 0 ;
281275 for record in bam. records ( ) {
282276 let mut record = record. expect ( "Error parsing record." ) ;
283- if filters. manipulate {
284- filters. manipulate_record ( & mut record) ;
285- }
277+
286278 if filters. filter {
287279 if filters. filter_record ( & record) {
288280 continue ;
289281 }
290282 }
291- if !ignorechr. contains ( & region. 0 ) {
283+ if filters. manipulate {
284+ let manipulated_blocks = filters. manipulate_record ( & mut record) ;
285+ if manipulated_blocks. is_none ( ) {
286+ continue ;
287+ }
288+ let block = manipulated_blocks. unwrap ( ) ;
289+ let ixstart = ( ( block[ 0 ] - region. 1 ) / binsize) as usize ;
290+ let ixend = ( ( block[ 1 ] - region. 1 ) / binsize) as usize ;
291+
292+ let indices: HashSet < usize > = ( ixstart..ixend) . collect ( ) ;
293+ indices. into_iter ( )
294+ . for_each ( |ix| {
295+ if ix < counts. len ( ) {
296+ counts[ ix] += 1.0 ;
297+ }
298+ } ) ;
299+ } else {
300+ let indices: HashSet < usize > = record
301+ . aligned_blocks ( )
302+ . filter ( |x| ( x[ 1 ] as u32 ) >= region. 1 && ( x[ 1 ] as u32 ) <= region. 2 )
303+ . filter ( |x| ( x[ 0 ] as u32 ) >= region. 1 && ( x[ 0 ] as u32 ) <= region. 2 )
304+ . flat_map ( |x| x[ 0 ] as u32 ..x[ 1 ] as u32 )
305+ . map ( |x| ( ( x - region. 1 ) / binsize) as usize )
306+ . collect ( ) ;
307+ indices. into_iter ( )
308+ . for_each ( |ix| {
309+ if ix < counts. len ( ) {
310+ counts[ ix] += 1.0 ;
311+ }
312+ } ) ;
313+ }
314+
315+ if !ignorechr. contains ( & region. 0 ) && gather_lengths {
292316 if record. is_unmapped ( ) {
293317 unmapped_reads += 1 ;
294318 } else {
295319 mapped_reads += 1 ;
296320 if * ispe {
297321 if record. is_paired ( ) && record. is_proper_pair ( ) && ( record. flags ( ) & FREAD != 0 ) {
298- if gather_lengths {
299- fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
300- }
322+ fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
301323 }
302324 }
303- if gather_lengths {
304- readlens. push ( record. seq_len ( ) as u32 ) ;
305- }
325+ readlens. push ( record. seq_len ( ) as u32 ) ;
306326 }
307327 }
308- let indices: HashSet < usize > = record
309- . aligned_blocks ( )
310- . filter ( |x| ( x[ 1 ] as u32 ) >= region. 1 && ( x[ 1 ] as u32 ) <= region. 2 )
311- . filter ( |x| ( x[ 0 ] as u32 ) >= region. 1 && ( x[ 0 ] as u32 ) <= region. 2 )
312- . flat_map ( |x| x[ 0 ] as u32 ..x[ 1 ] as u32 )
313- . map ( |x| ( ( x - region. 1 ) / binsize) as usize )
314- . collect ( ) ;
315- indices. into_iter ( )
316- . for_each ( |ix| {
317- if ix < counts. len ( ) {
318- counts[ ix] += 1.0 ;
319- }
320- } ) ;
328+
321329 }
322330 }
323331 let file = OpenOptions :: new ( )
@@ -330,6 +338,7 @@ pub fn bam_pileup<'a>(
330338 // There are two scenarios:
331339 // bamCoverage mode -> we can collapse bins with same coverage (collapse = true)
332340 // bamCompare & others -> We cannot collapse the bins, yet. (collapse = false)
341+ // Note that collapse can also be passed as a CLI, for those that want that.
333342 if counts. len ( ) == 1 {
334343 writeln ! ( writer, "{}\t {}\t {}\t {}" , region. 0 , startstr, endstr, counts[ 0 ] ) . unwrap ( ) ;
335344 } else {
@@ -383,155 +392,6 @@ fn pos_in_blacklist(pos: i64, chrom: &str, blacklist: &Vec<Region>) -> bool {
383392 return false ;
384393}
385394
386- pub struct Alignmentfilters {
387- pub blacklist : Option < Vec < Region > > ,
388- pub minmappingquality : u8 ,
389- pub samflaginclude : u16 ,
390- pub samflagexclude : u16 ,
391- pub minfraglen : u32 ,
392- pub maxfraglen : u32 ,
393- pub mnase : bool ,
394- pub offset : ( i32 , i32 ) ,
395- pub filterrnastrand : String ,
396- pub extendreads : u32 ,
397- pub centerreads : bool ,
398- pub filter : bool ,
399- pub manipulate : bool ,
400- }
401- impl Alignmentfilters {
402- pub fn new (
403- blacklist : Option < Vec < Region > > ,
404- minmappingquality : Option < u8 > ,
405- samflaginclude : Option < u16 > ,
406- samflagexclude : Option < u16 > ,
407- minfraglen : Option < u32 > ,
408- maxfraglen : Option < u32 > ,
409- mnase : Option < bool > ,
410- offset : Option < ( i32 , i32 ) > ,
411- filterrnastrand : Option < String > ,
412- extendreads : Option < u32 > ,
413- centerreads : Option < bool >
414- ) -> Self {
415- // Go through the arguments, and if they are not set or have default values, we set a filter boolean to false.
416- // Only when filtering needs to happen the filterrecord will be invoked, for performance.
417- let mut filter: bool = false ;
418- let mut manipulate: bool = false ;
419- let _mmq = minmappingquality. unwrap_or ( 0 ) ;
420- let _sfi = samflaginclude. unwrap_or ( 0 ) ;
421- let _sfe = samflagexclude. unwrap_or ( 0 ) ;
422- let _mifl = minfraglen. unwrap_or ( 0 ) ;
423- let _mafl = maxfraglen. unwrap_or ( 0 ) ;
424- let _mnase = mnase. unwrap_or ( false ) ;
425- let _offset = offset. unwrap_or ( ( 1 , -1 ) ) ;
426- let _frs = filterrnastrand. unwrap_or ( String :: from ( "None" ) ) ;
427- let _extend = extendreads. unwrap_or ( 0 ) ;
428- let _center = centerreads. unwrap_or ( false ) ;
429-
430- // Set the manipulate bool for a quick escape in case manipulation is not needed.
431- if _offset != ( 1 , -1 ) || _mnase || _extend > 0 || _center {
432- manipulate = true ;
433- }
434-
435- // Set the filter bool for a quick escape in case filtering is not needed.
436- if _mmq > 0 || _sfi > 0 || _sfe > 0 || _mifl > 0 || _mafl > 0 || _frs != "None" || blacklist. is_some ( ) {
437- filter = true ;
438- }
439-
440- Self {
441- blacklist : blacklist,
442- minmappingquality : _mmq,
443- samflaginclude : _sfi,
444- samflagexclude : _sfe,
445- minfraglen : _mifl,
446- maxfraglen : _mafl,
447- mnase : _mnase,
448- offset : _offset,
449- filterrnastrand : _frs,
450- extendreads : _extend,
451- centerreads : _center,
452- filter : filter,
453- manipulate : manipulate,
454- }
455- }
456- pub fn filter_record ( & self , rec : & Record ) -> bool {
457- // Decides filtering of a record. The bool return is used to 'continue', i.e. skip the record.
458- if rec. is_unmapped ( ) {
459- return true ;
460- } else if !self . filter {
461- return false ;
462- } else {
463- // True filtering.
464- // quality > samflags > min/max fraglen
465- // quality
466- let mut skip: bool = false ;
467- if rec. mapq ( ) < self . minmappingquality {
468- skip = true ;
469- }
470- // samflags
471- if self . samflaginclude > 0 {
472- if ( rec. flags ( ) & self . samflaginclude ) == 0 {
473- skip = true ;
474- }
475- }
476- if self . samflagexclude > 0 {
477- if ( rec. flags ( ) & self . samflagexclude ) != 0 {
478- skip = true ;
479- }
480- }
481- // min/max fraglen
482- if self . minfraglen != 0 || self . maxfraglen != 0 {
483- if rec. is_paired ( ) {
484- if rec. insert_size ( ) . abs ( ) < self . minfraglen as i64 || rec. insert_size ( ) . abs ( ) > self . maxfraglen as i64 {
485- skip = true ;
486- }
487- } else {
488- let fragsize: u32 = rec
489- . aligned_blocks ( )
490- . map ( |x| x[ 1 ] as u32 - x[ 0 ] as u32 )
491- . sum ( ) ;
492- if fragsize < self . minfraglen || fragsize > self . maxfraglen {
493- skip = true ;
494- }
495- }
496- }
497- // filterrnastrand
498- if self . filterrnastrand . as_str ( ) != "None" {
499- match ( self . filterrnastrand . as_str ( ) , rec. is_paired ( ) ) {
500- ( "forward" , true ) => {
501- if !( ( rec. flags ( ) & 144 == 128 ) || ( rec. flags ( ) & 96 == 64 ) ) {
502- skip = true ;
503- }
504- } ,
505- ( "forward" , false ) => {
506- if !( rec. flags ( ) & 16 == 16 ) {
507- skip = true ;
508- }
509- } ,
510- ( "reverse" , true ) => {
511- if !( ( rec. flags ( ) & 144 == 144 ) || ( rec. flags ( ) & 96 == 96 ) ) {
512- skip = true ;
513- }
514- } ,
515- ( "reverse" , false ) => {
516- if !( rec. flags ( ) & 16 == 0 ) {
517- skip = true ;
518- }
519- } ,
520- _ => {
521- panic ! ( "filterrnastrand should be either forward or reverse. {:?} is not supported." , self . filterrnastrand)
522- } ,
523- }
524- }
525- return skip;
526- }
527- }
528-
529- pub fn manipulate_record ( & self , rec : & mut Record ) {
530- println ! ( "Will manipulate the record, but not now lol" ) ;
531- }
532- }
533-
534-
535395#[ derive( Clone , Debug ) ]
536396pub struct Region {
537397 pub chrom : String ,
0 commit comments