Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions pydeeptools/deeptools/bamCompare2.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,18 @@ def main(args=None):
args.samFlagInclude = 0
if not args.samFlagExclude:
args.samFlagExclude = 0
print(args)
if not args.region:
args.region = 'None'
if not args.blackListFileName:
args.blackListFileName = 'None'
else:
if len(args.blackListFileName) != 1:
print("Please only provide one blacklist file.")
sys.exit()
args.blackListFileName = args.blackListFileName[0]

args.pseudocount = 1

r_bamcompare(
args.bamfile1, # bam file 1
args.bamfile2, # bam file 2
Expand All @@ -271,7 +281,7 @@ def main(args=None):
args.numberOfProcessors, # threads
args.ignoreForNormalization,
args.binSize, # bin size
[], # regions
args.region, # regions
True # verbose
)

Expand Down
12 changes: 9 additions & 3 deletions pydeeptools/deeptools/bamCoverage2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
from deeptools import parserCommon
from deeptools.hp import r_bamcoverage

import sys

def parseArguments():
parentParser = parserCommon.getParentArgParse()
Expand Down Expand Up @@ -143,13 +143,19 @@ def main(args=None):
args.filterRNAstrand = 'None'
if not args.blackListFileName:
args.blackListFileName = 'None'
else:
if len(args.blackListFileName) != 1:
print("Please only provide one blacklist file.")
sys.exit()
args.blackListFileName = args.blackListFileName[0]
if not args.minMappingQuality:
args.minMappingQuality = 0
if not args.samFlagInclude:
args.samFlagInclude = 0
if not args.samFlagExclude:
args.samFlagExclude = 0

if not args.region:
args.region = 'None'
print(args)

r_bamcoverage(
Expand Down Expand Up @@ -180,6 +186,6 @@ def main(args=None):
args.maxFragmentLength,
# running options
args.numberOfProcessors, # threads
[], # regions
args.region, # regions
args.verbose # verbose
)
9 changes: 7 additions & 2 deletions pydeeptools/deeptools/multiBamSummary2.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,14 @@ def process_args(args=None):
if not args.BED:
args.BED = []
if not args.region:
args.region = []
args.region = "None"
if not args.blackListFileName:
args.blackListFileName = "None"
args.blackListFileName = 'None'
else:
if len(args.blackListFileName) != 1:
print("Please only provide one blacklist file.")
sys.exit()
args.blackListFileName = args.blackListFileName[0]
if not args.outRawCounts:
args.outRawCounts = "None"
if not args.scalingFactors:
Expand Down
20 changes: 9 additions & 11 deletions src/alignmentsieve.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use bigtools::utils::misc::Name;
use flate2::write;
use pyo3::prelude::*;
use pyo3::types::PyList;
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use rust_htslib::bam::{self, record, Header, IndexedReader, Read, Reader, Writer};
use tempfile::{Builder, TempPath, NamedTempFile};
use rust_htslib::bam::{self, Header, IndexedReader, Read, Reader, Writer};
use tempfile::{Builder, TempPath};
use std::fs::File;
use std::io::Write;
use crate::covcalc::{parse_regions, Alignmentfilters, Region};
Expand All @@ -27,14 +25,14 @@ pub fn r_alignmentsieve(
_blacklist: &str, // blacklist file name.
min_fragment_length: u32, // minimum fragment length.
max_fragment_length: u32, // maximum fragment length.
extend_reads: u32,
center_reads: bool,
_extend_reads: u32,
_center_reads: bool,

) -> PyResult<()> {
// Input bam file
let mut bam = Reader::from_path(bamifile).unwrap();
let bam = Reader::from_path(bamifile).unwrap();
let header = Header::from_template(bam.header());
let header_view = bam.header().clone();
let _header_view = bam.header().clone();

let mut write_filters: bool = false;
if filtered_out_readsfile != "None" {
Expand All @@ -47,7 +45,7 @@ pub fn r_alignmentsieve(
// shift is of length 0, 2, or 4.

// Define regions
let (regions, chromsizes) = parse_regions(&Vec::new(), vec![bamifile]);
let (regions, _chromsizes) = parse_regions("None", vec![bamifile]);

let filters = Alignmentfilters{
minmappingquality: min_mapping_quality,
Expand Down Expand Up @@ -99,7 +97,7 @@ pub fn r_alignmentsieve(
}
}

let mut ofilterbam = Writer::from_path(filtered_out_readsfile, &header, bam::Format::Bam).unwrap();
let _ofilterbam = Writer::from_path(filtered_out_readsfile, &header, bam::Format::Bam).unwrap();

if filter_metrics != "None" {
let mut of = File::create(filter_metrics).unwrap();
Expand All @@ -113,7 +111,7 @@ pub fn r_alignmentsieve(
}


fn sieve_bamregion(ibam: &str, regstruct: &Region, alfilters: &Alignmentfilters, filter_rna_strand: &str, shift: &Vec<i32>, write_filters: bool, nproc: usize, verbose: bool) -> (Vec<Option<TempPath>>, Vec<Option<TempPath>>, u64, u64) {
fn sieve_bamregion(ibam: &str, regstruct: &Region, alfilters: &Alignmentfilters, filter_rna_strand: &str, _shift: &Vec<i32>, write_filters: bool, nproc: usize, verbose: bool) -> (Vec<Option<TempPath>>, Vec<Option<TempPath>>, u64, u64) {
let region = (regstruct.chrom.clone(), regstruct.get_startu(), regstruct.get_endu());
let mut total_reads: u64 = 0;
let mut filtered_reads: u64 = 0;
Expand Down
39 changes: 29 additions & 10 deletions src/bamcompare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use std::io::{BufReader};
use std::fs::File;
use itertools::Itertools;
use bigtools::{Value};
use crate::filehandler::{bam_ispaired, write_covfile};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, TempZip, region_divider};
use crate::filehandler::{bam_ispaired, write_covfile, is_bed_or_gtf, read_bedfile};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, TempZip, region_divider, Region};
use crate::normalization::scale_factor_bamcompare;
use crate::calc::{median, calc_ratio};
use tempfile::{TempPath};
Expand All @@ -27,7 +27,8 @@ pub fn r_bamcompare(
operation: &str,
pseudocount: f32,
// filtering options
ignoreduplicates: bool,
blacklist: &str, // path to blacklist filename, or 'None'
_ignoreduplicates: bool,
minmappingquality: u8, //
samflaginclude: u16,
samflagexclude: u16,
Expand All @@ -36,7 +37,7 @@ pub fn r_bamcompare(
nproc: usize,
_ignorechr: Py<PyList>,
binsize: u32,
regions: Vec<(String, u32, u32)>,
supregion: &str,
verbose: bool
) -> PyResult<()> {
let ispe1 = bam_ispaired(bamifile1);
Expand All @@ -60,9 +61,24 @@ pub fn r_bamcompare(
};

// Parse regions & calculate coverage. Note that
let (regions, chromsizes) = parse_regions(&regions, vec![bamifile1, bamifile2]);
let (regions, chromsizes) = parse_regions(supregion, vec![bamifile1, bamifile2]);
let regionblocks = region_divider(&regions);

// If there is a blacklist, read it.
let mut backlistregions: Option<Vec<Region>> = None;
if blacklist != "None" {
// Check if it's a bed or gtf file
let isbed = is_bed_or_gtf(blacklist);
match isbed.as_str() {
"gtf" => panic!("Error: Please provide a bed file for the blacklist."),
"bed" => {
let (bls, _) = read_bedfile(&blacklist.to_string(), false, chromsizes.keys().collect());
backlistregions = Some(bls);
},
_ => panic!("Error: Cannot determine filetype of blacklist file.")
}
}

let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();

// Set up the bam files in a Vec.
Expand All @@ -72,7 +88,7 @@ pub fn r_bamcompare(
bamfiles.par_iter()
.map(|(bamfile, ispe)| {
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, false))
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, false, &backlistregions))
.reduce(
|| (vec![], 0, 0, vec![], vec![]),
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {
Expand All @@ -96,13 +112,16 @@ pub fn r_bamcompare(
})
.collect()
});

// Print out some stats if verbose
if verbose {
println!("bamfile\tPE\tmapped\tunmapped\tmed_readlen\tmed_fraglen");
println!("{}\t{}\t{}\t{}\t{}\t{}", covcalcs[0].bamfile, covcalcs[0].ispe, covcalcs[0].mapped, covcalcs[0].unmapped, covcalcs[0].readlen, covcalcs[0].fraglen);
println!("{}\t{}\t{}\t{}\t{}\t{}", covcalcs[1].bamfile, covcalcs[1].ispe, covcalcs[1].mapped, covcalcs[1].unmapped, covcalcs[1].readlen, covcalcs[1].fraglen);
}
// Calculate scale factors.
let sf = scale_factor_bamcompare(scalefactorsmethod, covcalcs[0].mapped, covcalcs[1].mapped, binsize, effective_genome_size, norm);
println!("scale factor1 = {}, scale factor2 = {}", sf.0, sf.1);
// Create output stream
let mut chrom = "".to_string();


// Extract both vecs of TempPaths into a single vector
let its = vec![
covcalcs[0].bg.drain(..).collect::<Vec<_>>(),
Expand Down
41 changes: 28 additions & 13 deletions src/bamcoverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
use bigtools::Value;
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
use crate::filehandler::{bam_ispaired, write_covfile};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider, Region};
use crate::filehandler::{bam_ispaired, write_covfile, is_bed_or_gtf, read_bedfile};
use crate::normalization::scale_factor;
use crate::calc::median;

Expand All @@ -25,23 +25,23 @@ pub fn r_bamcoverage(
// processing options
mnase: bool,
_offset: Py<PyList>, // list of max 2 [offset 5', offset 3'], if no offset is required we have [1, -1]
extendreads: u32, // if 0, no extension
centerreads: bool,
filterrnastrand: &str, // forward, reverse or 'None'
_extendreads: u32, // if 0, no extension
_centerreads: bool,
_filterrnastrand: &str, // forward, reverse or 'None'
blacklist: &str, // path to blacklist filename, or 'None'
_ignorechr: Py<PyList>, // list of chromosomes to ignore. Is empty if none.
skipnoncovregions: bool,
smoothlength: u32, // 0 = no smoothing, else it's a strictly larger then binsize
_skipnoncovregions: bool,
_smoothlength: u32, // 0 = no smoothing, else it's a strictly larger then binsize
binsize: u32,
// filtering options
ignoreduplicates: bool,
_ignoreduplicates: bool,
minmappingquality: u8, //
samflaginclude: u16,
samflagexclude: u16,
minfraglen: u32,
maxfraglen: u32,
nproc: usize,
regions: Vec<(String, u32, u32)>,
supregion: &str,
verbose: bool
) -> PyResult<()> {
let mut offset: Vec<i32> = Vec::new();
Expand All @@ -68,14 +68,29 @@ pub fn r_bamcoverage(
if verbose {
println!("Sample: {} is-paired: {}", bamifile, ispe);
}

// Parse regions & calculate coverage
let (regions, chromsizes) = parse_regions(&regions, vec![bamifile]);
let (regions, chromsizes) = parse_regions(supregion, vec![bamifile]);
let regionblocks = region_divider(&regions);

// If there is a blacklist, read it.
let mut backlistregions: Option<Vec<Region>> = None;
if blacklist != "None" {
// Check if it's a bed or gtf file
let isbed = is_bed_or_gtf(blacklist);
match isbed.as_str() {
"gtf" => panic!("Error: Please provide a bed file for the blacklist."),
"bed" => {
let (bls, _) = read_bedfile(&blacklist.to_string(), false, chromsizes.keys().collect());
backlistregions = Some(bls);
},
_ => panic!("Error: Cannot determine filetype of blacklist file.")
}
}

let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();
let (bg, mapped, _unmapped, readlen, fraglen) = pool.install(|| {
regionblocks.par_iter()
.map(|i| bam_pileup(bamifile, &i, &binsize, &ispe, &ignorechr, &filters, true, false))
.map(|i| bam_pileup(bamifile, &i, &binsize, &ispe, &ignorechr, &filters, true, false, &backlistregions))
.reduce(
|| (vec![], 0, 0, vec![], vec![]),
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {
Expand Down Expand Up @@ -134,7 +149,7 @@ fn validate_args(
norm: &str,
effectivegenomesize: &u64,
scalefactor: &f32,
offset: &Vec<i32>,
_offset: &Vec<i32>,
ignorechr: &Vec<String>,
verbose: &bool
) {
Expand Down
6 changes: 4 additions & 2 deletions src/computematrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ pub fn r_computematrix(

// Discriminate between reference-point and scale-regions mode.

let mut matrix: Vec<Vec<f32>> = pool.install(|| {
let matrix: Vec<Vec<f32>> = pool.install(|| {
bw_files.par_iter()
.map(|i| bwintervals(&i, &regions, &slopregions, &scale_regions))
.reduce(
Expand Down Expand Up @@ -197,6 +197,8 @@ fn slop_region(
region.get_anchor_bins(scale_regions, chromend)
}

#[allow(unused_mut)]
#[allow(unused_assignments)]
fn matrix_dump(
sortregions: &str,
sortusing: &str,
Expand Down Expand Up @@ -238,7 +240,7 @@ fn matrix_dump(
rend = ix;
}
regionslices.push((rstart, rend));
let mut sortedix: Vec<usize> = Vec::new();
let mut sortedix: Vec<usize>;
if sortusing == "region_length" {
if !sort_using_samples.is_empty() && verbose {
println!("Sort using samples is set ({:?}), but is not used when sorting on region_length. It is thus ignored.", sort_using_samples);
Expand Down
Loading
Loading