Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Rtools/malDrugR/copynumQDNAseq.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ groupId <- argv$samplegroup
parentID <- argv$parentId
refDir <- argv$refDir
groupbamL <- strsplit(argv$bams, " ")[[1]]
groupbamL <- groupbamL[!grepl(parentID, groupbamL)]
groupbamL <- groupbamL[!grepl(parentID, groupbamL)] # doesn't work for merged parents
sampleL <- sub("_nodup\\.bam$", "", groupbamL)

library(argv$bsref,
Expand Down
73 changes: 73 additions & 0 deletions Rtools/malDrugR/genesROI.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(genomeIntervals)) ## Used for readGff3
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(argparser))

#### Read command-line arguments ####
argp <- arg_parser(paste(
"Use a reference genomic-features file (.gff)",
"to find genes in a region of interest"
))
argp <- add_argument(argp, "--refpath",
help = "file path of reference"
)
argp <- add_argument(argp, "--refstrain",
help = "strain name for reference"
)
argp <- add_argument(argp, "--region",
type = "character",
help = "Regions are specified as: seqname[:STARTPOS[-ENDPOS]] e.g. 08:409282-429234"
)

argv <- parse_args(argp)

refDir <- argv$refpath
pfCurrRefs <- data.frame(
strain = c("3D7", "Dd2", "Supp"),
version = c("52", "57", "52")
)
ref <- dplyr::filter(pfCurrRefs, strain == argv$refstrain)
## Not looking for regions of interest in supplementary sequences yet

# ---------- Read genomic features --------------------------------------------
file.gff <- file.path(
refDir,
paste0(
"PlasmoDB-", ref$version, "_Pfalciparum", ref$strain,
".gff"
)
)

pf_features <- tryCatch(
readGff3(file.gff, quiet = TRUE),
error = function(e) {
stop(paste(e, "Filepath:", file.gff))
}
)
# ---------- Find features in region of interest ------------------------------
chrname <- str_remove(argv$region, ":.+")
gstart <- str_remove(argv$region, ".+:") |>
str_remove("-.+") |> as.numeric()
gend <- str_remove(argv$region, ".+:") |>
str_remove(".+-") |> as.numeric()
roigr <- GRanges(
seqnames = levels(seqnames(pf_features)) |>
str_subset(paste0("Pf", ref$strain, "_", chrname)),
ranges = IRanges(start = gstart, end = gend)
)
featROIidx <- findOverlaps(GRanges(pf_features), roigr,
type = "within", select = "last"
)
featROI <- pf_features[!is.na(featROIidx)]
# ---------- Filter to genes and write to csv ------------------------------
geneROI <- featROI[
annotation(featROI)$type == "protein_coding_gene",
]

data.frame(ID = getGffAttribute(geneROI, "ID"),
GeneName = getGffAttribute(geneROI, "Name"),
Description = getGffAttribute(geneROI, "description") |>
str_replace_all("\\+", " ")
) |>
write_csv(paste0("genes_chr", chrname, "_ROI.csv"))
255 changes: 113 additions & 142 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// include modules

include {
LaneMerge ;
Bwa ;
Expand Down Expand Up @@ -191,152 +192,122 @@ workflow {
error(
"""
No parent samples found.
"""
)
}
.set { parent_all_ch }
// Emits tuple val(parentId),path(bai),path(bams), val(parentlist)

merged_ch = Merge(parent_ch)
// Emits tuple val(parentId),path(parentId.bam)
//----------------QC tools------------------------------------------
//MultiQC integrates files from fastqc and mosdepth, found in input dir, into single report
fastqc_ch = FastQC(bam_ch.bamnodup.map { row -> row[1] }.unique { it.baseName }.collect())

// MosDepth Input Channel include val(sampleId), path(bam), path(bai)
mosdepth_ch = MosDepth(bam_ch.bysampleid)
// join bams with their index based on groupId
// FlagStats Input Channel include val(sampleId), path(bam)
flagstat_ch = FlagStats(bam_ch.bysampleid)

MultiQC(fastqc_ch.zip.mix(mosdepth_ch, flagstat_ch).collect().ifEmpty([]), Channel.fromPath(params.multiqc_config))
//----------------BCF tools----------------------------------------
//BCF Input Channel Emits parentId,groupId,ref,bamlist,bams,parentbams
input_ch
.map { row -> tuple(row[0], row[4], row[5] + ".fasta") }
.unique()
.join(bamlist_ch)
.combine(bam_ch.bamnodup, by: 0)
.groupTuple(by: [0, 1, 2, 3])
.combine(parent_ch.map { row -> tuple(row[1], row[0]) }, by: 1)
.set { bcf_input_ch }
// Emits val(parentId),val(groupId),path(ref), path(bamlist), path(bams),path(parentbams)
bcf_ch = Bcf(bcf_input_ch)
//----------------------Gridss-------------------------------------
input_ch
.map { row -> tuple(row[0], row[4], row[5] + ".fasta") }
.unique()
.join(
bamlist_ch.map { gid, filepath ->
def fileLines = filepath.readLines()
def fileContents = fileLines.join(' ')
return tuple(gid, fileContents)
}
)
.combine(bam_ch.bamnodup, by: 0)
.groupTuple(by: [0, 1, 2, 3])
.combine(parent_ch.map { row -> tuple(row[1], row[0]) }, by: 1)
.ifEmpty {
error(
"""
""")
}
.set{parent_all_ch} // Emits tuple val(parentId),path(bai),path(bams), val(parentlist)

merged_ch=Merge(parent_ch) // Emits tuple val(parentId),path(parentId.bam)
//----------------QC tools------------------------------------------
//MultiQC integrates files from fastqc and mosdepth, found in input dir, into single report
fastqc_ch=FastQC(bam_ch.bamnodup.map{row->row[1]}.unique({it.baseName}).collect())

// MosDepth Input Channel include val(sampleId), path(bam), path(bai)
mosdepth_ch=MosDepth(bam_ch.bysampleid) // join bams with their index based on groupId
// FlagStats Input Channel include val(sampleId), path(bam)
flagstat_ch=FlagStats(bam_ch.bysampleid)

MultiQC( fastqc_ch.zip.mix(mosdepth_ch, flagstat_ch).collect().ifEmpty([]),Channel.fromPath(params.multiqc_config) )
//----------------BCF tools----------------------------------------
//BCF Input Channel Emits parentId,groupId,ref,bamlist,bams,parentbams
input_ch.map{row -> tuple(row[0],row[4],row[5]+".fasta")}
.unique()
.join(bamlist_ch)
.combine(bam_ch.bamnodup,by:0)
.groupTuple(by:[0,1,2,3])
.combine(parent_ch.map{row->tuple(row[1],row[0])},by:1).set{bcf_input_ch}
// Emits val(parentId),val(groupId),path(ref), path(bamlist), path(bams),path(parentbams)
bcf_ch=Bcf(bcf_input_ch)
//----------------------Gridss-------------------------------------
input_ch.map{row -> tuple(row[0],row[4],row[5]+".fasta")}
.unique()
.join(bamlist_ch.map{gid,filepath ->
def fileLines = filepath.readLines()
def fileContents = fileLines.join(' ')
return tuple(gid, fileContents)
})
.combine(bam_ch.bamnodup,by:0)
.groupTuple(by:[0,1,2,3])
.combine(parent_ch.map{row->tuple(row[1],row[0])},by:1)
.ifEmpty {
error("""
Input to gridss is empty.
"""
)
}
.set { gridss_input_ch }
// Emits val(parentId),val(groupId),path(ref), val(bamlistcontent), path(bams),path(parentbams)
gridss_ch = Gridss(gridss_input_ch.combine(Channel.fromPath(params.gridss_jar_path)))
""")
}
.set{gridss_input_ch}// Emits val(parentId),val(groupId),path(ref), val(bamlistcontent), path(bams),path(parentbams)
gridss_ch=Gridss(gridss_input_ch.combine(Channel.fromPath(params.gridss_jar_path)))

input_ch.map{row -> tuple(row[4], row[0],row[7] )}
.unique()
.combine(parent_ch,by:0).map{row -> tuple(row[1],row[2],row[5])}
.join(bamlist_ch).join(gridss_ch.vcf)
.set{sv_input_ch} //Emit val(groupId),val(bsref),val(parentbamlist), path(bamlist), path(vcf)

input_ch
.map { row -> tuple(row[4], row[0], row[7]) }
.unique()
.combine(parent_ch, by: 0)
.map { row -> tuple(row[1], row[2], row[5]) }
.join(bamlist_ch)
.join(gridss_ch.vcf)
.set { sv_input_ch }
//Emit val(groupId),val(bsref),val(parentbamlist), path(bamlist), path(vcf)

sfilter_ch = SomaticFilter(
sv_input_ch.combine(Channel.fromPath("${projectDir}/Rtools/gridss_assets/gridss_somatic_filter.R"))
)
//----------------------CopyNum--------------------------------------
input_ch
.map { row -> tuple(row[4], row[0], row[6], row[7]) }
.unique()
.combine(merged_ch, by: 0)
.map { row ->
if (row[0]) {
tuple(row[1], row[0], row[2], row[3], row[4])
}
}
.join(
bamlist_ch.map { gid, filepath ->
def fileLines = filepath.readLines()
def fileContents = fileLines.join(' ')
return tuple(gid, fileContents)
}
)
.combine(bam_ch.bamnodup, by: 0)
.groupTuple(by: [0, 1, 2, 3, 4, 5])
.ifEmpty {
error(
"""
sfilter_ch=SomaticFilter(sv_input_ch
.combine(Channel.fromPath("${projectDir}/Rtools/gridss_assets/gridss_somatic_filter.R")))
//----------------------CopyNum--------------------------------------
input_ch.map{row -> tuple(row[4], row[0],row[6],row[7] )}
.unique()
.combine(merged_ch,by:0)
.map{row -> if (row[0]) tuple(row[1],row[0],row[2],row[3],row[4])}
.combine(bam_ch.bamnodup,by:0)
.groupTuple(by:[0,1,2,3,4])
.ifEmpty {
error("""
Input to CopyNum Analysis is empty.
"""
""")
}
.set{copynum_input_ch}
//Emits val(groupId),val(parentId),path(refpath),val(bsref),path(mergedparent), path(bamlistcontent), path(bams), val(dummy)

copynum_ch=RCopyNum(copynum_input_ch
.combine(Channel.fromList( [params.bin_CNroi, params.bin_CNfull] ))
.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/copynumQDNAseq.R"))
)
}
.set { copynum_input_ch }
copynum_input_ch.view()
//Emits val(groupId),val(parentId),path(refpath),val(bsref),path(mergedparent), path(bamlistcontent), path(bams), val(dummy)

copynum_ch = RCopyNum(
copynum_input_ch.combine(Channel.fromList([params.bin_CNroi, params.bin_CNfull])).combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/copynumQDNAseq.R"))
)
//----------------------filter BCF-----------------------------------
input_ch
.map { row -> tuple(row[4], row[0], row[6], row[3]) }
.unique()
.combine(parent_all_ch, by: 0)
.map { row -> tuple(row[1], row[2], row[3], row[4], row[5], row[6]) }
.join(bcf_ch, by: 0)
.join(bam_ch.bamnodup.groupTuple(), by: 0)
.join(bam_ch.bai.groupTuple(), by: 0)
.ifEmpty {
error(
"""
//----------------------filter BCF-----------------------------------
input_ch.map{row -> tuple(row[4],row[0],row[6],row[3])}
.unique()
.combine(parent_all_ch,by:0)
.map{row-> tuple(row[1],row[2],row[3],row[4],row[5],row[6])}
.join(bcf_ch, by:0)
.join(bam_ch.bamnodup.groupTuple(),by:0)
.join(bam_ch.bai.groupTuple(),by:0)
.ifEmpty {
error("""
Input to filter is empty.
"""
)
}
.set { fbcf_ch }
// Emits val(groupId),path(refpath),val(prefix),
// path(parentbai),path(parentbam), val(parentbamlist),
// path(vcf),
// path(bams), path(bai), val(dummy)
FilterBcfVcf(
fbcf_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/filterBcfVcf.R"))
)
//----------------------GRIDSS filter-----------------------------------
input_ch
.map { row -> tuple(row[0], row[4]) }
.unique()
.combine(parent_ch.map { row -> tuple(row[2], row[0]) }, by: 1)
.map { row -> tuple(row[1], row[2]) }
.join(sfilter_ch.vcf)
.set { gf_ch }
// Emits val(groupId),val(parentlist), path(vcf), val(dummy)
FilterGridssSV(
gf_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/gridss_majorityfilt.R")).combine(Channel.fromPath("${projectDir}/Rtools/gridss_assets/"))
""")
}
.set{fbcf_ch} // Emits val(groupId),path(refpath),val(prefix),
// path(parentbai),path(parentbam), val(parentbamlist),
// path(vcf),
// path(bams), path(bai), val(dummy)
FilterBcfVcf(fbcf_ch
.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/filterBcfVcf.R"))
)
//----------------------GRIDSS filter-----------------------------------
input_ch.map{row -> tuple(row[0],row[4])}
.unique()
.combine(parent_ch.map{row->tuple(row[2],row[0])},by:1)
.map{row->tuple(row[1],row[2])}
.join(sfilter_ch.vcf)
.set{gf_ch} // Emits val(groupId),val(parentlist), path(vcf), val(dummy)
FilterGridssSV(gf_ch
.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/gridss_majorityfilt.R"))
.combine(Channel.fromPath("${projectDir}/Rtools/gridss_assets/"))
)
//----------------------Plot-----------------------------------------
// Input Channel Emits val(groupId),val(parentId),path(rds)
input_ch
.map { row -> tuple(row[0], row[4]) }
.unique()
.join(copynum_ch.groupTuple().map { row -> tuple(row[0], row[1].flatten()) }, by: 0)
.set { plot_ch }
//----------------------Plot CopyNums-----------------------------------------
// Input Channel Emits val(groupId),val(parentId),path(rds)
input_ch.map{row -> tuple(row[0],row[4])}
.unique()
.join(copynum_ch.groupTuple().map{row-> tuple(row[0], row[1].flatten())},by:0).set{plot_ch}

RPlotFull(plot_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/copynumPlotsFull.R")))
RPlotROI(plot_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/copynumPlotsROI.R")))
}
RPlotFull(plot_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/copynumPlotsFull.R")))
RPlotROI(plot_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/copynumPlotsROI.R")))

//----------------------List genes in a region of interest, if specified------
// Input Channel emits path(refpath), val(ref_prefix)
input_ch.map{row -> tuple(row[6],row[3])}.unique().set{genesROI_input_ch}

if (params.genesRegion != "")
genesROI(genesROI_input_ch.combine(Channel.fromPath("${projectDir}/Rtools/malDrugR/genesROI.R")))
//-------------------------------------------------------------------
}
23 changes: 21 additions & 2 deletions modules/stvariant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ process RCopyNum {

tuple val(groupId),val(parentId),path(refpath),val(bsref),
path(mergedparent),
val(bamfilenames),
path(bams),
val(bins),path(script)

Expand All @@ -118,7 +117,7 @@ process RCopyNum {
Rscript --vanilla ${script} \
--samplegroup ${groupId} \
--parentId ${parentId} \
--bams "${bamfilenames}" \
--bams "${bams}" \
--bin_in_kbases ${bins} \
--refDir ${refpath}\
--bsref ${bsref}
Expand Down Expand Up @@ -227,5 +226,25 @@ process RPlotROI {
--endROI ${params.end_CNroi}
"""
}
process genesROI {
label 'Rfilter'
tag "${groupId}"

publishDir "${params.outdir}/variants/copynumPlots", mode: 'copy'

input:
tuple path(refpath),val(prefix),path(script)

output:
path("*.csv")

script:
"""
Rscript --vanilla ${projectDir}/Rtools/malDrugR/genesROI.R \
--refpath ${refpath} \
--refstrain ${prefix} \
--region "${params.genesRegion}"
"""
}


Loading