1+ #! /usr/bin/env nextflow
2+
3+ /*
4+ Copyright (c) 2021, ICGC-ARGO-Structural-Variation-CN-WG
5+
6+ Permission is hereby granted, free of charge, to any person obtaining a copy
7+ of this software and associated documentation files (the "Software"), to deal
8+ in the Software without restriction, including without limitation the rights
9+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+ copies of the Software, and to permit persons to whom the Software is
11+ furnished to do so, subject to the following conditions:
12+
13+ The above copyright notice and this permission notice shall be included in all
14+ copies or substantial portions of the Software.
15+
16+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+ SOFTWARE.
23+
24+ Authors:
25+ Alvaro Ferriz
26+ */
27+
28+ /* *******************************************************************/
29+ /* this block is auto-generated based on info from pkg.json where */
30+ /* changes can be made if needed, do NOT modify this block manually */
// Opt in to Nextflow DSL2 (required for the include / workflow syntax used below).
nextflow.enable.dsl = 2
// Package version; also the default container image tag used by the process.
version = '0.2.0'  // package version

// Container image repository per registry; looked up by registry name in the process.
container = [
    'ghcr.io': 'ghcr.io/icgc-argo-structural-variation-cn-wg/icgc-argo-sv-copy-number.manta'
]
// Registry used when params.container_registry is not set.
default_container_registry = 'ghcr.io'
38+ /* *******************************************************************/
39+
40+
// universal params go here
// Container overrides: each one is left empty so the process falls back to the
// package defaults (registry map entry and package version).
params.container_registry = ""
params.container_version = ""
params.container = ""

params.cpus = 1
params.mem = 4  // GB
params.publish_dir = ""  // an empty string disables publishDir

params.help = null

// tool specific params go here, add / change as needed
params.normalBam = ""
params.tumorBam = ""
params.referenceFasta = ""

// The index files do not have to be given: the workflow derives them from the
// name of the main file by adding the proper extension.
// These params are kept so the index paths can be supplied manually if needed.
// NOTE(review): confirm the workflow block actually consumes them.
params.normalBai = ""
params.tumorBai = ""
params.referenceFai = ""
// NOTE(review): the manta process passes a fixed `--runDir output_dir`; confirm
// whether this param is meant to be wired in.
params.runDir = ""
63+
64+
65+
// Helper that builds secondary (index) file paths from a main file path and a list of extensions.
include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/helper-functions@1.0.1.1/main.nf'
67+
68+
/*
 * Log the usage text for this workflow.
 *
 * The text documents the parameters this script itself defines and forwards.
 * The previous text was the raw `configManta.py --help` output, which listed
 * options that are never passed through to Manta and a site-specific path.
 */
def helpMessage() {
    log.info """
    USAGE

    nextflow run <main_script>.nf --normalBam <FILE> --tumorBam <FILE> --referenceFasta <FILE> [options]

    Configures Manta with configManta.py for a tumor/normal pair and then
    executes the generated runWorkflow.py script.

    Required inputs:
      --normalBam FILE         Normal sample BAM file.
      --tumorBam FILE          Tumor sample BAM file.
      --referenceFasta FILE    samtools-indexed reference fasta file.

    Index files are located automatically by adding the 'bai' / 'fai'
    extension to the name of the corresponding main file.

    Resources:
      --cpus INT               CPUs requested for the task (default: 1)
      --mem INT                Memory in GB requested for the task and passed
                               to runWorkflow.py as --memGb (default: 4)

    Output:
      --publish_dir DIR        Directory to copy results into. An empty value
                               disables publishing (default: disabled)

    Container:
      --container_registry STR Registry to pull the image from (default: ghcr.io)
      --container_version STR  Image tag (default: package version)
      --container STR          Full image name, overrides the registry lookup

    Other:
      --help                   Show this help message and exit

    The process emits a directory named 'output_dir', which is the Manta run
    directory (workflow scripts and results).
    """.stripIndent()
}
157+
// When --help is given, log the usage text and stop with a success status
// before any process is scheduled.
if (params.help) {
    helpMessage()
    exit 0
}
159+
160+
161+
/*
 * Run Manta on a tumor/normal pair.
 *
 * Inputs:  normal and tumor BAMs, the reference fasta, and their index files.
 * Output:  the `output_dir` directory (Manta run directory), emitted as `output_file`.
 */
process manta {
    // Image = explicit params.container, else the registry map entry; tag = explicit version, else package version.
    container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
    // `enabled` is given an explicit boolean instead of relying on string truthiness of publish_dir.
    publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

    cpus params.cpus
    memory "${params.mem} GB"

    input:  // input, make update as needed
        path normalBam
        path tumorBam
        path referenceFasta
        // The index files are not referenced in the script below; declaring them as
        // inputs stages them in the task directory together with the main files.
        path normalBai
        path tumorBai
        path referenceFai

    output:  // output, make update as needed
        path "output_dir", emit: output_file

    script:
        // add and initialize variables here as needed

        // --jobs pins Manta to the CPUs allocated to this task; without it
        // runWorkflow.py picks its own job count, ignoring the `cpus` directive.
        """
        mkdir -p output_dir

        echo "RUNNING VARIANT CALLER"

        configManta.py \
            --normalBam ${normalBam} \
            --tumorBam ${tumorBam} \
            --referenceFasta ${referenceFasta} \
            --runDir output_dir

        output_dir/runWorkflow.py --jobs ${task.cpus} --memGb ${params.mem}
        """
}
201+
202+
203+
// This provides an entry point for this main script, so it can be run directly without cloning the repo
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx

/*
 * Pick the index path(s) for a main input file.
 *
 * Returns the manually supplied path when one is given; unset or blank values
 * count as "not given". Otherwise the path list is derived from the main file
 * with getSecondaryFiles and the given extension.
 */
def resolveIndex(manualPath, mainFile, ext) {
    def supplied = manualPath?.toString()?.trim()
    return supplied ? supplied : getSecondaryFiles(mainFile, [ext])
}

workflow {
    manta(
        // Main inputs: fail early with a clear error if a path does not exist.
        file(params.normalBam, checkIfExists: true),
        file(params.tumorBam, checkIfExists: true),
        file(params.referenceFasta, checkIfExists: true),
        // Index inputs: an explicit --normalBai / --tumorBai / --referenceFai wins,
        // otherwise the index is looked up from the main file name.
        Channel.fromPath(resolveIndex(params.normalBai, params.normalBam, 'bai'), checkIfExists: true).collect(),
        Channel.fromPath(resolveIndex(params.tumorBai, params.tumorBam, 'bai'), checkIfExists: true).collect(),
        Channel.fromPath(resolveIndex(params.referenceFai, params.referenceFasta, 'fai'), checkIfExists: true).collect()
    )
}
0 commit comments