Skip to content

Commit 1adfe2b

Browse files
authored
Merge pull request #1 from icgc-argo-workflows/strelka2@0.1.0
Strelka2@0.1.0
2 parents e6715a7 + dea5b0c commit 1adfe2b

28 files changed

Lines changed: 677 additions & 0 deletions

strelka2/.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.gitignore
2+
.nextflow*
3+
tests
4+
work
5+
outdir

strelka2/Dockerfile

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
FROM ubuntu:18.04

# OCI label linking the image to its source repository. Written in key=value
# form; the space-separated "LABEL key value" form is legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/icgc-argo-workflows/argo-somatic-variant-calling"

ARG PYTHON_VERSION=2.7.5

# Install the toolchain and libraries needed to build Python from source.
# PIP - openssl version > 1.1 may be an issue (try older ubuntu images)
# The apt package lists are removed in the same layer so they do not end up
# in the image.
RUN apt-get update \
    && apt-get install -y wget gcc make openssl libffi-dev libgdbm-dev libsqlite3-dev libssl-dev zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /tmp/

# Build Python from source and install it under /usr/local, then remove the
# tarball and the source tree.
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
    && tar --extract -f Python-$PYTHON_VERSION.tgz \
    && cd ./Python-$PYTHON_VERSION/ \
    && ./configure --enable-optimizations --prefix=/usr/local \
    && make && make install \
    && cd ../ \
    && rm -r ./Python-$PYTHON_VERSION*

# Build dependencies for Strelka2. `apt-get update` runs in the same RUN as
# the install so this layer never relies on package lists left behind (or
# cached) by an earlier layer.
RUN apt-get update \
    && apt-get install -y cmake g++ libboost-all-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

ARG STRELKA_VERSION=2.9.10

# Download the Strelka2 source release, build it out-of-tree in /tmp/build,
# install it to /opt/strelka2, then wipe everything left in /tmp.
RUN wget https://github.com/Illumina/strelka/releases/download/v${STRELKA_VERSION}/strelka-${STRELKA_VERSION}.release_src.tar.bz2 \
    && tar -xjf strelka-${STRELKA_VERSION}.release_src.tar.bz2 \
    && mkdir build && cd build \
    && ../strelka-${STRELKA_VERSION}.release_src/configure --jobs=4 --prefix=/opt/strelka2 \
    && make -j4 install \
    && rm -fr /tmp/*

# Put the package's own scripts (/tools) and the Strelka2 binaries on PATH.
ENV PATH="/tools:/opt/strelka2/bin:${PATH}"

COPY *.py /tools/

# Create an unprivileged user (uid/gid 1000) with a home directory and run
# as that user by default.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

USER ubuntu

WORKDIR /home/ubuntu

# `env` as entrypoint lets any command passed to `docker run` be resolved
# through PATH; with no command, an interactive bash is started.
ENTRYPOINT ["/usr/bin/env"]

CMD ["/bin/bash"]

strelka2/main.nf

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (c) 2021, ICGC ARGO
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in all
14+
copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
SOFTWARE.
23+
24+
Authors:
25+
Junjun Zhang
26+
*/
27+
28+
/********************************************************************/
29+
/* this block is auto-generated based on info from pkg.json where */
30+
/* changes can be made if needed, do NOT modify this block manually */
31+
nextflow.enable.dsl = 2
32+
version = '0.1.0' // package version
33+
34+
container = [
35+
'ghcr.io': 'ghcr.io/icgc-argo-workflows/argo-somatic-variant-calling.strelka2'
36+
]
37+
default_container_registry = 'ghcr.io'
38+
/********************************************************************/
39+
40+
41+
// universal params go here
42+
params.container_registry = ""
43+
params.container_version = ""
44+
params.container = ""
45+
46+
params.cpus = 1
47+
params.mem = 1 // GB
48+
params.publish_dir = "" // set to empty string will disable publishDir
49+
50+
params.tumourBam = ""
51+
params.normalBam = ""
52+
params.referenceFa = ""
53+
params.isExome = false
54+
55+
include { getSecondaryFiles as getSec } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/helper-functions@1.0.2/main'
56+
57+
// Runs the Strelka2 somatic workflow on a tumour/normal pair.
//
// Inputs:  tumour and normal alignment files with their indexes, the
//          reference FASTA with its index file(s), and an exome flag.
// Outputs: somatic SNV and indel VCFs (plus .tbi indexes) and runStats.tsv,
//          all taken from output_dir/results.
process strelka2 {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir

  cpus params.cpus
  memory "${params.mem} GB"

  input:
    path tumourBam
    path tumourBai
    path normalBam
    path normalBai
    path referenceFa
    path referenceFai
    val isExome

  output:
    path "output_dir/results/variants/somatic.snvs.vcf.gz", emit: somaticSnvVcf
    path "output_dir/results/variants/somatic.snvs.vcf.gz.tbi", emit: somaticSnvVcfTbi
    path "output_dir/results/variants/somatic.indels.vcf.gz", emit: somaticIndelVcf
    path "output_dir/results/variants/somatic.indels.vcf.gz.tbi", emit: somaticIndelVcfTbi
    path "output_dir/results/stats/runStats.tsv", emit: runStats

  script:
    // isExome is normally a Boolean (the param defaults are true/false), but
    // may also arrive as the String "true". Comparing a Boolean to "true"
    // with == is false in Groovy, so normalise through toString() first;
    // otherwise --exome would never be passed for Boolean values.
    def arg_exome = isExome.toString().toLowerCase() == "true" ? "--exome" : ""

    // The index inputs are not referenced in the command; declaring them as
    // inputs stages them in the task directory next to their data files.
    // --callMemMb is derived from params.mem (GB) divided by params.cpus.
    """

    mkdir -p output_dir

    configureStrelkaSomaticWorkflow.py \
      --tumorBam=${tumourBam} \
      --normalBam=${normalBam} \
      --referenceFasta=${referenceFa} \
      --callMemMb=${Math.round(params.mem * 1000 / params.cpus)} \
      --runDir=./output_dir ${arg_exome}

    ./output_dir/runWorkflow.py -m local -j ${params.cpus}

    """
}
99+
100+
101+
// this provides an entry point for this main script, so it can be run directly without cloning the repo
102+
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
103+
// Entry workflow: derives the index file paths from the params and runs
// the strelka2 process once.
workflow {
  // Alignment index: '.bai' is appended for names ending in '.bam',
  // '.crai' for anything else.
  tumourIndexPath = params.tumourBam + (params.tumourBam.endsWith('.bam') ? '.bai' : '.crai')
  normalIndexPath = params.normalBam + (params.normalBam.endsWith('.bam') ? '.bai' : '.crai')

  // Reference index: a '.fa.gz' reference needs both 'fai' and 'gzi'
  // secondary files, any other reference only 'fai'.
  // NOTE(review): getSec presumably returns the secondary-file paths for
  // the given extensions; confirm against the helper-functions module.
  refIndexExts = params.referenceFa.endsWith('.fa.gz') ? ['fai', 'gzi'] : ['fai']
  refIndexPaths = getSec(params.referenceFa, refIndexExts)

  strelka2(
    file(params.tumourBam),
    file(tumourIndexPath),
    file(params.normalBam),
    file(normalIndexPath),
    file(params.referenceFa),
    Channel.fromPath(refIndexPaths, checkIfExists: true).collect(),
    params.isExome
  )
}

strelka2/nextflow.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Run all processes in Docker containers.
docker.enabled = true
// Passed to `docker run`: '-u' sets the user the container runs as, here
// the uid:gid produced by the `id` command substitutions.
docker.runOptions = '-u \$(id -u):\$(id -g)'

strelka2/pkg.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"name": "strelka2",
3+
"version": "0.1.0",
4+
"description": "Strelka2 Small Variant Caller",
5+
"main": "main.nf",
6+
"deprecated": false,
7+
"keywords": [
8+
"bioinformatics",
9+
"genomics",
10+
"variant caller",
11+
"ngs"
12+
],
13+
"repository": {
14+
"type": "git",
15+
"url": "https://github.com/icgc-argo-workflows/argo-somatic-variant-calling.git"
16+
},
17+
"container": {
18+
"registries": [
19+
{
20+
"registry": "ghcr.io",
21+
"type": "docker",
22+
"org": "icgc-argo-workflows",
23+
"default": true
24+
}
25+
]
26+
},
27+
"dependencies": [
28+
"github.com/icgc-argo-workflows/data-processing-utility-tools/helper-functions@1.0.2"
29+
],
30+
"devDependencies": [],
31+
"contributors": [
32+
{
33+
"name": "Junjun Zhang",
34+
"email": "junjun.ca@gmail.com"
35+
}
36+
],
37+
"license": "MIT",
38+
"bugReport": "https://github.com/icgc-argo-workflows/argo-somatic-variant-calling/issues",
39+
"homepage": "https://github.com/icgc-argo-workflows/argo-somatic-variant-calling#readme"
40+
}

strelka2/tests/checker.nf

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (c) 2021, ICGC ARGO
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in all
14+
copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
SOFTWARE.
23+
24+
Authors:
25+
Junjun Zhang
26+
*/
27+
28+
/*
29+
This is an auto-generated checker workflow to test the generated main template workflow, it's
30+
meant to illustrate how testing works. Please update to suit your own needs.
31+
*/
32+
33+
/********************************************************************/
34+
/* this block is auto-generated based on info from pkg.json where */
35+
/* changes can be made if needed, do NOT modify this block manually */
36+
nextflow.enable.dsl = 2
37+
version = '0.1.0' // package version
38+
39+
container = [
40+
'ghcr.io': 'ghcr.io/icgc-argo-workflows/argo-somatic-variant-calling.strelka2'
41+
]
42+
default_container_registry = 'ghcr.io'
43+
/********************************************************************/
44+
45+
// universal params
46+
params.container_registry = ""
47+
params.container_version = ""
48+
params.container = ""
49+
50+
// tool-specific params go here, add / change as needed
51+
params.tumourBam = ""
52+
params.normalBam = ""
53+
params.referenceFa = ""
54+
params.isExome = true
55+
56+
params.expected_snv_output = ""
57+
params.expected_indel_output = ""
58+
59+
include { strelka2 } from '../main'
60+
include { getSecondaryFiles as getSec } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/helper-functions@1.0.2/main'
61+
62+
63+
// Compares the SNV and indel VCFs produced by strelka2 with the expected
// ones. Header lines (starting with '#') are stripped before diffing, so
// only the variant records are compared. Prints a PASS/FAIL message on
// stdout and exits non-zero on the first mismatch.
process file_smart_diff {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

  input:
    path output_somaticSnvVcf
    path expected_snv_output
    path output_somaticIndelVcf
    path expected_indel_output

  output:
    stdout()

  script:
    // Headers are removed with sed rather than `grep -v`: grep exits 1 when
    // it selects no lines, which would abort the task under Nextflow's
    // default `bash -ue` shell whenever a VCF has no variant records, even
    // if actual and expected outputs are identical.
    """
    gunzip -c ${output_somaticSnvVcf} | sed '/^#/d' > normalized_output_somaticSnvVcf

    gunzip -c ${expected_snv_output} | sed '/^#/d' > normalized_expected_snv_output

    diff normalized_output_somaticSnvVcf normalized_expected_snv_output \
      && ( echo -n "SNV calls MATCH. " ) || ( echo "Test FAILED, output SNV calls mismatch." && exit 1 )

    gunzip -c ${output_somaticIndelVcf} | sed '/^#/d' > normalized_output_somaticIndelVcf

    gunzip -c ${expected_indel_output} | sed '/^#/d' > normalized_expected_indel_output

    diff normalized_output_somaticIndelVcf normalized_expected_indel_output \
      && ( echo "Indel calls MATCH. Test PASSED" && exit 0 ) || ( echo "Test FAILED, output Indel calls mismatch." && exit 1 )

    """
}
97+
98+
99+
// Test workflow: runs strelka2 on the supplied inputs, then diffs its SNV
// and indel VCFs against the expected files.
workflow checker {
  take:
    tumourBam
    tumourBai
    normalBam
    normalBai
    referenceFa
    referenceFai
    isExome
    expected_snv_output
    expected_indel_output

  main:
    // Step 1: call variants with the process under test.
    strelka2(
      tumourBam, tumourBai,
      normalBam, normalBai,
      referenceFa, referenceFai,
      isExome
    )

    // Step 2: compare each produced VCF with its expected counterpart.
    file_smart_diff(
      strelka2.out.somaticSnvVcf, expected_snv_output,
      strelka2.out.somaticIndelVcf, expected_indel_output
    )
}
129+
130+
131+
// Entry workflow for the test: derives the index file paths from the
// params and hands everything to the checker workflow.
workflow {
  // Alignment index: '.bai' is appended for names ending in '.bam',
  // '.crai' for anything else.
  tumourIndexPath = params.tumourBam + (params.tumourBam.endsWith('.bam') ? '.bai' : '.crai')
  normalIndexPath = params.normalBam + (params.normalBam.endsWith('.bam') ? '.bai' : '.crai')

  // Reference index: a '.fa.gz' reference needs both 'fai' and 'gzi'
  // secondary files, any other reference only 'fai'.
  // NOTE(review): getSec presumably returns the secondary-file paths for
  // the given extensions; confirm against the helper-functions module.
  refIndexExts = params.referenceFa.endsWith('.fa.gz') ? ['fai', 'gzi'] : ['fai']
  refIndexPaths = getSec(params.referenceFa, refIndexExts)

  checker(
    file(params.tumourBam),
    file(tumourIndexPath),
    file(params.normalBam),
    file(normalIndexPath),
    file(params.referenceFa),
    Channel.fromPath(refIndexPaths, checkIfExists: true).collect(),
    params.isExome,
    file(params.expected_snv_output),
    file(params.expected_indel_output)
  )
}
1.61 KB
Binary file not shown.
2.09 KB
Binary file not shown.
54.2 KB
Binary file not shown.
96 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)