Skip to content

Commit 14eec91

Browse files
authored
Merge pull request #5 from ICGC-ARGO-Structural-Variation-CN-WG/seqz-preprocess@0.2.0
[release]
2 parents 490e9d2 + 2d36a28 commit 14eec91

19 files changed

Lines changed: 11086 additions & 0 deletions

seqz-preprocess/.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.gitignore
2+
.nextflow*
3+
tests
4+
work
5+
outdir

seqz-preprocess/Dockerfile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Base image is pinned: newer versions result in conflicts.
FROM continuumio/miniconda3:4.8.2

# Install system and bioconda dependencies in a single layer:
#   - procps (provides `ps`)
#   - samtools 1.9 and tabix
#   - sequenza-utils 3.0.0
# The unprivileged `sequenza` account is created here as well, because the
# USER instruction below only selects an account; it does not create one.
RUN apt-get update \
    && apt-get install -y procps \
    && rm -rf /var/lib/apt/lists/* \
    && /opt/conda/bin/conda install --yes -c bioconda samtools=1.9 tabix \
    && /opt/conda/bin/conda install --yes -c bioconda sequenza-utils=3.0.0 \
    && useradd --create-home --shell /bin/bash sequenza

# Run as the unprivileged user from its home directory.
USER sequenza
WORKDIR /home/sequenza

CMD ["/bin/bash"]

seqz-preprocess/main.nf

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (c) 2021, ICGC ARGO
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in all
14+
copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
SOFTWARE.
23+
24+
Authors:
25+
Desiree Schnidrig
26+
*/
27+
28+
/********************************************************************/
29+
/* this block is auto-generated based on info from pkg.json where */
30+
/* changes can be made if needed, do NOT modify this block manually */
31+
nextflow.enable.dsl = 2
32+
version = '0.2.0' // package version
33+
34+
container = [
35+
'ghcr.io': 'ghcr.io/icgc-argo-structural-variation-cn-wg/wfpm-demo.seqz-preprocess'
36+
]
37+
default_container_registry = 'ghcr.io'
38+
/********************************************************************/
39+
40+
41+
// universal params go here
42+
params.container_registry = ""
43+
params.container_version = ""
44+
params.container = ""
45+
46+
params.cpus = 1
47+
params.mem = 1 // GB
48+
params.publish_dir = "output_dir/" // set to empty string will disable publishDir
49+
50+
51+
// tool-specific params go here, add / change as needed
52+
params.tumor_bam = ""
53+
params.normal_bam = ""
54+
params.fasta = ""
55+
params.gcwiggle = "${baseDir}/resources/hg38.gc50Base.wig.gz"
56+
params.output_pattern = "*bin50.seqz.gz" // output file name pattern
57+
58+
/*
 * Convert a tumor/normal BAM pair into a binned Sequenza seqz file.
 *
 * Inputs (positional): tumor BAM, normal BAM, reference FASTA, GC wiggle track.
 * Output: files matching params.output_pattern, emitted as `seqz`.
 */
process seqzPreprocess {
  // Explicit params.container wins; otherwise the registry image is used, tagged
  // with params.container_version or the package version.
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
  // `enabled` takes a boolean, so coerce the publish_dir string explicitly:
  // an empty string disables publishing, any other value enables it.
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

  cpus params.cpus
  memory "${params.mem} GB"

  input:  // input, make update as needed
    path tumor_bam
    path normal_bam
    path fasta
    path gcwiggle

  output:  // output, make update as needed
    path "${params.output_pattern}", emit: seqz

  script:
    // Step 1 writes sample.seqz.gz; step 2 bins it with a window of 50 into
    // sample_bin50.seqz.gz.
    """
    sequenza-utils bam2seqz --normal ${normal_bam} --tumor ${tumor_bam} --fasta ${fasta} -gc ${gcwiggle} --output sample.seqz.gz;
    sequenza-utils seqz_binning --seqz sample.seqz.gz --window 50 -o sample_bin50.seqz.gz
    """
}
82+
83+
84+
// this provides an entry point for this main script, so it can be run directly without cloning the repo
85+
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
86+
workflow {
  // Resolve each path parameter to a file object, in the order the process
  // declares its inputs: tumor BAM, normal BAM, reference FASTA, GC wiggle.
  tumor_file    = file(params.tumor_bam)
  normal_file   = file(params.normal_bam)
  fasta_file    = file(params.fasta)
  gcwiggle_file = file(params.gcwiggle)

  seqzPreprocess(tumor_file, normal_file, fasta_file, gcwiggle_file)
}

seqz-preprocess/main.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
Copyright (c) 2021, ICGC ARGO
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in all
15+
copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.
24+
25+
Authors:
26+
lDesiree
27+
"""
28+
29+
import os
30+
import sys
31+
import argparse
32+
import subprocess
33+
34+
35+
def main():
    """
    Python implementation of tool: seqz-preprocess

    This is auto-generated Python code, please update as needed!

    Command-line arguments:
        -i / --input-file   path to an existing input file (required)
        -o / --output-dir   path to an existing output directory (required)

    Exits with an error message if the input file or output directory does
    not exist. Raises subprocess.CalledProcessError if the invoked command
    returns a non-zero exit status.

    NOTE(review): the command executed is `fastqc`, which looks like a
    leftover from the package template rather than a Sequenza step - confirm.
    """

    parser = argparse.ArgumentParser(description='Tool: seqz-preprocess')
    parser.add_argument('-i', '--input-file', dest='input_file', type=str,
                        help='Input file', required=True)
    parser.add_argument('-o', '--output-dir', dest='output_dir', type=str,
                        help='Output directory', required=True)
    args = parser.parse_args()

    if not os.path.isfile(args.input_file):
        sys.exit('Error: specified input file %s does not exist or is not accessible!' % args.input_file)

    if not os.path.isdir(args.output_dir):
        sys.exit('Error: specified output dir %s does not exist or is not accessible!' % args.output_dir)

    # Pass the command as an argument list without a shell, so paths containing
    # spaces or shell metacharacters reach the tool verbatim and cannot be
    # interpreted as extra shell commands.
    subprocess.run(["fastqc", "-o", args.output_dir, args.input_file], check=True)
56+
57+
58+
if __name__ == "__main__":
59+
main()

seqz-preprocess/nextflow.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Docker execution settings: enable Docker and pass `-u <uid>:<gid>` of the
// invoking user (resolved by the shell via `id`) to `docker run`.
docker.enabled = true
docker.runOptions = '-u \$(id -u):\$(id -g)'

seqz-preprocess/pkg.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"name": "seqz-preprocess",
3+
"version": "0.2.0",
4+
"description": "Sequenza preprocessing",
5+
"main": "main.nf",
6+
"deprecated": false,
7+
"keywords": [
8+
"bioinformatics",
9+
"seq",
10+
"qc metrics"
11+
],
12+
"repository": {
13+
"type": "git",
14+
"url": "https://github.com/icgc-argo-structural-variation-cn-wg/wfpm-demo.git"
15+
},
16+
"container": {
17+
"registries": [
18+
{
19+
"registry": "ghcr.io",
20+
"type": "docker",
21+
"org": "icgc-argo-structural-variation-cn-wg",
22+
"default": true
23+
}
24+
]
25+
},
26+
"dependencies": [],
27+
"devDependencies": [],
28+
"contributors": [
29+
{
30+
"name": "lDesiree",
31+
"email": "desiree.schnidrig@gmail.com"
32+
}
33+
],
34+
"license": "MIT",
35+
"bugReport": "https://github.com/icgc-argo-structural-variation-cn-wg/wfpm-demo/issues",
36+
"homepage": "https://github.com/icgc-argo-structural-variation-cn-wg/wfpm-demo#readme"
37+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
e940cff9bff4e75960127201665e838f hg38.gc50Base.wig.gz

seqz-preprocess/resources/readme

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# File 'hg38.gc50Base.wig.gz' is larger than 100 MB. Please download it from "https://object.cancercollaboratory.org:9080/swift/v1/genomics-public-data/sequenza_references/hg38.gc50Base.wig.gz" and verify it against the md5sum file in this folder.

seqz-preprocess/tests/checker.nf

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (c) 2021, ICGC ARGO
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in all
14+
copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
SOFTWARE.
23+
24+
Authors:
25+
Desiree Schnidrig
26+
*/
27+
28+
/*
29+
This is an auto-generated checker workflow to test the generated main template workflow, it's
30+
meant to illustrate how testing works. Please update to suit your own needs.
31+
*/
32+
33+
/********************************************************************/
34+
/* this block is auto-generated based on info from pkg.json where */
35+
/* changes can be made if needed, do NOT modify this block manually */
36+
nextflow.enable.dsl = 2
37+
version = '0.2.0' // package version
38+
39+
container = [
40+
'ghcr.io': 'ghcr.io/icgc-argo-structural-variation-cn-wg/wfpm-demo.seqz-preprocess'
41+
]
42+
default_container_registry = 'ghcr.io'
43+
/********************************************************************/
44+
45+
// universal params
46+
params.container_registry = ""
47+
params.container_version = ""
48+
params.container = ""
49+
50+
// tool-specific params go here, add / change as needed
51+
params.tumor_bam = ""
52+
params.normal_bam = ""
53+
params.gcwiggle = ""
54+
params.fasta = ""
55+
params.expected_output = ""
56+
57+
include { seqzPreprocess } from '../main'
58+
59+
60+
/*
 * Compare the produced seqz file against the expected file with zdiff.
 * Prints "Test PASSED" on a match; otherwise prints a failure message and
 * exits with status 1. Standard output is the process output.
 */
process file_smart_diff {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

  input:
    path seqz
    path expected_file

  output:
    stdout()

  script:
    """
    if zdiff ${seqz} ${expected_file}; then
      echo "Test PASSED"
    else
      echo "Test FAILED, output file mismatch."
      exit 1
    fi
    """
}
76+
77+
78+
/*
 * Test workflow: run seqzPreprocess on the given inputs and compare its
 * `seqz` output with the expected output file.
 *
 * Inputs are positional and listed in the same order as the seqzPreprocess
 * process inputs (tumor BAM, normal BAM, FASTA, GC wiggle), followed by the
 * expected output. The names of the third and fourth inputs previously read
 * gcwiggle/fasta while the caller passed fasta/gcwiggle; the positions are
 * unchanged, only the names now match what is actually passed.
 */
workflow checker {
  take:
    tumor_bam
    normal_bam
    fasta
    gcwiggle
    expected_output

  main:
    seqzPreprocess(
      tumor_bam,
      normal_bam,
      fasta,
      gcwiggle
    )

    file_smart_diff(
      seqzPreprocess.out.seqz,
      expected_output
    )
}
99+
100+
101+
workflow {
  // Resolve each path parameter to a file object; the argument order passed
  // to checker is tumor BAM, normal BAM, FASTA, GC wiggle, expected output.
  tumor_file    = file(params.tumor_bam)
  normal_file   = file(params.normal_bam)
  fasta_file    = file(params.fasta)
  gcwiggle_file = file(params.gcwiggle)
  expected_file = file(params.expected_output)

  checker(tumor_file, normal_file, fasta_file, gcwiggle_file, expected_file)
}
4.63 KB
Binary file not shown.

0 commit comments

Comments
 (0)