Skip to content

Commit 1251727

Browse files
authored
Merge pull request #46 from icgc-argo-workflows/seqz-preprocess@0.3.0
[release]
2 parents cebe029 + f22fb1b commit 1251727

8 files changed

Lines changed: 77 additions & 18 deletions

seqz-preprocess/main.nf

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
/* this block is auto-generated based on info from pkg.json where */
3030
/* changes can be made if needed, do NOT modify this block manually */
3131
nextflow.enable.dsl = 2
32-
version = '0.2.5.1'
32+
version = '0.3.0'
3333

3434
container = [
3535
'ghcr.io': 'ghcr.io/icgc-argo-workflows/icgc-argo-sv-copy-number.seqz-preprocess'
@@ -45,15 +45,18 @@ params.container = ""
4545

4646
params.cpus = 1
4747
params.mem = 1 // GB
48-
params.publish_dir = "output_dir/" // set to empty string will disable publishDir
48+
params.publish_dir = "" // set to empty string will disable publishDir
4949

5050

5151
// tool-specific params go here; add / change as needed
5252
params.tumor_bam = ""
5353
params.normal_bam = ""
5454
params.fasta = ""
5555
params.gcwiggle = "${baseDir}/resources/hg38.gc50Base.wig.gz"
56-
params.output_pattern = "*bin50.seqz.gz" // output file name pattern
56+
params.chromosomes = ["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"]
57+
params.output_pattern = "*.seqz.gz" // output file name pattern
58+
59+
include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/helper-functions@1.0.1.1/main.nf'
5760

5861
process seqzPreprocess {
5962
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
@@ -64,20 +67,49 @@ process seqzPreprocess {
6467

6568
input: // input, make update as needed
6669
path tumor_bam
70+
path tumour_bai
6771
path normal_bam
72+
path normal_bai
6873
path fasta
74+
path fasta_fai
6975
path gcwiggle
76+
each chrom
7077

7178
output: // output, make update as needed
72-
path "${params.output_pattern}", emit: seqz
79+
path "${params.output_pattern}", emit: seqzperchromosome
80+
81+
shell:
82+
// add and initialize variables here as needed
83+
'''
84+
sequenza-utils bam2seqz --chromosome !{chrom} -n !{normal_bam} -t !{tumor_bam} --fasta !{fasta} -gc !{gcwiggle} -o "!{chrom}.seqz.gz"
85+
'''
86+
87+
88+
}
89+
90+
process seqzPreprocessMerge {
91+
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
92+
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir
93+
94+
cpus params.cpus
95+
memory "${params.mem} GB"
96+
97+
input: // input, make update as needed
98+
file seqzperchromosome
99+
100+
output: // output, make update as needed
101+
path "sample_bin50.seqz.gz", emit: seqz
102+
103+
shell:
104+
// add and initialize variables here as needed
105+
seqzfiles = params.chromosomes.join('.seqz.gz ')
106+
'''
107+
zcat !{seqzfiles}.seqz.gz | awk '{if (NR!=1 && $1 != "chromosome") {print $0}}' | bgzip > sample.seqz.gz
108+
tabix -f -s 1 -b 2 -e 2 -S 1 sample.seqz.gz
109+
sequenza-utils seqz_binning --seqz sample.seqz.gz --window 50 -o sample_bin50.seqz.gz
110+
'''
73111

74-
script:
75-
// add and initialize variables here as needed
76112

77-
"""
78-
sequenza-utils bam2seqz --normal ${normal_bam} --tumor ${tumor_bam} --fasta ${fasta} -gc ${gcwiggle} --output sample.seqz.gz;
79-
sequenza-utils seqz_binning --seqz sample.seqz.gz --window 50 -o sample_bin50.seqz.gz
80-
"""
81113
}
82114

83115

@@ -86,8 +118,15 @@ process seqzPreprocess {
86118
workflow {
87119
seqzPreprocess(
88120
file(params.tumor_bam),
121+
Channel.fromPath(getSecondaryFiles(params.tumor_bam,['{b,cr}ai']), checkIfExists: true).collect(),
89122
file(params.normal_bam),
123+
Channel.fromPath(getSecondaryFiles(params.normal_bam,['{b,cr}ai']), checkIfExists: true).collect(),
90124
file(params.fasta),
91-
file(params.gcwiggle)
125+
Channel.fromPath(getSecondaryFiles(params.fasta,['fai']), checkIfExists: true).collect(),
126+
file(params.gcwiggle),
127+
params.chromosomes.flatten()
128+
)
129+
seqzPreprocessMerge(
130+
seqzPreprocess.out.seqzperchromosome.collect()
92131
)
93132
}

seqz-preprocess/pkg.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "seqz-preprocess",
3-
"version": "0.2.5.1",
3+
"version": "0.3.0",
44
"description": "Sequenza preprocessing",
55
"main": "main.nf",
66
"deprecated": false,

seqz-preprocess/resources/readme

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
"# File 'hg38.gc50Base.wig.gz' is >100mb. Please download it from ''https://object.cancercollaboratory.org:9080/swift/v1/genomics-public-data/sequenza_references/hg38.gc50Base.wig.gz"" and compare it to the md5sum in this folder."
1+
"File 'hg38.gc50Base.wig.gz' is >100 MB. Please download it from 'https://object.cancercollaboratory.org:9080/swift/v1/genomics-public-data/sequenza_references/hg38.gc50Base.wig.gz' and compare
2+
its checksum to the md5sum in this folder."

seqz-preprocess/tests/checker.nf

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
/* this block is auto-generated based on info from pkg.json where */
3535
/* changes can be made if needed, do NOT modify this block manually */
3636
nextflow.enable.dsl = 2
37-
version = '0.2.5.1'
37+
version = '0.3.0'
3838

3939
container = [
4040
'ghcr.io': 'ghcr.io/icgc-argo-workflows/icgc-argo-sv-copy-number.seqz-preprocess'
@@ -52,10 +52,12 @@ params.tumor_bam = ""
5252
params.normal_bam = ""
5353
params.gcwiggle = ""
5454
params.fasta = ""
55+
params.chromosomes = ["chr11"]
5556
params.expected_output = ""
5657

58+
include { getSecondaryFiles } from '../wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/helper-functions@1.0.1.1/main.nf'
5759
include { seqzPreprocess } from '../main'
58-
60+
include { seqzPreprocessMerge } from '../main'
5961

6062
process file_smart_diff {
6163
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
@@ -78,21 +80,33 @@ process file_smart_diff {
7880
workflow checker {
7981
take:
8082
tumor_bam
83+
tumor_bai
8184
normal_bam
82-
gcwiggle
85+
normal_bai
8386
fasta
87+
fasta_fai
88+
gcwiggle
89+
chrom
8490
expected_output
8591

8692
main:
8793
seqzPreprocess(
8894
tumor_bam,
95+
tumor_bai,
8996
normal_bam,
97+
normal_bai,
98+
fasta,
99+
fasta_fai,
90100
gcwiggle,
91-
fasta
101+
chrom
102+
)
103+
104+
seqzPreprocessMerge(
105+
seqzPreprocess.out.seqzperchromosome
92106
)
93107

94108
file_smart_diff(
95-
seqzPreprocess.out.seqz,
109+
seqzPreprocessMerge.out.seqz,
96110
expected_output
97111
)
98112
}
@@ -101,9 +115,13 @@ workflow checker {
101115
workflow {
102116
checker(
103117
file(params.tumor_bam),
118+
Channel.fromPath(getSecondaryFiles(params.tumor_bam,['{b,cr}ai']), checkIfExists: true).collect(),
104119
file(params.normal_bam),
120+
Channel.fromPath(getSecondaryFiles(params.normal_bam,['{b,cr}ai']), checkIfExists: true).collect(),
105121
file(params.fasta),
122+
Channel.fromPath(getSecondaryFiles(params.fasta,['fai']), checkIfExists: true).collect(),
106123
file(params.gcwiggle),
124+
params.chromosomes.flatten(),
107125
file(params.expected_output)
108126
)
109127
}
-130 Bytes
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
chr11 537000 40 50 51

0 commit comments

Comments
 (0)