Skip to content

Commit 8b6c7fa

Browse files
Add ability to explore various values of AED and LD. (#93)
* Update parameters to read from meta map * Introduce channel to construct sweep parameter map * Update nf-core modules * Add meta map to agat separatebyrecord * Add meta map to agat filterbyattribute * update blast path * Add meta map to agat keeplongestisoform * Add meta map to agat filterincompletegenecodingmodels * Add meta map to agat filterbylocusdistance * Add meta map to agat extractsequences * Add meta map to blast blastp * Add meta map to agat filterbymrnablastvalue * Add meta map to augustus gff2gbk * Add meta map to augustus gbk2augustus * Add meta map to augustus training modules * Add meta map to agat gff2zff * Add meta map to snap training * Update channel logic for abinitio pipeline * Update README * Update abinitio test profile * Fix module links for annotation preprocessing workflow * Fix module links for functional annotation workflow * Fix module links * Update publish paths to include parameter sweep values * Add container registries to profiles for nf-core * Remove registries from docker container paths * Add aed and locus distance parameters to config * Update annotation preprocessing workflow for updated modules * Add meta map to interproscan module * Add meta map to agat managefunctionalannotation * Update functional annotation workflow to reflect updated modules * Fix includeInputs placement * Fix container paths * Fix workflow for module updates * Remove references to params.enable_conda * Update minimum Nextflow version to 22.10.0 * Run split maker evidence once * Update publishing path * Update test config to do parameter sweep * Associate proteindb with protein * Patch blast/makeblastdb to use a meta map * Fix functional annotation subworkflow from module change * Fix file staging * Update file prefix to include LD and AED values * Add start of table rank code * Add rank model to abinitio workflow * Escape dollars * Syntax fixes * Fix syntax * Add publish path * rename output folders * Append training data gene count to log
1 parent 62b725c commit 8b6c7fa

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+478
-257
lines changed

config/abinitio_training_modules.config

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,44 +4,44 @@ process {
44
withName: 'SPLIT_MAKER_EVIDENCE' {
55
ext.args = ''
66
publishDir = [
7-
path: "${params.outdir}/${publish_subdir}",
7+
path: { "${params.outdir}/${publish_subdir}/split_evidence" },
88
mode: params.publishDir_mode,
99
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
1010
]
1111
}
1212
withName: 'MODEL_SELECTION_BY_AED' {
13-
ext.args = [
14-
'--value 0.3',
13+
ext.args = { [
14+
"--value ${meta.aed_value}",
1515
'-a _AED',
1616
'-t ">"'
17-
].join(' ').trim()
17+
].join(' ').trim() }
1818
ext.prefix = 'codingGeneFeatures'
1919
publishDir = [
20-
path: "${params.outdir}/${publish_subdir}/filter",
20+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
2121
mode: params.publishDir_mode,
2222
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
2323
]
2424
}
2525
withName: 'RETAIN_LONGEST_ISOFORM' {
2626
ext.args = ''
2727
publishDir = [
28-
path: "${params.outdir}/${publish_subdir}/filter",
28+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
2929
mode: params.publishDir_mode,
3030
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
3131
]
3232
}
3333
withName: 'REMOVE_INCOMPLETE_GENE_MODELS' {
3434
ext.args = ''
3535
publishDir = [
36-
path: "${params.outdir}/${publish_subdir}/filter",
36+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
3737
mode: params.publishDir_mode,
3838
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
3939
]
4040
}
4141
withName: 'FILTER_BY_LOCUS_DISTANCE' {
42-
ext.args = '-d 3000'
42+
ext.args = { "-d ${meta.locus_distance}" }
4343
publishDir = [
44-
path: "${params.outdir}/${publish_subdir}/filter",
44+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
4545
mode: params.publishDir_mode,
4646
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
4747
]
@@ -63,15 +63,15 @@ process {
6363
withName: 'GFF_FILTER_BY_BLAST' {
6464
ext.args = ''
6565
publishDir = [
66-
path: "${params.outdir}/${publish_subdir}/blast_filtered_gff",
66+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/blast_filtered_gff"},
6767
mode: params.publishDir_mode,
6868
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
6969
]
7070
}
7171
withName: 'GFF2GBK' {
7272
ext.args = params.flank_region_size
7373
publishDir = [
74-
path: "${params.outdir}/${publish_subdir}/augustus/gbk_files",
74+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/gbk_files"},
7575
mode: params.publishDir_mode,
7676
pattern: "*.gbk"
7777
]
@@ -80,41 +80,47 @@ process {
8080
ext.args = '100'
8181
publishDir = [
8282
[
83-
path: "${params.outdir}/${publish_subdir}/augustus/training_data",
83+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/training_data"},
8484
mode: params.publishDir_mode,
8585
pattern: "*.train"
8686
],
8787
[
88-
path: "${params.outdir}/${publish_subdir}/augustus/test_data",
88+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/test_data"},
8989
mode: params.publishDir_mode,
9090
pattern: "*.test"
9191
]
9292
]
9393
}
9494
withName: 'AUGUSTUS_TRAINING' {
9595
ext.args = ''
96+
ext.prefix = { "${species_label}-LD${meta.locus_distance}-AED${meta.aed_value}" }
9697
publishDir = [
9798
[
98-
path: "${params.outdir}/${publish_subdir}/augustus_training",
99+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus_training"},
99100
mode: params.publishDir_mode,
100101
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
101102
],
102103
[
103-
path: "${params.maker_species_publishdir}",
104+
path: {"${params.maker_species_publishdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}"},
104105
mode: 'copy',
105106
enabled: params.maker_species_publishdir != null,
106107
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
107108
]
108109
]
109110
}
111+
withName: 'RANK_AUGUSTUS_MODELS' {
112+
publishDir = [
113+
path: { "${params.outdir}/${publish_subdir}/augustus_sweep_summary" }
114+
]
115+
}
110116
withName: 'CONVERT_GFF2ZFF' {
111117
ext.args = ''
112118
}
113119
withName: 'SNAP_TRAINING' {
114120
ext.args = "-categorize ${params.flank_region_size}"
115121
ext.args2 = "-export ${params.flank_region_size} -plus"
116122
publishDir = [
117-
path: "${params.outdir}/${publish_subdir}/snap_training",
123+
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/snap_training"},
118124
mode: params.publishDir_mode,
119125
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
120126
]

config/test.config

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,12 @@ if ( params.subworkflow == 'abinitio_training' ) {
44
genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
55
species_label = 'test_species' // e.g. 'asecodes_parviclava'
66
flank_region_size = 500
7+
aed_value = [ 0.3, 0.2 ]
8+
locus_distance = [ 500 ]
79
}
810
process {
911
// Trick: Fully qualified process name has higher priority than simple name
1012
// Otherwise settings are overridden by those in modules.config loaded after this
11-
withName: 'ABINITIO_TRAINING:FILTER_BY_LOCUS_DISTANCE' {
12-
ext.args = '-d 500'
13-
}
1413
withName: 'ABINITIO_TRAINING:GBK2AUGUSTUS' {
1514
ext.args = '10'
1615
}

modules.json

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,41 @@
11
{
2-
"name": "NBIS Genome Annotation Workflow",
3-
"homePage": "",
4-
"repos": {
5-
"nf-core/modules": {
6-
"blast/makeblastdb": {
7-
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
8-
},
9-
"busco": {
10-
"git_sha": "89a84538bede7c6919f7c042fdb4c79e5e2d9d2a"
11-
},
12-
"fastp": {
13-
"git_sha": "9b51362a532a14665f513cf987531f9ea5046b74"
14-
},
15-
"fastqc": {
16-
"git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
17-
},
18-
"multiqc": {
19-
"git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106"
20-
}
2+
"name": "NBIS Genome Annotation Workflow",
3+
"homePage": "",
4+
"repos": {
5+
"https://github.com/nf-core/modules.git": {
6+
"modules": {
7+
"nf-core": {
8+
"blast/makeblastdb": {
9+
"branch": "master",
10+
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
11+
"installed_by": ["modules"],
12+
"patch": "modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff"
13+
},
14+
"busco": {
15+
"branch": "master",
16+
"git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de",
17+
"installed_by": ["modules"]
18+
},
19+
"fastp": {
20+
"branch": "master",
21+
"git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd",
22+
"installed_by": ["modules"]
23+
},
24+
"fastqc": {
25+
"branch": "master",
26+
"git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53",
27+
"installed_by": ["modules"]
28+
},
29+
"multiqc": {
30+
"branch": "master",
31+
"git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80",
32+
"installed_by": ["modules"]
33+
}
2134
}
35+
},
36+
"subworkflows": {
37+
"nf-core": {}
38+
}
2239
}
40+
}
2341
}

modules/local/agat/extractsequences.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ process AGAT_EXTRACTSEQUENCES {
33
label 'process_single'
44

55
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
6-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
6+
conda "bioconda::agat=0.9.2"
77
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
88
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
9-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
9+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
1010

1111
input:
12-
path gff
12+
tuple val(meta), path (gff)
1313
path genome
1414

1515
output:
16-
path "${gff.baseName}_proteins.fasta", emit: proteins
17-
path "versions.yml" , emit: versions
16+
tuple val(meta), path ("${gff.baseName}_proteins.fasta"), emit: proteins
17+
path "versions.yml" , emit: versions
1818

1919
when:
2020
task.ext.when == null || task.ext.when

modules/local/agat/filterbyattribute.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@ process AGAT_FILTERBYATTRIBUTE {
33
label 'process_single'
44

55
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
6-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
6+
conda "bioconda::agat=0.9.2"
77
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
88
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
9-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
9+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
1010

1111
input:
12-
path mrna_gff
12+
tuple val(meta), path(mrna_gff)
1313

1414
output:
15-
path "*.filter.gff", emit: selected_models
16-
path "versions.yml", emit: versions
15+
tuple val(meta), path("*.filter.gff"), emit: selected_models
16+
path "versions.yml" , emit: versions
1717

1818
when:
1919
task.ext.when == null || task.ext.when

modules/local/agat/filterbylocusdistance.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@ process AGAT_FILTERBYLOCUSDISTANCE {
33
label 'process_single'
44

55
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
6-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
6+
conda "bioconda::agat=0.9.2"
77
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
88
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
9-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
9+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
1010

1111
input:
12-
path coding_gene_features_gff
12+
tuple val(meta), path (coding_gene_features_gff)
1313

1414
output:
15-
path "*.good_distance.gff", emit: distanced_models
16-
path "versions.yml" , emit: versions
15+
tuple val(meta), path ("*.good_distance.gff"), emit: distanced_models
16+
path "versions.yml" , emit: versions
1717

1818
when:
1919
task.ext.when == null || task.ext.when

modules/local/agat/filterbymrnablastvalue.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ process AGAT_FILTERBYMRNABLASTVALUE {
33
label 'process_single'
44

55
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
6-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
6+
conda "bioconda::agat=0.9.2"
77
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
88
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
9-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
9+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
1010

1111
input:
12-
path gff
12+
tuple val(meta), path(gff)
1313
path blast_tbl
1414

1515
output:
16-
path "*_blast-filtered.gff3", emit: blast_filtered
17-
path "versions.yml" , emit: versions
16+
tuple val(meta), path("*_blast-filtered.gff3"), emit: blast_filtered
17+
path "versions.yml" , emit: versions
1818

1919
when:
2020
task.ext.when == null || task.ext.when

modules/local/agat/filterincompletegenecodingmodels.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ process AGAT_FILTERINCOMPLETEGENECODINGMODELS {
33
label 'process_single'
44

55
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
6-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
6+
conda "bioconda::agat=0.9.2"
77
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
88
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
9-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
9+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
1010

1111
input:
12-
path coding_gene_features_gff
12+
tuple val(meta), path (coding_gene_features_gff)
1313
path genome
1414

1515
output:
16-
path "*.complete.gff", emit: complete_gene_models
17-
path "versions.yml" , emit: versions
16+
tuple val(meta), path ("*.complete.gff"), emit: complete_gene_models
17+
path "versions.yml" , emit: versions
1818

1919
when:
2020
task.ext.when == null || task.ext.when

modules/local/agat/gff2zff.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,18 @@ process AGAT_GFF2ZFF {
22
tag "${annotation}"
33
label 'process_single'
44

5-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
5+
conda "bioconda::agat=0.9.2"
66
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
77
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
8-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
8+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
99

1010
input:
11-
path annotation
11+
tuple val(meta), path (annotation)
1212
path genome
1313

1414
output:
15-
path "*.{ann,dna}" , emit: zff
16-
path "versions.yml", emit: versions
15+
tuple val(meta), path ("*.{ann,dna}"), emit: zff
16+
path "versions.yml" , emit: versions
1717

1818
when:
1919
task.ext.when == null || task.ext.when

modules/local/agat/keeplongestisoform.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@ process AGAT_KEEPLONGESTISOFORM {
33
label 'process_single'
44

55
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
6-
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
6+
conda "bioconda::agat=0.9.2"
77
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
88
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
9-
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
9+
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
1010

1111
input:
12-
path coding_gene_features_gff
12+
tuple val(meta), path(coding_gene_features_gff)
1313

1414
output:
15-
path "*.longest_cds.gff", emit: longest_isoform
16-
path "versions.yml" , emit: versions
15+
tuple val(meta), path("*.longest_cds.gff"), emit: longest_isoform
16+
path "versions.yml" , emit: versions
1717

1818
when:
1919
task.ext.when == null || task.ext.when

0 commit comments

Comments
 (0)