Skip to content

Commit e63a4c7

Browse files
authored
Feature/pathofact2/extractfasta (#139)
* Added tests for empty inputs * Fixing python script after PR comments * Add optional arguments to pathofact_fasta_extractor * Adding threshold filtering to python script * Pathofact2 version added * Refactor FASTA extraction and error handling * Improve error handling for missing sequences. * Improve TSV parsing with better error handling * Adding pytest
1 parent 6f5e348 commit e63a4c7

15 files changed

+1296
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the module. The Biopython pin is kept in step with the
# container tag used by the process definition (biopython:1.84) so that conda
# and container executions run the same library version.
channels:
  - conda-forge
  - bioconda
dependencies:
  - conda-forge::biopython=1.84
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Runs pathofact_fasta_extractor.py on a protein FASTA together with a blastp
// result and the PathoFact2 toxin / virulence-factor prediction tables.
//
// Input  : tuple(meta, fasta, blastp_out, pathofact2_tox, pathofact2_vf)
// Output : fasta               - "<prefix>_pathofact2.fasta" (optional)
//          tsv                 - "<prefix>_support.tsv"      (optional)
//          versions_python     - python version, published on the `versions` topic
//          versions_pathofact2 - hard-coded VERSION string, published on the `versions` topic
process PATHOFACT2_EXTRACTFASTA {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/biopython:1.84':
        'biocontainers/biopython:1.84' }"

    input:
    tuple val(meta), path(fasta), path(blastp_out), path(pathofact2_tox), path(pathofact2_vf)

    output:
    // Both result files are declared optional, so a task that writes neither still succeeds.
    tuple val(meta), path("*_pathofact2.fasta"), optional: true, emit: fasta
    tuple val(meta), path("*_support.tsv")     , optional: true, emit: tsv
    tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions, emit: versions_python
    tuple val("${task.process}"), val('pathofact2'), eval("echo ${VERSION}"), topic: versions, emit: versions_pathofact2

    when:
    task.ext.when == null || task.ext.when

    script:
    // NOTE(review): VERSION is assigned without `def`, presumably so that the
    // `eval("echo ${VERSION}")` output declaration can resolve it - confirm before changing.
    VERSION = '1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    pathofact_fasta_extractor.py \\
        ${args} \\
        -f ${fasta} \\
        -b ${blastp_out} \\
        -t ${pathofact2_tox} \\
        -v ${pathofact2_vf} \\
        -o ${prefix}
    """

    stub:
    VERSION = '1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // `args` must be defined here as well: the `def` in the script section is
    // not visible in the stub, and the stub command below interpolates it.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    echo $args
    touch ${prefix}_pathofact2.fasta
    touch ${prefix}_support.tsv
    """
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata: describes the tool, the single input tuple and the emitted
# output channels of the pathofact2_extractfasta process.
name: "pathofact2_extractfasta"
description: Extract the fasta file of the proteins predicted by Pathofact2 for annotation using rpsblast vs CDD
keywords:
  - pathofact2
  - vfdb
  - fasta
  - virulence
  - toxins
tools:
  - "pathofact2":
      description: "PathoFact 2.0, an enhanced pipeline for improved ARG, VF, and toxin prediction in prokaryotic protein sequences"
      homepage: "https://gitlab.lcsb.uni.lu/ESB/PathoFact2"
      documentation: "https://gitlab.lcsb.uni.lu/ESB/PathoFact2"
      tool_dev_url: "https://gitlab.lcsb.uni.lu/ESB/PathoFact2"
      doi: "10.1101/2024.12.09.627531"
      # NOTE(review): not an SPDX identifier - confirm the upstream licence and
      # replace with the matching SPDX string if required by linting.
      licence: ["GNU v3.0"]
      identifier: ""

# One input tuple: meta map followed by the four files passed to the extractor.
input:
  - - meta:
        type: map
        description: Groovy Map containing sample information e.g. `[ id:'sample1' ]`
    - fasta:
        type: file
        description: Protein sequences in FASTA format
        # Glob written as "*.{...}" like every other pattern in this file.
        pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_1929" # FASTA
    - blastp_out:
        type: file
        description: Result of diamond blastp vs VFDB. Columns qseqid sseqid pident length qlen slen evalue bitscore
        pattern: "*.{txt,txt.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_1964" # TXT
    - pathofact2_tox:
        type: file
        description: Result of Pathofact2 toxins prediction
        pattern: "*.{tsv,tsv.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
    - pathofact2_vf:
        type: file
        description: Result of Pathofact2 virulence factors prediction
        pattern: "*.{tsv,tsv.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
# Output channels, keyed by their `emit:` names in the process definition.
output:
  fasta:
    - - meta:
          type: map
          description: Groovy Map containing sample information e.g. `[ id:'sample1' ]`
      - "*_pathofact2.fasta":
          type: file
          description: Results of Pathofact2 proteins fasta
          pattern: "*.fasta"
          ontologies:
            - edam: "http://edamontology.org/format_1929" # FASTA
  tsv:
    - - meta:
          type: map
          description: Groovy Map containing sample information e.g. `[ id:'sample1' ]`
      - "*_support.tsv":
          type: file
          description: Results of Pathofact2 predictions support
          pattern: "*.tsv"
          ontologies:
            - edam: "http://edamontology.org/format_3475" # TSV
  versions_python:
    - - "${task.process}":
          type: string
          description: The name of the process
      - python:
          type: string
          description: The name of the tool
      - "python --version | sed 's/Python //g'":
          type: string
          description: Command used to obtain the tool version
  versions_pathofact2:
    - - "${task.process}":
          type: string
          description: The name of the process
      - pathofact2:
          type: string
          description: The name of the tool
      - "echo ${VERSION}":
          type: string
          description: Command used to obtain the tool version

authors:
  - "@Ales-ibt"
maintainers:
  - "@Ales-ibt"

0 commit comments

Comments
 (0)