diff --git a/modules/bigbio/thermorawfileparser/environment.yml b/modules/bigbio/thermorawfileparser/environment.yml index 63b8fc6..dbe6ffe 100644 --- a/modules/bigbio/thermorawfileparser/environment.yml +++ b/modules/bigbio/thermorawfileparser/environment.yml @@ -1,4 +1,3 @@ -name: thermorawfileparser channels: - conda-forge - bioconda diff --git a/modules/bigbio/thermorawfileparser/main.nf b/modules/bigbio/thermorawfileparser/main.nf index 31ce4d0..47b91e5 100644 --- a/modules/bigbio/thermorawfileparser/main.nf +++ b/modules/bigbio/thermorawfileparser/main.nf @@ -1,62 +1,67 @@ process THERMORAWFILEPARSER { - tag "$meta.mzml_id" + tag "${meta.id}" label 'process_low' label 'process_single' label 'error_retry' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.5--h05cac1d_1' : - 'biocontainers/thermorawfileparser:1.4.5--h05cac1d_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.5--h05cac1d_1' + : 'biocontainers/thermorawfileparser:1.4.5--h05cac1d_1'}" - stageInMode { - if (task.attempt == 1) { - if (task.executor == "awsbatch") { - 'symlink' - } else { - 'link' - } - } else if (task.attempt == 2) { - if (task.executor == "awsbatch") { - 'copy' - } else { - 'symlink' - } - } else { - 'copy' - } - } input: - tuple val(meta), path(rawfile) + tuple val(meta), path(raw) output: - tuple val(meta), path("*.{mzML,mgf,parquet}"), emit: convert_files - path "versions.yml", emit: versions - path "*.log", emit: log + tuple val(meta), path("*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}"), emit: spectra + tuple val("${task.process}"), val('thermorawfileparser'), eval("ThermoRawFileParser.sh --version"), emit: versions_thermorawfileparser, topic: versions + path "*.log", emit: log + + when: + task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + // Detect existing format options in any supported syntax: -f=2, -f 2, --format=2, + // or --format 2. + def hasFormatArg = (args =~ /(^|\s)(-f(=|\s)\d+|--format(=|\s)\d+)/).find() // Default to indexed mzML format (-f=2) if not specified in args - def formatArg = args.contains('-f=') ? '' : '-f=2' + def formatArg = hasFormatArg ? '' : '-f=2' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = (args =~ /(^|\s)(-f|--format)(=|\s+)0\b/).find() + ? "mgf" + : (args =~ /(^|\s)(-f|--format)(=|\s+)1\b/).find() + ? "mzML" + : (args =~ /(^|\s)(-f|--format)(=|\s+)2\b/).find() + ? "mzML" + : (args =~ /(^|\s)(-f|--format)(=|\s+)3\b/).find() + ? "parquet" + : "mzML" + suffix = (args =~ /(^|\s)(-g|--gzip)\b/).find() ? 
"${suffix}.gz" : "${suffix}" """ - ThermoRawFileParser.sh -i='${rawfile}' ${formatArg} ${args} -o=./ 2>&1 | tee '${rawfile.baseName}_conversion.log' - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ThermoRawFileParser: \$(ThermoRawFileParser.sh --version) - END_VERSIONS + ThermoRawFileParser.sh \\ + -i='${raw}' \\ + ${formatArg} ${args} \\ + -o=./ 2>&1 | tee '${prefix}_conversion.log' """ stub: - def prefix = task.ext.prefix ?: "${meta.mzml_id}" def args = task.ext.args ?: '' - // Determine output format from args, default to mzML - // Format 0 = MGF, formats 1-2 = mzML, format 3 = Parquet, format 4 = None - def outputExt = (args =~ /-f=0\b/).find() ? 'mgf' : 'mzML' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = (args =~ /(^|\s)(-f|--format)(=|\s+)0\b/).find() + ? "mgf" + : (args =~ /(^|\s)(-f|--format)(=|\s+)1\b/).find() + ? "mzML" + : (args =~ /(^|\s)(-f|--format)(=|\s+)2\b/).find() + ? "mzML" + : (args =~ /(^|\s)(-f|--format)(=|\s+)3\b/).find() + ? "parquet" + : "mzML" + suffix = (args =~ /(^|\s)(-g|--gzip)\b/).find() ? 
"${suffix}.gz" : "${suffix}" """ - touch '${prefix}.${outputExt}' + touch '${prefix}.${suffix}' touch '${prefix}_conversion.log' cat <<-END_VERSIONS > versions.yml diff --git a/modules/bigbio/thermorawfileparser/meta.yml b/modules/bigbio/thermorawfileparser/meta.yml index b6f99c9..48b7d34 100644 --- a/modules/bigbio/thermorawfileparser/meta.yml +++ b/modules/bigbio/thermorawfileparser/meta.yml @@ -1,10 +1,12 @@ name: thermorawfileparser -description: Convert RAW file to mzML or MGF files +description: Convert Thermo RAW files to mzML, MGF, or Parquet format keywords: - raw - - mzML - - MGF - - OpenMS + - mzml + - mgf + - parquet + - parser + - proteomics tools: - thermorawfileparser: description: | @@ -14,36 +16,66 @@ tools: - `-L` or `--msLevel=VALUE` to select MS levels (e.g., `-L=1,2` or `--msLevel=1-3`) homepage: https://github.com/compomics/ThermoRawFileParser documentation: https://github.com/compomics/ThermoRawFileParser + tool_dev_url: https://github.com/compomics/ThermoRawFileParser + doi: "10.1021/acs.jproteome.9b00328" + licence: + - "Apache Software" + identifier: biotools:ThermoRawFileParser input: - - meta: - type: map - description: | - Groovy Map containing sample information - - rawfile: - type: file - description: | - Thermo RAW file - pattern: "*.{raw,RAW}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - raw: + type: file + description: Thermo RAW file + pattern: "*.{raw,RAW}" + ontologies: [] output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'sample1', mzml_id:'UPS1_50amol_R3' ] - - convert_files: - type: file - description: | - Converted files in mzML or MGF format depending on the format parameter (-f). - Format options: 0 for MGF, 1 for mzML, 2 for indexed mzML (default), 3 for Parquet, 4 for None. 
- pattern: "*.{mzML,mgf,parquet}" - - log: - type: file - description: log file - pattern: "*.log" - - versions: - type: file - description: File containing software version - pattern: "versions.yml" + spectra: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}": + type: file + description: Mass spectra in open format + pattern: "*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}" + ontologies: [] + versions_thermorawfileparser: + - - ${task.process}: + type: string + description: The process the versions were collected from + - thermorawfileparser: + type: string + description: The name of the tool + - ThermoRawFileParser.sh --version: + type: eval + description: The expression to obtain the version of the tool + log: + - "*.log": + type: file + description: Log file from the conversion process + pattern: "*.log" + ontologies: [] +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - thermorawfileparser: + type: string + description: The name of the tool + - ThermoRawFileParser.sh --version: + type: eval + description: The expression to obtain the version of the tool authors: + - "@jonasscheid" + - "@daichengxin" + - "@ypriverol" +maintainers: + - "@jonasscheid" - "@daichengxin" - "@ypriverol" diff --git a/modules/bigbio/thermorawfileparser/tests/main.nf.test b/modules/bigbio/thermorawfileparser/tests/main.nf.test index 355fbb1..c1f8ce9 100644 --- a/modules/bigbio/thermorawfileparser/tests/main.nf.test +++ b/modules/bigbio/thermorawfileparser/tests/main.nf.test @@ -22,8 +22,8 @@ nextflow_process { then { assert process.success - assert snapshot(process.out.versions).match("versions") - assert new File(process.out.convert_files[0][1]).name == 'TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML' + assert 
snapshot(process.out.versions_thermorawfileparser).match("versions") + assert new File(process.out.spectra[0][1]).name == 'TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML' assert process.out.log.size() == 1 } } @@ -36,7 +36,7 @@ nextflow_process { process { """ input[0] = [ - [ id: 'test', mzml_id: 'test_sample' ], + [ id: 'test_sample', mzml_id: 'test_sample' ], file(params.test_data['proteomics']['msspectra']['ups1_50amol_r3'], checkIfExists: false) ] """ @@ -45,8 +45,9 @@ nextflow_process { then { assert process.success - assert snapshot(process.out.versions).match("versions_stub") - assert new File(process.out.convert_files[0][1]).name == 'test_sample.mzML' + assert snapshot(process.out.versions_thermorawfileparser).match("versions_stub") + assert new File(process.out.spectra[0][1]).name == 'test_sample.mzML' + assert snapshot(process.out).match() assert process.out.log.size() == 1 } } diff --git a/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap b/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap index 6562491..f194dbb 100644 --- a/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap +++ b/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap @@ -1,26 +1,34 @@ { - "versions": { - "content": [ - [ - "versions.yml:md5,dc9625538c025d615109ef8cac3a86ab" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.8" + "versions_stub": { + "content": [ + [ + [ + "THERMORAWFILEPARSER", + "thermorawfileparser", + "1.4.5" + ] + ] + ], + "timestamp": "2026-03-20T12:41:22.8183", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, - "timestamp": "2025-12-11T06:27:00.000000" - }, - "versions_stub": { - "content": [ - [ - "versions.yml:md5,dc9625538c025d615109ef8cac3a86ab" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.8" - }, - "timestamp": "2025-12-11T06:27:00.000000" - } -} + "versions": { + "content": [ + [ + [ + "THERMORAWFILEPARSER", + "thermorawfileparser", + "1.4.5" + ] + ] + ], + 
"timestamp": "2026-03-20T12:36:30.88531", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file