diff --git a/.gitignore b/.gitignore index e75900de..7d72d8cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ .nextflow* .nf-test* +__pycache__/ +*.pyc +.python-version +.DS_Store diff --git a/modules/ensembl/fasta/recombine/environment.yml b/modules/ensembl/fasta/recombine/environment.yml new file mode 100644 index 00000000..3ae2bb53 --- /dev/null +++ b/modules/ensembl/fasta/recombine/environment.yml @@ -0,0 +1,6 @@ +--- +channels: + - conda-forge + - bioconda +dependencies: + - ensembl-genomio=1.7.0 diff --git a/modules/ensembl/fasta/recombine/main.nf b/modules/ensembl/fasta/recombine/main.nf new file mode 100644 index 00000000..875e0ccb --- /dev/null +++ b/modules/ensembl/fasta/recombine/main.nf @@ -0,0 +1,66 @@ +// See the NOTICE file distributed with this work for additional information +// regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// FASTA_RECOMBINE: run `fasta_recombine` on a manifest of split FASTA files (plus an optional AGP) to write ${meta.id}.fa.
+process FASTA_RECOMBINE {
+
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "ensemblorg/ensembl-genomio:v1.7.0"
+
+    input:
+    tuple val(meta), path(fasta_manifest), path(agp)
+
+    output:
+    tuple val(meta), path("${meta.id}.fa"), emit: recombined_fasta
+    tuple val("${task.process}"), val('fasta_recombine'), eval("fasta_recombine --version"), emit: versions_fasta_recombine, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = []
+    // Optional flags come from pipeline params; each embedded ' is rewritten as '"'"' so the regex stays one shell word.
+    if (params.chunk_id_regex) {
+        def rx = params.chunk_id_regex.replace("'", "'\"'\"'")
+        args << "--chunk-id-regex '${rx}'"
+    }
+
+    if (params.allow_revcomp) {
+        args << "--allow-revcomp"
+    }
+    // The AGP input is optional: a staged file whose base name is NO_FILE means "no AGP supplied".
+    def has_agp = agp && agp.baseName != 'NO_FILE'
+    if (has_agp) {
+        args << "--agp-file \"${agp}\""
+    }
+
+    def out_fasta = "${meta.id}.fa"
+    """
+    fasta_recombine \\
+        --fasta-manifest "${fasta_manifest}" \\
+        --out-fasta "${out_fasta}" \\
+        ${args.join(' ')}
+    """
+    // Stub: write a one-record placeholder FASTA (header = meta.id, sequence = A) without running the tool.
+    stub:
+    """
+    cat > "${meta.id}.fa" <<EOF
+    >${meta.id}
+    A
+    EOF
+    """
+}
diff --git a/modules/ensembl/fasta/recombine/meta.yml b/modules/ensembl/fasta/recombine/meta.yml
new file mode 100644
index 00000000..858aabd5
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/meta.yml
@@ -0,0 +1,73 @@
+name: "fasta_recombine"
+description: Recombine split FASTA sequences into a single FASTA file,
+  optionally using an AGP file.
+keywords:
+  - ensembl
+  - fasta
+  - genomics
+  - genomio
+  - recombine
+tools:
+  - "fasta_recombine":
+      description: "Recombine split FASTA sequences generated by ensembl-genomio."
+      homepage: "https://github.com/Ensembl/ensembl-genomio"
+      licence:
+        - "Apache License version 2.0"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing meta information
+          e.g. `[ id:'accession1' ]`
+    - fasta_manifest:
+        type: file
+        description: Manifest file listing split FASTA files to recombine.
+ pattern: "*.txt" + ontologies: [] + - agp: + type: file + description: + Optional AGP file describing how split sequence chunks should + be recombined. Use NO_FILE when not required. + pattern: "{*.agp,NO_FILE}" + ontologies: + - edam: "http://edamontology.org/format_3693" # AGP +output: + recombined_fasta: + - - meta: + type: map + description: | + Groovy Map containing meta information + e.g. `[ id:'accession1' ]` + - ${meta.id}.fa: + type: file + description: Recombined FASTA file. + pattern: "*.fa" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + versions_fasta_recombine: + - - ${task.process}: + type: string + description: The name of the process. + - fasta_recombine: + type: string + description: The name of the tool. + - "fasta_recombine --version": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process. + - fasta_recombine: + type: string + description: The name of the tool. + - "fasta_recombine --version": + type: eval + description: The expression to obtain the version of the tool +authors: + - "ensembl-dev@ebi.ac.uk" +maintainers: + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test b/modules/ensembl/fasta/recombine/tests/main.nf.test new file mode 100644 index 00000000..4448cf75 --- /dev/null +++ b/modules/ensembl/fasta/recombine/tests/main.nf.test @@ -0,0 +1,86 @@ +// See the NOTICE file distributed with this work for additional information +// regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nf-core modules test fasta/recombine
+nextflow_process {
+
+    name "Test Process FASTA_RECOMBINE"
+    script "../main.nf"
+    process "FASTA_RECOMBINE"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "fasta"
+    tag "fasta/recombine"
+
+    test("stub outputs: header mode") {
+        // No AGP supplied: an empty NO_FILE placeholder fills the optional AGP slot.
+        when {
+            options "-stub"
+
+            process {
+                """
+                // Throwaway manifest staged as the fasta_manifest input.
+                def manifest_file = file("manifest.txt")
+                manifest_file.text = "x\\n"
+
+                // Empty placeholder named NO_FILE for the AGP slot.
+                def agp_placeholder = file("NO_FILE")
+                agp_placeholder.text = ""
+
+                // Input tuple: meta map, manifest, AGP slot.
+                def sample_meta = [ id: 'test' ]
+                input[0] = [ sample_meta, manifest_file, agp_placeholder ]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.recombined_fasta.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+
+    test("stub outputs: AGP mode") {
+        // An (empty) AGP file named test.agp is passed in the optional AGP slot.
+        when {
+            options "-stub"
+
+            process {
+                """
+                // Throwaway manifest staged as the fasta_manifest input.
+                def manifest_file = file("manifest.txt")
+                manifest_file.text = "x\\n"
+
+                // Empty AGP file; only its name differs from the placeholder case.
+                def agp_file = file("test.agp")
+                agp_file.text = ""
+
+                def sample_meta = [ id: 'test' ]
+                input[0] = [ sample_meta, manifest_file, agp_file ]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.recombined_fasta.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/ensembl/fasta/recombine/tests/main.nf.test.snap b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
new file mode 100644
index 00000000..c1384270
--- /dev/null
+++ b/modules/ensembl/fasta/recombine/tests/main.nf.test.snap
@@ -0,0 +1,84 @@
+{
+    "stub outputs: AGP mode": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,c40116e7d725da4662e6bdd654f70075"
+                    ]
+                ],
+                "1": [
+                    [
+
"FASTA_RECOMBINE", + "fasta_recombine", + "1.7.0" + ] + ], + "recombined_fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,c40116e7d725da4662e6bdd654f70075" + ] + ], + "versions_fasta_recombine": [ + [ + "FASTA_RECOMBINE", + "fasta_recombine", + "1.7.0" + ] + ] + } + ], + "timestamp": "2026-06-11T18:08:46.722339", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.3" + } + }, + "stub outputs: header mode": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,c40116e7d725da4662e6bdd654f70075" + ] + ], + "1": [ + [ + "FASTA_RECOMBINE", + "fasta_recombine", + "1.7.0" + ] + ], + "recombined_fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,c40116e7d725da4662e6bdd654f70075" + ] + ], + "versions_fasta_recombine": [ + [ + "FASTA_RECOMBINE", + "fasta_recombine", + "1.7.0" + ] + ] + } + ], + "timestamp": "2026-06-11T18:08:44.434921", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.3" + } + } +} \ No newline at end of file diff --git a/modules/ensembl/fasta/split/environment.yml b/modules/ensembl/fasta/split/environment.yml new file mode 100644 index 00000000..3ae2bb53 --- /dev/null +++ b/modules/ensembl/fasta/split/environment.yml @@ -0,0 +1,6 @@ +--- +channels: + - conda-forge + - bioconda +dependencies: + - ensembl-genomio=1.7.0 diff --git a/modules/ensembl/fasta/split/main.nf b/modules/ensembl/fasta/split/main.nf new file mode 100644 index 00000000..a4208433 --- /dev/null +++ b/modules/ensembl/fasta/split/main.nf @@ -0,0 +1,122 @@ +// See the NOTICE file distributed with this work for additional information +// regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +process FASTA_SPLIT { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "ensemblorg/ensembl-genomio:v1.7.0" + + input: + tuple val(meta), path(fasta), val(longest_seq_bp) + + output: + tuple val(meta), path("splits/**/*.fa"), emit: fastas + tuple val(meta), path("splits/*.agp"), emit: agp, optional: true + tuple val("${task.process}"), val('fasta_split'), eval("fasta_split --version"), emit: versions_fasta_split, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = [] + + if (params.max_seqs_per_file) { + args << "--max-seqs-per-file ${params.max_seqs_per_file}" + } + + if (params.max_seq_length_per_file) { + args << "--max-seq-length-per-file ${params.max_seq_length_per_file}" + } + + if (params.min_chunk_length) { + args << "--min-chunk-length ${params.min_chunk_length}" + } + + if (params.max_files_per_directory) { + args << "--max-files-per-directory ${params.max_files_per_directory}" + } + + if (params.max_dirs_per_directory) { + args << "--max-dirs-per-directory ${params.max_dirs_per_directory}" + } + + if (params.force_max_seq_length) { + args << "--force-max-seq-length" + } + + if (params.write_agp) { + args << "--write-agp" + } + + if (params.unique_file_names) { + args << "--unique-file-names" + } + + if (params.delete_existing_files) { + args << "--delete-existing-files" + } + + """ + fasta_split \\ + --fasta-file ${fasta} \\ + --out-dir splits \\ + ${args.join(' ')} + """ + + stub: + """ + layout="default" + if [[ "${params.unique_file_names ?: 
false}" == "true" ]]; then + layout="unique" + elif [[ -n "${params.max_dirs_per_directory ?: ''}" || -n "${params.max_files_per_directory ?: ''}" ]]; then + layout="multi_dir" + fi + + create_fasta() { + local file="\$1" + cat > "\$file" <stub + A + EOF + } + + mkdir -p splits + + if [[ "\$layout" == "default" ]]; then + mkdir -p splits/0 + create_fasta splits/0/test.1.fa + create_fasta splits/0/test.2.fa + + elif [[ "\$layout" == "unique" ]]; then + mkdir -p splits/0 + create_fasta splits/0/test.0.1.fa + create_fasta splits/0/test.0.2.fa + + elif [[ "\$layout" == "multi_dir" ]]; then + mkdir -p splits/0/0 + mkdir -p splits/0/1 + create_fasta splits/0/0/test.1.fa + create_fasta splits/0/1/test.2.fa + fi + + if [[ "${params.write_agp ?: false}" == "true" ]]; then + cat > "splits/${meta.id}.agp" < "${meta.id}.${analysis}.json" <<'EOF' + {} + EOF + """ + +} diff --git a/modules/ensembl/features/combine_json/meta.yml b/modules/ensembl/features/combine_json/meta.yml new file mode 100644 index 00000000..90a7b78f --- /dev/null +++ b/modules/ensembl/features/combine_json/meta.yml @@ -0,0 +1,76 @@ +name: "features_combine_json" +description: Combine split feature JSON files into a single JSON file, + optionally using an AGP file. +keywords: + - ensembl + - features + - genomics + - genomio + - json +tools: + - "features_combine_json": + description: "Combine split feature JSON files generated by ensembl-genomio." + homepage: "https://github.com/Ensembl/ensembl-genomio" + licence: + - "Apache License version 2.0" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing meta information + e.g. `[ id:'accession1' ]` + - analysis: + type: string + description: Analysis name to include in the combined JSON filename. + - json_manifest: + type: file + description: Manifest file listing split JSON files to combine. 
+ pattern: "*.txt" + ontologies: [] + - agp: + type: file + description: + Optional AGP file describing how split sequence chunks should + be recombined. Use NO_FILE when not required. + pattern: "{*.agp,NO_FILE}" + ontologies: + - edam: "http://edamontology.org/format_3693" # AGP +output: + combined_json: + - - meta: + type: map + description: | + Groovy Map containing meta information + e.g. `[ id:'accession1' ]` + - ${meta.id}.${analysis}.json: + type: file + description: Combined feature JSON file. + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + versions_features_combine_json: + - - ${task.process}: + type: string + description: The name of the process. + - features_combine_json: + type: string + description: The name of the tool. + - "features_combine_json --version": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process. + - features_combine_json: + type: string + description: The name of the tool. + - "features_combine_json --version": + type: eval + description: The expression to obtain the version of the tool +authors: + - "ensembl-dev@ebi.ac.uk" +maintainers: + - "ensembl-dev@ebi.ac.uk" diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test b/modules/ensembl/features/combine_json/tests/main.nf.test new file mode 100644 index 00000000..e4bc969c --- /dev/null +++ b/modules/ensembl/features/combine_json/tests/main.nf.test @@ -0,0 +1,58 @@ +// See the NOTICE file distributed with this work for additional information +// regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nf-core modules test features/combine_json
+nextflow_process {
+
+    name "Test Process FEATURES_COMBINE_JSON"
+    script "../main.nf"
+    process "FEATURES_COMBINE_JSON"
+
+    tag "modules"
+    tag "modules_ensembl"
+    tag "features"
+    tag "features/combine_json"
+
+    test("Stub outputs") {
+        // Stub run with an empty manifest and a NO_FILE placeholder in the AGP slot.
+        when {
+            options "-stub"
+
+            process {
+                """
+                // Empty manifest staged as the json_manifest input.
+                def manifest_file = file("manifest.txt")
+                manifest_file.text = ""
+
+                // Empty placeholder named NO_FILE for the AGP slot.
+                def agp_placeholder = file("NO_FILE")
+                agp_placeholder.text = ""
+
+                // Input tuple: meta map, analysis name, manifest, AGP slot.
+                def sample_meta = [ id:'test' ]
+                def analysis_name = 'features'
+                input[0] = [ sample_meta, analysis_name, manifest_file, agp_placeholder ]
+                """
+            }
+        }
+
+        then {
+            assert process.trace.tasks().size() == 1
+            assert process.out.combined_json.size() == 1
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/ensembl/features/combine_json/tests/main.nf.test.snap b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
new file mode 100644
index 00000000..c96b73a7
--- /dev/null
+++ b/modules/ensembl/features/combine_json/tests/main.nf.test.snap
@@ -0,0 +1,43 @@
+{
+    "Stub outputs": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,8a80554c91d9fca8acb82f023de02f11"
+                    ]
+                ],
+                "1": [
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.7.0"
+                    ]
+                ],
+                "combined_json": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.json:md5,8a80554c91d9fca8acb82f023de02f11"
+                    ]
+                ],
+                "versions_features_combine_json": [
+                    [
+                        "FEATURES_COMBINE_JSON",
+                        "features_combine_json",
+                        "1.7.0"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-11T18:09:19.997741",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.3"
+
} + } +} \ No newline at end of file