diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 433130d8..6adb0fff 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,46 +1,24 @@ -name: nf-core branch protection -# This workflow is triggered on PRs to `main`/`master` branch on the repository -# It fails when someone tries to make a PR against the nf-core `main`/`master` branch instead of `dev` +name: "Close user-tagged issues and PRs" on: - pull_request_target: - branches: - - main - - master + schedule: + - cron: "0 0 * * 0" # Once a week jobs: - test: + clean-up: runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write steps: - # PRs to the nf-core repo main/master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - - name: Check PRs - if: github.repository == 'sanger-tol/curationpretext' - run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == sanger-tol/curationpretext ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - - # If the above check failed, post a comment on the PR explaining the failure - # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 with: - message: | - ## This PR is against the `${{github.event.pull_request.base.ref}}` branch :x: - - * Do not close this PR - * Click _Edit_ and change the `base` to `dev` - * This CI test will remain failed until you push a new commit - - --- - - Hi @${{ github.event.pull_request.user.login }}, - - It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) ${{github.event.pull_request.base.ref}} branch. 
- The ${{github.event.pull_request.base.ref}} branch on nf-core repositories should always contain code from the latest release. - Because of this, PRs to ${{github.event.pull_request.base.ref}} are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. - - You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. - Note that even after this, the test will continue to show as failing until you push a new commit. - - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index ac030fd5..6adb0fff 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index ea526d2d..abd3eb7d 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -44,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: Setup Apptainer @@ -57,7 +57,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev + pip install git+https://github.com/nf-core/tools.git - name: Make a cache directory for the container images run: | @@ -127,7 +127,7 @@ jobs: # fi # - name: Upload Nextflow logfile for debugging purposes - # uses: 
actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + # uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 # with: # name: nextflow_logfile.txt # path: .nextflow.log* diff --git a/.github/workflows/fix_linting.yml b/.github/workflows/fix_linting.yml index 1c97b461..48bc7a74 100644 --- a/.github/workflows/fix_linting.yml +++ b/.github/workflows/fix_linting.yml @@ -13,13 +13,13 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: token: ${{ secrets.nf_core_bot_auth_token }} # indication that the linting is being fixed - name: React on comment - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: eyes @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -47,7 +47,7 @@ jobs: # indication that the linting has finished - name: react if linting finished succesfully if: steps.pre-commit.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: "+1" @@ -67,21 +67,21 @@ jobs: - name: react if linting errors were fixed id: react-if-fixed if: steps.commit-and-push.outcome == 'success' - uses: 
peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: hooray - name: react if linting errors were not fixed if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: confused - name: react if linting errors were not fixed if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: issue-number: ${{ github.event.issue.number }} body: | diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 82c4f2f1..e5f73d58 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - - name: Set up Python 3.13 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -28,14 +28,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 
+ - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: read .nf-core.yml @@ -71,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index d43797d9..e6e9bc26 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index e113a611..429fe19c 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -18,7 +18,7 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NFT_VER: "0.9.2" + NFT_VER: "0.9.3" NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -39,7 +39,7 @@ jobs: rm -rf ./* || true rm -rf ./.??* || true ls -la ./ - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -76,14 +76,14 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.10.5" + - "25.04.0" - "latest-everything" env: NXF_ANSI_LOG: false TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 
+ - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -93,6 +93,7 @@ jobs: continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} env: NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + NXF_VERSION: ${{ matrix.NXF_VER }} with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml index beb5c77f..e8560fc7 100644 --- a/.github/workflows/template-version-comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: ref: ${{ github.event.pull_request.head.sha }} diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 83599f63..00000000 --- a/.gitpod.yml +++ /dev/null @@ -1,10 +0,0 @@ -image: nfcore/gitpod:latest -tasks: - - name: Update Nextflow and setup pre-commit - command: | - pre-commit install --install-hooks - nextflow self-update - -vscode: - extensions: - - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 62ee0ce7..c51b9619 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -30,7 +30,7 @@ lint: nextflow_config: - manifest.name - manifest.homePage -nf_core_version: 3.3.2 +nf_core_version: 3.5.2 repository_type: pipeline template: author: Damon-Lee B Pointon (@DLBPointon) @@ -48,4 +48,4 @@ template: - seqera_platform - multiqc - rocrate - version: 1.5.1 + version: 1.6.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bb41beec..d06777a8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: additional_dependencies: - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: 
trailing-whitespace args: [--markdown-linebreak-ext=md] diff --git a/.prettierignore b/.prettierignore index 2b70c2f0..ebc5c7e1 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,3 +10,6 @@ testing* *.pyc bin/ CITATION.cff +.nf-test/ +modules/nf-core/ +subworkflows/nf-core/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 2647dee3..ac220849 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,69 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[1.5.1]](https://github.com/sanger-tol/curationpretext/releases/tag/1.5.1)] - UNSC Punic (H1) - [2025-10-01] +## [[1.6.0](https://github.com/sanger-tol/curationpretext/releases/tag/1.6.0)] - UNSC Trafalgar - [2025-02-19] + +### Added and Fixed + +- Template update to 3.5.2. +- The previous `GENERATE_MAPS` subworkflow has been replaced with `ALIGN_CRAM` and `CREATE_MAPS_{STDRD,HIRES}` (renamed from `CRAM_MAP_ILLUMINA_HIC` and `PAIRS_CREATE_CONTACT_MAPS`, from the [`sanger-tol/nf-core-modules`](https://github.com/sanger-tol/nf-core-modules) repository, respectively) +- Files can now be given explicitly in the `--reads` parameter in the format of `[<file1>, <file2>, ...]`, alternatively it can accept a FOFN (File of file names). +- Files can now be given explicitly in the `--cram` parameter in the format of `[<file1>, <file2>, ...]`, alternatively it can accept a FOFN (File of file names). +- `--pre_mapped_bam` parameter added in order to supply 1 pre-mapped BAM file, in this case `--cram` would be empty. + - Warnings have been added to ensure: + - Only 1 pre-mapped BAM file is provided if `--pre_mapped_bam` is used. + - Only 1 of `--pre_mapped_bam` or `--cram` is used. +- `--cram_chunk_size` parameter added by `ALIGN_CRAM` to make cram chunking configurable, defaulting to 10000. +- `LONGREAD_COVERAGE` subworkflow has been updated to accept an array list of files. 
+- Major Update to modules coinciding with changes to use Nextflow topics +- Update to move all modules/subworkflows to version topics. + - Required a small change to the template topic collection otherwise it would fail as there is no ch_versions channel. +- Update docs to include the features from the past few releases. +- Remove duplicated `selected_aligner` code from `PIPELINE_INITIALISATION`. +- Change install for `TELOMERE` modules so that we use the `SANGER-TOL` repository rather than local. +- Removed now unused `bin` files. +- Migrated from `local/telo_finder` subworkflow to `sanger-tol/telo_finder`. +- Migrated from `local/gap_finder` subworkflow to `sanger-tol/gap_finder`. +- Updated the schema to include patterns for the correct input file and to also allow fastq for reads along with fasta. + +### Parameters + +| Old Version | New Versions | +| ----------- | ----------------- | +| NA | --pre_mapped_bam | +| NA | --cram_chunk_size | + +### Software Dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Module | Old Version | New Versions | +| ------------------------------ | ---------------- | -------------------------------------------------------------- | +| `BEDTOOLS_BAMTOBED` | 2.30.0 | 2.31.1 | +| `BEDTOOLS_GENOMECOV` | 2.30.0 | 2.31.1 | +| `BEDTOOLS_INTERSECT` | 2.30.0 | 2.31.1 | +| `BEDTOOLS_MAKEWINDOWS` | 2.30.0 | 2.31.1 | +| `BEDTOOLS_MAP` | 2.30.0 | 2.31.1 | +| `CRAMALIGN_BWAMEM2ALIGNHIC` | NEW_ADDITION | bwamem2: 2.2.1, samtools: 1.22.1 | +| `GAWK` | 5.2.0 | 5.3.1 | +| `GNU_SORT` | 9.1 | 9.5 | +| `MINIMAP2_ALIGN` | 2.28--he4a0461_0 | 2.29-r1283 | +| `PRETEXTMAP` | 0.1.9 | 0.1.9 (Temporary Patch, to be updated to 0.2.4 once available) | +| `SAMTOOLS_FAIDX` | 1.21.2 | 1.22.1 | +| `SAMTOOLS_MERGE` | 1.21.2 | 1.22.1 | +| `SAMTOOLS_SORT` | 1.21.2 | 1.22.1 | +| `SAMTOOLS_SPLITHEADER` | 1.21.2 | 1.22.1 | +| `SAMTOOLS_VIEW_FILTER_PRIMARY` | 1.21.2 | 1.22.1 | +| `SAMTOOLS_MERGEDUP` | NEW_ADDITION | 1.23.0 | +| `FIND_TELOMERE_WINDOWS` | 1.0.0 | REMOVED | +| `TELOMERE_WINDOWS` | NEW_ADDITION | 1.0.0 | +| `FIND_TELOMERE_REGIONS` | 1.0.0 | REMOVED | +| `TELOMERE_REGIONS` | NEW_ADDITION | 1.0.0 | +| `EXTRACT_TELOMERE` | 1.0.0 | REMOVED | +| `TELOMERE_EXTRACT` | NEW_ADDITION | 1.0.0 | +| `UCSC_BEDGRAPHTOBIGWIG` | 447 | 482 | + +## [[1.5.1](https://github.com/sanger-tol/curationpretext/releases/tag/1.5.1)] - UNSC Punic (H1) - [2025-10-01] ### Added and Fixed diff --git a/README.md b/README.md index 7d9a3b3e..cd38ec33 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ [![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) 
-[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -17,11 +17,15 @@ This is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage). -![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png) +![Workflow Diagram](./docs/images/CurationPretext-1.6.0.jpeg) -1. Generate Maps - Generates pretext maps as well as a static image. +The above image shows the use of this pipeline inside of the manual curation process and follows the below major steps. -2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks. +1. 
CRAM_MAP_ILLUMINA_HIC (ALIGN_CRAM) + PAIRS_CREATE_CONTACT_MAPS (CREATE_MAPS) - Generates pretext maps as well as a static image. + +2. ACCESSORY_FILES - Generates the repeat density, gap, telomere, and coverage tracks. + +3. PRETEXT_INGEST - Imports the generated tracks into pretext for visualisation. ## Usage @@ -44,7 +48,7 @@ Currently, the pipeline uses the following flags: - The type of longread data you are utilising, e.g., ont, illumina, hifi. - `--aligner` - - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported. + - The aligner you wish to use for the coverage generation, defaults to `AUTO` but options include `bwamem2` and `minimap2`. - `--cram` - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/` @@ -61,6 +65,18 @@ Currently, the pipeline uses the following flags: - `--all_output` - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured. +- `--skip_tracks` + - A csv list of accessory tracks to skip, options are: `ALL`, `gap`, `coverage`, `telo`, `repeats`, `NONE`. Default is `NONE`. Please note that capitalization matters. + +- `--split_telomere` + - A boolean to also generate the telomere track in 5Prime and 3Prime styles, this will also include the original telomere track. + +- `--pre_mapped_bam` + - A boolean option to use `--cram` as input for _A_ pre-mapped bam file. + +- `--cram_chunk_size` + - The number of records each chunk of a cram file should contain, defaults to 10000. 
+ Now, you can run the pipeline using: ```bash @@ -72,7 +88,7 @@ nextflow run sanger-tol/curationpretext \ --sample { default is "pretext_rerun" } \ --teloseq { default is "TTAGGG" } \ --map_order { default is "unsorted" } \ - --multi_mapping { default is "0" (for no mapping)} \ + --multi_mapping { default is "0" (for no filtering of multi-mapping reads)} \ --all_output \ --outdir { OUTDIR } \ -profile diff --git a/bin/awk_filter_reads.sh b/bin/awk_filter_reads.sh deleted file mode 100755 index d50aa9ec..00000000 --- a/bin/awk_filter_reads.sh +++ /dev/null @@ -1 +0,0 @@ -awk 'BEGIN{OFS="\t"}{if($1 ~ /^\@/) {print($0)} else {$2=and($2,compl(2048)); print(substr($0,2))}}' diff --git a/bin/gawk_split_directions.awk b/bin/gawk_split_directions.awk deleted file mode 100644 index df82aa10..00000000 --- a/bin/gawk_split_directions.awk +++ /dev/null @@ -1,8 +0,0 @@ -## Split telomere file based on column 4 contents -## Date: 03/07/2025 - -BEGIN { - FS="\t"; OFS="\t" -} { - print > "direction."$3".telomere" -} diff --git a/bin/generate_cram_csv.sh b/bin/generate_cram_csv.sh deleted file mode 100755 index 5e7a4822..00000000 --- a/bin/generate_cram_csv.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# generate_cram_csv.sh -# ------------------- -# Generate a csv file describing the CRAM folder -# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°> -# Author = yy5 -# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°> - -# Function to process chunking of a CRAM file -chunk_cram() { - local cram="$1" - local chunkn="$2" - local outcsv="$3" - local realcram - local realcrai - realcram=$(readlink -f "$cram") - realcrai=$(readlink -f "${cram}.crai") - - if [ ! 
-f "$realcrai" ]; then - echo "Error: $realcrai does not exist" >&2 - exit 1 - fi - - local rgline - rgline=$(samtools view -H "$realcram" | grep "@RG" | sed 's/\t/\\t/g' | tr -d "',") - local ncontainers - ncontainers=$(zcat "$realcrai" | wc -l) - local base - base=$(basename "$realcram" .cram) - local from=0 - local to=10000 - - while [ "$to" -lt "$ncontainers" ]; do - echo "${realcram},${realcrai},${from},${to},${base},${chunkn},${rgline}" >> "$outcsv" - from=$((to + 1)) - to=$((to + 10000)) - chunkn=$((chunkn + 1)) - done - - if [ "$from" -le "$ncontainers" ]; then - echo "${realcram},${realcrai},${from},${ncontainers},${base},${chunkn},${rgline}" >> "$outcsv" - chunkn=$((chunkn + 1)) - fi - - echo "$chunkn" -} - -# Function to process a CRAM file -process_cram_file() { - local cram="$1" - local chunkn="$2" - local outcsv="$3" - - local read_groups - read_groups=$(samtools samples -T ID "$cram" | cut -f1) - local num_read_groups - num_read_groups=$(echo "$read_groups" | wc -w) - - echo "READ GROUPS FOUND IN $cram :$: $num_read_groups" >&2 - echo "READ GROUPS FOUND :$: $read_groups" >&2 - - if [ "$num_read_groups" -gt 1 ]; then - for rg in $read_groups; do - echo "SPLITTING OUT READ GROUP $rg" >&2 - local output_cram - output_cram="$(basename "${cram%.cram}")_output_${rg}.cram" - samtools view -h -r "$rg" -o "$output_cram" "$cram" - samtools index "$output_cram" - chunkn=$(chunk_cram "$output_cram" "$chunkn" "$outcsv") - done - else - chunkn=$(chunk_cram "$cram" "$chunkn" "$outcsv") - fi - - echo "DATA :$: $chunkn" >&2 - echo "$chunkn" -} - -# Main script to generate a CSV file describing the CRAM folder -# /\_/\ /\_/\ -# ( o.o ) main ( o.o ) -# > ^ < > ^ < - -# Check if cram_path is provided -if [ -z "${1:-}" ]; then - echo "Usage: $0 " >&2 - exit 1 -fi - -cram_path="$1" -chunkn=0 -outcsv="${2:-output.csv}" - -# Loop through each CRAM file in the specified directory. 
cram cannot be the symlinked cram -for cram in "${cram_path}"/*.cram; do - realcram=$(readlink -f "$cram") - chunkn=$(process_cram_file "$realcram" "$chunkn" "$outcsv") -done diff --git a/bin/grep_pg.sh b/bin/grep_pg.sh deleted file mode 100755 index 680b5ec2..00000000 --- a/bin/grep_pg.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# grep_pg.sh -# ------------------- -# A shell script to exclude pg lines and label read 1 and read 2 from cram containers -# -# ------------------- -# Author = yy5 - -grep -v "^\@PG" | awk '{if($1 ~ /^\@/) {print($0)} else {if(and($2,64)>0) {print(1$0)} else {print(2$0)}}}' diff --git a/conf/base.config b/conf/base.config index 9ca32fe1..dca4223c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,14 +14,20 @@ process { time = { 4.h * task.attempt } errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' } - maxRetries = 1 + maxRetries = 2 maxErrors = '-1' // IN CASES WHERE THERE IS ONE HIC FILE THIS WILL NEED ALMOST NOTHING - withName:SAMTOOLS_MERGE { + withName: SAMTOOLS_MERGE { cpus = { 16 } memory = { 50.GB * task.attempt } - time = { 30.h * task.attempt } + time = { 20.h * task.attempt } + } + + withName: SAMTOOLS_MERGEDUP { + cpus = { 6 } + memory = { 10.GB * task.attempt } + time = { 20.h * task.attempt } } withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { @@ -39,12 +45,12 @@ process { time = { 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48) } } - withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { + withName: ".*:ALIGN_CRAM:CRAMALIGN_BWAMEM2ALIGNHIC" { cpus = { 16 * 1 } memory = { 1.GB * ( reference.size() < 2e9 ? 
80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) * Math.ceil( task.attempt * 1 ) ) } } - withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { + withName: ".*:ALIGN_CRAM:CRAMALIGN_MINIMAP2ALIGNHIC" { cpus = { 16 * 1 } memory = { 1.GB * ( @@ -62,23 +68,28 @@ process { memory = { 128.MB * Math.ceil( task.attempt * 1.5 ) } } - withName: PRETEXTMAP_STANDRD{ + withName: ".*:CREATE_MAPS_STDRD:PRETEXTMAP" { cpus = { 8 * 1 } memory = { 3.GB * task.attempt } time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) } } - withName: PRETEXTMAP_HIGHRES { - cpus = { 6 * task.attempt } + withName: ".*:CREATE_MAPS_HIRES:PRETEXTMAP" { + cpus = { 8 * task.attempt } memory = { 20.GB * Math.ceil( task.attempt * 2.6 ) } time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * Math.ceil( task.attempt * 1 ) ) } } - withName: SNAPSHOT_SRES { + withName: '.*:CREATE_MAPS_STDRD:PRETEXTSNAPSHOT' { cpus = { 1 * task.attempt } memory = { 1.GB * task.attempt } } + withName: '.*:CREATE_MAPS_HIRES:PRETEXTSNAPSHOT' { + cpus = { 1 * task.attempt } + memory = { 4.GB * task.attempt } + } + withName: BWAMEM2_INDEX { memory = { 1.GB * Math.ceil( 28 * fasta.size() / 1e+9 ) * task.attempt } } @@ -88,20 +99,11 @@ process { memory = { 1.GB * Math.ceil( 4 * fasta.size() / 1e+9 ) * task.attempt } } - withName: GAP_LENGTH { + withName: ".*:GAP_FINDER:GAWK_GAP_LENGTH" { cpus = { 1 } memory = { 100.MB * task.attempt } } - // Parity with TreeVal - // Module needs what seems like alot of memory incase the cram file contains multiple - // read groups, in this case it needs to split the CRAM - withName: CRAM_GENERATE_CSV { - cpus = { 6 } - memory = { 30.GB * task.attempt } - time = { 10.h * task.attempt } - } - withName: GNU_SORT { memory = { 1.GB * task.attempt } } @@ -111,6 +113,10 @@ process { time = { 20.h * task.attempt } } + withName: TELOMERE_WINDOWS { + memory = { 4.GB * task.attempt } + } + // Process-specific resource requirements // NOTE - Please try and reuse the labels below as much as 
possible. // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. diff --git a/conf/long_minimap.config b/conf/long_minimap.config new file mode 100644 index 00000000..0c718610 --- /dev/null +++ b/conf/long_minimap.config @@ -0,0 +1,16 @@ +process { + withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { + cpus = { 20 * 1 } + memory = { + 1.GB * ( + reference.size() < 2e9 ? 30 : + (reference.size() < 5e9 ? 40 : + (reference.size() < 10e9 ? 60 : + Math.ceil((reference.size() / 1e9) * 3) + ) + ) + ) * Math.ceil(task.attempt * 1) + } + time = { 96.h * Math.ceil( task.attempt * 1 ) } + } +} diff --git a/conf/modules.config b/conf/modules.config index ded4197d..cfe7ae5f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,30 +32,46 @@ process { ] } - withName: 'SNAPSHOT_SRES' { + withName: "PRETEXTSNAPSHOT" { publishDir = [ - path: { "${params.outdir}/pretext_snapshot" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + path: { "${params.outdir}/pretext_snapshot" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] + ext.args = { "--sequences '=full' --resolution 1440 --memory ${task.memory}" } + ext.prefix = { "${meta.id}_normal" } } - withName: "PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD" { + withName: "PRETEXTMAP" { publishDir = [ - path: { "${params.outdir}/pretext_maps_raw" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.all_output + [ + path: { "${params.outdir}/pretext_maps_raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.all_output + ] ] } + withName: ".*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG" { + ext.prefix = { "${meta.id}_repeat_density" } + } + + withName: ".*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG" { + ext.prefix = { "${meta.id}_longread_coverage" } + } + // BOTH COVERAGE AND REPEATS ARE CONTROLLED BY UCSC_BEDGRAPH TOGBIGWIG - withName: 'UCSC_BEDGRAPHTOBIGWIG|GAWK_GAP_LENGTH|EXTRACT_TELOMERE' { + withName: "UCSC_BEDGRAPHTOBIGWIG|.*:GAP_FINDER:GAWK_GAP_LENGTH|TELOMERE_EXTRACT" { publishDir = [ - path: { "${params.outdir}/accessory_files" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.all_output + [ + path: { "${params.outdir}/accessory_files" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.all_output + ] ] } @@ -77,8 +93,7 @@ process { ext.suffix = 'bed' } - withName: 'GAWK_GAP_LENGTH' { - ext.args2 = "'BEGIN { OFS = \"\\t\" }{print \$0, sqrt((\$3-\$2)*(\$3-\$2))}'" + withName: ".*:GAP_FINDER:GAWK_GAP_LENGTH" { ext.suffix = 'gap.bedgraph' } @@ -94,7 +109,7 @@ process { ext.suffix = 'fasta' } - withName: 'GAWK_SPLIT_DIRECTIONS' { + withName: ".*:TELO_FINDER:GAWK_SPLIT_TELOMERE" { ext.prefix = { "${input}_telo" } ext.suffix = 'telomere' } @@ -155,12 +170,11 @@ process { ext.prefix = { "${meta.id}_hr" } } - withName: 'SNAPSHOT_SRES' { - ext.args = "--sequences '=full' --resolution 1440" - ext.prefix = { "${meta.id}_normal" } + withName: "PRETEXTSNAPSHOT" { + ext.args = { "--sequences '=full' --resolution 1440 --memory ${task.memory}" } + ext.prefix = { "${meta.id}_normal" } } - // // NOTE: ACCESSORY_FILES -> LONGREAD_COVERAGE // @@ -186,7 +200,7 @@ process { ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } } - withName: '.*:.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { + withName: '.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { ext.prefix = { "${meta.id}_merge" } 
} @@ -210,43 +224,34 @@ process { ext.prefix = { "${meta.id}_sorted" } } - withName: '.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' { - ext.prefix = 'coverage' - } - - withName: "FIND_TELOMERE_REGIONS" { - ext.find_telomere = "find_telomere" - } - - withName: "FIND_TELOMERE_WINDOWS" { - ext.telomere_jar = "telomere.jar" - ext.telomere_jvm_params = "-Xms1g -Xmx1g" + withName: "TELOMERE_WINDOWS" { + ext.args = "99.9" } - withName: "PRETEXTMAP_STANDRD" { - ext.args = { "${meta.map_order.equals("length") ? "--sortby length": "--sortby nosort" } --mapq ${meta2.multi_mapping}" } + withName: ".*:CREATE_MAPS_STDRD:PRETEXTMAP" { + ext.args = { "${meta.map_order.equals("length") ? "--sortby length": "--sortby nosort" } --mapq ${params.multi_mapping}" } ext.prefix = { "${meta.id}_normal_pi" } } - withName: "PRETEXTMAP_HIGHRES" { - ext.args = { "${meta.map_order.equals("length") ? "--sortby length": "--sortby nosort" } --highRes --mapq ${meta2.multi_mapping}" } + withName: ".*:CREATE_MAPS_HIRES:PRETEXTMAP" { + ext.args = { "${meta.map_order.equals("length") ? 
"--sortby length": "--sortby nosort" } --highRes --mapq ${params.multi_mapping}" } ext.prefix = { "${meta.id}_hr_pi" } } - withName: ".*:GENERATE_MAPS:HIC_BWAMEM2:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" { + withName: ".*:ALIGN_CRAM:CRAMALIGN_BWAMEM2ALIGNHIC" { ext.args = "" - ext.args1 = "-F0xB00 -nt" - ext.args2 = { "-5SPCp -H'${rglines}'" } - ext.args3 = "-mpu" - ext.args4 = { "--write-index -l1" } + ext.args2 = "-F0xB00 -nt" + ext.args3 = { "-5SPCp" } + ext.args4 = "-mpu" + ext.args6 = { "--write-index -l1" } } - withName: ".*:GENERATE_MAPS:HIC_MINIMAP2:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + withName: ".*:ALIGN_CRAM:CRAMALIGN_MINIMAP2ALIGNHIC" { ext.args = "" - ext.args1 = "" - ext.args2 = { "-ax sr" } - ext.args3 = "-mpu" - ext.args4 = { "--write-index -l1" } + ext.args2 = "" + ext.args3 = { "-ax sr" } + ext.args4 = "-mpu" + ext.args6 = { "--write-index -l1" } } } diff --git a/conf/test.config b/conf/test.config index f98582dd..2013e915 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,11 +22,12 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - input = "${baseDir}/TreeValTinyData/assembly/draft/grTriPseu1.fa" - reads = "${baseDir}/TreeValTinyData/genomic_data/pacbio/" - cram = "${baseDir}/TreeValTinyData/genomic_data/hic-arima/" - sample = "CurationPretextTest" - teloseq = "TTAGGG" + input = 'https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa' + reads = ['https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/pacbio/seqkitPacbio50000.fasta.gz'] + cram = ['https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/SUBSET-1000.cram', 'https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/SUBSET-2000.cram'] + pre_mapped_bam = [] + sample = "CurationPretextTest_MIN" + teloseq = "TTA" aligner = 
"bwamem2" all_output = false skip_tracks = "NONE" diff --git a/conf/test_full.config b/conf/test_full.config index 3166bfd1..37179351 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -20,15 +20,15 @@ params { // Input data for full size test // Limit resources so that this can run on GitHub Actions - sample = "testing" - input = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa" - reads = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/genomic_data/pacbio/" - cram = "/nfs/treeoflife-01/resources/nextflow/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/" - sample = "CurationPretextTest" - teloseq = "TTAGGG" + input = 'https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa' + reads = ['https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/pacbio/seqkitPacbio50000.fasta.gz'] + cram = ['https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/SUBSET-1000.cram', 'https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData/genomic_data/hic-arima/SUBSET-2000.cram'] + sample = "CurationPretextTest_FULL" + teloseq = "TTA" aligner = "bwamem2" all_output = true skip_tracks = "NONE" + run_hires = true split_telomere = true } diff --git a/conf/very_large_resources.config b/conf/very_large_resources.config new file mode 100644 index 00000000..dd557ce9 --- /dev/null +++ b/conf/very_large_resources.config @@ -0,0 +1,76 @@ +process { + cpus = { 1 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 8.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 
'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // IN CASES WHERE THERE IS ONE HIC FILE THIS WILL NEED ALMOST NOTHING + withName:SAMTOOLS_MERGE { + cpus = { 16 } + memory = { 100.GB * task.attempt } + time = { 30.h * task.attempt } + } + + withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { + cpus = { 20 * 1 } + memory = { + 4.GB * ( + reference.size() < 2e9 ? 30 : + (reference.size() < 5e9 ? 40 : + (reference.size() < 10e9 ? 60 : + Math.ceil((reference.size() / 1e9) * 5) + ) + ) + ) * Math.ceil(task.attempt * 1) + } + time = { 40.h * Math.ceil(task.attempt * 1) } + } + + + withName: PRETEXT_GRAPH { + memory = { 128.MB * Math.ceil( task.attempt * 1.5 ) } + } + + withName: ".*:CREATE_MAPS_STDRD:PRETEXTMAP" { + cpus = { 8 * 1 } + memory = { 3.GB * task.attempt } + time = { 1.h * ( ( fasta.size() < 4e9 ? 24 : 48 ) * task.attempt ) } + } + + withName: ".*:CREATE_MAPS_HIRES:PRETEXTMAP" { + cpus = { 8 * task.attempt } + memory = { 60.GB * Math.ceil( task.attempt * 2.6 ) } + time = { 1.h * ( ( fasta.size() < 4e9 ? 
24 : 48 ) * Math.ceil( task.attempt * 1 ) ) } + } + + withName: '.*:CREATE_MAPS_STDRD:PRETEXTSNAPSHOT' { + cpus = { 1 * task.attempt } + memory = { 1.GB * task.attempt } + } + + withName: BWAMEM2_INDEX { + memory = { 4.GB * Math.ceil( 28 * fasta.size() / 1e+9 ) * task.attempt } + } + + withName: MINIMAP2_INDEX { + cpus = { 2 * task.attempt } + memory = { 4.GB * Math.ceil( 4 * fasta.size() / 1e+9 ) * task.attempt } + } + + withName: GAP_LENGTH { + cpus = { 1 } + memory = { 1.GB * task.attempt } + } + + withName: GNU_SORT { + memory = { 1.GB * task.attempt } + } + + withName: BEDTOOLS_INTERSECT { + memory = { 40.GB * task.attempt } + time = { 20.h * task.attempt } + } +} diff --git a/docs/images/CurationPretext-1.6.0.jpeg b/docs/images/CurationPretext-1.6.0.jpeg new file mode 100644 index 00000000..2af082b2 Binary files /dev/null and b/docs/images/CurationPretext-1.6.0.jpeg differ diff --git a/docs/images/CurationPretext_1_3_0.png b/docs/images/CurationPretext_1_3_0.png deleted file mode 100644 index 55ce4a0d..00000000 Binary files a/docs/images/CurationPretext_1_3_0.png and /dev/null differ diff --git a/docs/images/gap_finder_1_3_0.png b/docs/images/gap_finder_1_3_0.png deleted file mode 100644 index 0d11c43c..00000000 Binary files a/docs/images/gap_finder_1_3_0.png and /dev/null differ diff --git a/docs/images/hic_bwamem2_1_3_0.png b/docs/images/hic_bwamem2_1_3_0.png deleted file mode 100644 index fbe5f828..00000000 Binary files a/docs/images/hic_bwamem2_1_3_0.png and /dev/null differ diff --git a/docs/images/hic_minimap2_1_3_0.png b/docs/images/hic_minimap2_1_3_0.png deleted file mode 100644 index 98acefc6..00000000 Binary files a/docs/images/hic_minimap2_1_3_0.png and /dev/null differ diff --git a/docs/images/longread_coverage_1_3_0.png b/docs/images/longread_coverage_1_3_0.png deleted file mode 100644 index a7870f68..00000000 Binary files a/docs/images/longread_coverage_1_3_0.png and /dev/null differ diff --git a/docs/images/repeat_density_1_3_0.png 
b/docs/images/repeat_density_1_3_0.png deleted file mode 100644 index d841ac9a..00000000 Binary files a/docs/images/repeat_density_1_3_0.png and /dev/null differ diff --git a/docs/images/telo_finder_1_3_0.png b/docs/images/telo_finder_1_3_0.png deleted file mode 100644 index 7be0a27f..00000000 Binary files a/docs/images/telo_finder_1_3_0.png and /dev/null differ diff --git a/docs/usage.md b/docs/usage.md index 6bfa6757..35c265a7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -265,7 +265,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `shifter` - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` diff --git a/functions/local/utils.nf b/functions/local/utils.nf new file mode 100644 index 00000000..7482f26c --- /dev/null +++ b/functions/local/utils.nf @@ -0,0 +1,64 @@ +def fn_get_validated_channel (data_type, tolid_meta, files_list) { + // Based on the functions added in TreeVal - commit: 61f4ad9 + // Edited to be a function working on the raw yaml data + // rather than channels as it was previously + + // Initialise defaults + def fofn_files = [] + def direct_files = [] + + // Process each file - separate FOFN from direct files + files_list.each { file_path -> + if (file_path.toString().contains(".fofn")) { + def fofn_content = file(file_path).text.split('\n') + .collect { data -> data.trim() } + .findAll { file -> file } // Remove empty lines + fofn_files.addAll(fofn_content) + } else { + direct_files.add(file_path) + } + } + + // Combine all files + def all_files = direct_files + fofn_files + + // Validate files based on data type + if 
(data_type == "cram") { + def invalid_files = all_files.findAll { cram -> + !cram.toString().contains(".cram") && + !cram.toString().contains(".bam") + } + if (invalid_files.size() > 0) { + error "[Error] One of the input hic files does not match cram format. Invalid files: ${invalid_files}" + } + } else if (data_type == "longread") { + def invalid_files = all_files.findAll { reads -> + !reads.toString().contains(".fasta.gz") && + !reads.toString().contains(".fa.gz") && + !reads.toString().contains(".fn.gz") + } + if (invalid_files.size() > 0) { + error "[Error] One of the input longread files does not match expected formats (fn.gz, fa.gz, fasta.gz). Invalid files: ${invalid_files}" + } + } + + // get lengths of the total list of files and unique(list of files) + // a difference in these numbers means there is a duplicate + def raw_list = all_files.size() + def unique_list = all_files.toSet().size() + + // This may not bring the error to the surface, check the .nextflow.log for details + if (raw_list != unique_list) { + error "[Treeval: Error] There is a duplicate value in your ${data_type} list, check your inputs! Found ${raw_list} total items but only ${unique_list} unique items." 
+ } + + // Create the resolved channel tuple + def resolved_channel = channel.of( + [ + tolid_meta, + all_files.collect { new_file -> file(new_file, checkIfExists: true) } + ] + ) + + return resolved_channel +} diff --git a/main.nf b/main.nf index dc8eb8ab..4dd04dc6 100644 --- a/main.nf +++ b/main.nf @@ -33,7 +33,14 @@ workflow SANGER_TOL_CURATIONPRETEXT { input_fasta reads cram + mapped teloseq + input_file_string + aligner + skip_tracks + run_hires + split_telomere + cram_chunk_size main: @@ -41,7 +48,14 @@ workflow SANGER_TOL_CURATIONPRETEXT { input_fasta, reads, cram, - teloseq + mapped, + teloseq, + input_file_string, + aligner, + skip_tracks, + run_hires, + split_telomere, + cram_chunk_size ) // CURATIONPRETEXT_MAPS } @@ -63,7 +77,10 @@ workflow { params.monochrome_logs, args, params.outdir, - [] // We are not using the samplesheet for this pipeline + [], // We are not using the samplesheet for this pipeline + params.help, + params.help_full, + params.show_hidden ) // MOVE THE CHANNEL CREATION INTO THE PIPELINE INITIALISATION @@ -73,9 +90,16 @@ workflow { // SANGER_TOL_CURATIONPRETEXT ( PIPELINE_INITIALISATION.out.ch_reference, - PIPELINE_INITIALISATION.out.ch_reads, + PIPELINE_INITIALISATION.out.ch_longreads, PIPELINE_INITIALISATION.out.ch_cram_reads, - PIPELINE_INITIALISATION.out.teloseq + PIPELINE_INITIALISATION.out.ch_mapped_bam, + PIPELINE_INITIALISATION.out.teloseq, + params.input, + params.aligner, + params.skip_tracks, + params.run_hires, + params.split_telomere, + params.cram_chunk_size ) // diff --git a/modules.json b/modules.json index 61f01451..7ae3b5f0 100644 --- a/modules.json +++ b/modules.json @@ -7,109 +7,139 @@ "nf-core": { "bedtools/bamtobed": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "88d43dad73a675e66bff49ebb57fe657a5909018", "installed_by": ["modules"] }, "bedtools/genomecov": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": 
"88d43dad73a675e66bff49ebb57fe657a5909018", "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "88d43dad73a675e66bff49ebb57fe657a5909018", "installed_by": ["modules"] }, "bedtools/makewindows": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "88d43dad73a675e66bff49ebb57fe657a5909018", "installed_by": ["modules"] }, "bedtools/map": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "88d43dad73a675e66bff49ebb57fe657a5909018", "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", - "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d", - "installed_by": ["modules"] + "git_sha": "5dd46a36fca68d6ad1a6b22ec47adc8c6863717d", + "installed_by": ["cram_map_illumina_hic", "modules"] + }, + "cooler/cload": { + "branch": "master", + "git_sha": "5d491ae33d61ab37e13850951b92ae7d6e3f4e31", + "installed_by": ["pairs_create_contact_maps"] + }, + "cooler/zoomify": { + "branch": "master", + "git_sha": "5d491ae33d61ab37e13850951b92ae7d6e3f4e31", + "installed_by": ["pairs_create_contact_maps"] }, "gawk": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "git_sha": "c0da8f3a26835d663873001382a708f75766fec6", + "installed_by": ["gap_finder", "modules", "pairs_create_contact_maps", "telo_finder"] }, "gnu/sort": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "5e748ff2b0f990949081c9e49792622eb3fe9ee9", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "96c57dfd98a0641886a67bd449fe33ee2ec0e374", "installed_by": ["modules"] }, + "juicertools/pre": { + "branch": "master", + "git_sha": "559ca10e059b42eaf1ccf580c0193c150ed3dc94", + "installed_by": ["pairs_create_contact_maps"] + }, "minimap2/align": { "branch": 
"master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "5c9f8d5b7671237c906abadc9ff732b301ca15ca", "installed_by": ["modules"] }, "minimap2/index": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "git_sha": "14980f759266eec42dac401fcafeb83d6c957b41", + "installed_by": ["cram_map_illumina_hic", "modules"] }, "pretextmap": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"], + "git_sha": "000647dd5c075642ac90213b17f67f76236a7346", + "installed_by": ["modules", "pairs_create_contact_maps"], "patch": "modules/nf-core/pretextmap/pretextmap.diff" }, "pretextsnapshot": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"], + "git_sha": "2d0a16b1118f0112680222beeba2e4393c16dfe4", + "installed_by": ["modules", "pairs_create_contact_maps"], "patch": "modules/nf-core/pretextsnapshot/pretextsnapshot.diff" }, "samtools/faidx": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", + "installed_by": ["bam_samtools_merge_markdup", "modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", + "installed_by": ["cram_map_illumina_hic"] }, "samtools/merge": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", + "installed_by": ["bam_samtools_merge_markdup", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "5cb9a8694da0a0e550921636bb60bc8c56445fd7", "installed_by": ["modules"] }, + "samtools/splitheader": { + "branch": "master", + "git_sha": "b4962f11d1bf66aa3949695d7e35c1491d8c21dc", + "installed_by": ["cram_map_illumina_hic"] + }, 
"samtools/view": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", "installed_by": ["modules"] }, "seqtk/cutn": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "git_sha": "a46713779030a5f508117080cbf4b693dd4c6e33", + "installed_by": ["gap_finder", "modules"] + }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "91a902fb32d6717da38a9694eb4ad3fade53a8db", + "installed_by": ["gap_finder", "telo_finder"] }, "ucsc/bedgraphtobigwig": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "b1c64e862fb5ed7f13b6f8f3ef8f04e700d41438", "installed_by": ["modules"] }, "windowmasker/mkcounts": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ed6f7d2e090911f69f0f3af563f5af9ba1fc1fa0", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ed6f7d2e090911f69f0f3af563f5af9ba1fc1fa0", "installed_by": ["modules"] } } @@ -118,17 +148,87 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "git_sha": "271e7fc14eb1320364416d996fb077421f3faed2", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", + "installed_by": ["subworkflows"] + } + } + } + }, + "https://github.com/sanger-tol/nf-core-modules": { + "modules": { + "sanger-tol": { + "cramalign/bwamem2alignhic": { + "branch": "main", + "git_sha": "2e28234fb0e2970767dab28310c7eb553ad40081", + "installed_by": 
["cram_map_illumina_hic"] + }, + "cramalign/gencramchunks": { + "branch": "main", + "git_sha": "d4ca71bc08dc21d68af38c147cbd972c8e25c3e0", + "installed_by": ["cram_map_illumina_hic"] + }, + "cramalign/minimap2alignhic": { + "branch": "main", + "git_sha": "2e28234fb0e2970767dab28310c7eb553ad40081", + "installed_by": ["cram_map_illumina_hic"] + }, + "samtools/mergedup": { + "branch": "main", + "git_sha": "2e28234fb0e2970767dab28310c7eb553ad40081", + "installed_by": ["bam_samtools_merge_markdup"] + }, + "telomere/extract": { + "branch": "main", + "git_sha": "780f0c747b0aabfe5f827f88b5bbef16b6640e64", + "installed_by": ["telo_finder"] + }, + "telomere/regions": { + "branch": "main", + "git_sha": "88ba0abe3c06373b506509a5359092e22a912f1b", + "installed_by": ["telo_finder"] + }, + "telomere/windows": { + "branch": "main", + "git_sha": "6dec7b4e1a28b484d56c79836c07eba37e596a5b", + "installed_by": ["telo_finder"] + } + } + }, + "subworkflows": { + "sanger-tol": { + "bam_samtools_merge_markdup": { + "branch": "main", + "git_sha": "2e28234fb0e2970767dab28310c7eb553ad40081", + "installed_by": ["cram_map_illumina_hic"] + }, + "cram_map_illumina_hic": { + "branch": "main", + "git_sha": "2e28234fb0e2970767dab28310c7eb553ad40081", + "installed_by": ["subworkflows"] + }, + "gap_finder": { + "branch": "main", + "git_sha": "c447733d1899275a01cb3bac0719d2f68750ff72", + "installed_by": ["subworkflows"] + }, + "pairs_create_contact_maps": { + "branch": "main", + "git_sha": "2e28234fb0e2970767dab28310c7eb553ad40081", + "installed_by": ["subworkflows"] + }, + "telo_finder": { + "branch": "main", + "git_sha": "c447733d1899275a01cb3bac0719d2f68750ff72", "installed_by": ["subworkflows"] } } diff --git a/modules/local/cram/filter_align_bwamem2_fixmate_sort/main.nf b/modules/local/cram/filter_align_bwamem2_fixmate_sort/main.nf deleted file mode 100644 index 5a5d0dac..00000000 --- a/modules/local/cram/filter_align_bwamem2_fixmate_sort/main.nf +++ /dev/null @@ -1,58 +0,0 @@ -process 
CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { - tag "$meta.id" - label 'process_high' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : - 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - - input: - tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines) - tuple val(meta2), path(bwa_index_dir) - - output: - tuple val(meta), path("*.bam"), emit: mappedbam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args1 = task.ext.args1 ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def args4 = task.ext.args4 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Please be aware one of the tools here required mem = 28 * reference size!!! 
- """ - BWAPREFIX=( $bwa_index_dir/*.ann ) - cram_filter -n ${from}-${to} ${cramfile} - | \\ - samtools fastq ${args1} | \\ - bwa-mem2 mem -p \${BWAPREFIX/%.ann/} -t${task.cpus} -5SPCp -H'${rglines}' - | \\ - samtools fixmate ${args3} - - | \\ - samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 version | tail -n 1) - staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def base = "45022_3#2" - def chunkid = "1" - """ - touch ${prefix}_${base}_${chunkid}_mem.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 version | tail -n 1) - staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) - END_VERSIONS - """ -} diff --git a/modules/local/cram/filter_minimap2_filter5end_fixmate_sort/main.nf b/modules/local/cram/filter_minimap2_filter5end_fixmate_sort/main.nf deleted file mode 100644 index 74de35de..00000000 --- a/modules/local/cram/filter_minimap2_filter5end_fixmate_sort/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { - tag "$meta.id" - label 'process_high' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : - 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - - input: - tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines) - tuple val(meta2), path(ref) - - output: - tuple val(meta), path("*.bam"), emit: mappedbam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args1 = task.ext.args1 ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def args4 = task.ext.args4 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - cram_filter -n ${from}-${to} ${cramfile} - | \\ - samtools fastq ${args1} - | \\ - minimap2 -t${task.cpus} -R '${rglines}' ${args2} ${ref} - | \\ - grep_pg.sh | \\ - filter_five_end.pl | \\ - awk_filter_reads.sh | \\ - samtools fixmate ${args3} - - | \\ - samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mm.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - minimap2: \$(minimap2 --version) - staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def base = "45022_3#2" - def chunkid = "1" - """ - touch ${prefix}_${base}_${chunkid}_mm.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - minimap2: \$(minimap2 --version) - staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) - END_VERSIONS - """ -} diff --git a/modules/local/cram/generate_csv/main.nf b/modules/local/cram/generate_csv/main.nf deleted file mode 
100644 index 1327dc07..00000000 --- a/modules/local/cram/generate_csv/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process CRAM_GENERATE_CSV { - tag "${meta.id}" - label 'process_single' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : - 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - - input: - tuple val(meta), path(crampath) - - output: - tuple val(meta), path('*.csv'), emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "CRAM_GENERATE_CSV module does not support Conda. Please use Docker / Singularity instead." - } - - def prefix = task.ext.prefix ?: "${meta.id}" - """ - generate_cram_csv.sh $crampath ${prefix}_cram.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - END_VERSIONS - """ - - stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "CRAM_GENERATE_CSV module does not support Conda. Please use Docker / Singularity instead." 
- } - - """ - touch ${meta.id}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - END_VERSIONS - """ -} diff --git a/modules/local/extract/repeat/main.nf b/modules/local/extract/repeat/main.nf index 78a9cbd8..02aee61d 100644 --- a/modules/local/extract/repeat/main.nf +++ b/modules/local/extract/repeat/main.nf @@ -12,34 +12,21 @@ process EXTRACT_REPEAT { output: tuple val( meta ), path( "*.bed" ) , emit: bed - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('extract_repeat.pl'), eval("echo '1.0.0'"), topic: versions, emit: versions_extractrepeat + tuple val("${task.process}"), val('perl'), eval("perl --version | sed -n 's/.*(v\\([0-9.]\\+\\)).*/\\1/p'"), topic: versions, emit: versions_perl when: task.ext.when == null || task.ext.when script: def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ extract_repeat.pl $file > ${prefix}_repeats.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - perl: \$(echo \$(perl --version 2>&1) | awk '/This/ {print \$9}')) - extract_repeat.pl: $VERSION - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" touch ${prefix}_repeats.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - perl: \$(echo \$(perl --version 2>&1) | awk '/This/ {print \$9}')) - extract_repeat.pl: $VERSION - END_VERSIONS """ } diff --git a/modules/local/extract/telomere/main.nf b/modules/local/extract/telomere/main.nf deleted file mode 100644 index 41022f00..00000000 --- a/modules/local/extract/telomere/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process EXTRACT_TELOMERE { - tag "${meta.id}" - label 'process_single' - - conda "conda-forge::coreutils=9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" - - input: - tuple val( meta ), path( file ) - - output: - tuple val( meta ), file( "*bed" ) , emit: bed - tuple val( meta ), file("*bedgraph"), emit: bedgraph - path "versions.yml" , emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - def ETELO_VERSION = "2.0" - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - awk 'BEGIN {OFS = "\\t"} {print \$2, \$4, \$5}' ${file} | sed 's/>//g' > ${prefix}_telomere.bed - awk 'BEGIN {OFS = "\\t"} {print \$2,\$4,\$5,(((\$5-\$4)<0)?-(\$5-\$4):(\$5-\$4))}' ${file} | sed 's/>//g' > ${prefix}_telomere.bedgraph - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - extract_telomere: $ETELO_VERSION - coreutils: $VERSION - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def ETELO_VERSION = "2.0" - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
- """ - touch ${prefix}_telomere.bed - touch ${prefix}_telomere.bedgraph - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - extract_telomere: $ETELO_VERSION - coreutils: $VERSION - END_VERSIONS - """ -} diff --git a/modules/local/find/telomere_regions/main.nf b/modules/local/find/telomere_regions/main.nf deleted file mode 100644 index 2f71057b..00000000 --- a/modules/local/find/telomere_regions/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process FIND_TELOMERE_REGIONS { - tag "${meta.id}" - label 'process_low' - - container 'quay.io/sanger-tol/telomere:0.0.1-c1' - - input: - tuple val(meta), path(file) - val (telomereseq) - - output: - tuple val( meta ), file( "*.telomere" ) , emit: telomere - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "FIND_TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." - } - - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - def find_telomere = task.ext.find_telomere ?: '' - """ - find_telomere ${file} $telomereseq > ${prefix}.telomere - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - find_telomere: ${VERSION} - END_VERSIONS - """ - - stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "FIND_TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." - } - - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
- def find_telomere = task.ext.find_telomere ?: '' - """ - touch ${prefix}.telomere - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - find_telomere: ${VERSION} - END_VERSIONS - """ - -} diff --git a/modules/local/find/telomere_windows/main.nf b/modules/local/find/telomere_windows/main.nf deleted file mode 100644 index 134ce51d..00000000 --- a/modules/local/find/telomere_windows/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process FIND_TELOMERE_WINDOWS { - tag "${meta.id}" - label 'process_low' - - conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : - 'biocontainers/java-jdk:8.0.112--1' }" - - input: - tuple val(meta), path(file) - - output: - tuple val( meta ), file( "*.windows" ) , emit: windows - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - def telomere_jar = task.ext.telomere_jar ?: '' - def telomere_jvm_params = task.ext.telomere_jvm_params ?: '' - def telomere_window_cut = task.ext.telomere_window_cut ?: 99.9 - """ - java ${telomere_jvm_params} -cp ${projectDir}/bin/${telomere_jar} FindTelomereWindows $file $telomere_window_cut > ${prefix}.windows - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - telomere: $VERSION - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
- def telomere = task.ext.telomere ?: '' - """ - touch ${prefix}.windows - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - telomere: $VERSION - END_VERSIONS - """ - -} diff --git a/modules/local/gawk_split_directions/main.nf b/modules/local/gawk_split_directions/main.nf deleted file mode 100644 index 29b4af8a..00000000 --- a/modules/local/gawk_split_directions/main.nf +++ /dev/null @@ -1,58 +0,0 @@ -process GAWK_SPLIT_DIRECTIONS { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : - 'biocontainers/gawk:5.3.0' }" - - input: - tuple val(meta), path(input) - path(program_file) - - output: - tuple val(meta), path("direction.0.${suffix}"), emit: prime5 - tuple val(meta), path("direction.1.${suffix}"), emit: prime3 - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' // args is used for the main arguments of the tool - def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given - prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files - - program = program_file ? "-f ${program_file}" : "${args2}" - - input.collect{ - assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" 
- } - - """ - awk \\ - ${args} \\ - ${program} \\ - ${input} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: "${input.getExtension()}" - - """ - touch ${prefix}.${suffix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') - END_VERSIONS - """ -} diff --git a/modules/local/gawk_split_directions/meta.yml b/modules/local/gawk_split_directions/meta.yml deleted file mode 100644 index 34c50b12..00000000 --- a/modules/local/gawk_split_directions/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: "gawk" -description: | - If you are like many computer users, you would frequently like to make changes in various text files - wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. - The job is easy with awk, especially the GNU implementation gawk. -keywords: - - gawk - - awk - - txt - - text - - file parsing -tools: - - "gawk": - description: "GNU awk" - homepage: "https://www.gnu.org/software/gawk/" - documentation: "https://www.gnu.org/software/gawk/manual/" - tool_dev_url: "https://www.gnu.org/prep/ftp.html" - licence: ["GPL v3"] - identifier: "" -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: The input file - Specify the logic that needs to be executed on - this file on the `ext.args2` or in the program file. - If the files have a `.gz` extension, they will be unzipped using `zcat`. - pattern: "*" - - - program_file: - type: file - description: Optional file containing logic for awk to execute. If you don't - wish to use a file, you can use `ext.args2` to specify the logic. 
- pattern: "*" - - - disable_redirect_output: - type: boolean - description: Disable the redirection of awk output to a given file. This is - useful if you want to use awk's built-in redirect to write files instead - of the shell's redirect. -output: - - output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.${suffix}": - type: file - description: The output file - if using shell redirection, specify the name of this - file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure - the awk program produces files with the extension in `ext.suffix`. - pattern: "*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@nvnieuwk" -maintainers: - - "@nvnieuwk" diff --git a/modules/local/pretext/graph/main.nf b/modules/local/pretext/graph/main.nf index df351d1f..c0c00e40 100644 --- a/modules/local/pretext/graph/main.nf +++ b/modules/local/pretext/graph/main.nf @@ -14,7 +14,8 @@ process PRETEXT_GRAPH { output: tuple val(meta), path("*.pretext") , emit: pretext - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('ucsc'), eval("echo $VERSION"), topic: versions, emit: versions_ucsc + tuple val("${task.process}"), val('PretextGraph'), eval('PretextGraph | sed "/Version/!d; s/.*Version //"'), emit: versions_pretextgraph, topic: versions when: task.ext.when == null || task.ext.when @@ -27,7 +28,7 @@ process PRETEXT_GRAPH { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def UCSC_VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
// Using single [ ] as nextflow will use sh where possible not bash // @@ -136,13 +137,6 @@ process PRETEXT_GRAPH { else cp "\$input_file" "${prefix}.pretext" fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - PretextGraph: \$(PretextGraph | grep "Version" | sed 's/Pretext.* Version //;') - PretextMap: \$(PretextMap | grep "Version" | sed 's/Pretext.* Version//;') - bigWigToBedGraph: ${UCSC_VERSION} - END_VERSIONS """ stub: @@ -152,14 +146,7 @@ process PRETEXT_GRAPH { } def prefix = task.ext.prefix ?: "${meta.id}" - def UCSC_VERSION = '448' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}.pretext - cat <<-END_VERSIONS > versions.yml - "${task.process}": - PretextGraph: \$(PretextGraph | grep "Version" | sed 's/Pretext* Version //;') - PretextMap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version//;') - bigWigToBedGraph: ${UCSC_VERSION} - END_VERSIONS """ } diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf index bb8295dc..61c3e94a 100644 --- a/modules/nf-core/bedtools/bamtobed/main.nf +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -1,18 +1,18 @@ process BEDTOOLS_BAMTOBED { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' + : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.bed"), emit: bed - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools when: task.ext.when == null || task.ext.when @@ -23,24 +23,14 @@ process BEDTOOLS_BAMTOBED { """ bedtools \\ bamtobed \\ - $args \\ - -i $bam \\ + ${args} \\ + -i ${bam} \\ > ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ } diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml index 20171885..4068f694 100644 --- a/modules/nf-core/bedtools/bamtobed/meta.yml +++ b/modules/nf-core/bedtools/bamtobed/meta.yml @@ -23,9 +23,10 @@ input: type: file description: Input BAM file pattern: "*.{bam}" + ontologies: [] output: - - bed: - - meta: + bed: + - - meta: type: map description: | Groovy Map containing sample information @@ -34,11 +35,28 @@ output: type: file description: Bed file containing genomic intervals. 
pattern: "*.{bed}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_bedtools: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool authors: - "@yuukiiwa" - "@drpatelh" diff --git a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test index 297f1813..a8988d44 100644 --- a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test +++ b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { test("sarscov2 - bam") { when { process { - """ + """ input[0] = [ [ id:'test' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) @@ -26,16 +26,15 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("stub") { options "-stub" - + when { process { - """ + """ input[0] = [ [ id:'test' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) @@ -47,10 +46,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bed[0][1]).name).match() } + { assert snapshot(process.out).match() } ) } - } - } diff --git a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap index d28ddd3e..07474690 100644 --- 
a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap @@ -1,9 +1,44 @@ { "stub": { "content": [ - "test.bed" + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "BEDTOOLS_BAMTOBED", + "bedtools", + "2.31.1" + ] + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bedtools": [ + [ + "BEDTOOLS_BAMTOBED", + "bedtools", + "2.31.1" + ] + ] + } ], - "timestamp": "2023-12-05T17:37:27.785556" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T10:40:40.234185059" }, "sarscov2 - bam": { "content": [ @@ -17,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,90a53b0acd234b4f7d125dadd0dbbdfb" + [ + "BEDTOOLS_BAMTOBED", + "bedtools", + "2.31.1" + ] ], "bed": [ [ @@ -27,11 +66,19 @@ "test.bed:md5,a6a299bd39dc56225f8029c05ea97dcb" ] ], - "versions": [ - "versions.yml:md5,90a53b0acd234b4f7d125dadd0dbbdfb" + "versions_bedtools": [ + [ + "BEDTOOLS_BAMTOBED", + "bedtools", + "2.31.1" + ] ] } ], - "timestamp": "2023-12-05T17:37:20.997988" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T10:40:32.731340605" } } \ No newline at end of file diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 35e2ab14..710da5b4 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -1,66 +1,57 @@ process BEDTOOLS_GENOMECOV { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data': - 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data' + : 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b'}" input: tuple val(meta), path(intervals), val(scale) - path sizes - val extension - val sort + path sizes + val extension + val sort output: tuple val(meta), path("*.${extension}"), emit: genomecov - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args_list = args.tokenize() - args += (scale > 0 && scale != 1) ? " -scale $scale" : "" + args += scale > 0 && scale != 1 ? " -scale ${scale}" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } // Sorts output file by chromosome and position using additional options for performance and consistency // See https://www.biostars.org/p/66927/ for further details - def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' - def sort_cmd = sort ? "| LC_ALL=C sort --parallel=$task.cpus $buffer -k1,1 -k2,2n" : '' + def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' + def sort_cmd = sort ? 
"| LC_ALL=C sort --parallel=${task.cpus} ${buffer} -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { """ bedtools \\ genomecov \\ - -ibam $intervals \\ - $args \\ - $sort_cmd \\ + -ibam ${intervals} \\ + ${args} \\ + ${sort_cmd} \\ > ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ - } else { + } + else { """ bedtools \\ genomecov \\ - -i $intervals \\ - -g $sizes \\ - $args \\ - $sort_cmd \\ + -i ${intervals} \\ + -g ${sizes} \\ + ${args} \\ + ${sort_cmd} \\ > ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ } @@ -68,10 +59,5 @@ process BEDTOOLS_GENOMECOV { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ } diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml index 41b1f8f4..69b911c2 100644 --- a/modules/nf-core/bedtools/genomecov/meta.yml +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -24,26 +24,28 @@ input: type: file description: BAM/BED/GFF/VCF pattern: "*.{bam|bed|gff|vcf}" + ontologies: [] - scale: type: integer description: Number containing the scale factor for the output. Set to 1 to disable. Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch - - - sizes: - type: file - description: Tab-delimited table of chromosome names in the first column and - chromosome sizes in the second column - - - extension: - type: string - description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", - ".tab", etc.) 
It is set arbitrarily by the user and corresponds to the file - format which depends on arguments. - - - sort: - type: boolean - description: Sort the output + - sizes: + type: file + description: Tab-delimited table of chromosome names in the first column and chromosome + sizes in the second column + ontologies: [] + - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", + ".tab", etc.) It is set arbitrarily by the user and corresponds to the file + format which depends on arguments. + - sort: + type: boolean + description: Sort the output output: - - genomecov: - - meta: + genomecov: + - - meta: type: map description: | Groovy Map containing sample information @@ -52,11 +54,28 @@ output: type: file description: Computed genome coverage file pattern: "*.${extension}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_bedtools: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool authors: - "@edmundmiller" - "@sruthipsuresh" diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap index da6dbe87..4175ae7c 100644 --- a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + [ + 
"BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ], "genomecov": [ [ @@ -21,16 +25,20 @@ "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" ] ], - "versions": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + "versions_bedtools": [ + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-05T11:59:33.898146" + "timestamp": "2026-01-21T11:30:01.187722797" }, "sarscov2 - no scale - stub": { "content": [ @@ -44,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ], "genomecov": [ [ @@ -54,16 +66,20 @@ "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + "versions_bedtools": [ + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-05T11:59:52.483371" + "timestamp": "2026-01-21T11:30:16.248235814" }, "sarscov2 - scale": { "content": [ @@ -77,7 +93,11 @@ ] ], "1": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ], "genomecov": [ [ @@ -87,16 +107,20 @@ "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" ] ], - "versions": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + "versions_bedtools": [ + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-05T11:59:43.69501" + "timestamp": "2026-01-21T11:30:08.76325362" }, "sarscov2 - scale - stub": { "content": [ @@ -110,7 +134,11 @@ ] ], "1": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ], "genomecov": [ [ 
@@ -120,16 +148,20 @@ "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + "versions_bedtools": [ + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-05T12:00:09.930036" + "timestamp": "2026-01-21T11:30:32.324649338" }, "sarscov2 - no scale": { "content": [ @@ -143,7 +175,11 @@ ] ], "1": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ], "genomecov": [ [ @@ -153,16 +189,20 @@ "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" ] ], - "versions": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + "versions_bedtools": [ + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-05T11:59:25.448817" + "timestamp": "2026-01-21T11:29:54.109132031" }, "sarscov2 - dummy sizes - stub": { "content": [ @@ -176,7 +216,11 @@ ] ], "1": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ], "genomecov": [ [ @@ -186,15 +230,19 @@ "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + "versions_bedtools": [ + [ + "BEDTOOLS_GENOMECOV", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-05T12:00:01.086433" + "timestamp": "2026-01-21T11:30:23.839839194" } } \ No newline at end of file diff --git a/modules/nf-core/bedtools/genomecov/tests/nextflow.config b/modules/nf-core/bedtools/genomecov/tests/nextflow.config index bdb74ae5..670ef7fd 100644 --- a/modules/nf-core/bedtools/genomecov/tests/nextflow.config +++ 
b/modules/nf-core/bedtools/genomecov/tests/nextflow.config @@ -3,5 +3,5 @@ process { withName: BEDTOOLS_GENOMECOV { ext.prefix = { "${meta.id}.coverage" } } - + } diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf index d9e79e7f..4f7b1dac 100644 --- a/modules/nf-core/bedtools/intersect/main.nf +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -1,11 +1,11 @@ process BEDTOOLS_INTERSECT { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' + : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}" input: tuple val(meta), path(intervals1), path(intervals2) @@ -13,7 +13,7 @@ process BEDTOOLS_INTERSECT { output: tuple val(meta), path("*.${extension}"), emit: intersect - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools when: task.ext.when == null || task.ext.when @@ -24,36 +24,26 @@ process BEDTOOLS_INTERSECT { //Extension of the output file. It is set by the user via "ext.suffix" in the config. Corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). extension = task.ext.suffix ?: "${intervals1.extension}" def sizes = chrom_sizes ? "-g ${chrom_sizes}" : '' - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ if ("${intervals1}" == "${prefix}.${extension}" || "${intervals2}" == "${prefix}.${extension}") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } """ bedtools \\ intersect \\ - -a $intervals1 \\ - -b $intervals2 \\ - $args \\ - $sizes \\ + -a ${intervals1} \\ + -b ${intervals2} \\ + ${args} \\ + ${sizes} \\ > ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" extension = task.ext.suffix ?: "bed" - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if ("${intervals1}" == "${prefix}.${extension}" || "${intervals2}" == "${prefix}.${extension}") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } """ touch ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ } diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml index 45ecf377..0500efb9 100644 --- a/modules/nf-core/bedtools/intersect/meta.yml +++ b/modules/nf-core/bedtools/intersect/meta.yml @@ -21,10 +21,12 @@ input: type: file description: BAM/BED/GFF/VCF pattern: "*.{bam|bed|gff|vcf}" + ontologies: [] - intervals2: type: file description: BAM/BED/GFF/VCF pattern: "*.{bam|bed|gff|vcf}" + ontologies: [] - - meta2: type: map description: | @@ -34,23 +36,41 @@ input: type: file description: Chromosome sizes file pattern: "*{.sizes,.txt}" + ontologies: [] output: - - intersect: - - meta: + intersect: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - "*.${extension}": type: file - description: File containing the description of overlaps found between the two - features + description: File containing the description of overlaps found between the + two features pattern: "*.${extension}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_bedtools: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool authors: - "@edmundmiller" - "@sruthipsuresh" diff --git a/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap b/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap index b748dd49..30da8be1 100644 --- a/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" + [ + "BEDTOOLS_INTERSECT", + "bedtools", + "2.31.1" + ] ], "intersect": [ [ @@ -21,16 +25,20 @@ "test_out.bam:md5,738324efe2b1e442ceb6539a630c3fe6" ] ], - "versions": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" + "versions_bedtools": [ + [ + "BEDTOOLS_INTERSECT", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-17T20:55:57.454847668" + "timestamp": "2026-01-21T11:31:18.811473651" }, "sarscov2 - bed - bed": { "content": [ @@ -44,7 +52,11 @@ ] ], 
"1": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" + [ + "BEDTOOLS_INTERSECT", + "bedtools", + "2.31.1" + ] ], "intersect": [ [ @@ -54,16 +66,20 @@ "test_out.bed:md5,afcbf01c2f2013aad71dbe8e34f2c15c" ] ], - "versions": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" + "versions_bedtools": [ + [ + "BEDTOOLS_INTERSECT", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-17T20:55:49.072132931" + "timestamp": "2026-01-21T11:31:11.122621263" }, "sarscov2 - bed - stub": { "content": [ @@ -77,7 +93,11 @@ ] ], "1": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" + [ + "BEDTOOLS_INTERSECT", + "bedtools", + "2.31.1" + ] ], "intersect": [ [ @@ -87,15 +107,19 @@ "test_out.bed:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" + "versions_bedtools": [ + [ + "BEDTOOLS_INTERSECT", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-17T20:56:06.259192552" + "timestamp": "2026-01-21T11:31:27.957038806" } } \ No newline at end of file diff --git a/modules/nf-core/bedtools/makewindows/main.nf b/modules/nf-core/bedtools/makewindows/main.nf index 36d6cac2..db1343f8 100644 --- a/modules/nf-core/bedtools/makewindows/main.nf +++ b/modules/nf-core/bedtools/makewindows/main.nf @@ -1,18 +1,18 @@ process BEDTOOLS_MAKEWINDOWS { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' + : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}" input: tuple val(meta), path(regions) output: tuple val(meta), path("*.bed"), emit: bed - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools when: task.ext.when == null || task.ext.when @@ -21,29 +21,23 @@ process BEDTOOLS_MAKEWINDOWS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def arg_input = regions.extension in ["bed", "tab"] ? "-b ${regions}" : "-g ${regions}" - if ("${regions}" == "${prefix}.bed") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("${regions}" == "${prefix}.bed") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } """ bedtools \\ makewindows \\ ${arg_input} \\ ${args} \\ > ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" - if ("${regions}" == "${prefix}.bed") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ if ("${regions}" == "${prefix}.bed") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } """ touch ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ } diff --git a/modules/nf-core/bedtools/makewindows/meta.yml b/modules/nf-core/bedtools/makewindows/meta.yml index df047f66..bb695f48 100644 --- a/modules/nf-core/bedtools/makewindows/meta.yml +++ b/modules/nf-core/bedtools/makewindows/meta.yml @@ -24,9 +24,11 @@ input: type: file description: BED file OR Genome details file () pattern: "*.{bed,tab,fai}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV output: - - bed: - - meta: + bed: + - - meta: type: map description: | Groovy Map containing sample information @@ -35,11 +37,28 @@ output: type: file description: BED file containing the windows pattern: "*.bed" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_bedtools: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool authors: - "@kevbrick" - "@nvnieuwk" diff --git a/modules/nf-core/bedtools/makewindows/tests/main.nf.test b/modules/nf-core/bedtools/makewindows/tests/main.nf.test index b27e59b6..38302541 100644 --- a/modules/nf-core/bedtools/makewindows/tests/main.nf.test +++ b/modules/nf-core/bedtools/makewindows/tests/main.nf.test @@ -1,4 
+1,3 @@ - nextflow_process { name "Test Process BEDTOOLS_MAKEWINDOWS" @@ -17,10 +16,9 @@ nextflow_process { process { """ input[0] = [ - [ id:'test2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) - ] - + [ id:'test2'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] """ } } @@ -39,10 +37,30 @@ nextflow_process { process { """ input[0] = [ - [ id:'test2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) - ] + [ id:'test2'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-bedtools-makewindows-bed -- stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test2'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] """ } } diff --git a/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap b/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap index 22cfbc17..51717ca1 100644 --- a/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap @@ -1,4 +1,45 @@ { + "test-bedtools-makewindows-bed -- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test2" + }, + "test2.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "BEDTOOLS_MAKEWINDOWS", + "bedtools", + "2.31.1" + ] + ], + "bed": [ + [ + { + "id": "test2" + }, + "test2.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bedtools": [ + [ + "BEDTOOLS_MAKEWINDOWS", + "bedtools", + "2.31.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T11:40:48.35416302" + }, "test-bedtools-makewindows-fai": { 
"content": [ { @@ -11,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + [ + "BEDTOOLS_MAKEWINDOWS", + "bedtools", + "2.31.1" + ] ], "bed": [ [ @@ -21,16 +66,20 @@ "test2.bed:md5,622d1f62786fe4239b76c53168f21c54" ] ], - "versions": [ - "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + "versions_bedtools": [ + [ + "BEDTOOLS_MAKEWINDOWS", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-08-26T14:03:31.430455" + "timestamp": "2026-01-21T11:31:58.740645963" }, "test-bedtools-makewindows-bed": { "content": [ @@ -44,7 +93,11 @@ ] ], "1": [ - "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + [ + "BEDTOOLS_MAKEWINDOWS", + "bedtools", + "2.31.1" + ] ], "bed": [ [ @@ -54,15 +107,19 @@ "test2.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" ] ], - "versions": [ - "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + "versions_bedtools": [ + [ + "BEDTOOLS_MAKEWINDOWS", + "bedtools", + "2.31.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-08-26T14:03:27.118372" + "timestamp": "2026-01-21T11:31:51.390932315" } } \ No newline at end of file diff --git a/modules/nf-core/bedtools/map/main.nf b/modules/nf-core/bedtools/map/main.nf index 59281e8b..ef4b4372 100644 --- a/modules/nf-core/bedtools/map/main.nf +++ b/modules/nf-core/bedtools/map/main.nf @@ -1,11 +1,11 @@ process BEDTOOLS_MAP { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' + : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}" input: tuple val(meta), path(intervals1), path(intervals2) @@ -13,7 +13,7 @@ process BEDTOOLS_MAP { output: tuple val(meta), path("*.${extension}"), emit: mapped - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools when: task.ext.when == null || task.ext.when @@ -23,36 +23,26 @@ process BEDTOOLS_MAP { def prefix = task.ext.prefix ?: "${meta.id}" extension = intervals1.getExtension() def sizes = chrom_sizes ? "-g ${chrom_sizes}" : '' - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if ("${intervals1}" == "${prefix}.${extension}" || "${intervals2}" == "${prefix}.${extension}") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } """ bedtools \\ map \\ - -a $intervals1 \\ - -b $intervals2 \\ - $args \\ - $sizes \\ + -a ${intervals1} \\ + -b ${intervals2} \\ + ${args} \\ + ${sizes} \\ > ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" extension = intervals1.getExtension() - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ if ("${intervals1}" == "${prefix}.${extension}" || "${intervals2}" == "${prefix}.${extension}") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } """ touch ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS """ } diff --git a/modules/nf-core/bedtools/map/meta.yml b/modules/nf-core/bedtools/map/meta.yml index 4e56bb94..2635b63c 100644 --- a/modules/nf-core/bedtools/map/meta.yml +++ b/modules/nf-core/bedtools/map/meta.yml @@ -23,10 +23,12 @@ input: type: file description: BAM/BED/GFF/VCF pattern: "*.{bed|gff|vcf}" + ontologies: [] - intervals2: type: file description: BAM/BED/GFF/VCF pattern: "*.{bed|gff|vcf}" + ontologies: [] - - meta2: type: map description: | @@ -36,23 +38,41 @@ input: type: file description: Chromosome sizes file pattern: "*{.sizes,.txt}" + ontologies: [] output: - - mapped: - - meta: + mapped: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - "*.${extension}": type: file - description: File containing the description of overlaps found between the features - in A and the features in B, with statistics + description: File containing the description of overlaps found between the + features in A and the features in B, with statistics pattern: "*.${extension}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_bedtools: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool authors: - "@ekushele" maintainers: diff --git a/modules/nf-core/bedtools/map/tests/main.nf.test.snap b/modules/nf-core/bedtools/map/tests/main.nf.test.snap index 48ea6b2c..d95adc3d 100644 --- a/modules/nf-core/bedtools/map/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/map/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + [ + "BEDTOOLS_MAP", + "bedtools", + "2.31.1" + ] ], "mapped": [ [ @@ -21,21 +25,31 @@ "test_out.bed:md5,d3aeb1ec7b90e0d5a6d1b9a4614ab96a" ] ], - "versions": [ - "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + "versions_bedtools": [ + [ + "BEDTOOLS_MAP", + "bedtools", + "2.31.1" + ] ] } ], - "timestamp": "2023-11-30T09:46:52.843854571" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T11:32:13.585932455" }, "sarscov2 - [bed1, bed2], [] - stub": { "content": [ "test_out.bed", - [ 
- "versions.yml:md5,1a9145744687b0d2191491d534697dc4" - ] + null ], - "timestamp": "2023-11-30T09:56:57.011945259" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T11:32:28.971257714" }, "sarscov2 - [bed, vcf], []": { "content": [ @@ -49,7 +63,11 @@ ] ], "1": [ - "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + [ + "BEDTOOLS_MAP", + "bedtools", + "2.31.1" + ] ], "mapped": [ [ @@ -59,11 +77,19 @@ "test_out.bed:md5,cabd34d1132834581e31f53dfa66ec03" ] ], - "versions": [ - "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + "versions_bedtools": [ + [ + "BEDTOOLS_MAP", + "bedtools", + "2.31.1" + ] ] } ], - "timestamp": "2023-11-30T09:46:58.912139308" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T11:32:21.316572693" } } \ No newline at end of file diff --git a/modules/nf-core/bedtools/map/tests/nextflow.config b/modules/nf-core/bedtools/map/tests/nextflow.config index df373958..3d2eea40 100644 --- a/modules/nf-core/bedtools/map/tests/nextflow.config +++ b/modules/nf-core/bedtools/map/tests/nextflow.config @@ -1,3 +1,3 @@ process { ext.prefix = { "${meta.id}_out" } -} \ No newline at end of file +} diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml index c069e281..f3637444 100644 --- a/modules/nf-core/bwamem2/index/environment.yml +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -6,8 +6,8 @@ channels: dependencies: # renovate: datasource=conda depName=bioconda/bwa-mem2 - - bwa-mem2=2.2.1 + - bwa-mem2=2.3 # renovate: datasource=conda depName=bioconda/htslib - - htslib=1.21 + - htslib=1.22.1 # renovate: datasource=conda depName=bioconda/samtools - - samtools=1.21 + - samtools=1.22.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf index 09826e66..cb2c4bb2 100644 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -1,20 +1,20 @@ process 
BWAMEM2_INDEX { tag "$fasta" - // NOTE Requires 28N GB memory where N is the size of the reference sequence + // NOTE Requires 28N GB memory where N is the size of the reference sequence, floor of 280M // source: https://github.com/bwa-mem2/bwa-mem2/issues/9 - memory { 28.B * fasta.size() } + memory { 280.MB * Math.ceil(fasta.size() / 10000000) * task.attempt } conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac054213e67b3c9308e409b459080bbe438f8fd6c646c351bc42887f35a42e7/data' : - 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:e1f420694f8e42bd' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e0/e05ce34b46ad42810eb29f74e4e304c0cb592b2ca15572929ed8bbaee58faf01/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:db98f81f55b64113' }" input: tuple val(meta), path(fasta) output: tuple val(meta), path("bwamem2"), emit: index - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bwamem2'), eval('bwa-mem2 version | grep -o -E "[0-9]+(\\.[0-9]+)+"'), emit: versions_bwamem2, topic: versions when: task.ext.when == null || task.ext.when @@ -27,12 +27,8 @@ process BWAMEM2_INDEX { bwa-mem2 \\ index \\ $args \\ - $fasta -p bwamem2/${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - END_VERSIONS + -p bwamem2/${prefix} \\ + $fasta """ stub: @@ -45,10 +41,5 @@ process BWAMEM2_INDEX { touch bwamem2/${prefix}.pac touch bwamem2/${prefix}.amb touch bwamem2/${prefix}.bwt.2bit.64 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - END_VERSIONS """ } diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml index 1437b20f..12074860 100644 --- 
a/modules/nf-core/bwamem2/index/meta.yml +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -12,7 +12,8 @@ tools: a large reference genome, such as the human genome. homepage: https://github.com/bwa-mem2/bwa-mem2 documentation: https://github.com/bwa-mem2/bwa-mem2#usage - licence: ["MIT"] + licence: + - "MIT" identifier: "biotools:bwa-mem2" input: - - meta: @@ -24,26 +25,48 @@ input: type: file description: Input genome fasta file ontologies: - - edam: "http://edamontology.org/data_2044" # Sequence - - edam: "http://edamontology.org/format_1929" # FASTA + - edam: "http://edamontology.org/data_2044" + - edam: "http://edamontology.org/format_1929" output: - - index: - - meta: + index: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - bwamem2: - type: file + type: string description: BWA genome index files pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" ontologies: - - edam: "http://edamontology.org/data_3210" # Genome index - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + - edam: "http://edamontology.org/data_3210" + versions_bwamem2: + - - ${task.process}: + type: string + description: The name of the process + - bwamem2: + type: string + description: The name of the tool + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" + - bwa-mem2 version | grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bwamem2: + type: string + description: The name of the tool + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" + - bwa-mem2 version | grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool authors: - "@maxulysse" 
maintainers: diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test b/modules/nf-core/bwamem2/index/tests/main.nf.test index dbf11132..3ee91048 100644 --- a/modules/nf-core/bwamem2/index/tests/main.nf.test +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test @@ -8,7 +8,30 @@ nextflow_process { script "../main.nf" process "BWAMEM2_INDEX" - test("BWAMEM2 index") { + test("fasta") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta - stub") { + + options "-stub" when { process { diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test.snap b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap index 69b268ee..776e87be 100644 --- a/modules/nf-core/bwamem2/index/tests/main.nf.test.snap +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap @@ -1,5 +1,58 @@ { - "BWAMEM2 index": { + "fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt.2bit.64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt.2bit.64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bwamem2": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": 
"0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:19:38.013344" + }, + "fasta": { "content": [ { "0": [ @@ -17,7 +70,11 @@ ] ], "1": [ - "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] ], "index": [ [ @@ -33,15 +90,19 @@ ] ] ], - "versions": [ - "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + "versions_bwamem2": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-03-18T12:59:39.132616" + "timestamp": "2026-02-09T16:19:32.542622" } } \ No newline at end of file diff --git a/modules/nf-core/cooler/cload/environment.yml b/modules/nf-core/cooler/cload/environment.yml new file mode 100644 index 00000000..0a8647f5 --- /dev/null +++ b/modules/nf-core/cooler/cload/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cooler=0.10.4 diff --git a/modules/nf-core/cooler/cload/main.nf b/modules/nf-core/cooler/cload/main.nf new file mode 100644 index 00000000..109beac6 --- /dev/null +++ b/modules/nf-core/cooler/cload/main.nf @@ -0,0 +1,41 @@ +process COOLER_CLOAD { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cooler:0.10.4--pyhdfd78af_0' : + 'biocontainers/cooler:0.10.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(contacts), path(index) + tuple val(meta2), path(chromsizes) + val(mode) + val(cool_bin) + + output: + tuple val(meta), path("*.cool"), emit: cool + tuple val("${task.process}"), val('cooler'), eval('cooler --version 2>&1 | sed "s/cooler, version //"'), emit: versions_cooler, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def nproc = mode in ["pairix", "tabix"] ? "--nproc ${task.cpus}" : "" + """ + cooler cload ${mode} \\ + ${args} \\ + ${nproc} \\ + ${chromsizes}:${cool_bin} \\ + ${contacts} \\ + ${prefix}.cool + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cool + """ +} diff --git a/modules/nf-core/cooler/cload/meta.yml b/modules/nf-core/cooler/cload/meta.yml new file mode 100644 index 00000000..e7a79b15 --- /dev/null +++ b/modules/nf-core/cooler/cload/meta.yml @@ -0,0 +1,88 @@ +name: cooler_cload +description: Create a cooler from genomic pairs and bins +keywords: + - cool + - cooler + - cload + - hic +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - contacts: + type: file + description: Path to contacts (e.g. read pairs, pairix, tabix) file. + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - index: + type: file + description: Path to index file of the contacts. 
+ ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - chromsizes: + type: file + description: Path to a chromsizes file. + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - mode: + type: string + description: | + Input mode for cooler cload - one of pairs, pairix, tabix + - cool_bin: + type: integer + description: Bin size in bp +output: + cool: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cool": + type: file + description: Output COOL file path + pattern: "*.cool" + ontologies: [] + versions_cooler: + - - ${task.process}: + type: string + description: The name of the process + - cooler: + type: string + description: The name of the tool + - cooler --version 2>&1 | sed "s/cooler, version //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - cooler: + type: string + description: The name of the tool + - cooler --version 2>&1 | sed "s/cooler, version //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@jianhong" + - "@muffato" +maintainers: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/cload/tests/main.nf.test b/modules/nf-core/cooler/cload/tests/main.nf.test new file mode 100644 index 00000000..7ee4ec86 --- /dev/null +++ b/modules/nf-core/cooler/cload/tests/main.nf.test @@ -0,0 +1,167 @@ +nextflow_process { + + name "Test Process COOLER_CLOAD" + config "./nextflow.config" + script "../main.nf" + process "COOLER_CLOAD" + + tag "modules" + tag "modules_nfcore" + tag "cooler" + tag "cooler/cload" + tag "cooler/dump" + + test("test_cooler_cload_pairix") { + when { + + params { + module_args = "" + } + + process { + """ + input[0] = [ 
[id:'test_pairix', single_end:false],// meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2', checkIfExists:true) + ] + input[1] = [ + [id:'test_pairix', single_end:false], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes', checkIfExists:true) + ] + input[2] = "pairix" + input[3] = 2000000 + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions")}, + process.out.cool.collect{file(it[1]).name} + ).match() } + ) + + } + } + + + test("test_cooler_cload_pairs") { + + when { + + params { + module_args = '--chrom1 1 --pos1 2 --chrom2 4 --pos2 5 -N' + } + + process { + """ + input[0] = [ + [id:'test_pairs', single_end:false],// meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs', checkIfExists:true), + [] + ] + input[1] = [ + [id:'test_pairs', single_end:false], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes', checkIfExists:true) + ] + input[2] = "pairs" + input[3] = 2000000 + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions")}, + process.out.cool.collect{file(it[1]).name} + ).match() } + ) + + } + } + + + test("test_cooler_cload_tabix") { + + when { + + params { + module_args = "" + } + + process { + """ + input[0] = [ + [id:'test_tabix', single_end:false],// meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz', checkIfExists:true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz.tbi', checkIfExists:true) + ] + input[1] = [ + [id:'test_tabix', single_end:false], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes',checkIfExists:true) + ] + input[2] = "tabix" + input[3] = 2000000 + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions")}, + process.out.cool.collect{file(it[1]).name} + ).match() } + ) + + } + + } + + + test("test_cooler_cload_pairix - stub") { + + options '-stub' + + when { + + params { + module_args = "" + } + + process { + """ + input[0] = [ + [id:'test_pairix', single_end:false],// meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2', checkIfExists:true) + ] + input[1] = [ + [id:'test_pairix', single_end:false], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes', checkIfExists:true) + ] + input[2] = "pairix" + input[3] = 2000000 + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + + } + } +} diff --git a/modules/nf-core/cooler/cload/tests/main.nf.test.snap b/modules/nf-core/cooler/cload/tests/main.nf.test.snap new file mode 100644 index 00000000..80476396 --- /dev/null +++ b/modules/nf-core/cooler/cload/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "test_cooler_cload_pairs": { + "content": [ + { + "versions_cooler": [ + [ + "COOLER_CLOAD", + "cooler", + "0.10.4" + ] + ] + }, + [ + "test_pairs.cool" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T12:57:13.675799" + }, + 
"test_cooler_cload_tabix": { + "content": [ + { + "versions_cooler": [ + [ + "COOLER_CLOAD", + "cooler", + "0.10.4" + ] + ] + }, + [ + "test_tabix.cool" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T12:57:23.827346" + }, + "test_cooler_cload_pairix": { + "content": [ + { + "versions_cooler": [ + [ + "COOLER_CLOAD", + "cooler", + "0.10.4" + ] + ] + }, + [ + "test_pairix.cool" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T12:57:05.111461" + }, + "test_cooler_cload_pairix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_pairix", + "single_end": false + }, + "test_pairix.cool:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "COOLER_CLOAD", + "cooler", + "0.10.4" + ] + ], + "cool": [ + [ + { + "id": "test_pairix", + "single_end": false + }, + "test_pairix.cool:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_cooler": [ + [ + "COOLER_CLOAD", + "cooler", + "0.10.4" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T12:57:29.005184" + } +} \ No newline at end of file diff --git a/modules/nf-core/cooler/cload/tests/nextflow.config b/modules/nf-core/cooler/cload/tests/nextflow.config new file mode 100644 index 00000000..095becee --- /dev/null +++ b/modules/nf-core/cooler/cload/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "COOLER_CLOAD" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/cooler/zoomify/environment.yml b/modules/nf-core/cooler/zoomify/environment.yml new file mode 100644 index 00000000..be496d89 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cooler=0.10.4 + - conda-forge::numpy=1.26.4 diff --git 
a/modules/nf-core/cooler/zoomify/main.nf b/modules/nf-core/cooler/zoomify/main.nf new file mode 100644 index 00000000..b2a5b2b7 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/main.nf @@ -0,0 +1,36 @@ +process COOLER_ZOOMIFY { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cooler:0.10.4--pyhdfd78af_0' : + 'biocontainers/cooler:0.10.4--pyhdfd78af_0' }" + + input: + tuple val(meta), path(cool) + + output: + tuple val(meta), path("*.mcool"), emit: mcool + tuple val("${task.process}"), val('cooler'), eval('cooler --version 2>&1 | sed "s/cooler, version //"'), emit: versions_cooler, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cooler zoomify \\ + $args \\ + -n $task.cpus \\ + -o ${prefix}.mcool \\ + $cool + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.mcool + """ +} diff --git a/modules/nf-core/cooler/zoomify/meta.yml b/modules/nf-core/cooler/zoomify/meta.yml new file mode 100644 index 00000000..4bfc50ce --- /dev/null +++ b/modules/nf-core/cooler/zoomify/meta.yml @@ -0,0 +1,63 @@ +name: cooler_zoomify +description: Generate a multi-resolution cooler file by coarsening +keywords: + - mcool + - cool + - cooler +tools: + - cooler: + description: Sparse binary format for genomic interaction matrices + homepage: https://open2c.github.io/cooler/ + documentation: https://cooler.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/open2c/cooler + doi: "10.1093/bioinformatics/btz540" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + ontologies: [] +output: + mcool: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mcool": + type: file + description: Output mcool file + pattern: "*.mcool" + ontologies: [] + versions_cooler: + - - ${task.process}: + type: string + description: The name of the process + - cooler: + type: string + description: The name of the tool + - cooler --version 2>&1 | sed "s/cooler, version //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - cooler: + type: string + description: The name of the tool + - cooler --version 2>&1 | sed "s/cooler, version //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/cooler/zoomify/tests/main.nf.test b/modules/nf-core/cooler/zoomify/tests/main.nf.test new file mode 100644 index 00000000..bc8ae4c1 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process COOLER_ZOOMIFY" + config "./nextflow.config" + script "../main.nf" + process "COOLER_ZOOMIFY" + + tag "modules" + tag "modules_nfcore" + tag "cooler" + tag "cooler/zoomify" + + test("test_cooler_zoomify") { + + when { + params { + module_args = '-r 2,4,8' + } + process { + """ + input[0] = [ + [id:'test'],// meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool', checkIfExists:true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions")}, + process.out.mcool.collect{ file(it[1]).name } + ).match() } + ) + } + } + + + 
test("test_cooler_zoomify -- stub") { + + options '-stub' + + when { + params { + module_args = '-r 2,4,8' + } + process { + """ + input[0] = [ + [id:'test'],// meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool', checkIfExists:true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cooler/zoomify/tests/main.nf.test.snap b/modules/nf-core/cooler/zoomify/tests/main.nf.test.snap new file mode 100644 index 00000000..5d5a207b --- /dev/null +++ b/modules/nf-core/cooler/zoomify/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "test_cooler_zoomify -- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.mcool:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "COOLER_ZOOMIFY", + "cooler", + "0.10.4" + ] + ], + "mcool": [ + [ + { + "id": "test" + }, + "test.mcool:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_cooler": [ + [ + "COOLER_ZOOMIFY", + "cooler", + "0.10.4" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T12:59:02.183744" + }, + "test_cooler_zoomify": { + "content": [ + { + "versions_cooler": [ + [ + "COOLER_ZOOMIFY", + "cooler", + "0.10.4" + ] + ] + }, + [ + "test.mcool" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T12:58:56.885229" + } +} \ No newline at end of file diff --git a/modules/nf-core/cooler/zoomify/tests/nextflow.config b/modules/nf-core/cooler/zoomify/tests/nextflow.config new file mode 100644 index 00000000..606da9f6 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "COOLER_ZOOMIFY" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml index f52109e8..185a0f54 100644 --- 
a/modules/nf-core/gawk/environment.yml +++ b/modules/nf-core/gawk/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::gawk=5.3.0 + - conda-forge::gawk=5.3.1 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf index 615b2ce9..33dd24cc 100644 --- a/modules/nf-core/gawk/main.nf +++ b/modules/nf-core/gawk/main.nf @@ -4,8 +4,8 @@ process GAWK { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : - 'biocontainers/gawk:5.3.0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a1/a125c778baf3865331101a104b60d249ee15fe1dca13bdafd888926cc5490a34/data' : + 'community.wave.seqera.io/library/gawk:5.3.1--e09efb5dfc4b8156' }" input: tuple val(meta), path(input, arity: '0..*') @@ -14,7 +14,7 @@ process GAWK { output: tuple val(meta), path("*.${suffix}"), emit: output - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('gawk'), eval("awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//'"), topic: versions, emit: versions_gawk when: task.ext.when == null || task.ext.when @@ -23,18 +23,18 @@ process GAWK { def args = task.ext.args ?: '' // args is used for the main arguments of the tool def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files + suffix = task.ext.suffix ?: "${input.collect{ file -> file.getExtension()}.get(0)}" // use the first extension of the input files program = program_file ? "-f ${program_file}" : "${args2}" - lst_gz = input.findResults{ it.getExtension().endsWith("gz") ? it.toString() : null } + lst_gz = input.findResults{ file -> file.getExtension().endsWith("gz") ? 
file.toString() : null } unzip = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : "" - input_cmd = input.collect { it.toString() - ~/\.gz$/ }.join(" ") + input_cmd = input.collect { file -> file.toString() - ~/\.gz$/ }.join(" ") output_cmd = suffix.endsWith("gz") ? "| gzip > ${prefix}.${suffix}" : "> ${prefix}.${suffix}" output = disable_redirect_output ? "" : output_cmd - cleanup = lst_gz ? "rm ${lst_gz.collect{ it - ~/\.gz$/ }.join(" ")}" : "" + cleanup = lst_gz ? "rm ${lst_gz.collect{ file -> file - ~/\.gz$/ }.join(" ")}" : "" - input.collect{ - assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" + input.collect{ file -> + assert file.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" } """ @@ -47,24 +47,14 @@ process GAWK { ${output} ${cleanup} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') - END_VERSIONS """ stub: prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: "${input.getExtension()}" + suffix = task.ext.suffix ?: "${input.collect{ file -> file.getExtension()}.get(0)}" def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" """ ${create_cmd} ${prefix}.${suffix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') - END_VERSIONS """ } diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml index 34c50b12..96cd0c72 100644 --- a/modules/nf-core/gawk/meta.yml +++ b/modules/nf-core/gawk/meta.yml @@ -15,7 +15,8 @@ tools: homepage: "https://www.gnu.org/software/gawk/" documentation: "https://www.gnu.org/software/gawk/manual/" tool_dev_url: "https://www.gnu.org/prep/ftp.html" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" input: - - meta: @@ -25,38 +26,58 @@ input: e.g. 
[ id:'test', single_end:false ] - input: type: file - description: The input file - Specify the logic that needs to be executed on - this file on the `ext.args2` or in the program file. - If the files have a `.gz` extension, they will be unzipped using `zcat`. + description: The input file - Specify the logic that needs to be executed + on this file on the `ext.args2` or in the program file. If the files + have a `.gz` extension, they will be unzipped using `zcat`. pattern: "*" - - - program_file: - type: file - description: Optional file containing logic for awk to execute. If you don't - wish to use a file, you can use `ext.args2` to specify the logic. - pattern: "*" - - - disable_redirect_output: - type: boolean - description: Disable the redirection of awk output to a given file. This is - useful if you want to use awk's built-in redirect to write files instead - of the shell's redirect. + ontologies: [] + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't + wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" + ontologies: [] + - disable_redirect_output: + type: boolean + description: Disable the redirection of awk output to a given file. This is + useful if you want to use awk's built-in redirect to write files instead + of the shell's redirect. output: - - output: - - meta: + output: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*.${suffix}": type: file - description: The output file - if using shell redirection, specify the name of this - file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure - the awk program produces files with the extension in `ext.suffix`. + description: The output file - if using shell redirection, specify the + name of this file using `ext.prefix` and the extension using + `ext.suffix`. 
Otherwise, ensure the awk program produces files with + the extension in `ext.suffix`. pattern: "*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_gawk: + - - ${task.process}: + type: string + description: The name of the process + - gawk: + type: string + description: The name of the tool + - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - gawk: + type: string + description: The name of the tool + - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//': + type: eval + description: The expression to obtain the version of the tool authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test index 54462271..3bd0a43d 100644 --- a/modules/nf-core/gawk/tests/main.nf.test +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -19,7 +19,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] input[1] = [] @@ -29,10 +29,35 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) + assert process.success + assert snapshot(sanitizeOutput(process.out)).match() + } + } + + test("Convert fasta to bed - stub") { + + options "-stub" + + when { + params { + gawk_suffix = "bed" + gawk_args2 = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }\'' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + input[2] = false + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() 
} } @@ -45,7 +70,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] input[1] = Channel.of('BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }').collectFile(name:"program.awk") @@ -55,10 +80,8 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) + assert process.success + assert snapshot(sanitizeOutput(process.out)).match() } } @@ -71,7 +94,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] input[1] = [] @@ -81,10 +104,8 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) + assert process.success + assert snapshot(sanitizeOutput(process.out)).match() } } @@ -97,7 +118,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map + [ id:'test' ], [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] ] @@ -108,10 +129,8 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) + assert process.success + assert snapshot(sanitizeOutput(process.out)).match() } } @@ -124,7 +143,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map + [ id:'test' ], [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true)] ] @@ -135,10 +154,8 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - { assert 
snapshot(process.out).match() } - ) + assert process.success + assert snapshot(sanitizeOutput(process.out)).match() } } @@ -151,7 +168,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] input[1] = [] @@ -161,10 +178,8 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) + assert process.success + assert snapshot(sanitizeOutput(process.out)).match() } } @@ -178,7 +193,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'hello' ], // meta map + [ id:'hello' ], [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] ] @@ -189,10 +204,8 @@ nextflow_process { } then { - assertAll( - { assert process.failed }, - { assert process.errorReport.contains("Input and output names are the same, set prefix in module configuration to disambiguate!") } - ) + assert process.failed + assert process.errorReport.contains("Input and output names are the same, set prefix in module configuration to disambiguate!") } } -} \ No newline at end of file +} diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap index d8e8ac75..9d6a3697 100644 --- a/modules/nf-core/gawk/tests/main.nf.test.snap +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "Compress after processing": { "content": [ { - "0": [ + "output": [ [ { "id": "test" @@ -10,32 +10,25 @@ "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" ] ], - "1": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" - ], - "output": [ + "versions_gawk": [ [ - { - "id": "test" - }, - "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + "GAWK", + "gawk", + "5.3.1" ] - ], - "versions": [ - 
"versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" ] } ], + "timestamp": "2026-03-04T11:31:50.761549948", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" - }, - "timestamp": "2024-11-27T17:11:20.054143406" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "Convert fasta to bed": { "content": [ { - "0": [ + "output": [ [ { "id": "test" @@ -43,9 +36,24 @@ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" ] ], - "1": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" - ], + "versions_gawk": [ + [ + "GAWK", + "gawk", + "5.3.1" + ] + ] + } + ], + "timestamp": "2026-03-04T11:30:50.804933797", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Convert fasta to bed with program file": { + "content": [ + { "output": [ [ { @@ -54,18 +62,22 @@ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" ] ], - "versions": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + "versions_gawk": [ + [ + "GAWK", + "gawk", + "5.3.1" + ] ] } ], + "timestamp": "2026-03-04T11:31:10.838989113", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-19T13:14:02.347809811" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, - "Convert fasta to bed with program file": { + "Convert fasta to bed - stub": { "content": [ { "0": [ @@ -73,35 +85,43 @@ { "id": "test" }, - "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + [ + "GAWK", + "gawk", + "5.3.1" + ] ], "output": [ [ { "id": "test" }, - "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + "versions_gawk": [ + [ + "GAWK", + "gawk", + "5.3.1" + ] ] } ], + "timestamp": "2026-03-04T11:31:00.182649403", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-19T13:14:11.894616209" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + 
} }, "Extract first column from multiple files": { "content": [ { - "0": [ + "output": [ [ { "id": "test" @@ -109,32 +129,25 @@ "test.bed:md5,566c51674bd643227bb2d83e0963376d" ] ], - "1": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" - ], - "output": [ + "versions_gawk": [ [ - { - "id": "test" - }, - "test.bed:md5,566c51674bd643227bb2d83e0963376d" + "GAWK", + "gawk", + "5.3.1" ] - ], - "versions": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" ] } ], + "timestamp": "2026-03-04T11:31:30.796772884", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-19T22:04:47.729300129" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "Unzip files before processing": { "content": [ { - "0": [ + "output": [ [ { "id": "test" @@ -142,32 +155,25 @@ "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403" ] ], - "1": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" - ], - "output": [ + "versions_gawk": [ [ - { - "id": "test" - }, - "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403" + "GAWK", + "gawk", + "5.3.1" ] - ], - "versions": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" ] } ], + "timestamp": "2026-03-04T11:31:40.72259289", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-19T22:08:19.533527657" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "Convert fasta to bed using awk redirect instead of shell redirect": { "content": [ { - "0": [ + "output": [ [ { "id": "test" @@ -175,26 +181,19 @@ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" ] ], - "1": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" - ], - "output": [ + "versions_gawk": [ [ - { - "id": "test" - }, - "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + "GAWK", + "gawk", + "5.3.1" ] - ], - "versions": [ - "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" ] } ], + "timestamp": "2026-03-04T11:31:20.33222004", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" - }, - "timestamp": "2025-03-05T08:31:09.88842854" + 
"nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/environment.yml b/modules/nf-core/gnu/sort/environment.yml index babcfb55..0c4cd942 100644 --- a/modules/nf-core/gnu/sort/environment.yml +++ b/modules/nf-core/gnu/sort/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::coreutils=9.3 + - conda-forge::coreutils=9.5 diff --git a/modules/nf-core/gnu/sort/main.nf b/modules/nf-core/gnu/sort/main.nf index e1167666..a16f6291 100644 --- a/modules/nf-core/gnu/sort/main.nf +++ b/modules/nf-core/gnu/sort/main.nf @@ -4,15 +4,15 @@ process GNU_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/coreutils:9.3': - 'biocontainers/coreutils:9.3' }" + 'https://depot.galaxyproject.org/singularity/coreutils:9.5': + 'biocontainers/coreutils:9.5' }" input: tuple val(meta), path(input) output: - tuple val(meta), file( "${output_file}" ) , emit: sorted - path "versions.yml" , emit: versions + tuple val(meta), path( "${output_file}" ) , emit: sorted + tuple val("${task.process}"), val('coreutils'), eval("sort --version |& sed '1!d ; s/sort (GNU coreutils) //'"), emit: versions_coreutils, topic: versions when: task.ext.when == null || task.ext.when @@ -22,30 +22,19 @@ process GNU_SORT { def prefix = task.ext.prefix ?: "${meta.id}" suffix = task.ext.suffix ?: "${input.extension}" output_file = "${prefix}.${suffix}" - def VERSION = "9.3" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
""" sort ${args} ${input} > ${output_file} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - coreutils: $VERSION - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" suffix = task.ext.suffix ?: "${input.extension}" output_file = "${prefix}.${suffix}" - def VERSION = "9.3" - if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ touch ${output_file} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - coreutils: $VERSION - END_VERSIONS """ } diff --git a/modules/nf-core/gnu/sort/meta.yml b/modules/nf-core/gnu/sort/meta.yml index c555dbb5..48dde4a0 100644 --- a/modules/nf-core/gnu/sort/meta.yml +++ b/modules/nf-core/gnu/sort/meta.yml @@ -3,13 +3,18 @@ description: | Writes a sorted concatenation of file/s keywords: - GNU + - coreutils - sort - merge compare tools: - - sort: - description: "Writes a sorted concatenation of file/s" - homepage: "https://github.com/vgl-hub/gfastats" - documentation: "https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html" + - gnu: + description: "The GNU Core Utilities are the basic file, shell and text manipulation + utilities of the GNU operating system. These are the core utilities which are + expected to exist on every operating system." + homepage: "https://www.gnu.org/software/coreutils/" + documentation: "https://www.gnu.org/software/coreutils/manual/html_node/index.html" + tool_dev_url: "https://git.savannah.gnu.org/cgit/coreutils.git" + doi: "10.5281/zenodo.581670" licence: ["GPL"] identifier: "" input: @@ -22,18 +27,42 @@ input: type: file description: Draft assembly file pattern: "*.{txt,bed,interval,genome,bins}" + ontologies: [] output: - - sorted: - - meta: + sorted: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + pattern: "${output_file}" + - "${output_file}": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "${output_file}" + versions_coreutils: + - - ${task.process}: + type: string + description: The process the versions were collected from + - coreutils: + type: string + description: The tool name + - "sort --version |& sed '1!d ; s/sort (GNU coreutils) //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - coreutils: + type: string + description: The tool name + - "sort --version |& sed '1!d ; s/sort (GNU coreutils) //'": + type: string + description: The command used to generate the version of the tool authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test b/modules/nf-core/gnu/sort/tests/main.nf.test index e4030187..9aef3daf 100644 --- a/modules/nf-core/gnu/sort/tests/main.nf.test +++ b/modules/nf-core/gnu/sort/tests/main.nf.test @@ -16,10 +16,9 @@ nextflow_process { process { """ input[0] = [ - [id:'genome_test'], - file(params.test_data['generic']['unsorted_data']['unsorted_text']['genome_file'], - checkIfExists: true) - ] + [id:'genome_test'], + file(params.modules_testdata_base_path + 'generic/unsorted_data/unsorted_text/test.genome', checkIfExists: true) + ] """ } } @@ -27,11 +26,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.sorted[0][1]).name - ).match("genome_sort") - } + { assert snapshot(process.out).match() } ) } @@ -43,10 +38,9 @@ nextflow_process { process { """ input[0] = [ - [id:'test'], - 
file(params.test_data['generic']['unsorted_data']['unsorted_text']['intervals'], - checkIfExists: true) - ] + [id:'test'], + file(params.modules_testdata_base_path + 'generic/unsorted_data/unsorted_text/test.bed', checkIfExists: true) + ] """ } } @@ -54,11 +48,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.sorted[0][1]).name - ).match("interval_sort") - } + { assert snapshot(process.out).match() } ) } @@ -71,10 +61,9 @@ nextflow_process { process { """ input[0] = [ - [id:'test'], - file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], - checkIfExists: true) - ] + [id:'test'], + file(params.modules_testdata_base_path + 'generic/unsorted_data/unsorted_text/test.csv', checkIfExists: true) + ] """ } } @@ -82,11 +71,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.sorted[0][1]).name - ).match("csv_sort") - } + { assert snapshot(process.out).match() } ) } @@ -100,10 +85,9 @@ nextflow_process { process { """ input[0] = [ - [id:'test'], - file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], - checkIfExists: true) - ] + [id:'test'], + file(params.modules_testdata_base_path + 'generic/unsorted_data/unsorted_text/test.csv', checkIfExists: true) + ] """ } } @@ -111,7 +95,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + { assert snapshot(process.out).match() } ) } diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test.snap b/modules/nf-core/gnu/sort/tests/main.nf.test.snap index 63891bc4..f57dc6bb 100644 --- a/modules/nf-core/gnu/sort/tests/main.nf.test.snap +++ b/modules/nf-core/gnu/sort/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + [ + "GNU_SORT", + "coreutils", + "9.5" + ] 
], "sorted": [ [ @@ -21,26 +25,20 @@ "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" ] ], - "versions": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + "versions_coreutils": [ + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-14T11:13:44.714632791" - }, - "interval_sort": { - "content": [ - "test.bed.sorted" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.04.8" }, - "timestamp": "2024-06-14T11:13:37.962807086" + "timestamp": "2026-01-23T15:48:28.77537237" }, "unsorted_csv_sort_stub": { "content": [ @@ -54,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ], "sorted": [ [ @@ -64,26 +66,20 @@ "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + "versions_coreutils": [ + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.04.8" }, - "timestamp": "2024-06-14T11:13:51.456258705" - }, - "csv_sort": { - "content": [ - "test.csv.sorted" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-14T11:13:44.725431761" + "timestamp": "2026-01-23T15:48:45.534463019" }, "unsorted_genome_sort": { "content": [ @@ -97,7 +93,11 @@ ] ], "1": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ], "sorted": [ [ @@ -107,26 +107,20 @@ "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" ] ], - "versions": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + "versions_coreutils": [ + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.04.8" }, - "timestamp": "2024-06-14T11:13:31.041778719" - }, - 
"genome_sort": { - "content": [ - "genome_test.bed.sorted" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-14T11:13:31.060201722" + "timestamp": "2026-01-23T15:47:54.290932481" }, "unsorted_intervals_sort": { "content": [ @@ -140,7 +134,11 @@ ] ], "1": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ], "sorted": [ [ @@ -150,15 +148,19 @@ "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" ] ], - "versions": [ - "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + "versions_coreutils": [ + [ + "GNU_SORT", + "coreutils", + "9.5" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.04.8" }, - "timestamp": "2024-06-14T11:13:37.951397547" + "timestamp": "2026-01-23T15:48:11.626509684" } } \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_complex.config b/modules/nf-core/gnu/sort/tests/sort_complex.config index 103eaaf6..ce4f1518 100644 --- a/modules/nf-core/gnu/sort/tests/sort_complex.config +++ b/modules/nf-core/gnu/sort/tests/sort_complex.config @@ -3,4 +3,4 @@ process { ext.args = { "-t ';' -g -k 1,1 -k 2,2" } ext.suffix = { "csv.sorted" } } -} \ No newline at end of file +} diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_bed.config b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config index d7d52e0f..8496c8d7 100644 --- a/modules/nf-core/gnu/sort/tests/sort_simple_bed.config +++ b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config @@ -3,4 +3,4 @@ process { ext.args = { "-k1,1 -k2,2n" } ext.suffix = { "bed.sorted" } } -} \ No newline at end of file +} diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_genome.config b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config index 4dcec385..c408ece1 100644 --- a/modules/nf-core/gnu/sort/tests/sort_simple_genome.config +++ b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config @@ -3,4 +3,4 @@ process { ext.args = { 
"-k1,1 -k2,2n" } ext.suffix = { "genome.sorted" } } -} \ No newline at end of file +} diff --git a/modules/nf-core/gnu/sort/tests/tags.yml b/modules/nf-core/gnu/sort/tests/tags.yml deleted file mode 100644 index ac40e376..00000000 --- a/modules/nf-core/gnu/sort/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gnu/sort: - - "modules/nf-core/gnu/sort/**" diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 3ffc8e92..a8533e74 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -12,7 +12,7 @@ process GUNZIP { output: tuple val(meta), path("${gunzip}"), emit: gunzip - path "versions.yml", emit: versions + tuple val("${task.process}"), val('gunzip'), eval('gunzip --version 2>&1 | head -1 | sed "s/^.*(gzip) //; s/ Copyright.*//"'), topic: versions, emit: versions_gunzip when: task.ext.when == null || task.ext.when @@ -32,24 +32,14 @@ process GUNZIP { ${args} \\ ${archive} \\ > ${gunzip} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS """ stub: - def args = task.ext.args ?: '' def extension = (archive.toString() - '.gz').tokenize('.')[-1] def name = archive.toString() - '.gz' - ".${extension}" def prefix = task.ext.prefix ?: name gunzip = prefix + ".${extension}" """ touch ${gunzip} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 926bb22a..bba6b3ba 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -34,13 +34,29 @@ output: description: Compressed/uncompressed file pattern: "*.*" ontologies: [] + versions_gunzip: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gunzip: + type: string + description: The tool name + - 
gunzip --version 2>&1 | head -1 | sed "s/^.*(gzip) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The process the versions were collected from + - gunzip: + type: string + description: The tool name + - gunzip --version 2>&1 | head -1 | sed "s/^.*(gzip) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap index a0f0e67e..111ba1bc 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + [ + "GUNZIP", + "gunzip", + "1.13" + ] ], "gunzip": [ [ @@ -21,16 +25,20 @@ "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-12-13T11:48:22.080222697" + "timestamp": "2026-01-19T17:21:56.633550769" }, "Should run without failures - stub": { "content": [ @@ -44,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + [ + "GUNZIP", + "gunzip", + "1.13" + ] ], "gunzip": [ [ @@ -54,16 +66,20 @@ "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": 
"0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-12-13T11:48:14.593020264" + "timestamp": "2026-01-19T17:21:51.435621199" }, "Should run without failures": { "content": [ @@ -77,7 +93,11 @@ ] ], "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + [ + "GUNZIP", + "gunzip", + "1.13" + ] ], "gunzip": [ [ @@ -87,16 +107,20 @@ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" ] ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-12-13T11:48:01.295397925" + "timestamp": "2026-01-19T17:21:40.613975821" }, "Should run without failures - prefix": { "content": [ @@ -110,7 +134,11 @@ ] ], "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + [ + "GUNZIP", + "gunzip", + "1.13" + ] ], "gunzip": [ [ @@ -120,15 +148,19 @@ "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" ] ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-12-13T11:48:07.414271387" + "timestamp": "2026-01-19T17:21:46.086880414" } } \ No newline at end of file diff --git a/modules/nf-core/juicertools/pre/environment.yml b/modules/nf-core/juicertools/pre/environment.yml new file mode 100644 index 00000000..1e4ce8d5 --- /dev/null +++ b/modules/nf-core/juicertools/pre/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::juicertools=2.20.00" + - "conda-forge::openjdk=23.0.2" diff --git a/modules/nf-core/juicertools/pre/main.nf b/modules/nf-core/juicertools/pre/main.nf new file mode 
100644 index 00000000..ad1ef20f --- /dev/null +++ b/modules/nf-core/juicertools/pre/main.nf @@ -0,0 +1,47 @@ +process JUICERTOOLS_PRE { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a2/a268a257cdea987bd60f7717686134f1a3c949e2ae268284642f1ce5a0434289/data' : + 'community.wave.seqera.io/library/juicertools_openjdk:fe58dd49794d6603' }" + + input: + tuple val(meta) , path(pairs) + tuple val(meta2), val(genome_id), path(chromsizes) + + output: + tuple val(meta), path("*.hic"), emit: hic + tuple val("${task.process}"), val('juicer_tools'), eval('juicer_tools -V | grep "Version" | sed "s/Juicer Tools Version //"'), emit: versions_juicertools, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + if(genome_id && chromsizes) { + log.error("Error: both genome_id and chromsizes provided to juicertools/pre! 
Only one of these may be specified.") + } + if(!genome_id && !chromsizes) { + log.error("Error: neither genome_id nor chromsizes provided to juicertools/pre!") + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + input_genome = genome_id ?: chromsizes + """ + export _JAVA_OPTIONS="-Xms${task.memory.toMega().intdiv(4)}m -Xmx${task.memory.toGiga()}g" + + juicer_tools pre \\ + --threads ${task.cpus} \\ + ${args} \\ + ${pairs} \\ + ${prefix}.hic \\ + ${input_genome} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.hic + """ +} diff --git a/modules/nf-core/juicertools/pre/meta.yml b/modules/nf-core/juicertools/pre/meta.yml new file mode 100644 index 00000000..47a7662b --- /dev/null +++ b/modules/nf-core/juicertools/pre/meta.yml @@ -0,0 +1,89 @@ +name: "juicertools_pre" +description: Create a multi-resolution .hic contact matrix for analysis with + Juicer +keywords: + - hic + - contact map + - genomics +tools: + - "juicertools": + description: "Visualization and analysis software for Hi-C data" + homepage: "https://github.com/aidenlab/juicer" + documentation: "https://github.com/aidenlab/juicer" + tool_dev_url: "https://github.com/aidenlab/juicer" + doi: "10.1016/j.cels.2016.07.002" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - pairs: + type: file + description: | + Optionally gzipped TSV file, in one of a number of formats, + including pairs format. See https://github.com/aidenlab/juicer/wiki/Pre#file-format + for details. + pattern: "*.{pairs,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - genome_id: + type: string + description: | + String of a supported genome ID, see https://github.com/aidenlab/juicer/wiki/Pre#usage + for details. 
Incompatible with chromsizes option. + - chromsizes: + type: file + description: | + Headerless TSV file describing chromosome sizes with format: + chrom_name\tlength + + Incompatible with genome_id option. + pattern: "*.{tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV +output: + hic: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.hic": + type: file + description: .hic file for analysis and visualisation in Juicer + pattern: "*.{hic}" + ontologies: [] + versions_juicertools: + - - ${task.process}: + type: string + description: The name of the process + - juicer_tools: + type: string + description: The name of the tool + - juicer_tools -V | grep "Version" | sed "s/Juicer Tools Version //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - juicer_tools: + type: string + description: The name of the tool + - juicer_tools -V | grep "Version" | sed "s/Juicer Tools Version //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/nf-core/juicertools/pre/tests/main.nf.test b/modules/nf-core/juicertools/pre/tests/main.nf.test new file mode 100644 index 00000000..b3241b33 --- /dev/null +++ b/modules/nf-core/juicertools/pre/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process JUICERTOOLS_PRE" + script "../main.nf" + process "JUICERTOOLS_PRE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "juicertools" + tag "juicertools/pre" + tag "gawk" + + setup { + run("GAWK") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [id: "test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs', checkIfExists: true) + ] + input[1] = [] + 
input[2] = false + """ + } + } + } + + test("homo sapiens - pairs - chrom.sizes") { + + when { + + params { + gawk_args2 = "'BEGIN { OFS = \"\\t\" } { gsub(\"+\", \"0\"); gsub(\"-\", \"1\"); print \$3, \$1, \$2, 0, \$6, \$4, \$5, 1}'" + gawk_suffix = "pairs" + module_args = "-n" + } + + process { + """ + input[0] = GAWK.out.output + input[1] = [ + [ id:'test', single_end:false ], // meta map + [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo sapiens - pairs - hg19") { + + when { + + params { + gawk_args2 = "'BEGIN { OFS = \"\\t\" } { gsub(\"+\", \"0\"); gsub(\"-\", \"1\"); print \$3, \$1, \$2, 0, \$6, \$4, \$5, 1}'" + gawk_suffix = "pairs" + module_args = "-n" + } + + process { + """ + input[0] = GAWK.out.output + input[1] = [ + [ id:'test', single_end:false ], // meta map + "hg19", + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo sapiens - pairs - chrom.sizes - stub") { + + options "-stub" + + when { + + params { + gawk_args2 = "'BEGIN { OFS = \"\\t\" } { gsub(\"+\", \"0\"); gsub(\"-\", \"1\"); print \$3, \$1, \$2, 0, \$6, \$4, \$5, 1}'" + gawk_suffix = "pairs" + module_args = "-n" + } + + process { + """ + input[0] = GAWK.out.output + input[1] = [ + [ id:'test', single_end:false ], // meta map + [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/juicertools/pre/tests/main.nf.test.snap b/modules/nf-core/juicertools/pre/tests/main.nf.test.snap new file mode 100644 index 00000000..d7b25ab2 --- /dev/null +++ 
b/modules/nf-core/juicertools/pre/tests/main.nf.test.snap @@ -0,0 +1,125 @@ +{ + "homo sapiens - pairs - chrom.sizes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.hic:md5,d5bbcb30d7bbbc93390e3d39bc8d9919" + ] + ], + "1": [ + [ + "JUICERTOOLS_PRE", + "juicer_tools", + "2.20.00" + ] + ], + "hic": [ + [ + { + "id": "test" + }, + "test.hic:md5,d5bbcb30d7bbbc93390e3d39bc8d9919" + ] + ], + "versions_juicertools": [ + [ + "JUICERTOOLS_PRE", + "juicer_tools", + "2.20.00" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T13:03:31.766801" + }, + "homo sapiens - pairs - hg19": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.hic:md5,308a915c06983d49055068671d7d2358" + ] + ], + "1": [ + [ + "JUICERTOOLS_PRE", + "juicer_tools", + "2.20.00" + ] + ], + "hic": [ + [ + { + "id": "test" + }, + "test.hic:md5,308a915c06983d49055068671d7d2358" + ] + ], + "versions_juicertools": [ + [ + "JUICERTOOLS_PRE", + "juicer_tools", + "2.20.00" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T13:03:56.129414" + }, + "homo sapiens - pairs - chrom.sizes - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.hic:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "JUICERTOOLS_PRE", + "juicer_tools", + "2.20.00" + ] + ], + "hic": [ + [ + { + "id": "test" + }, + "test.hic:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_juicertools": [ + [ + "JUICERTOOLS_PRE", + "juicer_tools", + "2.20.00" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T13:04:04.93171" + } +} \ No newline at end of file diff --git a/modules/nf-core/juicertools/pre/tests/nextflow.config b/modules/nf-core/juicertools/pre/tests/nextflow.config new file mode 100644 index 00000000..9d637a0f --- /dev/null +++ b/modules/nf-core/juicertools/pre/tests/nextflow.config @@ -0,0 +1,10 @@ +process { 
+ withName: GAWK { + ext.args2 = params.gawk_args2 + ext.suffix = params.gawk_suffix + } + + withName: JUICERTOOLS_PRE { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 60677e65..17886061 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -5,6 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.20 - - bioconda::minimap2=2.28 - - bioconda::samtools=1.20 + - bioconda::minimap2=2.29 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index d82dc14d..50d5ab25 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -5,8 +5,8 @@ process MINIMAP2_ALIGN { // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/66/66dc96eff11ab80dfd5c044e9b3425f52d818847b9c074794cf0c02bfa781661/data' : + 'community.wave.seqera.io/library/minimap2_samtools:33bb43c18d22e29c' }" input: tuple val(meta), path(reads) @@ -20,7 +20,7 @@ process MINIMAP2_ALIGN { tuple val(meta), path("*.paf") , optional: true, emit: paf tuple val(meta), path("*.bam") , optional: true, emit: bam tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("minimap2"), eval("minimap2 --version"), topic: versions, emit: versions_minimap2 when: task.ext.when == null || task.ext.when @@ -38,25 +38,17 @@ process MINIMAP2_ALIGN { def bam_input = "${reads.extension}".matches('sam|bam|cram') def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' def query = bam_input ? "-" : reads - def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) - + def target = reference ?: (bam_input ? 
error("Error: minimap2/align BAM input mode requires reference") : reads) """ $samtools_reset_fastq \\ minimap2 \\ - $args \\ - -t $task.cpus \\ - $target \\ - $query \\ - $cigar_paf \\ - $set_cigar_bam \\ - $bam_output - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS + ${args} \\ + -t ${task.cpus} \\ + ${target} \\ + ${query} \\ + ${cigar_paf} \\ + ${set_cigar_bam} \\ + ${bam_output} """ stub: @@ -64,15 +56,11 @@ process MINIMAP2_ALIGN { def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" def bam_input = "${reads.extension}".matches('sam|bam|cram') - def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) - + if(bam_input && !reference) { + error("Error: minimap2/align BAM input mode requires reference!") + } """ touch $output_file ${bam_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ } diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index a4cfc891..40bb20ad 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -26,6 +26,7 @@ input: description: | List of input FASTA or FASTQ files of size 1 and 2 for single-end and paired-end data, respectively. + ontologies: [] - - meta2: type: map description: | @@ -35,23 +36,24 @@ input: type: file description: | Reference database in FASTA format. - - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - - bam_index_extension: - type: string - description: BAM alignment index extension (e.g. 
"bai") - - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + ontologies: [] + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - paf: - - meta: + paf: + - - meta: type: map description: | Groovy Map containing sample information @@ -60,8 +62,9 @@ output: type: file description: Alignment in PAF format pattern: "*.paf" - - bam: - - meta: + ontologies: [] + bam: + - - meta: type: map description: | Groovy Map containing sample information @@ -70,8 +73,9 @@ output: type: file description: Alignment in BAM format pattern: "*.bam" - - index: - - meta: + ontologies: [] + index: + - - meta: type: map description: | Groovy Map containing sample information @@ -80,11 +84,28 @@ output: type: file description: BAM alignment index pattern: "*.bam.*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_minimap2: + - - ${task.process}: + type: string + description: The process name + - minimap2: + type: string + description: The tool name + - minimap2 --version: + type: eval + description: The tool version +topics: + versions: + - - ${task.process}: + type: string + description: The process name + - minimap2: + type: string + description: The tool name + - minimap2 --version: + type: eval + 
description: The tool version authors: - "@heuermh" - "@sofstam" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index 4072c171..34597d6f 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -36,7 +36,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -71,7 +71,7 @@ nextflow_process { bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), file(process.out.index[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -108,7 +108,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -142,7 +142,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -176,7 +176,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -211,7 +211,7 @@ nextflow_process { bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), file(process.out.index[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -438,4 +438,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap 
b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index 12264a85..93e0eb3b 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -4,20 +4,26 @@ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", "test.bam.bai", - [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-25T09:03:00.827260362" + "timestamp": "2026-01-22T15:02:10.851485367" }, "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { "content": [ @@ -44,7 +50,11 @@ ] ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -67,16 +77,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-23T11:21:37.92353539" + "timestamp": "2026-01-22T15:02:56.708796666" }, "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { "content": [ @@ -103,7 +117,11 @@ ] ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -126,16 
+144,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-06-03T11:29:44.669021368" + "timestamp": "2026-01-22T15:02:32.614463827" }, "sarscov2 - fastq, fasta, false, [], false, false - stub": { "content": [ @@ -156,7 +178,11 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ @@ -173,16 +199,20 @@ "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-06-03T11:15:52.738781039" + "timestamp": "2026-01-22T15:02:40.02163098" }, "sarscov2 - fastq, fasta, true, [], false, false - stub": { "content": [ @@ -203,7 +233,11 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -220,93 +254,121 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-06-03T11:15:23.033808223" + "timestamp": "2026-01-22T15:02:25.102539679" }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort 
-@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "1bc392244f228bf52cf0b5a8f6a654c9", - [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-23T11:18:18.964586894" + "timestamp": "2026-01-22T15:01:46.456636022" }, "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "f194745c0ccfcb2a9c0aee094a08750", - [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-23T11:17:48.667488325" + "timestamp": "2026-01-22T15:01:30.525133177" }, "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + 
"@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" ], "f194745c0ccfcb2a9c0aee094a08750", "test.bam.bai", - [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-23T11:18:02.517416733" + "timestamp": "2026-01-22T15:01:38.84829029" }, "sarscov2 - bam, fasta, true, [], false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", - [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-25T09:02:49.64829488" + "timestamp": "2026-01-22T15:02:02.351060285" }, "sarscov2 - bam, fasta, true, [], false, false - stub": { "content": [ @@ -327,7 +389,11 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -344,16 +410,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-23T11:21:22.162291795" + 
"timestamp": "2026-01-22T15:02:47.579634041" }, "sarscov2 - fastq, [], true, false, false": { "content": [ @@ -459,18 +529,24 @@ "@SQ\tSN:ERR5069949.3258358\tLN:151", "@SQ\tSN:ERR5069949.1476386\tLN:151", "@SQ\tSN:ERR5069949.2415814\tLN:150", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "16c1c651f8ec67383bcdee3c55aed94f", - [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-07-23T11:18:34.246998277" + "timestamp": "2026-01-22T15:01:54.090788633" } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml index c2dd2cfd..2f3ba0eb 100644 --- a/modules/nf-core/minimap2/index/environment.yml +++ b/modules/nf-core/minimap2/index/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::minimap2=2.28 + - bioconda::minimap2=2.29 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf index 38320214..dd81eab0 100644 --- a/modules/nf-core/minimap2/index/main.nf +++ b/modules/nf-core/minimap2/index/main.nf @@ -4,15 +4,15 @@ process MINIMAP2_INDEX { // Note: the versions here need to match the versions used in minimap2/align conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/minimap2:2.28--he4a0461_0' : - 'biocontainers/minimap2:2.28--he4a0461_0' }" + 'https://depot.galaxyproject.org/singularity/minimap2:2.29--h577a1d6_0' : + 'biocontainers/minimap2:2.29--h577a1d6_0' }" input: tuple val(meta), path(fasta) output: tuple val(meta), path("*.mmi"), emit: index - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("minimap2"), eval("minimap2 --version"), topic: versions, emit: versions_minimap2 when: task.ext.when == null || task.ext.when @@ -25,20 +25,10 @@ process MINIMAP2_INDEX { -d ${fasta.baseName}.mmi \\ $args \\ $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ stub: """ touch ${fasta.baseName}.mmi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ } diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml index 57c80e29..6985fb0c 100644 --- a/modules/nf-core/minimap2/index/meta.yml +++ b/modules/nf-core/minimap2/index/meta.yml @@ -10,7 +10,8 @@ tools: A versatile pairwise aligner for genomic and spliced nucleotide sequences. homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide - licence: ["MIT"] + licence: + - "MIT" identifier: "" input: - - meta: @@ -22,9 +23,10 @@ input: type: file description: | Reference database in FASTA format. + ontologies: [] output: - - index: - - meta: + index: + - - meta: type: map description: | Groovy Map containing sample information @@ -33,11 +35,28 @@ output: type: file description: Minimap2 fasta index. 
pattern: "*.mmi" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_minimap2: + - - ${task.process}: + type: string + description: The name of the process + - minimap2: + type: string + description: The name of the tool + - minimap2 --version: + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - minimap2: + type: string + description: The name of the tool + - minimap2 --version: + type: eval + description: The expression to obtain the version of the tool authors: - "@yuukiiwa" - "@drpatelh" diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test index 97840ff7..79b7cc55 100644 --- a/modules/nf-core/minimap2/index/tests/main.nf.test +++ b/modules/nf-core/minimap2/index/tests/main.nf.test @@ -29,4 +29,26 @@ nextflow_process { } -} \ No newline at end of file + test("minimap2 index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap index 0b098828..a3ec750b 100644 --- a/modules/nf-core/minimap2/index/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap @@ -1,38 +1,46 @@ { - "Should run without failures": { + "minimap2 index": { "content": [ { "0": [ [ { - "id": "test_ref" + "id": "test" }, "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" ] ], "1": [ - "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] ], "index": [ [ { - "id": 
"test_ref" + "id": "test" }, "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" ] ], - "versions": [ - "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + "versions_minimap2": [ + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-03-18T11:46:30.000058092" + "timestamp": "2026-02-09T16:12:10.625322" }, - "minimap2 index": { + "minimap2 index - stub": { "content": [ { "0": [ @@ -40,29 +48,37 @@ { "id": "test" }, - "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + "genome.mmi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] ], "index": [ [ { "id": "test" }, - "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + "genome.mmi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + "versions_minimap2": [ + [ + "MINIMAP2_INDEX", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-04-05T10:58:29.828187662" + "timestamp": "2026-02-09T16:12:15.244242" } } \ No newline at end of file diff --git a/modules/nf-core/pretextmap/environment.yml b/modules/nf-core/pretextmap/environment.yml index 89f02738..721f0e74 100644 --- a/modules/nf-core/pretextmap/environment.yml +++ b/modules/nf-core/pretextmap/environment.yml @@ -4,6 +4,6 @@ channels: - conda-forge - bioconda dependencies: + - bioconda::htslib=1.23 - bioconda::pretextmap=0.1.9 - - bioconda::pretextgraph=0.0.2 - - bioconda::samtools=1.21 + - bioconda::samtools=1.23 diff --git a/modules/nf-core/pretextmap/main.nf b/modules/nf-core/pretextmap/main.nf index b6abdbc5..55f251ca 100644 --- a/modules/nf-core/pretextmap/main.nf +++ b/modules/nf-core/pretextmap/main.nf @@ -1,60 +1,41 @@ process PRETEXTMAP { tag 
"$meta.id" - label 'process_single' + label 'process_low' conda "${moduleDir}/environment.yml" + container "quay.io/sanger-tol/pretext:0.0.9-yy5-c2" + input: tuple val(meta), path(input) tuple val(meta2), path(fasta), path(fai) output: tuple val(meta), path("*.pretext") , emit: pretext - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('PretextMap'), eval('PretextMap | sed "/Version/!d; s/.*Version //"'), emit: versions_pretextmap, topic: versions + tuple val("${task.process}"), val('samtools'), eval('samtools --version | sed "1!d; s/samtools //"'), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' // PretextMap args - def args2 = task.ext.args2 ?: '' // Samtools view args - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta}" : "" + def args = task.ext.args ?: '' // PretextMap args + def args2 = task.ext.args2 ?: '' // Samtools view args + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def pairs_input = input.toString().endsWith(".pairs.gz") + def input_command = pairs_input ? 
"zcat ${input}" : "samtools view $args2 $reference -h ${input}" """ - if [[ $input == *.pairs.gz ]]; then - zcat $input | PretextMap \\ - $args \\ - -o ${prefix}.pretext - else - samtools \\ - view \\ - $args2 \\ - $reference \\ - -h \\ - $input | \\ - PretextMap \\ - $args \\ - -o ${prefix}.pretext - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pretextmap: \$(PretextMap | sed '/Version/!d; s/.*Version //') - samtools: \$(samtools --version | sed '1!d; s/samtools //') - END_VERSIONS + ${input_command} | PretextMap \\ + $args \\ + -o ${prefix}.pretext """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.pretext - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pretextmap: \$(PretextMap | sed '/Version/!d; s/.*Version //') - samtools: \$(samtools --version | sed '1!d; s/samtools //') - END_VERSIONS """ } diff --git a/modules/nf-core/pretextmap/meta.yml b/modules/nf-core/pretextmap/meta.yml index d79295cd..a1dbc97f 100644 --- a/modules/nf-core/pretextmap/meta.yml +++ b/modules/nf-core/pretextmap/meta.yml @@ -22,6 +22,7 @@ input: type: file description: BAM/CRAM/SAM file or pairs formatted reads file pattern: "*.{bam,cram,sam,pairs.gz}" + ontologies: [] - - meta2: type: map description: | @@ -31,13 +32,15 @@ input: type: file description: Reference sequence file pattern: "*.{fasta,fna,fa}" + ontologies: [] - fai: type: file description: Reference sequence index file pattern: "*.{fai}" + ontologies: [] output: - - pretext: - - meta: + pretext: + - - meta: type: map description: | Groovy Map containing sample information @@ -46,11 +49,47 @@ output: type: file description: pretext map pattern: "*.pretext" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_pretextmap: + - - ${task.process}: + type: string + description: Name of the process + - PretextMap: + type: string + description: Name of the tool + - PretextMap | sed 
"/Version/!d; s/.*Version //": + type: eval + description: The expression to obtain the version of the tool + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version | sed "1!d; s/samtools //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - PretextMap: + type: string + description: Name of the tool + - PretextMap | sed "/Version/!d; s/.*Version //": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version | sed "1!d; s/samtools //": + type: eval + description: The expression to obtain the version of the tool authors: - "@marrip" - "@getrudeln" diff --git a/modules/nf-core/pretextmap/pretextmap.diff b/modules/nf-core/pretextmap/pretextmap.diff index 2cc03785..1128d2a7 100644 --- a/modules/nf-core/pretextmap/pretextmap.diff +++ b/modules/nf-core/pretextmap/pretextmap.diff @@ -1,31 +1,33 @@ Changes in component 'nf-core/pretextmap' 'modules/nf-core/pretextmap/meta.yml' is unchanged +Changes in 'pretextmap/environment.yml': +--- modules/nf-core/pretextmap/environment.yml ++++ modules/nf-core/pretextmap/environment.yml +@@ -5,5 +5,5 @@ + - bioconda + dependencies: + - bioconda::htslib=1.23 +- - bioconda::pretextmap=0.2.3 ++ - bioconda::pretextmap=0.1.9 + - bioconda::samtools=1.23 + Changes in 'pretextmap/main.nf': --- modules/nf-core/pretextmap/main.nf +++ modules/nf-core/pretextmap/main.nf -@@ -4,9 +4,7 @@ - label 'process_single' +@@ -4,9 +4,9 @@ + label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-- 'https://depot.galaxyproject.org/singularity/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61%3A44321ab4d64f0b6d0c93abbd1406369d1b3da684-0': -- 'biocontainers/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61:44321ab4d64f0b6d0c93abbd1406369d1b3da684-0' }" +- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e9/e9e6a49d9810ef0101a4a003afeda9b32c1d0d06b196ec13a5c9f5919bd1869e/data': +- 'community.wave.seqera.io/library/htslib_pretextmap_samtools:6d973e19ac7b0a1f' }" ++ + container "quay.io/sanger-tol/pretext:0.0.9-yy5-c2" ++ input: tuple val(meta), path(input) -Changes in 'pretextmap/environment.yml': ---- modules/nf-core/pretextmap/environment.yml -+++ modules/nf-core/pretextmap/environment.yml -@@ -5,4 +5,5 @@ - - bioconda - dependencies: - - bioconda::pretextmap=0.1.9 -- - bioconda::samtools=1.17 -+ - bioconda::pretextgraph=0.0.2 -+ - bioconda::samtools=1.21 - 'modules/nf-core/pretextmap/tests/main.nf.test' is unchanged 'modules/nf-core/pretextmap/tests/main.nf.test.snap' is unchanged ************************************************************ diff --git a/modules/nf-core/pretextmap/tests/main.nf.test.snap b/modules/nf-core/pretextmap/tests/main.nf.test.snap index b94050d8..5b78b8cf 100644 --- a/modules/nf-core/pretextmap/tests/main.nf.test.snap +++ b/modules/nf-core/pretextmap/tests/main.nf.test.snap @@ -8,11 +8,22 @@ "id": "test", "single_end": false }, - "test.pretext:md5,ae0f18b472e39de970fdc1aebc0f3f8d" + "test.pretext:md5,36105209541b09098a2d070cce5866a9" ] ], "1": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "2": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ], "pretext": [ [ @@ -20,19 +31,30 @@ "id": "test", "single_end": false }, - "test.pretext:md5,ae0f18b472e39de970fdc1aebc0f3f8d" + "test.pretext:md5,36105209541b09098a2d070cce5866a9" ] ], - "versions": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + "versions_pretextmap": [ + [ + "PRETEXTMAP", + 
"PretextMap", + "0.2.3" + ] + ], + "versions_samtools": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-12-05T13:09:12.619939794" + "timestamp": "2026-01-26T08:48:03.239812" }, "homo_cram_map_stub": { "content": [ @@ -47,7 +69,18 @@ ] ], "1": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "2": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ], "pretext": [ [ @@ -58,16 +91,27 @@ "test.pretext:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + "versions_pretextmap": [ + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "versions_samtools": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-12-05T13:18:01.160426003" + "timestamp": "2026-01-26T08:56:52.454859" }, "homo_cram_map_nofai": { "content": [ @@ -78,11 +122,22 @@ "id": "test", "single_end": false }, - "test.pretext:md5,ae0f18b472e39de970fdc1aebc0f3f8d" + "test.pretext:md5,36105209541b09098a2d070cce5866a9" ] ], "1": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "2": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ], "pretext": [ [ @@ -90,19 +145,30 @@ "id": "test", "single_end": false }, - "test.pretext:md5,ae0f18b472e39de970fdc1aebc0f3f8d" + "test.pretext:md5,36105209541b09098a2d070cce5866a9" ] ], - "versions": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + "versions_pretextmap": [ + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "versions_samtools": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": 
"2024-12-05T13:11:46.450760408" + "timestamp": "2026-01-26T08:50:51.096526" }, "mapped_pairs_map": { "content": [ @@ -113,11 +179,22 @@ "id": "test", "single_end": false }, - "test.pretext:md5,b47be7fd285a2de68643f73f85ba84f1" + "test.pretext:md5,4def3f73634f9866aa911ade21ffe75e" ] ], "1": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "2": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ], "pretext": [ [ @@ -125,19 +202,30 @@ "id": "test", "single_end": false }, - "test.pretext:md5,b47be7fd285a2de68643f73f85ba84f1" + "test.pretext:md5,4def3f73634f9866aa911ade21ffe75e" + ] + ], + "versions_pretextmap": [ + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" ] ], - "versions": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + "versions_samtools": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-12-05T13:17:41.668366392" + "timestamp": "2026-01-26T08:56:43.224672" }, "homo_bam_map": { "content": [ @@ -148,11 +236,22 @@ "id": "test", "single_end": false }, - "test.pretext:md5,ae0f18b472e39de970fdc1aebc0f3f8d" + "test.pretext:md5,36105209541b09098a2d070cce5866a9" ] ], "1": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" + ] + ], + "2": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ], "pretext": [ [ @@ -160,18 +259,29 @@ "id": "test", "single_end": false }, - "test.pretext:md5,ae0f18b472e39de970fdc1aebc0f3f8d" + "test.pretext:md5,36105209541b09098a2d070cce5866a9" + ] + ], + "versions_pretextmap": [ + [ + "PRETEXTMAP", + "PretextMap", + "0.2.3" ] ], - "versions": [ - "versions.yml:md5,098015ffd24f3479b3efbac59aa32339" + "versions_samtools": [ + [ + "PRETEXTMAP", + "samtools", + "1.23" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": 
"2024-12-05T13:14:38.730900443" + "timestamp": "2026-01-26T08:53:42.104066" } } \ No newline at end of file diff --git a/modules/nf-core/pretextsnapshot/environment.yml b/modules/nf-core/pretextsnapshot/environment.yml index c275e6ad..da627482 100644 --- a/modules/nf-core/pretextsnapshot/environment.yml +++ b/modules/nf-core/pretextsnapshot/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::pretextsnapshot=0.0.4 + - bioconda::pretextsnapshot=0.0.5 diff --git a/modules/nf-core/pretextsnapshot/main.nf b/modules/nf-core/pretextsnapshot/main.nf index 76cd89df..5fd76437 100644 --- a/modules/nf-core/pretextsnapshot/main.nf +++ b/modules/nf-core/pretextsnapshot/main.nf @@ -1,48 +1,37 @@ process PRETEXTSNAPSHOT { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pretextsnapshot:0.0.4--h7d875b9_0': - 'biocontainers/pretextsnapshot:0.0.4--h7d875b9_0' }" + 'https://depot.galaxyproject.org/singularity/pretextsnapshot:0.0.5--h9948957_0': + 'biocontainers/pretextsnapshot:0.0.5--h9948957_0' }" input: tuple val(meta), path(pretext_map) output: tuple val(meta), path('*.{jpeg,png,bmp}'), emit: image - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('PretextSnapshot'), eval("PretextSnapshot --version | sed 's/^.*PretextSnapshot Version //g'"), emit: versions_pretextsnapshot, topic: versions when: task.ext.when == null || task.ext.when script: - def VERSION = "0.0.4" - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}." + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_" """ PretextSnapshot \\ $args \\ - --memory $task.memory \\ --map $pretext_map \\ --prefix $prefix \\ --folder . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pretextsnapshot: \$(echo \$(PretextSnapshot --version 2>&1) | sed 's/^.*PretextSnapshot Version //' ) - END_VERSIONS """ + stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}_" """ - touch ${prefix}.png - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pretextsnapshot: $VERSION - END_VERSIONS + touch ${prefix}scaffold_{1,2,3,4}.png """ } diff --git a/modules/nf-core/pretextsnapshot/meta.yml b/modules/nf-core/pretextsnapshot/meta.yml index 434da68e..ccf2a6db 100644 --- a/modules/nf-core/pretextsnapshot/meta.yml +++ b/modules/nf-core/pretextsnapshot/meta.yml @@ -25,9 +25,10 @@ input: type: file description: pretext hic map pattern: "*.pretext" + ontologies: [] output: - - image: - - meta: + image: + - - meta: type: map description: | Groovy Map containing sample information @@ -35,13 +36,33 @@ output: - "*.{jpeg,png,bmp}": type: file description: image of a hic contact map - pattern: "*.{png,jpg,bmp}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + pattern: "*.{jpeg,png,bmp}" + ontologies: [] + versions_pretextsnapshot: + - - ${task.process}: + type: string + description: Name of the process + - PretextSnapshot: + type: string + description: Name of the tool + - PretextSnapshot --version | sed 's/^.*PretextSnapshot Version //g': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - PretextSnapshot: + type: string + description: The name of the tool + - PretextSnapshot --version | sed 's/^.*PretextSnapshot Version //g': + type: eval + description: The expression to obtain the version of the tool + authors: - "@epaule" maintainers: - "@epaule" + - "@DLBPointon" diff --git a/modules/nf-core/pretextsnapshot/pretextsnapshot.diff 
b/modules/nf-core/pretextsnapshot/pretextsnapshot.diff deleted file mode 100644 index bf37047e..00000000 --- a/modules/nf-core/pretextsnapshot/pretextsnapshot.diff +++ /dev/null @@ -1,40 +0,0 @@ -Changes in component 'nf-core/pretextsnapshot' -Changes in 'pretextsnapshot/main.nf': ---- modules/nf-core/pretextsnapshot/main.nf -+++ modules/nf-core/pretextsnapshot/main.nf -@@ -18,11 +18,13 @@ - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}." -+ def VERSION = "0.0.4" -+ def args = task.ext.args ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}." - """ - PretextSnapshot \\ - $args \\ -+ --memory $task.memory \\ - --map $pretext_map \\ - --prefix $prefix \\ - --folder . -@@ -32,4 +34,15 @@ - pretextsnapshot: \$(echo \$(PretextSnapshot --version 2>&1) | sed 's/^.*PretextSnapshot Version //' ) - END_VERSIONS - """ -+ -+ stub: -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ """ -+ touch ${prefix}.png -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ pretextsnapshot: $VERSION -+ END_VERSIONS -+ """ - } - -'modules/nf-core/pretextsnapshot/meta.yml' is unchanged -'modules/nf-core/pretextsnapshot/environment.yml' is unchanged -************************************************************ diff --git a/modules/nf-core/pretextsnapshot/tests/main.nf.test b/modules/nf-core/pretextsnapshot/tests/main.nf.test new file mode 100644 index 00000000..5cc34c48 --- /dev/null +++ b/modules/nf-core/pretextsnapshot/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process PRETEXTSNAPSHOT" + config "./nextflow.config" + script "../main.nf" + process "PRETEXTSNAPSHOT" + + tag "modules" + tag "modules_nfcore" + tag "pretextsnapshot" + + test("all") { + + when { + params { + module_args = '--sequences "=all" -c 26 -r 2160' + } + process { + """ + input[0] = [ + [id:'test'], + file(params.modules_testdata_base_path + 
'genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext', checkIfExists:true) + ] + """ + } + } + then { + assert process.success + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("full") { + + when { + params { + module_args = '--sequences "=full" -c 26 -r 2160' + } + process { + """ + input[0] = [ + [id:'test'], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext', checkIfExists:true) + ] + """ + } + } + then { + assert process.success + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("all - stub") { + options '-stub' + when { + params { + module_args = '--sequences "=all" -c 26 -r 2160' + } + process { + """ + input[0] = [ + [id:'test'], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext', checkIfExists:true) + ] + """ + } + } + then { + assert process.success + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/pretextsnapshot/tests/main.nf.test.snap b/modules/nf-core/pretextsnapshot/tests/main.nf.test.snap new file mode 100644 index 00000000..38a15a0e --- /dev/null +++ b/modules/nf-core/pretextsnapshot/tests/main.nf.test.snap @@ -0,0 +1,179 @@ +{ + "all": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test_scaffold_1.png:md5,810a786c1b15377449c5a6d5f5f8945c", + "test_scaffold_10.png:md5,85ff20eaee846c9376e72b643f5ed316", + "test_scaffold_11.png:md5,f8de3a48efcefd5205a2780f80f57521", + "test_scaffold_12.png:md5,0985ed277928c3c000a94c3dceffbc1c", + "test_scaffold_13.png:md5,13b826f23702ecf1531885462ad45f06", + "test_scaffold_14.png:md5,18825f42c5108d67aefd6b6f253e650c", + "test_scaffold_15.png:md5,9823a925efddc92e33105dac58c065a4", + "test_scaffold_16.png:md5,434224e11e7b9ee977cf64c44d6e7218", + 
"test_scaffold_17.png:md5,56c9c0d67c4d2471cc64f04b5de042e7", + "test_scaffold_18.png:md5,f7df2ee2ec94ce0b1361c26afb29f865", + "test_scaffold_19.png:md5,5ed093975dfe4e1fce4643a2243f87ad", + "test_scaffold_2.png:md5,3bb12d368d94b818bf1b0e939085c717", + "test_scaffold_20.png:md5,75f03c1b750094f338d7a7406f18432b", + "test_scaffold_21.png:md5,8c9253a26331f73b22ad7eb64b0d2e28", + "test_scaffold_3.png:md5,21128ec255fd2a50bed33090a1c68ade", + "test_scaffold_4.png:md5,5aeacb00ea8606d66bb512e2ecdcd7d4", + "test_scaffold_5.png:md5,81fd30c7f7f303903c78a0d8e624ef49", + "test_scaffold_6.png:md5,a2a48e34c8971beb27570dc30c4931ea", + "test_scaffold_7.png:md5,9df0c2b95cfd1b869ba3bb16b8f184a4", + "test_scaffold_8.png:md5,63f1c172f4d433f195d9c49da3adb0a1", + "test_scaffold_9.png:md5,877a76b7a8492b542a1254022dcb19c0" + ] + ] + ], + "1": [ + [ + "PRETEXTSNAPSHOT", + "PretextSnapshot", + "0.0.5" + ] + ], + "image": [ + [ + { + "id": "test" + }, + [ + "test_scaffold_1.png:md5,810a786c1b15377449c5a6d5f5f8945c", + "test_scaffold_10.png:md5,85ff20eaee846c9376e72b643f5ed316", + "test_scaffold_11.png:md5,f8de3a48efcefd5205a2780f80f57521", + "test_scaffold_12.png:md5,0985ed277928c3c000a94c3dceffbc1c", + "test_scaffold_13.png:md5,13b826f23702ecf1531885462ad45f06", + "test_scaffold_14.png:md5,18825f42c5108d67aefd6b6f253e650c", + "test_scaffold_15.png:md5,9823a925efddc92e33105dac58c065a4", + "test_scaffold_16.png:md5,434224e11e7b9ee977cf64c44d6e7218", + "test_scaffold_17.png:md5,56c9c0d67c4d2471cc64f04b5de042e7", + "test_scaffold_18.png:md5,f7df2ee2ec94ce0b1361c26afb29f865", + "test_scaffold_19.png:md5,5ed093975dfe4e1fce4643a2243f87ad", + "test_scaffold_2.png:md5,3bb12d368d94b818bf1b0e939085c717", + "test_scaffold_20.png:md5,75f03c1b750094f338d7a7406f18432b", + "test_scaffold_21.png:md5,8c9253a26331f73b22ad7eb64b0d2e28", + "test_scaffold_3.png:md5,21128ec255fd2a50bed33090a1c68ade", + "test_scaffold_4.png:md5,5aeacb00ea8606d66bb512e2ecdcd7d4", + 
"test_scaffold_5.png:md5,81fd30c7f7f303903c78a0d8e624ef49", + "test_scaffold_6.png:md5,a2a48e34c8971beb27570dc30c4931ea", + "test_scaffold_7.png:md5,9df0c2b95cfd1b869ba3bb16b8f184a4", + "test_scaffold_8.png:md5,63f1c172f4d433f195d9c49da3adb0a1", + "test_scaffold_9.png:md5,877a76b7a8492b542a1254022dcb19c0" + ] + ] + ], + "versions_pretextsnapshot": [ + [ + "PRETEXTSNAPSHOT", + "PretextSnapshot", + "0.0.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-10T12:27:39.682701525" + }, + "all - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test_scaffold_1.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_scaffold_2.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_scaffold_3.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_scaffold_4.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "PRETEXTSNAPSHOT", + "PretextSnapshot", + "0.0.5" + ] + ], + "image": [ + [ + { + "id": "test" + }, + [ + "test_scaffold_1.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_scaffold_2.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_scaffold_3.png:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_scaffold_4.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_pretextsnapshot": [ + [ + "PRETEXTSNAPSHOT", + "PretextSnapshot", + "0.0.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-10T12:28:04.580609402" + }, + "full": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_FullMap.png:md5,f4f9d0a79468a0d3ed7f5e0ded46d72b" + ] + ], + "1": [ + [ + "PRETEXTSNAPSHOT", + "PretextSnapshot", + "0.0.5" + ] + ], + "image": [ + [ + { + "id": "test" + }, + "test_FullMap.png:md5,f4f9d0a79468a0d3ed7f5e0ded46d72b" + ] + ], + "versions_pretextsnapshot": [ + [ + "PRETEXTSNAPSHOT", + "PretextSnapshot", + "0.0.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2026-02-10T12:27:55.065273909" + } +} \ No newline at end of file diff --git a/modules/nf-core/pretextsnapshot/tests/nextflow.config b/modules/nf-core/pretextsnapshot/tests/nextflow.config new file mode 100644 index 00000000..0587c4aa --- /dev/null +++ b/modules/nf-core/pretextsnapshot/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "PRETEXTSNAPSHOT" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 6de0095d..97bfb578 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -4,12 +4,11 @@ process SAMTOOLS_FAIDX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) + tuple val(meta), path(fasta), path(fai) val get_sizes output: @@ -17,7 +16,7 @@ process SAMTOOLS_FAIDX { tuple val(meta), path ("*.sizes") , emit: sizes, optional: true tuple val(meta), path ("*.fai") , emit: fai, optional: true tuple val(meta), path ("*.gzi") , emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when @@ -32,11 +31,6 @@ process SAMTOOLS_FAIDX { $args ${get_sizes_command} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -51,11 +45,5 @@ process SAMTOOLS_FAIDX { fi ${get_sizes_command} - - cat <<-END_VERSIONS > versions.yml - - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 256a330a..80aae1da 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -1,5 +1,6 @@ name: samtools_faidx -description: Index FASTA file, and optionally generate a file of chromosome sizes +description: Index FASTA file, and optionally generate a file of chromosome + sizes keywords: - index - fasta @@ -14,7 +15,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: 
@@ -26,22 +28,18 @@ input: type: file description: FASTA file pattern: "*.{fa,fasta}" - - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] + ontologies: [] - fai: type: file description: FASTA index file pattern: "*.{fai}" - - - get_sizes: - type: boolean - description: use cut to get the sizes of the index (true) or not (false) - + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) output: - - fa: - - meta: + fa: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,28 +48,31 @@ output: type: file description: FASTA file pattern: "*.{fa}" - - fai: - - meta: + ontologies: [] + sizes: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.fai": + - "*.sizes": type: file - description: FASTA index file - pattern: "*.{fai}" - - sizes: - - meta: + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.sizes": + - "*.fai": type: file - description: File containing chromosome lengths - pattern: "*.{sizes}" - - gzi: - - meta: + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: type: map description: | Groovy Map containing sample information @@ -80,11 +81,28 @@ output: type: file description: Optional gzip index file for compressed inputs pattern: "*.gzi" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test index 64219b7d..9a86db86 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -8,24 +8,30 @@ nextflow_process { tag "modules_nfcore" tag "samtools" tag "samtools/faidx" + config "./nextflow.config" test("test_samtools_faidx") { when { + params { + module_args = '' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true), + [] + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -33,89 +39,105 @@ nextflow_process { test("test_samtools_faidx_bgzip") { when { + params { + module_args = '' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } test("test_samtools_faidx_fasta") { - config "./nextflow.config" - when { + params { + module_args = 'MT192765.1 -o extract.fa' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } test("test_samtools_faidx_stub_fasta") { - config "./nextflow2.config" - + options "-stub" when { + 
params { + module_args = '-o extract.fa' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } test("test_samtools_faidx_stub_fai") { + options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] - input[2] = false + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -123,22 +145,25 @@ nextflow_process { test("test_samtools_faidx_get_sizes") { when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -146,22 +171,25 @@ nextflow_process { test("test_samtools_faidx_get_sizes_bgzip") { when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -171,22 +199,25 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[1] = [[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } @@ -196,24 +227,27 @@ nextflow_process { options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) - ]) - input[1] = 
[[],[]] - input[2] = true + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true """ } } then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap index 73722414..41697444 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -2,35 +2,13 @@ "test_samtools_faidx": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -41,50 +19,24 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:31:48.258623157" + "timestamp": "2026-02-10T15:39:12.541649151" }, "test_samtools_faidx_get_sizes_bgzip - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - 
"versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" - ], "fa": [ ], @@ -112,45 +64,24 @@ "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:41.122428188" + "timestamp": "2026-02-10T15:41:44.040426987" }, "test_samtools_faidx_get_sizes": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], @@ -173,55 +104,31 @@ "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:34:02.353546697" + "timestamp": "2026-02-10T15:47:03.653912015" }, "test_samtools_faidx_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] @@ -229,8 +136,7 @@ "gzi": [ [ { - "id": "test", - "single_end": false + "id": "test" }, 
"genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] @@ -238,46 +144,28 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:31:55.157487176" + "timestamp": "2026-02-10T15:50:04.023566795" }, "test_samtools_faidx_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" ] @@ -291,45 +179,24 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:02.149455586" + "timestamp": "2026-02-10T15:39:23.529404162" }, "test_samtools_faidx_get_sizes - stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" - ], "fa": [ ], @@ -352,48 +219,30 @@ "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": 
"2024-11-20T17:32:34.29376776" + "timestamp": "2026-02-10T15:41:39.039834304" }, "test_samtools_faidx_stub_fasta": { "content": [ { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "fai": [ @@ -405,51 +254,33 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:09.125065185" + "timestamp": "2026-02-10T15:39:28.961701609" }, "test_samtools_faidx_stub_fai": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], "fai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "gzi": [ @@ -458,50 +289,24 @@ "sizes": [ ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:16.274287863" + "timestamp": "2026-02-10T15:39:34.471028474" }, "test_samtools_faidx_get_sizes_bgzip": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - 
"genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" - ] - ], - "3": [ - [ - { - "id": "test" - }, - "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" - ] - ], - "4": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" - ], "fa": [ ], @@ -529,15 +334,19 @@ "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" ] ], - "versions": [ - "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-11-20T17:32:28.117654855" + "timestamp": "2026-02-10T15:39:45.439016495" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config index f76a3ba0..202c036e 100644 --- a/modules/nf-core/samtools/faidx/tests/nextflow.config +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -1,7 +1,7 @@ process { withName: SAMTOOLS_FAIDX { - ext.args = 'MT192765.1 -o extract.fa' + ext.args = params.module_args } } diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config deleted file mode 100644 index 33ebbd5d..00000000 --- a/modules/nf-core/samtools/faidx/tests/nextflow2.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - - withName: SAMTOOLS_FAIDX { - ext.args = '-o extract.fa' - } -} diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: 
+ # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..e2a0e56d --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,39 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus} \\ + $args \\ + $input + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..c6d4ce25 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,92 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. 
+ These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file + ontologies: [] +output: + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..c96cec86 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> 
key.startsWith('versions') } + ).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + 
test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.csi, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..afc8a1ff --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,156 @@ +{ + "csi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:52:10.030187" + }, + "crai - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:59.125484" + }, + "bai - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:47.277042" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + { + 
"versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:35.758735" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:26.561965" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:15.299035" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/merge/environment.yml +++ b/modules/nf-core/samtools/merge/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index 34da4c7c..87ebc501 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -4,21 +4,19 @@ process SAMTOOLS_MERGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) + tuple val(meta2), path(fasta), path(fai), path(gzi) output: tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram tuple val(meta), path("*.csi") , optional:true, emit: csi tuple val(meta), path("*.crai") , optional:true, emit: crai - path "versions.yml" , emit: versions - + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when @@ -29,6 +27,7 @@ process SAMTOOLS_MERGE { def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() def reference = fasta ? "--reference ${fasta}" : "" """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). 
samtools \\ merge \\ --threads ${task.cpus-1} \\ @@ -36,11 +35,6 @@ process SAMTOOLS_MERGE { ${reference} \\ ${prefix}.${file_type} \\ $input_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -52,10 +46,5 @@ process SAMTOOLS_MERGE { """ touch ${prefix}.${file_type} ${index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 235aa219..31cc576b 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -26,6 +26,7 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram,sam}" + ontologies: [] - - meta2: type: map description: | @@ -35,18 +36,21 @@ input: type: file description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" - - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'genome' ] + ontologies: [] - fai: type: file description: Index of the reference file the CRAM was created with (optional) pattern: "*.fai" + ontologies: [] + - gzi: + type: file + description: Index of the compressed reference file the CRAM was created with + (optional) + pattern: "*.gzi" + ontologies: [] output: - - bam: - - meta: + bam: + - - meta: type: map description: | Groovy Map containing sample information @@ -55,8 +59,9 @@ output: type: file description: BAM file pattern: "*.{bam}" - - cram: - - meta: + ontologies: [] + cram: + - - meta: type: map description: | Groovy Map containing sample information @@ -65,8 +70,9 @@ output: type: file description: CRAM file pattern: "*.{cram}" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information @@ -75,8 +81,9 @@ output: type: file description: BAM index file (optional) pattern: "*.csi" - - crai: - - meta: + ontologies: [] + crai: + - - meta: type: map description: | Groovy Map containing sample information @@ -85,20 +92,36 @@ output: type: file description: CRAM index file (optional) pattern: "*.crai" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool authors: - - "@drpatelh" - - "@yuukiiwa " + - "@yuukiiwa" - "@maxulysse" - "@FriederikeHanssen" - "@ramprasadn" 
maintainers: - - "@drpatelh" - - "@yuukiiwa " + - "@yuukiiwa" - "@maxulysse" - "@FriederikeHanssen" - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config deleted file mode 100644 index 8c5668cf..00000000 --- a/modules/nf-core/samtools/merge/tests/index.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = "--write-index" -} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test index 40b36e82..a784c836 100644 --- a/modules/nf-core/samtools/merge/tests/main.nf.test +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -8,70 +8,116 @@ nextflow_process { tag "modules_nfcore" tag "samtools" tag "samtools/merge" + config "./nextflow.config" test("bams") { - config "./index.config" - when { + params { + module_args = '--write-index' + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] - ]) - input[1] = [[],[]] - input[2] = [[],[]] + ] + input[1] = [[],[],[],[]] """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, - { assert snapshot(process.out.cram).match("bams_cram") }, - { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, - { assert snapshot(process.out.crai).match("bams_crai") }, - { assert snapshot(process.out.versions).match("bams_versions") } + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + 
file(process.out.csi[0][1]).name, + process.out.cram, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } ) } } - test("crams") { + test("crams_fastq") { + + when { + params { + module_args = '--write-index --output-fmt cram,version=3.0' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true) ] + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + [] + ] + """ + } + } + + then { + def fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta' + assert process.success + assertAll( + { assert snapshot( + cram(process.out.cram[0][1], fasta).getReadsMD5(), + process.out.csi, + process.out.bam, + file(process.out.crai[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } - config "./index.config" + test("crams_fastq_gz") { when { + params { + module_args = '--write-index' + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] - ]) - input[1] = Channel.of([ - [ id:'genome' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) - ]) - input[2] = 
Channel.of([ - [ id:'genome' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) - ]) + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true) ] + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ] """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, - { assert snapshot(process.out.bam).match("crams_bam") }, - { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, - { assert snapshot(process.out.csi).match("crams_csi") }, - { assert snapshot(process.out.versions).match("crams_versions") } + { assert snapshot( + //nft-bam doesn't like the fasta.gz + file(process.out.cram[0][1]).name, + process.out.csi, + process.out.bam, + file(process.out.crai[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } ) } } @@ -79,59 +125,91 @@ nextflow_process { test("bam") { when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] - ]) - input[1] = [[],[]] - input[2] = [[],[]] + ] + input[1] = [[],[],[],[]] + """ + } + } + + then { + assert 
process.success + assertAll( + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.csi, + process.out.cram, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("bams - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ] + input[1] = [[],[],[],[]] """ } } then { + assert process.success assertAll( - { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, - { assert snapshot(process.out.cram).match("bam_cram") }, - { assert snapshot(process.out.crai).match("bam_crai") }, - { assert snapshot(process.out.csi).match("bam_csi") }, - { assert snapshot(process.out.versions).match("bam_versions") } + { assert snapshot(process.out).match()} ) } } - test("bams_stub") { + test("bams_no_index - stub") { - config "./index.config" options "-stub" when { + params { + module_args = '' + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] - ]) - input[1] = [[],[]] - input[2] = [[],[]] + ] + input[1] = [[],[],[],[]] """ } } then { + assert 
process.success assertAll( - { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, - { assert snapshot(process.out.cram).match("bams_stub_cram") }, - { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, - { assert snapshot(process.out.crai).match("bams_stub_crai") }, - { assert snapshot(process.out.versions).match("bams_stub_versions") } + { assert snapshot(process.out).match()} ) } } + } diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap index 0a41e01a..e8d4d18a 100644 --- a/modules/nf-core/samtools/merge/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -1,228 +1,226 @@ { - "crams_cram": { - "content": [ - "test.cram" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:00.647389" - }, - "bams_stub_cram": { - "content": [ + "bams_no_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-22T09:07:54.643129787" + }, + "crams_fastq": { + "content": [ + "c4525b95f05075208347295e6a1fb232", [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:19.937013" - }, - "bams_crai": { - "content": [ + ], [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:49:24.928616" - }, - "bams_bam": { - "content": [ - "test.bam" - ], - 
"meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:49:24.923289" - }, - "bams_cram": { - "content": [ + ], + "test.cram.crai", + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-21T15:47:17.739468093" + }, + "bams - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-22T09:07:47.303781531" + }, + "bams": { + "content": [ + "47c9f174d8c8afc1a13c75ee4b5e5d43", + "test.bam.csi", [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:49:24.925716" - }, - "crams_csi": { - "content": [ + ], [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:00.655959" - }, - "bam_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:10.319539" - }, - "bam_versions": { - "content": [ - [ - "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-16T09:16:30.476887194" - }, - "bams_csi": { - "content": [ - "test.bam.csi" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:49:24.92719" - }, - "bams_stub_csi": { - "content": [ - "test.csi" + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", 
+ "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-02-12T18:50:19.940498" + "timestamp": "2025-12-21T15:47:07.586929894" }, - "bam_crai": { + "crams_fastq_gz": { "content": [ + "test.cram", [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:10.328852" - }, - "bams_stub_versions": { - "content": [ - [ - "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-16T09:16:52.203823961" - }, - "bam_cram": { - "content": [ + ], [ - ] + ], + "test.cram.crai", + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-02-12T18:50:10.324219" + "timestamp": "2025-12-21T15:47:26.095542073" }, - "bams_stub_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:19.933153" - }, - "bams_versions": { - "content": [ - [ - "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-16T08:29:57.524363148" - }, - "crams_bam": { + "bam": { "content": [ + "8da8fc1099a955e3ceb198665350e766", [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:00.650652" - }, - "crams_versions": { - "content": [ - [ - "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-16T09:16:06.977096207" - }, - "bam_csi": { - "content": [ + ], [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:10.33292" - }, - "crams_crai": { - 
"content": [ - "test.cram.crai" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:00.653512" - }, - "bams_stub_crai": { - "content": [ + ], [ - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T18:50:19.943839" + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGE", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-21T15:47:33.563513564" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/nextflow.config b/modules/nf-core/samtools/merge/tests/nextflow.config new file mode 100644 index 00000000..2b2a4610 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "SAMTOOLS_MERGE" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index caf3c61a..6b5aa31d 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -4,30 +4,41 @@ process SAMTOOLS_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta) , path(bam) tuple val(meta2), path(fasta) + val index_format output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${extension}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${extension}.bai"), emit: bai, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt cram") ? "cram" : - "bam" + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" def reference = fasta ? "--reference ${fasta}" : "" + output_file = index_format ? "${prefix}.${extension}##idx##${prefix}.${extension}.${index_format} --write-index" : "${prefix}.${extension}" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." 
+ } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ @@ -39,34 +50,29 @@ process SAMTOOLS_SORT { -T ${prefix} \\ --threads $task.cpus \\ ${reference} \\ - -o ${prefix}.${extension} \\ + -o ${output_file} \\ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt cram") ? "cram" : - "bam" + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } + index = index_format ? 
"touch ${prefix}.${extension}.${index_format}" : "" + """ touch ${prefix}.${extension} - if [ "${extension}" == "bam" ]; - then - touch ${prefix}.${extension}.csi - elif [ "${extension}" == "cram" ]; - then - touch ${prefix}.${extension}.crai - fi + ${index} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index a9dbec5a..69968304 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -26,6 +26,7 @@ input: type: file description: BAM/CRAM/SAM file(s) pattern: "*.{bam,cram,sam}" + ontologies: [] - - meta2: type: map description: | @@ -36,52 +37,101 @@ input: description: Reference genome FASTA file pattern: "*.{fa,fasta,fna}" optional: true + ontologies: [] + - index_format: + type: string + description: Index format to use (optional) + pattern: "bai|csi|crai" output: - - bam: - - meta: + bam: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.bam": + - "${prefix}.bam": type: file description: Sorted BAM file pattern: "*.{bam}" - - cram: - - meta: + ontologies: [] + cram: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.cram": + - "${prefix}.cram": type: file description: Sorted CRAM file pattern: "*.{cram}" - - crai: - - meta: + ontologies: [] + sam: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.crai": + - "${prefix}.sam": + type: file + description: Sorted SAM file + pattern: "*.{sam}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.${extension}.crai": type: file description: CRAM index file (optional) pattern: "*.crai" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.csi": + - "${prefix}.${extension}.csi": type: file description: BAM index file (optional) pattern: "*.csi" - - versions: - - versions.yml: + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.bai": type: file - description: File containing software versions - pattern: "versions.yml" + description: BAM index file (optional) + pattern: "*.bai" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index b05e6691..df47bb25 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/sort" - test("bam") { + test("bam_no_index") { config "./nextflow.config" @@ -23,6 +23,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) + 
input[2] = '' """ } } @@ -32,8 +33,72 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.bam, - process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_bai_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_csi_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi, + process.out.findAll { key, val -> key.startsWith("versions") } ).match()} ) } @@ -57,6 +122,77 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + 
process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam bai index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam csi index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' """ } } @@ -67,7 +203,7 @@ nextflow_process { { assert snapshot( process.out.bam, process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match()} ) } @@ -88,6 +224,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -98,7 +235,7 @@ nextflow_process { { assert snapshot( process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match()} ) } @@ -120,6 +257,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -127,7 +265,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } ) } } @@ -150,6 +288,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -157,7 +296,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } ) } } @@ -178,6 +317,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -185,7 +325,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } ) } } diff --git 
a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index 469891fe..4e618fa3 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -19,147 +19,77 @@ "test.sorted.cram.crai" ] ], - [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T08:49:58.207549273" + "timestamp": "2025-10-29T12:47:01.171084" }, - "bam - stub": { + "bam_csi_index": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,72ca1dff5344a5e5e6b892fe5f6b134d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,01394e702c729cb478df914ffaf9f7f8" + ] + ], { - "0": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "crai": [ - - ], - "cram": [ - - ], - "csi": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T08:50:08.630951018" + "timestamp": "2025-10-29T12:46:00.961675" }, - "cram - stub": { + "bam - stub": { 
"content": [ { - "0": [ - - ], - "1": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ], - "bam": [ - - ], - "crai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "cram": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T08:50:19.061912443" + "timestamp": "2025-10-29T12:47:12.154354" }, - "multiple bam": { + "multiple bam bai index": { "content": [ [ [ @@ -167,7 +97,7 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + "test.sorted.bam:md5,3ffa2affc29f0aa6e7b36dded84625fe" ] ], [ @@ -176,85 +106,122 @@ "id": "test", "single_end": false }, - "test.sorted.bam.csi" + "test.sorted.bam.bai" ] ], - [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-10-08T11:59:55.479443" + "timestamp": "2025-10-29T12:46:25.488622" }, - "multiple bam - stub": { + "cram - stub": { "content": [ { - "0": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "1": [ - - ], - "2": [ 
- - ], - "3": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:28.485045" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cd4eb0077f25e9cff395366b8883dd1f" + ] + ], + [ + + ], + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "4": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ], - "bam": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:13.168476" + }, + "multiple bam - stub": { + "content": [ + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "crai": [ - - ], - "cram": [ - - ], - "csi": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:21.628088" + }, + "bam_no_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,26b27d1f9bcb61c25da21b562349784e" + ] + ], + [ + + ], + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-10-08T11:36:13.781404" + "timestamp": "2025-10-29T12:45:47.139418" }, - "bam": { + "multiple bam csi index": { "content": [ [ [ @@ -262,7 +229,7 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" + "test.sorted.bam:md5,295503ba5342531a3310c33ad0efbc22" ] ], [ @@ -274,14 +241,56 @@ 
"test.sorted.bam.csi" ] ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:51.5531" + }, + "bam_bai_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cae7564cb83bb4a5911205bf94124b54" + ] + ], [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ] + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,50dd467c169545a4d5d1f709f7e986e0" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-10-08T11:59:46.372244" + "timestamp": "2025-10-29T12:45:52.796936" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config index f642771f..723f62b2 100644 --- a/modules/nf-core/samtools/sort/tests/nextflow.config +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -2,7 +2,6 @@ process { withName: SAMTOOLS_SORT { ext.prefix = { "${meta.id}.sorted" } - ext.args = "--write-index" } } diff --git a/modules/nf-core/samtools/splitheader/environment.yml b/modules/nf-core/samtools/splitheader/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/splitheader/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/splitheader/main.nf b/modules/nf-core/samtools/splitheader/main.nf new file 
mode 100644 index 00000000..52e8e5fd --- /dev/null +++ b/modules/nf-core/samtools/splitheader/main.nf @@ -0,0 +1,45 @@ +process SAMTOOLS_SPLITHEADER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*_readgroups.txt"), emit: readgroup + tuple val(meta), path("*_programs.txt") , emit: programs + tuple val(meta), path("*_sequences.txt") , emit: sequences + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + view \\ + -H \\ + $args \\ + $input \\ + | tee \\ + >( grep '^@RG' > ${prefix}_readgroups.txt ) \ + >( grep '^@PG' > ${prefix}_programs.txt ) \ + >( grep '^@SQ' > ${prefix}_sequences.txt ) \ + > /dev/null + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo -e "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:ILLUMINA" > ${prefix}_readgroups.txt + echo -e "@PG\\tID:samtools.4\\tPN:samtools\\tPP:samtools.3\\tVN:1.22.1\\tCL:samtools view -H ${input}" > ${prefix}_programs.txt + echo -e "@SQ\\tSN:chr1\\tLN:10000" > ${prefix}_sequences.txt + """ +} diff --git a/modules/nf-core/samtools/splitheader/meta.yml b/modules/nf-core/samtools/splitheader/meta.yml new file mode 100644 index 00000000..ed59bb3e --- /dev/null +++ b/modules/nf-core/samtools/splitheader/meta.yml @@ -0,0 +1,102 @@ +name: samtools_splitheader +description: Extract header lines from a SAM/BAM/CRAM file into separate files + depending on type +keywords: + - view + - bam + - sam + - cram + - readgroup + - 
program + - sequence + - header +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: http://edamontology.org/format_2573 # SAM + - edam: http://edamontology.org/format_2572 # BAM + - edam: http://edamontology.org/format_3462 # CRAM +output: + readgroup: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_readgroups.txt": + type: file + description: | + Text file containing read group (@RG) lines from SAM header + ontologies: + - edam: http://edamontology.org/format_3462 # Text + programs: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_programs.txt": + type: file + description: | + Text file containing program (@PG) lines from SAM header + ontologies: + - edam: http://edamontology.org/format_3462 # Text + sequences: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*_sequences.txt": + type: file + description: | + Text file containing sequence (@SQ) lines from SAM header + ontologies: + - edam: http://edamontology.org/format_3462 # Text + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@matthdsm" + - "@prototaxites" +maintainers: + - "@matthdsm" + - "@prototaxites" diff --git a/modules/nf-core/samtools/splitheader/tests/main.nf.test b/modules/nf-core/samtools/splitheader/tests/main.nf.test new file mode 100644 index 00000000..f79cfdf2 --- /dev/null +++ b/modules/nf-core/samtools/splitheader/tests/main.nf.test @@ -0,0 +1,65 @@ + +nextflow_process { + + name "Test Process SAMTOOLS_SPLITHEADER" + script "../main.nf" + process "SAMTOOLS_SPLITHEADER" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/splitheader" + + test("test-samtools-splitheader") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.readgroup[0][1]).readLines(), + file(process.out.programs[0][1]).readLines(), + file(process.out.sequences[0][1]).readLines(), + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("test-samtools-splitheader - stub") { + + options 
"-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + +} diff --git a/modules/nf-core/samtools/splitheader/tests/main.nf.test.snap b/modules/nf-core/samtools/splitheader/tests/main.nf.test.snap new file mode 100644 index 00000000..23c824d5 --- /dev/null +++ b/modules/nf-core/samtools/splitheader/tests/main.nf.test.snap @@ -0,0 +1,104 @@ +{ + "test-samtools-splitheader": { + "content": [ + [ + "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1" + ], + [ + "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam", + "@PG\tID:samtools.1\tPN:samtools\tPP:samtools\tVN:1.22.1\tCL:samtools view -H test.paired_end.bam" + ], + [ + "@SQ\tSN:MT192765.1\tLN:29829" + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SPLITHEADER", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-03T14:24:15.761619219" + }, + "test-samtools-splitheader - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_readgroups.txt:md5,2753555b7fee08ebda25868f78ae34a4" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_programs.txt:md5,fd5e1967fc571c61456c9289350d002b" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_sequences.txt:md5,8366d818a88a379ca885eb497aefc999" + ] + ], + "3": [ + [ + "SAMTOOLS_SPLITHEADER", + "samtools", + "1.22.1" + ] + ], + "programs": [ + [ + { + "id": "test" + }, + "test_programs.txt:md5,fd5e1967fc571c61456c9289350d002b" + ] + ], + "readgroup": [ + [ + { + "id": "test" 
+ }, + "test_readgroups.txt:md5,2753555b7fee08ebda25868f78ae34a4" + ] + ], + "sequences": [ + [ + { + "id": "test" + }, + "test_sequences.txt:md5,8366d818a88a379ca885eb497aefc999" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_SPLITHEADER", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-03T14:24:23.230770372" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index 8cae5712..89e12a64 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index f43a4c6e..b189b5ba 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,12 +4,12 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input), path(index) - tuple val(meta2), path(fasta) + tuple val(meta2), path(fasta), path(fai) path qname val index_format @@ -22,7 +22,7 @@ process SAMTOOLS_VIEW { tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true tuple val(meta), path("${prefix}.unselected.${file_type}.{csi,crai}"), emit: unselected_index, optional: true - path "versions.yml", emit: versions + tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -50,6 +50,7 @@ process SAMTOOLS_VIEW { } } """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). 
samtools \\ view \\ --threads ${task.cpus-1} \\ @@ -59,11 +60,6 @@ process SAMTOOLS_VIEW { -o ${output_file} \\ $input \\ $args2 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -94,10 +90,5 @@ process SAMTOOLS_VIEW { ${index} ${unselected} ${unselected_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 28c268a6..23a1af33 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -26,30 +26,41 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + ontologies: [] - index: type: file description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) pattern: "*.{.bai,.csi,.crai}" + ontologies: [] - - meta2: type: map description: | Groovy Map containing reference information - e.g. [ id:'test' ] + e.g. 
[ id:'genome' ] - fasta: type: file - description: Reference file the CRAM was created with (optional) + description: Fasta reference file pattern: "*.{fasta,fa}" - - - qname: + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - fai: type: file - description: Optional file with read names to output only select alignments - pattern: "*.{txt,list}" - - - index_format: - type: string - description: Index format, used together with ext.args = '--write-index' - pattern: "bai|csi|crai" + description: Fasta reference file index + pattern: "*.{fai}" + ontologies: + - edam: http://edamontology.org/format_3326 # Index + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" + ontologies: [] + - index_format: + type: string + description: Index format, used together with ext.args = '--write-index' + pattern: "bai|csi|crai" output: - - bam: - - meta: + bam: + - - meta: type: map description: | Groovy Map containing sample information @@ -58,8 +69,9 @@ output: type: file description: optional filtered/converted BAM file pattern: "*.{bam}" - - cram: - - meta: + ontologies: [] + cram: + - - meta: type: map description: | Groovy Map containing sample information @@ -68,8 +80,9 @@ output: type: file description: optional filtered/converted CRAM file pattern: "*.{cram}" - - sam: - - meta: + ontologies: [] + sam: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +91,9 @@ output: type: file description: optional filtered/converted SAM file pattern: "*.{sam}" - - bai: - - meta: + ontologies: [] + bai: + - - meta: type: map description: | Groovy Map containing sample information @@ -88,8 +102,9 @@ output: type: file description: optional BAM file index pattern: "*.{bai}" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information @@ -98,8 +113,9 @@ output: type: file description: optional tabix BAM 
file index pattern: "*.{csi}" - - crai: - - meta: + ontologies: [] + crai: + - - meta: type: map description: | Groovy Map containing sample information @@ -108,8 +124,9 @@ output: type: file description: optional CRAM file index pattern: "*.{crai}" - - unselected: - - meta: + ontologies: [] + unselected: + - - meta: type: map description: | Groovy Map containing sample information @@ -118,8 +135,9 @@ output: type: file description: optional file with unselected alignments pattern: "*.unselected.{bam,cram,sam}" - - unselected_index: - - meta: + ontologies: [] + unselected_index: + - - meta: type: map description: | Groovy Map containing sample information @@ -128,11 +146,30 @@ output: type: file description: index for the "unselected" file pattern: "*.unselected.{csi,crai}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + authors: - "@drpatelh" - "@joseespinosa" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config deleted file mode 100644 index c10d1081..00000000 --- a/modules/nf-core/samtools/view/tests/bam.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = "--output-fmt bam" -} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config deleted file mode 100644 index 
771ae033..00000000 --- a/modules/nf-core/samtools/view/tests/bam_index.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = "--output-fmt bam --write-index" -} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test index d8551dd8..8d8f292f 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -2,6 +2,7 @@ nextflow_process { name "Test Process SAMTOOLS_VIEW" script "../main.nf" + config "./nextflow.config" process "SAMTOOLS_VIEW" tag "modules" @@ -10,16 +11,18 @@ nextflow_process { tag "samtools/view" test("bam") { - when { + params { + samtools_args = "" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = [] input[3] = [] """ @@ -29,30 +32,24 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, - { assert snapshot(process.out.bai).match("bam_bai") }, - { assert snapshot(process.out.crai).match("bam_crai") }, - { assert snapshot(process.out.cram).match("bam_cram") }, - { assert snapshot(process.out.csi).match("bam_csi") }, - { assert snapshot(process.out.sam).match("bam_sam") }, - { assert snapshot(process.out.versions).match("bam_versions") } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam"])).match()} ) } } test("bam_csi_index") { - - config "./bam_index.config" - when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = [] input[3] = 'csi' """ @@ -62,28 +59,24 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - file(process.out.csi[0][1]).name, - process.out.versions).match() - } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "csi"])).match()} ) } } test("bam_bai_index") { - - config "./bam_index.config" - when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = [] input[3] = 'bai' """ @@ -93,27 +86,24 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - file(process.out.bai[0][1]).name, - process.out.versions).match() } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "bai"])).match()} ) } } test("bam_bai_index_unselected") { - - config "./bam_index.config" - when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = Channel.of('testN:1') .collectFile(name: 'selected_reads.txt') input[3] = 'bai' @@ -124,30 +114,24 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - file(process.out.bai[0][1]).name, - 
file(process.out.unselected[0][1]).name, - file(process.out.unselected_index[0][1]).name, - process.out.versions).match() - } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "bai", "unselected", "unselected_index"])).match()} ) } } test("cram_crai_index_unselected") { - - config "./cram_index.config" - when { + params { + samtools_args = "--output-fmt cram --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = Channel.of('testN:1') .collectFile(name: 'selected_reads.txt') input[3] = 'crai' @@ -158,31 +142,28 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.cram[0][1]).name, - file(process.out.crai[0][1]).name, - file(process.out.unselected[0][1]).name, - file(process.out.unselected_index[0][1]).name, - process.out.versions).match() - } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "bai", "unselected", "unselected_index", "crai"])).match()} ) } } test("cram") { - when { + params { + samtools_args = "" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) - ]) - input[1] = Channel.of([ - [ id:'genome' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) - ]) + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] input[2] = [] input[3] = [] """ @@ -192,33 +173,28 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, - { assert snapshot(process.out.bai).match("cram_bai") }, - { assert snapshot(process.out.bam).match("cram_bam") }, - { assert snapshot(process.out.crai).match("cram_crai") }, - { assert snapshot(process.out.csi).match("cram_csi") }, - { assert snapshot(process.out.sam).match("cram_sam") }, - { assert snapshot(process.out.versions).match("cram_versions") } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["cram"])).match()} ) } } test("cram_to_bam") { - - config "./bam.config" - when { + params { + samtools_args = "--output-fmt bam" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ]) - input[1] = Channel.of([ - [ id:'genome' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) - ]) + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] input[2] = [] input[3] = [] """ @@ -228,33 +204,28 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, - { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, - { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, - { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, - { assert 
snapshot(process.out.csi).match("cram_to_bam_csi") }, - { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, - { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam"])).match()} ) } } test("cram_to_bam_index") { - - config "./bam_index.config" - when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ]) - input[1] = Channel.of([ - [ id:'genome' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) - ]) + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] input[2] = [] input[3] = [] """ @@ -264,34 +235,30 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, - { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, - { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, - { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, - { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, - { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, - { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "csi"])).match()} ) } } test("cram_to_bam_index_qname") { - - config "./bam_index.config" - when { + params { + samtools_args = "--output-fmt bam 
--write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ]) - input[1] = Channel.of([ - [ id:'genome' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) - ]) - input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[2] = Channel.of("testN:2817", "testN:2814") + .collectFile(name: "readnames.list", newLine: true) input[3] = [] """ } @@ -300,15 +267,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, - { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, - { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, - { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, - { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, - { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, - { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, - { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, - { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "bai", "unselected", "unselected_index"])).match()} ) } } @@ -316,17 +275,19 @@ 
nextflow_process { test("bam_stub") { options "-stub" - config "./bam_index.config" when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = [] input[3] = [] """ @@ -336,13 +297,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, - { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, - { assert snapshot(process.out.bai).match("bam_stub_bai") }, - { assert snapshot(process.out.crai).match("bam_stub_crai") }, - { assert snapshot(process.out.cram).match("bam_stub_cram") }, - { assert snapshot(process.out.sam).match("bam_stub_sam") }, - { assert snapshot(process.out.versions).match("bam_stub_versions") } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["bam", "csi"])).match()} ) } } @@ -350,17 +305,19 @@ nextflow_process { test("bam_csi_index - stub") { options "-stub" - config "./bam_index.config" when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = [] input[3] = 'csi' """ @@ -378,17 +335,19 @@ nextflow_process { test("bam_bai_index - stub") { options "-stub" - config "./bam_index.config" when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = [] input[3] = 'bai' """ @@ -406,17 +365,19 @@ nextflow_process { test("bam_bai_index_uselected - stub") { options "-stub" - config "./bam_index.config" when { + params { + samtools_args = "--output-fmt bam --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = Channel.of('testN:1') .collectFile(name: 'selected_reads.txt') input[3] = 'bai' @@ -435,17 +396,19 @@ nextflow_process { test("cram_crai_index_unselected - stub") { options "-stub" - config "./cram_index.config" when { + params { + samtools_args = "--output-fmt cram --write-index" + } process { """ - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map + input[0] = [ + [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), [] - ]) - input[1] = [[],[]] + ] + input[1] = [[],[],[]] input[2] = Channel.of('testN:1') .collectFile(name: 'selected_reads.txt') input[3] = 'crai' diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap index 1cb793f2..95205e56 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -1,139 +1,324 @@ { - "bam_bam": { + "cram_to_bam_index": { "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:51.256068" - }, - "bam_stub_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - 
}, - "timestamp": "2024-02-12T19:38:32.065301" - }, - "bam_bai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:51.258578" - }, - "bam_stub_bai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:32.071284" - }, - "bam_stub_versions": { - "content": [ - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:43:43.6526401" - }, - "cram_to_bam_index_qname_csi": { - "content": [ - "test.bam.csi" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:23.325496" - }, - "cram_to_bam_index_qname_unselected_csi": { - "content": [ - "test.unselected.bam.csi" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:23.328458" - }, - "bam_csi": { - "content": [ - [ - - ] + { + "bai": [ + + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bam.csi" + ] + ], + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:37:51.262882" + "timestamp": "2026-02-03T16:49:14.82588493" }, - "cram_to_bam_index_bam": { + "bam_csi_index - stub": { "content": [ - "test.bam" + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test" + }, + "test.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + "SAMTOOLS_VIEW", + "samtools", + 
"1.22.1" + ] + ], + "bai": [ + + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:12.95456" + "timestamp": "2026-02-03T16:49:31.409368544" }, - "cram_to_bam_index_versions": { + "bam_csi_index": { "content": [ - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] + { + "bai": [ + + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bam.csi" + ] + ], + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T09:25:14.475388399" + "timestamp": "2026-02-03T16:48:46.36824035" }, - "bam_csi_index": { + "cram_to_bam_index_qname": { "content": [ - "test.bam", - "test.bam.csi", - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] + { + "bai": [ + + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bam.csi:md5,15d725bced7ececd45b4312b2af99a6b" + ] + ], + "sam": [ + + ], + "unselected": [ + [ + { + "id": "test" + }, + "test.unselected.bam" + ] + ], + "unselected_index": [ + [ + { + "id": "test" + }, + "test.unselected.bam.csi" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": 
"0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-02-14T07:45:19.718077276" + "timestamp": "2026-02-03T16:49:20.747636525" }, - "bam_versions": { + "bam_bai_index_unselected": { "content": [ - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] + { + "bai": [ + [ + { + "id": "test" + }, + "test.bam.bai" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "sam": [ + + ], + "unselected": [ + [ + { + "id": "test" + }, + "test.unselected.bam" + ] + ], + "unselected_index": [ + [ + { + "id": "test" + }, + "test.unselected.bam.csi" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-02-13T16:13:00.739468586" + "timestamp": "2026-02-03T16:48:57.713608154" }, "cram_crai_index_unselected - stub": { "content": [ @@ -144,8 +329,7 @@ "1": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -162,8 +346,7 @@ "5": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -171,8 +354,7 @@ "6": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -180,14 +362,17 @@ "7": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "8": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ], "bai": [ @@ -198,8 +383,7 @@ "crai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -207,8 +391,7 @@ "cram": [ [ { - "id": "test", - "single_end": false + "id": "test" }, 
"test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -222,8 +405,7 @@ "unselected": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.cram:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -231,147 +413,137 @@ "unselected_index": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:47:20.903462221" - }, - "bam_crai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:51.259774" - }, - "bam_cram": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:51.261287" - }, - "cram_sam": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:56.502625" - }, - "cram_versions": { - "content": [ - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-13T16:33:28.319991831" - }, - "cram_to_bam_index_bai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:12.962863" - }, - "cram_to_bam_index_qname_sam": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:23.337634" + "timestamp": "2026-02-03T16:49:48.092654899" }, - "bam_csi_index - stub": { + "bam": { "content": [ { - "0": [ + "bai": [ + + ], + "bam": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bam" ] 
], - "1": [ + "crai": [ ], - "2": [ + "cram": [ ], - "3": [ + "csi": [ ], - "4": [ + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T16:48:27.608944526" + }, + "bam_bai_index": { + "content": [ + { + "bai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bam.bai" ] ], - "5": [ + "bam": [ + [ + { + "id": "test" + }, + "test.bam" + ] + ], + "crai": [ ], - "6": [ + "cram": [ ], - "7": [ + "csi": [ ], - "8": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T16:48:52.047178732" + }, + "cram_to_bam": { + "content": [ + { "bai": [ ], "bam": [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bam" ] ], "crai": [ @@ -381,13 +553,7 @@ ], "csi": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "sam": [ @@ -398,52 +564,20 @@ "unselected_index": [ ], - "versions": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:46:52.477256747" - }, - "cram_to_bam_index_csi": { - "content": [ - "test.bam.csi" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:12.958617" - }, - "bam_bai_index": { - "content": [ - "test.bam", - "test.bam.bai", - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - 
], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:45:29.205677197" - }, - "cram_to_bam_index_cram": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:12.972288" + "timestamp": "2026-02-03T16:50:41.727031999" }, "bam_bai_index - stub": { "content": [ @@ -451,8 +585,7 @@ "0": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -466,8 +599,7 @@ "3": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -485,13 +617,16 @@ ], "8": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ], "bai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -499,8 +634,7 @@ "bam": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -523,306 +657,67 @@ "unselected_index": [ ], - "versions": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:51:10.220507926" - }, - "cram_to_bam_sam": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:04.999247" - }, - "cram_to_bam_index_sam": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:12.976457" - }, - "cram_crai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:56.497581" - }, - "cram_csi": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - 
"nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:56.50038" - }, - "cram_to_bam_cram": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:04.992239" - }, - "bam_stub_sam": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:32.079529" - }, - "cram_cram": { - "content": [ - "test.cram" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:56.490286" - }, - "cram_to_bam_crai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:04.989247" - }, - "cram_to_bam_index_crai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:12.967681" - }, - "cram_crai_index_unselected": { - "content": [ - "test.cram", - "test.cram.crai", - "test.unselected.cram", - "test.unselected.cram.crai", - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:45:48.461930073" - }, - "cram_to_bam_index_qname_versions": { - "content": [ - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-16T09:25:51.953436682" - }, - "cram_to_bam_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:04.982361" - }, - "cram_to_bam_bai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:04.98601" - }, - "cram_to_bam_versions": { - "content": [ - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": 
"2025-02-13T16:33:39.363718229" - }, - "cram_bam": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:56.495512" - }, - "bam_stub_cram": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:32.076908" - }, - "cram_to_bam_index_qname_bai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:23.328458" - }, - "cram_to_bam_index_qname_crai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:23.330789" - }, - "cram_bai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:37:56.493129" - }, - "bam_stub_crai": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:32.074313" - }, - "cram_to_bam_index_qname_bam": { - "content": [ - "test.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-12T19:38:23.322874" - }, - "bam_bai_index_unselected": { - "content": [ - "test.bam", - "test.bam.bai", - "test.unselected.bam", - "test.unselected.bam.csi", - [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:45:38.993014707" - }, - "cram_to_bam_index_qname_cram": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:23.333248" + "timestamp": "2026-02-03T16:49:36.783381688" }, - "cram_to_bam_csi": { + "cram": { "content": [ - [ - - ] + { + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + + ], + "cram": [ + [ + { + "id": "test" + }, + "test.cram" + ] + ], + "csi": [ + + ], + 
"sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:04.995454" + "timestamp": "2026-02-03T16:49:09.393102901" }, "bam_bai_index_uselected - stub": { "content": [ @@ -830,8 +725,7 @@ "0": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -845,8 +739,7 @@ "3": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -860,8 +753,7 @@ "6": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -869,20 +761,22 @@ "7": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "8": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ], "bai": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -890,8 +784,7 @@ "bam": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -911,8 +804,7 @@ "unselected": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -920,53 +812,138 @@ "unselected_index": [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.unselected.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-02-14T07:51:24.947216832" - }, - 
"cram_to_bam_index_qname_unselected": { - "content": [ - "test.unselected.bam" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:23.322874" + "timestamp": "2026-02-03T16:49:42.344755528" }, - "bam_sam": { + "cram_crai_index_unselected": { "content": [ - [ - - ] + { + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test" + }, + "test.cram.crai" + ] + ], + "cram": [ + [ + { + "id": "test" + }, + "test.cram:md5,9b6eeca8f6b4b744297ae7cc87c031a4" + ] + ], + "csi": [ + + ], + "sam": [ + + ], + "unselected": [ + [ + { + "id": "test" + }, + "test.unselected.cram" + ] + ], + "unselected_index": [ + [ + { + "id": "test" + }, + "test.unselected.cram.crai" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:37:51.264651" + "timestamp": "2026-02-03T16:49:03.431621547" }, - "bam_stub_csi": { + "bam_stub": { "content": [ - "test.bam.csi" + { + "bai": [ + + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bam.csi" + ] + ], + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_VIEW", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-02-12T19:38:32.068596" + "timestamp": "2026-02-03T16:49:26.152824543" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/nextflow.config b/modules/nf-core/samtools/view/tests/nextflow.config new file mode 100644 index 00000000..37c56439 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SAMTOOLS_VIEW { + 
ext.args = params.samtools_args + } +} diff --git a/modules/nf-core/seqtk/cutn/main.nf b/modules/nf-core/seqtk/cutn/main.nf index c2344a86..24005703 100644 --- a/modules/nf-core/seqtk/cutn/main.nf +++ b/modules/nf-core/seqtk/cutn/main.nf @@ -12,7 +12,7 @@ process SEQTK_CUTN { output: tuple val(meta), path("*.bed") , emit: bed - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('seqtk'), eval("seqtk 2>&1 | sed -n 's/^Version: //p'"), emit: versions_seqtk, topic: versions when: task.ext.when == null || task.ext.when @@ -27,11 +27,6 @@ process SEQTK_CUTN { $args \\ -g $fasta \\ > ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS """ stub: @@ -39,11 +34,6 @@ process SEQTK_CUTN { """ touch ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/seqtk/cutn/meta.yml b/modules/nf-core/seqtk/cutn/meta.yml index 2e39a477..037d381f 100644 --- a/modules/nf-core/seqtk/cutn/meta.yml +++ b/modules/nf-core/seqtk/cutn/meta.yml @@ -6,9 +6,9 @@ keywords: - seqtk tools: - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in - the FASTA or FASTQ format. Seqtk mergepe command merges pair-end reads into - one interleaved file. + description: Seqtk is a fast and lightweight tool for processing sequences + in the FASTA or FASTQ format. Seqtk mergepe command merges pair-end reads + into one interleaved file. homepage: https://github.com/lh3/seqtk documentation: https://docs.csc.fi/apps/seqtk/ tool_dev_url: https://github.com/lh3/seqtk @@ -24,9 +24,10 @@ input: type: file description: A single fasta file to be split. 
pattern: "*.{fasta}" + ontologies: [] output: - - bed: - - meta: + bed: + - - meta: type: map description: | Groovy Map containing sample information @@ -35,11 +36,28 @@ output: type: file description: The output bed which summarised locations of cuts pattern: "*.{bed}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_seqtk: + - - ${task.process}: + type: string + description: The name of the process + - seqtk: + type: string + description: The name of the tool + - "seqtk 2>&1 | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - seqtk: + type: string + description: The name of the tool + - "seqtk 2>&1 | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap b/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap index 998beda5..efafb560 100644 --- a/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap +++ b/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap @@ -3,6 +3,10 @@ "content": [ "test.bed" ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, "timestamp": "2024-02-22T16:02:14.744148" }, "homo_21_cut_stub": { @@ -18,7 +22,11 @@ ] ], "1": [ - "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428" + [ + "SEQTK_CUTN", + "seqtk", + "1.4-r122" + ] ], "bed": [ [ @@ -29,12 +37,20 @@ "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428" + "versions_seqtk": [ + [ + "SEQTK_CUTN", + "seqtk", + "1.4-r122" + ] ] } ], - "timestamp": "2024-02-22T16:02:23.596389" + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-14T13:16:52.871002273" }, "homo_21_cut": { "content": [ 
@@ -49,7 +65,11 @@ ] ], "1": [ - "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428" + [ + "SEQTK_CUTN", + "seqtk", + "1.4-r122" + ] ], "bed": [ [ @@ -60,11 +80,19 @@ "test.bed:md5,16cbba84e3a4bdbb52217afb5051f948" ] ], - "versions": [ - "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428" + "versions_seqtk": [ + [ + "SEQTK_CUTN", + "seqtk", + "1.4-r122" + ] ] } ], - "timestamp": "2024-02-22T16:02:14.695205" + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-14T13:16:38.948952544" } } \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 00000000..771b1387 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.21 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf new file mode 100644 index 00000000..30eae745 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -0,0 +1,40 @@ +process TABIX_BGZIPTABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' : + 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.gz"), path("*.{tbi,csi}"), emit: gz_index + tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix + tuple val("${task.process}"), val('bgzip'), eval("bgzip --version | sed '1!d;s/.* //'"), topic: versions, emit: versions_bgzip + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz + tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz + + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args2 = task.ext.args2 ?: '' + def index = args2.contains("-C ") || args2.contains("--csi") ? "csi" : "tbi" + """ + echo "" | gzip > ${prefix}.${input.getExtension()}.gz + touch ${prefix}.${input.getExtension()}.gz.${index} + + """ +} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml new file mode 100644 index 00000000..2a3078c5 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -0,0 +1,93 @@ +name: tabix_bgziptabix +description: bgzip a sorted tab-delimited genome file and then create tabix + index +keywords: + - bgzip + - compress + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. 
+ homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Sorted tab-delimited genome file + ontologies: [] +output: + gz_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gz": + type: file + description: bgzipped tab-delimited genome file + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - "*.{tbi,csi}": + type: file + description: Tabix index file (either tbi or csi) + pattern: "*.{tbi,csi}" + ontologies: [] + versions_tabix: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + versions_bgzip: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bgzip: + type: string + description: The tool name + - bgzip --version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - bgzip: + type: string + description: The tool name + - bgzip --version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the 
version of the tool + +authors: + - "@maxulysse" + - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test new file mode 100644 index 00000000..1955b143 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process TABIX_BGZIPTABIX" + script "../main.nf" + process "TABIX_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgziptabix" + + config "./nextflow.config" + + test("sarscov2_bed_tbi") { + when { + params { + module_args = '-p vcf' + } + process { + """ + input[0] = [ + [ id:'tbi_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2_bed_csi") { + when { + params { + module_args = '-p vcf --csi' + } + process { + """ + input[0] = [ + [ id:'csi_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2_bed_csi_stub") { + options "-stub" + + when { + params { + module_args = '-p vcf --csi' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2_bed_tbi_stub") { + options "-stub" + + when { + params { + module_args = '-p vcf' + } + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: 
true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 00000000..cf3ce8aa --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "sarscov2_bed_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" + ] + ], + "1": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ], + "2": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "gz_index": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" + ] + ], + "versions_bgzip": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "versions_tabix": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-01-29T23:36:32.823417" + }, + "sarscov2_bed_csi": { + "content": [ + { + "0": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" + ] + ], + "1": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ], + "2": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "gz_index": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" + ] + ], + "versions_bgzip": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "versions_tabix": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-01-29T23:36:36.715208" + }, + 
"sarscov2_bed_tbi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ], + "2": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "gz_index": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bgzip": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "versions_tabix": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-01-29T23:36:45.016007" + }, + "sarscov2_bed_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ], + "2": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "gz_index": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bgzip": [ + [ + "TABIX_BGZIPTABIX", + "bgzip", + "1.21" + ] + ], + "versions_tabix": [ + [ + "TABIX_BGZIPTABIX", + "tabix", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-01-29T23:36:40.5401" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/nextflow.config b/modules/nf-core/tabix/bgziptabix/tests/nextflow.config new file mode 100644 index 00000000..5b2316c8 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = params.module_args + } +} diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml 
b/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml index 2e853e09..1211bc40 100644 --- a/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::ucsc-bedgraphtobigwig=469 + - bioconda::ucsc-bedgraphtobigwig=482 diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf index 81cdee95..eafac8b8 100644 --- a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -2,11 +2,10 @@ process UCSC_BEDGRAPHTOBIGWIG { tag "$meta.id" label 'process_single' - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:469--h9b8f530_0' : - 'biocontainers/ucsc-bedgraphtobigwig:469--h9b8f530_0' }" + 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:482--hdc0a859_0' : + 'biocontainers/ucsc-bedgraphtobigwig:482--hdc0a859_0' }" input: tuple val(meta), path(bedgraph) @@ -14,36 +13,25 @@ process UCSC_BEDGRAPHTOBIGWIG { output: tuple val(meta), path("*.bigWig"), emit: bigwig - path "versions.yml" , emit: versions - + tuple val("${task.process}"), val('ucsc'), val('482'), topic: versions, emit: versions_ucsc + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '469' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" bedGraphToBigWig \\ + $args \\ $bedgraph \\ $sizes \\ ${prefix}.bigWig - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ucsc: $VERSION - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '469' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}.bigWig - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ucsc: $VERSION - END_VERSIONS """ } diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml index 5d42cbde..7d0cf57f 100755 --- a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml @@ -23,13 +23,15 @@ input: type: file description: bedGraph file pattern: "*.{bedGraph}" - - - sizes: - type: file - description: chromosome sizes file - pattern: "*.{sizes}" + ontologies: [] + - sizes: + type: file + description: chromosome sizes file + pattern: "*.{sizes}" + ontologies: [] output: - - bigwig: - - meta: + bigwig: + - - meta: type: map description: | Groovy Map containing sample information @@ -38,11 +40,30 @@ output: type: file description: bigWig file pattern: "*.{bigWig}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_ucsc: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ucsc: + type: string + description: The tool name + - "482": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ucsc: + type: string + description: The tool name + - "482": + type: eval + description: The expression to obtain the version of the tool + authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap 
b/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap index 7c731f65..7c51213f 100644 --- a/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap +++ b/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,db26514184acfdf220bb2f061382cf8c" + [ + "UCSC_BEDGRAPHTOBIGWIG", + "ucsc", + "482" + ] ], "bigwig": [ [ @@ -21,16 +25,20 @@ "test.bigWig:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,db26514184acfdf220bb2f061382cf8c" + "versions_ucsc": [ + [ + "UCSC_BEDGRAPHTOBIGWIG", + "ucsc", + "482" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.04.3" }, - "timestamp": "2024-10-18T10:47:58.558813949" + "timestamp": "2026-02-25T14:56:47.843203613" }, "Should run without failures": { "content": [ @@ -44,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,db26514184acfdf220bb2f061382cf8c" + [ + "UCSC_BEDGRAPHTOBIGWIG", + "ucsc", + "482" + ] ], "bigwig": [ [ @@ -54,15 +66,19 @@ "test.bigWig:md5,910ecc7f57e3bbd5fac5a8edba4f615d" ] ], - "versions": [ - "versions.yml:md5,db26514184acfdf220bb2f061382cf8c" + "versions_ucsc": [ + [ + "UCSC_BEDGRAPHTOBIGWIG", + "ucsc", + "482" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.04.3" }, - "timestamp": "2024-10-18T10:47:36.476844229" + "timestamp": "2026-02-25T14:56:35.886146907" } } \ No newline at end of file diff --git a/modules/nf-core/windowmasker/mkcounts/environment.yml b/modules/nf-core/windowmasker/mkcounts/environment.yml index 968930b6..0a7e8e60 100644 --- a/modules/nf-core/windowmasker/mkcounts/environment.yml +++ b/modules/nf-core/windowmasker/mkcounts/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::blast=2.15.0 + - bioconda::blast=2.17.0 diff --git a/modules/nf-core/windowmasker/mkcounts/main.nf b/modules/nf-core/windowmasker/mkcounts/main.nf index 
406f7761..5a90c7e4 100644 --- a/modules/nf-core/windowmasker/mkcounts/main.nf +++ b/modules/nf-core/windowmasker/mkcounts/main.nf @@ -1,18 +1,18 @@ process WINDOWMASKER_MKCOUNTS { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': - 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0c/0c86cbb145786bf5c24ea7fb13448da5f7d5cd124fd4403c1da5bc8fc60c2588/data': + 'community.wave.seqera.io/library/blast:2.17.0--d4fb881691596759' }" input: tuple val(meta), path(ref) output: tuple val(meta), path("*.txt") , emit: counts - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('windowmasker'), eval("windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker. //; s/ .*\$//'"), topic: versions, emit: versions_windowmasker when: task.ext.when == null || task.ext.when @@ -30,15 +30,10 @@ process WINDOWMASKER_MKCOUNTS { """ windowmasker -mk_counts \\ - $args \\ + ${args} \\ -mem ${memory} \\ -in ${ref} \\ -out ${prefix}.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - windowmasker: \$(windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker: //; s/ .*\$//') - END_VERSIONS """ stub: @@ -46,10 +41,5 @@ process WINDOWMASKER_MKCOUNTS { """ touch ${prefix}.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - windowmasker: \$(windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker: //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/windowmasker/mkcounts/meta.yml b/modules/nf-core/windowmasker/mkcounts/meta.yml index 825a0674..5ad302af 100644 --- a/modules/nf-core/windowmasker/mkcounts/meta.yml +++ b/modules/nf-core/windowmasker/mkcounts/meta.yml @@ -21,9 +21,10 @@ input: - ref: type: file description: An input 
nucleotide fasta file. + ontologies: [] output: - - counts: - - meta: + counts: + - - meta: type: map description: | Groovy Map containing sample information @@ -32,11 +33,28 @@ output: type: file description: A file containing frequency counts of repetitive units. pattern: "*.txt" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_windowmasker: + - - ${task.process}: + type: string + description: The name of the process + - windowmasker: + type: string + description: The name of the tool + - windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker. //; s/ .*\$//': + type: string + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - windowmasker: + type: string + description: The name of the tool + - windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker. //; s/ .*\$//': + type: eval + description: The expression to obtain the version of the tool authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test index bf53d7fa..8352bcaa 100644 --- a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test @@ -12,10 +12,6 @@ nextflow_process { test("sarscov2_fasta") { when { - params { - // define parameters here. 
Example: - // outdir = "tests/results" - } process { """ input[0] = [ diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap index cae2d306..eb95eddb 100644 --- a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + [ + "WINDOWMASKER_MKCOUNTS", + "windowmasker", + "1.0.0" + ] ], "counts": [ [ @@ -21,12 +25,20 @@ "test.txt:md5,5f5d7e926fdf13b0c57651f962cc1253" ] ], - "versions": [ - "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + "versions_windowmasker": [ + [ + "WINDOWMASKER_MKCOUNTS", + "windowmasker", + "1.0.0" + ] ] } ], - "timestamp": "2024-02-15T13:29:58.837482" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-16T12:29:24.977086238" }, "sarscov2_fasta_stub": { "content": [ @@ -40,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + [ + "WINDOWMASKER_MKCOUNTS", + "windowmasker", + "1.0.0" + ] ], "counts": [ [ @@ -50,11 +66,19 @@ "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + "versions_windowmasker": [ + [ + "WINDOWMASKER_MKCOUNTS", + "windowmasker", + "1.0.0" + ] ] } ], - "timestamp": "2024-02-15T13:30:07.618636" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-16T12:29:31.801768092" } } \ No newline at end of file diff --git a/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config b/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config deleted file mode 100644 index 65fc1910..00000000 --- a/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: WINDOWMASKER_MKCOUNTS { - ext.args = "" - } -} \ No newline at end of file diff --git 
a/modules/nf-core/windowmasker/ustat/environment.yml b/modules/nf-core/windowmasker/ustat/environment.yml index 968930b6..0a7e8e60 100644 --- a/modules/nf-core/windowmasker/ustat/environment.yml +++ b/modules/nf-core/windowmasker/ustat/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::blast=2.15.0 + - bioconda::blast=2.17.0 diff --git a/modules/nf-core/windowmasker/ustat/main.nf b/modules/nf-core/windowmasker/ustat/main.nf index 7a7d29f6..0385aeab 100644 --- a/modules/nf-core/windowmasker/ustat/main.nf +++ b/modules/nf-core/windowmasker/ustat/main.nf @@ -1,11 +1,11 @@ process WINDOWMASKER_USTAT { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': - 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0c/0c86cbb145786bf5c24ea7fb13448da5f7d5cd124fd4403c1da5bc8fc60c2588/data': + 'community.wave.seqera.io/library/blast:2.17.0--d4fb881691596759' }" input: tuple val(meta) , path(counts) @@ -13,7 +13,7 @@ process WINDOWMASKER_USTAT { output: tuple val(meta), path("${output}") , emit: intervals - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('windowmasker'), eval("windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker. 
//; s/ .*\$//'"), topic: versions, emit: versions_windowmasker when: task.ext.when == null || task.ext.when @@ -35,14 +35,9 @@ process WINDOWMASKER_USTAT { """ windowmasker -ustat \\ ${counts} \\ - $args \\ + ${args} \\ -in ${ref} \\ -out ${output} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - windowmasker: \$(windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker: //; s/ .*\$//') - END_VERSIONS """ stub: @@ -60,10 +55,5 @@ process WINDOWMASKER_USTAT { output = "${prefix}.${outfmt}" """ touch ${output} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - windowmasker: \$(windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker: //; s/ .*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/windowmasker/ustat/meta.yml b/modules/nf-core/windowmasker/ustat/meta.yml index bc51a934..9f558efd 100644 --- a/modules/nf-core/windowmasker/ustat/meta.yml +++ b/modules/nf-core/windowmasker/ustat/meta.yml @@ -22,6 +22,7 @@ input: - counts: type: file description: Contains count data of repetitive regions. + ontologies: [] - - meta2: type: map description: | @@ -30,9 +31,10 @@ input: - ref: type: file description: An input nucleotide fasta file. + ontologies: [] output: - - intervals: - - meta: + intervals: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,11 +42,28 @@ output: - ${output}: type: file description: intervals - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_windowmasker: + - - ${task.process}: + type: string + description: The name of the process + - windowmasker: + type: string + description: The name of the tool + - windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker. 
//; s/ .*\$//': + type: string + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - windowmasker: + type: string + description: The name of the tool + - windowmasker -version-full | head -n 1 | sed 's/^.*windowmasker. //; s/ .*\$//': + type: eval + description: The expression to obtain the version of the tool authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap b/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap index 79d3d82d..0679a399 100644 --- a/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap +++ b/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap @@ -11,7 +11,11 @@ ] ], "1": [ - "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + [ + "WINDOWMASKER_USTAT", + "windowmasker", + "1.0.0" + ] ], "intervals": [ [ @@ -21,12 +25,20 @@ "test.interval:md5,c91346601564ab88cbb0f913881d05e2" ] ], - "versions": [ - "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + "versions_windowmasker": [ + [ + "WINDOWMASKER_USTAT", + "windowmasker", + "1.0.0" + ] ] } ], - "timestamp": "2024-02-15T14:19:12.033774" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-16T12:29:57.79821227" }, "sarscov2_fasta_stub": { "content": [ @@ -40,7 +52,11 @@ ] ], "1": [ - "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + [ + "WINDOWMASKER_USTAT", + "windowmasker", + "1.0.0" + ] ], "intervals": [ [ @@ -50,11 +66,19 @@ "test.interval:md5,c91346601564ab88cbb0f913881d05e2" ] ], - "versions": [ - "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + "versions_windowmasker": [ + [ + "WINDOWMASKER_USTAT", + "windowmasker", + "1.0.0" + ] ] } ], - "timestamp": "2024-02-15T14:19:21.850526" + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-16T12:30:06.750626869" } } \ No newline at end of file diff --git 
a/modules/nf-core/windowmasker/ustat/tests/nextflow.config b/modules/nf-core/windowmasker/ustat/tests/nextflow.config index 00b63c45..8f443ef7 100644 --- a/modules/nf-core/windowmasker/ustat/tests/nextflow.config +++ b/modules/nf-core/windowmasker/ustat/tests/nextflow.config @@ -2,4 +2,4 @@ process { withName: 'test_windowmasker_ustat:WINDOWMASKER_USTAT' { ext.args = "-dust true -outfmt interval" } -} \ No newline at end of file +} diff --git a/modules/sanger-tol/cramalign/bwamem2alignhic/environment.yml b/modules/sanger-tol/cramalign/bwamem2alignhic/environment.yml new file mode 100644 index 00000000..0606effc --- /dev/null +++ b/modules/sanger-tol/cramalign/bwamem2alignhic/environment.yml @@ -0,0 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/modules/environment-schema.json +--- +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bwa-mem2=2.2.1 + - bioconda::htslib=1.22.1 + - bioconda::samtools=1.22.1 diff --git a/modules/sanger-tol/cramalign/bwamem2alignhic/main.nf b/modules/sanger-tol/cramalign/bwamem2alignhic/main.nf new file mode 100644 index 00000000..9ff2fec0 --- /dev/null +++ b/modules/sanger-tol/cramalign/bwamem2alignhic/main.nf @@ -0,0 +1,57 @@ +process CRAMALIGN_BWAMEM2ALIGNHIC { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e0/e05ce34b46ad42810eb29f74e4e304c0cb592b2ca15572929ed8bbaee58faf01/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:db98f81f55b64113' }" + + input: + tuple val(meta), path(cram), path(crai), val(rglines) + tuple val(meta2), path(index), path(reference) + tuple val(chunkn), val(range) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val("${task.process}"), val('bwamem2'), eval('bwa-mem2 version 2>| grep -o -E "[0-9]+(\\.[0-9]+)+"'), emit: versions_bwamem2, topic: versions + tuple val("${task.process}"), val('samtools'), eval('samtools --version | head -1 | sed -e "s/samtools //"'), emit: versions_samtools, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: '-t' // copy RG, BC and QT tags to the FASTQ header line + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def args5 = task.ext.args5 ?: '' + def args6 = task.ext.args6 ?: '' + def prefix = task.ext.prefix ?: "${cram}.${chunkn}.${meta.id}" + // Prepare read group arguments if rglines are found, else, empty string + def rg_arg = rglines ? '-C ' + rglines.collect { line -> + // Add SM when not present to avoid errors from downstream tool (e.g. variant callers) + def l = line.contains("SM:") ? line + : meta.sample ? "${line}\tSM:${meta.sample}" + : "${line}\tSM:${meta.id}" + "-H '${l.replaceAll("\t", "\\\\t")}'" + }.join(' ') + : '' + // Please be aware one of the tools here required mem = 28 * reference size!!! 
+ """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + samtools cat ${args1} -r "#:${range[0]}-${range[1]}" ${cram} |\\ + samtools fastq ${args2} - |\\ + bwa-mem2 mem ${args3} -t ${task.cpus} \${INDEX} ${rg_arg} - |\\ + samtools fixmate ${args4} - - |\\ + samtools view -h ${args5} |\\ + samtools sort ${args6} -@${task.cpus} -T ${prefix}_tmp -o ${prefix}.bam - + """ + + stub: + def prefix = task.ext.prefix ?: "${cram}.${chunkn}.${meta.id}" + """ + touch ${prefix}.bam + """ +} diff --git a/modules/sanger-tol/cramalign/bwamem2alignhic/meta.yml b/modules/sanger-tol/cramalign/bwamem2alignhic/meta.yml new file mode 100644 index 00000000..3f21c633 --- /dev/null +++ b/modules/sanger-tol/cramalign/bwamem2alignhic/meta.yml @@ -0,0 +1,132 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/modules/meta-schema.json +name: "cramalign_bwamem2alignhic" +description: | + Aligns a subset of Hi-C reads from a CRAM file to a reference genome using bwa-mem2, + pipes the resulting alignments through samtools fixmate and samtools sort, outputting + an alignment in BAM format. +keywords: + - sort + - alignment + - genomics +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2#usage + arxiv: arXiv:1303.3997 + licence: ["MIT"] + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - cram: + type: file + description: CRAM file containing Hi-C sequences to align + pattern: "*.cram" + ontologies: + - edam: "http://edamontology.org/format_3462" # CRAM + - crai: + type: file + description: CRAM index file + pattern: "*.crai" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - rglines: + type: list + description: A list of strings, each a valid SAM-format @RG header line, + to pass to the aligner + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - index: + type: file + description: bwa-mem2 index of the reference to be aligned to + ontologies: [] + - reference: + type: file + description: FASTA file containing reference for alignment + pattern: "*.fasta(.gz)?" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - - chunkn: + type: integer + description: Integer index of CRAM chunk to map + - range: + type: list + description: Start and end indices defining CRAM slices to align +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
`[ id:'sample1' ]` + - "*.bam": + type: file + description: BAM file of mapped Hi-C sequences + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + versions_bwamem2: + - - ${task.process}: + type: string + description: Name of the process + - bwamem2: + type: string + description: The name of the tool + - bwa-mem2 version 2>| grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool + + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version | head -1 | sed -e "s/samtools //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - bwamem2: + type: string + description: The name of the tool + - bwa-mem2 version 2>| grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version | head -1 | sed -e "s/samtools //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@yumisims" + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/sanger-tol/cramalign/bwamem2alignhic/tests/main.nf.test b/modules/sanger-tol/cramalign/bwamem2alignhic/tests/main.nf.test new file mode 100644 index 00000000..8861f423 --- /dev/null +++ b/modules/sanger-tol/cramalign/bwamem2alignhic/tests/main.nf.test @@ -0,0 +1,172 @@ + +nextflow_process { + + name "Test Process CRAMALIGN_BWAMEM2ALIGNHIC" + script "../main.nf" + process "CRAMALIGN_BWAMEM2ALIGNHIC" + + tag "modules" + tag "modules_sangertol" + tag "bwamem2/index" + tag "cramalign" + tag "cramalign/bwamem2alignhic" + tag "nf-core/bwamem2/index" + + setup { + 
nfcoreInitialise("${launchDir}/library/") + nfcoreInstall("${launchDir}/library/", ["bwamem2/index"]) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + + run("BWAMEM2_INDEX") { + script "../../../../nf-core/bwamem2/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id: "test" ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ]) + """ + } + } + } + + test("meles meles") { + config "./nextflow.config" + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + bwamem2_mem_args = "-5SPp" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + } + + process { + """ + input[0] = channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram.crai', checkIfExists: true), + ['@RG\\\\tID:35528_2#1\\\\tDT:2020-11-17T00:00:00+0000\\\\tPU:201117_A00948_0206_AHGML2DSXY_2#1\\\\tLB:33637906\\\\tPG:SCS\\\\tSM:SAMEA5962964\\\\tCN:SC\\\\tPL:ILLUMINA\\\\tDS:ERP116890: Sequencing and assembly of genomes from British species for the Darwin Tree of Life Project. This data is part of a pre-publication release. 
For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/'] + ]) + input[1] = channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true), + ]).join(BWAMEM2_INDEX.out.index).map { meta, ref, idx -> [meta, idx, ref] } + input[2] = channel.of([ + 1, [0, 1], + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + bam(process.out.bam.get(0).get(1)).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/") + } + + } + + test("meles meles - no readgroup lines") { + config "./nextflow.config" + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + bwamem2_mem_args = "-5SPp" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + } + + process { + """ + input[0] = channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram.crai', checkIfExists: true), + [] + ]) + input[1] = channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true), + ]).join(BWAMEM2_INDEX.out.index).map { meta, ref, idx -> [meta, idx, ref] } + input[2] = channel.of([ + 1, [0, 1], + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + bam(process.out.bam.get(0).get(1)).getReadsMD5(), + 
process.out.findAll { key, val -> key.startsWith("versions")} + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/") + } + + } + + test("meles meles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram.crai', checkIfExists: true), + ['@RG\\\\tID:35528_2#1\\\\tDT:2020-11-17T00:00:00+0000\\\\tPU:201117_A00948_0206_AHGML2DSXY_2#1\\\\tLB:33637906\\\\tPG:SCS\\\\tSM:SAMEA5962964\\\\tCN:SC\\\\tPL:ILLUMINA\\\\tDS:ERP116890: Sequencing and assembly of genomes from British species for the Darwin Tree of Life Project. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/'] + ]) + input[1] = channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true), + ]).join(BWAMEM2_INDEX.out.index).map { meta, ref, idx -> [meta, idx, ref] } + input[2] = channel.of([ + 1, [0, 1], + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/") + } + + } + +} diff --git a/modules/sanger-tol/cramalign/bwamem2alignhic/tests/main.nf.test.snap b/modules/sanger-tol/cramalign/bwamem2alignhic/tests/main.nf.test.snap new file mode 100644 index 00000000..bd8f9597 --- /dev/null +++ b/modules/sanger-tol/cramalign/bwamem2alignhic/tests/main.nf.test.snap @@ -0,0 +1,109 @@ +{ + "meles meles": { + 
"content": [ + "12bbc3437515580a7dcc3c0d677c225a", + { + "versions_bwamem2": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "bwamem2", + "2.2.1" + ] + ], + "versions_samtools": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-15T14:26:45.992327" + }, + "meles meles - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "35528_2%231_subset.cram.1.test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "bwamem2", + "2.2.1" + ] + ], + "2": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "35528_2%231_subset.cram.1.test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bwamem2": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "bwamem2", + "2.2.1" + ] + ], + "versions_samtools": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-15T14:27:17.465643" + }, + "meles meles - no readgroup lines": { + "content": [ + "12bbc3437515580a7dcc3c0d677c225a", + { + "versions_bwamem2": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "bwamem2", + "2.2.1" + ] + ], + "versions_samtools": [ + [ + "CRAMALIGN_BWAMEM2ALIGNHIC", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-15T14:27:04.818428" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/cramalign/bwamem2alignhic/tests/nextflow.config b/modules/sanger-tol/cramalign/bwamem2alignhic/tests/nextflow.config new file mode 100644 index 00000000..52e40cbc --- /dev/null +++ b/modules/sanger-tol/cramalign/bwamem2alignhic/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: BWAMEM2_INDEX { + memory = 15.Gb + } + + withName: CRAMALIGN_BWAMEM2ALIGNHIC { + ext.args1 = { "${params.samtools_cat_args}" } + 
+ ext.args2 = { "${params.samtools_fastq_args}" } + ext.args3 = { "${params.bwamem2_mem_args}" } + ext.args4 = { "${params.samtools_fixmate_args}" } + ext.args5 = { "${params.samtools_view_args}" } + ext.args6 = { "${params.samtools_sort_args}" } + } +} diff --git a/modules/sanger-tol/cramalign/gencramchunks/main.nf b/modules/sanger-tol/cramalign/gencramchunks/main.nf new file mode 100644 index 00000000..a5135e6f --- /dev/null +++ b/modules/sanger-tol/cramalign/gencramchunks/main.nf @@ -0,0 +1,28 @@ +process CRAMALIGN_GENCRAMCHUNKS { + label "process_single" + executor "local" + + input: + // Native processes can't take path values as inputs + tuple val(meta), val(cram), val(crai) + val cram_bin_size + + output: + tuple val(meta), val(cram), val(crai), val(chunkn), val(slices), emit: cram_slices + tuple val("${task.process}"), val('cramchunks'), val('1.1.0'), emit: versions_cramchunks, topic: versions + + when: + task.ext.when == null || task.ext.when + + exec: + def n_slices = file(crai).countLines(decompress: true) + def size = cram_bin_size + def n_bins = Math.ceil(n_slices / size).toInteger() + chunkn = (0..<n_bins).toList() // chunk indices 0 .. n_bins-1 + slices = chunkn.collect { chunk -> + def lower = chunk * size + def upper = [lower + size, n_slices].min() + + return [ lower, upper - 1 ] // inclusive [first, last] slice of this chunk + } +} diff --git a/modules/sanger-tol/cramalign/gencramchunks/meta.yml b/modules/sanger-tol/cramalign/gencramchunks/meta.yml new file mode 100644 index 00000000..94d42f1b --- /dev/null +++ b/modules/sanger-tol/cramalign/gencramchunks/meta.yml @@ -0,0 +1,86 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "CRAMALIGN_GENCRAMCHUNKS" +description: Determine the chunks to split a CRAM file into for alignment +keywords: + - cram + - chunk + - align +tools: + - "cramalign/gencramchunks": + description: "not a real tool!"
+ homepage: "https://i.need.this.to.lint.org" + documentation: "https://i.need.this.to.lint.org" + tool_dev_url: "https://i.need.this.to.lint.org" + doi: no DOI available + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - cram: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.cram" + ontologies: + - edam: "http://edamontology.org/format_3462" + - crai: + type: file + description: Cram index file + pattern: "*.crai" + ontologies: + - edam: "http://edamontology.org/format_3475" + - cram_bin_size: + type: integer + description: Number of CRAM slices in each CRAM chunk +output: + cram_slices: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - cram: + type: file + description: Input CRAM file + pattern: "*.cram" + ontologies: + - edam: "http://edamontology.org/format_3462" + - crai: + type: file + description: Input CRAM index + pattern: "*.crai" + ontologies: + - edam: "http://edamontology.org/format_3475" + - chunkn: + type: list + description: List containing CRAM chunk indexes + - slices: + type: list + description: List containing the start and end slices of the CRAM chunk + versions_cramchunks: + - - ${task.process}: + type: string + description: The name of the process + - cramchunks: + type: string + description: The name of the tool + - 1.1.0: + type: string + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - cramchunks: + type: string + description: The name of the tool + - 1.1.0: + type: string + description: The expression to obtain the version of the tool +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/sanger-tol/cramalign/gencramchunks/tests/main.nf.test b/modules/sanger-tol/cramalign/gencramchunks/tests/main.nf.test new file mode 100644 
index 00000000..e6b4afcf --- /dev/null +++ b/modules/sanger-tol/cramalign/gencramchunks/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process CRAMALIGN_GENCRAMCHUNKS" + script "../main.nf" + process "CRAMALIGN_GENCRAMCHUNKS" + + tag "modules" + tag "modules_sangertol" + tag "cramalign" + tag "cramalign/gencramchunks" + + test("cram") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram.crai', checkIfExists: true) + ] + input[1] = 10 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/sanger-tol/cramalign/gencramchunks/tests/main.nf.test.snap b/modules/sanger-tol/cramalign/gencramchunks/tests/main.nf.test.snap new file mode 100644 index 00000000..cc9171d3 --- /dev/null +++ b/modules/sanger-tol/cramalign/gencramchunks/tests/main.nf.test.snap @@ -0,0 +1,443 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram", + "/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram.crai", + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38 + ], + [ + [ + 0, + 9 + ], + [ + 10, + 19 + ], + [ + 20, + 29 + ], + [ + 30, + 39 + ], + [ + 40, + 49 + ], + [ + 50, + 59 + ], + [ + 60, + 69 + ], + [ + 70, + 79 + ], + [ + 80, + 89 + ], + [ + 90, + 99 + ], + [ + 100, + 109 + ], + [ + 110, + 119 + ], + [ + 120, + 129 + ], + [ + 130, + 139 + ], + [ + 140, 
+ 149 + ], + [ + 150, + 159 + ], + [ + 160, + 169 + ], + [ + 170, + 179 + ], + [ + 180, + 189 + ], + [ + 190, + 199 + ], + [ + 200, + 209 + ], + [ + 210, + 219 + ], + [ + 220, + 229 + ], + [ + 230, + 239 + ], + [ + 240, + 249 + ], + [ + 250, + 259 + ], + [ + 260, + 269 + ], + [ + 270, + 279 + ], + [ + 280, + 289 + ], + [ + 290, + 299 + ], + [ + 300, + 309 + ], + [ + 310, + 319 + ], + [ + 320, + 329 + ], + [ + 330, + 339 + ], + [ + 340, + 349 + ], + [ + 350, + 359 + ], + [ + 360, + 369 + ], + [ + 370, + 379 + ], + [ + 380, + 380 + ] + ] + ] + ], + "1": [ + [ + "CRAMALIGN_GENCRAMCHUNKS", + "cramchunks", + "1.1.0" + ] + ], + "cram_slices": [ + [ + { + "id": "test" + }, + "/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram", + "/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram.crai", + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38 + ], + [ + [ + 0, + 9 + ], + [ + 10, + 19 + ], + [ + 20, + 29 + ], + [ + 30, + 39 + ], + [ + 40, + 49 + ], + [ + 50, + 59 + ], + [ + 60, + 69 + ], + [ + 70, + 79 + ], + [ + 80, + 89 + ], + [ + 90, + 99 + ], + [ + 100, + 109 + ], + [ + 110, + 119 + ], + [ + 120, + 129 + ], + [ + 130, + 139 + ], + [ + 140, + 149 + ], + [ + 150, + 159 + ], + [ + 160, + 169 + ], + [ + 170, + 179 + ], + [ + 180, + 189 + ], + [ + 190, + 199 + ], + [ + 200, + 209 + ], + [ + 210, + 219 + ], + [ + 220, + 229 + ], + [ + 230, + 239 + ], + [ + 240, + 249 + ], + [ + 250, + 259 + ], + [ + 260, + 269 + ], + [ + 270, + 279 + ], + [ + 280, + 289 + ], + [ + 290, + 299 + ], + [ + 300, + 309 + ], + [ + 310, + 319 + ], + [ + 320, + 329 + ], + [ + 330, + 339 + ], + [ + 340, + 349 + ], + [ + 350, + 359 + ], + [ + 360, + 369 + ], + [ + 370, + 379 + ], + [ + 380, + 380 + ] + ] + ] + ], + "versions_cramchunks": [ + [ + 
"CRAMALIGN_GENCRAMCHUNKS", + "cramchunks", + "1.1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:07:03.896003" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/cramalign/minimap2alignhic/environment.yml b/modules/sanger-tol/cramalign/minimap2alignhic/environment.yml new file mode 100644 index 00000000..39c111cd --- /dev/null +++ b/modules/sanger-tol/cramalign/minimap2alignhic/environment.yml @@ -0,0 +1,11 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/modules/environment-schema.json +--- +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.22.1 + - bioconda::minimap2=2.30 + - bioconda::samtools=1.22.1 + - conda-forge::gawk=5.3.1 + - conda-forge::perl=5.32.1 diff --git a/modules/sanger-tol/cramalign/minimap2alignhic/main.nf b/modules/sanger-tol/cramalign/minimap2alignhic/main.nf new file mode 100644 index 00000000..e6a0a253 --- /dev/null +++ b/modules/sanger-tol/cramalign/minimap2alignhic/main.nf @@ -0,0 +1,73 @@ +process CRAMALIGN_MINIMAP2ALIGNHIC { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/65/65858e733832166824cfd05291fc456bdf219b02baa3944c2c92efad86a6ee7f/data' : + 'community.wave.seqera.io/library/htslib_minimap2_samtools_gawk_perl:6729620c63652154' }" + + input: + tuple val(meta), path(cram), path(crai), val(rglines) + tuple val(meta2), path(index), path(reference) + tuple val(chunkn), val(range) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val("${task.process}"), val('minimap2'), eval('minimap2 --version | sed "s/minimap2 //g"'), emit: versions_minimap2, topic: versions + tuple val("${task.process}"), val('gawk'), eval('gawk --version | grep -o -E "[0-9]+(\\.[0-9]+)+" | head -n1'), emit: versions_gawk, topic: versions + tuple val("${task.process}"), val('filter_five_end.pl'), eval('echo 1.0'), emit: versions_filterfiveend, topic: versions + tuple val("${task.process}"), val('samtools'), eval('samtools --version | head -1 | sed -e "s/samtools //"'), emit: versions_samtools, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + // WARNING: This module includes the filter_five_end.pl script as a module binary in + // ${moduleDir}/resources/usr/bin/filter_five_end.pl. To use this module, you will + // either have to copy this file to ${projectDir}/bin or set the option + // nextflow.enable.moduleBinaries = true + // in your nextflow.config file. + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: '-t' // copy RG, BC and QT tags to the FASTQ header line + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def args5 = task.ext.args5 ?: '' + def args6 = task.ext.args6 ?: '' + def prefix = task.ext.prefix ?: "${cram}.${chunkn}.${meta.id}" + // Prepare read group arguments if rglines are found, else, empty string + def rg_arg = rglines ? '-y ' + rglines.collect { line -> + // Add SM when not present to avoid errors from downstream tool (e.g. variant callers) + def l = line.contains("SM:") ? 
line + : meta.sample ? "${line}\tSM:${meta.sample}" + : "${line}\tSM:${meta.id}" + "-R '${l.replaceAll("\t", "\\\\t")}'" + }.join(' ') + : '' + """ + samtools cat ${args1} -r "#:${range[0]}-${range[1]}" ${cram} |\\ + samtools fastq ${args2} - |\\ + minimap2 -t${task.cpus} ${args3} ${index} ${rg_arg} - |\\ + gawk -F'\t' ' + BEGIN { OFS="\\t" } + \$1 ~ /^\\@/ { print \$0 } + \$1 !~ /^\\@/ && and(\$2, 64) > 0 { print 1 \$0 } + \$1 !~ /^\\@/ && and(\$2, 64) == 0 { print 2 \$0 } + ' |\\ + filter_five_end.pl |\\ + gawk ' + BEGIN { FS = OFS="\\t" } + \$1 ~ /^\\@/ { print \$0 } + \$1 !~ /^\\@/ { \$2 = and(\$2, compl(2048)); print substr(\$0, 2) } + ' |\\ + samtools fixmate ${args4} - - |\\ + samtools view -h ${args5} |\\ + samtools sort ${args6} -@${task.cpus} -T ${prefix}_tmp -o ${prefix}.bam - + """ + + stub: + def prefix = task.ext.prefix ?: "${cram}.${chunkn}.${meta.id}" + """ + touch ${prefix}.bam + """ +} diff --git a/modules/sanger-tol/cramalign/minimap2alignhic/meta.yml b/modules/sanger-tol/cramalign/minimap2alignhic/meta.yml new file mode 100644 index 00000000..3841820b --- /dev/null +++ b/modules/sanger-tol/cramalign/minimap2alignhic/meta.yml @@ -0,0 +1,173 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/modules/meta-schema.json +name: "cramalign_minimap2alignhic" +description: | + Aligns a subset of Hi-C reads from a CRAM file to a reference genome using minimap2, + pipes the resulting alignments through filter_five_end.pl (included), samtools fixmate and samtools sort, outputting + an alignment in BAM format. +keywords: + - align + - cram + - chunk +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] + identifier: "" + - filter_five_end.pl: + description: | + Filter alignments + homepage: https://github.com/ArimaGenomics/mapping_pipeline + documentation: https://github.com/ArimaGenomics/mapping_pipeline + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - cram: + type: file + description: CRAM file containing sequences to align + pattern: "*.cram" + ontologies: + - edam: "http://edamontology.org/format_3462" # CRAM + - crai: + type: file + description: CRAM index file + pattern: "*.crai" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - rglines: + type: list + description: A list of strings, each a valid SAM-format @RG header line, + to pass to the aligner + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - index: + type: file + description: minimap2 index of the reference to be aligned to + ontologies: [] + - reference: + type: file + description: FASTA file containing reference for alignment + pattern: "*.fasta(.gz)?" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - - chunkn: + type: integer + description: Integer index of CRAM chunk to map + - range: + type: list + description: Start and end indices defining CRAM slices to align +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
`[ id:'sample1' ]` + - "*.bam": + type: file + description: BAM file of mapped Hi-C sequences + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + versions_minimap2: + - - ${task.process}: + type: string + description: Name of the process + - minimap2: + type: string + description: The name of the tool + - minimap2 --version | sed "s/minimap2 //g": + type: eval + description: The expression to obtain the version of the tool + versions_gawk: + - - ${task.process}: + type: string + description: Name of the process + - gawk: + type: string + description: The name of the tool + - gawk --version | grep -o -E "[0-9]+(\.[0-9]+)+" | head -n1: + type: eval + description: The expression to obtain the version of the tool + versions_filterfiveend: + - - ${task.process}: + type: string + description: Name of the process + - filter_five_end.pl: + type: string + description: The name of the tool + - echo 1.0: + type: eval + description: The expression to obtain the version of the tool + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version | head -1 | sed -e "s/samtools //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - minimap2: + type: string + description: The name of the tool + - minimap2 --version | sed "s/minimap2 //g": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: Name of the process + - gawk: + type: string + description: The name of the tool + - gawk --version | grep -o -E "[0-9]+(\.[0-9]+)+" | head -n1: + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: Name of the process + - filter_five_end.pl: + type: string + description: 
The name of the tool + - echo 1.0: + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version | head -1 | sed -e "s/samtools //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/bin/filter_five_end.pl b/modules/sanger-tol/cramalign/minimap2alignhic/resources/usr/bin/filter_five_end.pl similarity index 76% rename from bin/filter_five_end.pl rename to modules/sanger-tol/cramalign/minimap2alignhic/resources/usr/bin/filter_five_end.pl index 41ca9ff3..4925d260 100755 --- a/bin/filter_five_end.pl +++ b/modules/sanger-tol/cramalign/minimap2alignhic/resources/usr/bin/filter_five_end.pl @@ -1,4 +1,27 @@ -#! /usr/bin/env perl +#!/usr/bin/env perl + +# MIT License +# +# Copyright (c) 2017 Arima Genomics, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + use strict; use warnings; diff --git a/modules/sanger-tol/cramalign/minimap2alignhic/tests/main.nf.test b/modules/sanger-tol/cramalign/minimap2alignhic/tests/main.nf.test new file mode 100644 index 00000000..5c03ac40 --- /dev/null +++ b/modules/sanger-tol/cramalign/minimap2alignhic/tests/main.nf.test @@ -0,0 +1,171 @@ + +nextflow_process { + + name "Test Process CRAMALIGN_MINIMAP2ALIGNHIC" + script "../main.nf" + process "CRAMALIGN_MINIMAP2ALIGNHIC" + + tag "modules" + tag "modules_sangertol" + tag "minimap2/index" + tag "cramalign" + tag "cramalign/minimap2alignhic" + tag "nf-core/minimap2/index" + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall("${launchDir}/library/", ["minimap2/index"]) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules") + + run("MINIMAP2_INDEX") { + script "../../../../nf-core/minimap2/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id: "test" ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ]) + """ + } + } + } + + test("meles meles") { + config "./nextflow.config" + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + minimap2_args = "-ax sr" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram.crai', 
checkIfExists: true), + ['@RG\\\\tID:35528_2#1\\\\tDT:2020-11-17T00:00:00+0000\\\\tPU:201117_A00948_0206_AHGML2DSXY_2#1\\\\tLB:33637906\\\\tPG:SCS\\\\tSM:SAMEA5962964\\\\tCN:SC\\\\tPL:ILLUMINA\\\\tDS:ERP116890: Sequencing and assembly of genomes from British species for the Darwin Tree of Life Project. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/'] + ]) + input[1] = channel.of([ + [ id: "test" ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ]).join(MINIMAP2_INDEX.out.index).map { meta, ref, idx -> [meta, idx, ref] } + input[2] = channel.of([ + 1, [0, 1] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + bam(process.out.bam.get(0).get(1)).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules") + } + + } + + test("meles meles - no readgroup lines") { + config "./nextflow.config" + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + minimap2_args = "-ax sr" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram.crai', checkIfExists: true), + [] + ]) + input[1] = channel.of([ + [ id: "test" ], + file(params.modules_testdata_base_path + 
'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ]).join(MINIMAP2_INDEX.out.index).map { meta, ref, idx -> [meta, idx, ref] } + input[2] = channel.of([ + 1, [0, 1] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + bam(process.out.bam.get(0).get(1)).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules") + } + + } + + test("meles meles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram.crai', checkIfExists: true), + ['@RG\\\\tID:35528_2#1\\\\tDT:2020-11-17T00:00:00+0000\\\\tPU:201117_A00948_0206_AHGML2DSXY_2#1\\\\tLB:33637906\\\\tPG:SCS\\\\tSM:SAMEA5962964\\\\tCN:SC\\\\tPL:ILLUMINA\\\\tDS:ERP116890: Sequencing and assembly of genomes from British species for the Darwin Tree of Life Project. This data is part of a pre-publication release. 
For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/'] + ]) + input[1] = channel.of([ + [ id: "test" ], + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ]).join(MINIMAP2_INDEX.out.index).map { meta, ref, idx -> [meta, idx, ref] } + input[2] = channel.of([ + 1, [0, 1] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules") + } + + } +} diff --git a/modules/sanger-tol/cramalign/minimap2alignhic/tests/main.nf.test.snap b/modules/sanger-tol/cramalign/minimap2alignhic/tests/main.nf.test.snap new file mode 100644 index 00000000..6a68e7d1 --- /dev/null +++ b/modules/sanger-tol/cramalign/minimap2alignhic/tests/main.nf.test.snap @@ -0,0 +1,165 @@ +{ + "meles meles": { + "content": [ + "3196a1997c55d543d773a1af0595fa6d", + { + "versions_filterfiveend": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "filter_five_end.pl", + "1.0" + ] + ], + "versions_gawk": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "gawk", + "5.3.1" + ] + ], + "versions_minimap2": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "minimap2", + "2.30-r1287" + ] + ], + "versions_samtools": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-15T15:22:49.448607" + }, + "meles meles - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "35528_2%231_subset.cram.1.test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "minimap2", + "2.30-r1287" + ] + ], + "2": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "gawk", + "5.3.1" + ] + ], + "3": [ + [ + 
"CRAMALIGN_MINIMAP2ALIGNHIC", + "filter_five_end.pl", + "1.0" + ] + ], + "4": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "35528_2%231_subset.cram.1.test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_filterfiveend": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "filter_five_end.pl", + "1.0" + ] + ], + "versions_gawk": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "gawk", + "5.3.1" + ] + ], + "versions_minimap2": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "minimap2", + "2.30-r1287" + ] + ], + "versions_samtools": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-15T14:47:03.804302" + }, + "meles meles - no readgroup lines": { + "content": [ + "3196a1997c55d543d773a1af0595fa6d", + { + "versions_filterfiveend": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "filter_five_end.pl", + "1.0" + ] + ], + "versions_gawk": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "gawk", + "5.3.1" + ] + ], + "versions_minimap2": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "minimap2", + "2.30-r1287" + ] + ], + "versions_samtools": [ + [ + "CRAMALIGN_MINIMAP2ALIGNHIC", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-15T15:23:01.032824" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/cramalign/minimap2alignhic/tests/nextflow.config b/modules/sanger-tol/cramalign/minimap2alignhic/tests/nextflow.config new file mode 100644 index 00000000..bac4c654 --- /dev/null +++ b/modules/sanger-tol/cramalign/minimap2alignhic/tests/nextflow.config @@ -0,0 +1,12 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: CRAMALIGN_MINIMAP2ALIGNHIC { + ext.args1 = { "${params.samtools_cat_args}" } + ext.args2 = { "${params.samtools_fastq_args}" } + ext.args3 = { "${params.minimap2_args}" } + ext.args4 = { 
"${params.samtools_fixmate_args}" } + ext.args5 = { "${params.samtools_view_args}" } + ext.args6 = { "${params.samtools_sort_args}" } + } +} diff --git a/modules/sanger-tol/samtools/mergedup/environment.yml b/modules/sanger-tol/samtools/mergedup/environment.yml new file mode 100644 index 00000000..ad6aea73 --- /dev/null +++ b/modules/sanger-tol/samtools/mergedup/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.23 + - bioconda::samtools=1.23 diff --git a/modules/sanger-tol/samtools/mergedup/main.nf b/modules/sanger-tol/samtools/mergedup/main.nf new file mode 100644 index 00000000..e40f162e --- /dev/null +++ b/modules/sanger-tol/samtools/mergedup/main.nf @@ -0,0 +1,58 @@ +process SAMTOOLS_MERGEDUP { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e5/e5598451c6d348cce36191bafe1911ad71e440137d7a329da946f2b0dbb0e7f3/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.23--cde2c40a51d6f752' }" + + input: + tuple val(meta) , path(input) + tuple val(meta2), path(fasta), path(fai), path(gzi) + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + tuple val(meta), path("*.metrics") , emit: metrics + tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram" : + "bam" + """ + samtools merge \\ + ${args} \\ + - \\ + ${input} |\\ + samtools markdup \\ + -T ${prefix} \\ + -f ${prefix}.metrics \\ + --threads ${task.cpus} \\ + $reference \\ + $args2 \\ + - \\ + ${prefix}.${extension} + """ + + stub: + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? 
"cram" : + "bam" + """ + touch ${prefix}.${extension} + touch ${prefix}.metrics + """ +} diff --git a/modules/sanger-tol/samtools/mergedup/meta.yml b/modules/sanger-tol/samtools/mergedup/meta.yml new file mode 100644 index 00000000..35ad4e87 --- /dev/null +++ b/modules/sanger-tol/samtools/mergedup/meta.yml @@ -0,0 +1,150 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "samtools_mergedup" +description: Merge multiple fixmated, sorted SAM/BAM/CRAM and then mark + duplicates +keywords: + - merge + - markduplicates + - genomics + - multi-tool + - bam + - sam + - cram +tools: + - samtools_merge: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools + - samtools_markdup: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + args_id: "$args2" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM files + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference genome file + pattern: "*.{fasta,fa,fna}" + ontologies: [] + - fai: + type: file + description: Index of the reference file the CRAM was created with + (optional) + pattern: "*.fai" + ontologies: [] + - gzi: + type: file + description: Index of the compressed reference file the CRAM was created + with (optional) + pattern: "*.gzi" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted and duplicate marked BAM file + pattern: "*.bam" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted and duplicate marked CRAM file + pattern: "*.cram" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Sorted and duplicate marked BAM index file + pattern: "*.csi" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: Sorted and duplicate marked CRAM index file + pattern: "*.crai" + ontologies: [] + metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Duplicate metrics file + pattern: "*.metrics" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/sanger-tol/samtools/mergedup/tests/main.nf.test b/modules/sanger-tol/samtools/mergedup/tests/main.nf.test new file mode 100644 index 00000000..9061aef3 --- /dev/null +++ b/modules/sanger-tol/samtools/mergedup/tests/main.nf.test @@ -0,0 +1,91 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGEDUP" + script "../main.nf" + process "SAMTOOLS_MERGEDUP" + + tag "modules" + tag "modules_sangertol" + tag "samtools" + tag "samtools/mergedup" + + config "./nextflow.config" + + test("sarscov2 - bam") { + + when { + + params { + samtools_markdup_args = [ + "-s", // print some stats + "--json", // output stats in json format for MultiQC + "-d 2500", // The optical duplicate distance + "--barcode-name", // Use the UMI/barcode embedded in the read name (eighth colon delimited part). 
+ "--write-index", // Write csi/crai index + ].join(" ").trim() + } + + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.2.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.3.bam', checkIfExists: true), + ] + ]) + input[1] = Channel.of([[:],[],[],[]]) + """ + } + + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.csi[0][1]).getName(), + process.out.metrics, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + } + ) + } + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + + params { + samtools_markdup_args = "" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.2.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.3.bam', checkIfExists: true), + ] + ]) + input[1] = Channel.of([[:],[],[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/sanger-tol/samtools/mergedup/tests/main.nf.test.snap b/modules/sanger-tol/samtools/mergedup/tests/main.nf.test.snap new file mode 100644 index 00000000..e32b1bb4 --- /dev/null +++ b/modules/sanger-tol/samtools/mergedup/tests/main.nf.test.snap @@ -0,0 +1,105 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + 
"3": [ + + ], + "4": [ + [ + { + "id": "test" + }, + "test.metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + "SAMTOOLS_MERGEDUP", + "samtools", + "1.23" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "metrics": [ + [ + { + "id": "test" + }, + "test.metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_MERGEDUP", + "samtools", + "1.23" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T15:16:40.761606" + }, + "sarscov2 - bam": { + "content": [ + "a709d2c1d54fe7640c21b53ef0fbfc4c", + "test.bam.csi", + [ + [ + { + "id": "test" + }, + "test.metrics:md5,7969aeed215660c201ab04e527fee918" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_MERGEDUP", + "samtools", + "1.23" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T15:16:36.500954" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/samtools/mergedup/tests/nextflow.config b/modules/sanger-tol/samtools/mergedup/tests/nextflow.config new file mode 100644 index 00000000..5591f7f9 --- /dev/null +++ b/modules/sanger-tol/samtools/mergedup/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "SAMTOOLS_MERGEDUP" { + ext.args2 = { params.samtools_markdup_args } + } +} diff --git a/modules/local/gawk_split_directions/environment.yml b/modules/sanger-tol/telomere/extract/environment.yml similarity index 100% rename from modules/local/gawk_split_directions/environment.yml rename to modules/sanger-tol/telomere/extract/environment.yml diff --git a/modules/sanger-tol/telomere/extract/main.nf b/modules/sanger-tol/telomere/extract/main.nf new file mode 100644 index 00000000..47fc6d7d --- /dev/null +++ b/modules/sanger-tol/telomere/extract/main.nf @@ -0,0 +1,38 @@ +process TELOMERE_EXTRACT { + tag "${meta.id}" + label 
'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(telomere) + + output: + tuple val(meta), path("*.bed") , emit: bed + tuple val(meta), path("*.bedgraph") , emit: bedgraph + tuple val("${task.process}"), val('telomere_extract'), eval("awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//'"), topic: versions, emit: versions_telomereextract + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + awk 'BEGIN { OFS = "\t" } + { + gsub(">", "") + print \$2, \$4, \$5 >> "${prefix}_telomere.bed" + print \$2, \$4, \$5, (((\$5-\$4)<0)?-(\$5-\$4):(\$5-\$4)) >> "${prefix}_telomere.bedgraph" + }' ${telomere} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_telomere.bed + touch ${prefix}_telomere.bedgraph + """ +} diff --git a/modules/sanger-tol/telomere/extract/meta.yml b/modules/sanger-tol/telomere/extract/meta.yml new file mode 100644 index 00000000..266ba39d --- /dev/null +++ b/modules/sanger-tol/telomere/extract/meta.yml @@ -0,0 +1,82 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "telomere_extract" +description: | + Extract telomeric sites from telomere file into bed and bedgraph outputs +keywords: + - telomere + - extract + - awk + - bed + - bedgraph +tools: + - gawk: + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - telomere: + type: file + description: | + Telomere file containing telomeric sites. + pattern: "*.telomere" + ontologies: [] + +output: + bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bed": + type: file + description: | + Telomere file formatted to bed + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + + bedgraph: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bedgraph": + type: file + description: | + Telomere file formatted to bedgraph + ontologies: + - edam: "http://edamontology.org/format_3583" # BEDGRAPH + versions_telomereextract: + - - ${task.process}: + type: string + description: The name of the process + - telomere_extract: + type: string + description: The name of the module + - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - telomere_extract: + type: string + description: The name of the module + - awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/sanger-tol/telomere/extract/tests/main.nf.test b/modules/sanger-tol/telomere/extract/tests/main.nf.test new file mode 100644 index 00000000..d4797b51 --- /dev/null +++ b/modules/sanger-tol/telomere/extract/tests/main.nf.test @@ -0,0 +1,55 @@ +nextflow_process { + + name "Test Process TELOMERE_EXTRACT" + script "../main.nf" + process "TELOMERE_EXTRACT" + + tag "modules" + tag "modules_sangertol" + tag "telomere" + tag "telomere/extract" + + test("Cloeon_dipterum - telomere") { + + when { + process { + """ + input[0] = [ + [ id: 'Cloeon_dipterum' ], // meta map + 
file(params.modules_testdata_base_path + 'resources/modules/telomere/idFanCani4/idFanCani4.windows', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Cloeon_dipterum - telomere - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'Cloeon_dipterum' ], // meta map + file(params.modules_testdata_base_path + 'resources/modules/telomere/idFanCani4/idFanCani4.windows', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/sanger-tol/telomere/extract/tests/main.nf.test.snap b/modules/sanger-tol/telomere/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..21b61b0f --- /dev/null +++ b/modules/sanger-tol/telomere/extract/tests/main.nf.test.snap @@ -0,0 +1,116 @@ +{ + "Cloeon_dipterum - telomere": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bed:md5,dcc1474c5421f788ee642a9caa6f8224" + ] + ], + "1": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bedgraph:md5,b62d187a03802d145039b88353aa3a2a" + ] + ], + "2": [ + [ + "TELOMERE_EXTRACT", + "telomere_extract", + "5.3.0" + ] + ], + "bed": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bed:md5,dcc1474c5421f788ee642a9caa6f8224" + ] + ], + "bedgraph": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bedgraph:md5,b62d187a03802d145039b88353aa3a2a" + ] + ], + "versions_telomereextract": [ + [ + "TELOMERE_EXTRACT", + "telomere_extract", + "5.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-25T11:56:21.812753542" + }, + "Cloeon_dipterum - telomere - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + 
], + "1": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "TELOMERE_EXTRACT", + "telomere_extract", + "5.3.0" + ] + ], + "bed": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "Cloeon_dipterum" + }, + "Cloeon_dipterum_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_telomereextract": [ + [ + "TELOMERE_EXTRACT", + "telomere_extract", + "5.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-25T11:56:27.303144813" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/telomere/regions/main.nf b/modules/sanger-tol/telomere/regions/main.nf new file mode 100644 index 00000000..043d7e67 --- /dev/null +++ b/modules/sanger-tol/telomere/regions/main.nf @@ -0,0 +1,40 @@ +process TELOMERE_REGIONS { + tag "${meta.id}" + label 'process_low' + + container 'sanger-tol/telomere:0.0.1-c1' + + input: + tuple val(meta), path(reference) + val telomereseq + + output: + tuple val( meta ), path( "*.telomere" ) , emit: telomere + tuple val("${task.process}"), val('find_telomere_regions'), val("1.0.0"), topic: versions, emit: versions_telomereregions + + when: + task.ext.when == null || task.ext.when + + script: + + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." + } + + def prefix = task.ext.prefix ?: "${meta.id}" + """ + find_telomere $reference $telomereseq > ${prefix}.telomere + """ + + stub: + + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." 
+ } + + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.telomere + """ + +} diff --git a/modules/sanger-tol/telomere/regions/meta.yml b/modules/sanger-tol/telomere/regions/meta.yml new file mode 100644 index 00000000..69ca9f75 --- /dev/null +++ b/modules/sanger-tol/telomere/regions/meta.yml @@ -0,0 +1,73 @@ +name: "telomere_regions" +description: | + Identify regions of the assembly matching a given sequence +keywords: + - telomere + - regions + - motif +tools: + - find_telomere: + description: | + Scripts to identify and output telomeric motif locations + homepage: https://github.com/VGP/vgp-assembly/ + documentation: https://github.com/VGP/vgp-assembly/ + licence: + - "GPL v2.0" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - reference: + type: file + description: | + Assembly for telomere. + pattern: "*.{fa,fna,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - telomereseq: + type: string + description: | + A string containing the DNA sequence of a telomere motif + ontologies: [] +output: + telomere: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.telomere": + type: file + description: | + File containing telomeric regions + ontologies: + - edam: "http://edamontology.org/format_3475" + versions_telomereregions: + - - ${task.process}: + type: string + description: The name of the process + - find_telomere_regions: + type: string + description: The name of the module + - 1.0.0: + type: string + description: The version of the module + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - find_telomere_regions: + type: string + description: The name of the module + - 1.0.0: + type: string + description: The version of the module +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/sanger-tol/telomere/regions/tests/main.nf.test b/modules/sanger-tol/telomere/regions/tests/main.nf.test new file mode 100644 index 00000000..4e36bee0 --- /dev/null +++ b/modules/sanger-tol/telomere/regions/tests/main.nf.test @@ -0,0 +1,75 @@ +nextflow_process { + + name "Test Process TELOMERE_REGIONS" + script "../main.nf" + process "TELOMERE_REGIONS" + + tag "modules" + tag "modules_sangertol" + tag "telomere" + tag "telomere/regions" + tag "modules/nf-core/gunzip" + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall( + "${launchDir}/library/", + [ "gunzip" ] + ) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + + run("GUNZIP"){ + script "../../../../../modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ id: "Cloeon_Dipeterum" ], + file(params.modules_testdata_base_path + 'Cloeon_dipterum/assembly/release/ieCloDipt1.1/insdc/GCA_949628265.1.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + test("GCA_963859965.1 - fasta") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + + input[1] = "TTAGG" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("GCA_963859965.1 - fasta - stub") { + + 
options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + + input[1] = "TTAGG" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/sanger-tol/telomere/regions/tests/main.nf.test.snap b/modules/sanger-tol/telomere/regions/tests/main.nf.test.snap new file mode 100644 index 00000000..6706c4d4 --- /dev/null +++ b/modules/sanger-tol/telomere/regions/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "GCA_963859965.1 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.telomere:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "TELOMERE_REGIONS", + "find_telomere_regions", + "1.0.0" + ] + ], + "telomere": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.telomere:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_telomereregions": [ + [ + "TELOMERE_REGIONS", + "find_telomere_regions", + "1.0.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-25T12:00:39.601105215" + }, + "GCA_963859965.1 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.telomere:md5,a18511467c3a1e8ece1973e33c4fe0b6" + ] + ], + "1": [ + [ + "TELOMERE_REGIONS", + "find_telomere_regions", + "1.0.0" + ] + ], + "telomere": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.telomere:md5,a18511467c3a1e8ece1973e33c4fe0b6" + ] + ], + "versions_telomereregions": [ + [ + "TELOMERE_REGIONS", + "find_telomere_regions", + "1.0.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-25T11:59:43.90850903" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/telomere/windows/main.nf b/modules/sanger-tol/telomere/windows/main.nf new file mode 100644 index 00000000..893052ab --- /dev/null +++ b/modules/sanger-tol/telomere/windows/main.nf @@ -0,0 +1,49 
@@ +process TELOMERE_WINDOWS { + tag "${meta.id}" + label 'process_low' + + conda "bioconda::java-jdk=8.0.112" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : + 'biocontainers/java-jdk:8.0.112--1' }" + + input: + tuple val(meta), path(telomere) + + output: + tuple val(meta), path("*.windows") , emit: windows + tuple val("${task.process}"), val('find_telomere_windows'), val("1.0.0"), topic: versions, emit: versions_telomerewindows + + when: + task.ext.when == null || task.ext.when + + script: + // WARNING: This module includes the telomere.jar binary and its wrapper telomere_windows.sh + // as module binaries in ${moduleDir}/resources/usr/bin/. To use this module, you will + // either have to copy these two files to ${projectDir}/bin or set the option + // nextflow.enable.moduleBinaries = true in your nextflow.config file. + + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + + // Dynamically generate java mem needs based on task.memory + // Taken from: nf-core/umicollapse + def max_heap_size_mega = (task.memory.toMega() * 0.9).intValue() + def max_stack_size_mega = 999 //most java jdks will not allow Xss > 1GB, so fixing this to the allowed max + + """ + telomere_windows.sh \\ + -Xmx${max_heap_size_mega}M \\ + -Xss${max_stack_size_mega}M \\ + FindTelomereWindows $telomere \\ + $args \\ + > ${prefix}.windows + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.windows + """ + +} diff --git a/modules/sanger-tol/telomere/windows/meta.yml b/modules/sanger-tol/telomere/windows/meta.yml new file mode 100644 index 00000000..6210ec11 --- /dev/null +++ b/modules/sanger-tol/telomere/windows/meta.yml @@ -0,0 +1,67 @@ +name: "telomere_windows" +description: | + Identify windows of the telomere file which may be true telomeric sites +keywords: + - telomere + - windows + - tsv +tools: + - 
telomere.jar: + description: | + Script to identify likely telomeric sites. + homepage: https://github.com/VGP/vgp-assembly/ + documentation: https://github.com/VGP/vgp-assembly/ + licence: + - "GPL v2.0" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - telomere: + type: file + description: | + TSV of putative telomeric sites + pattern: "*.telomere" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + windows: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.windows": + type: file + description: | + TSV of likely telomeric sites + ontologies: + - edam: "http://edamontology.org/format_3475" + versions_telomerewindows: + - - ${task.process}: + type: string + description: The name of the process + - find_telomere_windows: + type: string + description: The name of the tool + - 1.0.0: + type: string + description: The version of the module +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - find_telomere_windows: + type: string + description: The name of the tool + - 1.0.0: + type: string + description: The version of the module +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/bin/telomere.jar b/modules/sanger-tol/telomere/windows/resources/usr/bin/telomere.jar similarity index 100% rename from bin/telomere.jar rename to modules/sanger-tol/telomere/windows/resources/usr/bin/telomere.jar diff --git a/modules/sanger-tol/telomere/windows/resources/usr/bin/telomere_windows.sh b/modules/sanger-tol/telomere/windows/resources/usr/bin/telomere_windows.sh new file mode 100755 index 00000000..e3876e18 --- /dev/null +++ b/modules/sanger-tol/telomere/windows/resources/usr/bin/telomere_windows.sh @@ -0,0 +1,2 @@ +#!/bin/bash +exec java -cp "$(dirname -- "${BASH_SOURCE[0]}")/telomere.jar" "$@" diff --git 
a/modules/sanger-tol/telomere/windows/tests/main.nf.test b/modules/sanger-tol/telomere/windows/tests/main.nf.test new file mode 100644 index 00000000..79dd7dcb --- /dev/null +++ b/modules/sanger-tol/telomere/windows/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process TELOMERE_WINDOWS" + script "../main.nf" + process "TELOMERE_WINDOWS" + + tag "modules" + tag "modules_sangertol" + tag "telomere" + tag "windows" + tag "telomere/windows" + + config "./nextflow.config" + + test("idFanCani4 - telomere") { + + when { + params { + telomere_window_cut = 99.9 + } + + process { + """ + input[0] = [ + [ id: "idFanCani4" ], + file(params.modules_testdata_base_path + 'resources/modules/telomere/idFanCani4/idFanCani4.telomere', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("idFanCani4 - telomere - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: "idFanCani4" ], + file(params.modules_testdata_base_path + 'resources/modules/telomere/idFanCani4/idFanCani4.telomere', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/sanger-tol/telomere/windows/tests/main.nf.test.snap b/modules/sanger-tol/telomere/windows/tests/main.nf.test.snap new file mode 100644 index 00000000..304bdecb --- /dev/null +++ b/modules/sanger-tol/telomere/windows/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "idFanCani4 - telomere - stub": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.windows:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "TELOMERE_WINDOWS", + "find_telomere_windows", + "1.0.0" + ] + ], + "versions_telomerewindows": [ + [ + "TELOMERE_WINDOWS", + "find_telomere_windows", + "1.0.0" + ] + ], + "windows": [ + [ + { + "id": "idFanCani4" + }, + 
"idFanCani4.windows:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-25T11:29:18.234895641" + }, + "idFanCani4 - telomere": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.windows:md5,3b9105c87ad2c332dd28a868dff9c89e" + ] + ], + "1": [ + [ + "TELOMERE_WINDOWS", + "find_telomere_windows", + "1.0.0" + ] + ], + "versions_telomerewindows": [ + [ + "TELOMERE_WINDOWS", + "find_telomere_windows", + "1.0.0" + ] + ], + "windows": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.windows:md5,3b9105c87ad2c332dd28a868dff9c89e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-25T11:29:10.038291724" + } +} \ No newline at end of file diff --git a/modules/sanger-tol/telomere/windows/tests/nextflow.config b/modules/sanger-tol/telomere/windows/tests/nextflow.config new file mode 100644 index 00000000..3b69e44b --- /dev/null +++ b/modules/sanger-tol/telomere/windows/tests/nextflow.config @@ -0,0 +1,7 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: TELOMERE_WINDOWS { + ext.args = { "${params.telomere_window_cut}" } + } +} diff --git a/nextflow.config b/nextflow.config index 02170084..d4129c05 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,6 +23,8 @@ params { all_output = false run_hires = true multi_mapping = 0 + cram_chunk_size = 10000 + pre_mapped_bam = null // Boilerplate options outdir = "${params.sample}_CPRETEXT_OUTPUT" @@ -34,7 +36,9 @@ params { plaintext_email = false monochrome_logs = false hook_url = null - help = false + help = false + help_full = false + show_hidden = false version = false validate_params = true @@ -54,6 +58,7 @@ params { // Load base.config by default for all pipelines includeConfig 'conf/base.config' + profiles { cleanup { cleanup = true @@ -94,7 +99,18 @@ profiles { apptainer.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } - arm { + 
arm64 { + process.arch = 'arm64' + // TODO https://github.com/nf-core/modules/issues/6694 + // For now if you're using arm64 you have to use wave for the sake of the maintainers + // wave profile + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + emulate_amd64 { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { @@ -151,17 +167,10 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - process { - resourceLimits = [ - memory: 8.GB, - cpus : 4, - time : 1.h - ] - } + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' } gpu { docker.runOptions = '-u $(id -u):$(id -g) --gpus all' @@ -211,7 +220,6 @@ process.shell = [ // Disable process selector warnings by default. Use debug profile to enable warnings. 
nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss' ) timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html" @@ -251,8 +259,8 @@ manifest { orcid: '0000-0001-6868-3416' ], [ - name: 'Mahesh Panchal', - affiliation: '', + name: 'Mahesh Binzer-Panchal', + affiliation: 'National Bioinformatics Infrastructure Sweden', github: 'mahesh-panchal', contribution: ['contributor'], orcid: '0000-0003-1675-0677' @@ -268,51 +276,27 @@ manifest { homePage = 'https://github.com/sanger-tol/curationpretext' description = """A simple pipeline to generate pretext files for genomic curation.""" mainScript = 'main.nf' - defaultBranch = 'main' - nextflowVersion = '!>=24.10.5' - version = '1.5.1' + defaultBranch = 'master' + nextflowVersion = '!>=25.04.0' + version = '1.6.0' doi = '10.5281/zenodo.12773958' } // Nextflow plugins plugins { - id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run sanger-tol/curationpretext -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- -\033[0;34m _____ \033[0;32m _______ \033[0;31m _\033[0m -\033[0;34m / ____| \033[0;32m|__ __| \033[0;31m| |\033[0m -\033[0;34m | (___ __ _ _ __ __ _ ___ _ __ \033[0m ___ \033[0;32m| |\033[0;33m ___ \033[0;31m| |\033[0m -\033[0;34m \\___ \\ / _` | '_ \\ / _` |/ _ \\ '__|\033[0m|___|\033[0;32m| |\033[0;33m/ _ \\\033[0;31m| |\033[0m -\033[0;34m ____) | (_| | | | | (_| | __/ | \033[0;32m| |\033[0;33m (_) \033[0;31m| |____\033[0m 
-\033[0;34m |_____/ \\__,_|_| |_|\\__, |\\___|_| \033[0;32m|_|\033[0;33m\\___/\033[0;31m|______|\033[0m -\033[0;34m __/ |\033[0m -\033[0;34m |___/\033[0m -\033[0;35m ${manifest.name} ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x -* Software dependencies - https://github.com/sanger-tol/curationpretext/blob/main/CITATIONS.md -""" - } - - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } } // Load modules.config for DSL2 module specific options includeConfig 'conf/modules.config' + +// Load config for sanger-tol subworkflows +includeConfig 'subworkflows/sanger-tol/cram_map_illumina_hic/nextflow.config' +includeConfig 'subworkflows/sanger-tol/pairs_create_contact_maps/nextflow.config' +includeConfig 'subworkflows/sanger-tol/telo_finder/nextflow.config' +includeConfig 'subworkflows/sanger-tol/gap_finder/nextflow.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 15ccb93f..632f2422 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "cram", "sample", "outdir", "map_order"], + "required": ["input", "sample", "outdir", "map_order"], "properties": { "input": { "type": "string", @@ -18,7 +18,8 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Input fasta file", "help_text": "You need the input fasta file", - "fa_icon": "fas fa-file-fasta" + "fa_icon": "fas fa-file-fasta", + "exists": true }, "split_telomere": { "type": "boolean", @@ -33,25 +34,35 @@ "fa_icon": "fas fa-file-signature" }, 
"reads": { - "type": "string", - "format": "directory-path", - "description": "Input longread fasta directory", - "help_text": "You need the input fasta file directory", - "fa_icon": "fas fa-folder-open" + "type": "array", + "items": { + "type": "string", + "format": "path", + "pattern": ".*\\.(fa|fasta|fq|fastq).gz$" + }, + "description": "Input longread fasta files", + "help_text": "You need the input fasta file array", + "fa_icon": "fas fa-folder-open", + "exists": true }, "read_type": { "type": "string", "description": "Type of longread data", "help_text": "Choose between {'hifi', 'clr', 'ont', 'illumina'}", - "fa_icon": "fas fa-folder-open", + "fa_icon": "fas fa-file-signature", "enum": ["hifi", "clr", "ont", "illumina"] }, "cram": { - "type": "string", - "format": "directory-path", - "description": "Input cram directory", - "help_text": "You need the input fasta file directory", - "fa_icon": "fas fa-folder-open" + "type": "array", + "items": { + "type": "string", + "format": "path", + "pattern": ".*\\.cram$" + }, + "description": "Input cram files", + "help_text": "You need the input cram file array", + "fa_icon": "fas fa-file-signature", + "exists": true }, "teloseq": { "type": "string", @@ -79,6 +90,22 @@ "help_text": "Boolean to switch off HiRes pretextmap generation", "fa_icon": "fas fa-check" }, + "cram_chunk_size": { + "type": "integer", + "description": "Chunk size for splitting CRAM files", + "help_text": "Chunk size for splitting CRAM files", + "fa_icon": "fas fa-file-code", + "default": 10000 + }, + "pre_mapped_bam": { + "type": "string", + "format": "path", + "description": "Pre-mapped BAM file", + "help_text": "Pre-mapped BAM file", + "fa_icon": "fas fa-file-code", + "exists": true, + "pattern": ".*\\.bam$" + }, "outdir": { "type": "string", "format": "directory-path", @@ -115,7 +142,6 @@ } } }, - "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -238,6 +264,18 @@ "fa_icon": "far calendar", 
"description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true + }, + "help": { + "type": ["boolean", "string"], + "description": "Display the help message." + }, + "help_full": { + "type": "boolean", + "description": "Display the full detailed help message." + }, + "show_hidden": { + "type": "boolean", + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." } } } diff --git a/nf-test.config b/nf-test.config index 3a1fff59..a7ef6c36 100644 --- a/nf-test.config +++ b/nf-test.config @@ -11,6 +11,9 @@ config { // ignore tests coming from the nf-core/modules repo ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' + // ignore tests coming from the sanger-tol/nf-core-modules repo + ignore 'modules/sanger-tol/**/tests/*', 'subworkflows/sanger-tol/**/tests/*' + // run all test with defined profile(s) from the main nextflow.config profile "test" diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index a50c76b0..560cf26a 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2025-05-27T09:34:43+00:00", - "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. 
Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--sample`\n - Sample is the naming prefix of the output files, e.g. iyTipFemo\n\n- `--reads`\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.\n\n- `--read_type`\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--multi_mapping`\n - Level of multi-mapping read filtering to perform whilst building the pretext map.\n\n- `--all_output`\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default 
is \"unsorted\" } \\\n --multi_mapping { default is \"0\" (for no mapping)} \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the 
pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--sample`\n - Sample is the naming prefix of the output files, e.g. iyTipFemo\n\n- `--reads`\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n - This folder _must_ contain files in a `.fasta.gz` format, or they will be skipped by the internal file search function.\n\n- `--read_type`\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--multi_mapping`\n - Level of multi-mapping read filtering to perform whilst building the pretext map.\n\n- `--all_output`\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --multi_mapping { default is \"0\" (for no mapping)} \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT 
license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/accessory_files/main.nf b/subworkflows/local/accessory_files/main.nf index 1f04bb04..d4c3fc57 100644 --- a/subworkflows/local/accessory_files/main.nf +++ b/subworkflows/local/accessory_files/main.nf @@ -1,33 +1,39 @@ #!/usr/bin/env nextflow // -// MODULE IMPORT BLOCK +// LOCAL SUBWORKFLOW IMPORT BLOCK // -include { GAP_FINDER } from '../gap_finder/main' -include { TELO_FINDER } from '../telo_finder/main' include { REPEAT_DENSITY } from '../repeat_density/main' include { LONGREAD_COVERAGE } from '../longread_coverage/main' +// +// SANGER_TOL SUBWORKFLOW IMPORT BLOCK +// +include { GAP_FINDER } from '../../sanger-tol/gap_finder/main' +include { TELO_FINDER } from '../../sanger-tol/telo_finder/main' + +// +// NF_CORE MODULE IMPORT BLOCK +// include { GAWK as GAWK_GENERATE_GENOME_FILE } from '../../../modules/nf-core/gawk/main' workflow ACCESSORY_FILES { take: - reference_tuple - longread_reads - val_teloseq - ch_reference_fai // Channel [ val(meta), path(file) ] + reference_tuple // Channel [ val(meta), path(file) ] + longread_reads // Channel [ val(meta), [path(file)] ] + val_teloseq // val(telomere_sequence) + val_split_telomere // val(bool) + val_skip_tracks // val(csv_list) + ch_reference_fai // Channel [ val(meta), path(file) ] main: - ch_versions = Channel.empty() - ch_empty_file = Channel.fromPath("${baseDir}/assets/EMPTY.txt") + ch_empty_file = channel.fromPath("${baseDir}/assets/EMPTY.txt") // - // NOTE: THIS IS DUPLICATED IN THE CURATIONPRETEXT WORKFLOW, - // PASSING THE PARAM TO THE 
SUBWORKFLOW CAUSED SOME ISSUES IN TESTING - // SO WE USE IT DIRECTLY AGAIN. + // NOTE: THIS IS DUPLICATED IN THE CURATIONPRETEXT WORKFLOW // - dont_generate_tracks = params.skip_tracks ? params.skip_tracks.split(",") : "NONE" + dont_generate_tracks = val_skip_tracks ? val_skip_tracks.split(",") : "NONE" // @@ -38,7 +44,6 @@ workflow ACCESSORY_FILES { [], false ) - ch_versions = ch_versions.mix( GAWK_GENERATE_GENOME_FILE.out.versions ) // @@ -48,9 +53,9 @@ workflow ACCESSORY_FILES { gap_file = ch_empty_file } else { GAP_FINDER ( - reference_tuple + reference_tuple, + false ) - ch_versions = ch_versions.mix(GAP_FINDER.out.versions) gap_file = GAP_FINDER.out.gap_file.map{ it -> it[1] } } @@ -63,10 +68,13 @@ workflow ACCESSORY_FILES { } else { TELO_FINDER ( reference_tuple, - val_teloseq + val_teloseq, + val_split_telomere, + false ) - ch_versions = ch_versions.mix(TELO_FINDER.out.versions) telo_file = TELO_FINDER.out.bedgraph_file + .map{ it -> it[1] } + .ifEmpty("${baseDir}/assets/EMPTY.txt") } @@ -80,7 +88,6 @@ workflow ACCESSORY_FILES { reference_tuple, GAWK_GENERATE_GENOME_FILE.out.output ) - ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions) repeat_file = REPEAT_DENSITY.out.repeat_density.map{ it -> it[1] } } @@ -97,7 +104,6 @@ workflow ACCESSORY_FILES { GAWK_GENERATE_GENOME_FILE.out.output, longread_reads ) - ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions) longread_output = LONGREAD_COVERAGE.out.ch_bigwig.map{ it -> it[1] } } @@ -106,5 +112,4 @@ workflow ACCESSORY_FILES { repeat_file telo_file // This is the possible collection of telomere files longread_output - versions = ch_versions } diff --git a/subworkflows/local/gap_finder/main.nf b/subworkflows/local/gap_finder/main.nf deleted file mode 100644 index 10ca907c..00000000 --- a/subworkflows/local/gap_finder/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env nextflow - -// -// MODULE IMPORT BLOCK -// -include { SEQTK_CUTN } from '../../../modules/nf-core/seqtk/cutn/main' -include { 
GAWK as GAWK_GAP_LENGTH } from '../../../modules/nf-core/gawk/main' - -workflow GAP_FINDER { - take: - reference_tuple // Channel [ val(meta), path(fasta) ] - - main: - ch_versions = Channel.empty() - - // - // MODULE: GENERATES A GAP SUMMARY FILE - // - SEQTK_CUTN ( - reference_tuple - ) - ch_versions = ch_versions.mix( SEQTK_CUTN.out.versions ) - - // - // MODULE: ADD THE LENGTH OF GAP TO BED FILE - INPUT FOR PRETEXT MODULE - // - GAWK_GAP_LENGTH ( - SEQTK_CUTN.out.bed, - [], - false - ) - ch_versions = ch_versions.mix( GAWK_GAP_LENGTH.out.versions ) - - emit: - gap_file = GAWK_GAP_LENGTH.out.output - versions = ch_versions -} diff --git a/subworkflows/local/generate_maps/main.nf b/subworkflows/local/generate_maps/main.nf deleted file mode 100644 index 413ea356..00000000 --- a/subworkflows/local/generate_maps/main.nf +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env nextflow - -// -// MODULE IMPORT BLOCK -// - -include { PRETEXTMAP as PRETEXTMAP_STANDRD } from '../../../modules/nf-core/pretextmap/main' -include { PRETEXTMAP as PRETEXTMAP_HIGHRES } from '../../../modules/nf-core/pretextmap/main' -include { PRETEXTSNAPSHOT as SNAPSHOT_SRES } from '../../../modules/nf-core/pretextsnapshot/main' -include { CRAM_GENERATE_CSV } from '../../../modules/local/cram/generate_csv/main' - -include { HIC_MINIMAP2 } from '../../../subworkflows/local/hic_minimap2/main' -include { HIC_BWAMEM2 } from '../../../subworkflows/local/hic_bwamem2/main' - -workflow GENERATE_MAPS { - take: - reference_tuple // Channel [ val(meta), path(file) ] - hic_reads_path // Channel [ val(meta), path(directory) ] - ch_reference_fai // Channel [ val(meta), path(file) ] - - - main: - ch_versions = Channel.empty() - - // - // MODULE: generate a cram csv file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT - // - CRAM_GENERATE_CSV ( - hic_reads_path - ) - ch_versions = ch_versions.mix( CRAM_GENERATE_CSV.out.versions ) - - - // - // SUBWORKFLOW: mapping hic reads using minimap2 
- // - HIC_MINIMAP2 ( - reference_tuple.filter{ meta, _fasta -> meta.aligner == 'minimap2' }, - CRAM_GENERATE_CSV.out.csv, - ch_reference_fai - ) - ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) - - - // - // SUBWORKFLOW: mapping hic reads using bwamem2 - // - HIC_BWAMEM2 ( - reference_tuple.filter{ meta, _fasta -> meta.aligner == 'bwamem2' }, - CRAM_GENERATE_CSV.out.csv, - ch_reference_fai - ) - ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) - - - ch_aligned_bams = HIC_MINIMAP2.out.mergedbam.mix( HIC_BWAMEM2.out.mergedbam ) - .map{ meta, bam -> - tuple( - meta + [ sz: bam.size() ], - bam - ) - } - - - // - // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR LOW RES - // - PRETEXTMAP_STANDRD ( - ch_aligned_bams, - reference_tuple.join( ch_reference_fai ).collect() - ) - ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) - - - if (params.run_hires) { - PRETEXTMAP_HIGHRES ( - ch_aligned_bams, - reference_tuple.join( ch_reference_fai ).collect() - ) - hires_pretext = PRETEXTMAP_HIGHRES.out.pretext - ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) - } else { - hires_pretext = Channel.empty() - } - - // - // MODULE: GENERATE PNG FROM STANDARD PRETEXT - // - SNAPSHOT_SRES ( - PRETEXTMAP_STANDRD.out.pretext - ) - ch_versions = ch_versions.mix( SNAPSHOT_SRES.out.versions ) - - emit: - standrd_pretext = PRETEXTMAP_STANDRD.out.pretext - standrd_snpshot = SNAPSHOT_SRES.out.image - highres_pretext = hires_pretext - versions = ch_versions - -} diff --git a/subworkflows/local/hic_bwamem2/main.nf b/subworkflows/local/hic_bwamem2/main.nf deleted file mode 100644 index 8f44feee..00000000 --- a/subworkflows/local/hic_bwamem2/main.nf +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env nextflow - -// This subworkflow takes an input fasta sequence and csv style list of hic cram file to return -// alignment files including .mcool, pretext and .hic. 
-// Input - Assembled genomic fasta file, cram file directory -// Output - .mcool, .pretext, .hic - -// -// MODULE IMPORT BLOCK -// -include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main.nf' -include { CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT } from '../../../modules/local/cram/filter_align_bwamem2_fixmate_sort/main' -include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' - -workflow HIC_BWAMEM2 { - take: - reference_tuple // Channel: tuple [ val(meta), path( fasta ) ] - csv_ch // Channel: tuple [ val(meta), path( cram_csv ) ] - reference_index // Channel: tuple [ val(meta), path( fai ) ] - - main: - ch_versions = Channel.empty() - mappedbam_ch = Channel.empty() - - // - // MODULE: Indexing on reference output the folder of indexing files - // - BWAMEM2_INDEX ( - reference_tuple - ) - ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) - - - // - // MODULE: map hic reads by 10,000 container per time using bwamem2 - // - CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT ( - csv_ch.splitCsv().map{ tuple -> tuple.flatten() }, - BWAMEM2_INDEX.out.index.collect() - ) - ch_versions = ch_versions.mix( CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions ) - - - // - // LOGIC: PREPARING BAMS FOR MERGE - // - CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.mappedbam - .map { meta, mbam -> tuple( meta.subMap('id'), mbam ) } // Is the submap necessary? 
- .groupTuple() - .set { collected_files_for_merge } - - - // - // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES - // - SAMTOOLS_MERGE ( - collected_files_for_merge, - reference_tuple, - reference_index - ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) - - emit: - mergedbam = SAMTOOLS_MERGE.out.bam - versions = ch_versions -} diff --git a/subworkflows/local/hic_minimap2/main.nf b/subworkflows/local/hic_minimap2/main.nf deleted file mode 100644 index c9ccfaae..00000000 --- a/subworkflows/local/hic_minimap2/main.nf +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env nextflow - -// This subworkflow takes an input fasta sequence and csv style list of hic cram file to return -// alignment files including .mcool, pretext and .hic. -// Input - Assembled genomic fasta file, cram file directory -// Output - .mcool, .pretext, .hic - -// -// MODULE IMPORT BLOCK -// -include { CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT } from '../../../modules/local/cram/filter_minimap2_filter5end_fixmate_sort/main' -include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' -include { MINIMAP2_INDEX } from '../../../modules/nf-core/minimap2/index/main' - - -workflow HIC_MINIMAP2 { - - take: - reference_tuple // Channel: tuple [ val(meta), path( file ) ] - csv_ch - reference_index - - main: - ch_versions = Channel.empty() - mappedbam_ch = Channel.empty() - - - // - // MODULE: generate minimap2 mmi file - // - MINIMAP2_INDEX (reference_tuple) - ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions ) - - // - // MODULE: map hic reads by 10,000 container per time - // - CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT ( - csv_ch.splitCsv().map{ tuple -> tuple.flatten() }, - MINIMAP2_INDEX.out.index.collect() - ) - ch_versions = ch_versions.mix( CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions ) - mappedbam_ch = CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.mappedbam - - // - // LOGIC: PREPARING BAMS FOR MERGE - // - 
mappedbam_ch - .map { meta, mbam -> tuple( meta.subMap('id'), mbam ) } // Is the submap necessary? - .groupTuple() - .set { collected_files_for_merge } - - // - // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES - // - SAMTOOLS_MERGE ( - collected_files_for_merge, - reference_tuple, - reference_index - ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) - - - emit: - mergedbam = SAMTOOLS_MERGE.out.bam - versions = ch_versions -} diff --git a/subworkflows/local/longread_coverage/main.nf b/subworkflows/local/longread_coverage/main.nf index 9fd1f927..eaff60be 100644 --- a/subworkflows/local/longread_coverage/main.nf +++ b/subworkflows/local/longread_coverage/main.nf @@ -18,33 +18,30 @@ workflow LONGREAD_COVERAGE { take: reference_tuple // Channel: [ val(meta), path( reference_file ) ] reference_index // Channel: [ val(meta), path( reference_indx ) ] - dot_genome // Channel: [ val(meta), [ path( datafile ) ] ] - reads_path // Channel: [ val(meta), path( str ) ] + dot_genome // Channel: [ val(meta), [ path( datafile ) ] ] + reads_path // Channel: [ val(meta), [ path( read_files ) ] ] main: - ch_versions = Channel.empty() // - // LOGIC: TAKE THE READ FOLDER AS INPUT AND GENERATE THE CHANNEL OF READ FILES + // PROCESS: MINIMAP ALIGNMENT // - ch_reads_path = reads_path.flatMap { meta, dir -> - files(dir.resolve('*.fasta.gz'), checkIfExists: true, type: 'file' ) - .collect{ fasta -> tuple( meta, fasta ) } + reads_path.flatMap{ meta, files -> + files.collect{ file -> + tuple(meta, file) + } } + .set { single_reads_path } - - // - // PROCESS: MINIMAP ALIGNMENT - // MINIMAP2_ALIGN ( - ch_reads_path, + single_reads_path, reference_tuple.collect(), true, "csi", false, false, ) - ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) + // // LOGIC: COLLECT THE MAPPED BAMS AS THERE MAY BE MULTIPLE AND MERGE, CREATE SAMPLE ID BASED ON PREFIX OF FILE @@ -63,12 +60,17 @@ workflow LONGREAD_COVERAGE { // // MODULE: MERGES THE BAM FILES IN REGARDS 
TO THE REFERENCE // EMITS A MERGED BAM + // TODO: I AM PASSING IN AN INDEX, COMBINE AND MAP CHANNEL? + def ref_and_index = reference_tuple + .combine(reference_index) + .map{ meta, reference, _meta2, index -> + [meta, reference, index, []] + } + SAMTOOLS_MERGE( collected_files_for_merge, - reference_tuple, - [[],[]] + ref_and_index ) - ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions) // @@ -76,9 +78,9 @@ workflow LONGREAD_COVERAGE { // SAMTOOLS_SORT ( SAMTOOLS_MERGE.out.bam, - [[],[]] + [[],[]], + [] ) - ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions ) // @@ -86,11 +88,10 @@ workflow LONGREAD_COVERAGE { // SAMTOOLS_VIEW_FILTER_PRIMARY( SAMTOOLS_SORT.out.bam.map { meta, bam -> tuple( meta + [sz: bam.size(), single_end: true], bam, [] ) }, - reference_tuple.collect(), + reference_tuple.collect().map { meta, file -> [meta, file, []] }, [], "csi" ) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FILTER_PRIMARY.out.versions) // @@ -99,7 +100,6 @@ workflow LONGREAD_COVERAGE { BEDTOOLS_BAMTOBED( SAMTOOLS_VIEW_FILTER_PRIMARY.out.bam ) - ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) // @@ -107,15 +107,15 @@ workflow LONGREAD_COVERAGE { // BEDTOOLS_BAMTOBED.out.bed .combine( dot_genome ) - .multiMap { meta, file, my_genome_meta, my_genome -> - input_tuple : tuple ( - [ id : meta.id, - single_end : true ], - file, - 1 - ) - dot_genome : my_genome - file_suffix : 'bed' + .multiMap { meta, file, _my_genome_meta, my_genome -> + input_tuple : tuple ( + [ id : meta.id, + single_end : true ], + file, + 1 + ) + dot_genome : my_genome + file_suffix : 'bed' } .set { genomecov_input } @@ -129,7 +129,6 @@ workflow LONGREAD_COVERAGE { genomecov_input.file_suffix, false ) - ch_versions = ch_versions.mix( BEDTOOLS_GENOMECOV.out.versions ) // @@ -138,7 +137,6 @@ workflow LONGREAD_COVERAGE { GNU_SORT( BEDTOOLS_GENOMECOV.out.genomecov ) - ch_versions = ch_versions.mix( GNU_SORT.out.versions ) // @@ -147,7 +145,7 @@ workflow LONGREAD_COVERAGE { 
GNU_SORT.out.sorted .combine( dot_genome ) .combine( reference_tuple ) - .multiMap { meta, file, meta_my_genome, my_genome, ref_meta, ref -> + .multiMap { _meta, file, _meta_my_genome, my_genome, ref_meta, _ref -> ch_coverage_bed : tuple ( [ id: ref_meta.id, single_end: true @@ -166,9 +164,7 @@ workflow LONGREAD_COVERAGE { bed2bw_normal_input.ch_coverage_bed, bed2bw_normal_input.genome_file ) - ch_versions = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions ) emit: ch_bigwig = UCSC_BEDGRAPHTOBIGWIG.out.bigwig - versions = ch_versions } diff --git a/subworkflows/local/repeat_density/main.nf b/subworkflows/local/repeat_density/main.nf index ce4400d2..5fe9bad6 100644 --- a/subworkflows/local/repeat_density/main.nf +++ b/subworkflows/local/repeat_density/main.nf @@ -24,36 +24,32 @@ workflow REPEAT_DENSITY { dot_genome main: - ch_versions = Channel.empty() - // // MODULE: MARK UP THE REPEAT REGIONS OF THE REFERENCE GENOME // WINDOWMASKER_MKCOUNTS ( reference_tuple ) - ch_versions = ch_versions.mix( WINDOWMASKER_MKCOUNTS.out.versions ) // // MODULE: CALCULATE THE STATISTICS OF THE MARKED UP REGIONS // - WINDOWMASKER_USTAT( WINDOWMASKER_MKCOUNTS.out.counts, - reference_tuple ) - ch_versions = ch_versions.mix( WINDOWMASKER_USTAT.out.versions ) + WINDOWMASKER_USTAT( + WINDOWMASKER_MKCOUNTS.out.counts, + reference_tuple + ) // // MODULE: USE USTAT OUTPUT TO EXTRACT REPEATS FROM FASTA // EXTRACT_REPEAT( WINDOWMASKER_USTAT.out.intervals ) - ch_versions = ch_versions.mix( EXTRACT_REPEAT.out.versions ) // // MODULE: CREATE WINDOWS FROM .GENOME FILE // BEDTOOLS_MAKEWINDOWS( dot_genome ) - ch_versions = ch_versions.mix( BEDTOOLS_MAKEWINDOWS.out.versions ) // @@ -77,7 +73,6 @@ workflow REPEAT_DENSITY { intervals, dot_genome ) - ch_versions = ch_versions.mix( BEDTOOLS_INTERSECT.out.versions ) // @@ -88,20 +83,16 @@ workflow REPEAT_DENSITY { [], false ) - ch_versions = ch_versions.mix( GAWK_RENAME_IDS.out.versions ) // // MODULE: SORTS THE ABOVE BED FILES // GNU_SORT_A ( 
GAWK_RENAME_IDS.out.output ) // Intersect file - ch_versions = ch_versions.mix( GNU_SORT_A.out.versions ) GNU_SORT_B ( dot_genome ) // genome file - ch_versions = ch_versions.mix( GNU_SORT_B.out.versions ) GNU_SORT_C ( BEDTOOLS_MAKEWINDOWS.out.bed ) // windows file - ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) // @@ -112,7 +103,6 @@ workflow REPEAT_DENSITY { [], false ) - ch_versions = ch_versions.mix( GAWK_REFORMAT_INTERSECT.out.versions ) // @@ -137,7 +127,6 @@ workflow REPEAT_DENSITY { for_mapping, GNU_SORT_B.out.sorted ) - ch_versions = ch_versions.mix( BEDTOOLS_MAP.out.versions ) // @@ -148,7 +137,6 @@ workflow REPEAT_DENSITY { [], false ) - ch_versions = ch_versions.mix( GAWK_REPLACE_DOTS.out.versions ) // @@ -156,12 +144,10 @@ workflow REPEAT_DENSITY { // UCSC_BEDGRAPHTOBIGWIG( GAWK_REPLACE_DOTS.out.output, - GNU_SORT_B.out.sorted.map { it[1] } + GNU_SORT_B.out.sorted.map { _meta, file -> file } ) - ch_versions = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions ) emit: repeat_density = UCSC_BEDGRAPHTOBIGWIG.out.bigwig - versions = ch_versions } diff --git a/subworkflows/local/telo_extraction/main.nf b/subworkflows/local/telo_extraction/main.nf deleted file mode 100644 index e5bfd667..00000000 --- a/subworkflows/local/telo_extraction/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -include { FIND_TELOMERE_WINDOWS } from '../../../modules/local/find/telomere_windows/main' -include { EXTRACT_TELOMERE } from '../../../modules/local/extract/telomere/main' - -workflow TELO_EXTRACTION { - take: - telomere_file //tuple(meta, file) - - main: - ch_versions = Channel.empty() - - // - // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE - // - FIND_TELOMERE_WINDOWS ( - telomere_file - ) - ch_versions = ch_versions.mix( FIND_TELOMERE_WINDOWS.out.versions ) - - - def windows_file = FIND_TELOMERE_WINDOWS.out.windows - def safe_windows = windows_file.ifEmpty { Channel.empty() } - - // - // MODULE: Extract the telomere data from the FIND_TELOMERE - // file and reformat 
into bed - // - EXTRACT_TELOMERE( - safe_windows - ) - ch_versions = ch_versions.mix( EXTRACT_TELOMERE.out.versions ) - - - emit: - bedgraph_file = EXTRACT_TELOMERE.out.bedgraph - versions = ch_versions - -} diff --git a/subworkflows/local/telo_finder/main.nf b/subworkflows/local/telo_finder/main.nf deleted file mode 100644 index cdf0d223..00000000 --- a/subworkflows/local/telo_finder/main.nf +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env nextflow - -// -// MODULE IMPORT BLOCK -// -include { FIND_TELOMERE_REGIONS } from '../../../modules/local/find/telomere_regions/main' -include { GAWK_SPLIT_DIRECTIONS } from '../../../modules/local/gawk_split_directions/main' - -include { TELO_EXTRACTION } from '../../../subworkflows/local/telo_extraction/main' - -workflow TELO_FINDER { - - take: - reference_tuple // Channel [ val(meta), path(fasta) ] - teloseq - - main: - ch_versions = Channel.empty() - - - // - // MODULE: FINDS THE TELOMERIC SEQEUNCE IN REFERENCE - // - FIND_TELOMERE_REGIONS ( - reference_tuple, - teloseq - ) - ch_versions = ch_versions.mix( FIND_TELOMERE_REGIONS.out.versions ) - - - // - // MODULE: SPLIT THE TELOMERE FILE INTO 5' and 3' FILES - // THIS IS RUNNING ON A LOCAL VERSION OF THE GAWK MODULE - // - if (params.split_telomere) { - GAWK_SPLIT_DIRECTIONS ( - FIND_TELOMERE_REGIONS.out.telomere, - file("${projectDir}/bin/gawk_split_directions.awk") - ) - ch_versions = ch_versions.mix( GAWK_SPLIT_DIRECTIONS.out.versions ) - - GAWK_SPLIT_DIRECTIONS.out.prime5 - .map { meta, file -> - tuple( [id: meta.id + "_5P"], file) - } - .set { prime5_telo } - - GAWK_SPLIT_DIRECTIONS.out.prime3 - .map { meta, file -> - tuple( [id: meta.id + "_3P"], file) - } - .set { prime3_telo } - - prime5_telo - .mix(prime3_telo) - .mix(FIND_TELOMERE_REGIONS.out.telomere) - .set { telo_for_extraction } - - } else { - telo_for_extraction = FIND_TELOMERE_REGIONS.out.telomere - } - - - // - // SUBWORKFLOW: TELO_EXTRACTION - // - The prime5.mix(prime3) creates a queue channel to execute - 
// TELO_EXTRACTION per item in channel - // - TELO_EXTRACTION ( - telo_for_extraction - ) - ch_versions = ch_versions.mix( TELO_EXTRACTION.out.versions ) - - - TELO_EXTRACTION.out.bedgraph_file - .map{ _meta, bedgraph -> - bedgraph - } - .collect() - .set { telo_bedgraphs } - - - emit: - bedgraph_file = telo_bedgraphs // Used in pretext_graph - versions = ch_versions -} diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf index cda3a99e..8c345173 100644 --- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf @@ -11,12 +11,14 @@ include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' include { paramsSummaryMap } from 'plugin/nf-schema' include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { fn_get_validated_channel } from '../../../functions/local/utils' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW TO INITIALISE PIPELINE @@ -28,14 +30,17 @@ workflow PIPELINE_INITIALISATION { take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs + _monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved - input // 
string: Path to input samplesheet + _input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file @@ -50,10 +55,39 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // + before_text = """ +-\033[2m----------------------------------------------------\033[0m- +\033[0;34m _____ \033[0;32m _______ \033[0;31m _\033[0m +\033[0;34m / ____| \033[0;32m|__ __| \033[0;31m| |\033[0m +\033[0;34m | (___ __ _ _ __ __ _ ___ _ __ \033[0m ___ \033[0;32m| |\033[0;33m ___ \033[0;31m| |\033[0m +\033[0;34m \\___ \\ / _` | '_ \\ / _` |/ _ \\ '__|\033[0m|___|\033[0;32m| |\033[0;33m/ _ \\\033[0;31m| |\033[0m +\033[0;34m ____) | (_| | | | | (_| | __/ | \033[0;32m| |\033[0;33m (_) \033[0;31m| |____\033[0m +\033[0;34m |_____/ \\__,_|_| |_|\\__, |\\___|_| \033[0;32m|_|\033[0;33m\\___/\033[0;31m|______|\033[0m +\033[0;34m __/ |\033[0m +\033[0;34m |___/\033[0m +\033[0;35m ${workflow.manifest.name} ${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- + """ + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/', '')}" }.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.5281/zenodo.12773958 + +* Software dependencies + https://github.com/sanger-tol/curationpretext/blob/main/CITATIONS.md +""" + + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFSCHEMA_PLUGIN ( workflow, validate_params, - null + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command ) // @@ -66,67 +100,72 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - - input_fasta = Channel.fromPath( + input_fasta = channel.fromPath( params.input, checkIfExists: true, type: 'file' ) - cram_dir = Channel.fromPath( - params.cram, - checkIfExists: true, - type: 'dir' - ) - ch_reference = input_fasta.map { fasta -> - def fasta_size = fasta.size() - def selected_aligner = (params.aligner == "AUTO") ? - (fasta_size > 5e9 ? "minimap2" : "bwamem2") : - params.aligner - - tuple( + [ [ id: params.sample, - aligner: selected_aligner, map_order: params.map_order, multi_mapping: params.multi_mapping, - ref_size: fasta_size, ], fasta - ) + ] } + if ( (params.pre_mapped_bam?.size() ?: 0) == 0 && (params.cram?.size() ?: 0) == 0 ) { + error "You need to supply either a --pre_mapped_bam file of an array of --cram files!" + } - ch_cram_reads = cram_dir.map { dir -> - tuple( - [ id: params.sample ], - dir - ) + if ( (params.pre_mapped_bam?.size() ?: 0) > 1 ) { + error "Using Pre-Mapped Reads supports only 1 file" } - ch_reads = Channel - .fromPath( - params.reads, - checkIfExists: true, - type: 'dir' - ) - .map { dir -> - tuple( - [ id: params.sample, - single_end: true, - read_type: params.read_type - ], - dir - ) - } + if (params.pre_mapped_bam && params.cram) { + error "Can only use Pre-Mapped Reads or CRAM files!" + } + + ch_cram_reads = params.cram ? 
fn_get_validated_channel( + "cram", + [ + id: params.sample, + map_order: params.map_order, + multi_mapping: params.multi_mapping, + ], + params.cram + ) : channel.empty() + + ch_mapped_bam = params.pre_mapped_bam ? fn_get_validated_channel( + "bam", + [ + id: params.sample, + map_order: params.map_order, + multi_mapping: params.multi_mapping + ], + params.pre_mapped_bam + ) : channel.empty() + + ch_longreads = fn_get_validated_channel( + "pacbio", + [ + id: params.sample, + map_order: params.map_order, + multi_mapping: params.multi_mapping, + ], + params.reads + ) emit: ch_reference ch_cram_reads - ch_reads - teloseq = params.teloseq - versions = ch_versions + ch_mapped_bam + ch_longreads + teloseq = params.teloseq + versions = ch_versions } /* diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml deleted file mode 100644 index f8476112..00000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nextflow_pipeline: - - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index bfd25876..2f30e9a4 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -98,7 +98,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) } // diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml deleted file mode 100644 index 
ac8523c9..00000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 4994303e..ee4738c8 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -4,6 +4,7 @@ include { paramsSummaryLog } from 'plugin/nf-schema' include { validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' workflow UTILS_NFSCHEMA_PLUGIN { @@ -15,29 +16,56 @@ workflow UTILS_NFSCHEMA_PLUGIN { // when this input is empty it will automatically use the configured schema or // "${projectDir}/nextflow_schema.json" as default. This input should not be empty // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline main: + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + params.help instanceof String ? params.help : "", + ) + exit 0 + } + // // Print parameter summary to stdout. 
This will display the parameters // that differ from the default given in the JSON schema // + + summary_options = [:] if(parameters_schema) { - log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) - } else { - log.info paramsSummaryLog(input_workflow) + summary_options << [parametersSchema: parameters_schema] } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text // // Validate the parameters using nextflow_schema.json or the schema // given via the validation.parametersSchema configuration option // if(validate_params) { + validateOptions = [:] if(parameters_schema) { - validateParameters(parameters_schema:parameters_schema) - } else { - validateParameters() + validateOptions << [parametersSchema: parameters_schema] } + validateParameters(validateOptions) } emit: diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 8fb30164..c977917a 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -25,6 +25,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -51,6 +57,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -77,6 +89,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -103,6 +121,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = 
"${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -114,4 +138,36 @@ nextflow_workflow { ) } } + + test("Should create a help message") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = true + input[4] = false + input[5] = false + input[6] = "Before" + input[7] = "After" + input[8] = "nextflow run test/test" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } } diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 443e828c..8d8c7371 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.4.2" + id "nf-schema@2.5.1" } validation { diff --git a/subworkflows/sanger-tol/bam_samtools_merge_markdup/main.nf b/subworkflows/sanger-tol/bam_samtools_merge_markdup/main.nf new file mode 100644 index 00000000..b3d82829 --- /dev/null +++ b/subworkflows/sanger-tol/bam_samtools_merge_markdup/main.nf @@ -0,0 +1,70 @@ +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_MERGEDUP } from '../../../modules/sanger-tol/samtools/mergedup/main' + +workflow BAM_SAMTOOLS_MERGE_MARKDUP { + + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_assemblies // channel: [ val(meta), fasta ] + val_mark_duplicates // boolean: mark duplicates on output bam + + main: + // + // Module: Index assembly fastas + // + SAMTOOLS_FAIDX( + ch_assemblies.map { 
meta, assembly -> [meta, assembly, []] }, + false + ) + + // + // Logic: create a channel with both fai and gzi for each assembly + // We do it here so we don't cause downstream issues with the + // remainder join + // + ch_fai_gzi = SAMTOOLS_FAIDX.out.fai + .join(SAMTOOLS_FAIDX.out.gzi, by: 0, remainder: true) + .map { meta, fai, gzi -> [ meta, fai, gzi ?: [] ] } + + // + // Logic: Prepare input for merging bams. + // Attach the assembly and its fai/gzi index to each set of bams by meta, + // then split into the bam and reference inputs for the merge modules + // + ch_samtools_merge_input = ch_bam + .combine(ch_assemblies, by: 0) + .combine(ch_fai_gzi, by: 0) + .multiMap { meta, bams, assembly, fai, gzi -> + bam: [ meta, bams ] + fasta: [ meta, assembly, fai, gzi ] + } + + // + // Module: Either merge position-sorted bam files, or merge and mark duplicates + // + if(val_mark_duplicates) { + SAMTOOLS_MERGEDUP( + ch_samtools_merge_input.bam, + ch_samtools_merge_input.fasta + ) + + ch_output_bam = SAMTOOLS_MERGEDUP.out.bam.mix(SAMTOOLS_MERGEDUP.out.cram) + ch_output_index = SAMTOOLS_MERGEDUP.out.csi.mix(SAMTOOLS_MERGEDUP.out.crai) + ch_output_metrics = SAMTOOLS_MERGEDUP.out.metrics + } else { + SAMTOOLS_MERGE( + ch_samtools_merge_input.bam, + ch_samtools_merge_input.fasta + ) + + ch_output_bam = SAMTOOLS_MERGE.out.bam.mix(SAMTOOLS_MERGE.out.cram) + ch_output_index = SAMTOOLS_MERGE.out.csi.mix(SAMTOOLS_MERGE.out.crai) + ch_output_metrics = channel.empty() + } + + emit: + bam = ch_output_bam // channel: [ val(meta), path(bam) ] + bam_index = ch_output_index // channel: [ val(meta), path(index) ] + metrics = ch_output_metrics // channel: [ val(meta), path(stats) ] +} diff --git a/subworkflows/sanger-tol/bam_samtools_merge_markdup/meta.yml b/subworkflows/sanger-tol/bam_samtools_merge_markdup/meta.yml new file mode 100644 index 00000000..4df7dc79 --- /dev/null +++ b/subworkflows/sanger-tol/bam_samtools_merge_markdup/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/subworkflows/yaml-schema.json +name: "bam_samtools_merge_markdup" +description: | + Merge sorted fixmated BAM files with reference to an assembly, then optionally mark duplicates +keywords: + - bam + - merge + - duplicate marking +components: + - samtools/mergedup + - samtools/faidx: + git_remote: https://github.com/nf-core/modules.git + - samtools/merge: + git_remote: https://github.com/nf-core/modules.git +input: + - ch_bam: + type: file + description: | + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam,cram,sam}" + - ch_assemblies: + type: file + description: | + Assemblies for merging and marking duplicates. + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fasta,fasta.gz}" + - val_mark_duplicates: + type: boolean + description: | + Flag to trigger marking duplicates on the merged BAM files +output: + - bam: + type: file + description: | + Channel containing merged and possibly duplicate-marked BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bam_index: + type: file + description: | + Channel containing the index file(s) for the merged BAM + Structure: [ val(meta), path(index) ] + pattern: "*.{bai,csi,crai}" + - metrics: + type: file + description: | + Channel containing markdup metrics, if duplicates marked + Structure: [ val(meta), path(metrics) ] + pattern: "*.metrics" +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/main.nf.test b/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/main.nf.test new file mode 100644 index 00000000..804fa51f --- /dev/null +++ b/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_SAMTOOLS_MERGE_MARKDUP" + script "../main.nf" + workflow "BAM_SAMTOOLS_MERGE_MARKDUP" + + tag "subworkflows" + 
tag "subworkflows_sangertol" + tag "subworkflows/bam_samtools_merge_markdup" + tag "samtools" + tag "samtools/mergedup" + tag "subworkflows/../../modules/nf-core/samtools/faidx" + tag "subworkflows/../../modules/nf-core/samtools/merge" + tag "subworkflows/../../modules/nf-core/samtools/index" + tag "subworkflows/../../modules/nf-core/samtools/markdup" + + config "./nextflow.config" + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall( + "${launchDir}/library/", + [ + "samtools/faidx", + "samtools/index", + "samtools/merge", + ] + ) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + } + + test("Undibacterium unclassified - hap1 - no markdup") { + + when { + + params { + samtools_args = "--write-index" + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.2.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.3.bam', checkIfExists: true), + ] + ]) + input[1] = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/assembly/draft/baUndUnlc1.hic.hap1.p_ctg.unscaffolded.fa.gz', checkIfExists: true), + ]) + input[2] = false + """ + } + } + + then { + + assert workflow.success + assertAll( + { assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }, + workflow.out.bam_index.collect { file(it.get(1)).getName() } + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } + + test("Undibacterium unclassified - hap1 - markdup") { + + when { + + params { + samtools_args = "-s --write-index" + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 
'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.2.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/hic_mapping/41741_2.7.sub.cram.baUndUnlc1_hic_phased_hap1.3.bam', checkIfExists: true), + ] + ]) + input[1] = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/assembly/draft/baUndUnlc1.hic.hap1.p_ctg.unscaffolded.fa.gz', checkIfExists: true), + ]) + input[2] = true + """ + } + } + + then { + assert workflow.success + assertAll( + { assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }, + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.metrics + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } +} diff --git a/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/main.nf.test.snap b/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/main.nf.test.snap new file mode 100644 index 00000000..1efcf92f --- /dev/null +++ b/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "Undibacterium unclassified - hap1 - no markdup": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 10, + "meanQuality": 50, + "readCount": 11583, + "duplicateReadCount": 0, + "sorted": false + } + ], + [ + "test.bam.csi" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T15:42:55.598432" + }, + "Undibacterium unclassified - hap1 - markdup": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 10, + "meanQuality": 50, + "readCount": 11583, + "duplicateReadCount": 508, + "sorted": false + } + ], + [ + "test.bam.csi" + ], + [ + [ + { + "id": "test" + }, + 
"test.metrics:md5,7816ccaa9f01adce0ee1a5e620e933a1" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T15:43:16.774673" + } +} \ No newline at end of file diff --git a/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/nextflow.config b/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/nextflow.config new file mode 100644 index 00000000..edcccf7c --- /dev/null +++ b/subworkflows/sanger-tol/bam_samtools_merge_markdup/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'SAMTOOLS_MERGE' { + ext.args = { params.samtools_args } + } + withName: 'SAMTOOLS_MERGEDUP' { + ext.args2 = { params.samtools_args } + } +} diff --git a/subworkflows/sanger-tol/cram_map_illumina_hic/main.nf b/subworkflows/sanger-tol/cram_map_illumina_hic/main.nf new file mode 100644 index 00000000..e0e8a945 --- /dev/null +++ b/subworkflows/sanger-tol/cram_map_illumina_hic/main.nf @@ -0,0 +1,181 @@ +include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' +include { CRAMALIGN_GENCRAMCHUNKS } from '../../../modules/sanger-tol/cramalign/gencramchunks' +include { CRAMALIGN_BWAMEM2ALIGNHIC } from '../../../modules/sanger-tol/cramalign/bwamem2alignhic' +include { CRAMALIGN_MINIMAP2ALIGNHIC } from '../../../modules/sanger-tol/cramalign/minimap2alignhic' +include { MINIMAP2_INDEX } from '../../../modules/nf-core/minimap2/index/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SPLITHEADER } from '../../../modules/nf-core/samtools/splitheader/main' + +include { BAM_SAMTOOLS_MERGE_MARKDUP } from '../bam_samtools_merge_markdup/main' + +workflow CRAM_MAP_ILLUMINA_HIC { + + take: + ch_assemblies // Channel [meta, assembly] + ch_hic_cram // Channel [meta, cram] OR [meta, [cram1, cram2, ..., cram_n]] + val_aligner // string: [either "bwamem2" or "minimap2"] + val_cram_chunk_size // integer: Number of CRAM slices per chunk for mapping + + main: + // + // Logic: rolling 
check of assembly meta objects to detect duplicates + // + def val_asm_meta_list = Collections.synchronizedSet(new HashSet()) + + ch_assemblies + .map { meta, _sample -> + if (!val_asm_meta_list.add(meta)) { + error("Error: Duplicate meta object found in `ch_assemblies` in CRAM_MAP_ILLUMINA_HIC: ${meta}") + } + meta + } + + // + // Logic: check if CRAM files are accompanied by an index + // Get indexes, and index those that aren't + // + ch_hic_cram_meta_mod = ch_hic_cram + .transpose() + .map { meta, cram -> [ meta + [ cramfile: cram ], cram ]} + + ch_hic_cram_raw = ch_hic_cram_meta_mod + .branch { meta, cram -> + def cram_file = file(cram, checkIfExists: true) + def index = cram + ".crai" + have_index: file(index).exists() + return [ meta, cram_file, file(index, checkIfExists: true) ] + no_index: true + return [ meta, cram_file ] + } + + // + // Module: Index CRAM files without indexes + // + SAMTOOLS_INDEX(ch_hic_cram_raw.no_index) + + ch_hic_cram_indexed = ch_hic_cram_raw.have_index + .mix( + ch_hic_cram_raw.no_index.join(SAMTOOLS_INDEX.out.crai) + ) + + // + // Module: Process the cram index files to determine how many + // chunks to split into for mapping + // + CRAMALIGN_GENCRAMCHUNKS( + ch_hic_cram_indexed, + val_cram_chunk_size + ) + + // + // Logic: Count the total number of cram chunks for downstream grouping + // + ch_n_cram_chunks = CRAMALIGN_GENCRAMCHUNKS.out.cram_slices + .map { meta, _cram, _crai, chunkn, _slices -> + def clean_meta = meta - meta.subMap("cramfile") + [ clean_meta, chunkn ] + } + .transpose() + .groupTuple(by: 0) + .map { meta, chunkns -> [ meta, chunkns.size() ] } + + // + // Module: Extract read groups from CRAM headers + // + SAMTOOLS_SPLITHEADER(ch_hic_cram_meta_mod) + + ch_readgroups = SAMTOOLS_SPLITHEADER.out.readgroup + .map { meta, rg_file -> + [ meta, rg_file.readLines().collect { line -> line.replaceAll("\t", "\\\\t") } ] + } + + // + // Logic: Join read groups with the CRAM chunks and clean meta + // + ch_cram_rg = 
ch_readgroups + .combine(CRAMALIGN_GENCRAMCHUNKS.out.cram_slices.transpose(), by: 0) + .map { meta, rg, cram, crai, chunkn, slices -> + def clean_meta = meta - meta.subMap("cramfile") + [ clean_meta, rg, cram, crai, chunkn, slices ] + } + + // + // Logic: Begin alignment - fork depending on specified aligner + // + if(val_aligner == "bwamem2") { + // + // Module: Create bwa-mem2 index for assembly + // + BWAMEM2_INDEX(ch_assemblies) + + ch_mapping_inputs = ch_cram_rg + .combine(ch_assemblies, by: 0) + .combine(BWAMEM2_INDEX.out.index, by: 0) + .multiMap { meta, rg, cram, crai, chunkn, slices, assembly, index -> + cram: [ meta, cram, crai, rg ] + reference: [ meta, index, assembly ] + slices: [ chunkn, slices ] + } + + CRAMALIGN_BWAMEM2ALIGNHIC( + ch_mapping_inputs.cram, + ch_mapping_inputs.reference, + ch_mapping_inputs.slices + ) + + ch_mapped_bams = CRAMALIGN_BWAMEM2ALIGNHIC.out.bam + } else if(val_aligner == "minimap2") { + // + // MODULE: generate minimap2 mmi file + // + MINIMAP2_INDEX(ch_assemblies) + + ch_mapping_inputs = ch_cram_rg + .combine(ch_assemblies, by: 0) + .combine(MINIMAP2_INDEX.out.index, by: 0) + .multiMap { meta, rg, cram, crai, chunkn, slices, assembly, index -> + cram: [ meta, cram, crai, rg ] + reference: [ meta, index, assembly ] + slices: [ chunkn, slices ] + } + + CRAMALIGN_MINIMAP2ALIGNHIC( + ch_mapping_inputs.cram, + ch_mapping_inputs.reference, + ch_mapping_inputs.slices + ) + + ch_mapped_bams = CRAMALIGN_MINIMAP2ALIGNHIC.out.bam + } else { + error("Unsupported aligner: ${val_aligner}") + } + + // + // Logic: Prepare input for merging bams. 
+ // We use the ch_n_cram_chunks to set a groupKey so that + // we emit groups downstream ASAP once all bams have been made + // + ch_merge_input = ch_mapped_bams + .combine(ch_n_cram_chunks, by: 0) + .map { meta, bam, n_chunks -> + def key = groupKey(meta, n_chunks) + [key, bam] + } + .groupTuple(by: 0) + .map { key, bam -> [key.target, bam.sort { b -> b.getName() }] } + + + // + // Subworkflow: merge BAM files and mark duplicates + // + BAM_SAMTOOLS_MERGE_MARKDUP( + ch_merge_input, + ch_assemblies, + true + ) + + emit: + bam = BAM_SAMTOOLS_MERGE_MARKDUP.out.bam + bam_index = BAM_SAMTOOLS_MERGE_MARKDUP.out.bam_index + bam_markdup_stats = BAM_SAMTOOLS_MERGE_MARKDUP.out.metrics +} diff --git a/subworkflows/sanger-tol/cram_map_illumina_hic/meta.yml b/subworkflows/sanger-tol/cram_map_illumina_hic/meta.yml new file mode 100644 index 00000000..74ba8781 --- /dev/null +++ b/subworkflows/sanger-tol/cram_map_illumina_hic/meta.yml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/subworkflows/yaml-schema.json +name: "cram_map_illumina_hic" +description: | + Maps Hi-C reads in unaligned CRAM format to a set of references. +keywords: + - hic + - align + - bam +components: + - cramalign/gencramchunks + - cramalign/bwamem2alignhic + - cramalign/minimap2alignhic + - bam_samtools_merge_markdup + - samtools/index: + git_remote: https://github.com/nf-core/modules.git + - samtools/splitheader: + git_remote: https://github.com/nf-core/modules.git + - bwamem2/index: + git_remote: https://github.com/nf-core/modules.git + - minimap2/index: + git_remote: https://github.com/nf-core/modules.git + +input: + - ch_assemblies: + type: file + description: | + Input channel containing assemblies to align Hi-C reads to + Structure: [ val(meta), path(fasta) ] + pattern: "*.fn?a(sta)?(.gz)?" + - ch_hic_cram: + type: file + description: | + Input channel containing Hi-C CRAM files for mapping. 
+ Structure: [ val(meta), path(cram) ] or [ val(meta), [ path(cram1), path(cram2), ... ] ] + pattern: "*.cram" + - val_aligner: + type: string + description: Which aligner to use - bwamem2 or minimap2 + - val_cram_chunk_size: + type: integer + description: Size of chunk to split CRAM files into + - val_mark_duplicates: + type: boolean + description: Whether to mark duplicates on the merged BAM or not +output: + - bam: + type: file + description: | + Channel containing the BAM files describing Hi-C alignments to the input assemblies + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bam_index: + type: file + description: | + Channel containing the indexes for the output BAM + Structure: [ val(meta), path(index) ] + pattern: "*.{bai,csi,crai}" +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/subworkflows/sanger-tol/cram_map_illumina_hic/nextflow.config b/subworkflows/sanger-tol/cram_map_illumina_hic/nextflow.config new file mode 100644 index 00000000..dc7e84a8 --- /dev/null +++ b/subworkflows/sanger-tol/cram_map_illumina_hic/nextflow.config @@ -0,0 +1,22 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: CRAMALIGN_BWAMEM2ALIGNHIC { + ext.args1 = { "" } + ext.args2 = { "-F0xB00 -nt" } + ext.args3 = { "-5SPCp" } + ext.args4 = { "-mpu" } + ext.args5 = { "-q 0 -F 0x904" } + ext.args6 = { "--write-index -l1" } + + } + + withName: CRAMALIGN_MINIMAP2ALIGNHIC { + ext.args1 = { "" } + ext.args2 = { "-F0xB00 -nt" } + ext.args3 = { "-ax sr" } + ext.args4 = { "-mpu" } + ext.args5 = { "-q 0 -F 0x904" } + ext.args6 = { "--write-index -l1" } + } +} diff --git a/subworkflows/sanger-tol/cram_map_illumina_hic/tests/main.nf.test b/subworkflows/sanger-tol/cram_map_illumina_hic/tests/main.nf.test new file mode 100644 index 00000000..cdb27655 --- /dev/null +++ b/subworkflows/sanger-tol/cram_map_illumina_hic/tests/main.nf.test @@ -0,0 +1,392 @@ +nextflow_workflow { + + name "Test Subworkflow CRAM_MAP_ILLUMINA_HIC" + script "../main.nf" + 
workflow "CRAM_MAP_ILLUMINA_HIC" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_sangertol" + tag "subworkflows/cram_map_illumina_hic" + tag "subworkflows/bam_samtools_merge_markdup" + tag "cramalign/gencramchunks" + tag "cramalign/bwamem2alignhic" + tag "cramalign/minimap2alignhic" + tag "subworkflows/../../modules/nf-core/samtools/markdup" + tag "subworkflows/../../modules/nf-core/minimap2/index" + tag "subworkflows/../../modules/nf-core/samtools/merge" + tag "subworkflows/../../modules/nf-core/samtools/index" + tag "subworkflows/../../modules/nf-core/samtools/faidx" + tag "subworkflows/../../modules/nf-core/samtools/splitheader" + tag "subworkflows/../../modules/nf-core/bwamem2/index" + + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall( + "${launchDir}/library/", + [ + "bwamem2/index", + "minimap2/index", + "samtools/index", + "samtools/merge", + "samtools/markdup", + "samtools/faidx", + "samtools/splitheader" + ] + ) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + } + + + test("meles meles - bwamem2") { + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + bwamem2_mem_args = "-5SPp" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + samtools_markdup_args = "--write-index" + } + + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true) + ] + ) + input[2] = "bwamem2" + input[3] = 1 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bam.size() == 1 }, + { + assert snapshot( + workflow.out.bam.collect { 
bam(it.get(1)).getStatistics() }.sort(), + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.bam_markdup_stats, + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("meles meles - zymo - bwamem2 - multiple samples - double files") { + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + bwamem2_mem_args = "-5SPp" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + samtools_markdup_args = "--write-index" + } + + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ], + [ + [ id:'test2' ], // meta map + file(params.modules_testdata_base_path + 'Zymo_D6311_Metagenome/assembly/xyTesTing1_metamdbg.contigs.fasta.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true) + ], + [ + [ id:'test2' ], + [ + file(params.modules_testdata_base_path + 'Zymo_D6311_Metagenome/genomic_data/hic_sim3c/hic1.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'Zymo_D6311_Metagenome/genomic_data/hic_sim3c/hic2.250000.cram', checkIfExists: true) + ] + ] + ) + input[2] = "bwamem2" + input[3] = 5 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bam.size() == 2 }, + { + assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }.sort(), + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.bam_markdup_stats, + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("meles meles - bwamem2 - 
multiple samples - meta mismatch") { + + when { + + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + bwamem2_mem_args = "-5SPp" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + samtools_markdup_args = "--write-index" + } + + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ], + [ + [ id:'test2' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ], + [ + [ id:'test3' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true) + ], + [ + [ id:'test2' ], // meta map + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2_1_subset.noSM.cram', checkIfExists: true) + ], + [ + [ id:'test4' ], + file(params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true) + ] + ) + input[2] = "bwamem2" + input[3] = 1 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bam.size() == 2 }, + { + assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }.sort(), + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.bam.collect { + def rg = bam(it.get(1)).getHeader().toString().split(", ").findAll { it.startsWith('@RG') } + def sm = rg ? 
rg[0].split('\t').findAll { it.contains('SM') }[0] : 'no RG' + return "${it[0].id}: ${sm}" + }.sort(), + workflow.out.bam_markdup_stats, + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("meles meles - minimap2") { + + when { + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + minimap2_args = "-ax sr" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + samtools_markdup_args = "--write-index" + } + + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file( params.modules_testdata_base_path + 'Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file( params.modules_testdata_base_path + 'Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2%231_subset.cram', checkIfExists: true) + ] + ) + input[2] = "minimap2" + input[3] = 1 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bam.size() == 1 }, + { + assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }.sort(), + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.bam_markdup_stats, + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + + test("meles meles - multiple read groups - minimap2") { + + when { + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + minimap2_args = "-ax sr" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + samtools_markdup_args = "--write-index" + + } + + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file( params.modules_testdata_base_path + 'Biemna_sp_UH_2024/assembly/release/GCA_965654295.1/insdc/GCA_965654295.1.fa.gz', 
checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file( params.modules_testdata_base_path + 'Biemna_sp_UH_2024/genomic_data/odBieUhxx1/hic-arima2/49103_5-6_5.subset.cram', checkIfExists: true) + ] + ) + input[2] = "minimap2" + input[3] = 1 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bam.size() == 1 }, + { + assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }.sort(), + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.bam_markdup_stats, + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("meles meles - multiple read groups - bwamem2") { + + when { + params { + samtools_cat_args = "" + samtools_fastq_args = "-F0xB00 -nt" + bwamem2_mem_args = "-5SPp" + samtools_fixmate_args = "-mpu" + samtools_view_args = "-q 0 -F 0x904" + samtools_sort_args = "--write-index -l1" + samtools_markdup_args = "--write-index" + + } + + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file( params.modules_testdata_base_path + 'Biemna_sp_UH_2024/assembly/release/GCA_965654295.1/insdc/GCA_965654295.1.fa.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file( params.modules_testdata_base_path + 'Biemna_sp_UH_2024/genomic_data/odBieUhxx1/hic-arima2/49103_5-6_5.subset.cram', checkIfExists: true) + ] + ) + input[2] = "bwamem2" + input[3] = 1 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bam.size() == 1 }, + { + assert snapshot( + workflow.out.bam.collect { bam(it.get(1)).getStatistics() }.sort(), + workflow.out.bam_index.collect { file(it.get(1)).getName() }, + workflow.out.bam_markdup_stats, + ).match() + } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + +} diff --git a/subworkflows/sanger-tol/cram_map_illumina_hic/tests/main.nf.test.snap 
b/subworkflows/sanger-tol/cram_map_illumina_hic/tests/main.nf.test.snap new file mode 100644 index 00000000..d95220d3 --- /dev/null +++ b/subworkflows/sanger-tol/cram_map_illumina_hic/tests/main.nf.test.snap @@ -0,0 +1,240 @@ +{ + "meles meles - multiple read groups - minimap2": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 26, + "meanReadLength": 144, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 44, + "readCount": 6659, + "duplicateReadCount": 0, + "sorted": false + } + ], + [ + "test.bam.csi" + ], + [ + [ + { + "id": "test" + }, + "test.metrics:md5,7038a36e37a5b93331ffe618fe771795" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T10:43:38.748216" + }, + "meles meles - bwamem2": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 7, + "readCount": 3802, + "duplicateReadCount": 54, + "sorted": true + } + ], + [ + "test.bam.csi" + ], + [ + [ + { + "id": "test" + }, + "test.metrics:md5,51755ba7077cd444be19d1fd07569cb6" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T10:39:34.946622" + }, + "meles meles - minimap2": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 25, + "meanReadLength": 147, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 9, + "readCount": 1856, + "duplicateReadCount": 33, + "sorted": true + } + ], + [ + "test.bam.csi" + ], + [ + [ + { + "id": "test" + }, + "test.metrics:md5,bc737c10e07175ea468040b952538e50" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T10:42:47.872589" + }, + "meles meles - zymo - bwamem2 - multiple samples - double files": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 7, + "readCount": 3802, + 
"duplicateReadCount": 53, + "sorted": true + }, + { + "maxReadLength": 150, + "minReadLength": 150, + "meanReadLength": 150, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 54, + "readCount": 543925, + "duplicateReadCount": 533, + "sorted": false + } + ], + [ + "test2.bam.csi", + "test.bam.csi" + ], + [ + [ + { + "id": "test2" + }, + "test2.metrics:md5,9576c2cd924a502933e631d457d034b6" + ], + [ + { + "id": "test" + }, + "test.metrics:md5,75db11ef6694d30a041ce3ef5325dd19" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T10:41:09.214803" + }, + "meles meles - multiple read groups - bwamem2": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 38, + "readCount": 7849, + "duplicateReadCount": 25, + "sorted": false + } + ], + [ + "test.bam.csi" + ], + [ + [ + { + "id": "test" + }, + "test.metrics:md5,93c67967f457e5689c86e071339e9514" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T10:45:13.292929" + }, + "meles meles - bwamem2 - multiple samples - meta mismatch": { + "content": [ + [ + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 7, + "readCount": 3802, + "duplicateReadCount": 54, + "sorted": true + }, + { + "maxReadLength": 151, + "minReadLength": 151, + "meanReadLength": 151, + "maxQuality": 60, + "minQuality": 0, + "meanQuality": 7, + "readCount": 3802, + "duplicateReadCount": 54, + "sorted": true + } + ], + [ + "test2.bam.csi", + "test.bam.csi" + ], + [ + "test2: SM:test2", + "test: SM:SAMEA5962964" + ], + [ + [ + { + "id": "test2" + }, + "test2.metrics:md5,8548346e3c8f9e913c37a9cd790e77e9" + ], + [ + { + "id": "test" + }, + "test.metrics:md5,51755ba7077cd444be19d1fd07569cb6" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": 
"2026-02-10T10:42:06.647189" + } +} \ No newline at end of file diff --git a/subworkflows/sanger-tol/cram_map_illumina_hic/tests/nextflow.config b/subworkflows/sanger-tol/cram_map_illumina_hic/tests/nextflow.config new file mode 100644 index 00000000..a4e33582 --- /dev/null +++ b/subworkflows/sanger-tol/cram_map_illumina_hic/tests/nextflow.config @@ -0,0 +1,29 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: BWAMEM2_INDEX { + memory = 15.Gb + } + + withName: CRAMALIGN_BWAMEM2ALIGNHIC { + ext.args1 = { "${params.samtools_cat_args}" } + ext.args2 = { "${params.samtools_fastq_args}" } + ext.args3 = { "${params.bwamem2_mem_args}" } + ext.args4 = { "${params.samtools_fixmate_args}" } + ext.args5 = { "${params.samtools_view_args}" } + ext.args6 = { "${params.samtools_sort_args}" } + } + + withName: CRAMALIGN_MINIMAP2ALIGNHIC { + ext.args1 = { "${params.samtools_cat_args}" } + ext.args2 = { "${params.samtools_fastq_args}" } + ext.args3 = { "${params.minimap2_args}" } + ext.args4 = { "${params.samtools_fixmate_args}" } + ext.args5 = { "${params.samtools_view_args}" } + ext.args6 = { "${params.samtools_sort_args}" } + } + + withName: 'SAMTOOLS_MERGEDUP' { + ext.args2 = { "${params.samtools_markdup_args}" } + } +} diff --git a/subworkflows/sanger-tol/gap_finder/main.nf b/subworkflows/sanger-tol/gap_finder/main.nf new file mode 100644 index 00000000..f71130d3 --- /dev/null +++ b/subworkflows/sanger-tol/gap_finder/main.nf @@ -0,0 +1,51 @@ +// +// GENERATE BED FILE OF GAPS AND LENGTH IN REFERENCE +// + +include { SEQTK_CUTN } from '../../../modules/nf-core/seqtk/cutn/main' +include { GAWK as GAWK_GAP_LENGTH } from '../../../modules/nf-core/gawk/main' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main' + +workflow GAP_FINDER { + take: + ch_reference // Channel [ val(meta), path(fasta) ] + val_run_bgzip // val(boolean) + + main: + + // + // MODULE: GENERATES A GAP SUMMARY FILE + // + SEQTK_CUTN ( + ch_reference + ) + + 
ch_reformat_gaps = channel.of('''\ + BEGIN { OFS = "\\t" } { + print $0, sqrt(($3-$2)*($3-$2)) + }'''.stripIndent()) + .collectFile(name: "reformat_gaps.awk", cache: true) + .collect() + + + // + // MODULE: ADD THE LENGTH OF GAP TO BED FILE - INPUT FOR PRETEXT MODULE + // + GAWK_GAP_LENGTH ( + SEQTK_CUTN.out.bed, + ch_reformat_gaps, + false + ) + + + // + // MODULE: BGZIP AND TABIX THE GAP FILE + // + TABIX_BGZIPTABIX ( + SEQTK_CUTN.out.bed.filter{ _meta, _file -> val_run_bgzip} + ) + + emit: + gap_file = GAWK_GAP_LENGTH.out.output + gap_tabix = TABIX_BGZIPTABIX.out.gz_index +} diff --git a/subworkflows/sanger-tol/gap_finder/meta.yml b/subworkflows/sanger-tol/gap_finder/meta.yml new file mode 100644 index 00000000..aad87b6d --- /dev/null +++ b/subworkflows/sanger-tol/gap_finder/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/subworkflows/yaml-schema.json +name: "gap_finder" +description: Generate a bed file containing gaps and their length +keywords: + - gap + - bed + - adapters + - trimming + - fasta +components: + - seqtk/cutn: + git_remote: https://github.com/nf-core/modules.git + - gawk: + git_remote: https://github.com/nf-core/modules.git + - tabix/bgziptabix: + git_remote: https://github.com/nf-core/modules.git + +input: + - ch_reference: + type: file + description: | + Structure [ val(meta), path(reference) ] + Meta is the Groovy Map containing sample information + Reference is the fasta for analysis + - val_run_bgzip: + type: boolean + description: | + Control running of tabix with boolean +output: + - gap_file: + type: file + description: | + Structure: [ val(meta), path(gap_file) ] + Bed file containing gap location and lengths + - gap_tabix: + type: file + description: | + Structure: [ val(meta), path(gap_tabix) ] + Tabix index for the gap file +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/subworkflows/sanger-tol/gap_finder/nextflow.config 
b/subworkflows/sanger-tol/gap_finder/nextflow.config new file mode 100644 index 00000000..c4a4b470 --- /dev/null +++ b/subworkflows/sanger-tol/gap_finder/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: GAWK { + ext.args2 = "'BEGIN { OFS = \"\\t\" }{print \$0, sqrt((\$3-\$2)*(\$3-\$2))}'" + ext.suffix = 'gap.bedgraph' + } + + withName: SEQTK_CUTN { + ext.args = "-n 1" + } + + withName: TABIX_BGZIPTABIX { + ext.args2 = "--csi" + } +} diff --git a/subworkflows/sanger-tol/gap_finder/tests/main.nf.test b/subworkflows/sanger-tol/gap_finder/tests/main.nf.test new file mode 100644 index 00000000..2f45e331 --- /dev/null +++ b/subworkflows/sanger-tol/gap_finder/tests/main.nf.test @@ -0,0 +1,133 @@ +nextflow_workflow { + + name "Test Subworkflow GAP_FINDER" + script "../main.nf" + config "./nextflow.config" + workflow "GAP_FINDER" + + tag "subworkflows" + tag "subworkflows_sangertol" + tag "subworkflows/gap_finder" + tag "seqtk/cutn" + tag "gawk" + tag "tabix/bgziptabix" + tag "subworkflows/../../modules/nf-core/gawk" + tag "subworkflows/../../modules/nf-core/tabix/bgziptabix" + tag "subworkflows/../../modules/nf-core/seqtk/cutn" + tag "modules/nf-core/gunzip" + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall( + "${launchDir}/library/", + [ + "seqtk/cutn", + "gawk", + "tabix/bgziptabix", + "gunzip" + ] + ) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + + run("GUNZIP"){ + script "../../../../modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ id: "Cloeon_Dipeterum" ], + file(params.modules_testdata_base_path + 'Cloeon_dipterum/assembly/release/ieCloDipt1.1/insdc/GCA_949628265.1.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Cloeon_dipterum genome [fasta] w/ index") { + + when { + + params { + seqtk_args = "-n 1" + bgzip_args = "--csi" + } + + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("Cloeon_dipterum genome [fasta] w/o index") { + + when { + + params { + seqtk_args = "-n 1" + } + + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("Cloeon_dipterum genome - stub") { + options "-stub" + when { + + params { + seqtk_args = "-n 1" + bgzip_args = "--csi" + } + + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } +} diff --git a/subworkflows/sanger-tol/gap_finder/tests/main.nf.test.snap b/subworkflows/sanger-tol/gap_finder/tests/main.nf.test.snap new file mode 100644 index 00000000..67555e24 --- /dev/null +++ b/subworkflows/sanger-tol/gap_finder/tests/main.nf.test.snap @@ -0,0 +1,125 @@ +{ + "Cloeon_dipterum genome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "Cloeon_Dipeterum.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gap_file": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gap_tabix": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "Cloeon_Dipeterum.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": 
"25.10.2" + }, + "timestamp": "2026-02-27T13:37:12.350775975" + }, + "Cloeon_dipterum genome [fasta] w/o index": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.gap.bedgraph:md5,25a0c7406035024fac941b1e900a0bd4" + ] + ], + "1": [ + + ], + "gap_file": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.gap.bedgraph:md5,25a0c7406035024fac941b1e900a0bd4" + ] + ], + "gap_tabix": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T13:36:39.754958419" + }, + "Cloeon_dipterum genome [fasta] w/ index": { + "content": [ + { + "0": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.gap.bedgraph:md5,25a0c7406035024fac941b1e900a0bd4" + ] + ], + "1": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.bed.gz:md5,9539ddbcc4299acda81cceac7bb79732", + "Cloeon_Dipeterum.bed.gz.csi:md5,2ae7838a0ceb1abf43db92d59caf206f" + ] + ], + "gap_file": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.gap.bedgraph:md5,25a0c7406035024fac941b1e900a0bd4" + ] + ], + "gap_tabix": [ + [ + { + "id": "Cloeon_Dipeterum" + }, + "Cloeon_Dipeterum.bed.gz:md5,9539ddbcc4299acda81cceac7bb79732", + "Cloeon_Dipeterum.bed.gz.csi:md5,2ae7838a0ceb1abf43db92d59caf206f" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T13:36:06.102057386" + } +} \ No newline at end of file diff --git a/subworkflows/sanger-tol/gap_finder/tests/nextflow.config b/subworkflows/sanger-tol/gap_finder/tests/nextflow.config new file mode 100644 index 00000000..cce226b8 --- /dev/null +++ b/subworkflows/sanger-tol/gap_finder/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: GAWK { + ext.suffix = 'gap.bedgraph' + } + + withName: SEQTK_CUTN { + ext.args = { "${params.seqtk_args}" } + } + + withName: TABIX_BGZIPTABIX { + ext.args2 = { "${params.bgzip_args}" } + } +} diff --git a/subworkflows/sanger-tol/pairs_create_contact_maps/main.nf 
b/subworkflows/sanger-tol/pairs_create_contact_maps/main.nf new file mode 100644 index 00000000..335f0d05 --- /dev/null +++ b/subworkflows/sanger-tol/pairs_create_contact_maps/main.nf @@ -0,0 +1,95 @@ +include { COOLER_CLOAD } from '../../../modules/nf-core/cooler/cload/main.nf' +include { COOLER_ZOOMIFY } from '../../../modules/nf-core/cooler/zoomify/main.nf' +include { GAWK as GAWK_PROCESS_PAIRS_FILE } from '../../../modules/nf-core/gawk/main.nf' +include { JUICERTOOLS_PRE } from '../../../modules/nf-core/juicertools/pre/main' +include { PRETEXTMAP } from '../../../modules/nf-core/pretextmap/main.nf' +include { PRETEXTSNAPSHOT } from '../../../modules/nf-core/pretextsnapshot/main.nf' + +workflow PAIRS_CREATE_CONTACT_MAPS { + take: + ch_pairs // [meta, pairs] + ch_chrom_sizes // [meta, sizes] + val_build_pretext // bool: build pretext map + val_create_pretext_snapshot // bool: build snapshot + val_build_cooler // bool: build cooler + val_build_juicer // bool: build juicer + val_cool_bin // val: cooler cload parameter + + main: + // + // Module: Build PretextMap + // + PRETEXTMAP( + ch_pairs.filter { val_build_pretext }, // Pairs file + [[], [], []] + ) + + // + // Module: Make a PNG of the PretextMap for fast viz + // + PRETEXTSNAPSHOT( + PRETEXTMAP.out.pretext.filter { val_create_pretext_snapshot } + ) + + // + // Module: Generate a multi-resolution cooler file by coarsening + // + ch_cooler_input = ch_pairs + .filter { val_build_cooler } + .combine(ch_chrom_sizes, by: 0) + .multiMap { meta, pairs, sizes -> + pairs: [ meta, pairs, [] ] + sizes: [ meta, sizes ] + } + + COOLER_CLOAD( + ch_cooler_input.pairs, + ch_cooler_input.sizes, + "pairs", + val_cool_bin + ) + + // + // Module: Zoom cool to mcool + // + COOLER_ZOOMIFY(COOLER_CLOAD.out.cool) + + // + // Module: process .pairs file to remove the chromsize lines as juicer_pre + // does not like them + // + ch_pairs_remove_chromsizes_awk = channel.of('''\ + BEGIN { FS = OFS = "\\t" } + !/^#chromsize/ { + print $0 + 
}'''.stripIndent()) + .collectFile(name: "pairs_remove_chromsizes.awk", cache: true) + .collect() + + GAWK_PROCESS_PAIRS_FILE( + ch_pairs.filter { val_build_juicer }, + ch_pairs_remove_chromsizes_awk, + false + ) + + // + // Module: Generate juicer .hic map + // + ch_juicertools_pre_input = GAWK_PROCESS_PAIRS_FILE.out.output + .combine(ch_chrom_sizes, by: 0) + .multiMap { meta, pairs, sizes -> + pairs: [ meta, pairs ] + sizes: [ meta, [], sizes ] + } + + JUICERTOOLS_PRE( + ch_juicertools_pre_input.pairs, + ch_juicertools_pre_input.sizes + ) + + emit: + pretext = PRETEXTMAP.out.pretext + pretext_png = PRETEXTSNAPSHOT.out.image + cool = COOLER_ZOOMIFY.out.mcool + hic = JUICERTOOLS_PRE.out.hic +} diff --git a/subworkflows/sanger-tol/pairs_create_contact_maps/meta.yml b/subworkflows/sanger-tol/pairs_create_contact_maps/meta.yml new file mode 100644 index 00000000..a0911b86 --- /dev/null +++ b/subworkflows/sanger-tol/pairs_create_contact_maps/meta.yml @@ -0,0 +1,79 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/subworkflows/yaml-schema.json +name: "pairs_create_contact_maps" +description: Build contact maps from pairs files using PretextMap, Juicertools, and Cooler +keywords: + - pairs + - contact map + - hic +components: + - cooler/cload: + git_remote: https://github.com/nf-core/modules.git + - cooler/zoomify: + git_remote: https://github.com/nf-core/modules.git + - gawk: + git_remote: https://github.com/nf-core/modules.git + - juicertools/pre: + git_remote: https://github.com/nf-core/modules.git + - pretextmap: + git_remote: https://github.com/nf-core/modules.git + - pretextsnapshot: + git_remote: https://github.com/nf-core/modules.git +input: + - ch_pairs: + type: file + description: | + Input Hi-C pairs in pairs format (https://github.com/4dn-dcic/pairix/blob/master/pairs_format_specification.md). 
+ + Structure: [ val(meta), path(pairs) ] + pattern: "*.pairs.gz" + - ch_chrom_sizes: + type: file + description: | + Input chromosome sizes file - a TSV with two columns, name and size. + + Structure: [ val(meta), path(sizes) ] + pattern: "*.{sizes,tsv}" + - val_build_pretext: + type: boolean + description: Build a PretextMap contact map + - val_create_pretext_snapshot: + type: boolean + description: Create a snapshot of the PretextMap contact map + - val_build_cooler: + type: boolean + description: Build a Cooler contact map + - val_build_juicer: + type: boolean + description: Build a Juicer contact map + - val_cool_bin: + type: integer + description: Bin size for cooler +output: + - pretext: + type: file + description: | + Contact map in PretextMap format. + Structure: [ val(meta), path(pretext) ] + pattern: "*.pretext" + - pretext_png: + type: file + description: | + PNG visualisation of the Pretext contact map. + Structure: [ val(meta), path(png) ] + pattern: "*.png" + - cool: + type: file + description: | + Contact map in mCooler format + Structure: [ val(meta), path(mcool) ] + pattern: "*.mcool" + - hic: + type: file + description: | + Contact map in Juicer format + Structure: [ val(meta), path(hic) ] + pattern: "*.hic" +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/subworkflows/sanger-tol/pairs_create_contact_maps/nextflow.config b/subworkflows/sanger-tol/pairs_create_contact_maps/nextflow.config new file mode 100644 index 00000000..88552837 --- /dev/null +++ b/subworkflows/sanger-tol/pairs_create_contact_maps/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: COOLER_CLOAD { + ext.args = '-0 -c1 2 -p1 3 -c2 4 -p2 5' + } + +} diff --git a/subworkflows/sanger-tol/pairs_create_contact_maps/tests/main.nf.test b/subworkflows/sanger-tol/pairs_create_contact_maps/tests/main.nf.test new file mode 100644 index 00000000..9b8a06f4 --- /dev/null +++ b/subworkflows/sanger-tol/pairs_create_contact_maps/tests/main.nf.test @@ -0,0 +1,263 
@@ +nextflow_workflow { + + name "Test Subworkflow PAIRS_CREATE_CONTACT_MAPS" + script "../main.nf" + workflow "PAIRS_CREATE_CONTACT_MAPS" + + tag "subworkflows" + tag "subworkflows_sangertol" + tag "subworkflows/pairs_create_contact_maps" + tag "modules/nf-core/gawk" + tag "yahs/makepairsfile" + tag "subworkflows/../../modules/nf-core/cooler/cload" + tag "subworkflows/../../modules/nf-core/pretextsnapshot" + tag "subworkflows/../../modules/nf-core/pretextmap" + tag "subworkflows/../../modules/nf-core/cooler/zoomify" + tag "subworkflows/../../modules/nf-core/gawk" + tag "subworkflows/../../modules/nf-core/juicertools/pre" + + config "./nextflow.config" + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall( + "${launchDir}/library/", + [ + "cooler/cload", + "cooler/zoomify", + "gawk", + "juicertools/pre", + "pretextmap", + "pretextsnapshot" + ] + ) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + + run("YAHS_MAKEPAIRSFILE") { + script "../../../../modules/sanger-tol/yahs/makepairsfile/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/scaffolding/baUndUnlc1_hic_phased_hap2_scaffolds_final.fa.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/scaffolding/baUndUnlc1_hic_phased_hap2_scaffolds_final.agp', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/scaffolding/baUndUnlc1.hic.hap2.p_ctg.fa.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'Undibacterium_unclassified/analysis/scaffolding/baUndUnlc1_hic_phased_hap2.bin', checkIfExists: true) + ] + """ + } + } + + run("GAWK") { + script "../../../../modules/nf-core/gawk/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 
'Undibacterium_unclassified/analysis/scaffolding/baUndUnlc1_hic_phased_hap2_scaffolds_final.fa.fai', checkIfExists: true), + ] + input[1] = channel.of('BEGIN { FS = OFS = "\\t" } { print \$1, \$2 }').collectFile(name: "sizes.awk") + input[2] = false + """ + } + } + } + + test("Undibacterium_unclassified - pairs - hap2 - all maps") { + + when { + + params { + cooler_cload_args = '-0 -c1 2 -p1 3 -c2 4 -p2 5' + pretextsnapshot_args = '--sequences \"=full\"' + juicertools_args = "-r 20000" + } + + workflow { + """ + input[0] = YAHS_MAKEPAIRSFILE.out.pairs + input[1] = GAWK.out.output + input[2] = true + input[3] = true + input[4] = true + input[5] = true + input[6] = 500 + """ + } + } + + then { + assert workflow.success + assertAll( + { assert snapshot( + file(workflow.out.pretext.get(0).get(1)).getName(), + file(workflow.out.pretext_png.get(0).get(1)).getName(), + file(workflow.out.cool.get(0).get(1)).getName(), + file(workflow.out.hic.get(0).get(1)).getName(), + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } + + test("Undibacterium_unclassified - pairs - hap2 - no pretext") { + + when { + + params { + cooler_cload_args = '-0 -c1 2 -p1 3 -c2 4 -p2 5' + pretextsnapshot_args = '--sequences \"=full\"' + juicertools_args = "-r 20000" + } + + workflow { + """ + input[0] = YAHS_MAKEPAIRSFILE.out.pairs + input[1] = GAWK.out.output + input[2] = false + input[3] = true + input[4] = true + input[5] = true + input[6] = 500 + """ + } + } + + then { + assert workflow.success + assertAll( + { assert snapshot( + file(workflow.out.cool.get(0).get(1)).getName(), + file(workflow.out.hic.get(0).get(1)).getName(), + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } + + test("Undibacterium_unclassified - pairs - hap2 - no cooler") { + + when { + + params { + cooler_cload_args = '-0 -c1 2 -p1 3 -c2 4 -p2 5' + pretextsnapshot_args = '--sequences \"=full\"' + 
juicertools_args = "-r 20000" + } + + workflow { + """ + input[0] = YAHS_MAKEPAIRSFILE.out.pairs + input[1] = GAWK.out.output + input[2] = true + input[3] = true + input[4] = false + input[5] = true + input[6] = 500 + """ + } + } + + then { + assert workflow.success + assertAll( + { assert snapshot( + file(workflow.out.pretext.get(0).get(1)).getName(), + file(workflow.out.pretext_png.get(0).get(1)).getName(), + file(workflow.out.hic.get(0).get(1)).getName(), + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } + + test("Undibacterium_unclassified - pairs - hap2 - no juicer") { + + when { + + params { + cooler_cload_args = '-0 -c1 2 -p1 3 -c2 4 -p2 5' + pretextsnapshot_args = '--sequences \"=full\"' + juicertools_args = "-r 20000" + } + + workflow { + """ + input[0] = YAHS_MAKEPAIRSFILE.out.pairs + input[1] = GAWK.out.output + input[2] = true + input[3] = true + input[4] = true + input[5] = false + input[6] = 500 + """ + } + } + + then { + assert workflow.success + assertAll( + { assert snapshot( + file(workflow.out.pretext.get(0).get(1)).getName(), + file(workflow.out.pretext_png.get(0).get(1)).getName(), + file(workflow.out.cool.get(0).get(1)).getName(), + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } + + test("Undibacterium_unclassified - pairs - hap2 - no snapshot") { + + when { + + params { + cooler_cload_args = '-0 -c1 2 -p1 3 -c2 4 -p2 5' + juicertools_args = "-r 20000" + } + + workflow { + """ + input[0] = YAHS_MAKEPAIRSFILE.out.pairs + input[1] = GAWK.out.output + input[2] = true + input[3] = false + input[4] = false + input[5] = false + input[6] = 500 + """ + } + } + + then { + assert workflow.success + assertAll( + { assert snapshot( + file(workflow.out.pretext.get(0).get(1)).getName() + ).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + + } + +} diff --git 
a/subworkflows/sanger-tol/pairs_create_contact_maps/tests/main.nf.test.snap b/subworkflows/sanger-tol/pairs_create_contact_maps/tests/main.nf.test.snap new file mode 100644 index 00000000..8f63d8af --- /dev/null +++ b/subworkflows/sanger-tol/pairs_create_contact_maps/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "Undibacterium_unclassified - pairs - hap2 - no snapshot": { + "content": [ + "test.pretext" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.6" + }, + "timestamp": "2026-02-13T15:40:52.094181633" + }, + "Undibacterium_unclassified - pairs - hap2 - all maps": { + "content": [ + "test.pretext", + "test_FullMap.png", + "test.mcool", + "test.hic" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T16:43:13.29248" + }, + "Undibacterium_unclassified - pairs - hap2 - no pretext": { + "content": [ + "test.mcool", + "test.hic" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T16:43:52.435679" + }, + "Undibacterium_unclassified - pairs - hap2 - no juicer": { + "content": [ + "test.pretext", + "test_FullMap.png", + "test.mcool" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T16:50:23.905964" + }, + "Undibacterium_unclassified - pairs - hap2 - no cooler": { + "content": [ + "test.pretext", + "test_FullMap.png", + "test.hic" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T16:47:07.02674" + } +} \ No newline at end of file diff --git a/subworkflows/sanger-tol/pairs_create_contact_maps/tests/nextflow.config b/subworkflows/sanger-tol/pairs_create_contact_maps/tests/nextflow.config new file mode 100644 index 00000000..f7c44b7e --- /dev/null +++ b/subworkflows/sanger-tol/pairs_create_contact_maps/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + + withName: COOLER_CLOAD { + ext.args = { params.cooler_cload_args } + } + + withName: JUICERTOOLS_PRE { + ext.args = { 
params.juicertools_args } + } + + withName: PRETEXTSNAPSHOT { + ext.args = { params.pretextsnapshot_args } + } + +} diff --git a/subworkflows/sanger-tol/telo_finder/main.nf b/subworkflows/sanger-tol/telo_finder/main.nf new file mode 100644 index 00000000..ee4c7b34 --- /dev/null +++ b/subworkflows/sanger-tol/telo_finder/main.nf @@ -0,0 +1,140 @@ +// +// MODULE IMPORT BLOCK +// +include { TELOMERE_REGIONS } from '../../../modules/sanger-tol/telomere/regions/main' +include { GAWK as GAWK_SPLIT_TELOMERE } from '../../../modules/nf-core/gawk/main' +include { TELOMERE_WINDOWS } from '../../../modules/sanger-tol/telomere/windows/main' +include { TELOMERE_EXTRACT } from '../../../modules/sanger-tol/telomere/extract/main' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix' + + +workflow TELO_FINDER { + + take: + ch_reference // Channel [ val(meta), path(fasta) ] + ch_telomereseq // Channel.of( telomere sequence ) + val_split_telomere // bool + val_run_bgzip // bool + + main: + + // + // MODULE: FINDS THE TELOMERIC SEQUENCE IN REFERENCE + // + TELOMERE_REGIONS ( + ch_reference, + ch_telomereseq + ) + + ch_full_telomere = TELOMERE_REGIONS.out.telomere + .map{ meta, file -> + def new_meta = meta + [direction: 0] + [new_meta, file] + } + + // + // MODULE: SPLIT THE TELOMERE FILE INTO 5' and 3' FILES + // + if (val_split_telomere) { + + ch_split_telomere = channel.of('''\ + BEGIN { + FS="\\t"; OFS="\\t" + } { + print > "direction."$3".telomere" + }'''.stripIndent()) + .collectFile(name: "split_telomere.awk", cache: true) + .collect() + + GAWK_SPLIT_TELOMERE ( + ch_full_telomere, + ch_split_telomere, + true + ) + + // + // LOGIC: COLLECT FILES AND ITERATE THROUGH + // ADD DIRECTION BASED ON: + // 0: FULL TELOMERE FILE + // 3: FOR 3Prime DIRECTION + // 5: For 5Prime DIRECTION + // THIS PRODUCES A TRIO OF CHANNELS: [meta], file + // FILTER FOR SIZE > 0 FOR SAFETY + // + ch_regions_for_extraction = GAWK_SPLIT_TELOMERE.out.output + .flatMap { meta, files ->
+ files + .findAll { file -> file.size() > 0 } + .collect { file -> + if (file.name.contains("direction.0")) { + [meta + [direction: 5], file] + } else if (file.name.contains("direction.1")) { + [meta + [direction: 3], file] + } else { + error("Unexpected file name pattern in TELOMERE_REGIONS split output: ${file.name}") + } + } + } + .mix(ch_full_telomere) + + + } else { + ch_regions_for_extraction = ch_full_telomere + } + + + // + // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE + // + TELOMERE_WINDOWS ( + ch_regions_for_extraction + ) + + + // + // LOGIC: OUTPUT CAN HAVE SIZE 0 WHICH BREAKS gawk IN EXTRACT + // FILTER OUT THE 0 SIZE FILES + // + ch_filtered_windows_for_extraction = TELOMERE_WINDOWS.out.windows + .filter { _meta, file -> + file.size() > 0 + } + + // + // MODULE: EXTRACT TELOMERE DATA FROM FIND_TELOMERE + // AND REFORMAT INTO BEDGRAPH FILE + // + TELOMERE_EXTRACT( + ch_filtered_windows_for_extraction + ) + + + // + // LOGIC: CLEAN OUTPUT CHANNEL INTO + // [meta, [bedgraph_list]] + // + ch_telo_bedgraphs = TELOMERE_EXTRACT.out.bedgraph + .map { meta, bedgraph -> + [ meta - meta.subMap("direction"), bedgraph ] + } + .groupTuple(by: 0) + .map { meta, bedgraphs -> [ meta, bedgraphs.sort { file -> file.name } ] } + + ch_telo_bedfiles = TELOMERE_EXTRACT.out.bed + .map { meta, bed -> + [ meta - meta.subMap("direction"), bed ] + } + + // + // MODULE: BGZIP AND TABIX THE TELO BED FILES + // + TABIX_BGZIPTABIX ( + ch_telo_bedfiles.filter{ _meta, _file -> val_run_bgzip} + ) + + emit: + bed_file = ch_telo_bedfiles // Channel [meta, bed] + bed_gz_tbi = TABIX_BGZIPTABIX.out.gz_index // Not used anymore + bedgraph_file = ch_telo_bedgraphs // Channel [meta, [bedfiles]] - Used in pretext_graph + +} diff --git a/subworkflows/sanger-tol/telo_finder/meta.yml b/subworkflows/sanger-tol/telo_finder/meta.yml new file mode 100644 index 00000000..c92ab76e --- /dev/null +++ b/subworkflows/sanger-tol/telo_finder/meta.yml @@ -0,0 +1,58 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/main/subworkflows/yaml-schema.json +name: "telo_finder" +description: Generate Telomere bedgraphs given a telomere motif +keywords: + - telomere + - bedgraph + - windows + - regions + - fasta +components: + - telomere/regions + - telomere/windows + - telomere/extract + - gawk: + git_remote: https://github.com/nf-core/modules.git + - tabix/bgziptabix: + git_remote: https://github.com/nf-core/modules.git +input: + - ch_reference: + type: file + description: | + Structure [ val(meta), path(reference) ] + Meta is the Groovy Map containing sample information + Reference is the fasta for analysis + - ch_telomereseq: + type: string + description: | + A string containing the DNA sequence of a telomere motif + - val_split_telomere: + type: boolean + description: | + A boolean to control use of the gawk to split the telomere. + - val_run_bgzip: + type: boolean + description: | + Control running of tabix with boolean +output: + - bed_file: + type: file + description: | + Structure: [ val(meta), [path(bed)] ] + A Queue of up to 3 bed file channels. + - bed_tabix: + type: file + description: | + Structure: [ val(meta), path(bed_gz), path(index) ] + Bgzipped bed file and its tabix index; emitted as `bed_gz_tbi` in main.nf + - bedgraph_file: + type: file + description: | + Structure: [ val(meta), [path(bedgraph)] ] + Collection of 1 or 3 telomere bedgraph files.
+ +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" + - "@weaglesBio" diff --git a/subworkflows/sanger-tol/telo_finder/nextflow.config b/subworkflows/sanger-tol/telo_finder/nextflow.config new file mode 100644 index 00000000..bccea85c --- /dev/null +++ b/subworkflows/sanger-tol/telo_finder/nextflow.config @@ -0,0 +1,23 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: TELOMERE_WINDOWS { + tag = { "${meta.id}_${meta.direction}P" } + ext.args = "99.9" + ext.prefix = { "${meta.id}_${meta.direction}P" } + } + + withName: TELOMERE_EXTRACT { + tag = { "${meta.id}_${meta.direction}P" } + ext.prefix = { "${meta.id}_${meta.direction}P" } + } + + withName: GAWK_SPLIT_TELOMERE { + ext.prefix = "direction" + ext.suffix = "telomere" + } + + withName: TABIX_BGZIPTABIX { + ext.args2 = "--csi" + } +} diff --git a/subworkflows/sanger-tol/telo_finder/tests/main.nf.test b/subworkflows/sanger-tol/telo_finder/tests/main.nf.test new file mode 100644 index 00000000..88fb7993 --- /dev/null +++ b/subworkflows/sanger-tol/telo_finder/tests/main.nf.test @@ -0,0 +1,190 @@ +nextflow_workflow { + + name "Test Subworkflow TELO_FINDER" + script "../main.nf" + config "./nextflow.config" + workflow "TELO_FINDER" + + tag "subworkflows" + tag "subworkflows_sangertol" + tag "subworkflows/telo_finder" + tag "telomere/regions" + tag "telomere/windows" + tag "telomere/extract" + tag "tabix/bgziptabix" + tag "subworkflows/../../modules/nf-core/gawk" + tag "subworkflows/../../modules/nf-core/gunzip" + tag "subworkflows/../../modules/nf-core/tabix/bgziptabix" + tag "modules/nf-core/gunzip" + + setup { + nfcoreInitialise("${launchDir}/library/") + nfcoreInstall( + "${launchDir}/library/", + [ + "gawk", + "tabix/bgziptabix", + "gunzip" + ] + ) + nfcoreLink("${launchDir}/library/", "${baseDir}/modules/") + + run("GUNZIP"){ + script "../../../../modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ id: "idFanCani4" ], + file(params.modules_testdata_base_path + 
'resources/modules/telomere/idFanCani4/idFanCani4.subset_genome.fa.gz', checkIfExists: true) + ] + """ + } + } + } + + + test("idFanCani4 - no split - fasta w/ index") { + when { + params { + windows_percent = "99.9" + bgzip_args = "--csi" + } + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = "TTAGG" + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("idFanCani4 - split - fasta w/ index") { + when { + params { + windows_percent = "99.9" + bgzip_args = "--csi" + } + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = "TTAGG" + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("idFanCani4 - no split - fasta w/o index") { + when { + params { + windows_percent = "99.9" + bgzip_args = "--csi" + } + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = "TTAGG" + input[2] = false + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("idFanCani4 - split - fasta w/o index") { + when { + params { + windows_percent = "99.9" + bgzip_args = "--csi" + } + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = "TTAGG" + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } + + test("idFanCani4 - no split - fasta - stub w/o index") { + when { + params { + 
windows_percent = "99.9" + bgzip_args = "--csi" + } + + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = "TTAGG" + input[2] = false + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + cleanup { + nfcoreUnlink("${launchDir}/library/", "${baseDir}/modules/nf-core") + } + } +} diff --git a/subworkflows/sanger-tol/telo_finder/tests/main.nf.test.snap b/subworkflows/sanger-tol/telo_finder/tests/main.nf.test.snap new file mode 100644 index 00000000..f3ad8bbe --- /dev/null +++ b/subworkflows/sanger-tol/telo_finder/tests/main.nf.test.snap @@ -0,0 +1,375 @@ +{ + "idFanCani4 - no split - fasta - stub w/o index": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064" + ] + ] + ], + "bed_file": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ] + ], + "bed_gz_tbi": [ + + ], + "bedgraph_file": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T16:07:42.294323895" + }, + "idFanCani4 - no split - fasta w/o index": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064" + ] + ] + ], + "bed_file": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ] + ], + "bed_gz_tbi": [ + + ], + "bedgraph_file": [ + [ + { + "id": "idFanCani4" + }, + [ + 
"idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T16:01:40.077413969" + }, + "idFanCani4 - no split - fasta w/ index": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ] + ], + "1": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,3ccb4acb3f33c8ff3ca3201245a42ce3", + "idFanCani4.bed.gz.csi:md5,c1e8ab4feb9c52665916f33ca629e055" + ] + ], + "2": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064" + ] + ] + ], + "bed_file": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ] + ], + "bed_gz_tbi": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,3ccb4acb3f33c8ff3ca3201245a42ce3", + "idFanCani4.bed.gz.csi:md5,c1e8ab4feb9c52665916f33ca629e055" + ] + ], + "bedgraph_file": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T15:55:30.275969768" + }, + "idFanCani4 - split - fasta w/o index": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_3P_telomere.bed:md5,3bb5481efc9a9c7192c54413697de8dc" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_5P_telomere.bed:md5,65a96f58d21ee87ce8932b5e3e2da63b" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064", + "idFanCani4_3P_telomere.bedgraph:md5,ae30aa41f6e9c53df160ddb17ecccd09", + "idFanCani4_5P_telomere.bedgraph:md5,ac00ddc89af099cfdd820495cd8c8c76" + ] + ] + ], + "bed_file": [ + [ + { + 
"id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_3P_telomere.bed:md5,3bb5481efc9a9c7192c54413697de8dc" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_5P_telomere.bed:md5,65a96f58d21ee87ce8932b5e3e2da63b" + ] + ], + "bed_gz_tbi": [ + + ], + "bedgraph_file": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064", + "idFanCani4_3P_telomere.bedgraph:md5,ae30aa41f6e9c53df160ddb17ecccd09", + "idFanCani4_5P_telomere.bedgraph:md5,ac00ddc89af099cfdd820495cd8c8c76" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T16:04:34.967301219" + }, + "idFanCani4 - split - fasta w/ index": { + "content": [ + { + "0": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_3P_telomere.bed:md5,3bb5481efc9a9c7192c54413697de8dc" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_5P_telomere.bed:md5,65a96f58d21ee87ce8932b5e3e2da63b" + ] + ], + "1": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,3bb5481efc9a9c7192c54413697de8dc", + "idFanCani4.bed.gz.csi:md5,ea3c693cc2c167eefef157c58ce9ca00" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,3ccb4acb3f33c8ff3ca3201245a42ce3", + "idFanCani4.bed.gz.csi:md5,c1e8ab4feb9c52665916f33ca629e055" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,65a96f58d21ee87ce8932b5e3e2da63b", + "idFanCani4.bed.gz.csi:md5,261e850df19711054886092b79fd3b04" + ] + ], + "2": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064", + "idFanCani4_3P_telomere.bedgraph:md5,ae30aa41f6e9c53df160ddb17ecccd09", + "idFanCani4_5P_telomere.bedgraph:md5,ac00ddc89af099cfdd820495cd8c8c76" + ] + ] + ], + "bed_file": [ + [ + { + "id": "idFanCani4" + }, + 
"idFanCani4_0P_telomere.bed:md5,3ccb4acb3f33c8ff3ca3201245a42ce3" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_3P_telomere.bed:md5,3bb5481efc9a9c7192c54413697de8dc" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4_5P_telomere.bed:md5,65a96f58d21ee87ce8932b5e3e2da63b" + ] + ], + "bed_gz_tbi": [ + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,3bb5481efc9a9c7192c54413697de8dc", + "idFanCani4.bed.gz.csi:md5,ea3c693cc2c167eefef157c58ce9ca00" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,3ccb4acb3f33c8ff3ca3201245a42ce3", + "idFanCani4.bed.gz.csi:md5,c1e8ab4feb9c52665916f33ca629e055" + ], + [ + { + "id": "idFanCani4" + }, + "idFanCani4.bed.gz:md5,65a96f58d21ee87ce8932b5e3e2da63b", + "idFanCani4.bed.gz.csi:md5,261e850df19711054886092b79fd3b04" + ] + ], + "bedgraph_file": [ + [ + { + "id": "idFanCani4" + }, + [ + "idFanCani4_0P_telomere.bedgraph:md5,f8df4805d7f8948a5bc74c674965d064", + "idFanCani4_3P_telomere.bedgraph:md5,ae30aa41f6e9c53df160ddb17ecccd09", + "idFanCani4_5P_telomere.bedgraph:md5,ac00ddc89af099cfdd820495cd8c8c76" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-27T15:58:31.397290965" + } +} diff --git a/subworkflows/sanger-tol/telo_finder/tests/nextflow.config b/subworkflows/sanger-tol/telo_finder/tests/nextflow.config new file mode 100644 index 00000000..d2621d13 --- /dev/null +++ b/subworkflows/sanger-tol/telo_finder/tests/nextflow.config @@ -0,0 +1,23 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: TELOMERE_WINDOWS { + tag = { "${meta.id}_${meta.direction}P" } + ext.args = { "${params.windows_percent}" } + ext.prefix = { "${meta.id}_${meta.direction}P" } + } + + withName: TELOMERE_EXTRACT { + tag = { "${meta.id}_${meta.direction}P" } + ext.prefix = { "${meta.id}_${meta.direction}P" } + } + + withName: GAWK_SPLIT_TELOMERE { + ext.prefix = "direction" + ext.suffix = "telomere" + } + + withName: TABIX_BGZIPTABIX { + ext.args2 = { 
"${params.bgzip_args}" } + } +} diff --git a/tests/default.nf.test b/tests/default.nf.test index 8f321d17..240641f6 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -6,18 +6,9 @@ nextflow_pipeline { tag "pipeline_sanger_tol" tag "full" - test("Full run") { - - setup { - println "\nDownloading the test data..." - def command = ['bash', '-c', "curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - -C ${projectDir}/"] - def process = command.execute() - process.waitFor() + config "../conf/test.config" - if (process.exitValue() != 0) { - throw new RuntimeException("Error - failed to download ${dbKey}: ${process.err.text}") - } - } + test("Full run") { when { params { @@ -29,8 +20,6 @@ nextflow_pipeline { } } - // SETUP FOR THE TEST_DATA - then { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ["pipeline_info/*.{html,json,txt}"]) diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 6bd7f5c1..2e707513 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "Full run": { "content": [ - 42, + 46, { "BEDTOOLS_BAMTOBED": { "bedtools": "2.31.1" @@ -21,94 +21,96 @@ "BWAMEM2_INDEX": { "bwamem2": "2.2.1" }, - "CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT": { - "samtools": 1.17, - "bwa-mem2": "2.2.1", - "staden_io_lib": "1.14.14" + "CRAMALIGN_BWAMEM2ALIGNHIC": { + "bwamem2": "2.2.1", + "samtools": "1.22.1" }, - "CRAM_GENERATE_CSV": { - "samtools": 1.17 + "CRAMALIGN_GENCRAMCHUNKS": { + "cramchunks": "1.1.0" }, "EXTRACT_REPEAT": { - "perl": "(v5.26.2))", - "extract_repeat.pl": 1.0 - }, - "EXTRACT_TELOMERE": { - "extract_telomere": 2.0, - "coreutils": 9.1 - }, - "FIND_TELOMERE_REGIONS": { - "find_telomere": 1.0 - }, - "FIND_TELOMERE_WINDOWS": { - "telomere": 1.0 + "extract_repeat.pl": "1.0.0", + "perl": "5.26.2" }, "GAWK_GAP_LENGTH": { - "gawk": "5.3.0" + "gawk": "5.3.1" }, "GAWK_GENERATE_GENOME_FILE": { - "gawk": "5.3.0" + 
"gawk": "5.3.1" }, "GAWK_REFORMAT_INTERSECT": { - "gawk": "5.3.0" + "gawk": "5.3.1" }, "GAWK_RENAME_IDS": { - "gawk": "5.3.0" + "gawk": "5.3.1" }, "GAWK_REPLACE_DOTS": { - "gawk": "5.3.0" + "gawk": "5.3.1" }, - "GAWK_SPLIT_DIRECTIONS": { - "gawk": "5.3.0" + "GAWK_SPLIT_TELOMERE": { + "gawk": "5.3.1" }, "GAWK_UPPER_SEQUENCE": { - "gawk": "5.3.0" + "gawk": "5.3.1" }, "GNU_SORT": { - "coreutils": 9.3 + "coreutils": 9.5 }, "GNU_SORT_A": { - "coreutils": 9.3 + "coreutils": 9.5 }, "GNU_SORT_B": { - "coreutils": 9.3 + "coreutils": 9.5 }, "GNU_SORT_C": { - "coreutils": 9.3 + "coreutils": 9.5 }, "MINIMAP2_ALIGN": { - "minimap2": "2.28-r1209", - "samtools": 1.2 + "minimap2": "2.29-r1283" }, - "PRETEXTMAP_STANDRD": { - "pretextmap": "0.1.9", + "PRETEXTMAP": { + "PretextMap": "0.1.9", "samtools": 1.21 }, + "PRETEXTSNAPSHOT": { + "PretextSnapshot": "0.0.5" + }, "PRETEXT_INGEST_SNDRD": { "PretextGraph": "0.0.9", - "PretextMap": "0.1.9", - "bigWigToBedGraph": 447 + "ucsc": 447 }, "SAMTOOLS_FAIDX": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SAMTOOLS_MERGE": { - "samtools": 1.21 + "samtools": "1.22.1" + }, + "SAMTOOLS_MERGEDUP": { + "samtools": 1.23 }, "SAMTOOLS_SORT": { - "samtools": 1.21 + "samtools": "1.22.1" + }, + "SAMTOOLS_SPLITHEADER": { + "samtools": "1.22.1" }, "SAMTOOLS_VIEW_FILTER_PRIMARY": { - "samtools": 1.21 + "samtools": "1.22.1" }, "SEQTK_CUTN": { "seqtk": "1.4-r122" }, - "SNAPSHOT_SRES": { - "pretextsnapshot": "0.0.4" + "TELOMERE_EXTRACT": { + "telomere_extract": "5.3.0" + }, + "TELOMERE_REGIONS": { + "find_telomere_regions": "1.0.0" + }, + "TELOMERE_WINDOWS": { + "find_telomere_windows": "1.0.0" }, "UCSC_BEDGRAPHTOBIGWIG": { - "ucsc": 469 + "ucsc": 482 }, "WINDOWMASKER_MKCOUNTS": { "windowmasker": "1.0.0" @@ -117,40 +119,40 @@ "windowmasker": "1.0.0" }, "Workflow": { - "sanger-tol/curationpretext": "v1.5.1" + "sanger-tol/curationpretext": "v1.6.0" } }, [ "accessory_files", - "accessory_files/CurationPretextTest.bigWig", - 
"accessory_files/CurationPretextTest.gap.bedgraph", - "accessory_files/CurationPretextTest_3P_telomere.bed", - "accessory_files/CurationPretextTest_3P_telomere.bedgraph", - "accessory_files/CurationPretextTest_5P_telomere.bed", - "accessory_files/CurationPretextTest_5P_telomere.bedgraph", - "accessory_files/CurationPretextTest_telomere.bed", - "accessory_files/CurationPretextTest_telomere.bedgraph", - "accessory_files/coverage.bigWig", + "accessory_files/CurationPretextTest_MIN.gap.bedgraph", + "accessory_files/CurationPretextTest_MIN_0P_telomere.bed", + "accessory_files/CurationPretextTest_MIN_0P_telomere.bedgraph", + "accessory_files/CurationPretextTest_MIN_3P_telomere.bed", + "accessory_files/CurationPretextTest_MIN_3P_telomere.bedgraph", + "accessory_files/CurationPretextTest_MIN_5P_telomere.bed", + "accessory_files/CurationPretextTest_MIN_5P_telomere.bedgraph", + "accessory_files/CurationPretextTest_MIN_longread_coverage.bigWig", + "accessory_files/CurationPretextTest_MIN_repeat_density.bigWig", "pipeline_info", "pipeline_info/sanger-tol_curationpretext_software_versions.yml", "pretext_maps_processed", - "pretext_maps_processed/CurationPretextTest_normal.pretext", + "pretext_maps_processed/CurationPretextTest_MIN_normal.pretext", "pretext_maps_raw", - "pretext_maps_raw/CurationPretextTest_normal_pi.pretext", + "pretext_maps_raw/CurationPretextTest_MIN_normal_pi.pretext", "pretext_snapshot", - "pretext_snapshot/CurationPretextTest_normalFullMap.png" + "pretext_snapshot/CurationPretextTest_MIN_normalFullMap.png" ], 18, [ - "CurationPretextTest.bigWig:md5,3f66a9152d793a62f877b733c2336dfd", - "CurationPretextTest.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", - "CurationPretextTest_3P_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "CurationPretextTest_3P_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", - "CurationPretextTest_5P_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"CurationPretextTest_5P_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", - "CurationPretextTest_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "CurationPretextTest_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", - "coverage.bigWig:md5,2e474506c957152b231ac63c859f0b17" + "CurationPretextTest_MIN.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", + "CurationPretextTest_MIN_0P_telomere.bed:md5,61d1143a0b0e994c6cfd2a2f6093bf7d", + "CurationPretextTest_MIN_0P_telomere.bedgraph:md5,b6a0921c22eb24d9e371ff2a21ecaa51", + "CurationPretextTest_MIN_3P_telomere.bed:md5,34a1c051e3fd28ec10c35308105ec189", + "CurationPretextTest_MIN_3P_telomere.bedgraph:md5,23c0d6a6ec57809808a38483d4bcc6cd", + "CurationPretextTest_MIN_5P_telomere.bed:md5,712d150d0128fd221b27c64ee695509e", + "CurationPretextTest_MIN_5P_telomere.bedgraph:md5,754352b044aa0256d06b6b8054af2451", + "CurationPretextTest_MIN_longread_coverage.bigWig:md5,2b44579096cde838fe9d7b43a3267536", + "CurationPretextTest_MIN_repeat_density.bigWig:md5,3f66a9152d793a62f877b733c2336dfd" ], 9, 1, @@ -163,9 +165,9 @@ true ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-08-21T21:25:49.92252227" + "timestamp": "2026-03-05T13:59:35.465106708" } -} +} \ No newline at end of file diff --git a/workflows/curationpretext.nf b/workflows/curationpretext.nf index 69ef0a1d..d6f8b641 100644 --- a/workflows/curationpretext.nf +++ b/workflows/curationpretext.nf @@ -4,20 +4,28 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { GAWK as GAWK_UPPER_SEQUENCE } from '../modules/nf-core/gawk/main' -include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' -include { GUNZIP } from '../modules/nf-core/gunzip/main' +// NF-CORE MODULES +include { GAWK as GAWK_UPPER_SEQUENCE } from '../modules/nf-core/gawk/main' +include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' 
+include { GUNZIP } from '../modules/nf-core/gunzip/main' -include { PRETEXT_GRAPH as PRETEXT_INGEST_SNDRD } from '../modules/local/pretext/graph/main' -include { PRETEXT_GRAPH as PRETEXT_INGEST_HIRES } from '../modules/local/pretext/graph/main' +//LOCAL MODULES +include { PRETEXT_GRAPH as PRETEXT_INGEST_SNDRD } from '../modules/local/pretext/graph/main' +include { PRETEXT_GRAPH as PRETEXT_INGEST_HIRES } from '../modules/local/pretext/graph/main' -include { GENERATE_MAPS } from '../subworkflows/local/generate_maps/main' -include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files/main' +// LOCAL SUBWORKFLOWS +include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_curationpretext_pipeline' +// SANGER-TOL SUBWORKFLOWS +include { CRAM_MAP_ILLUMINA_HIC as ALIGN_CRAM } from '../subworkflows/sanger-tol/cram_map_illumina_hic/main' +include { PAIRS_CREATE_CONTACT_MAPS as CREATE_MAPS_STDRD } from '../subworkflows/sanger-tol/pairs_create_contact_maps/main' +include { PAIRS_CREATE_CONTACT_MAPS as CREATE_MAPS_HIRES } from '../subworkflows/sanger-tol/pairs_create_contact_maps/main' + + +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_curationpretext_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -29,15 +37,20 @@ workflow CURATIONPRETEXT { ch_reference ch_reads ch_cram_reads + ch_mapped_bam val_teloseq + 
val_input_file_string + val_aligner + val_skip_tracks + val_run_hires + val_split_telomere + val_cram_chunk_size main: - ch_versions = Channel.empty() - ch_empty_file = Channel.fromPath("${baseDir}/assets/EMPTY.txt") - + ch_empty_file = channel.fromPath("${baseDir}/assets/EMPTY.txt") ch_reference - .branch { meta, file -> + .branch { _meta, file -> zipped: file.name.endsWith('.gz') unzipped: !file.name.endsWith('.gz') } @@ -49,13 +62,12 @@ workflow CURATIONPRETEXT { GUNZIP ( ch_input.zipped ) - ch_versions = ch_versions.mix(GUNZIP.out.versions) // // LOGIC: MIX CHANELS WHICH MAY OR MAY NOT BE EMPTY INTO A SINGLE QUEUE CHANNEL // - unzipped_input = Channel.empty() + unzipped_input = channel.empty() unzipped_input .mix(ch_input.unzipped, GUNZIP.out.gunzip) @@ -71,18 +83,15 @@ workflow CURATIONPRETEXT { false, ) ch_upper_ref = GAWK_UPPER_SEQUENCE.out.output - ch_versions = ch_versions.mix( GAWK_UPPER_SEQUENCE.out.versions ) // // MODULE: GENERATE INDEX OF REFERENCE FASTA // SAMTOOLS_FAIDX ( - ch_upper_ref, - [[],[]], + ch_upper_ref.map { meta, file -> [meta, file, []] }, false ) - ch_versions = ch_versions.mix( SAMTOOLS_FAIDX.out.versions ) // @@ -90,7 +99,7 @@ workflow CURATIONPRETEXT { // ACCESSORY FILES SO WE HAVE AN OPTION TO TURN THEM OFF // - dont_generate_tracks = params.skip_tracks ? params.skip_tracks.split(",") : "NONE" + dont_generate_tracks = val_skip_tracks ? 
val_skip_tracks.split(",") : "NONE" full_list = [ "gap", @@ -121,9 +130,10 @@ workflow CURATIONPRETEXT { ch_upper_ref, ch_reads, val_teloseq, + val_split_telomere, + val_skip_tracks, SAMTOOLS_FAIDX.out.fai ) - ch_versions = ch_versions.mix( ACCESSORY_FILES.out.versions ) gaps_file = ACCESSORY_FILES.out.gap_file cove_file = ACCESSORY_FILES.out.longread_output @@ -132,58 +142,109 @@ workflow CURATIONPRETEXT { } + // + // LOGIC: IDEALLY THIS SHOULD BE DONE IN THE PIPELINE_INITIALISATION + // SUBWORKFLOW, HOWEVER, THE VALUE WOULD BE CONVERTED TO A CHANNEL + // WHICH THEN CANNOT BE USED TO GENERATE A STRING FOR THE SW + // + def fasta_size = file(val_input_file_string).size() + def selected_aligner = (val_aligner == "AUTO") ? + (fasta_size > 5e9 ? "minimap2" : "bwamem2") : + val_aligner + // - // SUBWORKFLOW: GENERATE ONLY PRETEXT MAPS, NO EXTRA FILES - // - GENERATE_MAPS IS THE MINIMAL OUTPUT EXPECTED FROM THIS PIPELLINE + // SUBWORKFLOW: MAP CRAM IF READS NOT ALREADY MAPPED // - GENERATE_MAPS ( + ALIGN_CRAM ( ch_upper_ref, ch_cram_reads, - SAMTOOLS_FAIDX.out.fai + selected_aligner, + val_cram_chunk_size ) - ch_versions = ch_versions.mix( GENERATE_MAPS.out.versions ) + mapped_bam = ch_mapped_bam.mix( ALIGN_CRAM.out.bam ) - if (!dont_generate_tracks.contains("ALL")) { - // - // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT - // - ADAPTED FROM TREEVAL - // - PRETEXT_INGEST_SNDRD ( - GENERATE_MAPS.out.standrd_pretext, - gaps_file, - cove_file, - telo_file, - rept_file, - params.split_telomere - ) - ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) + // + // SUBWORKFLOW: MAP THE PRETEXT FILE AND TAKE SNAPSHOT + // + CREATE_MAPS_STDRD ( + mapped_bam, + [[:],[]], + true, + true, + false, + false, + [] + ) - // - // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT - // - ADAPTED FROM TREEVAL - // - if (params.run_hires) { - PRETEXT_INGEST_HIRES ( - GENERATE_MAPS.out.highres_pretext, - gaps_file, - cove_file, - telo_file, - rept_file, - 
params.split_telomere - ) - ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) - } - } + // + // SUBWORKFLOW: MAP THE PRETEXT FILE + // + CREATE_MAPS_HIRES ( + mapped_bam.filter{ val_run_hires }, + [[:],[]], + true, + false, + false, + false, + [] + ) + + + // + // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT + // - ADAPTED FROM TREEVAL + // + PRETEXT_INGEST_SNDRD ( + CREATE_MAPS_STDRD.out.pretext.filter { !dont_generate_tracks.contains("ALL") }, + gaps_file, + cove_file, + telo_file, + rept_file, + val_split_telomere + ) + + + // + // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT + // - ADAPTED FROM TREEVAL + // + PRETEXT_INGEST_HIRES ( + CREATE_MAPS_HIRES.out.pretext.filter { val_run_hires && !dont_generate_tracks.contains("ALL") }, + gaps_file, + cove_file, + telo_file, + rept_file, + val_split_telomere + ) // // Collate and save software versions // - softwareVersionsToYAML(ch_versions) + def topic_versions = channel.topic("versions") + .distinct() + .branch { entry -> + versions_file: entry instanceof Path + versions_tuple: true + } + + def topic_versions_string = topic_versions.versions_tuple + .map { process, tool, version -> + [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] + } + .groupTuple(by:0) + .map { process, tool_versions -> + tool_versions.unique().sort() + "${process}:\n${tool_versions.join('\n')}" + } + + // Removed mix as there is no more ch_versions + softwareVersionsToYAML(topic_versions.versions_file) + .mix(topic_versions_string) .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'sanger-tol_' + 'curationpretext_software_' + 'versions.yml', @@ -191,10 +252,13 @@ workflow CURATIONPRETEXT { newLine: true ).set { ch_collated_versions } - summary_params = paramsSummaryMap( + _summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") + emit: + versions = ch_collated_versions // channel: [ path(versions.yml) ] + } /*