From a80391efbe45f95305b78a44185dcfdf3e334af8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 28 Aug 2025 16:47:54 +0100 Subject: [PATCH 01/10] Template update for nf-core/tools version 3.3.2 --- .editorconfig | 37 ----- .github/CONTRIBUTING.md | 2 +- .github/actions/get-shards/action.yml | 69 +++++++++ .github/actions/nf-test/action.yml | 109 ++++++++++++++ .github/workflows/ci.yml | 88 ----------- .github/workflows/clean-up.yml | 2 +- .github/workflows/download_pipeline.yml | 20 +-- .../{fix-linting.yml => fix_linting.yml} | 4 +- .github/workflows/linting.yml | 17 +-- .github/workflows/linting_comment.yml | 4 +- .github/workflows/nf-test.yml | 140 ++++++++++++++++++ ...mment.yml => template-version-comment.yml} | 2 +- .nf-core.yml | 4 +- .pre-commit-config.yaml | 26 +++- .prettierrc.yml | 5 + CHANGELOG.md | 2 +- README.md | 7 +- assets/schema_input.json | 4 +- conf/base.config | 6 +- nextflow.config | 22 ++- nf-test.config | 24 +++ .../main.nf | 1 - .../tests/nextflow.config | 2 +- tests/.nftignore | 2 + tests/default.nf.test | 35 +++++ tests/nextflow.config | 14 ++ 26 files changed, 472 insertions(+), 176 deletions(-) delete mode 100644 .editorconfig create mode 100644 .github/actions/get-shards/action.yml create mode 100644 .github/actions/nf-test/action.yml delete mode 100644 .github/workflows/ci.yml rename .github/workflows/{fix-linting.yml => fix_linting.yml} (96%) create mode 100644 .github/workflows/nf-test.yml rename .github/workflows/{template_version_comment.yml => template-version-comment.yml} (95%) create mode 100644 nf-test.config create mode 100644 tests/.nftignore create mode 100644 tests/default.nf.test create mode 100644 tests/nextflow.config diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 6d9b74cc..00000000 --- a/.editorconfig +++ /dev/null @@ -1,37 +0,0 @@ -root = true - -[*] -charset = utf-8 -end_of_line = lf -insert_final_newline = true -trim_trailing_whitespace = true -indent_size = 4 -indent_style = space - -[*.{md,yml,yaml,html,css,scss,js}] -indent_size = 2 - -# These files are edited and tested upstream in nf-core/modules -[/modules/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset -[/subworkflows/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset - -[/assets/email*] -indent_size = unset - -# ignore python and markdown -[*.{py,md}] -indent_style = unset - -# ignore ro-crate metadata files -[**/ro-crate-metadata.json] -insert_final_newline = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index d7a0a69f..e3f6b58c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -71,7 +71,7 @@ If you wish to contribute a new step, please use the following coding standards: 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. +8. If applicable, add a new test in the `tests` directory. ### Default values diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml new file mode 100644 index 00000000..34085279 --- /dev/null +++ b/.github/actions/get-shards/action.yml @@ -0,0 +1,69 @@ +name: "Get number of shards" +description: "Get the number of nf-test shards for the current CI job" +inputs: + max_shards: + description: "Maximum number of shards allowed" + required: true + paths: + description: "Component paths to test" + required: false + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +outputs: + shard: + description: "Array of shard numbers" + value: ${{ steps.shards.outputs.shard }} + total_shards: + description: "Total number of shards" + value: ${{ steps.shards.outputs.total_shards }} +runs: + using: "composite" + steps: + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Get number of shards + id: shards + shell: bash + run: | + # Run nf-test with dynamic parameter + nftest_output=$(nf-test test \ + --profile +docker \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --dry-run \ + --ci \ + --changed-since HEAD^) || { + echo "nf-test command failed with exit code $?" + echo "Full output: $nftest_output" + exit 1 + } + echo "nf-test dry-run output: $nftest_output" + + # Default values for shard and total_shards + shard="[]" + total_shards=0 + + # Check if there are related tests + if echo "$nftest_output" | grep -q 'No tests to execute'; then + echo "No related tests found." + else + # Extract the number of related tests + number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p') + if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then + shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} )) + shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .) + total_shards="$shards_to_run" + else + echo "Unexpected output format. Falling back to default values." + fi + fi + + # Write to GitHub Actions outputs + echo "shard=$shard" >> $GITHUB_OUTPUT + echo "total_shards=$total_shards" >> $GITHUB_OUTPUT + + # Debugging output + echo "Final shard array: $shard" + echo "Total number of shards: $total_shards" diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml new file mode 100644 index 00000000..bf44d961 --- /dev/null +++ b/.github/actions/nf-test/action.yml @@ -0,0 +1,109 @@ +name: "nf-test Action" +description: "Runs nf-test with common setup steps" +inputs: + profile: + description: "Profile to use" + required: true + shard: + description: "Shard number for this CI job" + required: true + total_shards: + description: "Total number of test shards(NOT the total number of matrix jobs)" + required: true + paths: + description: "Test paths" + required: true + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +runs: + using: "composite" + steps: + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ env.NXF_VERSION }}" + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.13" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: "${{ env.NFT_VER }}" + install-pdiff: true + + - name: Setup apptainer + if: contains(inputs.profile, 'singularity') + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: contains(inputs.profile, 'singularity') + shell: bash + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Conda setup + if: contains(inputs.profile, 'conda') + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 + with: + auto-update-conda: true + conda-solver: libmamba + conda-remove-defaults: true + + - name: Run nf-test + shell: bash + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + run: | + nf-test test \ + --profile=+${{ inputs.profile }} \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --ci \ + --changed-since HEAD^ \ + --verbose \ + --tap=test.tap \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} + + # Save the absolute path of the test.tap file to the output + echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT + + - name: Generate test summary + if: always() + shell: bash + run: | + # Add header if it doesn't exist (using a token file to track this) + if [ ! -f ".summary_header" ]; then + echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY + touch .summary_header + fi + + if [ -f test.tap ]; then + while IFS= read -r line; do + if [[ $line =~ ^ok ]]; then + test_name="${line#ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + elif [[ $line =~ ^not\ ok ]]; then + test_name="${line#not ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + done < test.tap + else + echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + + - name: Clean up + if: always() + shell: bash + run: | + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 44033727..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - workflow_dispatch: - -env: - NXF_ANSI_LOG: false - NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity - NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/curationpretext') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "24.04.2" - - "latest-everything" - profile: - - "conda" - - "docker" - - "singularity" - test_name: - - "test" - isMaster: - - ${{ github.base_ref == 'master' }} - # Exclude conda and singularity on dev - exclude: - - isMaster: false - profile: "conda" - - isMaster: false - profile: "singularity" - steps: - - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - with: - fetch-depth: 0 - - - name: Set up Nextflow - uses: nf-core/setup-nextflow@v2 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Set up Apptainer - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - if: matrix.profile == 'singularity' - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Set up Miniconda - if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 - with: - miniconda-version: "latest" - auto-update-conda: true - conda-solver: libmamba - channels: conda-forge,bioconda - - - name: Set up Conda - if: matrix.profile == 'conda' - run: | - echo $(realpath $CONDA)/condabin >> $GITHUB_PATH - echo $(realpath python) >> $GITHUB_PATH - - - name: Clean up Disk space - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" - continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} - run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 0b6b1f27..ac030fd5 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index ab06316e..999bcc38 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -12,14 +12,6 @@ on: required: true default: "dev" pull_request: - types: - - opened - - edited - - synchronize - branches: - - main - - master - pull_request_target: branches: - main - master @@ -52,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" architecture: "x64" - name: Setup Apptainer @@ -120,6 +112,7 @@ jobs: echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT" - name: Compare container image counts + id: count_comparison run: | if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }} @@ -132,3 +125,10 @@ jobs: else echo "The pipeline can be downloaded successfully!" fi + + - name: Upload Nextflow logfile for debugging purposes + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: nextflow_logfile.txt + path: .nextflow.log* + include-hidden-files: true diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix_linting.yml similarity index 96% rename from .github/workflows/fix-linting.yml rename to .github/workflows/fix_linting.yml index 94c929ba..1c97b461 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix_linting.yml @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index dbd52d5a..8b0f88c3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -3,9 +3,6 @@ name: nf-core linting # It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. on: - push: - branches: - - dev pull_request: release: types: [published] @@ -16,10 +13,10 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python 3.12 - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - name: Set up Python 3.13 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit @@ -36,13 +33,13 @@ jobs: - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" architecture: "x64" - name: read .nf-core.yml - uses: pietrobolcato/action-read-yaml@1.1.0 + uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml @@ -74,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 95b6b6af..d43797d9 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 00000000..593c9360 --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,140 @@ +name: Run nf-test +on: + pull_request: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +jobs: + nf-test-changes: + name: nf-test-changes + runs-on: # use GitHub runners + - "ubuntu-latest" + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + max_shards: 7 + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test: + name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}" + needs: [nf-test-changes] + if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} + runs-on: # use GitHub runners + - "ubuntu-latest" + strategy: + fail-fast: false + matrix: + shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} + profile: [conda, docker, singularity] + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMain: false + profile: "conda" + - isMain: false + profile: "singularity" + NXF_VER: + - "24.10.5" + - "latest-everything" + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: Run nf-test + id: run_nf_test + uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." + fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + + confirm-pass: + needs: [nf-test] + if: always() + runs-on: # use GitHub runners + - "ubuntu-latest" + steps: + - name: One or more tests failed (excluding latest-everything) + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print + if: always() + run: | + echo "::group::DEBUG: `needs` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template-version-comment.yml similarity index 95% rename from .github/workflows/template_version_comment.yml rename to .github/workflows/template-version-comment.yml index 537529bc..beb5c77f 100644 --- a/.github/workflows/template_version_comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -14,7 +14,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Read template version from .nf-core.yml - uses: nichmor/minimal-read-yaml@v0.0.2 + uses: nichmor/minimal-read-yaml@1f7205277e25e156e1f63815781db80a6d490b8f # v0.0.2 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml diff --git a/.nf-core.yml b/.nf-core.yml index 259a3724..485f5487 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -30,7 +30,7 @@ lint: nextflow_config: - manifest.name - manifest.homePage -nf_core_version: 3.2.1 +nf_core_version: 3.3.2 repository_type: pipeline template: author: Damon-Lee B Pointon (@DLBPointon) @@ -48,4 +48,4 @@ template: - seqera_platform - multiqc - rocrate - version: 1.4.1 + version: 1.4.2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1dec8650..bb41beec 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,10 +4,24 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.2.5 - - - repo: https://github.com/editorconfig-checker/editorconfig-checker.python - rev: "3.1.2" + - prettier@3.6.2 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 hooks: - - id: editorconfig-checker - alias: ec + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$ + )$ + - id: end-of-file-fixer + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$ + )$ diff --git a/.prettierrc.yml b/.prettierrc.yml index c81f9a76..07dbd8bb 100644 --- a/.prettierrc.yml +++ b/.prettierrc.yml @@ -1 +1,6 @@ printWidth: 120 +tabWidth: 4 +overrides: + - files: "*.{md,yml,yaml,html,css,scss,js,cff}" + options: + tabWidth: 2 diff --git a/CHANGELOG.md b/CHANGELOG.md index bc3e350d..a38609fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.4.1 - [date] +## v1.4.2 - [date] Initial release of sanger-tol/curationpretext, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index 86c881cb..3b3843c7 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # sanger-tol/curationpretext -[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/ci.yml) +[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -21,7 +22,7 @@ --> + workflows use the "tube map" design for that. See https://nf-co.re/docs/guidelines/graphic_design/workflow_diagrams#examples for examples. --> ## Usage diff --git a/assets/schema_input.json b/assets/schema_input.json index 7dea6e86..c21d7515 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,14 +17,14 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, diff --git a/conf/base.config b/conf/base.config index 45a8a625..8370150f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -15,7 +15,7 @@ process { memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -59,4 +59,8 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 1 : null } + } } diff --git a/nextflow.config b/nextflow.config index db46f10c..396dc039 100644 --- a/nextflow.config +++ b/nextflow.config @@ -148,16 +148,25 @@ profiles { ] } } + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } -// Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +// Load nf-core custom profiles from different institutions + +// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. +// Load sanger-tol/curationpretext custom profiles from different institutions. +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + // Load sanger-tol/curationpretext custom profiles from different institutions. // TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/curationpretext.config" : "/dev/null" +// includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/curationpretext.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -213,7 +222,6 @@ dag { manifest { name = 'sanger-tol/curationpretext' - author = """Damon-Lee B Pointon (@DLBPointon)""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0 [ @@ -229,14 +237,14 @@ manifest { description = """A simple pipeline to generate pretext files for genomic curation.""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=24.04.2' - version = '1.4.1' + nextflowVersion = '!>=24.10.5' + version = '1.4.2' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..3a1fff59 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,24 @@ +config { + // location for all nf-test tests + testsDir "." + + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // ignore tests coming from the nf-core/modules repo + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' + + // run all test with defined profile(s) from the main nextflow.config + profile "test" + + // list of filenames or patterns that should be trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore' + + // load the necessary plugins + plugins { + load "nft-utils@0.0.3" + } +} diff --git a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf index aa3f2b7d..40153083 100644 --- a/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_curationpretext_pipeline/main.nf @@ -219,4 +219,3 @@ def methodsDescriptionText(mqc_methods_yaml) { return description_html.toString() } - diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac58..09ef842a 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.1.0" + id "nf-schema@2.4.2" } validation { diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..73eb92f7 --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,2 @@ +.DS_Store +pipeline_info/*.{html,json,txt,yml} diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 00000000..5c9f4cf2 --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,35 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + + test("-profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/curationpretext_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..e3be3550 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,14 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ + +// TODO nf-core: Specify any additional parameters here +// Or any resources requirements +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/curationpretext' +} + +aws.client.anonymous = true // fixes S3 access issues on self-hosted runners From 84d9bb96df56c9df8042e70c7b59c1ad02a554a8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 28 Aug 2025 17:04:48 +0100 Subject: [PATCH 02/10] Template Update 332 --- tests/default.nf.test.snap | 171 +++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 tests/default.nf.test.snap diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 00000000..7815abe3 --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,171 @@ +{ + "Full run": { + "content": [ + 40, + { + "BEDTOOLS_BAMTOBED": { + "bedtools": "2.31.1" + }, + "BEDTOOLS_GENOMECOV": { + "bedtools": "2.31.1" + }, + "BEDTOOLS_INTERSECT": { + "bedtools": "2.31.1" + }, + "BEDTOOLS_MAKEWINDOWS": { + "bedtools": "2.31.1" + }, + "BEDTOOLS_MAP": { + "bedtools": "2.31.1" + }, + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + }, + "CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT": { + "samtools": 1.17, + "bwa-mem2": "2.2.1", + "staden_io_lib": "1.14.14" + }, + "CRAM_GENERATE_CSV": { + "samtools": 1.17 + }, + "EXTRACT_REPEAT": { + "perl": "(v5.26.2))", + "extract_repeat.pl": 1.0 + }, + "EXTRACT_TELOMERE": { + "extract_telomere": 2.0, + "coreutils": 9.1 + }, + "FIND_TELOMERE_REGIONS": { + "find_telomere": 1.0 + }, + "FIND_TELOMERE_WINDOWS": { + "telomere": 1.0 + }, + "GAWK_CLEAN_TELOMERE": { + "gawk": "5.3.0" + }, + "GAWK_GAP_LENGTH": { + "gawk": "5.3.0" + }, + "GAWK_GENERATE_GENOME_FILE": { + "gawk": "5.3.0" + }, + "GAWK_REFORMAT_INTERSECT": { + "gawk": "5.3.0" + }, + "GAWK_RENAME_IDS": { + "gawk": "5.3.0" + }, + "GAWK_REPLACE_DOTS": { + "gawk": "5.3.0" + }, + "GAWK_UPPER_SEQUENCE": { + "gawk": "5.3.0" + }, + "GET_LARGEST_SCAFFOLD": { + "get_largest_scaffold": 2.0, + "coreutils": 9.1 + }, + "GNU_SORT": { + "coreutils": 9.3 + }, + "GNU_SORT_A": { + "coreutils": 9.3 + }, + "GNU_SORT_B": { + "coreutils": 9.3 + }, + "GNU_SORT_C": { + "coreutils": 9.3 + }, + "GRAPH_OVERALL_COVERAGE": { + "perl": "(v5.26.2))", + "graph_overall_coverage.pl": 1.0 + }, + "MINIMAP2_ALIGN": { + "minimap2": "2.28-r1209", + "samtools": 1.2 + }, + "PRETEXTMAP_STANDRD": { + "pretextmap": "0.1.9", + "samtools": 1.21 + }, + "PRETEXT_INGEST_SNDRD": { + "PretextGraph": "0.0.9", + "PretextMap": "0.1.9", + "bigWigToBedGraph": 447 + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_MERGE": { + "samtools": 1.21 + }, + "SAMTOOLS_SORT": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_FILTER_PRIMARY": { + "samtools": 1.21 + }, + "SEQTK_CUTN": { + "seqtk": "1.4-r122" + }, + "SNAPSHOT_SRES": { + "pretextsnapshot": "0.0.4" + }, + "UCSC_BEDGRAPHTOBIGWIG": { + "ucsc": 469 + }, + "WINDOWMASKER_MKCOUNTS": { + "windowmasker": "1.0.0" + }, + "WINDOWMASKER_USTAT": { + "windowmasker": "1.0.0" + }, + "Workflow": { + "sanger-tol/curationpretext": "v1.4.2" + } + }, + [ + "accessory_files", + "accessory_files/CurationPretextTest.bigWig", + "accessory_files/CurationPretextTest.gap.bedgraph", + "accessory_files/CurationPretextTest_telomere.bed", + "accessory_files/CurationPretextTest_telomere.bedgraph", + "accessory_files/coverage.bigWig", + "pipeline_info", + "pipeline_info/sanger-tol_curationpretext_software_versions.yml", + "pretext_maps_processed", + "pretext_maps_processed/CurationPretextTest_normal.pretext", + "pretext_maps_raw", + "pretext_maps_raw/CurationPretextTest_normal_pi.pretext", + "pretext_snapshot", + "pretext_snapshot/CurationPretextTest_normalFullMap.png" + ], + 14, + [ + "CurationPretextTest.bigWig:md5,3f66a9152d793a62f877b733c2336dfd", + "CurationPretextTest.gap.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", + "CurationPretextTest_telomere.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "CurationPretextTest_telomere.bedgraph:md5,d41d8cd98f00b204e9800998ecf8427e", + "coverage.bigWig:md5,2e474506c957152b231ac63c859f0b17" + ], + 5, + 1, + false, + true, + 1, + false, + true, + 1, + true + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.04.4" + }, + "timestamp": "2025-04-16T11:23:34.556355" + } +} From 0832a1a24c5ca0e55c2c853b23e5f5600b1d7a68 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 29 Aug 2025 09:36:39 +0100 Subject: [PATCH 03/10] updated ro-crate --- ro-crate-metadata.json | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 50ea4f24..61ee727e 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2025-05-27T09:34:43+00:00", - "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/curationpretext/actions?query=workflow%3A%22nf-core+CI%22)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/curationpretext/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://pipelines.tol.sanger.ac.uk/curationpretext/1.4.0/usage) on how to set-up Nextflow. Make sure to [test your setup](https://pipelines.tol.sanger.ac.uk/curationpretext/1.4.0/usage) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -124,7 +124,11 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], "creator": [ { "@id": "https://orcid.org/0000-0002-7860-3560" @@ -133,9 +137,16 @@ "dateCreated": "", "dateModified": "2025-05-27T10:34:43Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow"], - "license": ["MIT"], - "name": ["sanger-tol/curationpretext"], + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "name": [ + "sanger-tol/curationpretext" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, @@ -146,7 +157,9 @@ "https://github.com/sanger-tol/curationpretext", "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/" ], - "version": ["1.4.2"] + "version": [ + "1.4.2" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -332,4 +345,4 @@ "name": "Josie Paris" } ] -} +} \ No newline at end of file From 7d8c1390428bb8edf6b24ecc3f5c06fd03a78b1c Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 29 Aug 2025 09:41:36 +0100 Subject: [PATCH 04/10] Pre-Commit Prettier --- CHANGELOG.md | 2 -- README.md | 8 -------- 2 files changed, 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7f8b932..fe9a0345 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -126,11 +126,9 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i ### Added and Fixed - GRIT found a bug in `pretext_graph` ingestion code where null values were being introduced as the track name - - This has now need hardcoded, there was no need for dynamic naming anyway - GRIT found a bug in `pretext_graph` ingestion where gap and telomere tracks stopped being ingested correctly and would no longer display or be zeroed out. - - I'm not entirely sure of the cause of this but i think it is a mix of how pretext handles unnamed tracks, assuming their datatype so a null named gap track would be treated as a repeat track, and incorrect logic in the pretext_graph module. - Added GAWK module (as GAWK_CLEAN_TELOMERE) to remove "you screwed up" (this is a legacy error message which will be changed to something more informative and professional) error lines which can appear with some telo motifs or lower case motifs. These will otherwise cause the FIND_TELOMERE_WINDOWS process to crash. diff --git a/README.md b/README.md index 74b503de..d68799fe 100644 --- a/README.md +++ b/README.md @@ -31,35 +31,27 @@ This is intended as a supplementary pipeline for the [treeval](https://github.co Currently, the pipeline uses the following flags: - `--input` - - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa` - `--reads` - - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/` - `--read_type` - - The type of longread data you are utilising, e.g., ont, illumina, hifi. - `--aligner` - - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported. - `--cram` - - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/` - `--map_order` - - hic map scaffold order, input either `length` or `unsorted` - `--teloseq` - - A telomeric sequence, e.g., `TTAGGG` - `--all_output` - - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured. Now, you can run the pipeline using: From cebb3ca789cbe7447acb01f7617313b6194810fb Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 29 Aug 2025 09:43:05 +0100 Subject: [PATCH 05/10] Pre-Commit Prettier --- ro-crate-metadata.json | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 61ee727e..86ee1f8c 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -124,11 +124,7 @@ }, { "@id": "main.nf", - "@type": [ - "File", - "SoftwareSourceCode", - "ComputationalWorkflow" - ], + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], "creator": [ { "@id": "https://orcid.org/0000-0002-7860-3560" @@ -137,16 +133,9 @@ "dateCreated": "", "dateModified": "2025-05-27T10:34:43Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": [ - "nf-core", - "nextflow" - ], - "license": [ - "MIT" - ], - "name": [ - "sanger-tol/curationpretext" - ], + "keywords": ["nf-core", "nextflow"], + "license": ["MIT"], + "name": ["sanger-tol/curationpretext"], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, @@ -157,9 +146,7 @@ "https://github.com/sanger-tol/curationpretext", "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/" ], - "version": [ - "1.4.2" - ] + "version": ["1.4.2"] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -345,4 +332,4 @@ "name": "Josie Paris" } ] -} \ No newline at end of file +} From 78075a28b0d6b4c5ec113ed154529abb62b6c979 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 29 Aug 2025 11:09:49 +0100 Subject: [PATCH 06/10] Another rocrate update --- ro-crate-metadata.json | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 86ee1f8c..05ff8efd 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2025-05-27T09:34:43+00:00", - "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only)\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/curationpretext/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.12773958-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.12773958)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/curationpretext)\n\n## Introduction\n\n**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.\n\nThis is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage).\n\n![Workflow Diagram](./docs/images/CurationPretext_1_3_0.png)\n\n1. Generate Maps - Generates pretext maps as well as a static image.\n\n2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCurrently, the pipeline uses the following flags:\n\n- `--input`\n - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa`\n\n- `--reads`\n - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/`\n\n- `--read_type`\n - The type of longread data you are utilising, e.g., ont, illumina, hifi.\n\n- `--aligner`\n - The aligner yopu wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported.\n\n- `--cram`\n - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/`\n\n- `--map_order`\n - hic map scaffold order, input either `length` or `unsorted`\n\n- `--teloseq`\n - A telomeric sequence, e.g., `TTAGGG`\n\n- `--all_output`\n - An option to output all maps + accessory files, the default will only output the pretextmaps where ingestion has occured.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run sanger-tol/curationpretext \\\n --input { input.fasta } \\\n --cram { path/to/cram/ } \\\n --reads { path/to/longread/fasta/ } \\\n --read_type { default is \"hifi\" }\n --sample { default is \"pretext_rerun\" } \\\n --teloseq { default is \"TTAGGG\" } \\\n --map_order { default is \"unsorted\" } \\\n --all_output \\\n --outdir { OUTDIR } \\\n -profile \n\n```\n\n> **Warning:**\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those\n> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;\n\nFor more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters).\n\n## Pipeline output\n\nTo see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output).\n\n## Credits\n\nsanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPointon).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @muffato - For reviews.\n\n- @yumisims - TreeVal and Software.\n\n- @weaglesBio - TreeVal and Software.\n\n- @josieparis - Help with better docs and testing.\n\n- @mahesh-panchal - Large support with 1.2.0 in making the pipeline more robust with other HPC environments.\n\n- @GRIT - For feedback and feature requests.\n\n- @prototaxites - Support with 1.3.0 and showing me the power of GAWK.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use sanger-tol/curationpretext for your analysis, please cite it using the following doi: [10.5281/zenodo.12773958](https://doi.org/10.5281/zenodo.12773958)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -124,7 +124,11 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], "creator": [ { "@id": "https://orcid.org/0000-0002-7860-3560" @@ -133,9 +137,16 @@ "dateCreated": "", "dateModified": "2025-05-27T10:34:43Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow"], - "license": ["MIT"], - "name": ["sanger-tol/curationpretext"], + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "name": [ + "sanger-tol/curationpretext" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, @@ -146,7 +157,9 @@ "https://github.com/sanger-tol/curationpretext", "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/" ], - "version": ["1.4.2"] + "version": [ + "1.4.2" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -332,4 +345,4 @@ "name": "Josie Paris" } ] -} +} \ No newline at end of file From 521e2d609667b1e6cf3abf10d3556040683606e6 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 29 Aug 2025 11:10:48 +0100 Subject: [PATCH 07/10] Another rocrate update --- ro-crate-metadata.json | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 05ff8efd..fd7ddd4a 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -124,11 +124,7 @@ }, { "@id": "main.nf", - "@type": [ - "File", - "SoftwareSourceCode", - "ComputationalWorkflow" - ], + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], "creator": [ { "@id": "https://orcid.org/0000-0002-7860-3560" @@ -137,16 +133,9 @@ "dateCreated": "", "dateModified": "2025-05-27T10:34:43Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": [ - "nf-core", - "nextflow" - ], - "license": [ - "MIT" - ], - "name": [ - "sanger-tol/curationpretext" - ], + "keywords": ["nf-core", "nextflow"], + "license": ["MIT"], + "name": ["sanger-tol/curationpretext"], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, @@ -157,9 +146,7 @@ "https://github.com/sanger-tol/curationpretext", "https://pipelines.tol.sanger.ac.uk/sanger-tol/curationpretext/1.4.2/" ], - "version": [ - "1.4.2" - ] + "version": ["1.4.2"] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -345,4 +332,4 @@ "name": "Josie Paris" } ] -} \ No newline at end of file +} From 26162d766d8a880481b69d4a39a8d94f103955d8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 15 Sep 2025 10:44:11 +0100 Subject: [PATCH 08/10] Update to resources --- conf/base.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index 84aed456..16ee6a42 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,12 +31,12 @@ process { withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { cpus = { 16 } - memory = { 1.GB * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 30 ) * Math.ceil( task.attempt * 1 ) ) } + memory = { 1.GB * ( reference.size() < 2e9 ? 80 : Math.ceil( ( reference.size() / 1e+9 ) * 50 ) * Math.ceil( task.attempt * 1 ) ) } } withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { cpus = { 16 } - memory = { 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 3 ) * Math.ceil( task.attempt * 1 ) ) } + memory = { 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 8 ) * Math.ceil( task.attempt * 1 ) ) } } withName: PRETEXT_GRAPH { From e196eaa2bf9026accd5003d39a54828e6d573a43 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 15 Sep 2025 10:44:37 +0100 Subject: [PATCH 09/10] Update naming of tracks to match what pretextview expects --- modules/local/pretext/graph/main.nf | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/modules/local/pretext/graph/main.nf b/modules/local/pretext/graph/main.nf index 49bec139..df351d1f 100644 --- a/modules/local/pretext/graph/main.nf +++ b/modules/local/pretext/graph/main.nf @@ -30,6 +30,20 @@ process PRETEXT_GRAPH { def UCSC_VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. // Using single [ ] as nextflow will use sh where possible not bash + // + // Core Args must match the below (taken from PretextView), this allows + // the use of keyboard shortcuts for main tracks: + // + // data_type_dic{ // use this data_type + // {"default", 0, }, + // {"repeat_density", 1}, + // {"gap", 2}, + // {"coverage", 3}, + // {"coverage_avg", 4}, + // {"telomere", 5}, + // {"not_weighted", 6} + // }; + // """ echo "PROCESSING ESSENTIAL FILES" @@ -87,7 +101,9 @@ process PRETEXT_GRAPH { if [ -s "\$file_og" ]; then echo "Processing OG_TELOMERE file: \$file_og" - PretextGraph $args -i "\$input_file" -n "og_telomere" -o telo_0.pretext < "\$file_og" + + # Must be named "telomere" + PretextGraph $args -i "\$input_file" -n "telomere" -o telo_0.pretext < "\$file_og" else echo "OG TELOMERE file - Could be empty or missing" cp "\$input_file" telo_0.pretext From e102e0f4250ba504bad3d2ec4e659cb1b8d622af Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 16 Sep 2025 10:37:32 +0100 Subject: [PATCH 10/10] Update nextflow.config Correction from @muffato Co-authored-by: Matthieu Muffato --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c8f7f654..cd1f3d60 100644 --- a/nextflow.config +++ b/nextflow.config @@ -265,7 +265,7 @@ manifest { homePage = 'https://github.com/sanger-tol/curationpretext' description = """A simple pipeline to generate pretext files for genomic curation.""" mainScript = 'main.nf' - defaultBranch = 'master' + defaultBranch = 'main' nextflowVersion = '!>=24.10.5' version = '1.5.0' doi = '10.5281/zenodo.12773958'