Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions subworkflows/sanger-tol/telo_finder/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//
// MODULE IMPORT BLOCK
//
include { BIOAWK } from '../../../modules/nf-core/bioawk/main'
include { TELOMERE_REGIONS } from '../../../modules/sanger-tol/telomere/regions/main'
include { GAWK as GAWK_SPLIT_TELOMERE } from '../../../modules/nf-core/gawk/main'
include { TELOMERE_WINDOWS } from '../../../modules/sanger-tol/telomere/windows/main'
Expand All @@ -12,20 +13,42 @@ workflow TELO_FINDER {

take:
ch_reference // Channel [ val(meta), path(fasta) ]
ch_telomereseq // Channel.of( telomere sequence )
ch_telomereseq // Channel [ val(meta), path(fasta) ]
val_split_telomere // bool
val_run_bgzip // bool

main:

//if G > 30% then flip else pass
//
// MODULE: BIOAWK CONVERT THE MOTIF INTO THE 5 PRIME DIRECTION
// IF PROVIDED IN THE 3 PRIME DIRECTION
// IF MOTIF HAS A G CONTENT OF > 30% IT IS IN THE 3 PRIME
//
BIOAWK(
ch_telomereseq,
"tsv"
)


//
// LOGIC: READ THE FIRST FIELD (CORRECTED MOTIF) OF LINE 1 OF THE OUTPUT FILE
//
corrected_telomere = BIOAWK.out.output
.map { _meta, file ->
def lines = file.toFile().readLines()
// Lines from bioawk are:
// corrected_sequence G_count G_percentage reversed? original_sequence
lines[0].split('\t')[0]
}
.filter { it != null }

Comment on lines +36 to +44
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this keep meta in the output? When would it return null and what should the behaviour be in this case?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in all fairness there should never be a null. If there's a null you've forgotten to include a telomere and will have other issues.


//
// MODULE: FINDS THE TELOMERIC SEQUENCE IN REFERENCE
//
TELOMERE_REGIONS (
ch_reference,
ch_telomereseq
corrected_telomere
)

ch_full_telomere = TELOMERE_REGIONS.out.telomere
Expand Down Expand Up @@ -89,7 +112,7 @@ workflow TELO_FINDER {
// THIS ONLY HAPPENS ON WHOLE TELOMERIC FILES
//
TELOMERE_WINDOWS (
ch_regions_for_extraction.filter { meta, file -> meta.direction == 0 }
ch_regions_for_extraction.filter { meta, _file -> meta.direction == 0 }
)

//
Expand Down Expand Up @@ -140,8 +163,9 @@ workflow TELO_FINDER {
)

emit:
bed_file = ch_telo_bedfiles // Channel [meta, bed]
bed_gz_tbi = TABIX_BGZIPTABIX.out.gz_index // Not used anymore
bedgraph_file = ch_telo_bedgraphs // Channel [meta, [bedfiles]] - Used in pretext_graph
telomere_summary = BIOAWK.out.output // Channel [meta, tsv]
bed_file = ch_telo_bedfiles // Channel [meta, bed]
bed_gz_tbi = TABIX_BGZIPTABIX.out.gz_index // Channel [meta, index]
bedgraph_file = ch_telo_bedgraphs // Channel [meta, [bedfiles]] - Used in pretext_graph

}
11 changes: 10 additions & 1 deletion subworkflows/sanger-tol/telo_finder/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ components:
- telomere/regions
- telomere/windows
- telomere/extract
- bioawk:
git_remote: https://github.com/nf-core/modules.git
- gawk:
git_remote: https://github.com/nf-core/modules.git
- tabix/bgziptabix:
Expand All @@ -23,8 +25,10 @@ input:
Meta is the Groovy Map containing sample information
Reference is the fasta for analysis
- ch_telomereseq:
type: string
type: file
description: |
Structure [ val(meta), path(reference) ]
Meta is the Groovy Map containing sample information
A FASTA file containing the DNA sequence of a telomere motif
- val_split_telomere:
type: boolean
Expand All @@ -35,6 +39,11 @@ input:
description: |
Control running of tabix with boolean
output:
- telomere_summary:
type: file
description: |
Structure: [ val(meta), path(tsv) ]
A tsv file summarising the telomere motif: corrected sequence, G count, G percentage, whether it was reverse-complemented, and the original sequence.
- bed_file:
type: file
description: |
Expand Down
5 changes: 5 additions & 0 deletions subworkflows/sanger-tol/telo_finder/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
nextflow.enable.moduleBinaries = true

process {

// BIOAWK: orient the telomere motif and summarise it as one TSV row per record.
// Output columns: corrected_sequence, G_count, G_percentage, reversed?, original_sequence.
// A motif with a G content above 30% is treated as being in the 3 prime
// direction and is reverse-complemented; otherwise it is passed through unchanged.
withName: BIOAWK {
    // Every awk `$seq` must be written as `\$seq` so that Groovy does not try to
    // interpolate a `seq` variable into the double-quoted string.
    // `\\t` / `\\n` hand the escape sequences to awk's printf rather than
    // embedding literal control characters in the command line.
    ext.args = { "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct > 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\\t%d\\t%.2f\\t%s\\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s }\'" }
}
Comment on lines +5 to +7
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One last question, very much optional, and which could have been prompted when you were doing the BIOAWK module before 😅 - would it be worth making the bioawk module more like the GAWK module and be able to take a program file? Then you could write this as a value channel in the subworkflow script?

Copy link
Copy Markdown
Contributor Author

@DLBPointon DLBPointon Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did think about that, it would definitely clean it up. But I chose the path of least resistance.

I don't know if it can take a file as input to be honest, I'll mock up a test and get back to you.

Edit: actually right in the help line -f progfile

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked the bioawk command itself, it does also have the -f option to take an AWK program file.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah looks good:

dp24@tol22-head1:[0c/80f5275761405e54eaf6864f57b83d] (telo_fix):$: bioawk -c fastx -f cli.awk telomere_motif.fasta

CCTAA	2	40.00	true	TTAGG

I'll open up the modules repo again

Copy link
Copy Markdown
Contributor Author

@DLBPointon DLBPointon Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


withName: TELOMERE_WINDOWS {
tag = { "${meta.id}_${meta.direction}P" }
ext.args = "99.9"
Expand Down
35 changes: 29 additions & 6 deletions subworkflows/sanger-tol/telo_finder/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ nextflow_workflow {
tag "telomere/windows"
tag "telomere/extract"
tag "tabix/bgziptabix"
tag "subworkflows/../../modules/nf-core/bioawk"
tag "subworkflows/../../modules/nf-core/gawk"
tag "subworkflows/../../modules/nf-core/gunzip"
tag "subworkflows/../../modules/nf-core/tabix/bgziptabix"
tag "modules/nf-core/gunzip"
tag "modules/nf-core/bioawk"

setup {
nfcoreInitialise("${launchDir}/library/")
Expand All @@ -24,7 +26,8 @@ nextflow_workflow {
[
"gawk",
"tabix/bgziptabix",
"gunzip"
"gunzip",
"bioawk"
]
)
nfcoreLink("${launchDir}/library/", "${baseDir}/modules/")
Expand All @@ -46,14 +49,18 @@ nextflow_workflow {
test("idFanCani4 - no split - fasta w/ index") {
when {
params {
bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct > 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\\t%d\\t%.2f\\t%s\\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s }\'"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The benefit is that json could be reused in other workflows, but thinking about it, this should already be sufficient.

windows_percent = "99.9"
bgzip_args = "--csi"
}

workflow {
"""
input[0] = GUNZIP.out.gunzip
input[1] = "TTAGG"
input[1] = [
[ id: "motifs" ],
file(params.modules_testdata_base_path + 'resources/modules/telomere/generic/telomere_motif.fasta', checkIfExists: true)
]
input[2] = false
input[3] = true
"""
Expand All @@ -75,14 +82,18 @@ nextflow_workflow {
test("idFanCani4 - split - fasta w/ index") {
when {
params {
bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct > 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\\t%d\\t%.2f\\t%s\\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s }\'"
windows_percent = "99.9"
bgzip_args = "--csi"
}

workflow {
"""
input[0] = GUNZIP.out.gunzip
input[1] = "TTAGG"
input[1] = [
[ id: "motifs" ],
file(params.modules_testdata_base_path + 'resources/modules/telomere/generic/telomere_motif.fasta', checkIfExists: true)
]
input[2] = true
input[3] = true
"""
Expand All @@ -104,14 +115,18 @@ nextflow_workflow {
test("idFanCani4 - no split - fasta w/o index") {
when {
params {
bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct > 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\\t%d\\t%.2f\\t%s\\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s }\'"
windows_percent = "99.9"
bgzip_args = "--csi"
}

workflow {
"""
input[0] = GUNZIP.out.gunzip
input[1] = "TTAGG"
input[1] = [
[ id: "motifs" ],
file(params.modules_testdata_base_path + 'resources/modules/telomere/generic/telomere_motif.fasta', checkIfExists: true)
]
input[2] = false
input[3] = false
"""
Expand All @@ -133,14 +148,18 @@ nextflow_workflow {
test("idFanCani4 - split - fasta w/o index") {
when {
params {
bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct > 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\\t%d\\t%.2f\\t%s\\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s }\'"
windows_percent = "99.9"
bgzip_args = "--csi"
}

workflow {
"""
input[0] = GUNZIP.out.gunzip
input[1] = "TTAGG"
input[1] = [
[ id: "motifs" ],
file(params.modules_testdata_base_path + 'resources/modules/telomere/generic/telomere_motif.fasta', checkIfExists: true)
]
input[2] = true
input[3] = false
"""
Expand All @@ -162,14 +181,18 @@ nextflow_workflow {
test("idFanCani4 - no split - fasta - stub w/o index") {
when {
params {
bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct > 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\\t%d\\t%.2f\\t%s\\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s }\'"
windows_percent = "99.9"
bgzip_args = "--csi"
}

workflow {
"""
input[0] = GUNZIP.out.gunzip
input[1] = "TTAGG"
input[1] = [
[ id: "motifs" ],
file(params.modules_testdata_base_path + 'resources/modules/telomere/generic/telomere_motif.fasta', checkIfExists: true)
]
input[2] = false
input[3] = false
"""
Expand Down
Loading
Loading