Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
4e4c8a5
Update treeval asfile code
mahesh-panchal May 26, 2025
5a25c15
Remove null injection
mahesh-panchal May 26, 2025
a8a38b9
Move yaml package name
mahesh-panchal May 26, 2025
b8135db
Convert String paths to Path, and annotate rest
mahesh-panchal May 26, 2025
7314d90
Fix read_ch logic
mahesh-panchal May 26, 2025
2ee517b
Refactor YAML_INPUT to not separate everything in channels
mahesh-panchal May 26, 2025
57b845f
Amend Synteny wf
mahesh-panchal May 26, 2025
7a1a621
Replace Grabfiles
mahesh-panchal May 26, 2025
999b4b2
Remove redundant line
mahesh-panchal May 26, 2025
eca33c0
Fix map syntax
mahesh-panchal May 26, 2025
8cce1d1
Update gene alignment wf
mahesh-panchal May 27, 2025
28e107f
Fix object type in gene alignment wf
mahesh-panchal May 27, 2025
8e66472
Remove redundant line
mahesh-panchal May 27, 2025
d85a139
Update reading in cram and fastq
mahesh-panchal May 28, 2025
a1dd22d
Remove additional workflows from rebase
mahesh-panchal May 28, 2025
9c91666
Convert string to channel
mahesh-panchal May 28, 2025
c61f12a
Rename channels
mahesh-panchal May 28, 2025
bb5af0a
Revert rebase variable name change
mahesh-panchal May 28, 2025
8aab202
Add TODO for warning
mahesh-panchal May 28, 2025
49bf4c9
Revert changes from rebase
mahesh-panchal May 28, 2025
e88b13b
Merge branch 'dp24_specified_reads' into nbis_patches
DLBPointon May 28, 2025
8736b26
Fix variable spelling
mahesh-panchal Jun 2, 2025
9dee005
Move genome mode parameter
mahesh-panchal Jun 2, 2025
7bd6ab1
Propagate busco full table
mahesh-panchal Jun 2, 2025
dc949ca
Fix PRETEXT_INGEST_HIRES inputs
mahesh-panchal Jun 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ process {
}

withName: 'GAWK_RENAME_IDS' {
ext.args2 = "'{ gsub(/\\./, \"0\"); print}'"
ext.args2 = "'{ gsub(/\\./, \"0\"); print}'"
ext.prefix = { "${meta.id}_renamed" }
ext.suffix = 'bed'
}
Expand Down
2 changes: 1 addition & 1 deletion modules/local/extract/ancestral/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ process EXTRACT_ANCESTRAL {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}_buscopainter_complete_location.tsv
touch ${prefox}_buscopainter_duplicated_location.tsv
touch ${prefix}_buscopainter_duplicated_location.tsv
touch ${prefix}_summary.tsv

cat <<-END_VERSIONS > versions.yml
Expand Down
31 changes: 3 additions & 28 deletions subworkflows/local/ancestral_gene/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,29 @@ include { UCSC_BEDTOBIGBED } from '../../../modules/nf-core/ucsc/bedtobigbed/m

workflow ANCESTRAL_GENE {
take:
busco_dir // Channel: tuple [val(meta),/path/to/busco/output/dir]
busco_full_table // Channel: tuple [val(meta),/path/to/busco/output/**/fulltable.tsv ]
dot_genome // Channel: tuple [val(meta), [ datafile ]]
buscogene_as // Channel: val(dot_as location)
ancestral_table // Channel: val(ancestral_table location)

main:
ch_versions = Channel.empty()

ch_grab = GrabFiles(busco_dir)

//
// MODULE: EXTRACTS ANCESTRALLY LINKED BUSCO GENES FROM FULL TABLE
//
EXTRACT_ANCESTRAL(
ch_grab,
busco_full_table,
ancestral_table
)
ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions)

//
// LOGIC: STRIP OUT METADATA
//
ch_grab
.map { meta, fulltable
-> fulltable
}
.set { assignanc_input }

//
// MODULE: ASSIGN EXTRACTED GENES TO ANCESTRAL GROUPS
//
ASSIGN_ANCESTRAL(
EXTRACT_ANCESTRAL.out.comp_location,
assignanc_input
busco_full_table.map { _meta, fulltable -> fulltable }
)
ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions)

Expand All @@ -70,17 +59,3 @@ workflow ANCESTRAL_GENE {
ch_ancestral_bigbed = UCSC_BEDTOBIGBED.out.bigbed
versions = ch_versions
}
// GrabFiles: stages the incoming path under the name "in" and emits, paired
// with the unchanged meta value, whatever matches in/*/*/full_table.tsv.
process GrabFiles {
    // Directives: tagged with meta.id, pinned to the 'local' executor and
    // carrying the 'process_tiny' label.
    tag "${meta.id}"
    executor 'local'
    label 'process_tiny'

    input:
    tuple val(meta), path("in")

    output:
    tuple val(meta), path("in/*/*/full_table.tsv")

    // The script body is just `true`; the process exists for its output glob.
    script:
    "true"
}
59 changes: 15 additions & 44 deletions subworkflows/local/busco_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,26 @@ workflow BUSCO_ANNOTATION {
main:
ch_versions = Channel.empty()

// COMMENT: Set BUSCO mode to 'genome'
ch_busco_mode = Channel.of( "genome" )


//
// MODULE: RUN BUSCO TO OBTAIN FULL_TABLE.TSV
// EMITS FULL_TABLE.TSV
//
BUSCO_BUSCO (
reference_tuple,
ch_busco_mode,
"genome",
lineageinfo,
lineagespath,
[]
)
ch_versions = ch_versions.mix(BUSCO_BUSCO.out.versions.first())
ch_grab = GrabFiles(BUSCO_BUSCO.out.busco_dir)
ch_versions = ch_versions.mix(BUSCO_BUSCO.out.versions.first())
ch_busco_full_table = BUSCO_BUSCO.out.busco_dir.map { meta, dir -> tuple(meta, files(dir.resolve("*/*/full_table.tsv"), checkIfExists: true)) }


//
// MODULE: EXTRACT THE BUSCO GENES FOUND IN REFERENCE
//
GAWK_EXTRACT_BUSCOGENE (
ch_grab,
ch_busco_full_table,
file("${projectDir}/bin/get_busco_gene.awk"),
false
)
Expand Down Expand Up @@ -92,33 +88,22 @@ workflow BUSCO_ANNOTATION {
ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions )

//
// LOGIC: AGGREGATE DATA AND SORT BRANCH ON CLASS
// SUBWORKFLOW: RUN ANCESTRAL BUSCO ID (ONLY AVAILABLE FOR LEPIDOPTERA)
// LOGIC: AGGREGATE DATA AND FILTER ON CLASS
//
lineageinfo
.combine(BUSCO_BUSCO.out.busco_dir)
.combine(ch_busco_full_table)
.combine(ancestral_table)
.branch {
lep: it[0].split('_')[0] == "lepidoptera"
general: it[0].split('_')[0] != "lepidoptera"
.filter { lineage, _meta, _btable, _atable ->
lineage.split('_')[0] == "lepidoptera"
}
.set{ ch_busco_data }

//
// LOGIC: BUILD NEW INPUT CHANNEL FOR ANCESTRAL ID
//
ch_busco_data
.lep
.multiMap { lineage, meta, busco_dir, ancestral_table ->
busco_dir: tuple( meta, busco_dir )
atable: ancestral_table
}
.set{ ch_busco_lep_data }

//
// SUBWORKFLOW: RUN ANCESTRAL BUSCO ID (ONLY AVAILABLE FOR LEPIDOPTERA)
//
.multiMap { _lineage, meta, busco_full_table, ancestral_table_ ->
busco_table: tuple( meta, busco_full_table )
atable: ancestral_table_
}
.set{ ch_busco_lep_data }
ANCESTRAL_GENE (
ch_busco_lep_data.busco_dir,
ch_busco_lep_data.busco_table,
dot_genome,
buscogene_as,
ch_busco_lep_data.atable
Expand All @@ -131,17 +116,3 @@ workflow BUSCO_ANNOTATION {
versions = ch_versions

}
// GrabFiles: takes a [meta, path] tuple, stages the path as "in", and outputs
// [meta, files] where files are the matches of in/*/*/full_table.tsv.
process GrabFiles {
    // Tagged per meta.id; uses the 'local' executor and the 'process_tiny' label.
    tag "${meta.id}"
    executor 'local'
    label 'process_tiny'

    input:
    tuple val(meta), path("in")

    output:
    tuple val(meta), path("in/*/*/full_table.tsv")

    // Nothing is computed: the script is only `true`, so the output glob
    // alone determines what is emitted.
    script:
    "true"
}
27 changes: 14 additions & 13 deletions subworkflows/local/gene_alignment/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ workflow GENE_ALIGNMENT {
dot_genome // Channel: [ val(meta), path(file) ]
reference_tuple // Channel: [ val(meta), path(file) ]
reference_index // Channel: [ val(meta), path(file) ]
alignment_genesets // Channel: val(geneset_id)
alignment_genesets // Channel: [ path(geneset_csv) ]
intron_size // Channel: val(50k)
as_files // Channel: [ val(meta), path(file) ]

Expand All @@ -37,7 +37,7 @@ workflow GENE_ALIGNMENT {
// LIST IS MERGED WITH DATA_DIRECTORY AND ORGANISM_CLASS
//
ch_data = alignment_genesets
.splitCsv()
// .splitCsv()
.flatten()

//
Expand All @@ -48,18 +48,19 @@ workflow GENE_ALIGNMENT {
// SUBWORKFLOW
//
ch_data
.map {
geneset_path ->
file(geneset_path)
}
// .map {
// geneset_path ->
// file(geneset_path)
// }
.splitCsv( header: true, sep:',')
.map( row ->
tuple([ org: row.org,
type: row.type,
id: row.data_file.split('/')[-1].split('.MOD.')[0]
],
file(row.data_file)
))
.map{ row ->
def data_file = file(row.data_file, checkIfExists: true)
tuple([ org: row.org,
type: row.type,
id: data_file.name.split('.MOD.').first()
],
data_file
)}
.branch {
pep: it[0].type == 'pep'
gen: it[0].type == 'cdna'
Expand Down
16 changes: 8 additions & 8 deletions subworkflows/local/hic_mapping/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,10 @@ workflow HIC_MAPPING {
//
PRETEXT_INGEST_SNDRD (
PRETEXTMAP_STANDRD.out.pretext,
gap_file.map{it -> it[1]},
coverage_file.map{it -> it[1]},
telo_file.map{it -> it[1]},
repeat_density_file.map{it -> it[1]}
gap_file.map{ _meta, gapfile -> gapfile },
coverage_file.map{ _meta, covfile -> covfile },
telo_file.map{ _meta, telofile -> telofile },
repeat_density_file.map{ _meta, rdfile -> rdfile }
)
ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions )

Expand All @@ -199,10 +199,10 @@ workflow HIC_MAPPING {

PRETEXT_INGEST_HIRES (
PRETEXTMAP_HIGHRES.out.pretext,
gap_file,
coverage_file,
telo_file,
repeat_density_file
gap_file.map{ _meta, gapfile -> gapfile },
coverage_file.map{ _meta, covfile -> covfile },
telo_file.map{ _meta, telofile -> telofile },
repeat_density_file.map{ _meta, rdfile -> rdfile }
)
ch_versions = ch_versions.mix( PRETEXT_INGEST_HIRES.out.versions )
hires_pretext = PRETEXT_INGEST_HIRES.out.pretext
Expand Down
1 change: 0 additions & 1 deletion subworkflows/local/kmer/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ workflow KMER {
}
.set { get_reads_input }


//
// MODULE: JOIN PACBIO READ
//
Expand Down
12 changes: 6 additions & 6 deletions subworkflows/local/synteny/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@ include { MINIMAP2_ALIGN } from '../../../modules/nf-core/minimap2/align/
workflow SYNTENY {
take:
reference_tuple // Channel: tuple [ val(meta), path(file) ]
synteny_paths // Channel: val(meta)
synteny_paths // Channel: List [ path(file) ]

main:
ch_versions = Channel.empty()

ch_data = synteny_paths
.splitCsv()
// .splitCsv()
.flatten()

//
// LOGIC: PULL SYNTENIC GENOMES FROM DIRECTORY STRUCTURE
// AND PARSE INTO CHANNEL PER GENOME
//
ch_data
.map{synteny_path ->
file(synteny_path)
}
// .map{synteny_path ->
// file(synteny_path)
// }
.combine(reference_tuple)
.multiMap{syntenic_ref, meta, ref ->
syntenic_tuple : tuple([ id: syntenic_ref.toString().split('/')[-1].split(/\.fa(sta)?/)[0],
syntenic_tuple : tuple([ id: syntenic_ref.baseName,
class: meta.class,
project_type: meta.project_type
],
Expand Down
Loading