Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f2404e7
feat: Rename workflow to `NOVEL_ALLELES_AND_GENOTYPE` and integrate `…
ayeletperes Jan 26, 2026
becd48c
Update novel allele and genotype workflow
ayeletperes Jan 29, 2026
20117d7
Merge remote-tracking branch 'ggabernet/novel-allele-and-genotype' in…
ayeletperes Jan 29, 2026
28a6286
feat: Pass report logo to novel allele subworkflow and update segment…
ayeletperes Jan 29, 2026
f2a900d
fix: specify explicit relative path for CLONAL_ANALYSIS module include
ayeletperes Jan 29, 2026
0fb2457
refactor: rename `NOVEL_ALLELES_AND_GENOTYPE` to `NOVEL_ALLELES_AND_G…
ayeletperes Jan 29, 2026
fa816db
removed duplicated line
ayeletperes Jan 29, 2026
c6d3b48
feat: Conditionally execute novel allele inference and reassignment s…
ayeletperes Jan 29, 2026
dfdde96
feat: pass report logo image to NOVEL_ALLELES_AND_GENOTYPING process
ayeletperes Jan 29, 2026
a3bf901
Update schema and novel allele workflow parameters
ayeletperes Jan 29, 2026
091b514
update schema
ayeletperes Jan 29, 2026
2302fdb
update config for genotype
ayeletperes Jan 29, 2026
d14fb1c
refactor: Replace `NOVEL_ALLELES_AND_GENOTYPING` with `NOVEL_ALLELES_…
ayeletperes Jan 29, 2026
ff8f17a
Fix: Correctly trigger novel allele and genotype inference when genot…
ayeletperes Jan 29, 2026
e0adaa3
refactor: Rename `NOVEL_ALLELES_AND_GENOTYPE` process to `NOVEL_ALLEL…
ayeletperes Jan 29, 2026
d777122
feat: Pass validated samplesheet as input to the NOVEL_ALLELES_AND_GE…
ayeletperes Jan 29, 2026
2a077a7
Remove the `reference` output from `reassign_alleles` and correct the…
ayeletperes Jan 29, 2026
01db128
refactor: Simplify novel allele and genotyping subworkflow by removin…
ayeletperes Jan 29, 2026
2b6a7bb
Remove the `force=FALSE` parameter from enchantr module calls.
ayeletperes Jan 29, 2026
a849cb3
fix: Update enchantr species parameter from 'auto' to 'human' in nove…
ayeletperes Jan 30, 2026
8bc03f6
refactor: Update `enchantr` module output paths for `db_genotype` and…
ayeletperes Jan 30, 2026
7fae5a7
refactor: Update `reassign_alleles` module output to `tab` with metad…
ayeletperes Jan 30, 2026
ba1e2ab
fix: Update `reassign_alleles` output file pattern from `_reassigned.…
ayeletperes Jan 30, 2026
85dc8be
feat: Introduce `single_clone_representative` parameter to optionally…
ayeletperes Jan 30, 2026
2d53838
fix: Update segment arguments from string to list
ayeletperes Jan 30, 2026
a97f558
feat: Update `novel_allele_inference` and `bayesian_genotype_inferenc…
ayeletperes Jan 30, 2026
543b9f9
refactor: Update `db_genotype` output path to `*_report/references/*/…
ayeletperes Jan 30, 2026
1f994c5
feat: integrate clonal assignment with new `genotype_clone_threshold`…
ayeletperes Jan 30, 2026
7de1e4f
refactor: wrap `genotype_clone_threshold` in a list when calling `CLO…
ayeletperes Jan 30, 2026
630ee15
refactor: remove commented-out CLONAL_ANALYSIS block
ayeletperes Jan 30, 2026
8135df1
feat: Refactor novel allele and genotyping subworkflow to pass refere…
ayeletperes Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions conf/test_genotyping.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,7 @@ params {

// Genotyping
genotyping = true
single_clone_representative = true
// Skip clonal analysis
skip_clonal_analysis = true
}
15 changes: 4 additions & 11 deletions modules/local/enchantr/bayesian_genotype_inference.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,32 +27,25 @@ process BAYESIAN_GENOTYPE_INFERENCE {
container "docker.io/immcantation/airrflow:genotyping"

input:
tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format
path reference_fasta
path repertoires_samplesheet
tuple val(meta), path(tabs), path(reference_fasta) // meta, sequence tsv in AIRR format

output:
path("*/*/db_genotype"), emit: reference // reference folder
tuple val(meta), path("*_report/references/*/db_genotype"), emit: reference // reference folder
path("*/*_command_log.txt"), emit: logs //process logs
path "*_report"
path "versions.yml", emit: versions


script:
def args = task.ext.args ? asString(task.ext.args) : ''
def input = ""
if (repertoires_samplesheet) {
input = repertoires_samplesheet
} else {
input = tabs.join(',')
}
def input = tabs.join(',')
"""
Rscript -e "enchantr::enchantr_report('tigger_bayesian_genotype', \\
report_params=list('input'='${input}', \\
'imgt_db'='${reference_fasta}', \\
'species'='auto', \\
'genotypeby'='${params.genotypeby}', \\
'force'=FALSE, \\
'single_clone_representative'='${params.single_clone_representative}', \\
'outdir'=getwd(), \\
'log'='${meta.id}_bayesian_genotype_inference_command_log' ${args}))"

Expand Down
14 changes: 3 additions & 11 deletions modules/local/enchantr/novel_allele_inference.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,23 @@ process NOVEL_ALLELE_INFERENCE {
container "docker.io/immcantation/airrflow:genotyping"

input:
tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format
path reference_fasta
path repertoires_samplesheet
tuple val(meta), path(tabs), path(reference_fasta) // meta, sequence tsv in AIRR format, reference fasta

output:
path("*/*/db_novel"), emit: reference // reference folder
tuple val(meta), path("*_report/db_novel"), emit: reference // reference folder
path("*/*_command_log.txt"), emit: logs //process logs
path "*_report", optional: true, emit: report
path "versions.yml", emit: versions


script:
def args = task.ext.args ? asString(task.ext.args) : ''
def input = ""
if (repertoires_samplesheet) {
input = repertoires_samplesheet
} else {
input = tabs.join(',')
}
def input = tabs.join(',')
"""
Rscript -e "enchantr::enchantr_report('novel_allele_inference', \\
report_params=list('input'='${input}', \\
'imgt_db'='${reference_fasta}', \\
'species'='auto', \\
'force'=FALSE, \\
'outdir'=getwd(), \\
'nproc'=${task.cpus}, \\
'log'='${meta.id}_novel_allele_inference_command_log' ${args}))"
Expand Down
22 changes: 8 additions & 14 deletions modules/local/enchantr/reassign_alleles.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@ process REASSIGN_ALLELES {
container "docker.io/immcantation/airrflow:genotyping"

input:
tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format
path reference_fasta
path repertoires_samplesheet
tuple val(meta), path(tabs), path(reference_fasta) // meta, sequence tsv in AIRR format, reference fasta
val segments // which segments to reassign alleles to
val outputby // which field to use for output
//TODO: did we want to handle all segments at once? Then this val channel would not be needed.

// After novel allele inference only the V segment needs to be reassigned; going over all segments would be a waste of time.
//TODO: Check if we need the outputby parameter. Right now this is the same as the genotypeby parameter.
output:
path("*/*/db_genotype"), emit: reference // reference folder
path("*/*_reassigned.tsv"), emit: repertoires // reassigned repertoire
tuple val(meta), path("*/*/*reassign-pass.tsv"), emit: tab // reassigned repertoire
path("*/*_command_log.txt"), emit: logs //process logs
path "*_report"
path "versions.yml", emit: versions
Expand All @@ -44,20 +43,15 @@ process REASSIGN_ALLELES {
script:
def args = task.ext.args ? asString(task.ext.args) : ''
def segs = segments.join(",")
def input = ""
if (repertoires_samplesheet) {
input = repertoires_samplesheet
} else {
input = tabs.join(',')
}
def input = tabs.join(',')

"""
Rscript -e "enchantr::enchantr_report('reassign_alleles', \\
report_params=list('input'='${input}', \\
'imgt_db'='${reference_fasta}', \\
'species'='auto', \\
'outputby'='${params.outputby}', \\
'outputby'='${outputby}', \\
'segments'='${segs}', \\
'force'=FALSE, \\
'outdir'=getwd(), \\
'log'='${meta.id}_reassign_alleles_command_log' ${args}))"

Expand Down
6 changes: 4 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ params {
remove_chimeric = false
detect_contamination = false
collapseby = 'sample_id'

// -----------------------
// clonal analysis options
// -----------------------
Expand All @@ -121,8 +121,10 @@ params {
// -----------------------
genotyping = false
genotypeby = 'subject_id'
reassignby = 'sample_id'
novel_allele_inference = true

genotype_clone_threshold = '0.2'
single_clone_representative = true
// -----------------------
// translate embed options
// -----------------------
Expand Down
39 changes: 39 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,42 @@
"help_text": "By default, the pipeline will define clones for each of the samples, as two sequences having the same V-gene assignment, C-gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the CDR3 sequences will be assessed by Hamming distances. \n\nA distance threshold for determining if two sequences come from the same clone or not is automatically determined by the process find threshold. Alternatively, a hamming distance threshold can be manually set by setting the `--clonal_threshold` parameter.",
"fa_icon": "fab fa-pagelines"
},
"genotyping_and_novel_alleles_options": {
"title": "Genotyping and Novel Alleles options",
"type": "object",
"description": "Options for genotyping and novel allele inference.",
"default": "",
"properties": {
"genotyping": {
"type": "boolean",
"description": "Perform TIgGER genotype inference.",
"fa_icon": "fas fa-dna"
},
"genotypeby": {
"type": "string",
"default": "subject_id",
"description": "Name of the field used to group data files to infer genotype.",
"fa_icon": "fab fa-pagelines"
},
"genotype_clone_threshold": {
"type": "string",
"default": "0.2",
"description": "Threshold for determining if two sequences come from the same clone or not.",
"fa_icon": "fas fa-dna"
},
"single_clone_representative": {
"type": "boolean",
"description": "Assign clones before genotype inference and use a single representative sequence per clone.",
"fa_icon": "fas fa-dna"
},
"novel_allele_inference": {
"type": "boolean",
"description": "Perform TIgGER novel allele inference.",
"fa_icon": "fas fa-dna"
}
},
"fa_icon": "fas fa-dna"
},
"translation_and_embedding_options": {
"title": "Translation and embedding options",
"type": "object",
Expand Down Expand Up @@ -847,6 +883,9 @@
{
"$ref": "#/$defs/clonal_analysis_options"
},
{
"$ref": "#/$defs/genotyping_and_novel_alleles_options"
},
{
"$ref": "#/$defs/translation_and_embedding_options"
},
Expand Down
125 changes: 84 additions & 41 deletions subworkflows/local/novel_alleles_and_genotyping.nf
Original file line number Diff line number Diff line change
@@ -1,68 +1,112 @@
include { NOVEL_ALLELE_INFERENCE } from '../../modules/local/enchantr/novel_allele_inference'
include { BAYESIAN_GENOTYPE_INFERENCE } from '../../modules/local/enchantr/bayesian_genotype_inference'
include { REASSIGN_ALLELES as REASSIGN_ALLELES_NOVEL; REASSIGN_ALLELES as REASSIGN_ALLELES_GENOTYPE} from '../../modules/local/enchantr/reassign_alleles'

include { CLONAL_ANALYSIS } from './clonal_analysis.nf'
include { CLONAL_ASSIGNMENT as CLONAL_ASSIGNMENT_COMPUTE } from '../../modules/local/enchantr/clonal_assignment'

workflow NOVEL_ALLELES_AND_GENOTYPING {
take:
ch_repertoire
ch_reference_fasta
ch_validated_samplesheet
ch_logo

main:
ch_versions = Channel.empty()
ch_logs = Channel.empty()

def outputby = params.genotypeby=="sample_id" ? "id" : params.genotypeby //TODO: we need to change this so we can handle the cases of inferring based on naive and reassigning all
// merge all repertoires by genotypeby metadata field
ch_repertoire.map{ it -> [ it[0]."${params.genotypeby}",
it[0].id,
it[0].subject_id,
it[0].species,
it[0].single_cell,
it[0].locus,
it[1] ] }
ch_repertoire
.combine(ch_reference_fasta)
.map{ it ->
def meta = it[0]
def rep = it[1]
def ref = it[2]
def genotypeby = params.genotypeby=="sample_id" ? "id" : params.genotypeby
[ meta."${genotypeby}",
meta.id,
meta.subject_id,
meta.species,
meta.single_cell,
meta.locus,
rep,
ref ] }
.groupTuple()
.map{ get_meta_tabs(it) }
.set{ ch_grouped_repertoires }

//TODO: conditional on params.novel_allele_inference
// infer novel alleles
NOVEL_ALLELE_INFERENCE (
ch_grouped_repertoires,
ch_reference_fasta,
ch_validated_samplesheet.collect()
)

// reassign novel alleles (we can skip this step if no novel alleles were inferred)

REASSIGN_ALLELES_NOVEL (
ch_grouped_repertoires,
NOVEL_ALLELE_INFERENCE.out.reference,
ch_validated_samplesheet.collect(),
"segments" //TODO: update this to pass actual segments.
)


// infer genotype (gets the reference from novel alleles inference in any case)

if (params.novel_allele_inference) {
NOVEL_ALLELE_INFERENCE (
ch_grouped_repertoires
)

// reassign novel alleles (we can skip this step if no novel alleles were inferred)
ch_grouped_repertoires
.join(NOVEL_ALLELE_INFERENCE.out.reference)
.map { it ->
def meta = it[0]
def reps = it[1]
def new_ref = it[3]
[ meta, reps, new_ref ]
}
.set{ ch_for_genotyping }

REASSIGN_ALLELES_NOVEL (
ch_for_genotyping,
["v"],
outputby
)

REASSIGN_ALLELES_NOVEL.out.tab
.join(NOVEL_ALLELE_INFERENCE.out.reference)
.set{ ch_for_genotyping }


} else {
ch_for_genotyping = ch_grouped_repertoires
}

if (params.single_clone_representative) {
// TODO: Check whether a separate cloneby parameter is needed, or whether it can be the same as genotypeby here.
// create separate channels for repertoire and reference based on the genotypeby metadata field
ch_for_genotyping
.map{ it -> [it[0], it[1]] }
.set{ ch_for_genotyping_rep }
ch_for_genotyping
.map{ it -> it[2] }
.set{ ch_for_genotyping_ref }
CLONAL_ASSIGNMENT_COMPUTE(
ch_for_genotyping_rep,
[params.genotype_clone_threshold],
ch_for_genotyping_ref,
[]
)
CLONAL_ASSIGNMENT_COMPUTE.out.tab
.join(ch_for_genotyping
.map{ it -> [it[0], it[2]] })
.set{ ch_for_genotyping }
}

// infer genotype
BAYESIAN_GENOTYPE_INFERENCE (
REASSIGN_ALLELES_NOVEL.out.repertoires,
NOVEL_ALLELE_INFERENCE.out.reference,
ch_validated_samplesheet.collect()
ch_for_genotyping
)

ch_grouped_repertoires
.map{ it -> [it[0], it[1]] }
.join(BAYESIAN_GENOTYPE_INFERENCE.out.reference)
.set{ ch_for_reassign }

// reassign genotypes (gets the reference from genotype inference in any case)

// reassign genotypes
REASSIGN_ALLELES_GENOTYPE (
REASSIGN_ALLELES_NOVEL.out.repertoires,
BAYESIAN_GENOTYPE_INFERENCE.out.reference,
ch_validated_samplesheet.collect(),
"segments" //TODO: update this to pass actual segments.
ch_for_reassign,
["auto"],
outputby
)


emit:
repertoire = ch_repertoire
repertoire = REASSIGN_ALLELES_GENOTYPE.out.tab
versions = ch_versions
logs = ch_logs
}
Expand All @@ -79,7 +123,6 @@ def get_meta_tabs(arr) {

def array = []

array = [ meta, arr[6].flatten() ]

array = [ meta, arr[6].flatten(), arr[7][0] ]
return array
}
14 changes: 8 additions & 6 deletions workflows/airrflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -247,19 +247,21 @@ workflow AIRRFLOW {
ch_repertoires_after_qc = ch_bulk_filtered
.mix(SINGLE_CELL_QC_AND_FILTERING.out.repertoires)

// Novel allele inference and genotyping
// TODO: for now clonal analysis and genotyping are independent,
// but once genotyping is implemented the personalized reference should be used for clonal analysis
// when genotyping is performed.

// Novel alleles and genotype inference
if (params.genotyping) {
NOVEL_ALLELES_AND_GENOTYPING(
ch_repertoires_after_qc,
VDJ_ANNOTATION.out.reference_fasta.collect(),
ch_validated_samplesheet.collect()
ch_validated_samplesheet.collect(),
ch_report_logo_img.collect().ifEmpty([])
)
ch_versions = ch_versions.mix( NOVEL_ALLELES_AND_GENOTYPING.out.versions )
}

// TODO: for now clonal analysis and genotyping are independent,
// but once genotyping is implemented the personalized reference should be used for clonal analysis
// when genotyping is performed.

// Clonal analysis
if (!params.skip_clonal_analysis) {
CLONAL_ANALYSIS(
Expand Down
Loading