From f2404e7e9ab7663dac0a7c2b54655f2eb0d9291f Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Mon, 26 Jan 2026 09:57:15 -0500 Subject: [PATCH 01/30] feat: Rename workflow to `NOVEL_ALLELES_AND_GENOTYPE` and integrate `CLONAL_ANALYSIS` subworkflow, feeding its repertoire output into genotype inference. --- subworkflows/local/novel_alleles_and_genotype.nf | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/novel_alleles_and_genotype.nf b/subworkflows/local/novel_alleles_and_genotype.nf index 1c152f90..7b049400 100644 --- a/subworkflows/local/novel_alleles_and_genotype.nf +++ b/subworkflows/local/novel_alleles_and_genotype.nf @@ -1,9 +1,9 @@ include { NOVEL_ALLELE_INFERENCE } from '../../modules/local/enchantr/novel_allele_inference' include { BAYESIAN_GENOTYPE_INFERENCE } from '../../modules/local/enchantr/bayesian_genotype_inference' include { REASSIGN_ALLELES as REASSIGN_ALLELES_NOVEL; REASSIGN_ALLELES as REASSIGN_ALLELES_GENOTYPE} from '../../modules/local/enchantr/reassign_alleles' +include { CLONAL_ANALYSIS } from 'clonal_analysis.nf' - -workflow CLONAL_ANALYSIS { +workflow NOVEL_ALLELES_AND_GENOTYPE { take: ch_repertoire ch_reference_fasta @@ -27,11 +27,19 @@ workflow CLONAL_ANALYSIS { NOVEL_ALLELE_INFERENCE.out.reference ) + // infer clones (gets the reference from novel alleles inference in any case) + + CLONAL_ANALYSIS( + REASSIGN_ALLELES_NOVEL.out.repertoire, + NOVEL_ALLELE_INFERENCE.out.reference, + ch_logo.collect().ifEmpty([]) + ) + ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) // infer genotype (gets the reference from novel alleles inference in any case) BAYESIAN_GENOTYPE_INFERENCE ( - REASSIGN_ALLELES_NOVEL.out.repertoire, + CLONAL_ANALYSIS.out.repertoire, NOVEL_ALLELE_INFERENCE.out.reference ) From becd48c83c514011dbbcd02d1f6874cf2f156574 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 15:41:33 -0500 Subject: [PATCH 02/30] Update novel allele and genotype workflow 
--- conf/test_novel_allele_and_genotype.config | 41 +++++++++++++++++++ .../enchantr/bayesian_genotype_inference.nf | 2 +- .../local/enchantr/novel_allele_inference.nf | 2 +- modules/local/enchantr/reassign_alleles.nf | 2 +- nextflow.config | 7 ++++ workflows/airrflow.nf | 10 +++++ 6 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 conf/test_novel_allele_and_genotype.config diff --git a/conf/test_novel_allele_and_genotype.config b/conf/test_novel_allele_and_genotype.config new file mode 100644 index 00000000..2a57ae6f --- /dev/null +++ b/conf/test_novel_allele_and_genotype.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/airrflow -profile test_embeddings, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test novel allele and genotype inference' + config_profile_description = 'Minimal test dataset to test novel allele and genotype inference' + + // Input data + mode = 'assembled' + input = pipelines_testdata_base_path + 'testdata-reveal/test_assembled_metadata_tiny.tsv' + reference_fasta = pipelines_testdata_base_path + 'database-cache/imgtdb_base.zip' + reference_igblast = pipelines_testdata_base_path + 'database-cache/igblast_base.zip' + + reassign = true + productive_only = true + lineage_trees = false + skip_report_threshold = true + skip_all_clones_report = true + skip_clonal_analysis = true + skip_report = true +} + +process{ +} diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf 
b/modules/local/enchantr/bayesian_genotype_inference.nf index 29526d8a..7756979b 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -24,7 +24,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } - container "docker.io/immcantation/airrflow:4.2.0" + container "docker.io/immcantation/airrflow:4.4.0" input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index 6d1f95e8..3cfcd150 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -24,7 +24,7 @@ process NOVEL_ALLELE_INFERENCE { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } - container "docker.io/immcantation/airrflow:4.2.0" + container "docker.io/immcantation/airrflow:4.4.0" input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index df7135e1..f81808a4 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -24,7 +24,7 @@ process REASSIGN_ALLELES { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." 
} - container "docker.io/immcantation/airrflow:4.2.0" + container "docker.io/immcantation/airrflow:4.4.0" input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format diff --git a/nextflow.config b/nextflow.config index 2f4f624d..ce001174 100644 --- a/nextflow.config +++ b/nextflow.config @@ -98,6 +98,13 @@ params { detect_contamination = false collapseby = 'sample_id' + // ----------------------- + // novel alleles and genotype inference options + // ----------------------- + skip_novel_alleles_and_genotype = false + genotypeby = 'subject_id' + outputby = 'subject_id' + // ----------------------- // clonal analysis options // ----------------------- diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 727f15ea..71fb2cb4 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -45,6 +45,7 @@ include { SC_RAW_INPUT } from '../subworkflows/local/sc_raw_inp include { FASTQ_INPUT_CHECK } from '../subworkflows/local/fastq_input_check' include { RNASEQ_INPUT } from '../subworkflows/local/rnaseq_input' include { TRANSLATE_EMBED } from '../subworkflows/local/translate_embed' +include { NOVEL_ALLELES_AND_GENOTYPE } from '../subworkflows/local/novel_alleles_and_genotype' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -249,6 +250,15 @@ workflow AIRRFLOW { .mix(SINGLE_CELL_QC_AND_FILTERING.out.repertoires) .dump(tag: 'sc bulk mix') + // Novel alleles and genotype inference + if (!params.skip_novel_alleles_and_genotype) { + NOVEL_ALLELES_AND_GENOTYPE( + ch_repertoires_after_qc, + VDJ_ANNOTATION.out.reference_fasta.collect() + ) + ch_versions = ch_versions.mix( NOVEL_ALLELES_AND_GENOTYPE.out.versions ) + } + // Clonal analysis if (!params.skip_clonal_analysis) { CLONAL_ANALYSIS( From 28a6286e70c070d6c65965121a3989e8c7e9c898 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:03:52 -0500 Subject: [PATCH 03/30] feat: Pass report logo to novel allele subworkflow and update segment 
handling arguments. --- subworkflows/local/novel_alleles_and_genotyping.nf | 5 +++-- workflows/airrflow.nf | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index e16a5c7c..c8906b99 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -8,6 +8,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { ch_repertoire ch_reference_fasta ch_validated_samplesheet + ch_logo main: ch_versions = Channel.empty() @@ -39,7 +40,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { ch_grouped_repertoires, NOVEL_ALLELE_INFERENCE.out.reference, ch_validated_samplesheet.collect(), - "segments" //TODO: update this to pass actual segments. + "v" //TODO: update this to pass actual segments. ) // infer clones (gets the reference from novel alleles inference in any case) @@ -65,7 +66,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { REASSIGN_ALLELES_NOVEL.out.repertoires, BAYESIAN_GENOTYPE_INFERENCE.out.reference, ch_validated_samplesheet.collect(), - "segments" //TODO: update this to pass actual segments. + "auto" //TODO: update this to pass actual segments. 
) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 383e0f5a..f5c8e3e5 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -265,7 +265,8 @@ workflow AIRRFLOW { if (!params.skip_novel_alleles_and_genotype) { NOVEL_ALLELES_AND_GENOTYPE( ch_repertoires_after_qc, - VDJ_ANNOTATION.out.reference_fasta.collect() + VDJ_ANNOTATION.out.reference_fasta.collect(), + ch_report_logo_img.collect().ifEmpty([]) ) ch_versions = ch_versions.mix( NOVEL_ALLELES_AND_GENOTYPE.out.versions ) } From f2a900d729c387fc7b7e4a7ac1710b2c5df815bd Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:16:21 -0500 Subject: [PATCH 04/30] fix: specify explicit relative path for CLONAL_ANALYSIS module include --- subworkflows/local/novel_alleles_and_genotyping.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index c8906b99..9b342cc4 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -1,7 +1,7 @@ include { NOVEL_ALLELE_INFERENCE } from '../../modules/local/enchantr/novel_allele_inference' include { BAYESIAN_GENOTYPE_INFERENCE } from '../../modules/local/enchantr/bayesian_genotype_inference' include { REASSIGN_ALLELES as REASSIGN_ALLELES_NOVEL; REASSIGN_ALLELES as REASSIGN_ALLELES_GENOTYPE} from '../../modules/local/enchantr/reassign_alleles' -include { CLONAL_ANALYSIS } from 'clonal_analysis.nf' +include { CLONAL_ANALYSIS } from './clonal_analysis.nf' workflow NOVEL_ALLELES_AND_GENOTYPING { take: From 0fb2457e3e110961773ca396af16a44daa881bb8 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:18:11 -0500 Subject: [PATCH 05/30] refactor: rename `NOVEL_ALLELES_AND_GENOTYPE` to `NOVEL_ALLELES_AND_GENOTYPING` subworkflow. 
--- workflows/airrflow.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index f5c8e3e5..f820942e 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -46,7 +46,7 @@ include { SC_RAW_INPUT } from '../subworkflows/local/sc_raw_inp include { FASTQ_INPUT_CHECK } from '../subworkflows/local/fastq_input_check' include { RNASEQ_INPUT } from '../subworkflows/local/rnaseq_input' include { TRANSLATE_EMBED } from '../subworkflows/local/translate_embed' -include { NOVEL_ALLELES_AND_GENOTYPE } from '../subworkflows/local/novel_alleles_and_genotype' +include { NOVEL_ALLELES_AND_GENOTYPING } from '../subworkflows/local/novel_alleles_and_genotyping' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From fa816dbbe9f876df05dc8b3e34154b8bd2fca771 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:20:04 -0500 Subject: [PATCH 06/30] removed duplicated line --- workflows/airrflow.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index f820942e..586d640e 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -46,7 +46,6 @@ include { SC_RAW_INPUT } from '../subworkflows/local/sc_raw_inp include { FASTQ_INPUT_CHECK } from '../subworkflows/local/fastq_input_check' include { RNASEQ_INPUT } from '../subworkflows/local/rnaseq_input' include { TRANSLATE_EMBED } from '../subworkflows/local/translate_embed' -include { NOVEL_ALLELES_AND_GENOTYPING } from '../subworkflows/local/novel_alleles_and_genotyping' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From c6d3b4860bc8f2e77686307c69e5697ddb30e94f Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:28:45 -0500 Subject: [PATCH 07/30] feat: Conditionally execute novel allele inference and reassignment steps, and update `enchantr`'s `outputby` parameter to `cloneby`. 
--- modules/local/enchantr/reassign_alleles.nf | 5 ++-- .../local/novel_alleles_and_genotyping.nf | 30 +++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index e17e9acb..6d8af246 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -32,7 +32,8 @@ process REASSIGN_ALLELES { path repertoires_samplesheet val segments // which segments to reassign alleles to //TODO: did we want to handle all segments at once? Then this val channel would not be needed. - + // *After novel alleles we just need to change the V, it's a time waste to go over all segments. + //TODO: Check if we need the outputby parameter. Right now this is the same as the cloneby parameter. output: path("*/*/db_genotype"), emit: reference // reference folder path("*/*_reassigned.tsv"), emit: repertoires // reassigned repertoire @@ -55,7 +56,7 @@ process REASSIGN_ALLELES { report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ 'species'='auto', \\ - 'outputby'='${params.outputby}', \\ + 'outputby'='${params.cloneby}', \\ 'segments'='${segs}', \\ 'force'=FALSE, \\ 'outdir'=getwd(), \\ diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 9b342cc4..2437eeca 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -28,20 +28,24 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { //TODO: conditional on params.novel_allele_inference // infer novel alleles - NOVEL_ALLELE_INFERENCE ( - ch_grouped_repertoires, - ch_reference_fasta, - ch_validated_samplesheet.collect() - ) + if (params.novel_allele_inference) { + NOVEL_ALLELE_INFERENCE ( + ch_grouped_repertoires, + ch_reference_fasta, + ch_validated_samplesheet.collect() + ) - // reassign novel alleles (we can skip this step if no novel alleles 
were inferred) + // reassign novel alleles (we can skip this step if no novel alleles were inferred) - REASSIGN_ALLELES_NOVEL ( - ch_grouped_repertoires, - NOVEL_ALLELE_INFERENCE.out.reference, - ch_validated_samplesheet.collect(), - "v" //TODO: update this to pass actual segments. - ) + REASSIGN_ALLELES_NOVEL ( + ch_grouped_repertoires, + NOVEL_ALLELE_INFERENCE.out.reference, + ch_validated_samplesheet.collect(), + "v" //TODO: update this to pass actual segments. We only need to reassign V after novel allele inference. + ) + } + + // TODO: what are we doing with the reference if we are not running novel allele inference? // infer clones (gets the reference from novel alleles inference in any case) @@ -66,7 +70,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { REASSIGN_ALLELES_NOVEL.out.repertoires, BAYESIAN_GENOTYPE_INFERENCE.out.reference, ch_validated_samplesheet.collect(), - "auto" //TODO: update this to pass actual segments. + "auto" //TODO: update this to pass actual segments. We're running over all segment after genotype inference. 
) From dfdde960edcecb5ce5c3c02e983196e9e6b374b0 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:30:56 -0500 Subject: [PATCH 08/30] feat: pass report logo image to NOVEL_ALLELES_AND_GENOTYPING process --- workflows/airrflow.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 586d640e..9f5fa4ad 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -252,7 +252,8 @@ workflow AIRRFLOW { NOVEL_ALLELES_AND_GENOTYPING( ch_repertoires_after_qc, VDJ_ANNOTATION.out.reference_fasta.collect(), - ch_validated_samplesheet.collect() + ch_validated_samplesheet.collect(), + ch_report_logo_img.collect().ifEmpty([]) ) } From a3bf90102186754d5c3ea201e0a6a3140f9713f0 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:32:20 -0500 Subject: [PATCH 09/30] Update schema and novel allele workflow parameters --- nextflow_schema.json | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 364fbff3..1c0ab0da 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -553,6 +553,37 @@ "help_text": "By default, the pipeline will define clones for each of the samples, as two sequences having the same V-gene assignment, C-gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the CDR3 sequences will be assessed by Hamming distances. \n\nA distance threshold for determining if two sequences come from the same clone or not is automatically determined by the process find threshold. 
Alternatively, a hamming distance threshold can be manually set by setting the `--clonal_threshold` parameter.", "fa_icon": "fab fa-pagelines" }, + "genotyping_and_novel_alleles_options": { + "title": "Genotyping and Novel Alleles options", + "type": "object", + "description": "Options for genotyping and novel allele inference.", + "default": "", + "properties": { + "genotyping": { + "type": "boolean", + "description": "Perform TIgGER genotype inference.", + "fa_icon": "fas fa-dna" + }, + "genotypeby": { + "type": "string", + "default": "subject_id", + "description": "Name of the field used to group data files to infer genotype.", + "fa_icon": "fab fa-pagelines" + }, + "novel_allele_inference": { + "type": "boolean", + "description": "Perform TIgGER novel allele inference.", + "fa_icon": "fas fa-dna" + }, + "outputby": { + "type": "string", + "default": "subject_id", + "description": "Name of the field used to group data files for output in reassign alleles.", + "fa_icon": "fab fa-pagelines" + } + }, + "fa_icon": "fas fa-dna" + }, "translation_and_embedding_options": { "title": "Translation and embedding options", "type": "object", @@ -847,6 +878,9 @@ { "$ref": "#/$defs/clonal_analysis_options" }, + { + "$ref": "#/$defs/genotyping_and_novel_alleles_options" + }, { "$ref": "#/$defs/translation_and_embedding_options" }, From 091b514b4ad4d47071c25e03cbeaac264b54dbd0 Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:33:27 -0500 Subject: [PATCH 10/30] update schema --- nextflow_schema.json | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c0ab0da..eab1a4e0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -574,12 +574,6 @@ "type": "boolean", "description": "Perform TIgGER novel allele inference.", "fa_icon": "fas fa-dna" - }, - "outputby": { - "type": "string", - "default": "subject_id", - "description": "Name of the field used to group data files for output in reassign alleles.", 
- "fa_icon": "fab fa-pagelines" } }, "fa_icon": "fas fa-dna" From 2302fdb142a346e542104693cbe2ab1585943a5b Mon Sep 17 00:00:00 2001 From: ayelet peres Date: Thu, 29 Jan 2026 17:36:00 -0500 Subject: [PATCH 11/30] update config for genotype --- nextflow.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index ffb44109..ef5f0861 100644 --- a/nextflow.config +++ b/nextflow.config @@ -97,13 +97,6 @@ params { remove_chimeric = false detect_contamination = false collapseby = 'sample_id' - - // ----------------------- - // novel alleles and genotype inference options - // ----------------------- - skip_novel_alleles_and_genotype = false - genotypeby = 'subject_id' - outputby = 'subject_id' // ----------------------- // clonal analysis options From d14fb1c10c14a7a4c953e0addcb550c0c35d6482 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 17:44:38 -0500 Subject: [PATCH 12/30] refactor: Replace `NOVEL_ALLELES_AND_GENOTYPING` with `NOVEL_ALLELES_AND_GENOTYPE` and update its conditional execution logic. --- workflows/airrflow.nf | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 9f5fa4ad..65b4f8bc 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -247,22 +247,12 @@ workflow AIRRFLOW { ch_repertoires_after_qc = ch_bulk_filtered .mix(SINGLE_CELL_QC_AND_FILTERING.out.repertoires) - // Novel allele inference and genotyping - if (params.genotyping) { - NOVEL_ALLELES_AND_GENOTYPING( - ch_repertoires_after_qc, - VDJ_ANNOTATION.out.reference_fasta.collect(), - ch_validated_samplesheet.collect(), - ch_report_logo_img.collect().ifEmpty([]) - ) - } - // TODO: for now clonal analysis and genotyping are independent, // but once genotyping is implemented the personalized reference should be used for clonal analysis // when genotyping is performed. 
// Novel alleles and genotype inference - if (!params.skip_novel_alleles_and_genotype) { + if (!params.genotyping) { NOVEL_ALLELES_AND_GENOTYPE( ch_repertoires_after_qc, VDJ_ANNOTATION.out.reference_fasta.collect(), From ff8f17a96454a19e456ed7767f6b8db223b7afc8 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 17:57:32 -0500 Subject: [PATCH 13/30] Fix: Correctly trigger novel allele and genotype inference when genotyping is enabled and add `skip_clonal_analysis` to test configuration. --- conf/test_genotyping.config | 2 ++ workflows/airrflow.nf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/test_genotyping.config b/conf/test_genotyping.config index 6313eb23..2244c00b 100644 --- a/conf/test_genotyping.config +++ b/conf/test_genotyping.config @@ -27,4 +27,6 @@ params { // Genotyping genotyping = true + // Skip clonal analysis + skip_clonal_analysis = false } diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 65b4f8bc..9c09aef3 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -252,7 +252,7 @@ workflow AIRRFLOW { // when genotyping is performed. // Novel alleles and genotype inference - if (!params.genotyping) { + if (params.genotyping) { NOVEL_ALLELES_AND_GENOTYPE( ch_repertoires_after_qc, VDJ_ANNOTATION.out.reference_fasta.collect(), From e0adaa325162e59d2fe806ec9f871393e14e489a Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 18:00:04 -0500 Subject: [PATCH 14/30] refactor: Rename `NOVEL_ALLELES_AND_GENOTYPE` process to `NOVEL_ALLELES_AND_GENOTYPING`. 
--- workflows/airrflow.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 9c09aef3..0c2e782f 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -253,12 +253,12 @@ workflow AIRRFLOW { // Novel alleles and genotype inference if (params.genotyping) { - NOVEL_ALLELES_AND_GENOTYPE( + NOVEL_ALLELES_AND_GENOTYPING( ch_repertoires_after_qc, VDJ_ANNOTATION.out.reference_fasta.collect(), ch_report_logo_img.collect().ifEmpty([]) ) - ch_versions = ch_versions.mix( NOVEL_ALLELES_AND_GENOTYPE.out.versions ) + ch_versions = ch_versions.mix( NOVEL_ALLELES_AND_GENOTYPING.out.versions ) } // Clonal analysis From d7771228e5698d35052b3219bbd2a2be81766488 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 18:02:15 -0500 Subject: [PATCH 15/30] feat: Pass validated samplesheet as input to the NOVEL_ALLELES_AND_GENOTYPING process. --- workflows/airrflow.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 0c2e782f..037d0f55 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -256,6 +256,7 @@ workflow AIRRFLOW { NOVEL_ALLELES_AND_GENOTYPING( ch_repertoires_after_qc, VDJ_ANNOTATION.out.reference_fasta.collect(), + ch_validated_samplesheet.collect(), ch_report_logo_img.collect().ifEmpty([]) ) ch_versions = ch_versions.mix( NOVEL_ALLELES_AND_GENOTYPING.out.versions ) From 2a077a7b42138c63d30a89999b2271dbdc55b189 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 18:13:25 -0500 Subject: [PATCH 16/30] Remove the `reference` output from `reassign_alleles` and correct the `repertoire` output channel name to `repertoires` in `novel_alleles_and_genotyping`. 
--- modules/local/enchantr/reassign_alleles.nf | 1 - subworkflows/local/novel_alleles_and_genotyping.nf | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index 6d8af246..3d25fa46 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -35,7 +35,6 @@ process REASSIGN_ALLELES { // *After novel alleles we just need to change the V, it's a time waste to go over all segments. //TODO: Check if we need the outputby parameter. Right now this is the same as the cloneby parameter. output: - path("*/*/db_genotype"), emit: reference // reference folder path("*/*_reassigned.tsv"), emit: repertoires // reassigned repertoire path("*/*_command_log.txt"), emit: logs //process logs path "*_report" diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 2437eeca..429476f4 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -50,7 +50,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { // infer clones (gets the reference from novel alleles inference in any case) CLONAL_ANALYSIS( - REASSIGN_ALLELES_NOVEL.out.repertoire, + REASSIGN_ALLELES_NOVEL.out.repertoires, NOVEL_ALLELE_INFERENCE.out.reference, ch_logo.collect().ifEmpty([]) ) From 01db128ff7ea185c2de1bb5e4b7c02ae4ba455af Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 18:44:02 -0500 Subject: [PATCH 17/30] refactor: Simplify novel allele and genotyping subworkflow by removing samplesheet collection and update test configurations. 
--- conf/test_genotyping.config | 2 +- conf/test_novel_allele_and_genotype.config | 41 ------------------- .../local/novel_alleles_and_genotyping.nf | 8 ++-- 3 files changed, 5 insertions(+), 46 deletions(-) delete mode 100644 conf/test_novel_allele_and_genotype.config diff --git a/conf/test_genotyping.config b/conf/test_genotyping.config index 2244c00b..35fa782b 100644 --- a/conf/test_genotyping.config +++ b/conf/test_genotyping.config @@ -28,5 +28,5 @@ params { // Genotyping genotyping = true // Skip clonal analysis - skip_clonal_analysis = false + skip_clonal_analysis = true } diff --git a/conf/test_novel_allele_and_genotype.config b/conf/test_novel_allele_and_genotype.config deleted file mode 100644 index 2a57ae6f..00000000 --- a/conf/test_novel_allele_and_genotype.config +++ /dev/null @@ -1,41 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/airrflow -profile test_embeddings, --outdir - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test novel allele and genotype inference' - config_profile_description = 'Minimal test dataset to test novel allele and genotype inference' - - // Input data - mode = 'assembled' - input = pipelines_testdata_base_path + 'testdata-reveal/test_assembled_metadata_tiny.tsv' - reference_fasta = pipelines_testdata_base_path + 'database-cache/imgtdb_base.zip' - reference_igblast = pipelines_testdata_base_path + 'database-cache/igblast_base.zip' - - reassign = true - productive_only = true - lineage_trees = false - skip_report_threshold = true - skip_all_clones_report = true - skip_clonal_analysis = true - skip_report = true -} - -process{ -} diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 429476f4..86a8aec1 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -32,7 +32,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { NOVEL_ALLELE_INFERENCE ( ch_grouped_repertoires, ch_reference_fasta, - ch_validated_samplesheet.collect() + [] ) // reassign novel alleles (we can skip this step if no novel alleles were inferred) @@ -40,7 +40,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { REASSIGN_ALLELES_NOVEL ( ch_grouped_repertoires, NOVEL_ALLELE_INFERENCE.out.reference, - ch_validated_samplesheet.collect(), + [], "v" //TODO: update this to pass actual segments. We only need to reassign V after novel allele inference. 
) } @@ -61,7 +61,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { BAYESIAN_GENOTYPE_INFERENCE ( REASSIGN_ALLELES_NOVEL.out.repertoires, NOVEL_ALLELE_INFERENCE.out.reference, - ch_validated_samplesheet.collect() + [] ) // reassign genotypes (gets the reference from genotype inference in any case) @@ -69,7 +69,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { REASSIGN_ALLELES_GENOTYPE ( REASSIGN_ALLELES_NOVEL.out.repertoires, BAYESIAN_GENOTYPE_INFERENCE.out.reference, - ch_validated_samplesheet.collect(), + [], "auto" //TODO: update this to pass actual segments. We're running over all segment after genotype inference. ) From 2b6a7bb1ee46fc68e039f7e9bd9840a9bc2f3a9b Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 18:49:53 -0500 Subject: [PATCH 18/30] Remove the `force=FALSE` parameter from enchantr module calls. --- modules/local/enchantr/bayesian_genotype_inference.nf | 1 - modules/local/enchantr/novel_allele_inference.nf | 1 - modules/local/enchantr/reassign_alleles.nf | 1 - 3 files changed, 3 deletions(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index fb8ad57f..331e0313 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -52,7 +52,6 @@ process BAYESIAN_GENOTYPE_INFERENCE { 'imgt_db'='${reference_fasta}', \\ 'species'='auto', \\ 'genotypeby'='${params.genotypeby}', \\ - 'force'=FALSE, \\ 'outdir'=getwd(), \\ 'log'='${meta.id}_bayesian_genotype_inference_command_log' ${args}))" diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index 2d5b2d9a..38d6b503 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -51,7 +51,6 @@ process NOVEL_ALLELE_INFERENCE { report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ 'species'='auto', \\ - 'force'=FALSE, \\ 
'outdir'=getwd(), \\ 'nproc'=${task.cpus}, \\ 'log'='${meta.id}_novel_allele_inference_command_log' ${args}))" diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index 3d25fa46..2ef14981 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -57,7 +57,6 @@ process REASSIGN_ALLELES { 'species'='auto', \\ 'outputby'='${params.cloneby}', \\ 'segments'='${segs}', \\ - 'force'=FALSE, \\ 'outdir'=getwd(), \\ 'log'='${meta.id}_reassign_alleles_command_log' ${args}))" From a849cb3f877bacd503f2720f1efd27a11b5ba4c6 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 19:04:33 -0500 Subject: [PATCH 19/30] fix: Update enchantr species parameter from 'auto' to 'human' in novel allele and Bayesian genotype inference modules. --- modules/local/enchantr/bayesian_genotype_inference.nf | 2 +- modules/local/enchantr/novel_allele_inference.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index 331e0313..4f33658c 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -50,7 +50,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { Rscript -e "enchantr::enchantr_report('tigger_bayesian_genotype', \\ report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ - 'species'='auto', \\ + 'species'='human', \\ 'genotypeby'='${params.genotypeby}', \\ 'outdir'=getwd(), \\ 'log'='${meta.id}_bayesian_genotype_inference_command_log' ${args}))" diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index 38d6b503..d6f775c1 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -50,7 +50,7 @@ process NOVEL_ALLELE_INFERENCE { Rscript -e 
"enchantr::enchantr_report('novel_allele_inference', \\ report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ - 'species'='auto', \\ + 'species'='human', \\ 'outdir'=getwd(), \\ 'nproc'=${task.cpus}, \\ 'log'='${meta.id}_novel_allele_inference_command_log' ${args}))" From 8bc03f69b04defd250323e92d97094a87a80ee0c Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 19:27:49 -0500 Subject: [PATCH 20/30] refactor: Update `enchantr` module output paths for `db_genotype` and `db_novel` to `*_report`. --- modules/local/enchantr/bayesian_genotype_inference.nf | 2 +- modules/local/enchantr/novel_allele_inference.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index 4f33658c..5c5ae336 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -32,7 +32,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { path repertoires_samplesheet output: - path("*/*/db_genotype"), emit: reference // reference folder + path "*_report/db_genotype", emit: reference // reference folder path("*/*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index d6f775c1..b421d256 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -32,7 +32,7 @@ process NOVEL_ALLELE_INFERENCE { path repertoires_samplesheet output: - path("*/*/db_novel"), emit: reference // reference folder + path "*_report/db_novel", emit: reference // reference folder path("*/*_command_log.txt"), emit: logs //process logs path "*_report", optional: true, emit: report path "versions.yml", emit: versions From 7fae5a7f39f9abc6079dddd87cc333eee7f11836 Mon Sep 17 00:00:00 2001 From: 
ayeletperes Date: Thu, 29 Jan 2026 19:35:08 -0500 Subject: [PATCH 21/30] refactor: Update `reassign_alleles` module output to `tab` with metadata and adjust `novel_alleles_and_genotyping` subworkflow data flow accordingly. --- modules/local/enchantr/reassign_alleles.nf | 2 +- subworkflows/local/novel_alleles_and_genotyping.nf | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index 2ef14981..97832d49 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -35,7 +35,7 @@ process REASSIGN_ALLELES { // *After novel alleles we just need to change the V, it's a time waste to go over all segments. //TODO: Check if we need the outputby parameter. Right now this is the same as the cloneby parameter. output: - path("*/*_reassigned.tsv"), emit: repertoires // reassigned repertoire + tuple val(meta), path("*/*/*_reassigned.tsv"), emit: tab // reassigned repertoire path("*/*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 86a8aec1..acb7f8f7 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -50,7 +50,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { // infer clones (gets the reference from novel alleles inference in any case) CLONAL_ANALYSIS( - REASSIGN_ALLELES_NOVEL.out.repertoires, + REASSIGN_ALLELES_NOVEL.out.tab, NOVEL_ALLELE_INFERENCE.out.reference, ch_logo.collect().ifEmpty([]) ) @@ -59,15 +59,15 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { // infer genotype (gets the reference from novel alleles inference in any case) BAYESIAN_GENOTYPE_INFERENCE ( - REASSIGN_ALLELES_NOVEL.out.repertoires, + CLONAL_ANALYSIS.out.repertoire, NOVEL_ALLELE_INFERENCE.out.reference, [] ) - +1 // 
reassign genotypes (gets the reference from genotype inference in any case) REASSIGN_ALLELES_GENOTYPE ( - REASSIGN_ALLELES_NOVEL.out.repertoires, + ch_grouped_repertoires, BAYESIAN_GENOTYPE_INFERENCE.out.reference, [], "auto" //TODO: update this to pass actual segments. We're running over all segment after genotype inference. @@ -75,7 +75,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { emit: - repertoire = ch_repertoire + repertoire = REASSIGN_ALLELES_GENOTYPE.out.tab versions = ch_versions logs = ch_logs } From ba1e2ab182d82aa206d3934d4e5f84d5774b4882 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 19:37:33 -0500 Subject: [PATCH 22/30] fix: Update `reassign_alleles` output file pattern from `_reassigned.tsv` to `reassign-pass.tsv`. --- modules/local/enchantr/reassign_alleles.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index 97832d49..dc171b94 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -35,7 +35,7 @@ process REASSIGN_ALLELES { // *After novel alleles we just need to change the V, it's a time waste to go over all segments. //TODO: Check if we need the outputby parameter. Right now this is the same as the cloneby parameter. output: - tuple val(meta), path("*/*/*_reassigned.tsv"), emit: tab // reassigned repertoire + tuple val(meta), path("*/*/*reassign-pass.tsv"), emit: tab // reassigned repertoire path("*/*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions From 85dc8be6883731af16ee9c33256e8e2da617d27f Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 20:07:49 -0500 Subject: [PATCH 23/30] feat: Introduce `single_clone_representative` parameter to optionally perform clonal analysis before Bayesian genotype inference and correct a log file path. 
--- conf/test_genotyping.config | 1 + .../enchantr/bayesian_genotype_inference.nf | 3 +- nextflow.config | 2 +- .../local/novel_alleles_and_genotyping.nf | 34 ++++++++++++------- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/conf/test_genotyping.config b/conf/test_genotyping.config index 35fa782b..222510be 100644 --- a/conf/test_genotyping.config +++ b/conf/test_genotyping.config @@ -27,6 +27,7 @@ params { // Genotyping genotyping = true + single_clone_representative = true // Skip clonal analysis skip_clonal_analysis = true } diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index 5c5ae336..de797837 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -33,7 +33,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { output: path "*_report/db_genotype", emit: reference // reference folder - path("*/*_command_log.txt"), emit: logs //process logs + path("*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions @@ -52,6 +52,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { 'imgt_db'='${reference_fasta}', \\ 'species'='human', \\ 'genotypeby'='${params.genotypeby}', \\ + 'single_clone_representative'='${params.single_clone_representative}', \\ 'outdir'=getwd(), \\ 'log'='${meta.id}_bayesian_genotype_inference_command_log' ${args}))" diff --git a/nextflow.config b/nextflow.config index ef5f0861..e509d498 100644 --- a/nextflow.config +++ b/nextflow.config @@ -122,7 +122,7 @@ params { genotyping = false genotypeby = 'subject_id' novel_allele_inference = true - + single_clone_representative = false // ----------------------- // translate embed options // ----------------------- diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index acb7f8f7..673fd6ae 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ 
b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -43,31 +43,39 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { [], "v" //TODO: update this to pass actual segments. We only need to reassign V after novel allele inference. ) + ch_for_genotyping = REASSIGN_ALLELES_NOVEL.out.tab + ch_for_reference = NOVEL_ALLELE_INFERENCE.out.reference + } else { + ch_for_genotyping = ch_grouped_repertoires + ch_for_reference = ch_reference_fasta } // TODO: what are we doing with the reference if we are not running novel allele inference? - + // TODO: we can use a constant clonal threshold. // infer clones (gets the reference from novel alleles inference in any case) - - CLONAL_ANALYSIS( - REASSIGN_ALLELES_NOVEL.out.tab, - NOVEL_ALLELE_INFERENCE.out.reference, - ch_logo.collect().ifEmpty([]) - ) - ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) - + + if (params.single_clone_representative) { + CLONAL_ANALYSIS( + ch_for_genotyping, + ch_for_reference, + ch_logo.collect().ifEmpty([]) + ) + ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) + + ch_for_genotyping = CLONAL_ANALYSIS.out.repertoire + } // infer genotype (gets the reference from novel alleles inference in any case) BAYESIAN_GENOTYPE_INFERENCE ( - CLONAL_ANALYSIS.out.repertoire, - NOVEL_ALLELE_INFERENCE.out.reference, + ch_for_genotyping, + ch_for_reference, [] ) -1 + // reassign genotypes (gets the reference from genotype inference in any case) REASSIGN_ALLELES_GENOTYPE ( - ch_grouped_repertoires, + ch_for_genotyping, BAYESIAN_GENOTYPE_INFERENCE.out.reference, [], "auto" //TODO: update this to pass actual segments. We're running over all segment after genotype inference. 
From 2d53838d5f56966f48e85913c7f67fa819acf326 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 20:31:12 -0500 Subject: [PATCH 24/30] fix: Update segment arguments from string to list --- subworkflows/local/novel_alleles_and_genotyping.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 673fd6ae..40493dd9 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -41,7 +41,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { ch_grouped_repertoires, NOVEL_ALLELE_INFERENCE.out.reference, [], - "v" //TODO: update this to pass actual segments. We only need to reassign V after novel allele inference. + ["v"] //TODO: update this to pass actual segments. We only need to reassign V after novel allele inference. ) ch_for_genotyping = REASSIGN_ALLELES_NOVEL.out.tab ch_for_reference = NOVEL_ALLELE_INFERENCE.out.reference @@ -78,7 +78,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { ch_for_genotyping, BAYESIAN_GENOTYPE_INFERENCE.out.reference, [], - "auto" //TODO: update this to pass actual segments. We're running over all segment after genotype inference. + ["auto"] //TODO: update this to pass actual segments. We're running over all segment after genotype inference. ) From a97f5587b8d26f0420c57fc69bd2944b3e656de9 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 21:04:14 -0500 Subject: [PATCH 25/30] feat: Update `novel_allele_inference` and `bayesian_genotype_inference` to use `species='auto'` and fix log output path for the latter. 
--- modules/local/enchantr/bayesian_genotype_inference.nf | 4 ++-- modules/local/enchantr/novel_allele_inference.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index de797837..07553467 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -33,7 +33,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { output: path "*_report/db_genotype", emit: reference // reference folder - path("*_command_log.txt"), emit: logs //process logs + path("*/*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions @@ -50,7 +50,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { Rscript -e "enchantr::enchantr_report('tigger_bayesian_genotype', \\ report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ - 'species'='human', \\ + 'species'='auto', \\ 'genotypeby'='${params.genotypeby}', \\ 'single_clone_representative'='${params.single_clone_representative}', \\ 'outdir'=getwd(), \\ diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index b421d256..a1854b66 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -50,7 +50,7 @@ process NOVEL_ALLELE_INFERENCE { Rscript -e "enchantr::enchantr_report('novel_allele_inference', \\ report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ - 'species'='human', \\ + 'species'='auto', \\ 'outdir'=getwd(), \\ 'nproc'=${task.cpus}, \\ 'log'='${meta.id}_novel_allele_inference_command_log' ${args}))" From 543b9f97e6a8567b47c8151d27a6cc74db7ecba2 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 29 Jan 2026 21:55:42 -0500 Subject: [PATCH 26/30] refactor: Update `db_genotype` output path to `*_report/references/*/db_genotype`. 
--- modules/local/enchantr/bayesian_genotype_inference.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index 07553467..2febe7ee 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -32,7 +32,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { path repertoires_samplesheet output: - path "*_report/db_genotype", emit: reference // reference folder + path "*_report/references/*/db_genotype", emit: reference // reference folder path("*/*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions From 1f994c5092fd2a557f63caadadb66228324f67a6 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Fri, 30 Jan 2026 10:28:56 -0500 Subject: [PATCH 27/30] feat: integrate clonal assignment with new `genotype_clone_threshold` parameter and remove `repertoires_samplesheet` input from enchantr modules. --- .../enchantr/bayesian_genotype_inference.nf | 8 +--- .../local/enchantr/novel_allele_inference.nf | 8 +--- modules/local/enchantr/reassign_alleles.nf | 8 +--- nextflow.config | 3 +- nextflow_schema.json | 11 +++++ .../local/novel_alleles_and_genotyping.nf | 48 +++++++++---------- 6 files changed, 39 insertions(+), 47 deletions(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index 2febe7ee..2e39a755 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -29,7 +29,6 @@ process BAYESIAN_GENOTYPE_INFERENCE { input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format path reference_fasta - path repertoires_samplesheet output: path "*_report/references/*/db_genotype", emit: reference // reference folder @@ -40,12 +39,7 @@ process BAYESIAN_GENOTYPE_INFERENCE { script: def args = task.ext.args ? 
asString(task.ext.args) : '' - def input = "" - if (repertoires_samplesheet) { - input = repertoires_samplesheet - } else { - input = tabs.join(',') - } + def input = tabs.join(',') """ Rscript -e "enchantr::enchantr_report('tigger_bayesian_genotype', \\ report_params=list('input'='${input}', \\ diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index a1854b66..dad6cf2d 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -29,7 +29,6 @@ process NOVEL_ALLELE_INFERENCE { input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format path reference_fasta - path repertoires_samplesheet output: path "*_report/db_novel", emit: reference // reference folder @@ -40,12 +39,7 @@ process NOVEL_ALLELE_INFERENCE { script: def args = task.ext.args ? asString(task.ext.args) : '' - def input = "" - if (repertoires_samplesheet) { - input = repertoires_samplesheet - } else { - input = tabs.join(',') - } + def input = tabs.join(',') """ Rscript -e "enchantr::enchantr_report('novel_allele_inference', \\ report_params=list('input'='${input}', \\ diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index dc171b94..efc94ba1 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -29,7 +29,6 @@ process REASSIGN_ALLELES { input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format path reference_fasta - path repertoires_samplesheet val segments // which segments to reassign alleles to //TODO: did we want to handle all segments at once? Then this val channel would not be needed. // *After novel alleles we just need to change the V, it's a time waste to go over all segments. @@ -44,12 +43,7 @@ process REASSIGN_ALLELES { script: def args = task.ext.args ? 
asString(task.ext.args) : '' def segs = segments.join(",") - def input = "" - if (repertoires_samplesheet) { - input = repertoires_samplesheet - } else { - input = tabs.join(',') - } + def input = tabs.join(',') """ Rscript -e "enchantr::enchantr_report('reassign_alleles', \\ report_params=list('input'='${input}', \\ diff --git a/nextflow.config b/nextflow.config index e509d498..f33b36df 100644 --- a/nextflow.config +++ b/nextflow.config @@ -122,7 +122,8 @@ params { genotyping = false genotypeby = 'subject_id' novel_allele_inference = true - single_clone_representative = false + genotype_clone_threshold = '0.2' + single_clone_representative = true // ----------------------- // translate embed options // ----------------------- diff --git a/nextflow_schema.json b/nextflow_schema.json index eab1a4e0..a1748858 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -570,6 +570,17 @@ "description": "Name of the field used to group data files to infer genotype.", "fa_icon": "fab fa-pagelines" }, + "genotype_clone_threshold": { + "type": "string", + "default": "0.2", + "description": "Threshold for determining if two sequences come from the same clone or not.", + "fa_icon": "fas fa-dna" + }, + "single_clone_representative": { + "type": "boolean", + "description": "Run clonal assignment before Bayesian genotype inference so that a single representative sequence per clone is used.", + "fa_icon": "fas fa-dna" + }, "novel_allele_inference": { "type": "boolean", "description": "Perform TIgGER novel allele inference.", diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 40493dd9..07b8e786 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -2,6 +2,7 @@ include { NOVEL_ALLELE_INFERENCE } from '../../modules/local/enchantr/novel_alle include { BAYESIAN_GENOTYPE_INFERENCE } from '../../modules/local/enchantr/bayesian_genotype_inference' include { REASSIGN_ALLELES as REASSIGN_ALLELES_NOVEL; REASSIGN_ALLELES
as REASSIGN_ALLELES_GENOTYPE} from '../../modules/local/enchantr/reassign_alleles' include { CLONAL_ANALYSIS } from './clonal_analysis.nf' +include { CLONAL_ASSIGNMENT as CLONAL_ASSIGNMENT_COMPUTE } from '../../modules/local/enchantr/clonal_assignment' workflow NOVEL_ALLELES_AND_GENOTYPING { take: @@ -26,13 +27,11 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { .map{ get_meta_tabs(it) } .set{ ch_grouped_repertoires } - //TODO: conditional on params.novel_allele_inference // infer novel alleles if (params.novel_allele_inference) { NOVEL_ALLELE_INFERENCE ( ch_grouped_repertoires, - ch_reference_fasta, - [] + ch_reference_fasta ) // reassign novel alleles (we can skip this step if no novel alleles were inferred) @@ -40,8 +39,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { REASSIGN_ALLELES_NOVEL ( ch_grouped_repertoires, NOVEL_ALLELE_INFERENCE.out.reference, - [], - ["v"] //TODO: update this to pass actual segments. We only need to reassign V after novel allele inference. + ["v"] ) ch_for_genotyping = REASSIGN_ALLELES_NOVEL.out.tab ch_for_reference = NOVEL_ALLELE_INFERENCE.out.reference @@ -50,38 +48,38 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { ch_for_reference = ch_reference_fasta } - // TODO: what are we doing with the reference if we are not running novel allele inference? - // TODO: we can use a constant clonal threshold. - // infer clones (gets the reference from novel alleles inference in any case) - if (params.single_clone_representative) { - CLONAL_ANALYSIS( - ch_for_genotyping, - ch_for_reference, - ch_logo.collect().ifEmpty([]) - ) - ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) - - ch_for_genotyping = CLONAL_ANALYSIS.out.repertoire + // TODO: Check if we need the cloneby parameter, or here it can be the same as genotypeby. 
+ CLONAL_ASSIGNMENT_COMPUTE( + ch_for_genotyping, + params.genotype_clone_threshold, + ch_reference_fasta.collect(), + [] + ) + + // CLONAL_ANALYSIS( + // ch_for_genotyping, + // ch_for_reference, + // ch_logo.collect().ifEmpty([]) + // ) + // ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) + + ch_for_genotyping = CLONAL_ASSIGNMENT_COMPUTE.out.tab//CLONAL_ANALYSIS.out.repertoire } - // infer genotype (gets the reference from novel alleles inference in any case) + // infer genotype BAYESIAN_GENOTYPE_INFERENCE ( ch_for_genotyping, - ch_for_reference, - [] + ch_for_reference ) - // reassign genotypes (gets the reference from genotype inference in any case) - + // reassign genotypes REASSIGN_ALLELES_GENOTYPE ( ch_for_genotyping, BAYESIAN_GENOTYPE_INFERENCE.out.reference, - [], - ["auto"] //TODO: update this to pass actual segments. We're running over all segment after genotype inference. + ["auto"] ) - emit: repertoire = REASSIGN_ALLELES_GENOTYPE.out.tab versions = ch_versions From 7de1e4f98951ba3738698a6b7c8abf6da91efd43 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Fri, 30 Jan 2026 10:35:34 -0500 Subject: [PATCH 28/30] refactor: wrap `genotype_clone_threshold` in a list when calling `CLONAL_ASSIGNMENT_COMPUTE`. --- subworkflows/local/novel_alleles_and_genotyping.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 07b8e786..86109840 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -52,7 +52,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { // TODO: Check if we need the cloneby parameter, or here it can be the same as genotypeby. 
CLONAL_ASSIGNMENT_COMPUTE( ch_for_genotyping, - params.genotype_clone_threshold, + [params.genotype_clone_threshold], ch_reference_fasta.collect(), [] ) From 630ee15cc101e721c422aea558a21febacbe732a Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Fri, 30 Jan 2026 12:48:25 -0500 Subject: [PATCH 29/30] refactor: remove commented-out CLONAL_ANALYSIS block --- subworkflows/local/novel_alleles_and_genotyping.nf | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 86109840..9d408bef 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -56,15 +56,7 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { ch_reference_fasta.collect(), [] ) - - // CLONAL_ANALYSIS( - // ch_for_genotyping, - // ch_for_reference, - // ch_logo.collect().ifEmpty([]) - // ) - // ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions) - - ch_for_genotyping = CLONAL_ASSIGNMENT_COMPUTE.out.tab//CLONAL_ANALYSIS.out.repertoire + ch_for_genotyping = CLONAL_ASSIGNMENT_COMPUTE.out.tab } // infer genotype From 8135df1dd1439b1bcbec2cca97d3753b90111618 Mon Sep 17 00:00:00 2001 From: ayeletperes Date: Thu, 5 Feb 2026 15:38:51 -0500 Subject: [PATCH 30/30] feat: Refactor novel allele and genotyping subworkflow to pass reference FASTA within the main data tuple and introduce an `outputby` parameter for allele reassignment. 
--- .../enchantr/bayesian_genotype_inference.nf | 5 +- .../local/enchantr/novel_allele_inference.nf | 5 +- modules/local/enchantr/reassign_alleles.nf | 9 +- nextflow.config | 1 + .../local/novel_alleles_and_genotyping.nf | 86 +++++++++++++------ 5 files changed, 69 insertions(+), 37 deletions(-) diff --git a/modules/local/enchantr/bayesian_genotype_inference.nf b/modules/local/enchantr/bayesian_genotype_inference.nf index 2e39a755..eea65b06 100644 --- a/modules/local/enchantr/bayesian_genotype_inference.nf +++ b/modules/local/enchantr/bayesian_genotype_inference.nf @@ -27,11 +27,10 @@ process BAYESIAN_GENOTYPE_INFERENCE { container "docker.io/immcantation/airrflow:genotyping" input: - tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format - path reference_fasta + tuple val(meta), path(tabs), path(reference_fasta) // meta, sequence tsv in AIRR format output: - path "*_report/references/*/db_genotype", emit: reference // reference folder + tuple val(meta), path("*_report/references/*/db_genotype"), emit: reference // reference folder path("*/*_command_log.txt"), emit: logs //process logs path "*_report" path "versions.yml", emit: versions diff --git a/modules/local/enchantr/novel_allele_inference.nf b/modules/local/enchantr/novel_allele_inference.nf index dad6cf2d..926fe5b5 100644 --- a/modules/local/enchantr/novel_allele_inference.nf +++ b/modules/local/enchantr/novel_allele_inference.nf @@ -27,11 +27,10 @@ process NOVEL_ALLELE_INFERENCE { container "docker.io/immcantation/airrflow:genotyping" input: - tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format - path reference_fasta + tuple val(meta), path(tabs), path(reference_fasta) // meta, sequence tsv in AIRR format, reference fasta output: - path "*_report/db_novel", emit: reference // reference folder + tuple val(meta), path("*_report/db_novel"), emit: reference // reference folder path("*/*_command_log.txt"), emit: logs //process logs path "*_report", optional: true, emit: report path 
"versions.yml", emit: versions diff --git a/modules/local/enchantr/reassign_alleles.nf b/modules/local/enchantr/reassign_alleles.nf index efc94ba1..ec4d07a9 100644 --- a/modules/local/enchantr/reassign_alleles.nf +++ b/modules/local/enchantr/reassign_alleles.nf @@ -27,12 +27,12 @@ process REASSIGN_ALLELES { container "docker.io/immcantation/airrflow:genotyping" input: - tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format - path reference_fasta + tuple val(meta), path(tabs), path(reference_fasta) // meta, sequence tsv in AIRR format, reference fasta val segments // which segments to reassign alleles to + val outputby // which field to use for output //TODO: did we want to handle all segments at once? Then this val channel would not be needed. // *After novel alleles we just need to change the V, it's a time waste to go over all segments. - //TODO: Check if we need the outputby parameter. Right now this is the same as the cloneby parameter. + //TODO: Check if we need the outputby parameter. Right now this is the same as the genotypeby parameter. output: tuple val(meta), path("*/*/*reassign-pass.tsv"), emit: tab // reassigned repertoire path("*/*_command_log.txt"), emit: logs //process logs @@ -44,12 +44,13 @@ process REASSIGN_ALLELES { def args = task.ext.args ? 
asString(task.ext.args) : '' def segs = segments.join(",") def input = tabs.join(',') + """ Rscript -e "enchantr::enchantr_report('reassign_alleles', \\ report_params=list('input'='${input}', \\ 'imgt_db'='${reference_fasta}', \\ 'species'='auto', \\ - 'outputby'='${params.cloneby}', \\ + 'outputby'='${outputby}', \\ 'segments'='${segs}', \\ 'outdir'=getwd(), \\ 'log'='${meta.id}_reassign_alleles_command_log' ${args}))" diff --git a/nextflow.config b/nextflow.config index f33b36df..4d4520e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,6 +121,7 @@ params { // ----------------------- genotyping = false genotypeby = 'subject_id' + reassignby = 'sample_id' novel_allele_inference = true genotype_clone_threshold = '0.2' single_clone_representative = true diff --git a/subworkflows/local/novel_alleles_and_genotyping.nf b/subworkflows/local/novel_alleles_and_genotyping.nf index 9d408bef..85c7b094 100644 --- a/subworkflows/local/novel_alleles_and_genotyping.nf +++ b/subworkflows/local/novel_alleles_and_genotyping.nf @@ -14,15 +14,23 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { main: ch_versions = Channel.empty() ch_logs = Channel.empty() - + def outputby = params.genotypeby=="sample_id" ? "id" : params.genotypeby //TODO: we need to change this so we can handle the cases of inferring based on naive and reassigning all // merge all repertoires by genotypeby metadata field - ch_repertoire.map{ it -> [ it[0]."${params.genotypeby}", - it[0].id, - it[0].subject_id, - it[0].species, - it[0].single_cell, - it[0].locus, - it[1] ] } + ch_repertoire + .combine(ch_reference_fasta) + .map{ it -> + def meta = it[0] + def rep = it[1] + def ref = it[2] + def genotypeby = params.genotypeby=="sample_id" ? 
"id" : params.genotypeby + [ meta."${genotypeby}", + meta.id, + meta.subject_id, + meta.species, + meta.single_cell, + meta.locus, + rep, + ref ] } .groupTuple() .map{ get_meta_tabs(it) } .set{ ch_grouped_repertoires } @@ -30,46 +38,71 @@ workflow NOVEL_ALLELES_AND_GENOTYPING { // infer novel alleles if (params.novel_allele_inference) { NOVEL_ALLELE_INFERENCE ( - ch_grouped_repertoires, - ch_reference_fasta + ch_grouped_repertoires ) // reassign novel alleles (we can skip this step if no novel alleles were inferred) - + ch_grouped_repertoires + .join(NOVEL_ALLELE_INFERENCE.out.reference) + .map { it -> + def meta = it[0] + def reps = it[1] + def new_ref = it[3] + [ meta, reps, new_ref ] + } + .set{ ch_for_genotyping } + REASSIGN_ALLELES_NOVEL ( - ch_grouped_repertoires, - NOVEL_ALLELE_INFERENCE.out.reference, - ["v"] + ch_for_genotyping, + ["v"], + outputby ) - ch_for_genotyping = REASSIGN_ALLELES_NOVEL.out.tab - ch_for_reference = NOVEL_ALLELE_INFERENCE.out.reference + + REASSIGN_ALLELES_NOVEL.out.tab + .join(NOVEL_ALLELE_INFERENCE.out.reference) + .set{ ch_for_genotyping } + + } else { ch_for_genotyping = ch_grouped_repertoires - ch_for_reference = ch_reference_fasta } if (params.single_clone_representative) { // TODO: Check if we need the cloneby parameter, or here it can be the same as genotypeby. 
+ // create separate channels for repertoire and reference based on the genotypeby metadata field + ch_for_genotyping + .map{ it -> [it[0], it[1]] } + .set{ ch_for_genotyping_rep } + ch_for_genotyping + .map{ it -> it[2] } + .set{ ch_for_genotyping_ref } CLONAL_ASSIGNMENT_COMPUTE( - ch_for_genotyping, + ch_for_genotyping_rep, [params.genotype_clone_threshold], - ch_reference_fasta.collect(), + ch_for_genotyping_ref, [] ) - ch_for_genotyping = CLONAL_ASSIGNMENT_COMPUTE.out.tab + CLONAL_ASSIGNMENT_COMPUTE.out.tab + .join(ch_for_genotyping + .map{ it -> [it[0], it[2]] }) + .set{ ch_for_genotyping } } // infer genotype BAYESIAN_GENOTYPE_INFERENCE ( - ch_for_genotyping, - ch_for_reference + ch_for_genotyping ) + + ch_grouped_repertoires + .map{ it -> [it[0], it[1]] } + .join(BAYESIAN_GENOTYPE_INFERENCE.out.reference) + .set{ ch_for_reassign } // reassign genotypes REASSIGN_ALLELES_GENOTYPE ( - ch_for_genotyping, - BAYESIAN_GENOTYPE_INFERENCE.out.reference, - ["auto"] + ch_for_reassign, + ["auto"], + outputby ) emit: @@ -90,7 +123,6 @@ def get_meta_tabs(arr) { def array = [] - array = [ meta, arr[6].flatten() ] - + array = [ meta, arr[6].flatten(), arr[7][0] ] return array }