nf-core · LilyAnderssonLee · Nov 12, 2025 · Nov 12, 2025 · Nov 13, 2025 · Nov 13, 2025
@@ -52,6 +52,8 @@ params {
     centrifuge_save_reads                  = true
     metaphlan_save_samfiles                = true
     run_profile_standardisation            = true
+    generate_downstream_samplesheets       = false
+    generate_pipeline_samplesheets         = null
 }
 
 process {

@@ -75,6 +75,9 @@ params {
 
     run_profile_standardisation            = true
     run_krona                              = true
+
+    generate_downstream_samplesheets       = false
+    generate_pipeline_samplesheets         = null
 }
 
 cleanup = true

@@ -0,0 +1,100 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for testing metaval downstream samplesheet generation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/taxprofiler -profile test_metavalsamplesheet,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+process {
+    resourceLimits = [ // upper bounds on what any process in this profile may request
+        cpus: 4,
+        memory: '15.GB',
+        time: '6.h',
+    ]
+}
+
+params {
+    config_profile_name                    = 'Test profile'
+    config_profile_description             = 'Minimal test dataset to check pipeline function'
+
+    // Input data (metaval-specific samplesheet and database sheet), then preprocessing, profiler and output switches
+    input                                  = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_metaval.csv'
+    databases                              = params.pipelines_testdata_base_path + 'taxprofiler/database_metaval.csv'
+    perform_shortread_qc                   = true // listed in the usage docs as required for the metaval samplesheet
+    perform_shortread_redundancyestimation = true
+    perform_longread_qc                    = true // listed in the usage docs as required for the metaval samplesheet
+    shortread_qc_mergepairs                = false
+    perform_shortread_complexityfilter     = true // listed in the usage docs as required for the metaval samplesheet
+    save_complexityfiltered_reads          = true // listed in the usage docs as required for the metaval samplesheet
+    perform_shortread_hostremoval          = true
+    perform_longread_hostremoval           = true
+    perform_runmerging                     = true
+    hostremoval_reference                  = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
+    run_kaiju                              = false
+    run_kraken2                            = true // Kraken2, Centrifuge and DIAMOND are the three classifiers metaval verifies
+    run_bracken                            = false
+    run_malt                               = false
+    run_metaphlan                          = false
+    run_centrifuge                         = true // one of the three classifiers metaval verifies
+    run_diamond                            = true // one of the three classifiers metaval verifies
+    run_krakenuniq                         = false
+    run_motus                              = false
+    run_ganon                              = false
+    run_krona                              = false
+    run_kmcp                               = false
+    krona_taxonomy_directory               = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
+    malt_save_reads                        = false
+    kraken2_save_reads                     = false
+    kraken2_save_readclassifications       = true // listed in the usage docs as required for the metaval samplesheet
+    centrifuge_save_reads                  = false
+    metaphlan_save_samfiles                = false
+    run_profile_standardisation            = true // listed in the usage docs as required for the metaval samplesheet
+    generate_downstream_samplesheets       = true // master switch for downstream samplesheet generation
+    generate_pipeline_samplesheets         = 'metaval' // target pipeline; the schema pattern only accepts 'metaval'
+
+}
+
+process { // per-process overrides, selected by process name
+    withName: MALT_RUN { // NOTE(review): run_malt is false in this profile's params, so this selector looks unused here — confirm
+        tag        = { "${meta.db_name}|${meta.id}" }
+        maxForks   = 1 // at most one instance of this process runs at a time
+        ext.args   = { "${meta.db_params} -m ${params.malt_mode} -J-Xmx12G" } // NOTE(review): -J-Xmx12G presumably sets the JVM max heap to 12 GB — confirm against MALT docs
+        ext.prefix = { "${meta.db_name}" }
+        publishDir = [
+            path: { "${params.outdir}/malt/${meta.db_name}/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{rma6,log,sam}',
+        ]
+    }
+    withName: MEGAN_RMA2INFO_TSV { // publishes into the same malt/<db_name>/ directory as MALT_RUN
+        tag        = { "${meta.db_name}|${meta.id}" }
+        maxForks   = 1
+        ext.args   = "-c2c Taxonomy"
+        ext.prefix = { "${meta.id}" }
+        publishDir = [
+            path: { "${params.outdir}/malt/${meta.db_name}/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{txt.gz,megan}',
+        ]
+    }
+    withName: MEGAN_RMA2INFO_KRONA { // no publishDir override here, unlike the two selectors above
+        tag        = { "${meta.db_name}|${meta.id}" }
+        maxForks   = 1
+        ext.args   = { "--read2class Taxonomy" }
+        ext.prefix = { "${meta.id}_${meta.db_name}" }
+    }
+    withName: NONPAREIL_NONPAREIL { // NOTE(review): presumably triggered by perform_shortread_redundancyestimation = true — confirm
+        ext.args   = { "-k 5" }
+        ext.prefix = { "${meta.id}_${meta.run_accession}" } // output prefix combines sample id and run accession
+        publishDir = [
+            path: { "${params.outdir}/nonpareil/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.np{a,c,l,o}',
+        ]
+    }
+}
@@ -716,6 +716,32 @@ The 'General Stats' table by default will only show statistics referring to pre-
 For example, DIAMOND output does not have a dedicated section in the MultiQC HTML, only in the general stats table. To turn this on, copy the nf-core/taxprofiler [MultiQC config](https://github.com/nf-core/taxprofiler/blob/master/assets/multiqc_config.yml) and change the DIAMOND entry in `table_columns_visible:` to True.
 :::
 
+### Downstream samplesheets
+
+The pipeline can also generate input files for the following downstream
+pipelines:
+
+- [genomic-medicine-sweden/metaval](https://github.com/genomic-medicine-sweden/metaval)
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `downstream_samplesheets/`
+  - `metaval.csv`: Filled out `genomic-medicine-sweden/metaval` `--input` csv with paths to reads based on the taxprofiler `--outdir` directory, classification tables and taxpasta files.
+
+</details>
+
+The `genomic-medicine-sweden/metaval` workflow only verifies the classification results produced by the three classifiers: `Kraken2`, `Centrifuge`, and `DIAMOND`.
+
+Each classifier must only be executed with a single database and the raw read files must be provided as `*.fastq.gz` files.
+
+If multiple sequencing runs exist for the same sample, `nf-core/taxprofiler` performs read merging after host removal (and before profiling) when `params.perform_runmerging` is enabled. The merged reads are stored in the `run_merging/` folder if `params.save_runmerged_reads` is set, or in `analysis_ready_fastqs/` if `params.save_analysis_ready_fastqs` is set. However, `genomic-medicine-sweden/metaval` takes filtered reads without host removal. For this case, the filtered (not host-removed) reads from multiple runs belonging to the same sample are merged separately and stored in the `filtered_reads_merged/` directory.
+
+:::warning
+Any generated downstream samplesheet is provided on a 'best effort' basis and is not guaranteed to work straight out of the box!
+They may not be complete (e.g. some columns may need to be manually filled in).
+:::
+
 ### Pipeline information
 
 <details markdown="1">

@@ -354,6 +354,14 @@ Activating this functionality will concatenate the FASTQ files with the same sam
 
 You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.
 
+To avoid name conflicts during profiling and to maintain consistent naming, the following suffixes are added to profile and/or classification outputs (for both FASTQ and FASTA inputs):
+
+- `_srse`: short read FASTQ, single-end
+- `_srpe`: short read FASTQ, paired-end
+- `_srfa`: short read FASTA
+- `_lrfq`: long read FASTQ
+- `_lrfa`: long read FASTA
+
 #### Classification and Profiling
 
 The following sections provide tips and suggestions for running the different taxonomic classification and profiling tools _within the pipeline_. For advice and/or guidance whether you should run a particular tool on your specific data, please see the documentation of each tool!
@@ -486,6 +494,19 @@ The following tools will produce multi-sample taxon tables:
 
 Note that the multi-sample tables from the 'native' tools in each folders are [not inter-operable](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started/) with each other as they can have different formats and can contain additional and different data. In this case we refer you to use the standardised and merged output from Taxpasta, as described above.
 
+### Downstream samplesheets
+
+To generate the samplesheet for `genomic-medicine-sweden/metaval`, enable the following parameters:
+
+- `--generate_downstream_samplesheets`
+- `--generate_pipeline_samplesheets 'metaval'`
+- `--kraken2_save_readclassifications`
+- `--run_profile_standardisation`
+- `--save_complexityfiltered_reads`
+- `--perform_shortread_qc`
+- `--perform_shortread_complexityfilter`
+- `--perform_longread_qc`
+
 ### Updating the pipeline
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

@@ -194,6 +194,10 @@ params {
     taxpasta_add_ranklineage                             = false
     taxpasta_ignore_errors                               = false
     standardisation_motus_generatebiom                   = false
+
+    // Generate downstream samplesheets
+    generate_downstream_samplesheets                     = false
+    generate_pipeline_samplesheets                       = null
 }
 
 // Load base.config by default for all pipelines
@@ -348,6 +352,9 @@ profiles {
     test_prinseqplusplus {
         includeConfig 'conf/test_prinseqplusplus.config'
     }
+    test_metavalsamplesheet {
+        includeConfig 'conf/test_metavalsamplesheet.config'
+    }
 }
 // Load nf-core custom profiles from different institutions
 

@@ -392,7 +392,7 @@
                     "type": "boolean",
                     "fa_icon": "fas fa-toggle-on",
                     "description": "Turn on run merging",
-                    "help_text": "Turns on the concatenation of sequencing runs or libraries with the same sample name.\n\nThis can be useful to ensure you get a single profile per sample, rather than one profile per run or library. Note that in some cases comparing profiles of independent _libraries_ may be useful, so this parameter may not always be suitable.  "
+                    "help_text": "Turns on the concatenation of sequencing runs or libraries with the same sample name.\n\nThis can be useful to ensure you get a single profile per sample, rather than one profile per run or library. Note that in some cases comparing profiles of independent _libraries_ may be useful, so this parameter may not always be suitable. \n\nNote: only FASTQ files are concatenated, not FASTA."
                 },
                 "save_runmerged_reads": {
                     "type": "boolean",
@@ -724,6 +724,23 @@
             },
             "fa_icon": "fas fa-chart-line"
         },
+        "generate_samplesheet_options": {
+            "title": "Downstream pipeline samplesheet generation options",
+            "type": "object",
+            "fa_icon": "fas fa-university",
+            "description": "Options for generating input samplesheets for complementary downstream pipelines.",
+            "properties": {
+                "generate_downstream_samplesheets": {
+                    "type": "boolean",
+                    "description": "Turn on generation of samplesheets for downstream pipelines."
+                },
+                "generate_pipeline_samplesheets": {
+                    "type": "string",
+                    "description": "Specify which downstream pipeline to generate a samplesheet for. Currently only 'metaval' is supported.",
+                    "pattern": "^metaval$"
+                }
+            }
+        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -911,6 +928,9 @@
         {
             "$ref": "#/$defs/postprocessing_and_visualisation_options"
         },
+        {
+            "$ref": "#/$defs/generate_samplesheet_options"
+        },
         {
             "$ref": "#/$defs/institutional_config_options"
         },