Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
339 changes: 324 additions & 15 deletions conf/modules.config

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ params {
centrifuge_save_reads = true
metaphlan_save_samfiles = true
run_profile_standardisation = true
generate_downstream_samplesheets = false
generate_pipeline_samplesheets = null
}

process {
Expand Down
3 changes: 3 additions & 0 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ params {

run_profile_standardisation = true
run_krona = true

generate_downstream_samplesheets = false
generate_pipeline_samplesheets = null
}

cleanup = true
Expand Down
100 changes: 100 additions & 0 deletions conf/test_metavalsamplesheet.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/taxprofiler -profile test_metavalsamplesheet,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

// Upper bounds on the resources a process may request under this test profile
// (4 CPUs, 15 GB memory, 6 hours wall time).
process {
resourceLimits = [
cpus: 4,
memory: '15.GB',
time: '6.h',
]
}

// Parameters for the test run that exercises generation of the 'metaval' downstream samplesheet.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Input data
// Samplesheet and database sheet are resolved relative to params.pipelines_testdata_base_path.
input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_metaval.csv'
databases = params.pipelines_testdata_base_path + 'taxprofiler/database_metaval.csv'
// Preprocessing: QC, redundancy estimation, complexity filtering, host removal and run merging are on;
// pair merging is off and the complexity-filtered reads are saved.
perform_shortread_qc = true
perform_shortread_redundancyestimation = true
perform_longread_qc = true
shortread_qc_mergepairs = false
perform_shortread_complexityfilter = true
save_complexityfiltered_reads = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
// Profilers: only Kraken2, Centrifuge and DIAMOND are switched on; all others are off.
run_kaiju = false
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = true
run_diamond = true
run_krakenuniq = false
run_motus = false
run_ganon = false
run_krona = false
run_kmcp = false
krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
// Per-tool output saving: only the Kraken2 read classifications are kept.
malt_save_reads = false
kraken2_save_reads = false
kraken2_save_readclassifications = true
centrifuge_save_reads = false
metaphlan_save_samfiles = false
run_profile_standardisation = true
// Turn on downstream samplesheet generation and select the 'metaval' samplesheet.
generate_downstream_samplesheets = true
generate_pipeline_samplesheets = 'metaval'

}

// Per-process overrides selected by process name.
// NOTE(review): run_malt is false in this file's params block, so the MALT/MEGAN selectors
// are presumably inert for this profile — confirm whether they are still needed here.
process {
// MALT: run one task at a time, name outputs after the database, and publish
// rma6/log/sam files under <outdir>/malt/<db_name>/.
withName: MALT_RUN {
tag = { "${meta.db_name}|${meta.id}" }
maxForks = 1
// Arguments combine the database-specific parameters, the MALT mode and a fixed -J-Xmx12G flag
// (presumably a 12 GB Java heap setting — verify against the MALT documentation).
ext.args = { "${meta.db_params} -m ${params.malt_mode} -J-Xmx12G" }
ext.prefix = { "${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}/" },
mode: params.publish_dir_mode,
pattern: '*.{rma6,log,sam}',
]
}
// MEGAN rma2info (TSV): outputs are named after the sample id and published next to the MALT results.
withName: MEGAN_RMA2INFO_TSV {
tag = { "${meta.db_name}|${meta.id}" }
maxForks = 1
ext.args = "-c2c Taxonomy"
ext.prefix = { "${meta.id}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}/" },
mode: params.publish_dir_mode,
pattern: '*.{txt.gz,megan}',
]
}
// MEGAN rma2info (Krona input): prefix combines sample id and database name; no publishDir is set here.
withName: MEGAN_RMA2INFO_KRONA {
tag = { "${meta.db_name}|${meta.id}" }
maxForks = 1
ext.args = { "--read2class Taxonomy" }
ext.prefix = { "${meta.id}_${meta.db_name}" }
}
// Nonpareil: passes "-k 5", names outputs per sample and run accession, and publishes
// the .npa/.npc/.npl/.npo files under <outdir>/nonpareil/.
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.np{a,c,l,o}',
]
}
}
26 changes: 26 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,32 @@ The 'General Stats' table by default will only show statistics referring to pre-
For example, DIAMOND output does not have a dedicated section in the MultiQC HTML, only in the general stats table. To turn this on, copy the nf-core/taxprofiler [MultiQC config](https://github.com/nf-core/taxprofiler/blob/master/assets/multiqc_config.yml) and change the DIAMOND entry in `table_columns_visible:` to True.
:::

### Downstream samplesheets

The pipeline can also generate input files for the following downstream
pipelines:

- [genomic-medicine-sweden/metaval](https://github.com/genomic-medicine-sweden/metaval)

<details markdown="1">
<summary>Output files</summary>

- `downstream_samplesheets/`
- `metaval.csv`: Filled out `genomic-medicine-sweden/metaval` `--input` csv with paths to reads based on the taxprofiler `--outdir` directory, classification tables and taxpasta files.

</details>

The `genomic-medicine-sweden/metaval` workflow only verifies the classification results produced by the three classifiers: `Kraken2`, `Centrifuge`, and `DIAMOND`.

Each classifier must only be executed with a single database and the raw read files must be provided as `*.fastq.gz` files.

If multiple sequencing runs exist for the same sample, `nf-core/taxprofiler` performs read merging after host removal (and before profiling) when `params.perform_runmerging` is enabled. The merged reads are stored in the `run_merging/` folder if `params.save_runmerged_reads` is set, or in `analysis_ready_fastqs/` if `params.save_analysis_ready_fastqs` is set. However, `genomic-medicine-sweden/metaval` takes filtered reads without host removal. In this case, reads from multiple runs belonging to the same sample are merged and stored in the `filtered_reads_merged/` directory.

:::warning
Generated downstream samplesheets are provided as 'best effort' and are not guaranteed to work straight out of the box!
They may not be complete (e.g. some columns may need to be manually filled in).
:::

### Pipeline information

<details markdown="1">
Expand Down
21 changes: 21 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,14 @@ Activating this functionality will concatenate the FASTQ files with the same sam

You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.

To avoid name conflicts during profiling and to maintain consistent naming, the following suffixes are added to profile and/or classification outputs (for both FASTQ and FASTA inputs):

- `_srse`: short read FASTQ, single-end
- `_srpe`: short read FASTQ, paired-end
- `_srfa`: short read FASTA
- `_lrfq`: long read FASTQ
- `_lrfa`: long read FASTA

#### Classification and Profiling

The following sections provide tips and suggestions for running the different taxonomic classification and profiling tools _within the pipeline_. For advice and/or guidance whether you should run a particular tool on your specific data, please see the documentation of each tool!
Expand Down Expand Up @@ -486,6 +494,19 @@ The following tools will produce multi-sample taxon tables:

Note that the multi-sample tables from the 'native' tools in each folder are [not inter-operable](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started/) with each other as they can have different formats and can contain additional and different data. In this case we refer you to use the standardised and merged output from Taxpasta, as described above.

### Downstream samplesheets

To generate the samplesheet for `genomic-medicine-sweden/metaval`, enable the following parameters:

- `--generate_downstream_samplesheets`
- `--generate_pipeline_samplesheets 'metaval'`
- `--kraken2_save_readclassifications`
- `--run_profile_standardisation`
- `--save_complexityfiltered_reads`
- `--perform_shortread_qc`
- `--perform_shortread_complexityfilter`
- `--perform_longread_qc`

### Updating the pipeline

When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
Expand Down
7 changes: 7 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ params {
taxpasta_add_ranklineage = false
taxpasta_ignore_errors = false
standardisation_motus_generatebiom = false

// Generate downstream samplesheets
generate_downstream_samplesheets = false
generate_pipeline_samplesheets = null
}

// Load base.config by default for all pipelines
Expand Down Expand Up @@ -348,6 +352,9 @@ profiles {
test_prinseqplusplus {
includeConfig 'conf/test_prinseqplusplus.config'
}
test_metavalsamplesheet {
includeConfig 'conf/test_metavalsamplesheet.config'
}
}
// Load nf-core custom profiles from different institutions

Expand Down
22 changes: 21 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
"description": "Turn on run merging",
"help_text": "Turns on the concatenation of sequencing runs or libraries with the same sample name.\n\nThis can be useful to ensure you get a single profile per sample, rather than one profile per run or library. Note that in some cases comparing profiles of independent _libraries_ may be useful, so this parameter may not always be suitable. "
"help_text": "Turns on the concatenation of sequencing runs or libraries with the same sample name.\n\nThis can be useful to ensure you get a single profile per sample, rather than one profile per run or library. Note that in some cases comparing profiles of independent _libraries_ may be useful, so this parameter may not always be suitable. \n\nNote: only FASTQ files are concatenated, not FASTA."
},
"save_runmerged_reads": {
"type": "boolean",
Expand Down Expand Up @@ -724,6 +724,23 @@
},
"fa_icon": "fas fa-chart-line"
},
"generate_samplesheet_options": {
"title": "Downstream pipeline samplesheet generation options",
"type": "object",
"fa_icon": "fas fa-university",
"description": "Options for generating input samplesheets for complementary downstream pipelines.",
"properties": {
"generate_downstream_samplesheets": {
"type": "boolean",
"description": "Turn on generation of samplesheets for downstream pipelines."
},
"generate_pipeline_samplesheets": {
"type": "string",
"description": "Specify which downstream pipeline to generate a samplesheet for. Currently only 'metaval' is supported.",
"pattern": "^metaval$"
}
}
},
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
Expand Down Expand Up @@ -911,6 +928,9 @@
{
"$ref": "#/$defs/postprocessing_and_visualisation_options"
},
{
"$ref": "#/$defs/generate_samplesheet_options"
},
{
"$ref": "#/$defs/institutional_config_options"
},
Expand Down
Loading