Skip to content

Commit 24813cc

Browse files
committed
Update changelog and docs, add new param to enable run cat on unbinned data
1 parent 2e45cf0 commit 24813cc

8 files changed

Lines changed: 72 additions & 22 deletions

File tree

.vscode/settings.json

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
11
{
2-
"markdown.styles": ["public/vscode_markdown.css"]
2+
"markdown.styles": [
3+
"public/vscode_markdown.css"
4+
],
5+
"workbench.colorCustomizations": {
6+
"activityBar.activeBackground": "#2f7b49",
7+
"activityBar.background": "#2f7b49",
8+
"activityBar.foreground": "#e7e7e7",
9+
"activityBar.inactiveForeground": "#e7e7e799",
10+
"activityBarBadge.background": "#442c73",
11+
"activityBarBadge.foreground": "#e7e7e7",
12+
"commandCenter.border": "#e7e7e799",
13+
"sash.hoverBorder": "#2f7b49",
14+
"statusBar.background": "#215633",
15+
"statusBar.foreground": "#e7e7e7",
16+
"statusBarItem.hoverBackground": "#2f7b49",
17+
"statusBarItem.remoteBackground": "#215633",
18+
"statusBarItem.remoteForeground": "#e7e7e7",
19+
"titleBar.activeBackground": "#215633",
20+
"titleBar.activeForeground": "#e7e7e7",
21+
"titleBar.inactiveBackground": "#21563399",
22+
"titleBar.inactiveForeground": "#e7e7e799"
23+
}
324
}

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,21 @@
33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6+
## dev - unreleased
7+
8+
### `Added`
9+
10+
- [#799](https://github.com/nf-core/mag/pull/799) - Add `--cat_classify_unbinned`, to enable taxonomic classification of unbinned contigs using CAT (by @dialvarezs)
11+
12+
### `Changed`
13+
- [#799](https://github.com/nf-core/mag/pull/799) - Migrate from local CAT modules to nf-core ones, updating version (by @dialvarezs)
14+
15+
### `Dependencies`
16+
17+
| Tool | Previous version | New version |
18+
| ------- | ---------------- | ----------- |
19+
| CAT | 5.2.3 | 6.0.1 |
20+
621
## v4.0.0 - [2025-05-22]
722

823
### `Added`

conf/base.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ process {
114114
}
115115
withName: CATPACK_BINS {
116116
cpus = { 10 * task.attempt }
117-
memory = { 128.GB * task.attempt }
117+
memory = { 120.GB * task.attempt }
118118
time = { 12.h * task.attempt }
119119
}
120120
withName: CATPACK_CONTIGS {
@@ -124,7 +124,7 @@ process {
124124
}
125125
withName: GTDBTK_CLASSIFYWF {
126126
cpus = { 10 * task.attempt }
127-
memory = { 120.GB * task.attempt }
127+
memory = { 128.GB * task.attempt }
128128
time = { 12.h * task.attempt }
129129
}
130130
//MEGAHIT returns exit code 250 when running out of memory

conf/modules.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,7 @@ process {
513513
path: { "${params.outdir}/Taxonomy/CAT/db" },
514514
mode: params.publish_dir_mode,
515515
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
516+
enabled: params.save_cat_db,
516517
]
517518
}
518519

docs/usage.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,21 +127,26 @@ The lineage directory (e.g., `bacteria_odb12`) should contain files such as `dat
127127
Then, you must provide `--busco_db <your_db>/` and `--busco_db_lineage <downloaded_lineage>` to the pipeline.
128128
You can also pass to `--busco_db` a URL pointing to a lineage tarball, or the tarball itself if stored locally.
129129

130-
> [! WARNING]
130+
> [!WARNING]
131131
> When any kind of parameter is provided to `--busco_db`, BUSCO will run in offline mode.
132132
> If the lineage specified via `--busco_db_lineage` is not found locally, or if you attempt automatic lineage selection without having a complete lineage dataset pre-downloaded, BUSCO will fail.
133133
134134
### CAT
135135

136-
> [! WARNING]
137-
> This database is very large at ~180 GB!
136+
> [!WARNING]
137+
> This database (CAT_nr) is very large at ~200 GB!
138138
> This can take a long time, so we strongly recommend downloading and unzipping prior to the pipeline run.
139139
140-
This database can be downloaded from the [CAT developers' website](https://tbb.bio.uu.nl/bastiaan/CAT_prepare/), which is based in the Netherlands (and could be slow for other regions of the world).
140+
This database can be downloaded from the [CAT developers' website](https://tbb.bio.uu.nl/tina/CAT_pack_prepare/), which is based in the Netherlands (and could be slow for other regions of the world).
141+
142+
> [!NOTE]
143+
> Only the `CAT_nr` database is supported by the pipeline.
144+
145+
Enabling the `--cat_db_generate` option will create a new database using the latest version of the NCBI nr database. This requires a large download (over 200 GB) and several hours of subsequent processing. If you enable the `--save_cat_db` option, the database will be saved in the `Taxonomy/CAT/db` directory and can be reused in future runs by specifying its path with `--cat_db`.
141146

142147
### GTDB
143148

144-
> [! WARNING]
149+
> [!WARNING]
145150
> This database is very large at ~110 GB!
146151
> This can take a long time, so we strongly recommend downloading and unzipping prior to the pipeline run.
147152

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ params {
9393
cat_db = null
9494
cat_db_generate = false
9595
cat_official_taxonomy = false
96+
cat_classify_unbinned = false
9697
save_cat_db = false
9798
skip_gtdbtk = false
9899
gtdb_db = "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz"

nextflow_schema.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,11 @@
520520
"type": "boolean",
521521
"description": "Only return official taxonomic ranks (Kingdom, Phylum, etc.) when running CAT."
522522
},
523+
"cat_classify_unbinned": {
524+
"type": "boolean",
525+
"description": "Classify unbinned contigs with CAT (contig mode).",
526+
"help_text": "This will run CAT classification on all unbinned contigs, which can be useful to get an idea of their taxonomic composition."
527+
},
523528
"skip_gtdbtk": {
524529
"type": "boolean",
525530
"description": "Skip the running of GTDB, as well as the automatic download of the database"

subworkflows/local/catpack.nf

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -82,24 +82,26 @@ workflow CATPACK {
8282
=========================================
8383
*/
8484

85-
CATPACK_CONTIGS(ch_unbins, ch_cat_db.db, ch_cat_db.taxonomy, [[:], []], [[:], []])
85+
if (params.cat_classify_unbinned) {
86+
CATPACK_CONTIGS(ch_unbins, ch_cat_db.db, ch_cat_db.taxonomy, [[:], []], [[:], []])
8687

87-
CATPACK_ADDNAMES_UNBINS(CATPACK_CONTIGS.out.contig2classification, ch_cat_db.taxonomy)
88+
CATPACK_ADDNAMES_UNBINS(CATPACK_CONTIGS.out.contig2classification, ch_cat_db.taxonomy)
8889

89-
ch_unbin_classification = CATPACK_ADDNAMES_UNBINS.out.txt
90-
.join(ch_unbins)
91-
.multiMap { meta, names, contigs ->
92-
names: [meta, names]
93-
contigs: [meta, contigs]
94-
}
90+
ch_unbin_classification = CATPACK_ADDNAMES_UNBINS.out.txt
91+
.join(ch_unbins)
92+
.multiMap { meta, names, contigs ->
93+
names: [meta, names]
94+
contigs: [meta, contigs]
95+
}
9596

96-
CATPACK_SUMMARISE(ch_unbin_classification.names, ch_unbin_classification.contigs)
97+
CATPACK_SUMMARISE(ch_unbin_classification.names, ch_unbin_classification.contigs)
9798

98-
ch_versions = ch_versions.mix(
99-
CATPACK_CONTIGS.out.versions.first(),
100-
CATPACK_ADDNAMES_UNBINS.out.versions.first(),
101-
CATPACK_SUMMARISE.out.versions.first(),
102-
)
99+
ch_versions = ch_versions.mix(
100+
CATPACK_CONTIGS.out.versions.first(),
101+
CATPACK_ADDNAMES_UNBINS.out.versions.first(),
102+
CATPACK_SUMMARISE.out.versions.first(),
103+
)
104+
}
103105

104106
emit:
105107
summary = CATPACK_ADDNAMES_BINS.out.txt.map { _meta, summary -> summary }

0 commit comments

Comments
 (0)