Skip to content

Commit a83b70e

Browse files
authored
Merge pull request #108 from bigbio/feat/strip-unknown-mods
feat(diann): add --strip_unknown_mods to recover hydroxyproline / non-standard PTMs
2 parents c9f974d + 470282d commit a83b70e

9 files changed

Lines changed: 40 additions & 19 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
### `Added`
99

10+
- New `--strip_unknown_mods` parameter (default `false`) that adds `--strip-unknown-mods` to the DIA-NN steps. Some declared variable modifications are not built into the DIA-NN deep-learning predictor — for example **Oxidation on proline**. During in-silico library generation DIA-NN **silently skips** those precursors (logged as `skipping N precursors, unrecognised modifications`), so they never enter the library and are never identified. Enabling `--strip_unknown_mods` forces DIA-NN to predict spectra/RTs/IMs for them so they are retained and searchable; it is a no-op when every declared modification is already recognised. Documented under [Where each GUI flag goes](docs/usage.md) and [Common pitfalls](docs/usage.md).
1011
- DIA-NN **2.5.1** (academic) version profile `-profile diann_v2_5_1` (container `ghcr.io/bigbio/diann:2.5.1`).
1112
- **DIA-NN Enterprise (2.5.1) support** via `-profile diann_v2_5_1_enterprise` (container `ghcr.io/bigbio/diann-enterprise:2.5.1`). New `--enable_kb` flag adds the Enterprise Knowledge Base (`--kb`) to the first-pass search to boost identifications (mainly human data); it is gated to the Enterprise build and **on by default** under the `diann_v2_5_1_enterprise` profile (disable with `--enable_kb false`). New `--diann_license <file>` stages the Enterprise license key into each DIA-NN step as `--license`, with fallback to a key bundled next to the binary when unset. The license key is a per-user secret and is never committed or bundled into the shared image.
1213

docs/usage.md

Lines changed: 21 additions & 19 deletions
Large diffs are not rendered by default.

modules/local/diann/assemble_empirical_library/main.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
5151
scoring_mode = params.scoring_mode == 'proteoforms' ? '--proteoforms' :
5252
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
5353
aa_eq = params.aa_eq ? '--aa-eq' : ''
54+
strip_unknown_mods = params.strip_unknown_mods ? "--strip-unknown-mods" : ""
5455
diann_tims_sum = params.tims_sum ? "--quant-tims-sum" : ""
5556
diann_im_window = params.im_window ? "--im-window $params.im_window" : ""
5657
diann_dda_flag = meta.acquisition_method == 'dda' ? "--dda" : ""
@@ -82,6 +83,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
8283
--gen-spec-lib \\
8384
${scoring_mode} \\
8485
${aa_eq} \\
86+
${strip_unknown_mods} \\
8587
${license_arg} \\
8688
${kb} \\
8789
${diann_tims_sum} \\

modules/local/diann/final_quantification/main.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ process FINAL_QUANTIFICATION {
6767
scoring_mode = params.scoring_mode == 'proteoforms' ? '--proteoforms' :
6868
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
6969
aa_eq = params.aa_eq ? '--aa-eq' : ''
70+
strip_unknown_mods = params.strip_unknown_mods ? "--strip-unknown-mods" : ""
7071
// Precursor q-value: explicit param wins, else auto by diann_version (<2.5 -> 0.01, >=2.5 -> 0.05)
7172
precursor_qvalue = VersionUtils.resolvePrecursorQvalue(params)
7273
// DIA-NN Enterprise license; falls back to a key next to the binary when no path is provided
@@ -109,6 +110,7 @@ process FINAL_QUANTIFICATION {
109110
${quantums_params} \\
110111
${scoring_mode} \\
111112
${aa_eq} \\
113+
${strip_unknown_mods} \\
112114
${diann_use_quant} \\
113115
${diann_dda_flag} \\
114116
${diann_export_quant} \\

modules/local/diann/individual_analysis/main.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ process INDIVIDUAL_ANALYSIS {
7272
scoring_mode = params.scoring_mode == 'proteoforms' ? '--proteoforms' :
7373
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
7474
aa_eq = params.aa_eq ? '--aa-eq' : ''
75+
strip_unknown_mods = params.strip_unknown_mods ? "--strip-unknown-mods" : ""
7576
diann_tims_sum = params.tims_sum ? "--quant-tims-sum" : ""
7677
diann_im_window = params.im_window ? "--im-window $params.im_window" : ""
7778
diann_dda_flag = meta.acquisition_method == 'dda' ? "--dda" : ""
@@ -119,6 +120,7 @@ process INDIVIDUAL_ANALYSIS {
119120
${max_fr_mz} \\
120121
${scoring_mode} \\
121122
${aa_eq} \\
123+
${strip_unknown_mods} \\
122124
${diann_tims_sum} \\
123125
${diann_im_window} \\
124126
${diann_dda_flag} \\

modules/local/diann/insilico_library_generation/main.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ process INSILICO_LIBRARY_GENERATION {
4040
scoring_mode = params.scoring_mode == 'proteoforms' ? '--proteoforms' :
4141
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
4242
aa_eq = params.aa_eq ? '--aa-eq' : ''
43+
strip_unknown_mods = params.strip_unknown_mods ? "--strip-unknown-mods" : ""
4344
diann_dda_flag = is_dda ? "--dda" : ""
4445
diann_light_models = params.light_models ? "--light-models" : ""
4546
// Fine-tuned model flags — only set when tuned model files are provided
@@ -72,6 +73,7 @@ process INSILICO_LIBRARY_GENERATION {
7273
--gen-spec-lib \\
7374
${scoring_mode} \\
7475
${aa_eq} \\
76+
${strip_unknown_mods} \\
7577
${license_arg} \\
7678
${diann_light_models} \\
7779
${tuned_tokens_flag} \\

modules/local/diann/preliminary_analysis/main.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ process PRELIMINARY_ANALYSIS {
4343
scoring_mode = params.scoring_mode == 'proteoforms' ? '--proteoforms' :
4444
params.scoring_mode == 'peptidoforms' ? '--peptidoforms' : ''
4545
aa_eq = params.aa_eq ? '--aa-eq' : ''
46+
strip_unknown_mods = params.strip_unknown_mods ? "--strip-unknown-mods" : ""
4647

4748
// I am using the ["key"] syntax here, since the preprocessed meta
4849
// was evaluating to null when using the dot notation.
@@ -107,6 +108,7 @@ process PRELIMINARY_ANALYSIS {
107108
${max_fr_mz} \\
108109
${scoring_mode} \\
109110
${aa_eq} \\
111+
${strip_unknown_mods} \\
110112
${diann_tims_sum} \\
111113
${diann_im_window} \\
112114
--no-prot-inf \\

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ params {
6868
debug_level = 3
6969
speclib = null
7070
extra_args = null
71+
strip_unknown_mods = false // add '--strip-unknown-mods': force DIA-NN to predict spectra/RTs/IMs for declared modifications its deep-learning predictor does not recognise (e.g. Oxidation on proline). Without it those precursors are silently skipped from the in-silico library and never identified. No-op when all declared modifications are recognised.
7172
scoring_mode = 'generic' // Scoring mode: 'generic' (default), 'proteoforms' (variant detection, >= 2.0), 'peptidoforms' (PTM analysis)
7273
aa_eq = false // add '--aa-eq': treat I&L, Q&E, N&D as equivalent during reannotation (essential for entrapment FDR benchmarks)
7374
dda = false // Fallback: explicitly enable DDA when SDRF lacks acquisition method (requires DIA-NN >= 2.3.2)

nextflow_schema.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,13 @@
536536
"fa_icon": "fas fa-filter",
537537
"hidden": true
538538
},
539+
"strip_unknown_mods": {
540+
"type": "boolean",
541+
"default": false,
542+
"description": "Add --strip-unknown-mods so DIA-NN predicts spectra/RTs/IMs for declared modifications its predictor does not recognise (e.g. Oxidation on proline). Without it those precursors are skipped from the in-silico library.",
543+
"fa_icon": "fas fa-prescription-bottle",
544+
"help_text": "Some declared variable modifications are not built into the DIA-NN deep-learning predictor (e.g. Oxidation on proline). During in-silico library generation DIA-NN silently skips those precursors (logged as 'skipping N precursors, unrecognised modifications') unless this is enabled, so they never enter the library and are never identified. No-op when all declared modifications are recognised by the predictor."
545+
},
539546
"extra_args": {
540547
"type": "string",
541548
"description": "Extra arguments appended to all DIA-NN steps. Flags incompatible with specific steps are automatically stripped with a warning.",

0 commit comments

Comments
 (0)