Skip to content

Commit f5e8fe6

Browse files
authored
Merge pull request #1023 from COMBINE-lab/feat/no-frag-length-dist
feat(quant): implement --noFragLengthDist; confirm sketch fragment-length weighting
2 parents 15ee01b + 68c4225 commit f5e8fe6

4 files changed

Lines changed: 29 additions & 3 deletions

File tree

crates/salmon-cli/src/main.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -708,9 +708,6 @@ fn run_quant(args: QuantArgs, quiet: bool) -> Result<()> {
708708
if args.eqclasses.is_some() {
709709
tracing::warn!("--eqclasses (quantify from a precomputed equivalence-class file) is not yet implemented and is ignored; mapping/alignment input is used instead.");
710710
}
711-
if args.no_frag_length_dist {
712-
tracing::warn!("--noFragLengthDist is accepted but not yet implemented and has no effect: the fragment-length distribution is still used in the per-fragment probability.");
713-
}
714711
if args.sample_out || args.sample_unaligned || args.write_qualities {
715712
tracing::warn!("--sampleOut/--sampleUnaligned/--writeQualities (posterior-sampled BAM output) are accepted but not yet implemented and have no effect.");
716713
}
@@ -866,6 +863,7 @@ fn run_quant(args: QuantArgs, quiet: bool) -> Result<()> {
866863
opts.thinning_factor = args.thinning_factor;
867864
opts.no_length_correction = args.no_length_correction;
868865
opts.model_single_frag_prob = !args.no_single_frag_prob;
866+
opts.no_frag_length_dist = args.no_frag_length_dist;
869867
opts.map_config.align.min_score_fraction = args.min_score_fraction;
870868
opts.map_config.pair.orphan_chain_sub_thresh = args.orphan_chain_sub_thresh;
871869
opts.map_config.align.full_length_alignment = args.full_length_alignment;

crates/salmon-quant/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ pub struct QuantOptions {
101101
/// the bounded-CMF "ambiguous" weight (salmon default `true`); `false` =
102102
/// `--noSingleFragProb`.
103103
pub model_single_frag_prob: bool,
104+
/// disable the fragment-length distribution in the per-fragment assignment
105+
/// probability (`--noFragLengthDist`); default `false`.
106+
pub no_frag_length_dist: bool,
104107
/// fragment-length distribution prior mean, SD, and max tracked length
105108
/// (`--fldMean` / `--fldSD` / `--fldMax`)
106109
pub fld_mean: f64,
@@ -173,6 +176,7 @@ impl QuantOptions {
173176
thinning_factor: 16,
174177
no_length_correction: false,
175178
model_single_frag_prob: true,
179+
no_frag_length_dist: false,
176180
fld_mean: 250.0,
177181
fld_sd: 25.0,
178182
fld_max: 1000,
@@ -422,6 +426,7 @@ pub fn quantify(opts: &QuantOptions) -> Result<QuantResult> {
422426
online: online.as_ref(),
423427
paired_lib: opts.is_paired(),
424428
model_single_frag_prob: opts.model_single_frag_prob,
429+
no_frag_length_dist: opts.no_frag_length_dist,
425430
num_processed,
426431
num_mapped,
427432
num_orphan: &num_orphan,

crates/salmon-quant/src/processor.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ pub(crate) struct Shared<'a> {
8787
/// the bounded-CMF "ambiguous" weight (salmon default). When `false`
8888
/// (`--noSingleFragProb`) orphans fall back to a flat penalty / weight-1.
8989
pub model_single_frag_prob: bool,
90+
/// disable the fragment-length distribution in the per-fragment assignment
91+
/// probability entirely (`--noFragLengthDist`).
92+
pub no_frag_length_dist: bool,
9093
pub num_processed: &'a AtomicU64,
9194
pub num_mapped: &'a AtomicU64,
9295
/// mapped fragments whose representative mapping is an orphan (only one mate
@@ -196,9 +199,16 @@ fn frag_log_prob(
196199
use_aux: bool,
197200
model_single_frag_prob: bool,
198201
paired_lib: bool,
202+
no_frag_length_dist: bool,
199203
pmf: &[f64],
200204
cmf: &[f64],
201205
) -> f64 {
206+
// --noFragLengthDist: do not consider the fragment-length distribution in the
207+
// per-fragment assignment probability (salmon's flag). Applies to both the
208+
// proper-pair PMF term and the orphan/single-end ambiguous term.
209+
if no_frag_length_dist {
210+
return LOG_1;
211+
}
202212
if m.status == MateStatus::PairedEndPaired && m.fragment_len > 0 {
203213
if use_aux && !pmf.is_empty() {
204214
let flen = (m.fragment_len as usize).min(pmf.len() - 1);
@@ -461,6 +471,7 @@ fn record(
461471
use_aux,
462472
sh.model_single_frag_prob,
463473
sh.paired_lib,
474+
sh.no_frag_length_dist,
464475
&fld_pmf,
465476
&fld_cmf,
466477
);
@@ -529,6 +540,7 @@ fn record(
529540
use_aux,
530541
sh.model_single_frag_prob,
531542
sh.paired_lib,
543+
sh.no_frag_length_dist,
532544
&fld_pmf,
533545
&fld_cmf,
534546
)

docs/release-notes-2.1.0.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,17 @@ Proper pairs are likewise length-conditioned (`pmf(flen) − cmf(txpLen)`), whic
356356
transcripts. The new `--noSingleFragProb` flag (default off) restores the old flat
357357
behavior, matching salmon's option of the same name.
358358

359+
This fragment-length weighting now applies to **sketch (pseudoalignment) mode**
360+
too: sketch already trains the fragment-length distribution from concordant
361+
pairs, and — now that sketch mappings carry their positions (above) — the
362+
orphan/single-end ambiguous term contributes as well. `--noFragLengthDist`,
363+
previously an accept-and-warn no-op, is **now implemented**: it disables the
364+
fragment-length term in the per-fragment assignment probability for both modes (selective alignment and sketch).
365+
On the simulated data, enabling the fragment-length model (the default) improves
366+
accuracy in both modes — Spearman correlation vs. truth rises ~0.011 (selective alignment, SA) / ~0.012 (sketch) on
367+
the easy set and ~0.015 / ~0.016 on the hard set — and brings sketch accuracy to
368+
within ~0.005 Spearman of selective alignment.
369+
359370
## `num_dovetail_fragments` is now reported
360371

361372
Rust always dropped dovetailed concordant pairs under the default no-dovetail

0 commit comments

Comments
 (0)