Skip to content

Commit 89f6c34

Browse files
rob-pclaude
andcommitted
fix(map): wire num_dovetail_fragments (was always 0)
Rust drops dovetailed concordant pairs in join_reads_and_filter (default no-dovetail policy), matching C++, but never reported the count: the mapper's dovetail check only inspected *surviving* PairedEndPaired joints, which are never dovetailed (pairing already filtered them), so num_dovetail_fragments stayed 0 while C++ reported the diagnostic. Set a thread-local when join_reads_and_filter rejects an otherwise-valid (opposite-strand, in-range fragment length) pair solely for dovetailing, and count the fragment when that happened AND no concordant pair survived for any target — i.e. its only concordant pairing was a dovetail (salmon's definition). Reporting only; no change to mapping behavior (mapped rate identical). On SRR1039508 (3M subset) Rust now reports 15,164 dovetail fragments (C++ 22,765; same order, differing by the tools' internal dovetail accounting + seeding). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01B7JMur5DmDpECddErpi2JS
1 parent 2bccea8 commit 89f6c34

2 files changed

Lines changed: 31 additions & 10 deletions

File tree

crates/salmon-map/src/mapper.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -193,23 +193,23 @@ pub fn map_read_pair<'idx, R: RefProvider>(
193193
let joints = join_reads_and_filter(left, right, &cfg.pair);
194194

195195
let had_candidates = !joints.is_empty();
196+
// Count this fragment toward num_dovetail_fragments when an otherwise-valid
197+
// concordant pair was rejected only because the mates dovetail AND no
198+
// concordant pair survived for any target — i.e. its only concordant pairing
199+
// was a dovetail (salmon's diagnostic). The pair is dropped regardless; this
200+
// just reports it. (Pairing already filtered dovetails, so the surviving
201+
// PairedEndPaired joints below are never themselves dovetailed.)
202+
let has_concordant_pair = joints
203+
.iter()
204+
.any(|j| matches!(j.status, MateStatus::PairedEndPaired));
205+
let dovetail = crate::pair::dovetail_rejected() && !has_concordant_pair;
196206
let mut below = 0u32;
197-
let mut dovetail = false;
198207
let mut raw = Vec::new();
199208
for j in joints {
200209
match j.status {
201210
MateStatus::PairedEndPaired => {
202211
let l = j.left.as_ref().unwrap();
203212
let r = j.right.as_ref().unwrap();
204-
// Dovetail: opposite-strand mates where the downstream (reverse)
205-
// mate actually starts upstream of the forward mate — they extend
206-
// past each other's start (salmon's `num_dovetail_fragments`).
207-
if l.is_fw != r.is_fw {
208-
let (fw, rc) = if l.is_fw { (l, r) } else { (r, l) };
209-
if rc.chain.ref_start() < fw.chain.ref_start() {
210-
dovetail = true;
211-
}
212-
}
213213
let refseq = refs.ref_seq(j.tid);
214214
let al = align_chain(r1, refseq, &l.chain, &cfg.align);
215215
let ar = align_chain(r2, refseq, &r.chain, &cfg.align);

crates/salmon-map/src/pair.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,23 @@ thread_local! {
161161
/// per-read `AHashMap<u32, Vec<usize>>` of the old `group_by_tid` are gone.
162162
static PAIRED_TIDS: std::cell::RefCell<AHashSet<u32>> =
163163
std::cell::RefCell::new(AHashSet::new());
164+
165+
/// Set by [`join_reads_and_filter`] when it rejects an otherwise-valid
166+
/// (opposite-strand, in-range fragment length) concordant pair *solely*
167+
/// because the mates dovetail, under the default no-dovetail policy. Cleared
168+
/// at the start of every call and read once per fragment by the mapper, which
169+
/// — combined with "no concordant pair survived" — counts the fragment toward
170+
/// `num_dovetail_fragments` (salmon's diagnostic for fragments whose only
171+
/// concordant pairing was a dovetail). The pairs are dropped either way; this
172+
/// only wires up the count, which previously stayed 0 because the mapper only
173+
/// inspected surviving pairs (never dovetailed after filtering).
174+
static DOVETAIL_REJECTED: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
175+
}
176+
177+
/// Whether the most recent [`join_reads_and_filter`] rejected an otherwise-valid
178+
/// concordant pair as a dovetail (see [`DOVETAIL_REJECTED`]).
179+
pub fn dovetail_rejected() -> bool {
180+
DOVETAIL_REJECTED.with(|c| c.get())
164181
}
165182

166183
/// Upper bound on concordant pairs emitted per target when several repeat loci
@@ -201,6 +218,7 @@ pub fn join_reads_and_filter(
201218
left.sort_unstable_by_key(|c| c.tid);
202219
right.sort_unstable_by_key(|c| c.tid);
203220

221+
DOVETAIL_REJECTED.with(|c| c.set(false));
204222
let mut joints = Vec::new();
205223
PAIRED_TIDS.with(|cell| {
206224
let paired = &mut *cell.borrow_mut();
@@ -248,6 +266,9 @@ pub fn join_reads_and_filter(
248266
continue;
249267
}
250268
if !cfg.allow_dovetail && is_dovetailed(l, r) {
269+
// An otherwise-valid concordant pair, dropped only because
270+
// the mates dovetail — flag it for the num_dovetail count.
271+
DOVETAIL_REJECTED.with(|c| c.set(true));
251272
continue;
252273
}
253274
let cov = l.chain.covered_read_bases() + r.chain.covered_read_bases();

0 commit comments

Comments
 (0)