Skip to content

Commit 2787a46

Browse files
committed
Align no-overlap indexing in Rust
1 parent 028bfd3 commit 2787a46

File tree

2 files changed

+59
-4
lines changed

2 files changed

+59
-4
lines changed

src/seqspec_index.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,8 @@ fn get_coordinate_by_read_id(spec: &Assay, modality: &String, read_id: &str) ->
255255
}
256256
}
257257
fn filter_index_no_overlap(mut indices: Vec<Coordinate>) -> Vec<Coordinate> {
258+
let mut seen: HashSet<String> = HashSet::new();
258259
for idx in &mut indices {
259-
let mut seen: HashSet<String> = HashSet::new();
260260
let mut new_rcv: Vec<RegionCoordinate> = Vec::new();
261261
for rgn in idx.rcv.iter() {
262262
let rid = rgn.region.region_id.clone();
@@ -984,9 +984,33 @@ mod tests {
984984
let indices = get_index_by_reads(&spec, &modality);
985985
let orig_count: usize = indices.iter().map(|i| i.rcv.len()).sum();
986986
let filtered = filter_index_no_overlap(indices);
987-
// Filtered should have same or fewer total region coordinates
987+
// Non-overlapping DOGMA RNA reads should be unchanged.
988988
let filt_count: usize = filtered.iter().map(|i| i.rcv.len()).sum();
989-
assert!(filt_count <= orig_count);
989+
assert_eq!(filt_count, orig_count);
990+
}
991+
992+
#[test]
993+
fn test_filter_index_no_overlap_removes_regions_seen_in_earlier_reads() {
994+
let spec = load_spec(&PathBuf::from(
995+
"tests/fixtures/check_overlap_warning/spec.yaml",
996+
));
997+
let modality = "rna".to_string();
998+
let indices = get_index_by_reads(&spec, &modality);
999+
let filtered = filter_index_no_overlap(indices);
1000+
1001+
assert_eq!(filtered.len(), 2);
1002+
assert_eq!(filtered[0].query_id, "rna_R1");
1003+
assert_eq!(filtered[0].rcv.len(), 2);
1004+
assert_eq!(
1005+
filtered[0]
1006+
.rcv
1007+
.iter()
1008+
.map(|region| region.region.region_id.clone())
1009+
.collect::<Vec<_>>(),
1010+
vec!["barcode".to_string(), "umi".to_string()]
1011+
);
1012+
assert_eq!(filtered[1].query_id, "rna_R2");
1013+
assert_eq!(filtered[1].rcv.len(), 0);
9901014
}
9911015

9921016
#[test]

tests/test_seqspec_index.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
from seqspec.seqspec_index import seqspec_index, format_index
1+
from seqspec.seqspec_index import seqspec_index, format_index, filter_index_no_overlap
22
from seqspec.Assay import Assay
33
from seqspec.Region import RegionCoordinate
44
import json
5+
from pathlib import Path
6+
from seqspec.utils import load_spec
57

68

79
def test_seqspec_index(dogmaseq_dig_spec: Assay):
@@ -374,3 +376,32 @@ def test_format_index():
374376
split = format_index(indices, "splitcode")
375377
assert "@extract" in split
376378
assert "groups\tids\ttags\tdistances\tlocations" in split
379+
380+
381+
def test_filter_index_no_overlap_is_noop_when_reads_do_not_overlap(
382+
dogmaseq_dig_spec: Assay,
383+
):
384+
indices = seqspec_index(
385+
spec=dogmaseq_dig_spec, modality="rna", ids=["rna_R1", "rna_R2"], idtype="read"
386+
)
387+
388+
filtered = filter_index_no_overlap(indices)
389+
390+
assert len(filtered) == 2
391+
assert len(filtered[0].rcv) == 2
392+
assert len(filtered[1].rcv) == 1
393+
394+
395+
def test_filter_index_no_overlap_removes_regions_seen_in_earlier_reads():
396+
spec = load_spec(Path("tests/fixtures/check_overlap_warning/spec.yaml"))
397+
indices = seqspec_index(
398+
spec=spec, modality="rna", ids=["rna_R1", "rna_R2"], idtype="read"
399+
)
400+
401+
filtered = filter_index_no_overlap(indices)
402+
403+
assert len(filtered) == 2
404+
assert filtered[0].query_id == "rna_R1"
405+
assert [region.region_id for region in filtered[0].rcv] == ["barcode", "umi"]
406+
assert filtered[1].query_id == "rna_R2"
407+
assert filtered[1].rcv == []

0 commit comments

Comments
 (0)