Skip to content

Commit 8588b8b

Browse files
authored
Merge pull request #39 from ShawHahnLab/for-38-igblast-ref-from-species
Default to all available references for a given species for IgBLAST
2 parents 113a912 + 95f5abe commit 8588b8b

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
### Added
66

7+
* Automatic usage of all available references for a given species in `igblast`
8+
command ([#39])
79
* `identity` command for calculating pairwise identity between arbitrary
810
queries and references ([#31], [#37])
911
* Support for showing basic tree topology for Newick-format files in `show`
@@ -15,6 +17,7 @@
1517
* broken pipes (such as from `igseq something | something else`) are now
1618
handled gracefully ([#30])
1719

20+
[#39]: https://github.com/ShawHahnLab/igseq/pull/39
1821
[#37]: https://github.com/ShawHahnLab/igseq/pull/37
1922
[#35]: https://github.com/ShawHahnLab/igseq/pull/35
2023
[#33]: https://github.com/ShawHahnLab/igseq/pull/33

igseq/igblast.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ def igblast(
7070
LOGGER.info("given colmap: %s", colmap)
7171
LOGGER.info("given extra args: %s", extra_args)
7272
LOGGER.info("given threads: %s", threads)
73+
if species and not ref_paths:
74+
# If only species is given, default to using all available reference
75+
# sets for that species
76+
ref_paths = [_fuzzy_species_match(species)]
77+
LOGGER.info("inferred ref path: %s", ref_paths[0])
7378
attrs_list = vdj.parse_vdj_paths(ref_paths)
7479
for attrs in attrs_list:
7580
LOGGER.info("detected ref path: %s", attrs["path"])
@@ -104,12 +109,11 @@ def detect_organism(species_det, species=None):
104109
species = species_det.pop()
105110
LOGGER.info("detected species: %s", species)
106111
# match species names if needed
107-
species_key = re.sub("[^a-z]", "", species.lower())
108-
if species not in SPECIESMAP and species_key in SPECIESOTHER:
109-
species_new = SPECIESOTHER[species_key]
112+
species_new = _fuzzy_species_match(species)
113+
if species_new != species:
110114
LOGGER.info(
111-
"detected species as synonym: %s -> %s -> %s", species, species_key, species_new)
112-
species = species_new
115+
"detected species as synonym: %s -> %s", species, species_new)
116+
species = species_new
113117
try:
114118
organism = SPECIESMAP[species]
115119
except KeyError as err:
@@ -118,6 +122,15 @@ def detect_organism(species_det, species=None):
118122
LOGGER.info("detected IgBLAST organism: %s", organism)
119123
return organism
120124

125+
def _fuzzy_species_match(species):
    """Fuzzy-match one of our species names.

    The given name is lowercased and stripped of every character other than
    the letters a-z, and the result is used as a key into SPECIESOTHER.

    Returns the SPECIESOTHER entry for the normalized name.

    Raises util.IgSeqError (chained from the underlying KeyError) if the
    normalized name is not a key in SPECIESOTHER.
    """
    species_key = re.sub("[^a-z]", "", species.lower())
    try:
        return SPECIESOTHER[species_key]
    except KeyError as err:
        # Spell the names out as a plain comma-separated list; str() of a keys
        # view would leak a "dict_keys([...])" wrapper into the user-facing
        # message.
        keys = ", ".join(map(str, SPECIESMAP.keys()))
        raise util.IgSeqError(f"species not recognized. should be one of: {keys}") from err
133+
121134
@contextmanager
122135
def run_igblast(
123136
db_dir, organism, query_path, threads=1, fmt_in=None, colmap=None, extra_args=None):

test_igseq/test_igblast.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,24 @@ def test_detect_organism(self):
3434
igblast.detect_organism(["species1", "species2"], "rhesus"),
3535
"rhesus_monkey")
3636

37+
def test_detect_ref(self):
    """Check that a reference is picked from the species when none is given."""
    expected = "inferred ref path: rhesus"
    # Both spellings of the species should lead to the same inferred ref
    for name in ("rhesus", "rhesus_monkey"):
        with self.assertLogs(level="INFO") as log_cm:
            igblast.igblast(ref_paths=None, query_path="-", species=name, dry_run=True)
        inferred = any(expected in msg for msg in log_cm.output)
        self.assertTrue(inferred)
    # An explicitly-given ref should be used as-is, with nothing inferred
    with self.assertLogs(level="INFO") as log_cm:
        igblast.igblast(ref_paths=["rhesus"], query_path="-", species="rhesus", dry_run=True)
    inferred = any(expected in msg for msg in log_cm.output)
    self.assertFalse(inferred)
    # A species name that can't be matched should still be reported as an error
    with self.assertRaises(IgSeqError) as err_cm:
        igblast.igblast(ref_paths=None, query_path="-", species="unknown", dry_run=True)
    self.assertIn("species not recognized", err_cm.exception.message)
54+
3755

3856
class TestIgblastInternal(TestBase):
3957
"""Test igblast with internal db."""

0 commit comments

Comments
 (0)