diff --git a/README.md b/README.md index f853ae1..b684a56 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,12 @@ aliasor.partial_compress("B.1.1.529.3.1",accepted_aliases=["AY"]) # 'B.1.1.529.3 aliasor.partial_compress("B.1.617.2",accepted_aliases=["AY"]) # 'AY.2' aliasor.partial_compress('B.1.1.529.2.75.1.2',up_to=4, accepted_aliases={"BA"}) == 'BL.2' + +# Collapse an uncompressed lineage to the nearest entry of potential_parents: the lineage itself or its closest listed ancestor, in compressed form. If none is listed, the uncompressed lineage is returned unchanged. +aliasor.collapse("B.1.1.529.3.1", potential_parents=['BA.3']) # 'BA.3' +aliasor.collapse("B.1.1.529.3.1", potential_parents=['BA.3', 'BA.3.1']) # 'BA.3.1' +aliasor.collapse("B.1.1.529.3.1", potential_parents=['B.1.1', 'BZ.1', 'AY.4']) # 'B.1.1' +aliasor.collapse("B.1.1.529.3.1", potential_parents=['A']) # "B.1.1.529.3.1" ``` See [tests](tests/test_aliasor.py) for more examples. diff --git a/src/pango_aliasor/aliasor.py b/src/pango_aliasor/aliasor.py index 92eff1e..a92afb8 100644 --- a/src/pango_aliasor/aliasor.py +++ b/src/pango_aliasor/aliasor.py @@ -1,4 +1,7 @@ #%% +from typing import List + + class Aliasor: def __init__(self, alias_file=None): import json @@ -82,5 +85,28 @@ def partial_compress(self, name, up_to: int = 0, accepted_aliases: set = {}): return alias + "." + ".".join(name_split[(3 * up_to + 1) :]) + def collapse(self, uncompressed_lineage: str, potential_parents: List[str]): + """ + Return the compressed name (as produced by self.compress) of the nearest lineage found in potential_parents: uncompressed_lineage itself is checked first, then each successively shorter dot-separated prefix (closest ancestor first). + If none of them is listed, return uncompressed_lineage unchanged. Entries of potential_parents are matched against compressed names. 
+ + aliasor.collapse("B.1.1.529.3.1", potential_parents=['BA.3']) # 'BA.3' + + aliasor.collapse("B.1.1.529.3.1", potential_parents=['BA.3', 'BA.3.1']) # 'BA.3.1' + + aliasor.collapse("B.1.1.529.3.1", potential_parents=['B.1.1', 'BZ.1', 'AY.4']) # 'B.1.1' + + aliasor.collapse("B.1.1.529.3.1", potential_parents=['A']) # "B.1.1.529.3.1" + """ + compressed_lineage = self.compress(uncompressed_lineage) + if compressed_lineage in potential_parents: + return compressed_lineage + parts = uncompressed_lineage.split(".") + compressed_parent_lineage = uncompressed_lineage + for i in range(1, len(parts)): + compressed_parent_lineage = self.compress(".".join(parts[:-i])) + if compressed_parent_lineage in potential_parents: + return compressed_parent_lineage + return uncompressed_lineage # %% diff --git a/tests/test_aliasor.py b/tests/test_aliasor.py index 67da938..bef05ea 100644 --- a/tests/test_aliasor.py +++ b/tests/test_aliasor.py @@ -79,4 +79,12 @@ def test_partial_alias_combination(): assert aliasor.partial_compress('B.1.617.2.3',up_to=1, accepted_aliases={"BA","AZ"}) == "AY.3" assert aliasor.partial_compress('B.1.1.529.2.75.1.2',up_to=3, accepted_aliases={"BA"}) == 'BL.2' assert aliasor.partial_compress('B.1.1.529.2.75.1.2',up_to=4, accepted_aliases={"BA"}) == 'BL.2' - assert aliasor.partial_compress('B.1.1.529.2.75.1.2',up_to=1, accepted_aliases={"BA"}) == 'BA.2.75.1.2' \ No newline at end of file + assert aliasor.partial_compress('B.1.1.529.2.75.1.2',up_to=1, accepted_aliases={"BA"}) == 'BA.2.75.1.2' + +def test_collapse(): + aliasor = Aliasor() + assert aliasor.collapse('B.1.1.529.3.1', potential_parents=['BA.3']) == 'BA.3' + assert aliasor.collapse('B.1.1.529.3.1', potential_parents=['BA.3', 'BA.3.1']) == 'BA.3.1' + assert aliasor.collapse('B.1.1.529.3.1', potential_parents=['B.1.1', 'BZ.1', 'AY.4']) == 'B.1.1' + assert aliasor.collapse('B.1.1.529.3.1', potential_parents=['A']) == 'B.1.1.529.3.1' + assert aliasor.collapse('XA.1', potential_parents=['B.1.1', 'BZ.1', 
'AY.4']) == 'XA.1' \ No newline at end of file