diff --git a/src/pango_aliasor/aliasor.py b/src/pango_aliasor/aliasor.py index aae0485..c3aebe4 100644 --- a/src/pango_aliasor/aliasor.py +++ b/src/pango_aliasor/aliasor.py @@ -1,4 +1,3 @@ -#%% class Aliasor: def __init__(self, alias_file=None): import json @@ -24,7 +23,13 @@ def __init__(self, alias_file=None): self.realias_dict = {v: k for k, v in self.alias_dict.items()} - def compress(self, name): + def compress(self, name, assign=False): + """ + Returns the compressed lineage name. + Set assign to True to automatically define new aliases for otherwise unhandled designations. + For example, if you want to compress 'BA.5.2.5.6', and BA.5.2.5 does not have an accepted alias, + it will assign BA.5.2.5 to the next available code (in this example, EN) and return EN.6. + """ name_split = name.split(".") levels = len(name_split) - 1 num_indirections = (levels - 1) // 3 @@ -32,9 +37,15 @@ def compress(self, name): return name alias = ".".join(name_split[0 : (3 * num_indirections + 1)]) ending = ".".join(name_split[(3 * num_indirections + 1) :]) + if assign and alias not in self.realias_dict: + #note- this cannot produce lineage aliases prefixed with X, which are handled separately as they represent recombinants. + self.assign_alias(alias) return self.realias_dict[alias] + "." + ending def uncompress(self, name): + """ + Returns the uncompressed lineage name. + """ name_split = name.split(".") letter = name_split[0] try: @@ -89,6 +100,60 @@ def partial_compress(self, name, up_to: int = 0, accepted_aliases: set = {}): if name_split[(3 * up_to + 1) :] == []: return alias return alias + "." 
class Aliasor:
    """
    Alias-allocation methods of Aliasor.

    Alias codes are strings of capital letters numbered in spreadsheet-column
    order (A=1 ... Z=26, AA=27, AB=28, ...), so every code has a distinct
    number and converting a code to a number and back returns the same code.
    The instance methods read and write self.alias_dict (alias -> lineage name)
    and self.realias_dict (lineage name -> alias).
    """

    @staticmethod
    def _charToB(char):
        """
        Returns the zero-based alphabet index of a capital letter ('A' -> 0).
        """
        return ord(char) - ord("A")

    @staticmethod
    def _bToChar(n, banned='IOX'):
        """
        Returns the capital letter at zero-based index n.
        If that letter is in banned, the next letter that is not banned is returned instead.
        Raises ValueError if no permitted letter exists between index n and 'Z'.
        """
        for code in range(ord("A") + n, ord("Z") + 1):
            letter = chr(code)
            if letter not in banned:
                return letter
        raise ValueError(
            "no permitted letter at or after index {} (banned={!r})".format(n, banned)
        )

    @staticmethod
    def _numberToString(n, b=26, banned='IOX'):
        """
        Returns the alias code for the number n (1 -> 'A', 26 -> 'Z', 27 -> 'AA').
        Numbers below 1 give the empty string.
        Letters in banned are replaced by the next permitted letter, so the
        returned code may decode to a number larger than n.
        """
        letters = []
        while n > 0:
            # Subtracting 1 before dividing is what makes 'A' a real digit
            # rather than a leading zero ('A' and 'AA' must stay distinct).
            n, index = divmod(n - 1, b)
            letters.append(Aliasor._bToChar(index, banned))
        # Least significant letter was produced first.
        return "".join(reversed(letters))

    @staticmethod
    def _stringToNumber(cstr, b=26):
        """
        Returns the number of the alias code cstr ('A' -> 1, 'Z' -> 26, 'AA' -> 27).
        The empty string gives 0.
        """
        num = 0
        for char in cstr:
            num = num * b + Aliasor._charToB(char) + 1
        return num

    def next_available_alias(self, recombinant=False):
        """
        Returns the next available alias string.
        Tracks recombinants separately; set recombinant to True to get the next available recombinant alias.
        The result is one past the highest alias of the requested kind in self.alias_dict;
        when there is none yet, the first code ('A', or 'XA' for recombinants) is returned.
        """
        if recombinant:
            # Recombinant aliases are 'X' followed by a letter code; only the
            # part after the 'X' is numbered.
            prefix = 'X'
            used = [
                Aliasor._stringToNumber(key[1:])
                for key in self.alias_dict
                if key.startswith('X')
            ]
        else:
            prefix = ''
            used = [
                Aliasor._stringToNumber(key)
                for key in self.alias_dict
                if not key.startswith('X')
            ]
        # default=0 keeps an empty alias set from raising inside max().
        return prefix + Aliasor._numberToString(max(used, default=0) + 1)

    def assign_alias(self, name, recombinant=False):
        """
        Assigns the input name to the next available alias.
        Set recombinant to True to assign it to the next recombinant alias.
        Both self.alias_dict and self.realias_dict are updated.
        """
        new_alias = self.next_available_alias(recombinant)
        self.alias_dict[new_alias] = name
        self.realias_dict[name] = new_alias