Skip to content

Commit 00287a1

Browse files
committed
Typing improvements
1 parent 35bcff6 commit 00287a1

File tree

9 files changed

+121
-70
lines changed

9 files changed

+121
-70
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ build-backend = "setuptools.build_meta"
66
name = "tola-agp-tpf-utils"
77
version = "1.2.1"
88
readme = "README.md"
9-
requires-python = ">=3.10"
9+
requires-python = ">=3.11"
1010
dependencies = [
1111
"click",
1212
"pyyaml",
13-
'importlib-metadata; python_version>="3.10"',
13+
'importlib-metadata; python_version>="3.11"',
1414
]
1515

1616
[dependency-groups]

src/tola/assembly/assembly.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,20 @@
22
import re
33
import textwrap
44

5+
from tola.assembly.fragment import Junction
56
from tola.assembly.scaffold import Scaffold
67

78

89
class Assembly:
910
def __init__(
1011
self, name, header=None, scaffolds=None, bp_per_texel=None, curated=False
1112
):
12-
self.name = str(name)
13-
self.scaffolds = scaffolds if scaffolds else []
14-
self.header = header if header else []
15-
self.curated = curated
13+
self.name: str = str(name)
14+
self.scaffolds: list[Scaffold] = scaffolds if scaffolds else []
15+
self.header: list[str] = header if header else []
16+
self.curated: bool = curated
1617
if bp_per_texel:
17-
self.bp_per_texel = bp_per_texel
18+
self.bp_per_texel: float = bp_per_texel
1819

1920
def __repr__(self):
2021
txt = io.StringIO()
@@ -98,13 +99,13 @@ def name_natural_key(obj):
9899
for i, x in enumerate(re.split(r"(I+V?|\d+)", obj.name))
99100
)
100101

101-
def fragment_junction_set(self):
102-
junctions = set()
102+
def fragment_junction_set(self) -> set[Junction]:
103+
junctions: set[Junction] = set()
103104
for scffld in self.scaffolds:
104105
junctions |= scffld.fragment_junction_set()
105106
return junctions
106107

107-
def fragment_junctions_by_asm_prefix(self):
108+
def fragment_junctions_by_asm_prefix(self) -> dict[str, set[Junction]]:
108109
prefix_junctions = {}
109110
for scffld in self.scaffolds:
110111
try:

src/tola/assembly/build_assembly.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import logging
22
import math
3-
from collections.abc import Iterator
3+
from typing import TypeAlias
44

55
from tola.assembly.assembly import Assembly
66
from tola.assembly.assembly_stats import AssemblyStats
77
from tola.assembly.build_utils import (
88
ChrNamer,
99
FoundFragment,
10-
OverhangPremise,
1110
OverhangResolver,
1211
ScaffoldNamer,
1312
)
@@ -20,6 +19,9 @@
2019
log = logging.getLogger(__name__)
2120

2221

22+
AssemblyDict: TypeAlias = dict[str | None, Assembly]
23+
24+
2325
class BuildAssembly(Assembly):
2426
"""
2527
Class for building an Assembly from a Pretext Assembly and the
@@ -42,8 +44,8 @@ def __init__(
4244
self.default_gap = default_gap
4345
self.found_fragments = {}
4446
self.fragments_found_more_than_once = {}
45-
self.scaffold_namer = ScaffoldNamer()
46-
self.assembly_stats = AssemblyStats()
47+
self.scaffold_namer: ScaffoldNamer = ScaffoldNamer()
48+
self.assembly_stats: AssemblyStats = AssemblyStats()
4749
if autosome_prefix:
4850
self.autosome_prefix = autosome_prefix
4951

@@ -266,19 +268,17 @@ def add_missing_scaffolds_from_input(self, input_asm: Assembly) -> None:
266268
new_scffld.haplotype = scaffold_namer.current_haplotype
267269
self.add_scaffold(new_scffld)
268270

269-
def assembly_with_scaffolds_in_map_order(self) -> dict[None, Assembly]:
270-
scaffolds, _ = self.__build_name_and_sort_assemblies()
271-
return {
272-
None: Assembly("Pretext", scaffolds=scaffolds)
273-
}
271+
def assembly_with_scaffolds_in_map_order(self) -> AssemblyDict:
272+
scaffolds, assemblies = self.__build_name_and_sort_assemblies()
273+
return {"map-order": Assembly("Pretext", scaffolds=scaffolds)}
274274

275-
def assemblies_with_scaffolds_fused(self) -> dict[str | None, Assembly]:
275+
def assemblies_with_scaffolds_fused(self) -> AssemblyDict:
276276
_, assemblies = self.__build_name_and_sort_assemblies()
277277
return assemblies
278278

279279
def __build_name_and_sort_assemblies(
280280
self,
281-
) -> tuple[list[Scaffold], dict[str | None, Assembly]]:
281+
) -> tuple[list[Scaffold], AssemblyDict]:
282282
chr_namer = ChrNamer(chr_prefix=self.autosome_prefix)
283283

284284
scaffolds = self.scaffolds_fused_by_name()

src/tola/assembly/build_utils.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import re
77
import textwrap
8+
from abc import ABC, abstractmethod
89

910
from tola.assembly.fragment import Fragment
1011
from tola.assembly.overlap_result import OverlapResult
@@ -476,7 +477,7 @@ def remove_scaffold(self, scaffold: Scaffold) -> None:
476477
self.scaffolds.remove(scaffold)
477478

478479

479-
class OverhangPremise:
480+
class OverhangPremise(ABC):
480481
"""
481482
Stores a "what-if" for removal of a terminal (start or end) Fragment. Used
482483
to decide which OverlapResult to remove a Fragment from, where the
@@ -498,6 +499,32 @@ def __str__(self):
498499
+ textwrap.indent(f"{self.scaffold}\n", " ")
499500
)
500501

502+
@property
503+
@abstractmethod
504+
def bait_overlap(self) -> int:
505+
"""
506+
The overlap between the bait and the `Fragment` on this end of the
507+
`OverlapResult`. (`0` if they don't overlap.)
508+
"""
509+
510+
@property
511+
@abstractmethod
512+
def overhang_if_applied(self) -> int:
513+
"""
514+
Overhang of the `Fragment` at this end of the `OverlapResult` beyond
515+
the `bait` that would be left if `Fragment` at this end was removed.
516+
Value is negative if removing the `Fragment` would leave an
517+
underhang.
518+
"""
519+
520+
@property
521+
@abstractmethod
522+
def overhang_error_delta_if_applied(self) -> int:
523+
"""
524+
Change (positive or negative) in the absolute size of the overhang
525+
(or underhang) if this `OverhangPremise` were applied.
526+
"""
527+
501528
def improves(self, err_length) -> bool:
502529
if len(self.scaffold.rows) == 1:
503530
return False
@@ -557,7 +584,7 @@ class OverhangResolver:
557584
the OverlapPremises which were applied.
558585
"""
559586

560-
def __init__(self, error_length=None):
587+
def __init__(self, error_length: int):
561588
self.premises_by_fragment_key: dict[
562589
tuple[str, int, int], list[OverhangPremise]
563590
] = {}

src/tola/assembly/fragment.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
from typing import TypeAlias
2+
3+
Junction: TypeAlias = tuple[str | int, str | int, str | int, str | int]
4+
5+
16
class Fragment:
27
__slots__ = "_name", "_start", "_end", "_strand", "_tags"
38

@@ -45,7 +50,7 @@ def length(self):
4550
def key_tuple(self) -> tuple[str, int, int]:
4651
return self._name, self._start, self._end
4752

48-
def junction_tuple(self, othr) -> tuple:
53+
def junction_tuple(self, othr) -> Junction:
4954
"""
5055
Encodes the positions of two adjacent Fragments in a Scaffold, with
5156
reverse strand ends encoded by flipping the order of the name and

src/tola/assembly/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def lowercase_and_dash_to_underscore():
129129
)
130130

131131

132-
def format_from_file_extn(pth, default=None):
132+
def format_from_file_extn(pth, default=None) -> str | None:
133133
"""
134134
Guess the file format from the extension, or return the supplied default
135135
"""

src/tola/assembly/scaffold.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import io
2+
from collections.abc import Generator
3+
from typing import Self
24

3-
from tola.assembly.fragment import Fragment
5+
from tola.assembly.fragment import Fragment, Junction
46
from tola.assembly.gap import Gap
57

68

@@ -20,11 +22,11 @@ def __init__(
2022
self.rows = [*rows]
2123
else:
2224
self.rows = []
23-
self.tag = tag
24-
self.haplotype = haplotype
25-
self.rank = rank
26-
self.original_name = original_name
27-
self.original_tags = original_tags
25+
self.tag: str | None = tag
26+
self.haplotype: str | None = haplotype
27+
self.rank: int = rank
28+
self.original_name: str | None = original_name
29+
self.original_tags: set[str] | None = original_tags
2830

2931
def __repr__(self):
3032
txt = io.StringIO()
@@ -59,54 +61,57 @@ def add_row(self, row):
5961
self.rows.append(row)
6062

6163
@property
62-
def length(self):
64+
def length(self) -> int:
6365
return sum(r.length for r in self.rows)
6466

6567
@property
66-
def fragments_length(self):
68+
def fragments_length(self) -> int:
6769
return sum(f.length for f in self.fragments())
6870

6971
@property
70-
def gaps_length(self):
72+
def gaps_length(self) -> int:
7173
return sum(g.length for g in self.gaps())
7274

7375
@property
74-
def last_row_is_fragment(self):
76+
def last_row_is_fragment(self) -> bool:
7577
if self.rows:
7678
return isinstance(self.rows[-1], Fragment)
7779
else:
7880
return False
7981

80-
def fragments(self):
82+
def fragments(self) -> Generator[Fragment]:
8183
for row in self.rows:
8284
if isinstance(row, Fragment):
8385
yield row
8486

85-
def idx_fragments(self):
87+
def idx_fragments(self) -> Generator[tuple[int, Fragment]]:
8688
for i, row in enumerate(self.rows):
8789
if isinstance(row, Fragment):
8890
yield i, row
8991

90-
def gaps(self):
92+
def gaps(self) -> Generator[Gap]:
9193
for row in self.rows:
9294
if isinstance(row, Gap):
9395
yield row
9496

95-
def idx_gaps(self):
97+
def idx_gaps(self) -> Generator[tuple[int, Gap]]:
9698
for i, row in enumerate(self.rows):
9799
if isinstance(row, Gap):
98100
yield i, row
99101

100-
def fragment_tags(self):
102+
def fragment_tags(self) -> set[str]:
101103
tag_set = set()
102104
for frag in self.fragments():
103105
for t in frag.tags:
104106
tag_set.add(t)
105107
return tag_set
106108

107-
def reverse(self):
109+
def reverse(self) -> Self:
108110
new = self.__class__(
109111
self.name,
112+
tag=self.tag,
113+
haplotype=self.haplotype,
114+
rank=self.rank,
110115
original_name=self.original_name,
111116
original_tags=self.original_tags,
112117
)
@@ -121,7 +126,7 @@ def append_scaffold(self, othr, gap=None):
121126
self.add_row(gap)
122127
self.rows.extend(othr.rows)
123128

124-
def fragment_junction_set(self):
129+
def fragment_junction_set(self) -> set[Junction]:
125130
junctions = set()
126131
itr = self.fragments()
127132

@@ -130,12 +135,8 @@ def fragment_junction_set(self):
130135
except StopIteration:
131136
return junctions
132137

133-
while True:
134-
try:
135-
this = next(itr)
136-
junctions.add(prev.junction_tuple(this))
137-
prev = this
138-
except StopIteration:
139-
break
138+
for this in itr:
139+
junctions.add(prev.junction_tuple(this))
140+
prev = this
140141

141142
return junctions

0 commit comments

Comments
 (0)