Skip to content

Commit f6f00df

Browse files
authored
Allow Metric to use dataclasses or attr (#80)
* Update util.metric and related util.inspect modules to work with dataclasses or attr * Update test_metric to test both dataclasses and attr classes Closes #45
1 parent 305530c commit f6f00df

10 files changed

+547
-221
lines changed

codecov.yml

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ignore:
2+
- "**/test_*.py" # don't compute coverage of tests

docs/api.rst

+3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ Metric files
5151

5252
.. seealso::
5353

54+
https://docs.python.org/3/library/dataclasses.html
55+
Documentation for the dataclasses standard module
56+
5457
https://www.attrs.org/en/stable/examples.html
5558

5659
The attrs website for bringing back the joy to writing classes.

fgpyo/read_structure.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __str__(self) -> str:
9696
return self.value
9797

9898

99-
@attr.s(frozen=True, auto_attribs=True, kw_only=True)
99+
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
100100
class SubReadWithoutQuals:
101101
"""Contains the bases that correspond to the given read segment."""
102102

@@ -112,7 +112,7 @@ def kind(self) -> SegmentType:
112112
return self.segment.kind
113113

114114

115-
@attr.s(frozen=True, auto_attribs=True, kw_only=True)
115+
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
116116
class SubReadWithQuals:
117117
"""Contains the bases and qualities that correspond to the given read segment"""
118118

@@ -131,7 +131,7 @@ def kind(self) -> SegmentType:
131131
return self.segment.kind
132132

133133

134-
@attr.s(frozen=True, auto_attribs=True, kw_only=True)
134+
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
135135
class ReadSegment:
136136
"""Encapsulates all the information about a segment within a read structure. A segment can
137137
either have a definite length, in which case length must be Some(Int), or an indefinite length
@@ -203,7 +203,7 @@ def __str__(self) -> str:
203203
return f"{ANY_LENGTH_CHAR}{self.kind.value}"
204204

205205

206-
@attr.s(frozen=True, auto_attribs=True, kw_only=True)
206+
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
207207
class ReadStructure(Iterable[ReadSegment]):
208208
"""Describes the structure of a given read. A read contains one or more read segments. A read
209209
segment describes a contiguous stretch of bases of the same type (ex. template bases) of some

fgpyo/sam/__init__.py

+17-18
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ def is_clipping(self) -> bool:
386386
return self == CigarOp.S or self == CigarOp.H
387387

388388

389-
@attr.s(frozen=True, slots=True)
389+
@attr.s(frozen=True, slots=True, auto_attribs=True)
390390
class CigarElement:
391391
"""Represents an element in a Cigar
392392
@@ -395,14 +395,13 @@ class CigarElement:
395395
- operator (CigarOp): the operator of the element
396396
"""
397397

398-
length: int = attr.ib()
399-
operator: CigarOp = attr.ib()
398+
length: int
399+
operator: CigarOp
400400

401-
@length.validator
402-
def _validate_length(self, attribute: Any, value: int) -> None:
401+
def __attrs_post_init__(self) -> None:
403402
"""Validates the length attribute is greater than zero."""
404-
if value <= 0:
405-
raise ValueError(f"Cigar element must have a length > 0, found {value}")
403+
if self.length <= 0:
404+
raise ValueError(f"Cigar element must have a length > 0, found {self.length}")
406405

407406
@property
408407
def length_on_query(self) -> int:
@@ -424,15 +423,15 @@ class CigarParsingException(Exception):
424423
pass
425424

426425

427-
@attr.s(frozen=True, slots=True)
426+
@attr.s(frozen=True, slots=True, auto_attribs=True)
428427
class Cigar:
429428
"""Class representing a cigar string.
430429
431430
Attributes:
432431
- elements (Tuple[CigarElement, ...]): zero or more cigar elements
433432
"""
434433

435-
elements: Tuple[CigarElement, ...] = attr.ib(default=())
434+
elements: Tuple[CigarElement, ...] = ()
436435

437436
@classmethod
438437
def from_cigartuples(cls, cigartuples: Optional[List[Tuple[int, int]]]) -> "Cigar":
@@ -519,7 +518,7 @@ def length_on_target(self) -> int:
519518
return sum([elem.length_on_target for elem in self.elements])
520519

521520

522-
@attr.s(auto_attribs=True, frozen=True)
521+
@attr.s(frozen=True, auto_attribs=True)
523522
class SupplementaryAlignment:
524523
"""Stores a supplementary alignment record produced by BWA and stored in the SA SAM tag.
525524
@@ -532,12 +531,12 @@ class SupplementaryAlignment:
532531
nm: the number of edits
533532
"""
534533

535-
reference_name: str = attr.ib()
536-
start: int = attr.ib()
537-
is_forward: bool = attr.ib()
538-
cigar: Cigar = attr.ib()
539-
mapq: int = attr.ib()
540-
nm: int = attr.ib()
534+
reference_name: str
535+
start: int
536+
is_forward: bool
537+
cigar: Cigar
538+
mapq: int
539+
nm: int
541540

542541
def __str__(self) -> str:
543542
return ",".join(
@@ -639,7 +638,7 @@ def set_pair_info(r1: AlignedSegment, r2: AlignedSegment, proper_pair: bool = Tr
639638
r2.template_length = -insert_size
640639

641640

642-
@attr.s(auto_attribs=True, frozen=True)
641+
@attr.s(frozen=True, auto_attribs=True)
643642
class ReadEditInfo:
644643
"""
645644
Counts various stats about how a read compares to a reference sequence.
@@ -728,7 +727,7 @@ def calculate_edit_info(
728727
)
729728

730729

731-
@attr.s(auto_attribs=True, frozen=True)
730+
@attr.s(frozen=True, auto_attribs=True)
732731
class Template:
733732
"""A container for alignment records corresponding to a single sequenced template
734733
or insert.

fgpyo/sam/tests/test_sam.py

+6
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,12 @@ def test_cigar_element_length_on(
195195
assert element.length_on_target == length_on_target
196196

197197

198+
@pytest.mark.parametrize("character", ["M", "I", "D", "S"])
199+
def test_invalid_cigar_element(character: str) -> None:
200+
with pytest.raises(ValueError):
201+
CigarElement(-1, operator=CigarOp.from_character(character))
202+
203+
198204
@pytest.mark.parametrize(
199205
"cigartuples,cigarstring",
200206
[

fgpyo/util/inspect.py

+136-21
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,113 @@
1-
import functools
21
import sys
2+
import types as python_types
33
import typing
4-
from enum import Enum
5-
from functools import partial
6-
from pathlib import PurePath
74
from typing import Any
8-
from typing import Callable
95
from typing import Dict
6+
from typing import FrozenSet
107
from typing import Iterable
118
from typing import List
129
from typing import Literal
13-
from typing import Optional
10+
from typing import Mapping
11+
from typing import Protocol
1412
from typing import Tuple
1513
from typing import Type
1614
from typing import Union
1715

18-
if sys.version_info >= (3, 12):
16+
if sys.version_info >= (3, 10):
1917
from typing import TypeAlias
2018
else:
2119
from typing_extensions import TypeAlias
2220

23-
import attr
21+
import dataclasses
22+
import functools
23+
from dataclasses import MISSING as DATACLASSES_MISSING
24+
from dataclasses import fields as get_dataclasses_fields
25+
from dataclasses import is_dataclass as is_dataclasses_class
26+
from enum import Enum
27+
from functools import partial
28+
from pathlib import PurePath
29+
from typing import TYPE_CHECKING
30+
from typing import Callable
31+
from typing import Optional
32+
from typing import TypeVar
2433

2534
import fgpyo.util.types as types
2635

36+
attr: Optional[python_types.ModuleType]
37+
MISSING: FrozenSet[Any]
38+
39+
try:
40+
import attr
41+
42+
_use_attr = True
43+
from attr import fields as get_attr_fields
44+
from attr import fields_dict as get_attr_fields_dict
45+
46+
Attribute: TypeAlias = attr.Attribute # type: ignore[name-defined, no-redef]
47+
# dataclasses and attr have internal tokens for missing values, join into a set so that we can
48+
# check if a value is missing without knowing the type of backing class
49+
MISSING = frozenset({DATACLASSES_MISSING, attr.NOTHING})
50+
except ImportError: # pragma: no cover
51+
_use_attr = False
52+
attr = None
53+
Attribute: TypeAlias = TypeVar("Attribute", bound=object) # type: ignore[misc, assignment, no-redef] # noqa: E501
54+
55+
# define empty placeholders for getting attr fields as a tuple or dict. They will never be
56+
# called because the import failed; but they're here to ensure that the function is defined in
57+
# sections of code that don't know if the import was successful or not.
58+
59+
def get_attr_fields(cls: type) -> Tuple[dataclasses.Field, ...]: # type: ignore[misc]
60+
"""Get tuple of fields for attr class. attrs isn't imported so return empty tuple."""
61+
return ()
62+
63+
def get_attr_fields_dict(cls: type) -> Dict[str, dataclasses.Field]: # type: ignore[misc]
64+
"""Get dict of name->field for attr class. attrs isn't imported so return empty dict."""
65+
return {}
66+
67+
# for consistency with successful import of attr, create a set for missing values
68+
MISSING = frozenset({DATACLASSES_MISSING})
69+
70+
if TYPE_CHECKING:  # pragma: no cover
    from _typeshed import DataclassInstance as DataclassesProtocol
else:

    class DataclassesProtocol(Protocol):
        """Runtime stand-in for typeshed's ``DataclassInstance`` protocol.

        ``_typeshed`` is only importable by type checkers, so at runtime this minimal protocol
        is defined instead to describe "something that is a dataclass".
        """

        # The dataclasses module stores the per-class field mapping under exactly this name
        # (``__dataclass_fields__``, singular "dataclass").
        __dataclass_fields__: Dict[str, dataclasses.Field]
76+
77+
78+
if TYPE_CHECKING and _use_attr:  # pragma: no cover
    from attr import AttrsInstance
else:

    class AttrsInstance(Protocol):  # type: ignore[no-redef]
        """Runtime stand-in for attr's ``AttrsInstance`` protocol.

        Used when not type checking, or when attr could not be imported, so that the name is
        always defined for annotations in this module.
        """

        # attr stores the class's attributes as a tuple, not as a name->attribute dict
        __attrs_attrs__: Tuple[Any, ...]
84+
85+
86+
def is_attr_class(cls: type) -> bool: # type: ignore[arg-type]
87+
"""Return True if the class is an attr class, and False otherwise"""
88+
return hasattr(cls, "__attrs_attrs__")
89+
90+
91+
_MISSING_OR_NONE: FrozenSet[Any] = frozenset({*MISSING, None})
92+
"""Set of values that are considered missing or None for dataclasses or attr classes"""
93+
_DataclassesOrAttrClass: TypeAlias = Union[DataclassesProtocol, AttrsInstance]
94+
"""
95+
TypeAlias for dataclasses or attr classes. Mostly nonsense because they are not true types, they
96+
are traits, but there is no python trait-tester.
97+
"""
98+
FieldType: TypeAlias = Union[dataclasses.Field, Attribute]
99+
"""
100+
TypeAlias for dataclass Fields or attrs Attributes. It will correspond to the correct type for the
101+
corresponding _DataclassesOrAttrClass
102+
"""
103+
104+
105+
def _get_dataclasses_fields_dict(
106+
class_or_instance: Union[DataclassesProtocol, Type[DataclassesProtocol]],
107+
) -> Dict[str, dataclasses.Field]:
108+
"""Get a dict from field name to Field for a dataclass class or instance."""
109+
return {field.name: field for field in get_dataclasses_fields(class_or_instance)}
110+
27111

28112
class ParserNotFoundException(Exception):
29113
pass
@@ -67,7 +151,7 @@ def split_at_given_level(
67151
return out_vals
68152

69153

70-
NoneType = type(None)
154+
NoneType: TypeAlias = type(None) # type: ignore[no-redef]
71155

72156

73157
def list_parser(
@@ -305,27 +389,58 @@ def get_parser() -> partial:
305389
return parser
306390

307391

392+
def get_fields_dict(
    cls: Union[_DataclassesOrAttrClass, Type[_DataclassesOrAttrClass]]
) -> Mapping[str, FieldType]:
    """Get the fields dict from either a dataclasses or attr dataclass (or instance).

    Args:
        cls: a dataclasses or attr class, or an instance of such a class

    Returns:
        a mapping from field name to the corresponding dataclasses field or attr attribute

    Raises:
        TypeError: if `cls` is neither a dataclasses nor an attr class (or instance)
    """
    if is_dataclasses_class(cls):
        return _get_dataclasses_fields_dict(cls)  # type: ignore[arg-type]
    elif is_attr_class(cls):  # type: ignore[arg-type]
        # attr's fields_dict only accepts classes, so resolve an instance to its class first
        attr_cls = cls if isinstance(cls, type) else type(cls)
        return get_attr_fields_dict(attr_cls)  # type: ignore[arg-type]
    else:
        raise TypeError("cls must be a dataclasses or attr class")
402+
403+
404+
def get_fields(
    cls: Union[_DataclassesOrAttrClass, Type[_DataclassesOrAttrClass]]
) -> Tuple[FieldType, ...]:
    """Get the fields tuple from either a dataclasses or attr dataclass (or instance).

    Args:
        cls: a dataclasses or attr class, or an instance of such a class

    Returns:
        a tuple of the dataclasses fields or attr attributes of the class

    Raises:
        TypeError: if `cls` is neither a dataclasses nor an attr class (or instance)
    """
    if is_dataclasses_class(cls):
        return get_dataclasses_fields(cls)  # type: ignore[arg-type]
    elif is_attr_class(cls):  # type: ignore[arg-type]
        # attr's fields only accepts classes, so resolve an instance to its class first
        attr_cls = cls if isinstance(cls, type) else type(cls)
        return get_attr_fields(attr_cls)  # type: ignore[arg-type]
    else:
        raise TypeError("cls must be a dataclasses or attr class")
414+
415+
416+
_AttrFromType = TypeVar("_AttrFromType")
417+
"""TypeVar to allow attr_from to be used with either an attr class or a dataclasses class"""
418+
419+
308420
def attr_from(
309-
cls: Type, kwargs: Dict[str, str], parsers: Optional[Dict[type, Callable[[str], Any]]] = None
310-
) -> Any:
311-
"""Builds an attr class from key-word arguments
421+
cls: Type[_AttrFromType],
422+
kwargs: Dict[str, str],
423+
parsers: Optional[Dict[type, Callable[[str], Any]]] = None,
424+
) -> _AttrFromType:
425+
"""Builds an attr or dataclasses class from key-word arguments
312426
313427
Args:
314-
cls: the attr class to be built
428+
cls: the attr or dataclasses class to be built
315429
kwargs: a dictionary of keyword arguments
316430
parsers: a dictionary of parser functions to apply to specific types
317431
318432
"""
319433
return_values: Dict[str, Any] = {}
320-
for attribute in attr.fields(cls):
434+
for attribute in get_fields(cls): # type: ignore[arg-type]
321435
return_value: Any
322436
if attribute.name in kwargs:
323437
str_value: str = kwargs[attribute.name]
324438
set_value: bool = False
325439

326440
# Use the converter if provided
327-
if attribute.converter is not None:
328-
return_value = attribute.converter(str_value)
441+
converter = getattr(attribute, "converter", None)
442+
if converter is not None:
443+
return_value = converter(str_value)
329444
set_value = True
330445

331446
# try getting a known parser
@@ -352,26 +467,26 @@ def attr_from(
352467
set_value
353468
), f"Do not know how to convert string to {attribute.type} for value: {str_value}"
354469
else: # no value, check for a default
355-
assert attribute.default is not None or attribute_is_optional(
470+
assert attribute.default is not None or _attribute_is_optional(
356471
attribute
357472
), f"No value given and no default for attribute `{attribute.name}`"
358473
return_value = attribute.default
359474
# when the default is a missing-value sentinel (attr.NOTHING or dataclasses.MISSING), use None
360-
if return_value is attr.NOTHING:
475+
if return_value in MISSING:
361476
return_value = None
362477

363478
return_values[attribute.name] = return_value
364479

365480
return cls(**return_values)
366481

367482

368-
def attribute_is_optional(attribute: attr.Attribute) -> bool:
483+
def _attribute_is_optional(attribute: FieldType) -> bool:
    """Returns True if the attribute is optional, False otherwise.

    An attribute is optional when its declared type is a union that has `None` as one of its
    members, e.g. `Optional[int]` or `Union[int, str, None]`.

    Args:
        attribute: the dataclasses field or attr attribute whose `type` is examined
    """
    origin = typing.get_origin(attribute.type)
    # PEP 604 unions (`int | None`) report `types.UnionType`, which only exists on Python 3.10+
    pep604_union = getattr(python_types, "UnionType", None)
    is_union = origin is Union or (pep604_union is not None and origin is pep604_union)
    if not is_union:
        return False
    # Identity test rather than isinstance(): isinstance() raises TypeError as soon as one of
    # the union members is a subscripted generic such as List[int]
    return any(member is type(None) for member in typing.get_args(attribute.type))
373488

374489

375-
def attribute_has_default(attribute: attr.Attribute) -> bool:
490+
def _attribute_has_default(attribute: FieldType) -> bool:
    """Returns True if the attribute has a default value, False otherwise.

    An attribute counts as having a default when its `default` is neither `None` nor one of the
    missing-value sentinels, or when its type is optional.

    Args:
        attribute: the dataclasses field or attr attribute to examine
    """
    default = attribute.default
    # Compare by identity: set membership would hash the default and raise TypeError for
    # unhashable defaults such as a list or dict
    default_is_missing = any(default is sentinel for sentinel in _MISSING_OR_NONE)
    return not default_is_missing or _attribute_is_optional(attribute)

0 commit comments

Comments
 (0)