Skip to content

Commit 80038fa

Browse files
committed
data objects implemented in rust
1 parent 0614d0a commit 80038fa

21 files changed

+2408
-18
lines changed

Cargo.toml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[package]
2+
name = "seqspec"
3+
version = "0.1.0"
4+
edition = "2021"
5+
license = "MIT"
6+
repository = "https://github.com/pachterlab/seqspec"
7+
description = "Sequencing specification tools (Rust core + PyO3 bindings)"
8+
9+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
10+
[lib]
11+
name = "seqspec"
12+
crate-type = ["rlib", "cdylib"]
13+
14+
[dependencies]
15+
pyo3 = { version = "0.25", optional = true, features = ["extension-module", "abi3-py312"] }
16+
pythonize = "0.25.0"
17+
serde = { version = "1", features = ["derive"] }
18+
serde_json = "1"
19+
serde_yaml = "0.9"
20+
thiserror = "1"
21+
22+
23+
[features]
24+
default = []
25+
python-binding = ["pyo3"] # enable PyO3 only when building Python wheels
26+

pyproject.toml

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
[build-system]
2-
requires = ["setuptools>=61.0"]
3-
build-backend = "setuptools.build_meta"
2+
requires = ["maturin>=1.9,<2.0"]
3+
build-backend = "maturin"
44

55
[project]
66
name = "seqspec"
77
version = "0.4.0"
8+
89
description = "A tool for working with sequencing specifications"
910
readme = "README.md"
1011
requires-python = ">=3.12"
@@ -20,6 +21,9 @@ classifiers = [
2021
"Programming Language :: Python :: 3.13",
2122
"Topic :: Scientific/Engineering :: Bio-Informatics",
2223
"Topic :: Utilities",
24+
"Programming Language :: Rust",
25+
"Programming Language :: Python :: Implementation :: CPython",
26+
"Programming Language :: Python :: Implementation :: PyPy",
2327
]
2428
dependencies = [
2529
"pyyaml>=6.0",
@@ -60,7 +64,7 @@ license-files = ["LICENSE"]
6064
include = ["seqspec", "seqspec.*"]
6165

6266
[tool.setuptools.package-data]
63-
"seqspec" = ["schema/*"]
67+
"seqspec" = ["schema/*", "py.typed", "_core.pyi"]
6468

6569
[tool.pytest.ini_options]
6670
testpaths = ["tests"]
@@ -98,4 +102,11 @@ dev = [
98102
"pre-commit>=4.2.0",
99103
"build>=1.0.0",
100104
"ruff>=0.9.0",
101-
]
105+
]
106+
107+
108+
[tool.maturin]
109+
# Build the PyO3 extension as the submodule seqspec._core (keep in sync with the #[pymodule] name in the Rust crate)
110+
module-name = "seqspec._core"
111+
features = ["python-binding"]
112+
python-source = "." # ship the pure-Python seqspec/ package (at the repo root) alongside the native module

seqspec/Assay.py

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import List, Optional, Union
1+
from typing import Iterable, List, Optional, Type, Union
22

33
import yaml
44
from pydantic import BaseModel, Field, PrivateAttr
@@ -7,6 +7,9 @@
77
from seqspec.Region import Region, RegionInput
88

99
from . import __version__
10+
from ._core import Assay as _RustAssay
11+
from ._core import Read as _RustRead
12+
from ._core import Region as _RustRegion
1013

1114

1215
class SeqProtocol(BaseModel):
@@ -159,6 +162,60 @@ def to_libkit(self) -> LibKit:
159162
)
160163

161164

165+
def coerce_protocol_kit_list(value, cls: Type[BaseModel], modalities: Iterable[str]):
    """
    Coerce a string or list of strings/objects/dicts into a list of protocol/kit objects (or None).

    Supports:
      - "NovaSeq"  -> one ``cls`` per modality, with protocol_id|kit_id and name both set to "NovaSeq"
      - ["A","B"]  -> every string expanded per modality, in input order
      - [{"protocol_id": "...", ...}] -> cls(**dict)
      - [cls(...), "X", {...}]        -> mixed inputs

    Args:
        value: ``None``, a string, a ``cls`` instance, or a list mixing
            strings, dicts and ``cls`` instances.
        cls: Target class; its name must be one of SeqProtocol, LibProtocol,
            SeqKit, LibKit.
        modalities: Modalities used to expand string entries. Any iterable is
            accepted, including one-shot iterators/generators.

    Returns:
        ``None`` if ``value`` is ``None``; a list of objects for str, list or
        ``cls`` input; any other value is passed through unchanged.

    Raises:
        ValueError: If ``cls`` is not one of the four supported classes.
        TypeError: If a list item is not a str, dict, or ``cls`` instance.
    """
    if value is None:
        return None

    # identify target family (protocol vs kit) by class name
    is_protocol = cls.__name__ in {"SeqProtocol", "LibProtocol"}
    is_kit = cls.__name__ in {"SeqKit", "LibKit"}

    if not (is_protocol or is_kit):
        raise ValueError("cls must be one of: SeqProtocol, LibProtocol, SeqKit, LibKit")

    # Protocol classes are keyed by ``protocol_id``, kit classes by ``kit_id``.
    id_field = "protocol_id" if is_protocol else "kit_id"

    def expand(name: str, modality_list):
        # One object per modality for a bare string identifier.
        return [
            cls(**{id_field: name, "name": name, "modality": m})
            for m in modality_list
        ]

    def make_obj(item):
        # Dicts and already-typed objects carry their own modality (if any).
        if isinstance(item, cls):
            return item
        if isinstance(item, dict):
            return cls(**item)
        raise TypeError(f"Unsupported item type for {cls.__name__}: {type(item)!r}")

    if isinstance(value, (str, list)):
        # Materialize once: ``modalities`` is walked once per string entry, so
        # a one-shot iterator would otherwise be exhausted after the first
        # string and later strings would silently expand to nothing.
        modality_list = list(modalities)

        if isinstance(value, str):
            return expand(value, modality_list)

        out = []
        for item in value:
            if isinstance(item, str):
                out.extend(expand(item, modality_list))
            else:
                out.append(make_obj(item))
        return out

    # already a typed object (rare), wrap into list
    if isinstance(value, cls):
        return [value]

    # last resort: pass through
    return value
217+
218+
162219
class Assay(BaseModel):
163220
seqspec_version: Optional[str] = __version__
164221
assay_id: str
@@ -180,6 +237,9 @@ class Assay(BaseModel):
180237
# Not part of the public schema; populated when loading from disk.
181238
_spec_path: Optional[str] = PrivateAttr(default=None)
182239

240+
def model_post_init(self, __context) -> None:
    """Post-init hook: normalize the protocol/kit fields via ``normalize_protocols_kits``.

    The context argument is accepted but not used.
    """
    self.normalize_protocols_kits()
    return None
242+
183243
def __repr__(self) -> str:
184244
rds = []
185245
rgns = []
@@ -214,6 +274,7 @@ def print_sequence(self):
214274
print("\n", end="")
215275

216276
def update_spec(self):
277+
self.normalize_protocols_kits()
217278
for r in self.library_spec:
218279
r.update_attr()
219280

@@ -294,6 +355,82 @@ def insert_reads(
294355
self.sequence_spec.insert(insert_idx, read)
295356
insert_idx += 1
296357

358+
def normalize_protocols_kits(self) -> None:
    """Rewrite the four protocol/kit fields in place as coerced lists.

    Each field is passed through ``coerce_protocol_kit_list`` together with
    its target class and ``self.modalities``, then assigned back. Fields are
    processed in a fixed order: sequence protocol, sequence kit, library
    protocol, library kit.
    """
    field_targets = (
        ("sequence_protocol", SeqProtocol),
        ("sequence_kit", SeqKit),
        ("library_protocol", LibProtocol),
        ("library_kit", LibKit),
    )
    for field_name, target_cls in field_targets:
        current = getattr(self, field_name)
        coerced = coerce_protocol_kit_list(current, target_cls, self.modalities)
        setattr(self, field_name, coerced)
372+
373+
374+
class RustAssay:
    """Thin Python proxy around a Rust-backed ``_core.Assay`` object.

    Attribute reads that are not found on the proxy, and all attribute writes
    other than ``_inner``, are forwarded to the wrapped object. The helper
    methods convert between the wrapped object's values and the pydantic
    models by round-tripping through JSON.
    """

    __slots__ = ("_inner",)

    def __init__(self, inner: _RustAssay) -> None:
        """Wrap ``inner``; bypasses the forwarding ``__setattr__``."""
        object.__setattr__(self, "_inner", inner)

    # generic forwarding
    def __getattr__(self, name):
        """Forward lookups that failed on the proxy to the wrapped object.

        Raises:
            AttributeError: If ``_inner`` is unset or the wrapped object has
                no such attribute.
        """
        # ``__getattr__`` only runs after normal lookup fails. If the failing
        # name is ``_inner`` itself (instance built via ``__new__``, e.g. by
        # copy/pickle), forwarding through ``self._inner`` would recurse until
        # RecursionError, which ``hasattr`` does not swallow. Fail fast.
        if name == "_inner":
            raise AttributeError(name)
        return getattr(self._inner, name)

    def __setattr__(self, name, value):
        """Store ``_inner`` on the proxy; forward every other write."""
        if name == "_inner":
            return object.__setattr__(self, name, value)
        return setattr(self._inner, name, value)

    # constructors
    @classmethod
    def from_model(cls, m: "Assay") -> "RustAssay":
        """Build a proxy from a pydantic ``Assay`` via its JSON dump."""
        return cls(_RustAssay.from_json(m.model_dump_json()))

    def snapshot(self) -> "Assay":
        """Return a pydantic ``Assay`` validated from the wrapped object's JSON."""
        return Assay.model_validate_json(self._inner.to_json())

    # helpers: DTO outputs for downstream Python code
    def list_modalities(self) -> List[str]:
        """Return the wrapped object's modalities as a new list."""
        return list(self._inner.list_modalities())

    def get_libspec(self, modality: str) -> Region:
        """Return the library spec for ``modality`` as a pydantic ``Region``."""
        r: _RustRegion = self._inner.get_libspec(modality)
        return Region.model_validate_json(r.to_json())

    def get_seqspec(self, modality: str) -> List[Read]:
        """Return the sequence spec for ``modality`` as pydantic ``Read`` objects."""
        rlist: List[_RustRead] = self._inner.get_seqspec(modality)
        return [Read.model_validate_json(r.to_json()) for r in rlist]

    def get_read(self, read_id: str) -> Read:
        """Return the read identified by ``read_id`` as a pydantic ``Read``."""
        r: _RustRead = self._inner.get_read(read_id)
        return Read.model_validate_json(r.to_json())

    def update_spec(self) -> None:
        """Delegate to the wrapped object's ``update_spec``."""
        self._inner.update_spec()

    def insert_reads(
        self, reads: List[Read], modality: str, after: Optional[str] = None
    ) -> None:
        """Convert ``reads`` to Rust objects and insert them on the wrapped object."""
        # Convert DTOs to Rust via JSON (serde builds Vec<Read>)
        raw: List[_RustRead] = [_RustRead.from_json(r.model_dump_json()) for r in reads]
        self._inner.insert_reads(raw, modality, after)

    def insert_regions(
        self, regions: List[Region], modality: str, after: Optional[str] = None
    ) -> None:
        """Convert ``regions`` to Rust objects and insert them on the wrapped object."""
        raw: List[_RustRegion] = [
            _RustRegion.from_json(r.model_dump_json()) for r in regions
        ]
        self._inner.insert_regions(raw, modality, after)

    def __repr__(self) -> str:
        """Return the wrapped object's repr."""
        return repr(self._inner)
433+
297434

298435
class AssayInput(BaseModel):
299436
"""

seqspec/File.py

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from pydantic import BaseModel, Field
55

6+
from ._core import File as _RustFile
7+
68
__all__ = ["File"]
79

810

@@ -15,12 +17,11 @@ class File(BaseModel):
1517
urltype: str
1618
md5: str
1719

20+
# TODO: add an update_spec method that computes the md5 for the object
21+
1822
def __repr__(self) -> str:
1923
return self.file_id
2024

21-
def update_file_id(self, file_id: str):
22-
self.file_id = file_id
23-
2425

2526
class FileInput(BaseModel):
2627
"""
@@ -91,3 +92,50 @@ def to_file(self) -> File:
9192
urltype=self.urltype or "local",
9293
md5=self.md5 or "",
9394
)
95+
96+
97+
class RustFile:
    """Thin Python proxy around a Rust-backed ``_core.File`` object.

    Attribute reads that are not found on the proxy, and all attribute writes
    other than ``_inner``, are forwarded to the wrapped object.
    """

    __slots__ = ("_inner",)

    def __init__(self, inner: _RustFile) -> None:
        """Wrap ``inner``."""
        self._inner = inner

    @classmethod
    def new(
        cls,
        *,
        file_id: str,
        filename: str,
        filetype: str,
        filesize: int,
        url: str,
        urltype: str,
        md5: str,
    ) -> "RustFile":
        """Construct the Rust object from keyword-only fields and wrap it.

        ``filesize`` is coerced with ``int()`` before being passed on.
        """
        return cls(
            _RustFile(file_id, filename, filetype, int(filesize), url, urltype, md5)
        )

    def __getattr__(self, name):
        """Forward lookups that failed on the proxy to the wrapped object.

        Raises:
            AttributeError: If ``_inner`` is unset or the wrapped object has
                no such attribute.
        """
        # ``__getattr__`` only runs after normal lookup fails. If the failing
        # name is ``_inner`` itself (instance built via ``__new__``, e.g. by
        # copy/pickle), forwarding through ``self._inner`` would recurse until
        # RecursionError, which ``hasattr`` does not swallow. Fail fast.
        if name == "_inner":
            raise AttributeError(name)
        return getattr(self._inner, name)

    def __setattr__(self, name, value):
        """Store ``_inner`` on the proxy; forward every other write."""
        if name == "_inner":
            object.__setattr__(self, name, value)
        else:
            setattr(self._inner, name, value)

    @classmethod
    def from_model(cls, m: File) -> "RustFile":
        """Build a proxy from a pydantic ``File`` via its JSON dump."""
        return cls(_RustFile.from_json(m.model_dump_json()))

    @classmethod
    def from_input(cls, i: FileInput) -> "RustFile":
        """Build a proxy from a ``FileInput`` by first converting it to a ``File``."""
        return cls.from_model(i.to_file())

    def snapshot(self) -> File:
        """Return a pydantic ``File`` validated from the wrapped object's JSON."""
        return File.model_validate_json(self._inner.to_json())

    def __repr__(self) -> str:
        """Return a short description with file id, filename and size."""
        return f"RustFile(file_id={self.file_id!r}, filename={self.filename!r}, size={self.filesize})"

0 commit comments

Comments
 (0)