Skip to content

Commit f8648a8

Browse files
committed
add docstrings
1 parent f83048d commit f8648a8

File tree

1 file changed

+52
-9
lines changed

1 file changed

+52
-9
lines changed

ms2query/compound_database.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ def inchikey14_from_full(inchikey: str) -> Optional[str]:
2323

2424
def encode_sparse_fp(bits: Optional[np.ndarray], counts: Optional[np.ndarray]) -> tuple[bytes, bytes]:
2525
"""Store bits as uint32 indices, counts as int32
26+
27+
Parameters
28+
----------
29+
bits : array-like of uint32 bit indices
30+
counts : array-like of int32 counts
31+
2632
Returns (bits_blob, counts_blob). Accepts None -> empty blobs."""
2733
if bits is None:
2834
b = b""
@@ -41,14 +47,23 @@ def encode_sparse_fp(bits: Optional[np.ndarray], counts: Optional[np.ndarray]) -
4147
return b, c
4248

4349
def decode_sparse_fp(bits_blob: bytes, counts_blob: bytes) -> tuple[np.ndarray, np.ndarray]:
44-
"""Inverse of encode_sparse_fp. Returns (bits_uint32, counts_int32). Empty blobs -> empty arrays."""
50+
"""Inverse of encode_sparse_fp.
51+
52+
Parameters
53+
----------
54+
bits_blob : BLOB bytes of uint32 bit indices
55+
counts_blob : BLOB bytes of int32 counts
56+
57+
Returns (bits_uint32, counts_int32). Empty blobs -> empty arrays.
58+
"""
4559
bits = np.frombuffer(bits_blob, dtype=np.uint32).copy() if bits_blob else np.zeros(0, dtype=np.uint32)
4660
# Counts are always decoded as int32, matching what encode_sparse_fp stores
4761
counts = np.frombuffer(counts_blob, dtype=np.int32).copy() if counts_blob else np.zeros(0, dtype=np.int32)
4862
return bits, counts
4963

5064
def decode_fp_blob(blob: bytes) -> np.ndarray:
51-
"""Decode fingerprint BLOB back to uint8 array. Unknown length -> infer from blob size."""
65+
"""Decode fingerprint BLOB back to uint8 array.
66+
Unknown length -> infer from blob size."""
5267
if not blob:
5368
return np.zeros(0, dtype=np.uint8)
5469
return np.frombuffer(blob, dtype=np.uint8).copy()
@@ -62,8 +77,22 @@ def compute_fingerprints(
6277
progress_bar: bool = True,
6378
) -> np.ndarray:
6479
"""
65-
Placeholder: compute a molecular fingerprint from SMILES or InChI.
66-
For now return a dummy vector (replace with RDKit/Morgan etc. later).
80+
Compute molecular fingerprints for a batch of SMILES or InChI strings.
81+
82+
Parameters
83+
----------
84+
smiles : list of str or None
85+
SMILES strings to compute the fingerprints from.
86+
inchis : list of str or None
87+
InChI strings to compute the fingerprints from (used if smiles is None).
88+
sparse : bool
89+
If True, compute sparse fingerprint (indices/counts); else dense bit vector.
90+
count : bool
91+
If True, compute count-based fingerprint; else binary fingerprint.
92+
radius : int
93+
Radius for Morgan fingerprint. Default 9.
94+
progress_bar : bool
95+
Whether to show a progress bar during computation. Default True.
6796
"""
6897
fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=4096)
6998

@@ -94,15 +123,29 @@ def compute_fingerprints(
94123
# Compound database (compounds table) in SQLite
95124
# ==================================================
96125

97-
# --- keep your imports/utilities as-is (encode/decode utils etc.) ---
98-
99126
@dataclass
100127
class CompoundDatabase:
128+
"""SQLite-based compound database with sparse fingerprint storage.
129+
Stores compounds identified by inchikey14, with optional metadata and molecular fingerprints.
130+
131+
Attributes
132+
----------
133+
sqlite_path : str
134+
Path to the SQLite database file.
135+
compound_fields : List[str]
136+
List of metadata fields to store for each compound.
137+
fingerprint_radius : int
138+
Radius for Morgan fingerprint computation (used in backfill).
139+
fingerprint_sparse : bool
140+
Whether to store fingerprints as sparse (True) or dense (False) (used in backfill).
141+
fingerprint_count : bool
142+
Whether to store count-based (True) or binary (False) fingerprints (used in backfill).
143+
"""
101144
sqlite_path: str
102145
compound_fields: List[str] = field(default_factory=lambda: [
103146
"smiles", "inchi", "inchikey", "classyfire_class", "classyfire_superclass"
104147
])
105-
# Default FP parameters (used by the backfill method if you pass through to your compute_fingerprints)
148+
# Default FP parameters (used by the backfill method for compute_fingerprints)
106149
fingerprint_radius: int = 9
107150
fingerprint_sparse: bool = True
108151
fingerprint_count: bool = True
@@ -142,7 +185,7 @@ def _ensure_schema(self):
142185
cur.execute(f"ALTER TABLE compounds ADD COLUMN {name} {typ}")
143186
self._conn.commit()
144187

145-
# ---------- UPSERTS: write metadata only; DO NOT compute fingerprints here ----------
188+
# ---------- UPSERTS ----------
146189

147190
def upsert_compound(
148191
self,
@@ -420,7 +463,7 @@ def _update_rows(comp_ids: List[str], results: List[Optional[Tuple[np.ndarray, n
420463
comp_ids = [r[0] for r in rows]
421464
reps = [r[1] for r in rows] # list[str] of smiles or inchi
422465

423-
# call your project-level function ONCE for the whole batch
466+
# call compute_fingerprints ONCE for the whole batch
424467
results = compute_fingerprints(
425468
smiles=reps if which == "smiles" else None,
426469
inchis=reps if which == "inchi" else None,

0 commit comments

Comments
 (0)