Skip to content

Commit 01ab364

Browse files
use gobbi as default config
1 parent 9d21676 commit 01ab364

4 files changed

Lines changed: 15 additions & 127 deletions

File tree

molpipeline/mol2any/mol2pharmacophore2d_fingerprint.py

Lines changed: 11 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
from __future__ import annotations
44

55
import copy
6-
from importlib import resources
76
from pathlib import Path
8-
from typing import TYPE_CHECKING, Any, Literal
7+
from typing import TYPE_CHECKING, Any
98

109
try:
1110
from typing import Self # type: ignore[attr-defined]
@@ -49,11 +48,10 @@ class MolToPharmacophore2DFP( # pylint: disable=too-many-instance-attributes
4948
- Distance bins for feature pairs
5049
- Configurable parameters for feature factory and signature factory
5150
52-
References
53-
----------
54-
[1] RDKit Documentation on 2D Pharmacophore Fingerprints
55-
[2] Gobbi, A. & Poppinger, D. Genetic optimization of combinatorial libraries.
56-
Biotechnology and Bioengineering 61, 47-54 (1998).
51+
By default, the 2D pharmacophore fingerprint described by Gobbi et al. is used.
52+
See:
53+
Gobbi, A. & Poppinger, D. Genetic optimization of combinatorial libraries.
54+
Biotechnology and Bioengineering 61, 47-54 (1998).
5755
5856
"""
5957

@@ -79,8 +77,8 @@ def __init__(
7977
Parameters
8078
----------
8179
feature_definition : Path, str, or None, optional
82-
Path or content of a feature definition file (.fdef). If None, uses RDKit's
83-
default MinimalFeatures.fdef.
80+
Path or content of a feature definition file (.fdef). If None, uses
81+
the feature definitions by Gobbi et al. (RDKit's Gobbi_Pharm2D.fdef).
8482
min_point_count : int, default=2
8583
Minimum number of pharmacophore points in a signature.
8684
max_point_count : int, default=3
@@ -95,7 +93,7 @@ def __init__(
9593
List of feature types to skip. If None, no features are skipped.
9694
distance_bins : list[tuple[float, float]], optional
9795
List of distance bins as (min_distance, max_distance) tuples.
98-
If None, uses default bins: [(1, 2), (2, 5), (5, 8)].
96+
If None, uses the default bins by Gobbi et al. (Gobbi_Pharm2D.defaultBins).
9997
counted : bool, default=False
10098
If True, the fingerprint will be counted (values represent occurrence).
10199
If False, the fingerprint will be binary (values are 0 or 1).
@@ -137,7 +135,7 @@ def __init__(
137135

138136
# Set default distance bins if not provided
139137
if distance_bins is None:
140-
distance_bins = [(1, 2), (2, 5), (5, 8)]
138+
distance_bins = Gobbi_Pharm2D.defaultBins
141139
self._validate_distance_bins(distance_bins)
142140
self._distance_bins = distance_bins
143141

@@ -170,9 +168,8 @@ def _read_feature_factory_content(
170168
"""
171169
if feature_definition is None:
172170
# Fall back to the Gobbi feature definition content if none is provided
173-
resource_files = resources.files("molpipeline.resources")
174-
feat_def_path = resource_files / "MinimalFeatures.fdef"
175-
elif isinstance(feature_definition, Path):
171+
return Gobbi_Pharm2D.fdef
172+
if isinstance(feature_definition, Path):
176173
# If feature_definition is a Path, use it directly
177174
feat_def_path = feature_definition
178175
elif isinstance(feature_definition, str):
@@ -538,46 +535,3 @@ def pretransform_single(
538535
if self.counted:
539536
return fp.GetNonzeroElements()
540537
return dict.fromkeys(fp.GetOnBits(), 1)
541-
542-
@staticmethod
543-
def from_preconfiguration(
544-
config_name: Literal["gobbi"],
545-
**kwargs: Any,
546-
) -> MolToPharmacophore2DFP:
547-
"""Create a preconfigured MolToPharmacophore2DFP instance.
548-
549-
Preconfigurations:
550-
- "gobbi": Uses Gobbi's pharmacophore features as defined in:
551-
Gobbi, A. & Poppinger, D. Genetic optimization of combinatorial libraries.
552-
Biotechnology and Bioengineering 61, 47-54 (1998).
553-
554-
Parameters
555-
----------
556-
config_name : Literal["gobbi"]
557-
Name of the preconfiguration to use.
558-
**kwargs : Any
559-
Additional parameters to the MolToPharmacophore2DFP constructor.
560-
561-
Returns
562-
-------
563-
MolToPharmacophore2DFP
564-
Preconfigured MolToPharmacophore2DFP instance.
565-
566-
Raises
567-
------
568-
ValueError
569-
If the configuration name is unknown.
570-
571-
"""
572-
if config_name == "gobbi":
573-
# gobbi pharmacophore features are also implemented in RDKit. We just
574-
# borrow the definition here from the Gobbi_Pharm2D module.
575-
return MolToPharmacophore2DFP(
576-
feature_definition=Gobbi_Pharm2D.fdef,
577-
min_point_count=2,
578-
max_point_count=3,
579-
distance_bins=Gobbi_Pharm2D.defaultBins,
580-
**kwargs,
581-
)
582-
583-
raise ValueError(f"Unknown configuration name: {config_name}")

molpipeline/resources/MinimalFeatures.fdef

Lines changed: 0 additions & 57 deletions
This file was deleted.

molpipeline/resources/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/test_elements/test_mol2any/test_mol2pharmacophore2d_fingerprint.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def test_init_default_parameters(self) -> None:
3737
self.assertEqual(fp_element.min_point_count, 2)
3838
self.assertEqual(fp_element.max_point_count, 3)
3939
self.assertTrue(fp_element.triangular_pruning)
40-
self.assertEqual(fp_element.distance_bins, [(1, 2), (2, 5), (5, 8)])
40+
self.assertEqual(fp_element.distance_bins, Gobbi_Pharm2D.defaultBins)
4141
self.assertFalse(fp_element.counted) # Default should be False
4242

4343
def test_init_custom_parameters(self) -> None:
@@ -239,17 +239,16 @@ def test_custom_feature_definition(self) -> None:
239239
np.array_equal(fingerprints1.toarray(), fingerprints2.toarray()),
240240
)
241241

242-
def test_preconfigured_fingerprint_gobbi(self) -> None:
243-
"""Test preconfigured Gobbi pharmacophore fingerprint.
242+
def test_default_configuration_corresponds_to_gobbi(self) -> None:
243+
"""Test default config corresponds to Gobbi pharmacophore fingerprint.
244244
245245
Raises
246246
------
247247
AssertionError
248248
If the generated fingerprint does not match the RDKit Gobbi_Pharm2D factory.
249249
250250
"""
251-
fp_element = MolToPharmacophore2DFP.from_preconfiguration(
252-
"gobbi",
251+
fp_element = MolToPharmacophore2DFP(
253252
return_as="dense",
254253
)
255254
gobbi_fps = fp_element.transform(self.test_molecules)
@@ -263,11 +262,6 @@ def test_preconfigured_fingerprint_gobbi(self) -> None:
263262
raise AssertionError("Expected gobbi_fps to be a numpy array.")
264263
self.assertTrue(np.array_equal(gobbi_fps, gobbi_fp_rdkit))
265264

266-
def test_preconfigured_fingerprint_unknown_name(self) -> None:
267-
"""Test preconfigured fingerprint with an unknown name."""
268-
with self.assertRaises(ValueError):
269-
MolToPharmacophore2DFP.from_preconfiguration("unknown_fingerprint") # type: ignore[arg-type]
270-
271265

272266
class TestMolToPharmacophore2DFPFingerprintCalculation(unittest.TestCase):
273267
"""Test fingerprint calculation with MolToPharmacophore2DFP."""
@@ -302,7 +296,6 @@ def test_fingerprint_generation_sparse_counted(self) -> None:
302296
self.assertEqual(fingerprints.shape[0], len(self.test_molecules))
303297
self.assertEqual(fingerprints.shape[1], fp_element.n_bits)
304298
self.assertTrue(fingerprints.nnz > 0) # Should have some non-zero elements
305-
self.assertTrue(max(fingerprints.data) > 1) # Counted should have counts > 1
306299

307300
def test_fingerprint_generation_dense_binary(self) -> None:
308301
"""Test fingerprint generation with dense output."""
@@ -322,7 +315,6 @@ def test_fingerprint_generation_counted(self) -> None:
322315
self.assertIsInstance(fingerprints, np.ndarray)
323316
self.assertEqual(fingerprints.shape[0], len(self.test_molecules))
324317
self.assertEqual(fingerprints.shape[1], fp_element.n_bits)
325-
self.assertGreater(np.max(fingerprints), 1) # Should have counts >= 1
326318

327319
def test_fingerprint_generation_rdkit_binary(self) -> None:
328320
"""Test fingerprint generation with "rdkit_explicit" output."""

0 commit comments

Comments
 (0)