Skip to content

Commit 35a1707

Browse files
committed
Merge branch 'main' into development
# Conflicts: # molpipeline/any2mol/sdf2mol.py
2 parents dd04bc2 + d858edd commit 35a1707

2 files changed

Lines changed: 248 additions & 3 deletions

File tree

molpipeline/any2mol/sdf2mol.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,17 @@
33
from __future__ import annotations
44

55
import copy
6-
from typing import Any, Literal, Self
6+
from typing import TYPE_CHECKING, Any, Literal, Self
77

88
from rdkit import Chem
99

1010
from molpipeline.abstract_pipeline_elements.any2mol.string2mol import (
1111
StringToMolPipelineElement as _StringToMolPipelineElement,
1212
)
1313
from molpipeline.abstract_pipeline_elements.core import InvalidInstance
14-
from molpipeline.utils.molpipeline_types import OptionalMol
14+
15+
if TYPE_CHECKING:
16+
from molpipeline.utils.molpipeline_types import OptionalMol
1517

1618

1719
class SDFToMol(_StringToMolPipelineElement):
@@ -39,6 +41,7 @@ def __init__(
3941
Number of cores used for processing.
4042
uuid: str | None, optional
4143
uuid of PipelineElement, by default None
44+
4245
"""
4346
super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)
4447
self.identifier = identifier
@@ -55,6 +58,7 @@ def get_params(self, deep: bool = True) -> dict[str, Any]:
5558
-------
5659
dict[str, Any]
5760
Dictionary containing all parameters defining the object.
61+
5862
"""
5963
params = super().get_params(deep)
6064
if deep:
@@ -75,6 +79,7 @@ def set_params(self, **parameters: Any) -> Self:
7579
-------
7680
Self
7781
SDFToMol with updated parameters.
82+
7883
"""
7984
super().set_params(**parameters)
8085
if "identifier" in parameters:
@@ -93,14 +98,17 @@ def pretransform_single(self, value: str) -> OptionalMol:
9398
-------
9499
OptionalMol
95100
Molecule if transformation was successful, else InvalidInstance.
101+
96102
"""
97103
if not isinstance(value, (str, bytes)):
98104
return InvalidInstance(
99105
self.uuid,
100106
"Invalid SDF string!",
101107
self.name,
102108
)
103-
mol = Chem.MolFromMolBlock(value)
109+
supplier = Chem.SDMolSupplier()
110+
supplier.SetData(value)
111+
mol = next(supplier, None)
104112
if mol is None:
105113
return InvalidInstance(
106114
self.uuid,
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
"""Tests for SDF to mol."""
2+
3+
import unittest
4+
5+
from rdkit import Chem
6+
7+
from molpipeline.abstract_pipeline_elements.core import InvalidInstance
8+
from molpipeline.any2mol.sdf2mol import SDFToMol
9+
10+
11+
class TestSDFToMol(unittest.TestCase):
12+
"""Test class for SDFToMol."""
13+
14+
def setUp(self) -> None:
    """Set up test fixtures.

    Creates the SDF strings used by the tests:

    - ``sdf_str_benzaldehyde``: one record (title ``240``) with 14 atoms,
      14 bonds and three data items.
    - ``sdf_str_aspirin``: one record (title ``2244``) with 21 atoms,
      21 bonds and two data items.
    - ``sdf_str_benzaldehyde_aspirin``: both records concatenated, i.e. a
      two-molecule SDF string.
    - ``invalid_sdf``: a plain string that is not an SDF record.

    The molblocks are V2000 connection tables, which is a fixed-column
    format. The blanks inside the literals are therefore significant
    (10-character coordinates, 3-character integer fields, ``M  END`` with
    two spaces) and must not be collapsed or re-indented.

    """
    self.sdf_str_benzaldehyde = """240
  -OEChem-05082503512D

 14 14  0     0  0  0  0  0  0999 V2000
    3.7321    1.7500    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660    0.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.0000   -0.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.7321   -0.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.0000   -1.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.7321   -1.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660   -1.7500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660    1.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.4631    0.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.2690    0.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.4631   -1.5600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.2690   -1.5600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660   -2.3700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.3291    1.5600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  8  2  0  0  0  0
  2  3  2  0  0  0  0
  2  4  1  0  0  0  0
  2  8  1  0  0  0  0
  3  5  1  0  0  0  0
  3  9  1  0  0  0  0
  4  6  2  0  0  0  0
  4 10  1  0  0  0  0
  5  7  2  0  0  0  0
  5 11  1  0  0  0  0
  6  7  1  0  0  0  0
  6 12  1  0  0  0  0
  7 13  1  0  0  0  0
  8 14  1  0  0  0  0
M  END
> <PUBCHEM_COMPOUND_CID>
240

> <PUBCHEM_COMPOUND_CANONICALIZED>
1

> <PUBCHEM_CACTVS_COMPLEXITY>
72.5

$$$$
"""

    self.sdf_str_aspirin = """2244
  -OEChem-05082504263D

 21 21  0     0  0  0  0  0  0999 V2000
    1.2333    0.5540    0.7792 O   0  0  0  0  0  0  0  0  0  0  0  0
   -0.6952   -2.7148   -0.7502 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.7958   -2.1843    0.8685 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.7813    0.8105   -1.4821 O   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0857    0.6088    0.4403 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.7927   -0.5515    0.1244 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.7288    1.8464    0.4133 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1426   -0.4741   -0.2184 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.0787    1.9238    0.0706 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.7855    0.7636   -0.2453 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1409   -1.8536    0.1477 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1094    0.6715   -0.3113 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.5305    0.5996    0.1635 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1851    2.7545    0.6593 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.7247   -1.3605   -0.4564 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5797    2.8872    0.0506 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8374    0.8238   -0.5090 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.7290    1.4184    0.8593 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.2045    0.6969   -0.6924 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.7105   -0.3659    0.6426 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.2555   -3.5916   -0.7337 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  5  1  0  0  0  0
  1 12  1  0  0  0  0
  2 11  1  0  0  0  0
  2 21  1  0  0  0  0
  3 11  2  0  0  0  0
  4 12  2  0  0  0  0
  5  6  1  0  0  0  0
  5  7  2  0  0  0  0
  6  8  2  0  0  0  0
  6 11  1  0  0  0  0
  7  9  1  0  0  0  0
  7 14  1  0  0  0  0
  8 10  1  0  0  0  0
  8 15  1  0  0  0  0
  9 10  2  0  0  0  0
  9 16  1  0  0  0  0
 10 17  1  0  0  0  0
 12 13  1  0  0  0  0
 13 18  1  0  0  0  0
 13 19  1  0  0  0  0
 13 20  1  0  0  0  0
M  END
> <PUBCHEM_COMPOUND_CID>
2244

> <PUBCHEM_CONFORMER_RMSD>
0.6

$$$$
"""
    # Both records end with "$$$$" and a newline, so plain concatenation
    # yields an SDF string holding two molecules.
    self.sdf_str_benzaldehyde_aspirin = (
        self.sdf_str_benzaldehyde + self.sdf_str_aspirin
    )
    self.invalid_sdf = "Not an SDF string"
120+
121+
def test_initialization(self) -> None:
    """Check that the constructor stores its arguments on the element."""
    element = SDFToMol(identifier="smiles", name="CustomName", n_jobs=2)
    # mol_counter is not passed to the constructor; a fresh element is
    # expected to report 0.
    expected_attributes = {
        "identifier": "smiles",
        "name": "CustomName",
        "n_jobs": 2,
        "mol_counter": 0,
    }
    for attribute, expected in expected_attributes.items():
        self.assertEqual(getattr(element, attribute), expected)
128+
129+
def test_pretransform_valid_sdf(self) -> None:
    """Test that valid SDF strings are converted to molecules.

    Covers a single-record string for benzaldehyde, a single-record string
    for aspirin, and a two-record string (benzaldehyde followed by aspirin).

    Raises
    ------
    AssertionError
        If the transformation does not return a valid molecule.

    """
    sdf2mol = SDFToMol()
    # The expected counts are smaller than the atom blocks in the fixtures
    # (14 and 21 atoms): they match the non-hydrogen atoms only.
    # For the two-record string the current behavior is to return the
    # first molecule only, hence benzaldehyde's count.
    cases = (
        (self.sdf_str_benzaldehyde, 8),
        (self.sdf_str_aspirin, 13),
        (self.sdf_str_benzaldehyde_aspirin, 8),
    )
    for sdf_string, expected_n_atoms in cases:
        mol = sdf2mol.pretransform_single(sdf_string)
        self.assertIsInstance(mol, Chem.Mol)
        if not isinstance(mol, Chem.Mol):
            # necessary for mypy
            raise AssertionError("Expected a Chem.Mol object")
        self.assertEqual(mol.GetNumAtoms(), expected_n_atoms)
163+
164+
def test_pretransform_invalid_sdf(self) -> None:
    """Test that a non-SDF string yields an InvalidInstance."""
    sdf2mol = SDFToMol()
    outcome = sdf2mol.pretransform_single(self.invalid_sdf)
    self.assertIsInstance(outcome, InvalidInstance)
168+
169+
def test_transform(self) -> None:
    """Test transform on lists of SDF strings.

    Each input string is expected to produce exactly one output molecule,
    also when the string holds several records.

    Raises
    ------
    AssertionError
        If the transformation does not return a valid molecule.

    """
    # (input SDF strings, expected atom count of each returned molecule)
    scenarios = (
        # list of single-record sdf strings
        ([self.sdf_str_benzaldehyde, self.sdf_str_aspirin], (8, 13)),
        # single sdf string with multiple molecules:
        # only the first molecule in the SDF string will be read
        ([self.sdf_str_benzaldehyde_aspirin], (8,)),
        # multiple sdf strings, one of them with multiple molecules:
        # the current behavior is to return the first molecule only
        (
            [self.sdf_str_benzaldehyde_aspirin, self.sdf_str_benzaldehyde],
            (8, 8),
        ),
    )
    for sdf_strings, expected_atom_counts in scenarios:
        mols = SDFToMol().transform(sdf_strings)
        self.assertEqual(len(mols), len(expected_atom_counts))
        # Check all types first, then the atom counts.
        for position in range(len(expected_atom_counts)):
            self.assertIsInstance(mols[position], Chem.Mol)
        for position, expected_n_atoms in enumerate(expected_atom_counts):
            mol = mols[position]
            if not isinstance(mol, Chem.Mol):
                # necessary for mypy
                raise AssertionError("Expected a Chem.Mol object")
            self.assertEqual(mol.GetNumAtoms(), expected_n_atoms)
218+
219+
def test_sdf_properties_transfer(self) -> None:
    """Test that SDF data items become properties of the molecule.

    Raises
    ------
    AssertionError
        If the transformation does not return a valid molecule.

    """
    parsed = SDFToMol().pretransform_single(self.sdf_str_benzaldehyde)
    if not isinstance(parsed, Chem.Mol):
        # necessary for mypy
        raise AssertionError("Expected a Chem.Mol object")
    # Data items of the benzaldehyde fixture; values are compared as
    # strings.
    expected_properties = {
        "PUBCHEM_COMPOUND_CID": "240",
        "PUBCHEM_COMPOUND_CANONICALIZED": "1",
        "PUBCHEM_CACTVS_COMPLEXITY": "72.5",
    }
    for tag, expected_value in expected_properties.items():
        self.assertTrue(parsed.HasProp(tag))
        self.assertEqual(parsed.GetProp(tag), expected_value)

0 commit comments

Comments
 (0)