"""Tests for SDF to mol."""

import unittest

from rdkit import Chem

from molpipeline.abstract_pipeline_elements.core import InvalidInstance
from molpipeline.any2mol.sdf2mol import SDFToMol


class TestSDFToMol(unittest.TestCase):
    """Test class for SDFToMol.

    The fixtures are two V2000 SDF records carrying ``PUBCHEM_*`` data
    fields: benzaldehyde (CID 240, 14 atoms of which 6 are hydrogens) and
    aspirin (CID 2244, 21 atoms of which 8 are hydrogens). The atom counts
    expected by the tests (8 and 13) equal the number of non-hydrogen atoms
    in each record.
    """

    def setUp(self) -> None:
        """Set up test fixtures."""
        # NOTE: V2000 molfile records are fixed-width, so the column
        # alignment inside the literals below is significant. The record
        # lines start at column 0 on purpose.
        self.sdf_str_benzaldehyde = """240
  -OEChem-05082503512D

 14 14  0     0  0  0  0  0  0999 V2000
    3.7321    1.7500    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660    0.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.0000   -0.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.7321   -0.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.0000   -1.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.7321   -1.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660   -1.7500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660    1.2500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.4631    0.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.2690    0.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.4631   -1.5600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.2690   -1.5600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.8660   -2.3700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.3291    1.5600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  8  2  0  0  0  0
  2  3  2  0  0  0  0
  2  4  1  0  0  0  0
  2  8  1  0  0  0  0
  3  5  1  0  0  0  0
  3  9  1  0  0  0  0
  4  6  2  0  0  0  0
  4 10  1  0  0  0  0
  5  7  2  0  0  0  0
  5 11  1  0  0  0  0
  6  7  1  0  0  0  0
  6 12  1  0  0  0  0
  7 13  1  0  0  0  0
  8 14  1  0  0  0  0
M  END
> <PUBCHEM_COMPOUND_CID>
240

> <PUBCHEM_COMPOUND_CANONICALIZED>
1

> <PUBCHEM_CACTVS_COMPLEXITY>
72.5

$$$$
"""

        self.sdf_str_aspirin = """2244
  -OEChem-05082504263D

 21 21  0     0  0  0  0  0  0999 V2000
    1.2333    0.5540    0.7792 O   0  0  0  0  0  0  0  0  0  0  0  0
   -0.6952   -2.7148   -0.7502 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.7958   -2.1843    0.8685 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.7813    0.8105   -1.4821 O   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0857    0.6088    0.4403 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.7927   -0.5515    0.1244 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.7288    1.8464    0.4133 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1426   -0.4741   -0.2184 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.0787    1.9238    0.0706 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.7855    0.7636   -0.2453 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1409   -1.8536    0.1477 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1094    0.6715   -0.3113 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.5305    0.5996    0.1635 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1851    2.7545    0.6593 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.7247   -1.3605   -0.4564 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5797    2.8872    0.0506 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8374    0.8238   -0.5090 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.7290    1.4184    0.8593 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.2045    0.6969   -0.6924 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.7105   -0.3659    0.6426 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.2555   -3.5916   -0.7337 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  5  1  0  0  0  0
  1 12  1  0  0  0  0
  2 11  1  0  0  0  0
  2 21  1  0  0  0  0
  3 11  2  0  0  0  0
  4 12  2  0  0  0  0
  5  6  1  0  0  0  0
  5  7  2  0  0  0  0
  6  8  2  0  0  0  0
  6 11  1  0  0  0  0
  7  9  1  0  0  0  0
  7 14  1  0  0  0  0
  8 10  1  0  0  0  0
  8 15  1  0  0  0  0
  9 10  2  0  0  0  0
  9 16  1  0  0  0  0
 10 17  1  0  0  0  0
 12 13  1  0  0  0  0
 13 18  1  0  0  0  0
 13 19  1  0  0  0  0
 13 20  1  0  0  0  0
M  END
> <PUBCHEM_COMPOUND_CID>
2244

> <PUBCHEM_CONFORMER_RMSD>
0.6

$$$$
"""
        # Two records back to back in one string: benzaldehyde first,
        # aspirin second.
        self.sdf_str_benzaldehyde_aspirin = (
            self.sdf_str_benzaldehyde + self.sdf_str_aspirin
        )
        self.invalid_sdf = "Not an SDF string"

    def _require_mol(self, candidate: object) -> "Chem.Mol":
        """Assert that ``candidate`` is an RDKit molecule and return it.

        Parameters
        ----------
        candidate : object
            Value returned by the element under test.

        Returns
        -------
        Chem.Mol
            ``candidate`` itself, narrowed to ``Chem.Mol``.

        Raises
        ------
        AssertionError
            If ``candidate`` is not a ``Chem.Mol`` object.

        """
        self.assertIsInstance(candidate, Chem.Mol)
        if not isinstance(candidate, Chem.Mol):
            # necessary for mypy: assertIsInstance alone does not narrow
            raise AssertionError("Expected a Chem.Mol object")
        return candidate

    def test_initialization(self) -> None:
        """Test initialization of SDFToMol."""
        sdf2mol = SDFToMol(identifier="smiles", name="CustomName", n_jobs=2)
        self.assertEqual(sdf2mol.identifier, "smiles")
        self.assertEqual(sdf2mol.name, "CustomName")
        self.assertEqual(sdf2mol.n_jobs, 2)
        self.assertEqual(sdf2mol.mol_counter, 0)

    def test_pretransform_valid_sdf(self) -> None:
        """Test transformation of valid SDF string.

        Raises
        ------
        AssertionError
            If the transformation does not return a valid molecule.

        """
        # One shared element instance is used for all three inputs.
        sdf2mol = SDFToMol()
        cases = (
            # sdf string with single molecule: benzaldehyde
            ("benzaldehyde", self.sdf_str_benzaldehyde, 8),
            # sdf string with single molecule: aspirin
            ("aspirin", self.sdf_str_aspirin, 13),
            # sdf string with multiple molecules: benzaldehyde and aspirin
            # the current behavior is to return the first molecule only
            ("benzaldehyde+aspirin", self.sdf_str_benzaldehyde_aspirin, 8),
        )
        for label, sdf_str, expected_atoms in cases:
            with self.subTest(sdf=label):
                mol = self._require_mol(sdf2mol.pretransform_single(sdf_str))
                self.assertEqual(mol.GetNumAtoms(), expected_atoms)

    def test_pretransform_invalid_sdf(self) -> None:
        """Test handling of invalid SDF input."""
        result = SDFToMol().pretransform_single(self.invalid_sdf)
        self.assertIsInstance(result, InvalidInstance)

    def test_transform(self) -> None:
        """Test transform function.

        Raises
        ------
        AssertionError
            If the transformation does not return a valid molecule.

        """
        # test list of sdf strings
        mols = SDFToMol().transform([self.sdf_str_benzaldehyde, self.sdf_str_aspirin])
        self.assertEqual(len(mols), 2)
        self.assertEqual(self._require_mol(mols[0]).GetNumAtoms(), 8)
        self.assertEqual(self._require_mol(mols[1]).GetNumAtoms(), 13)

        # test single sdf string with multiple molecules
        # only the first molecule in the SDF string will be read
        mols = SDFToMol().transform([self.sdf_str_benzaldehyde_aspirin])
        self.assertEqual(len(mols), 1)
        self.assertEqual(self._require_mol(mols[0]).GetNumAtoms(), 8)

        # test multiple sdf strings with multiple molecules
        # the current behavior is to return the first molecule only
        mols = SDFToMol().transform(
            [self.sdf_str_benzaldehyde_aspirin, self.sdf_str_benzaldehyde],
        )
        self.assertEqual(len(mols), 2)
        self.assertEqual(self._require_mol(mols[0]).GetNumAtoms(), 8)
        self.assertEqual(self._require_mol(mols[1]).GetNumAtoms(), 8)

    def test_sdf_properties_transfer(self) -> None:
        """Test that properties from SDF are transferred to molecule.

        Raises
        ------
        AssertionError
            If the transformation does not return a valid molecule.

        """
        mol = self._require_mol(
            SDFToMol().pretransform_single(self.sdf_str_benzaldehyde),
        )
        # Data fields of the benzaldehyde record and their raw string values.
        expected_props = {
            "PUBCHEM_COMPOUND_CID": "240",
            "PUBCHEM_COMPOUND_CANONICALIZED": "1",
            "PUBCHEM_CACTVS_COMPLEXITY": "72.5",
        }
        for prop_name, prop_value in expected_props.items():
            with self.subTest(prop=prop_name):
                self.assertTrue(mol.HasProp(prop_name))
                self.assertEqual(mol.GetProp(prop_name), prop_value)
# 0 commit comments