Skip to content

Commit 381bd12

Browse files
Merge pull request #125 from robpollice/bugfix-120-radical-kekulization
Handling of radicals in kekulization (fix #120)
2 parents 55f4e34 + 5a3b0e2 commit 381bd12

File tree

3 files changed

+52
-8
lines changed

3 files changed

+52
-8
lines changed

selfies/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@
2121
"O": (2, 4), "S": (2, 4), "Se": (2, 4), "Te": (2, 4)
2222
}
2323

24+
VALENCE_ELECTRONS = {
25+
"B": 3, "Al": 3,
26+
"C": 4, "Si": 4,
27+
"N": 5, "P": 5, "As": 5,
28+
"O": 6, "S": 6, "Se": 6, "Te": 6
29+
}
30+
2431
AROMATIC_SUBSET = set(e.lower() for e in AROMATIC_VALENCES)
2532

2633
# =============================================================================

selfies/mol_graph.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from dataclasses import dataclass, field
55

66
from selfies.bond_constraints import get_bonding_capacity
7-
from selfies.constants import AROMATIC_VALENCES
7+
from selfies.constants import AROMATIC_VALENCES, VALENCE_ELECTRONS
88
from selfies.utils.matching_utils import find_perfect_matching
99

1010

@@ -254,7 +254,7 @@ def kekulize(self) -> bool:
254254

255255
ds = self._delocal_subgraph
256256
kept_nodes = set(itertools.filterfalse(self._prune_from_ds, ds))
257-
257+
258258
# relabel kept DS nodes to be 0, 1, 2, ...
259259
label_to_node = list(sorted(kept_nodes))
260260
node_to_label = {v: i for i, v in enumerate(label_to_node)}
@@ -265,7 +265,7 @@ def kekulize(self) -> bool:
265265
label = node_to_label[node]
266266
for adj in filter(lambda v: v in kept_nodes, ds[node]):
267267
pruned_ds[label].append(node_to_label[adj])
268-
268+
269269
matching = find_perfect_matching(pruned_ds)
270270
if matching is None:
271271
return False
@@ -288,18 +288,33 @@ def _prune_from_ds(self, node):
288288
adj_nodes = self._delocal_subgraph[node]
289289
if not adj_nodes:
290290
return True # aromatic atom with no aromatic bonds
291-
291+
292292
atom = self._atoms[node]
293293
valences = AROMATIC_VALENCES[atom.element]
294-
294+
295295
# each bond in DS has order 1.5 - we treat them as single bonds
296296
used_electrons = int(self._bond_counts[node] - 0.5 * len(adj_nodes))
297-
297+
298298
if atom.h_count is None: # account for implicit Hs
299299
assert atom.charge == 0
300300
return any(used_electrons == v for v in valences)
301301
else:
302302
valence = valences[-1] - atom.charge
303303
used_electrons += atom.h_count
304-
free_electrons = valence - used_electrons
305-
return not ((free_electrons >= 0) and (free_electrons % 2 != 0))
304+
305+
# count the total number of bound electrons of each atom
306+
bound_electrons = (max(0, atom.charge) + atom.h_count
307+
+ int(self._bond_counts[node])
308+
+ int(2 * (self._bond_counts[node] % 1)))
309+
310+
# calculate the number of unpaired electrons of each atom
311+
radical_electrons = (max(0, VALENCE_ELECTRONS[atom.element]
312+
- bound_electrons) % 2)
313+
314+
# unpaired electrons do not contribute to the aromatic system
315+
free_electrons = valence - used_electrons - radical_electrons
316+
317+
if any(used_electrons == v - atom.charge for v in valences):
318+
return True
319+
else:
320+
return not ((free_electrons >= 0) and (free_electrons % 2 != 0))

tests/test_specific_cases.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ def decode_eq(selfies, smiles):
88
return s == smiles
99

1010

11+
def roundtrip_eq(smiles_in, smiles_out):
12+
sel = sf.encoder(smiles_in)
13+
smi = sf.decoder(sel)
14+
return smi == smiles_out
15+
16+
1117
def test_branch_and_ring_at_state_X0():
1218
"""Tests SELFIES with branches and rings at state X0 (i.e. at the
1319
very beginning of a SELFIES). These symbols should be skipped.
@@ -330,6 +336,7 @@ def test_old_symbols():
330336
except Exception:
331337
assert False
332338

339+
333340
def test_large_selfies_decoding():
334341
"""Test that we can decode extremely large SELFIES strings (used to cause a RecursionError)
335342
"""
@@ -339,8 +346,23 @@ def test_large_selfies_decoding():
339346

340347
assert decode_eq(large_selfies, expected_smiles)
341348

349+
350+
def test_radical_kekulization():
351+
"""Tests kekulization of aromatic systems with radicals and charges.
352+
"""
353+
354+
assert roundtrip_eq("c1ccc[c]c1", "C1=CC=C[CH0]=C1")
355+
assert roundtrip_eq("c1[c]n1(C)", "C1=[CH0]N1C")
356+
assert roundtrip_eq("c1[C][n+]1(C)", "C=1[CH0][N+1]=1C")
357+
assert roundtrip_eq("c1nnn[n-]1", "C1=NN=N[N-1]1")
358+
assert roundtrip_eq("c1ccn[c-](C)[n+]1=O", "C1=CC=N[C-1](C)[N+1]1=O")
359+
assert roundtrip_eq("c1ccs[n+]1c2ccccc2", "C=1C=CS[N+1]=1C2=CC=CC=C2")
360+
assert roundtrip_eq("c1ccs[nH+]1", "C=1C=CS[NH1+1]=1")
361+
362+
342363
def test_novel_charged_symbols():
343364
"""Test decoding of updated constraints for charged atoms (update in 2.2.0)."""
344365
assert decode_eq("[N][#C+1][#NH1][#C@H1]", "N#[C+1]")
345366
assert decode_eq("[O+1][=P+1][#P-1][#C@@]", "[O+1]=[P+1]=[P-1]#[C@@]")
346367
assert decode_eq("[=C-1][#S+1][#B]", "[C-1]#[S+1]=B")
368+

0 commit comments

Comments
 (0)