Skip to content

Commit 241ad49

Browse files
authored
Merge pull request #637 from Steinbeck-Lab/development
feat: Atom numbering & cis/trans detection
2 parents cd6e515 + d638ab1 commit 241ad49

File tree

83 files changed

+9240
-4509
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

83 files changed

+9240
-4509
lines changed

CITATION.cff

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ authors:
1717
given-names: "Kohulan"
1818
orcid: "https://orcid.org/0000-0003-1066-7792"
1919
title: "cheminformatics-microservice"
20-
version: v3.3.0
21-
doi: 10.5281/zenodo.15575699
22-
date-released: 2025-06-02
20+
version: v3.5.0
21+
doi: 10.5281/zenodo.16890410
22+
date-released: 2025-08-17
2323
url: "https://github.com/Steinbeck-Lab/cheminformatics-microservice"

README.md

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -156,10 +156,11 @@ This project is licensed under the MIT License. See the [LICENSE](https://github
156156
## 📰 Citation
157157

158158
### Paper
159-
Chandrasekhar, V., Sharma, N., Schaub, J. et al. Cheminformatics Microservice: unifying access to open cheminformatics toolkits. J Cheminform 15, 98 (2023). [https://doi.org/10.1186/s13321-023-00762-4](https://doi.org/10.1186/s13321-023-00762-4)
159+
1. Chandrasekhar, V., Sharma, N., Schaub, J. et al. Cheminformatics Microservice: unifying access to open cheminformatics toolkits. J Cheminform 15, 98 (2023). [https://doi.org/10.1186/s13321-023-00762-4](https://doi.org/10.1186/s13321-023-00762-4)
160+
2. Rajan, K., Chandrasekhar, V., Sharma, N. et al. Cheminformatics Microservice V3: a web portal for chemical structure manipulation and analysis. J Cheminform 17, 142 (2025). [https://doi.org/10.1186/s13321-025-01094-1](https://doi.org/10.1186/s13321-025-01094-1)
160161

161162
### Software
162-
Venkata, C., Sharma, N., & Rajan, K. (2023). Cheminformatics Microservice (Version v2.6.0) [Computer software]. [https://zenodo.org/records/13867839](https://zenodo.org/records/13867839)
163+
- Venkata, C., Sharma, N., Schaub, J., Steinbeck, C., & Rajan, K. (2025). cheminformatics-microservice (Version v3.5.0) [Computer software]. https://doi.org/10.5281/zenodo.16890410
163164

164165
## 🔧 Maintenance
165166

app/modules/coconut/preprocess.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,9 @@ def get_COCONUT_preprocessing(
250250
},
251251
"has_stereo": rdkitmodules.has_potential_stereochemistry(original_mol),
252252
"has_stereo_defined": rdkitmodules.has_stereo_defined(original_mol),
253+
"has_stereogenic_elements": rdkitmodules.has_cis_trans_stereochemistry(
254+
original_mol
255+
),
253256
"descriptors": original_descriptors,
254257
"errors": checker.check_molblock(original_mol_block),
255258
},
@@ -264,6 +267,9 @@ def get_COCONUT_preprocessing(
264267
standardized_mol
265268
),
266269
"has_stereo_defined": rdkitmodules.has_stereo_defined(standardized_mol),
270+
"has_stereogenic_elements": rdkitmodules.has_cis_trans_stereochemistry(
271+
standardized_mol
272+
),
267273
"descriptors": standardized_descriptors,
268274
"errors": checker.check_molblock(standardized_mol_block),
269275
},

app/modules/depiction.py

Lines changed: 81 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -20,19 +20,26 @@ def get_cdk_depiction(
2020
unicolor=False,
2121
highlight="",
2222
highlight_atoms=None,
23+
showAtomNumbers=False,
2324
):
2425
"""This function takes the user input SMILES and Depicts it.
2526
2627
using the CDK Depiction Generator.
2728
2829
Args:
2930
molecule (any): CDK IAtomContainer parsed from SMILES string given by the user.
31+
molSize (tuple, optional): Size of the output image. Defaults to (512, 512).
32+
rotate (int, optional): Rotation angle in degrees. Defaults to 0.
33+
kekulize (bool, optional): Whether to kekulize the molecule. Defaults to True.
34+
CIP (bool, optional): Whether to annotate CIP stereochemistry. Defaults to True.
35+
unicolor (bool, optional): Whether to use black and white colors. Defaults to False.
36+
highlight (str, optional): SMARTS pattern to highlight. Defaults to empty.
37+
highlight_atoms (list, optional): List of atom indices to highlight. Defaults to None.
38+
showAtomNumbers (bool, optional): Whether to display atom numbers. Defaults to False.
3039
3140
Returns:
3241
image (SVG): CDK Structure Depiction as an SVG image.
3342
"""
34-
print(unicolor)
35-
3643
cdk_base = "org.openscience.cdk"
3744
StandardGenerator = JClass(
3845
cdk_base + ".renderer.generators.standard.StandardGenerator",
@@ -75,7 +82,7 @@ def get_cdk_depiction(
7582
try:
7683
Kekulization.kekulize(SDGMol)
7784
except Exception as e:
78-
print(e + "Can't Kekulize molecule")
85+
print(str(e) + " Can't Kekulize molecule")
7986

8087
point = JClass(
8188
cdk_base + ".geometry.GeometryTools",
@@ -86,22 +93,65 @@ def get_cdk_depiction(
8693
(rotate * JClass("java.lang.Math").PI / 180.0),
8794
)
8895

96+
# Add atom numbers if requested
97+
if showAtomNumbers:
98+
DepictionGenerator = DepictionGenerator.withAtomNumbers()
99+
89100
# Handle highlighting: prioritize atom indices over SMARTS patterns
90101
if highlight_atoms and len(highlight_atoms) > 0:
91-
# For CDK, we need to create substructures from atom indices
92-
# This is more complex and would require additional CDK classes
93-
# For now, fall back to SMARTS pattern if available
94-
if highlight and highlight.strip():
95-
tmpPattern = SmartsPattern.create(highlight, SCOB.getInstance())
96-
SmartsPattern.prepare(SDGMol)
97-
tmpMappings = tmpPattern.matchAll(SDGMol)
98-
tmpSubstructures = tmpMappings.toSubstructures()
99-
lightBlue = Color(173, 216, 230)
100-
DepictionGenerator = DepictionGenerator.withHighlight(
101-
tmpSubstructures, lightBlue
102-
).withOuterGlowHighlight()
103-
# Note: Direct atom index highlighting in CDK requires more complex implementation
104-
# This would need creating IAtomContainerSet from specific atoms
102+
# Create atom sets from indices for highlighting
103+
AtomContainer = JClass(cdk_base + ".AtomContainer")
104+
AtomContainerSet = JClass(cdk_base + ".AtomContainerSet")
105+
106+
# Create a set of substructures from atom indices
107+
tmpSubstructures = AtomContainerSet()
108+
109+
# If highlight_atoms is a list of lists, each list is a separate substructure
110+
if isinstance(highlight_atoms[0], list):
111+
# Multiple substructures (e.g., multiple sugars)
112+
for atom_indices in highlight_atoms:
113+
if len(atom_indices) > 0:
114+
subset = AtomContainer()
115+
for idx in atom_indices:
116+
if idx < SDGMol.getAtomCount():
117+
subset.addAtom(SDGMol.getAtom(idx))
118+
# Add bonds between highlighted atoms
119+
for i, idx1 in enumerate(atom_indices):
120+
for idx2 in atom_indices[i + 1:]:
121+
if (
122+
idx1 < SDGMol.getAtomCount()
123+
and idx2 < SDGMol.getAtomCount()
124+
):
125+
bond = SDGMol.getBond(
126+
SDGMol.getAtom(idx1), SDGMol.getAtom(idx2)
127+
)
128+
if bond is not None:
129+
subset.addBond(bond)
130+
tmpSubstructures.addAtomContainer(subset)
131+
else:
132+
# Single substructure
133+
subset = AtomContainer()
134+
for idx in highlight_atoms:
135+
if idx < SDGMol.getAtomCount():
136+
subset.addAtom(SDGMol.getAtom(idx))
137+
# Add bonds between highlighted atoms
138+
for i, idx1 in enumerate(highlight_atoms):
139+
for idx2 in highlight_atoms[i + 1:]:
140+
if (
141+
idx1 < SDGMol.getAtomCount()
142+
and idx2 < SDGMol.getAtomCount()
143+
):
144+
bond = SDGMol.getBond(
145+
SDGMol.getAtom(idx1), SDGMol.getAtom(idx2)
146+
)
147+
if bond is not None:
148+
subset.addBond(bond)
149+
tmpSubstructures.addAtomContainer(subset)
150+
151+
lightBlue = Color(173, 216, 230)
152+
DepictionGenerator = DepictionGenerator.withHighlight(
153+
tmpSubstructures, lightBlue
154+
).withOuterGlowHighlight()
105155
elif highlight and highlight.strip():
106156
tmpPattern = SmartsPattern.create(highlight, SCOB.getInstance())
107157
SmartsPattern.prepare(SDGMol)
@@ -138,6 +188,7 @@ def get_rdkit_depiction(
138188
unicolor=False,
139189
highlight: str = "",
140190
highlight_atoms=None,
191+
showAtomNumbers=False,
141192
) -> str:
142193
"""
143194
Generate a 2D depiction of the input molecule using RDKit.
@@ -150,6 +201,8 @@ def get_rdkit_depiction(
150201
CIP (bool, optional): Whether to assign CIP stereochemistry. Defaults to False.
151202
unicolor (bool, optional): Whether to use a unicolor palette. Defaults to False.
152203
highlight (str, optional): SMARTS pattern to highlight atoms/bonds. Defaults to empty.
204+
highlight_atoms (list, optional): List of atom indices to highlight. Defaults to None.
205+
showAtomNumbers (bool, optional): Whether to display atom numbers. Defaults to False.
153206
154207
Returns:
155208
str: RDKit Structure Depiction as an SVG image.
@@ -176,6 +229,12 @@ def get_rdkit_depiction(
176229
if unicolor:
177230
drawer.drawOptions().useBWAtomPalette()
178231

232+
# Add atom numbers if requested
233+
if showAtomNumbers:
234+
# Set atom numbers as notes on each atom
235+
for atom in mc.GetAtoms():
236+
atom.SetProp("atomNote", str(atom.GetIdx()))
237+
179238
# Handle highlighting based on priority: anchor atoms + SMARTS pattern, then atom indices, then SMARTS pattern alone
180239
if highlight_atoms and len(highlight_atoms) > 0 and highlight:
181240
# Combined approach: Use SMARTS pattern but only highlight the match that contains the anchor atoms
@@ -222,12 +281,11 @@ def get_rdkit_depiction(
222281
hit_ats = tuple(highlight_atoms)
223282
# Find ALL bonds that connect atoms within the functional group
224283
hit_bonds = []
225-
for i, atom1_idx in enumerate(hit_ats):
226-
for j, atom2_idx in enumerate(hit_ats):
227-
if i < j: # Avoid duplicate bonds
228-
bond = mc.GetBondBetweenAtoms(atom1_idx, atom2_idx)
229-
if bond:
230-
hit_bonds.append(bond.GetIdx())
284+
for i in range(len(hit_ats)):
285+
for j in range(i + 1, len(hit_ats)):
286+
bond = mc.GetBondBetweenAtoms(hit_ats[i], hit_ats[j])
287+
if bond:
288+
hit_bonds.append(bond.GetIdx())
231289

232290
rdMolDraw2D.PrepareAndDrawMolecule(
233291
drawer, mc, highlightAtoms=hit_ats, highlightBonds=hit_bonds

app/modules/toolkits/cdk_wrapper.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,18 +28,19 @@ def setup_jvm():
2828

2929
if not isJVMStarted():
3030
paths = {
31-
"cdk-2.10": "https://github.com/cdk/cdk/releases/download/cdk-2.10/cdk-2.10.jar",
32-
"SugarRemovalUtility-jar-with-dependencies": "https://github.com/JonasSchaub/SugarRemoval/releases/download/v1.3.2/SugarRemovalUtility-jar-with-dependencies.jar",
31+
"cdk-2.11": "https://github.com/cdk/cdk/releases/download/cdk-2.11/cdk-2.11.jar",
32+
"SugarRemovalUtility-jar-with-dependencies": "https://github.com/JonasSchaub/SugarRemoval/releases/download/v1.6/SugarRemovalUtility-jar-with-dependencies.jar",
3333
"centres": "https://github.com/SiMolecule/centres/releases/download/1.0/centres.jar",
3434
"opsin-cli-2.8.0-jar-with-dependencies": "https://github.com/dan2097/opsin/releases/download/2.8.0/opsin-cli-2.8.0-jar-with-dependencies.jar",
3535
}
3636

3737
jar_paths = {
38-
key: str(pystow.join("STOUT-V2")) + f"/{key}.jar" for key in paths.keys()
38+
key: str(pystow.join("JAVA_Packages")) + f"/{key}.jar"
39+
for key in paths.keys()
3940
}
4041
for key, url in paths.items():
4142
if not os.path.exists(jar_paths[key]):
42-
pystow.ensure("STOUT-V2", url=url)
43+
pystow.ensure("JAVA_Packages", url=url)
4344

4445
startJVM("-ea", "-Xmx4096M", classpath=[jar_paths[key] for key in jar_paths])
4546

@@ -68,6 +69,23 @@ def get_CDK_IAtomContainer(smiles: str):
6869
return molecule
6970

7071

72+
def get_CDK_IAtomContainer_from_molblock(molblock: str):
73+
"""This function takes a MOL block and creates a CDK IAtomContainer.
74+
75+
Args:
76+
molblock (str): MOL block string as input.
77+
78+
Returns:
79+
mol (object): IAtomContainer with CDK.
80+
"""
81+
SCOB = JClass(cdk_base + ".silent.SilentChemObjectBuilder")
82+
StringReader = JClass("java.io.StringReader")(molblock)
83+
MDLV2000Reader = JClass(cdk_base + ".io.MDLV2000Reader")(StringReader)
84+
molecule = MDLV2000Reader.read(SCOB.getInstance().newAtomContainer())
85+
MDLV2000Reader.close()
86+
return molecule
87+
88+
7189
def get_CDK_SDG(molecule: any):
7290
"""This function takes the input IAtomContainer and Creates a.
7391

app/modules/toolkits/rdkit_wrapper.py

Lines changed: 42 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -862,12 +862,14 @@ def get_ertl_functional_groups(molecule: any) -> list:
862862
try:
863863
# Extract information from IFG object
864864
group_data = {
865-
"atomIds": list(fragment.atomIds)
866-
if hasattr(fragment, "atomIds")
867-
else [],
868-
"atoms": str(fragment.atoms)
869-
if hasattr(fragment, "atoms")
870-
else "",
865+
"atomIds": (
866+
list(fragment.atomIds)
867+
if hasattr(fragment, "atomIds")
868+
else []
869+
),
870+
"atoms": (
871+
str(fragment.atoms) if hasattr(fragment, "atoms") else ""
872+
),
871873
"type": str(fragment.type) if hasattr(fragment, "type") else "",
872874
"description": str(
873875
fragment
@@ -896,7 +898,7 @@ def get_standardized_tautomer(
896898
"""Generate the standardized tautomer SMILES for a given molecule.
897899
898900
Args:
899-
molecule (any): RDKit molecule object.
901+
molecule (Chem.Mol): An RDKit molecule object representing the molecular structure.
900902
isomeric (bool, optional): Flag to generate isomeric SMILES. Defaults to True.
901903
902904
Returns:
@@ -920,3 +922,36 @@ def get_standardized_tautomer(
920922
return new_smiles
921923
else:
922924
return "Error Check input SMILES"
925+
926+
927+
def has_cis_trans_stereochemistry(molecule: any) -> bool:
928+
"""
929+
Detect whether a molecule has cis/trans (E/Z) stereochemistry assigned.
930+
931+
Parameters:
932+
-----------
933+
molecule (Chem.Mol): An RDKit molecule object representing the molecular structure.
934+
935+
Returns:
936+
--------
937+
bool
938+
True if cis/trans stereochemistry is assigned, False otherwise
939+
"""
940+
if molecule is None:
941+
return False
942+
943+
# Check each bond for stereochemistry
944+
for bond in molecule.GetBonds():
945+
# Check if bond is a double bond
946+
if bond.GetBondType() == Chem.BondType.DOUBLE:
947+
# Check if stereochemistry is assigned
948+
stereo = bond.GetStereo()
949+
if stereo in [
950+
Chem.BondStereo.STEREOE,
951+
Chem.BondStereo.STEREOZ,
952+
Chem.BondStereo.STEREOTRANS,
953+
Chem.BondStereo.STEREOCIS,
954+
]:
955+
return True
956+
957+
return False

0 commit comments

Comments
 (0)