-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquantum_descriptors.py
More file actions
282 lines (227 loc) · 11.1 KB
/
quantum_descriptors.py
File metadata and controls
282 lines (227 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
"""
Quantum Descriptors Calculator for BBB Prediction
Calculates quantum chemical descriptors similar to those from Gaussian:
- HOMO (Highest Occupied Molecular Orbital) energy
- LUMO (Lowest Unoccupied Molecular Orbital) energy
- HOMO-LUMO gap (chemical hardness indicator)
- Dipole moment
- Polarizability
- Electrophilicity index
- Chemical hardness/softness
- Electronegativity (Mulliken)
Since we don't have access to actual Gaussian calculations (expensive DFT),
we use semi-empirical approximations via RDKit and Mordred descriptors.
For production, these could be replaced with actual DFT calculations.
"""
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, rdMolDescriptors
from rdkit.Chem import rdPartialCharges
# Try to import mordred for advanced descriptors
try:
from mordred import Calculator, descriptors
MORDRED_AVAILABLE = True
except ImportError:
MORDRED_AVAILABLE = False
print("Note: mordred not installed. Using RDKit approximations for quantum descriptors.")
print("Install with: pip install mordred")
class QuantumDescriptorCalculator:
"""
Calculate quantum-like descriptors for molecules.
These are approximations based on semi-empirical methods and
empirical correlations, not actual DFT calculations.
For real quantum descriptors, you would:
1. Generate 3D conformers
2. Run Gaussian/ORCA with B3LYP/6-31G* or similar
3. Extract HOMO, LUMO, dipole from output
This class provides fast approximations suitable for ML.
"""
def __init__(self):
if MORDRED_AVAILABLE:
# Initialize mordred calculator with specific descriptors
self.mordred_calc = Calculator(descriptors, ignore_3D=True)
def calculate(self, smiles):
"""
Calculate quantum descriptors for a molecule.
Returns dict with:
- homo_approx: Approximate HOMO energy (eV)
- lumo_approx: Approximate LUMO energy (eV)
- homo_lumo_gap: HOMO-LUMO gap (eV)
- dipole_moment: Approximate dipole moment (Debye)
- polarizability: Molecular polarizability (ų)
- electrophilicity: Electrophilicity index (eV)
- hardness: Chemical hardness (eV)
- softness: Chemical softness (eV⁻¹)
- electronegativity: Mulliken electronegativity (eV)
- electron_affinity: Electron affinity approximation (eV)
- ionization_potential: Ionization potential approximation (eV)
"""
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return None
# Add hydrogens for better calculations
mol = Chem.AddHs(mol)
descriptors_dict = {}
# === HOMO/LUMO Approximations ===
# Based on empirical correlations with molecular properties
# Real values require DFT (B3LYP/6-31G* typical)
# Get basic properties
logp = Descriptors.MolLogP(mol)
tpsa = Descriptors.TPSA(mol)
mw = Descriptors.MolWt(mol)
num_electrons = sum([atom.GetAtomicNum() for atom in mol.GetAtoms()])
# Aromatic character affects HOMO-LUMO
num_aromatic_rings = rdMolDescriptors.CalcNumAromaticRings(mol)
num_aromatic_atoms = len([a for a in mol.GetAtoms() if a.GetIsAromatic()])
frac_aromatic = num_aromatic_atoms / mol.GetNumAtoms() if mol.GetNumAtoms() > 0 else 0
# Heteroatom effects
num_n = len([a for a in mol.GetAtoms() if a.GetAtomicNum() == 7])
num_o = len([a for a in mol.GetAtoms() if a.GetAtomicNum() == 8])
num_s = len([a for a in mol.GetAtoms() if a.GetAtomicNum() == 16])
num_halogens = len([a for a in mol.GetAtoms() if a.GetAtomicNum() in [9, 17, 35, 53]])
# === Empirical HOMO/LUMO estimation ===
# Based on: Ghose-Crippen correlations and aromatic stabilization
# These are rough approximations! Real DFT gives better values.
# HOMO estimation (typically -5 to -9 eV for organic molecules)
# More aromatic = higher HOMO (less negative)
# More electronegative atoms = lower HOMO (more negative)
homo_base = -7.5 # typical organic molecule HOMO
homo_aromatic_contrib = frac_aromatic * 1.5 # aromatic stabilization
homo_heteroatom_contrib = -0.1 * (num_n + num_o) - 0.2 * num_s
homo_halogen_contrib = -0.15 * num_halogens
homo_size_contrib = -0.001 * (mw - 200) # size effect
homo_approx = homo_base + homo_aromatic_contrib + homo_heteroatom_contrib + homo_halogen_contrib + homo_size_contrib
homo_approx = np.clip(homo_approx, -12, -4) # physical bounds
# LUMO estimation (typically 0 to -4 eV for organic molecules)
# More aromatic = lower LUMO (more negative, better acceptor)
# Electron-withdrawing groups lower LUMO
lumo_base = -1.5
lumo_aromatic_contrib = -0.5 * frac_aromatic - 0.3 * num_aromatic_rings
lumo_ewg_contrib = -0.2 * num_halogens - 0.1 * num_n
lumo_size_contrib = -0.0005 * (mw - 200)
lumo_approx = lumo_base + lumo_aromatic_contrib + lumo_ewg_contrib + lumo_size_contrib
lumo_approx = np.clip(lumo_approx, -5, 2) # physical bounds
# Ensure LUMO > HOMO
if lumo_approx <= homo_approx:
lumo_approx = homo_approx + 3.0 # minimum gap
descriptors_dict['homo_approx'] = homo_approx
descriptors_dict['lumo_approx'] = lumo_approx
descriptors_dict['homo_lumo_gap'] = lumo_approx - homo_approx
# === Derived Quantum Properties ===
# Koopman's theorem approximations
ionization_potential = -homo_approx # IP ≈ -HOMO
electron_affinity = -lumo_approx # EA ≈ -LUMO
descriptors_dict['ionization_potential'] = ionization_potential
descriptors_dict['electron_affinity'] = electron_affinity
# Mulliken electronegativity: χ = (IP + EA) / 2
electronegativity = (ionization_potential + electron_affinity) / 2
descriptors_dict['electronegativity'] = electronegativity
# Chemical hardness: η = (IP - EA) / 2 = gap / 2
hardness = (ionization_potential - electron_affinity) / 2
descriptors_dict['hardness'] = hardness
# Chemical softness: S = 1 / (2η)
softness = 1 / (2 * hardness) if hardness > 0.1 else 5.0
descriptors_dict['softness'] = softness
# Electrophilicity index: ω = χ² / (2η)
electrophilicity = (electronegativity ** 2) / (2 * hardness) if hardness > 0.1 else 0
descriptors_dict['electrophilicity'] = electrophilicity
# === Dipole Moment Approximation ===
# Based on charge separation and molecular geometry
# Compute Gasteiger charges
try:
AllChem.ComputeGasteigerCharges(mol)
charges = [float(mol.GetAtomWithIdx(i).GetProp('_GasteigerCharge'))
for i in range(mol.GetNumAtoms())]
charges = [c if not np.isnan(c) else 0 for c in charges]
# Dipole approximation based on charge separation
max_charge = max(charges) if charges else 0
min_charge = min(charges) if charges else 0
charge_sep = max_charge - min_charge
# Estimate dipole (rough approximation)
# Real dipole needs 3D geometry
dipole_approx = charge_sep * np.sqrt(mw) * 0.5 + tpsa * 0.02
dipole_approx = np.clip(dipole_approx, 0, 15) # Debye, typical range
except:
dipole_approx = tpsa * 0.05 # fallback
descriptors_dict['dipole_moment'] = dipole_approx
# === Polarizability ===
# Miller's empirical formula or atom-based additivity
# α ≈ 0.79 × (number of valence electrons)^0.67 × Å³
polarizability = 0.79 * (num_electrons ** 0.67)
descriptors_dict['polarizability'] = polarizability
# === Additional quantum-relevant descriptors ===
if MORDRED_AVAILABLE:
try:
mol_no_h = Chem.RemoveHs(mol)
mordred_result = self.mordred_calc(mol_no_h)
# Get specific mordred descriptors if available
# These are more accurate than our approximations
for desc_name in ['HOMO', 'LUMO', 'GapHL']:
if desc_name in mordred_result and mordred_result[desc_name] is not None:
if not np.isnan(mordred_result[desc_name]):
descriptors_dict[f'mordred_{desc_name}'] = float(mordred_result[desc_name])
except:
pass
# === Quantum-relevant molecular properties ===
# These correlate with quantum behavior
# Maximum partial charge
descriptors_dict['max_partial_charge'] = Descriptors.MaxPartialCharge(mol)
descriptors_dict['min_partial_charge'] = Descriptors.MinPartialCharge(mol)
# Handle NaN values
for key in descriptors_dict:
if descriptors_dict[key] is None or (isinstance(descriptors_dict[key], float) and np.isnan(descriptors_dict[key])):
descriptors_dict[key] = 0.0
return descriptors_dict
def get_feature_names(self):
"""Return list of quantum descriptor names"""
return [
'homo_approx',
'lumo_approx',
'homo_lumo_gap',
'ionization_potential',
'electron_affinity',
'electronegativity',
'hardness',
'softness',
'electrophilicity',
'dipole_moment',
'polarizability',
'max_partial_charge',
'min_partial_charge'
]
def calculate_vector(self, smiles):
"""Return quantum descriptors as a numpy array"""
desc = self.calculate(smiles)
if desc is None:
return None
feature_names = self.get_feature_names()
return np.array([desc.get(name, 0.0) for name in feature_names], dtype=np.float32)
def test_quantum_descriptors():
"""Test the quantum descriptor calculator"""
calc = QuantumDescriptorCalculator()
test_molecules = [
("CCO", "Ethanol"),
("c1ccccc1", "Benzene"),
("CC(=O)Nc1ccc(O)cc1", "Paracetamol"),
("CN1C=NC2=C1C(=O)N(C(=O)N2C)C", "Caffeine"),
("CC(C)Cc1ccc(cc1)C(C)C(=O)O", "Ibuprofen"),
]
print("Quantum Descriptors Test")
print("=" * 80)
for smiles, name in test_molecules:
print(f"\n{name} ({smiles})")
print("-" * 40)
desc = calc.calculate(smiles)
if desc:
print(f" HOMO (approx): {desc['homo_approx']:.2f} eV")
print(f" LUMO (approx): {desc['lumo_approx']:.2f} eV")
print(f" HOMO-LUMO gap: {desc['homo_lumo_gap']:.2f} eV")
print(f" Electronegativity: {desc['electronegativity']:.2f} eV")
print(f" Hardness: {desc['hardness']:.2f} eV")
print(f" Electrophilicity: {desc['electrophilicity']:.2f} eV")
print(f" Dipole moment: {desc['dipole_moment']:.2f} D")
print(f" Polarizability: {desc['polarizability']:.2f} ų")
else:
print(" Failed to calculate")
if __name__ == "__main__":
test_quantum_descriptors()