Skip to content

Commit 022a738

Browse files
whitead and claude committed
Seed all random number generators in XAI chapter for reproducibility
The PyTorch/exmol section had no random seeding, causing variance in model accuracy, counterfactuals, and other outputs across runs. Added numpy, torch, and Python random seeds, random_state to train_test_split, and nproc=1 to mordred to avoid multiprocessing non-determinism. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4fe84ee commit 022a738

1 file changed

Lines changed: 10 additions & 4 deletions

File tree

dl/xai.ipynb

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1069,6 +1069,7 @@
10691069
},
10701070
"outputs": [],
10711071
"source": [
1072+
"import random\n",
10721073
"import pandas as pd\n",
10731074
"import matplotlib as mpl\n",
10741075
"import matplotlib.pyplot as plt\n",
@@ -1122,7 +1123,7 @@
11221123
"valid_mol_idx = [bool(m) for m in molecules]\n",
11231124
"valid_mols = [m for m in molecules if m]\n",
11241125
"# Compute molecular descriptors using Mordred\n",
1125-
"features = calc.pandas(valid_mols, quiet=True)\n",
1126+
"features = calc.pandas(valid_mols, quiet=True, nproc=1)\n",
11261127
"labels = toxdata[valid_mol_idx].FDA_APPROVED\n",
11271128
"# Convert to numeric, coercing errors to NaN\n",
11281129
"features = features.apply(pd.to_numeric, errors=\"coerce\")\n",
@@ -1151,9 +1152,12 @@
11511152
"metadata": {},
11521153
"outputs": [],
11531154
"source": [
1155+
"np.random.seed(0)\n",
1156+
"torch.manual_seed(0)\n",
1157+
"random.seed(0)\n",
11541158
"# Train and test split\n",
11551159
"X_train, X_test, y_train, y_test = train_test_split(\n",
1156-
" features, labels, test_size=0.2, shuffle=True\n",
1160+
" features, labels, test_size=0.2, shuffle=True, random_state=0\n",
11571161
")\n",
11581162
"ft_shape = X_train.shape[-1]\n",
11591163
"\n",
@@ -1237,7 +1241,7 @@
12371241
"source": [
12381242
"def model_eval(smiles, selfies):\n",
12391243
" molecules = [rdkit.Chem.MolFromSmiles(smi) for smi in smiles]\n",
1240-
" feat = calc.pandas(molecules)\n",
1244+
" feat = calc.pandas(molecules, nproc=1)\n",
12411245
" feat = feat.apply(pd.to_numeric, errors=\"coerce\")\n",
12421246
" feat = (feat - feat_mean) / feat_std\n",
12431247
" feat = feat.values.astype(float)\n",
@@ -1268,6 +1272,8 @@
12681272
},
12691273
"outputs": [],
12701274
"source": [
1275+
"random.seed(0)\n",
1276+
"np.random.seed(0)\n",
12711277
"space = exmol.sample_space(\"C1CC(=O)NC(=O)C1N2CC3=C(C2=O)C=CC=C3N\", model_eval);"
12721278
]
12731279
},
@@ -1388,7 +1394,7 @@
13881394
"name": "python",
13891395
"nbconvert_exporter": "python",
13901396
"pygments_lexer": "ipython3",
1391-
"version": "3.7.8"
1397+
"version": "3.13.2"
13921398
},
13931399
"vscode": {
13941400
"interpreter": {

0 commit comments

Comments
 (0)