Seed all random number generators in XAI chapter for reproducibility

whitead · claude · whitead · commit 022a73831f54 · 2026-02-20T11:59:17.000-08:00
The PyTorch/exmol section had no random seeding, causing variance in
model accuracy, counterfactuals, and other outputs across runs. Seeded
the numpy, torch, and Python random generators, passed random_state to
train_test_split, and set nproc=1 in the mordred calls to avoid
multiprocessing non-determinism.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/dl/xai.ipynb b/dl/xai.ipynb
@@ -1069,6 +1069,7 @@
    },
    "outputs": [],
    "source": [
+    "import random\n",
     "import pandas as pd\n",
     "import matplotlib as mpl\n",
     "import matplotlib.pyplot as plt\n",
@@ -1122,7 +1123,7 @@
     "valid_mol_idx = [bool(m) for m in molecules]\n",
     "valid_mols = [m for m in molecules if m]\n",
     "# Compute molecular descriptors using Mordred\n",
-    "features = calc.pandas(valid_mols, quiet=True)\n",
+    "features = calc.pandas(valid_mols, quiet=True, nproc=1)\n",
     "labels = toxdata[valid_mol_idx].FDA_APPROVED\n",
     "# Convert to numeric, coercing errors to NaN\n",
     "features = features.apply(pd.to_numeric, errors=\"coerce\")\n",
@@ -1151,9 +1152,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "np.random.seed(0)\n",
+    "torch.manual_seed(0)\n",
+    "random.seed(0)\n",
     "# Train and test spit\n",
     "X_train, X_test, y_train, y_test = train_test_split(\n",
-    "    features, labels, test_size=0.2, shuffle=True\n",
+    "    features, labels, test_size=0.2, shuffle=True, random_state=0\n",
     ")\n",
     "ft_shape = X_train.shape[-1]\n",
     "\n",
@@ -1237,7 +1241,7 @@
    "source": [
     "def model_eval(smiles, selfies):\n",
     "    molecules = [rdkit.Chem.MolFromSmiles(smi) for smi in smiles]\n",
-    "    feat = calc.pandas(molecules)\n",
+    "    feat = calc.pandas(molecules, nproc=1)\n",
     "    feat = feat.apply(pd.to_numeric, errors=\"coerce\")\n",
     "    feat = (feat - feat_mean) / feat_std\n",
     "    feat = feat.values.astype(float)\n",
@@ -1268,6 +1272,8 @@
    },
    "outputs": [],
    "source": [
+    "random.seed(0)\n",
+    "np.random.seed(0)\n",
     "space = exmol.sample_space(\"C1CC(=O)NC(=O)C1N2CC3=C(C2=O)C=CC=C3N\", model_eval);"
    ]
   },
@@ -1388,7 +1394,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.8"
+   "version": "3.13.2"
   },
   "vscode": {
    "interpreter": {