refactor: skip plot generation for unavailable datasets

Qazalbash · Qazalbash · commit 467f4787483c · 2026-06-10T10:52:05.000+05:00
diff --git a/src/gwkokab/analysis/report/template_report.ipynb b/src/gwkokab/analysis/report/template_report.ipynb
@@ -60,6 +60,7 @@
    "outputs": [],
    "source": [
     "import glasbey\n",
+    "import h5py\n",
     "import numpy as np\n",
     "from plotly import graph_objects as go\n",
     "from plotly.subplots import make_subplots\n",
@@ -80,8 +81,6 @@
    "source": [
     "def n_chains_from_hdf5(sampler_name: str, filename: str) -> int:\n",
     "    if sampler_name == \"numpyro\":\n",
-    "        import h5py\n",
-    "\n",
     "        with h5py.File(filename, \"r\") as f:\n",
     "            return len(f[\"/chains\"].keys())\n",
     "    return int(read_attrs_from_hdf5(filename, \"sampler_cfg\")[\"n_chains\"])\n",
@@ -289,10 +288,11 @@
    "outputs": [],
    "source": [
     "if SAMPLER_NAME == \"flowMC\":\n",
-    "    global_acc_train = read_from_hdf5(inference_data_file, \"/acceptances/global/train\")\n",
-    "    global_acc_prod = read_from_hdf5(inference_data_file, \"/acceptances/global/prod\")\n",
-    "    local_acc_train = read_from_hdf5(inference_data_file, \"/acceptances/local/train\")\n",
-    "    local_acc_prod = read_from_hdf5(inference_data_file, \"/acceptances/local/prod\")\n",
+    "    with h5py.File(inference_data_file, \"r\") as f:\n",
+    "        global_acc_train = read_from_hdf5(f, \"/acceptances/global/train\")\n",
+    "        global_acc_prod = read_from_hdf5(f, \"/acceptances/global/prod\")\n",
+    "        local_acc_train = read_from_hdf5(f, \"/acceptances/local/train\")\n",
+    "        local_acc_prod = read_from_hdf5(f, \"/acceptances/local/prod\")\n",
     "\n",
     "    color_global, color_local = glasbey.create_palette(\n",
     "        palette_size=2, colorblind_safe=True\n",
@@ -425,43 +425,35 @@
     "    fig.show()"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "cf7393f2",
-   "metadata": {},
-   "source": [
-    "# Training Chains\n",
-    "\n",
-    "<div style=\"border: 1px solid #00f; background-color: #eef; padding: 10px;\">\n",
-    "    <strong>Note:</strong> If a sampler other than flowMC is used, no plots will be generated in this section.\n",
-    "</div>"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0a6c9dff",
+   "id": "dc096bb9",
    "metadata": {},
    "outputs": [],
    "source": [
-    "if SAMPLER_NAME == \"flowMC\":\n",
-    "    TRAINING_CHAINS = np.stack(\n",
-    "        [\n",
-    "            read_from_hdf5(inference_data_file, f\"/chains/train/chain_{i}/positions\")\n",
-    "            for i in range(N_CHAINS)\n",
-    "        ],\n",
-    "        axis=1,\n",
-    "    )\n",
+    "def auxiliary_chains_plot(datapath: str, output_filename: str) -> None:\n",
+    "    try:\n",
+    "        with h5py.File(inference_data_file, \"r\") as f:\n",
+    "            chains = np.stack(\n",
+    "                [read_from_hdf5(f, datapath.format(i=i)) for i in range(N_CHAINS)],\n",
+    "                axis=1,\n",
+    "            )\n",
+    "    except Exception:\n",
+    "        return\n",
+    "\n",
     "    fig = make_subplots(\n",
     "        rows=n_dims,\n",
     "        cols=1,\n",
     "        shared_xaxes=True,\n",
     "        vertical_spacing=vertical_spacing,\n",
     "    )\n",
     "\n",
+    "    _n_samples = chains.shape[0]\n",
+    "\n",
     "    for i in range(n_dims):\n",
     "        row = i + 1\n",
-    "        data = TRAINING_CHAINS[..., i]\n",
+    "        data = chains[..., i]\n",
     "\n",
     "        for c in range(N_CHAINS):\n",
     "            show_legend = i == 0\n",
@@ -492,17 +484,41 @@
     "        row = i + 1\n",
     "\n",
     "        fig.update_yaxes(title_text=LABELS[i], **grid_style, row=row, col=1)\n",
-    "        fig.update_xaxes(range=[0, n_samples_per_chain], **grid_style, row=row, col=1)\n",
+    "        fig.update_xaxes(range=[0, _n_samples], **grid_style, row=row, col=1)\n",
     "\n",
     "    fig.update_xaxes(title_text=\"Iteration\", row=n_dims, col=1)\n",
     "\n",
-    "    fig.write_html(\n",
-    "        \"figs/training_trace_plots.html\", include_plotlyjs=\"cdn\", full_html=True\n",
-    "    )\n",
+    "    fig.write_html(output_filename, include_plotlyjs=\"cdn\", full_html=True)\n",
     "\n",
     "    fig.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "cf7393f2",
+   "metadata": {},
+   "source": [
+    "# Training Chains\n",
+    "\n",
+    "<div style=\"border: 1px solid #00f; background-color: #eef; padding: 10px;\">\n",
+    "    <strong>Note:</strong> If a sampler other than flowMC is used, or if the training chains cannot be read from the inference data file, no plots will be generated in this section.\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a6c9dff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if SAMPLER_NAME == \"flowMC\":\n",
+    "    auxiliary_chains_plot(\n",
+    "        datapath=\"/chains/train/chain_{i}/positions\",\n",
+    "        output_filename=\"figs/training_trace_plots.html\",\n",
+    "    )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f9367fe1",
@@ -523,62 +539,10 @@
    "outputs": [],
    "source": [
     "if SAMPLER_NAME == \"flowMC\":\n",
-    "    PRODUCTION_CHAINS = np.stack(\n",
-    "        [\n",
-    "            read_from_hdf5(inference_data_file, f\"/chains/prod/chain_{i}/positions\")\n",
-    "            for i in range(N_CHAINS)\n",
-    "        ],\n",
-    "        axis=1,\n",
-    "    )\n",
-    "    fig = make_subplots(\n",
-    "        rows=n_dims,\n",
-    "        cols=1,\n",
-    "        shared_xaxes=True,\n",
-    "        vertical_spacing=vertical_spacing,\n",
-    "    )\n",
-    "\n",
-    "    for i in range(n_dims):\n",
-    "        row = i + 1\n",
-    "        data = PRODUCTION_CHAINS[..., i]\n",
-    "\n",
-    "        for c in range(N_CHAINS):\n",
-    "            show_legend = i == 0\n",
-    "\n",
-    "            fig.add_trace(\n",
-    "                go.Scatter(\n",
-    "                    y=data[:, c],\n",
-    "                    mode=\"lines\",\n",
-    "                    line=dict(color=colors_n_chains[c], width=1.5),\n",
-    "                    name=f\"Chain {c}\",\n",
-    "                    legendgroup=f\"chain_{c}\",\n",
-    "                    showlegend=show_legend,\n",
-    "                ),\n",
-    "                row=row,\n",
-    "                col=1,\n",
-    "            )\n",
-    "\n",
-    "    height = max(250, n_dims * 180)\n",
-    "\n",
-    "    fig.update_layout(\n",
-    "        height=height,\n",
-    "        plot_bgcolor=\"white\",\n",
-    "        margin=dict(l=80, r=60, t=40, b=60),\n",
-    "        legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n",
-    "    )\n",
-    "\n",
-    "    for i in range(n_dims):\n",
-    "        row = i + 1\n",
-    "\n",
-    "        fig.update_yaxes(title_text=LABELS[i], **grid_style, row=row, col=1)\n",
-    "        fig.update_xaxes(range=[0, n_samples_per_chain], **grid_style, row=row, col=1)\n",
-    "\n",
-    "    fig.update_xaxes(title_text=\"Iteration\", row=n_dims, col=1)\n",
-    "\n",
-    "    fig.write_html(\n",
-    "        \"figs/production_trace_plots.html\", include_plotlyjs=\"cdn\", full_html=True\n",
-    "    )\n",
-    "\n",
-    "    fig.show()"
+    "    auxiliary_chains_plot(\n",
+    "        datapath=\"/chains/prod/chain_{i}/positions\",\n",
+    "        output_filename=\"figs/production_trace_plots.html\",\n",
+    "    )"
    ]
   },
   {