precommit

jpata · jpata · commit 52d9a8919c75 · 2025-12-02T15:15:42.000+02:00
diff --git a/notebooks/clustering_studies.ipynb b/notebooks/clustering_studies.ipynb
@@ -32,6 +32,7 @@
    "outputs": [],
    "source": [
     "import sys\n",
+    "\n",
     "sys.path.append(module_path)"
    ]
   },
@@ -67,7 +68,7 @@
     "    unique_labels, contiguous_labels = np.unique(elem[\"hit_labels\"], return_inverse=True)\n",
     "    elem[\"hit_labels_contiguous\"] = contiguous_labels\n",
     "    elems.append(elem)\n",
-    "    if len(elems)>=100:\n",
+    "    if len(elems) >= 100:\n",
     "        break\n",
     "\n",
     "elems = [[ak.from_iter(elem)] for elem in elems]\n",
@@ -81,7 +82,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.hist(ak.max(elems[\"hit_labels_contiguous\"], axis=1), bins=np.linspace(0,400,41));\n",
+    "plt.hist(ak.max(elems[\"hit_labels_contiguous\"], axis=1), bins=np.linspace(0, 400, 41))\n",
     "plt.xlabel(\"Clusters per event\")\n",
     "plt.ylabel(\"Event count\")"
    ]
@@ -104,9 +105,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.hist(calo_hit_features_f[:, 0], np.linspace(-5000,5000,100), histtype=\"step\", lw=2, label=\"x\")\n",
-    "plt.hist(calo_hit_features_f[:, 1], np.linspace(-5000,5000,100), histtype=\"step\", lw=2, label=\"y\")\n",
-    "plt.hist(calo_hit_features_f[:, 2], np.linspace(-5000,5000,100), histtype=\"step\", lw=2, label=\"z\");\n",
+    "plt.hist(calo_hit_features_f[:, 0], np.linspace(-5000, 5000, 100), histtype=\"step\", lw=2, label=\"x\")\n",
+    "plt.hist(calo_hit_features_f[:, 1], np.linspace(-5000, 5000, 100), histtype=\"step\", lw=2, label=\"y\")\n",
+    "plt.hist(calo_hit_features_f[:, 2], np.linspace(-5000, 5000, 100), histtype=\"step\", lw=2, label=\"z\")\n",
     "plt.xlabel(\"Hit position (mm)\")\n",
     "plt.ylabel(\"Hit count\")\n",
     "plt.legend()"
@@ -119,7 +120,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.hist(10*calo_hit_features_f[:, 3], np.logspace(-3,1,100))\n",
+    "plt.hist(10 * calo_hit_features_f[:, 3], np.logspace(-3, 1, 100))\n",
     "plt.xscale(\"log\")\n",
     "plt.xlabel(\"Hit energy (GeV)\")\n",
     "plt.ylabel(\"Hit count\")"
@@ -160,20 +161,20 @@
     "    cluster_hit_count = []\n",
     "    cluster_id = []\n",
     "    for clid in cluster_ids:\n",
-    "        cl_mask = elem[\"hit_labels_contiguous\"]==clid\n",
+    "        cl_mask = elem[\"hit_labels_contiguous\"] == clid\n",
     "        std_x = np.std(elem[\"calo_hit_features\"][:, 0][cl_mask])\n",
     "        std_y = np.std(elem[\"calo_hit_features\"][:, 1][cl_mask])\n",
     "        std_z = np.std(elem[\"calo_hit_features\"][:, 2][cl_mask])\n",
     "        sum_e = np.sum(elem[\"calo_hit_features\"][:, 3][cl_mask])\n",
     "        hit_count = np.sum(cl_mask)\n",
-    "        \n",
+    "\n",
     "        cluster_std_x.append(std_x)\n",
     "        cluster_std_y.append(std_y)\n",
     "        cluster_std_z.append(std_z)\n",
     "        cluster_sum_e.append(sum_e)\n",
     "        cluster_hit_count.append(hit_count)\n",
     "        cluster_id.append(clid)\n",
-    "        \n",
+    "\n",
     "    all_cluster_std_x.append(cluster_std_x)\n",
     "    all_cluster_std_y.append(cluster_std_y)\n",
     "    all_cluster_std_z.append(cluster_std_z)\n",
@@ -198,9 +199,9 @@
    "outputs": [],
    "source": [
     "plt.hist2d(\n",
-    "    ak.to_numpy(ak.flatten(all_cluster_hit_count[all_cluster_hit_count>5])),\n",
-    "    ak.to_numpy(ak.flatten(all_cluster_std_x[all_cluster_hit_count>5])),\n",
-    "    bins=(np.logspace(0,3,100), np.logspace(-2,4,100))\n",
+    "    ak.to_numpy(ak.flatten(all_cluster_hit_count[all_cluster_hit_count > 5])),\n",
+    "    ak.to_numpy(ak.flatten(all_cluster_std_x[all_cluster_hit_count > 5])),\n",
+    "    bins=(np.logspace(0, 3, 100), np.logspace(-2, 4, 100)),\n",
     ")\n",
     "plt.xscale(\"log\")\n",
     "plt.yscale(\"log\")\n",
@@ -218,7 +219,7 @@
     "plt.hist2d(\n",
     "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_hit_count))),\n",
     "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_std_y))),\n",
-    "    bins=(np.logspace(0,3,100), np.logspace(-2,4,100))\n",
+    "    bins=(np.logspace(0, 3, 100), np.logspace(-2, 4, 100)),\n",
     ")\n",
     "plt.xscale(\"log\")\n",
     "plt.yscale(\"log\")\n",
@@ -234,9 +235,9 @@
    "outputs": [],
    "source": [
     "plt.hist2d(\n",
-    "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_hit_count[all_cluster_hit_count>5]))),\n",
-    "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_std_z[all_cluster_hit_count>5]))),\n",
-    "    bins=(np.logspace(0,3,100), np.logspace(-2,4,100))\n",
+    "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_hit_count[all_cluster_hit_count > 5]))),\n",
+    "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_std_z[all_cluster_hit_count > 5]))),\n",
+    "    bins=(np.logspace(0, 3, 100), np.logspace(-2, 4, 100)),\n",
     ")\n",
     "plt.xscale(\"log\")\n",
     "plt.yscale(\"log\")\n",
@@ -251,11 +252,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.figure(figsize=(5,5))\n",
+    "plt.figure(figsize=(5, 5))\n",
     "plt.hist2d(\n",
     "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_hit_count))),\n",
     "    ak.to_numpy(ak.flatten(ak.Array(all_cluster_sum_e))),\n",
-    "    bins=(np.logspace(0,3,100), np.logspace(-2,3,100))\n",
+    "    bins=(np.logspace(0, 3, 100), np.logspace(-2, 3, 100)),\n",
     ")\n",
     "plt.xscale(\"log\")\n",
     "plt.yscale(\"log\")\n",
@@ -270,7 +271,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.hist(ak.flatten(all_cluster_hit_count), bins=np.linspace(0,1500,100));\n",
+    "plt.hist(ak.flatten(all_cluster_hit_count), bins=np.linspace(0, 1500, 100))\n",
     "plt.yscale(\"log\")\n",
     "plt.xlabel(\"Number of hits per cluster\")\n",
     "plt.ylabel(\"Cluster count\")"
@@ -283,46 +284,37 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig, axs = plt.subplots(3,3, figsize=(10,10))\n",
+    "fig, axs = plt.subplots(3, 3, figsize=(10, 10))\n",
     "axs = axs.flatten()\n",
     "for ielem in range(9):\n",
     "    plt.sca(axs[ielem])\n",
     "    elem = elems[ielem]\n",
-    "    \n",
+    "\n",
     "    unique_labels, contiguous_labels = np.unique(elem[\"hit_labels\"], return_inverse=True)\n",
-    "    cmap = plt.get_cmap('viridis')\n",
+    "    cmap = plt.get_cmap(\"viridis\")\n",
     "    distinct_colors = cmap(np.linspace(0, 1, len(unique_labels)))\n",
-    "    \n",
+    "\n",
     "    plt.scatter(\n",
     "        elem[\"calo_hit_features\"][:, 0],\n",
     "        elem[\"calo_hit_features\"][:, 1],\n",
-    "        s=np.clip(100*elem[\"calo_hit_features\"][:, 3], 0.1, 10),\n",
-    "        c=distinct_colors[contiguous_labels])\n",
+    "        s=np.clip(100 * elem[\"calo_hit_features\"][:, 3], 0.1, 10),\n",
+    "        c=distinct_colors[contiguous_labels],\n",
+    "    )\n",
     "    plt.xlim(-6000, 6000)\n",
     "    plt.ylim(-6000, 6000)\n",
-    "    plt.title(\"$N_{{hit}}$={}, $N_{{cl}}$={}\".format(len(elem[\"calo_hit_features\"]), len(np.unique(elem[\"hit_labels\"]))))\n",
+    "    plt.title(\n",
+    "        \"$N_{{hit}}$={}, $N_{{cl}}$={}\".format(len(elem[\"calo_hit_features\"]), len(np.unique(elem[\"hit_labels\"])))\n",
+    "    )\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.13"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/data_preprocessing.ipynb b/notebooks/data_preprocessing.ipynb
@@ -106,21 +106,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.13"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/debug-cld-processing.ipynb b/notebooks/debug-cld-processing.ipynb
@@ -1204,7 +1204,7 @@
     "    event_data1[\"cluster_to_cluster_hit_matrix\"][event_i][\"hit_idx\"],\n",
     "    event_data1[\"cluster_to_cluster_hit_matrix\"][event_i][\"cluster_idx\"],\n",
     "    event_data1[\"cluster_to_cluster_hit_matrix\"][event_i][\"weight\"],\n",
-    "    max_hits = np.max(hit_idx)+1\n",
+    "    max_hits=np.max(hit_idx) + 1,\n",
     ")\n",
     "\n",
     "# Extract calorimeter hit positions (x, y, z)\n",
@@ -1217,7 +1217,9 @@
     ")\n",
     "\n",
     "\n",
-    "def plot_calo_hits_colored_by_genparticle(hit_labels, calo_hit_positions, title=\"Calorimeter hits colored by genparticle\"):\n",
+    "def plot_calo_hits_colored_by_genparticle(\n",
+    "    hit_labels, calo_hit_positions, title=\"Calorimeter hits colored by genparticle\"\n",
+    "):\n",
     "    # Assign unique colors to each genparticle ID\n",
     "    unique_ids = np.unique(hit_labels)\n",
     "    colors = plt.cm.tab10(np.linspace(0, 1, len(unique_ids)))\n",
@@ -1233,7 +1235,7 @@
     "\n",
     "    random_color_map = {gen_id: random_color() for gen_id in unique_ids}\n",
     "    random_color_map[-1] = \"rgba(0,0,0)\"\n",
-    " \n",
+    "\n",
     "    # Create traces for each genparticle ID\n",
     "    traces = []\n",
     "    for gen_id in unique_ids:\n",
@@ -1369,12 +1371,12 @@
     "                hit_labels = get_hit_labels(\n",
     "                    hit_idx, gen_idx, weights\n",
     "                )  # This could be moved to the pre-processing step if needed\n",
-    "                \n",
+    "\n",
     "                hit_labels2 = get_hit_labels(\n",
     "                    cluster_to_cluster_hit_matrix[\"hit_idx\"],\n",
     "                    cluster_to_cluster_hit_matrix[\"cluster_idx\"],\n",
     "                    cluster_to_cluster_hit_matrix[\"weight\"],\n",
-    "                    max_hits = np.max(hit_idx)+1\n",
+    "                    max_hits=np.max(hit_idx) + 1,\n",
     "                )\n",
     "\n",
     "                yield {\n",
@@ -1610,7 +1612,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plot_calo_hits_colored_by_genparticle(hit_labels_pandora, calo_hit_positions, \"Calorimeter hits colored by Pandora cluster\")"
+    "plot_calo_hits_colored_by_genparticle(\n",
+    "    hit_labels_pandora, calo_hit_positions, \"Calorimeter hits colored by Pandora cluster\"\n",
+    ")"
    ]
   },
   {
@@ -1623,21 +1627,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.13"
   }
  },
  "nbformat": 4,
diff --git a/src/datasets/CLDHits.py b/src/datasets/CLDHits.py
@@ -148,7 +148,7 @@ def __iter__(self):
                     cluster_to_cluster_hit_matrix["hit_idx"],
                     cluster_to_cluster_hit_matrix["cluster_idx"],
                     cluster_to_cluster_hit_matrix["weight"],
-                    max_hits = np.max(hit_idx)+1
+                    max_hits=np.max(hit_idx) + 1,
                 )
 
                 if self.by_event:

Original file line number	Diff line number	Diff line change
`@@ -148,7 +148,7 @@ def __iter__(self):`
`148`	`148`	`cluster_to_cluster_hit_matrix["hit_idx"],`
`149`	`149`	`cluster_to_cluster_hit_matrix["cluster_idx"],`
`150`	`150`	`cluster_to_cluster_hit_matrix["weight"],`
`151`		`- max_hits = np.max(hit_idx)+1`
	`151`	`+ max_hits=np.max(hit_idx) + 1,`
`152`	`152`	`)`
`153`	`153`
`154`	`154`	`if self.by_event:`