Skip to content

Commit 979cffe

Browse files
authored
Sg/revisions for usability (#4)
* updated pip install to toml * updated instructions and scripting * fix to toml * recompute of inference is * naming fix for inference experiment * fix to oom on probabilities
1 parent e729a1d commit 979cffe

File tree

7 files changed

+149
-112
lines changed

7 files changed

+149
-112
lines changed

project/geodata-3d-conditional/inference_demo.ipynb

Lines changed: 78 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,12 @@
140140
{
141141
"data": {
142142
"application/vnd.jupyter.widget-view+json": {
143-
"model_id": "2398bdf49d20427898f2eb0cc16783ef",
143+
"model_id": "6f8261b149204a60a37db193813dc6b6",
144144
"version_major": 2,
145145
"version_minor": 0
146146
},
147147
"text/plain": [
148-
"Widget(value='<iframe src=\"http://localhost:53764/index.html?ui=P_0x37992d550_0&reconnect=auto\" class=\"pyvista"
148+
"Widget(value='<iframe src=\"http://localhost:36623/index.html?ui=P_0x7d484a41c1a0_0&reconnect=auto\" class=\"pyvi"
149149
]
150150
},
151151
"metadata": {},
@@ -204,30 +204,45 @@
204204
"source": [
205205
"An auto-populating function is provided that \n",
206206
"\n",
207-
"1. Iterates through a folder `save_dir` containing subfolders `cond_data_folder_title` with conditional data `boreholes.pt` and `true_model.pt`\n",
208-
"2. Creates the conditional data that includes surface, air, and boreholes from `boreholes.pt` and `true_model.pt`\n",
207+
"1. Iterates through a folder `save_dir` containing subfolders `cond_data_folder_title` with paired data `boreholes.pt` and `true_model.pt` containing the boreholes extracted from the ground truth geological model.\n",
208+
"2. Creates the conditional data for the inverse problem that includes surface, air, and boreholes from `boreholes.pt` and `true_model.pt`\n",
209209
"3. Runs the inference routine on the data to produce `n_samples_each` for each set of conditional data\n",
210-
"4. Saves the solutions in the same subfolder with `sample_title_000.pt` naming convention"
210+
"4. Saves the solutions in the same subfolder with `sample_title_000.pt` naming convention\n",
211+
"\n",
212+
"The script below will sample 9 conditional reconstructions for each pair of boreholes with the true model. (The true model is only used to get surface and air data; the subsurface is not used in the inference.)\n",
213+
"\n",
214+
"The sample time is long, so precomputed inference results are available for demonstration of the ensemble analysis below. To run the inference locally, set `USE_PRECOMPUTED_INFERENCE_RESULTS = False` below."
211215
]
212216
},
213217
{
214218
"cell_type": "code",
215219
"execution_count": 7,
220+
"id": "1dc6a8c0",
221+
"metadata": {},
222+
"outputs": [],
223+
"source": [
224+
"USE_PRECOMPUTED_INFERENCE_RESULTS = True"
225+
]
226+
},
227+
{
228+
"cell_type": "code",
229+
"execution_count": 8,
216230
"id": "ba241280",
217231
"metadata": {},
218232
"outputs": [],
219233
"source": [
220234
"from model_inference_experiments import populate_solutions\n",
221235
"\n",
222-
"# populate_solutions(\n",
223-
"# save_dir=save_dir,\n",
224-
"# cond_data_folder_title=cond_data_folder_title,\n",
225-
"# device=device,\n",
226-
"# model=flowmatching_model,\n",
227-
"# n_samples_each=9,\n",
228-
"# batch_size=1,\n",
229-
"# sample_title=\"sample\",\n",
230-
"# )"
236+
"if not USE_PRECOMPUTED_INFERENCE_RESULTS:\n",
237+
" populate_solutions(\n",
238+
" save_dir=save_dir,\n",
239+
" cond_data_folder_title=cond_data_folder_title,\n",
240+
" device=device,\n",
241+
" model=flowmatching_model,\n",
242+
" n_samples_each=9,\n",
243+
" batch_size=1,\n",
244+
" sample_title=\"sample\",\n",
245+
" )"
231246
]
232247
},
233248
{
@@ -240,22 +255,23 @@
240255
},
241256
{
242257
"cell_type": "code",
243-
"execution_count": null,
258+
"execution_count": 9,
244259
"id": "10058c0e",
245260
"metadata": {},
246261
"outputs": [],
247262
"source": [
248263
"from model_inference_experiments import load_solutions, show_solutions\n",
249264
"\n",
250-
"# Same folder as the stored conditional data\n",
251-
"sample_number = 0\n",
252-
"samples_dir = os.path.join(save_dir, f\"{cond_data_folder_title}_{sample_number}\")\n",
253-
"print(\"Loading from:\", samples_dir)\n",
254-
"# Autoparse the true_model.pt, boreholes.pt, and any solutions\n",
255-
"geomodel, boreholes = load_model_and_boreholes(samples_dir)\n",
256-
"solutions = load_solutions(samples_dir, sample_title=\"sample\")\n",
257-
"show_model_and_boreholes(geomodel, boreholes)\n",
258-
"show_solutions(solutions)"
265+
"if not USE_PRECOMPUTED_INFERENCE_RESULTS:\n",
266+
" # Same folder as the stored conditional data\n",
267+
" sample_number = 0\n",
268+
" samples_dir = os.path.join(save_dir, f\"{cond_data_folder_title}_{sample_number}\")\n",
269+
" print(\"Loading from:\", samples_dir)\n",
270+
" # Autoparse the true_model.pt, boreholes.pt, and any solutions\n",
271+
" geomodel, boreholes = load_model_and_boreholes(samples_dir)\n",
272+
" solutions = load_solutions(samples_dir, sample_title=\"sample\")\n",
273+
" show_model_and_boreholes(geomodel, boreholes)\n",
274+
" show_solutions(solutions)"
259275
]
260276
},
261277
{
@@ -269,7 +285,7 @@
269285
},
270286
{
271287
"cell_type": "code",
272-
"execution_count": 9,
288+
"execution_count": 10,
273289
"id": "2301054e",
274290
"metadata": {},
275291
"outputs": [],
@@ -329,15 +345,15 @@
329345
},
330346
{
331347
"cell_type": "code",
332-
"execution_count": 10,
348+
"execution_count": 11,
333349
"id": "145d861c",
334350
"metadata": {},
335351
"outputs": [
336352
{
337353
"name": "stdout",
338354
"output_type": "stream",
339355
"text": [
340-
"Restored to: /Users/sghyseli/Projects/synthgeo-paper/flowtrain_stochastic_interpolation/project/geodata-3d-conditional/samples/jupyter-demo/paper_cond_gen_0\n"
356+
"Restored to: /home/sghys/projects/flowtrain_stochastic_interpolation/project/geodata-3d-conditional/samples/jupyter-demo/paper_cond_gen_0\n"
341357
]
342358
}
343359
],
@@ -360,19 +376,19 @@
360376
},
361377
{
362378
"cell_type": "code",
363-
"execution_count": 11,
379+
"execution_count": 12,
364380
"id": "3afe5e1e",
365381
"metadata": {},
366382
"outputs": [
367383
{
368384
"data": {
369385
"application/vnd.jupyter.widget-view+json": {
370-
"model_id": "efbf0829bf2342e4bec7abe76ac086d8",
386+
"model_id": "0e02af847ae94161aebb114ee640b10b",
371387
"version_major": 2,
372388
"version_minor": 0
373389
},
374390
"text/plain": [
375-
"Widget(value='<iframe src=\"http://localhost:53764/index.html?ui=P_0x34fb582f0_1&reconnect=auto\" class=\"pyvista"
391+
"Widget(value='<iframe src=\"http://localhost:36623/index.html?ui=P_0x7d484de739b0_1&reconnect=auto\" class=\"pyvi"
376392
]
377393
},
378394
"metadata": {},
@@ -381,12 +397,12 @@
381397
{
382398
"data": {
383399
"application/vnd.jupyter.widget-view+json": {
384-
"model_id": "86bcac6e346a4d2eae3e1af847d1edd5",
400+
"model_id": "0d1d6198cffd40a78fc1fa18e19e6a53",
385401
"version_major": 2,
386402
"version_minor": 0
387403
},
388404
"text/plain": [
389-
"Widget(value='<iframe src=\"http://localhost:53764/index.html?ui=P_0x34def0260_2&reconnect=auto\" class=\"pyvista"
405+
"Widget(value='<iframe src=\"http://localhost:36623/index.html?ui=P_0x7d484e2fa9f0_2&reconnect=auto\" class=\"pyvi"
390406
]
391407
},
392408
"metadata": {},
@@ -395,45 +411,48 @@
395411
],
396412
"source": [
397413
"sample_number = 0\n",
398-
"geomodel, boreholes = load_model_and_boreholes(samples_dir, device=device)\n",
399-
"solutions = load_solutions(samples_dir, sample_title=\"sample\", device=device)\n",
414+
"geomodel, boreholes = load_model_and_boreholes(samples_dir, device=\"cpu\")\n",
415+
"solutions = load_solutions(samples_dir, sample_title=\"sample\", device=\"cpu\")\n",
400416
"show_model_and_boreholes(geomodel, boreholes)\n",
401417
"# Limit to 10 solutions for display\n",
402418
"show_solutions(solutions[0:10])"
403419
]
404420
},
405421
{
406422
"cell_type": "code",
407-
"execution_count": 14,
423+
"execution_count": 15,
408424
"id": "4e45a77a",
409425
"metadata": {},
410426
"outputs": [],
411427
"source": [
412-
"def vote_probabilities(\n",
413-
" solutions: torch.Tensor, num_categories: int = 15\n",
414-
") -> torch.Tensor:\n",
428+
"def vote_probabilities(solutions: torch.Tensor, num_categories: int = 15) -> torch.Tensor:\n",
415429
" \"\"\"\n",
416-
" Compute per-voxel class probabilities by majority vote across the batch.\n",
417-
" Input: [B,X,Y,Z] of categories and Output: [C,X,Y,Z] of probabilities\n",
430+
" Compute per-voxel class probabilities over a batch.\n",
431+
" Input: [B, X, Y, Z] integer categories (may include -1)\n",
432+
" Output: [C, X, Y, Z] float probabilities\n",
418433
" \"\"\"\n",
419434
" assert solutions.dim() == 4\n",
420435
" B, X, Y, Z = solutions.shape\n",
436+
" device = solutions.device\n",
421437
"\n",
422-
" # Shift labels to 0..C-1 if they are -1..C-2\n",
438+
" # Handle negative indices (-1 for \"air\")\n",
423439
" if solutions.min().item() < 0:\n",
424-
" sol_shifted = solutions + 1\n",
425-
" else:\n",
426-
" sol_shifted = solutions\n",
427-
" sol_shifted = sol_shifted.to(torch.long) # required by bincount\n",
440+
" solutions = solutions + 1 # shift to 0..C-1\n",
441+
"\n",
442+
" solutions = solutions.to(torch.long)\n",
428443
"\n",
429-
" sols_one_hot = (\n",
430-
" torch.nn.functional.one_hot(sol_shifted, num_categories)\n",
431-
" .permute(0, 4, 1, 2, 3)\n",
432-
" .float()\n",
433-
" ) # [B, 15, 64, 64, 64]\n",
434-
" probability_vector = sols_one_hot.mean(dim=0, keepdim=False)\n",
444+
" # Accumulator for per-class voxel counts\n",
445+
" accumulator = torch.zeros(num_categories, X, Y, Z, dtype=torch.float32, device=device)\n",
435446
"\n",
436-
" return probability_vector\n",
447+
" # Accumulate one-hot for each sample\n",
448+
" for b in range(B):\n",
449+
" one_hot = torch.nn.functional.one_hot(solutions[b], num_classes=num_categories) # [X, Y, Z, C]\n",
450+
" one_hot = one_hot.permute(3, 0, 1, 2).float() # [C, X, Y, Z]\n",
451+
" accumulator += one_hot\n",
452+
"\n",
453+
" # Normalize by total samples\n",
454+
" probabilities = accumulator / B\n",
455+
" return probabilities\n",
437456
"\n",
438457
"\n",
439458
"solution_probabilistic = vote_probabilities(solutions, num_categories=15)"
@@ -450,19 +469,19 @@
450469
},
451470
{
452471
"cell_type": "code",
453-
"execution_count": 15,
472+
"execution_count": 16,
454473
"id": "7c67d1ec",
455474
"metadata": {},
456475
"outputs": [
457476
{
458477
"data": {
459478
"application/vnd.jupyter.widget-view+json": {
460-
"model_id": "f9877221328e42079086c8e85e0aa00f",
479+
"model_id": "2fbba51d6ac146919348db865e5aad3d",
461480
"version_major": 2,
462481
"version_minor": 0
463482
},
464483
"text/plain": [
465-
"Widget(value='<iframe src=\"http://localhost:53764/index.html?ui=P_0x38e7f0260_5&reconnect=auto\" class=\"pyvista"
484+
"Widget(value='<iframe src=\"http://localhost:36623/index.html?ui=P_0x7d483cb153d0_3&reconnect=auto\" class=\"pyvi"
466485
]
467486
},
468487
"metadata": {},
@@ -471,12 +490,12 @@
471490
{
472491
"data": {
473492
"application/vnd.jupyter.widget-view+json": {
474-
"model_id": "eb98d17d3b954dd88d1abb5a16356fe2",
493+
"model_id": "5ba813e5a3b341ab86744048d92adc05",
475494
"version_major": 2,
476495
"version_minor": 0
477496
},
478497
"text/plain": [
479-
"Widget(value='<iframe src=\"http://localhost:53764/index.html?ui=P_0x38e831760_6&reconnect=auto\" class=\"pyvista"
498+
"Widget(value='<iframe src=\"http://localhost:36623/index.html?ui=P_0x7d486e937620_4&reconnect=auto\" class=\"pyvi"
480499
]
481500
},
482501
"metadata": {},
@@ -621,7 +640,7 @@
621640
],
622641
"metadata": {
623642
"kernelspec": {
624-
"display_name": "geopaper",
643+
"display_name": "ml",
625644
"language": "python",
626645
"name": "python3"
627646
},
@@ -635,7 +654,7 @@
635654
"name": "python",
636655
"nbconvert_exporter": "python",
637656
"pygments_lexer": "ipython3",
638-
"version": "3.12.11"
657+
"version": "3.11.9"
639658
}
640659
},
641660
"nbformat": 4,

project/geodata-3d-conditional/model_train_sh_inference_cond.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
11
import argparse
22
import os
33
import platform
4-
import time
5-
import warnings
64
from typing import Any, Dict, List, Tuple, Optional
7-
from functools import partial
85

9-
import matplotlib.pyplot as plt
10-
import numpy as np
11-
import seaborn as sns
126
import torch
13-
import json
147

158
# from cpu_binding import affinity, num_threads
169
# if affinity: # https://github.com/pytorch/pytorch/issues/99625
@@ -21,10 +14,7 @@
2114

2215
import torch.nn as nn
2316
import torch.nn.functional as F
24-
import wandb
25-
from matplotlib import patches
2617
from torch.utils.data import DataLoader
27-
from tqdm import tqdm
2818

2919
# Third-party libraries
3020
from lightning import Trainer

project/geodata-3d-unconditional/model_train_inference.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,14 @@
55
import argparse
66
import os
77
import re
8-
import platform
8+
99
import time
1010
import warnings
1111
from typing import Any, Dict, List, Tuple, Optional, Union
1212

13-
import matplotlib.pyplot as plt
14-
import numpy as np
15-
import seaborn as sns
1613
import torch
1714
import torch.nn as nn
1815
import torch.nn.functional as F
19-
import wandb
20-
from matplotlib import patches
2116
from torch.utils.data import DataLoader
2217
from tqdm import tqdm
2318

@@ -320,7 +315,7 @@ def __init__(
320315
# Embedding layer setup
321316
self.embedding = nn.Embedding(self.num_categories, self.embedding_dim)
322317
self._initialize_embedding(self.num_categories, self.embedding_dim)
323-
# Freeze embedding weights after initialization (non-learnable)
318+
# Freeze embedding weights after initialization (non-learnable hardcoding, set to True for learnable)
324319
self.embedding.weight.requires_grad = False
325320

326321
# Update model_params to reflect the new input channels
@@ -619,6 +614,7 @@ def run_inference(
619614

620615
solver = ODEFlowSolver(model=model.net, rtol=1e-6)
621616

617+
# Start and stop times for ODEFlow, slightly away from t=0 to avoid numerical stability issues
622618
t0, tf = 0.001, 1.0
623619
n_steps = 16
624620

@@ -777,9 +773,10 @@ def parse_arguments():
777773
)
778774

779775
parser.add_argument(
780-
'--save-images',
781-
action='store_true',
782-
help='Save visualization images during inference'
776+
'--save-images',
777+
action=argparse.BooleanOptionalAction,
778+
default=True,
779+
help='Save visualization images during inference (use --no-save-images to disable)'
783780
)
784781

785782
parser.add_argument(
@@ -832,6 +829,10 @@ def main() -> None:
832829
model = Geo3DStochInterp.load_from_checkpoint(
833830
checkpoint_path, map_location=inference_device
834831
).to(inference_device)
832+
833+
print(f"Running inference with {args.n_samples} samples on device {inference_device} with batch size {args.batch_size}")
834+
print(f"Samples will be saved to: {dirs['samples_dir']}")
835+
print(f"Images will be saved to: {dirs['photo_dir'] if args.save_images else 'Not saving images'}")
835836

836837
run_inference(
837838
dirs,

project/geodata-3d-unconditional/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def download_if_missing(path, url):
2525
if not os.path.exists(path):
2626
os.makedirs(os.path.dirname(path), exist_ok=True)
2727
print(f"Downloading weights from {url}...")
28-
urllib.request.urlretrieve(url, path)
28+
torch.hub.download_url_to_file(url, path, progress=True)
2929
print("Download complete.")
3030

3131

0 commit comments

Comments
 (0)