Refactor imports and improve code formatting in 00_quickstart notebook for better readability and organization.

valhassan · valhassan · commit d4354a69e442 · 2026-02-13T14:07:04.000-05:00
diff --git a/notebooks/00_quickstart.ipynb b/notebooks/00_quickstart.ipynb
@@ -74,26 +74,29 @@
    ],
    "source": [
     "import time\n",
+    "\n",
     "start_total = time.time()\n",
     "\n",
     "print(\"Importing required libraries...\")\n",
     "\n",
     "# Core modules\n",
-    "import os, sys, platform, random\n",
-    "import zipfile\n",
+    "import csv\n",
+    "import platform\n",
+    "import sys\n",
     "import warnings\n",
+    "import zipfile\n",
     "from pathlib import Path\n",
     "\n",
     "# Data and ML\n",
-    "import csv\n",
-    "import numpy as np\n",
-    "import torch\n",
     "import lightning as L\n",
-    "import rasterio as rio\n",
     "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import rasterio as rio\n",
+    "import torch\n",
     "\n",
     "# Remove warnings from not georeferenced dataset (for this example only)\n",
     "from rasterio.errors import NotGeoreferencedWarning\n",
+    "\n",
     "warnings.filterwarnings(\"ignore\", category=NotGeoreferencedWarning)\n",
     "\n",
     "# Append root path to make module work from notebook (might differ in your environment)\n",
@@ -151,7 +154,7 @@
    "source": [
     "# Define path to the archive and extract location\n",
     "zip_path = Path(\"../data/waterloo_subset_512.zip\")\n",
-    "extract_dir = zip_path.with_suffix('')  # removes .zip\n",
+    "extract_dir = zip_path.with_suffix(\"\")  # removes .zip\n",
     "\n",
     "# Unzip only if not already done\n",
     "if not extract_dir.exists():\n",
@@ -212,7 +215,7 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "Remapping trn labels: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 339.72it/s]\n",
@@ -222,17 +225,19 @@
     }
    ],
    "source": [
-    "from tqdm import tqdm\n",
     "import numpy as np\n",
     "import rasterio as rio\n",
+    "from tqdm import tqdm\n",
     "\n",
     "# Remap all labels in the dataset\n",
     "for split in [\"trn\", \"val\", \"tst\"]:\n",
     "    lbl_dir = extract_dir / split / \"label\"\n",
     "    if not lbl_dir.exists():\n",
     "        continue\n",
     "\n",
-    "    for lbl_path in tqdm(sorted(lbl_dir.glob(\"*.tif\")), desc=f\"Remapping {split} labels\"):\n",
+    "    for lbl_path in tqdm(\n",
+    "        sorted(lbl_dir.glob(\"*.tif\")), desc=f\"Remapping {split} labels\",\n",
+    "    ):\n",
     "        with rio.open(lbl_path) as lbl_ds:\n",
     "            lbl = lbl_ds.read(1)\n",
     "\n",
@@ -284,7 +289,7 @@
     "    lbl_dir = extract_dir / split / \"label\"\n",
     "    csv_path = extract_dir / f\"{split}.csv\"\n",
     "\n",
-    "    # Collect matching image–label pairs\n",
+    "    # Collect matching image-label pairs\n",
     "    rows = []\n",
     "    for img_path in sorted(img_dir.glob(\"*.tif\")):\n",
     "        lbl_path = lbl_dir / img_path.name\n",
@@ -294,7 +299,7 @@
     "            print(f\"No matching label found for {img_path.name}\")\n",
     "\n",
     "    # Write CSV\n",
-    "    with open(csv_path, \"w\", newline=\"\") as f:\n",
+    "    with csv_path.open(\"w\", newline=\"\") as f:\n",
     "        writer = csv.writer(f, delimiter=\";\")\n",
     "        writer.writerows(rows)\n",
     "\n",
@@ -336,7 +341,7 @@
     "from geo_deep_learning.datasets.csv_dataset import CSVDataset\n",
     "\n",
     "# Define dataset paths previously extracted from the ZIP\n",
-    "dataset_root = extract_dir \n",
+    "dataset_root = extract_dir\n",
     "\n",
     "# Change mask dtype to match SoftCrossEntropyLoss\n",
     "def _load_mask_int64(self, index: int):\n",
@@ -467,22 +472,25 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "/home/lromanin/miniforge3/envs/gdl_env_v09/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.\n"
      ]
     }
    ],
    "source": [
-    "from geo_deep_learning.tasks_with_models.segmentation_unetplus import SegmentationUnetPlus\n",
-    "from segmentation_models_pytorch.losses import SoftCrossEntropyLoss\n",
     "import torch\n",
+    "from segmentation_models_pytorch.losses import SoftCrossEntropyLoss\n",
     "from torch.optim import Adam\n",
     "from torch.optim.lr_scheduler import ReduceLROnPlateau\n",
     "\n",
+    "from geo_deep_learning.tasks_with_models.segmentation_unetplus import (\n",
+    "    SegmentationUnetPlus,\n",
+    ")\n",
+    "\n",
     "# Loss function instance (multi-class, 2 classes: background + buildings)\n",
-    "loss_fn = SoftCrossEntropyLoss(smooth_factor=0.1) \n",
+    "loss_fn = SoftCrossEntropyLoss(smooth_factor=0.1)\n",
     "\n",
     "# Optimizer and scheduler configs\n",
     "optimizer_class = Adam\n",
@@ -511,7 +519,7 @@
     "    scheduler_config={\n",
     "        \"interval\": \"epoch\",\n",
     "        \"frequency\": 1,\n",
-    "        \"monitor\": \"val_loss\"\n",
+    "        \"monitor\": \"val_loss\",\n",
     "    },\n",
     "    class_labels=[\"background\", \"buildings\"],\n",
     "    class_colors=[\"#000000\", \"#FF0000\"],\n",
@@ -543,7 +551,7 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "GPU available: True (cuda), used: True\n",
@@ -583,7 +591,7 @@
      "output_type": "display_data"
     },
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "/home/lromanin/miniforge3/envs/gdl_env_v09/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.\n",
@@ -746,7 +754,7 @@
      "output_type": "display_data"
     },
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "`Trainer.fit` stopped: `max_epochs=10` reached.\n"
@@ -756,12 +764,12 @@
    "source": [
     "import pandas as pd\n",
     "from lightning.pytorch import Trainer\n",
-    "from lightning.pytorch.loggers import MLFlowLogger\n",
     "from lightning.pytorch.callbacks import TQDMProgressBar\n",
+    "from lightning.pytorch.loggers import MLFlowLogger\n",
     "\n",
     "logger = MLFlowLogger(\n",
     "    experiment_name=\"unet_segmentation\",\n",
-    "    tracking_uri=\"file:./mlruns\"\n",
+    "    tracking_uri=\"file:./mlruns\",\n",
     ")\n",
     "\n",
     "# Define trainer\n",
@@ -804,7 +812,7 @@
        "<Axes: >"
       ]
      },
-     "execution_count": 9,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -852,7 +860,7 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "Restoring states from the checkpoint path at ./mlruns/406009257167130993/c7dc0307d39346e4ad7d3d45b19dcff8/checkpoints/epoch=9-step=80.ckpt\n",
@@ -908,14 +916,18 @@
        "  'test_loss': 0.2789146900177002}]"
       ]
      },
-     "execution_count": 10,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# Run evaluation on the test set (defined by the csv_datamodule)\n",
-    "trainer.test(model, datamodule=dm, ckpt_path=trainer.checkpoint_callback.best_model_path)"
+    "trainer.test(\n",
+    "    model,\n",
+    "    datamodule=dm,\n",
+    "    ckpt_path=trainer.checkpoint_callback.best_model_path,\n",
+    ")"
    ]
   },
   {
@@ -992,8 +1004,9 @@
    ],
    "source": [
     "from pathlib import Path\n",
-    "from PIL import Image\n",
+    "\n",
     "import matplotlib.pyplot as plt\n",
+    "from PIL import Image\n",
     "\n",
     "# Identify MLflow experiment/run ids\n",
     "print(\"Experiment:\", logger.experiment_id, \"Run:\", logger.run_id)\n",