
Commit 7d79504

Merge pull request #591 from NRCan/notebook_n_base

Add notebooks, tests, and environment setup with improved project metadata for the initial refactor release

2 parents: 2c7495f + e2ea30e

File tree: 13 files changed, +1374 −34 lines
Lines changed: 16 additions & 12 deletions

```diff
@@ -1,22 +1,26 @@
 name: tests
+
 on: [push, pull_request]
+
 jobs:
   run-tests:
     runs-on: ubuntu-22.04
-    defaults:
-      run:
-        shell: bash -el {0}
+
     steps:
-      - name: checkout repository
-        uses: actions/checkout@v4.1.2
+      - uses: actions/checkout@v4
 
-      - name: create environment
-        uses: conda-incubator/setup-miniconda@v3
+      - uses: actions/setup-python@v5
         with:
-          mamba-version: "*"
-          activate-environment: geo_deep_env
-          environment-file: environment.yml
+          python-version: "3.10"
+          cache: "pip"
 
-      - name: test with pytest
+      - name: Install dependencies
         run: |
-          pytest tests/
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Install pytest
+        run: pip install pytest
+
+      - name: Run tests
+        run: pytest tests/
```
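The reworked job drops the conda environment in favour of a plain pip install, then runs pytest against `tests/`. The repository's test files are not part of this diff; purely as a hypothetical illustration (file name and contents assumed, not from this commit), a minimal test the "Run tests" step would collect could look like:

```python
# Hypothetical tests/test_import.py -- not part of this commit's diff.
# The package ships an __init__.py (added in this commit), so a bare
# import is enough to catch broken packaging or missing dependencies.


def test_package_imports() -> None:
    import geo_deep_learning  # noqa: F401
```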

.gitignore

Lines changed: 8 additions & 1 deletion

```diff
@@ -1,4 +1,11 @@
 *__pycache__**
 *.idea**
 *.vscode**
-*geo_deep_learning/notebooks*
+
+# Specific folders name
+waterloo_subset_512/
+mlruns/
+.ipynb_checkpoints/
+
+# Specific files
+environment_full_conda_bckp.yml
```

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions

```diff
@@ -15,3 +15,8 @@ repos:
       - id: check-yaml
       - id: check-json
       - id: check-added-large-files
+
+exclude: |
+  (?x)(
+    ^notebooks/.*\.ipynb$
+  )
```
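The `exclude` value is a verbose-mode (`(?x)`) Python regular expression; pre-commit matches such patterns against each file path with `re.search`. A quick sketch of its behaviour using Python's `re` module directly (file paths are examples from this commit):

```python
# Sanity check of the pre-commit exclude pattern added above.
import re

pattern = re.compile(r"(?x)( ^notebooks/.*\.ipynb$ )")

# Notebooks are skipped by the hooks...
assert pattern.search("notebooks/00_quickstart.ipynb")
# ...while ordinary source files are still checked.
assert not pattern.search("geo_deep_learning/__init__.py")
```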

data/waterloo_subset_512.zip

Binary file (18.3 MB) not shown.

geo_deep_learning/__init__.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -0,0 +1,14 @@
+"""
+Geo Deep Learning (GDL).
+
+A geospatial deep learning framework for segmentation and related tasks.
+Provides utilities for dataset preparation, model training, evaluation,
+and deployment with PyTorch Lightning.
+
+Modules include:
+- datasets: data loading and preprocessing for geospatial sources
+- models: deep learning architectures for segmentation
+- datamodules: PyTorch Lightning DataModules for training pipelines
+- tasks_with_models: high-level training tasks coupled with models
+- tools: utilities for data handling and workflow management
+"""
```

geo_deep_learning/tasks_with_models/segmentation_unetplus.py

Lines changed: 31 additions & 18 deletions

```diff
@@ -13,12 +13,14 @@
 from kornia.augmentation import AugmentationSequential
 from lightning.pytorch import LightningModule, Trainer
 from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
-from tools.utils import denormalization, load_weights_from_checkpoint
-from tools.visualization import visualize_prediction
 from torch import Tensor
+from torch.optim.lr_scheduler import _LRScheduler
 from torchmetrics.segmentation import MeanIoU
 from torchmetrics.wrappers import ClasswiseWrapper
 
+from geo_deep_learning.tools.utils import denormalization, load_weights_from_checkpoint
+from geo_deep_learning.tools.visualization import visualize_prediction
+
 # Ignore warning about default grid_sample and affine_grid behavior triggered by kornia
 warnings.filterwarnings(
     "ignore",
@@ -140,17 +142,27 @@ def configure_model(self) -> None:
             map_location=map_location,
         )
 
-    def configure_optimizers(self) -> list[list[dict[str, Any]]]:
-        """Configure optimizers."""
+    def configure_optimizers(self) -> list:
+        """Configure optimizers and schedulers."""
         optimizer = self.optimizer(self.parameters())
-        if (
-            self.hparams["scheduler"]["class_path"]
-            == "torch.optim.lr_scheduler.OneCycleLR"
-        ):
-            max_lr = (
-                self.hparams.get("scheduler", {}).get("init_args", {}).get("max_lr")
-            )
+        scheduler_cfg = self.hparams.get("scheduler", None)
+
+        # Initialize scheduler variable (either an LR scheduler or None)
+        scheduler: _LRScheduler | None = None
+
+        # Handle non-CLI case
+        if not scheduler_cfg or not isinstance(scheduler_cfg, dict):
+            scheduler = self.scheduler(optimizer) if callable(self.scheduler) else None
+            if scheduler:
+                return [optimizer], [{"scheduler": scheduler, **self.scheduler_config}]
+            return [optimizer]
+
+        # CLI-compatible config logic
+        scheduler_class_path = scheduler_cfg.get("class_path", "")
+        if scheduler_class_path == "torch.optim.lr_scheduler.OneCycleLR":
+            max_lr = scheduler_cfg.get("init_args", {}).get("max_lr")
             stepping_batches = self.trainer.estimated_stepping_batches
+
             if stepping_batches > -1:
                 scheduler = torch.optim.lr_scheduler.OneCycleLR(
                     optimizer,
@@ -165,31 +177,31 @@ def configure_optimizers(self) -> list[list[dict[str, Any]]]:
                 epoch_size = self.trainer.datamodule.epoch_size
                 accumulate_grad_batches = self.trainer.accumulate_grad_batches
                 max_epochs = self.trainer.max_epochs
+
                 steps_per_epoch = math.ceil(
                     epoch_size / (batch_size * accumulate_grad_batches),
                 )
                 buffer_steps = int(steps_per_epoch * accumulate_grad_batches)
+
                 scheduler = torch.optim.lr_scheduler.OneCycleLR(
                     optimizer,
                     max_lr=max_lr,
                     steps_per_epoch=steps_per_epoch + buffer_steps,
                     epochs=max_epochs,
                 )
             else:
-                stepping_batches = (
-                    self.hparams.get("scheduler", {})
-                    .get("init_args", {})
-                    .get("total_steps")
-                )
+                total_steps = scheduler_cfg.get("init_args", {}).get("total_steps")
                 scheduler = torch.optim.lr_scheduler.OneCycleLR(
                     optimizer,
                     max_lr=max_lr,
-                    total_steps=stepping_batches,
+                    total_steps=total_steps,
                 )
         else:
             scheduler = self.scheduler(optimizer)
 
-        return [optimizer], [{"scheduler": scheduler, **self.scheduler_config}]
+        return [optimizer], [
+            {"scheduler": scheduler, **self.scheduler_config},
+        ] if scheduler else [optimizer]
 
     def forward(self, image: Tensor) -> Tensor:
         """Forward pass."""
@@ -275,6 +287,7 @@ def test_step(
         y_hat = self(x)
         loss = self.loss(y_hat, y)
         y = y.squeeze(1).long()
+
         if self.num_classes == 1:
             y_hat = (y_hat.sigmoid().squeeze(1) > self.threshold).long()
         else:
```
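The refactored `configure_optimizers` now distinguishes two cases: a scheduler passed directly as a callable (the non-CLI path, which returns early) and a LightningCLI-style dict stored in `hparams["scheduler"]`. A sketch of the dict shape the CLI path consumes; the keys come from the code above, while the values are purely illustrative:

```python
# LightningCLI-style scheduler config read by the refactored
# configure_optimizers(). Keys mirror the code above; numbers are examples.
scheduler_cfg = {
    "class_path": "torch.optim.lr_scheduler.OneCycleLR",
    "init_args": {
        "max_lr": 1e-3,       # read via scheduler_cfg["init_args"]["max_lr"]
        "total_steps": 1000,  # used only when estimated_stepping_batches is -1
    },
}
```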

notebooks/00_quickstart.ipynb

Lines changed: 1067 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/README.md

Lines changed: 35 additions & 0 deletions

```diff
@@ -0,0 +1,35 @@
+# Notebooks
+
+This folder contains example notebooks to help you get started with **Geo Deep Learning (GDL)**.
+
+## Available Notebooks
+
+- **`00_quickstart.ipynb`**
+  Minimal end-to-end demo:
+  1. Prepare a small sample dataset
+  2. Train a UNet++ model on CPU
+  3. Run inference & visualize predictions
+
+  This version calls **GDL's core classes directly** (no config files).
+  It is meant as the simplest entry point to verify everything works.
+
+- **`01_quickstart_config.ipynb`** *(coming soon)*
+  Same workflow as above, but using **LightningCLI** and GDL's config files.
+  This is the recommended way for reproducible experiments.
+
+## Requirements
+- GDL repository cloned locally
+- Environment with proper dependencies (see `requirements.txt` or `pyproject.toml`)
+
+## Troubleshooting
+`ModuleNotFoundError: No module named 'geo_deep_learning'` (or another module)
+
+In general, this problem occurs when the paths are not properly defined. Make sure
+to add the repo to your PYTHONPATH.
+
+Example inside a notebook:
+
+```python
+import sys
+sys.path.append("..")
+```
```
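The relative `sys.path.append("..")` only works when the notebook's working directory is `notebooks/` itself. A slightly more explicit variant of the same fix, assuming the notebook sits one level below the repository root as `notebooks/` does here:

```python
# Alternative to the relative append in the README above; assumes the
# notebook's working directory is one level below the repository root.
import sys
from pathlib import Path

repo_root = Path.cwd().parent
sys.path.insert(0, str(repo_root))
```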

notebooks/__init__.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+"""Notebooks package for demo and examples."""
```

pyproject.toml

Lines changed: 60 additions & 3 deletions

```diff
@@ -1,3 +1,47 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "geo-deep-learning"
+version = "0.1.0a0"
+description = "Geospatial deep learning framework for segmentation tasks"
+readme = "README.md"
+authors = [
+    { name = "Victor Alhassan", email = "victor.alhassan@NRCan-RNCan.gc.ca" },
+    { name = "Luca Romanini", email = "luca.romanini@NRCan-RNCan.gc.ca" },
+]
+requires-python = ">=3.10"
+license = { file = "LICENSE" }
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3.10",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: GIS",
+]
+keywords = ["pytorch", "deep learning", "machine learning", "remote sensing", "satellite imagery", "earth observation", "geospatial"]
+
+# Dependencies are pulled from requirements.txt
+dynamic = ["dependencies"]
+
+[tool.setuptools.dynamic]
+dependencies = { file = ["requirements.txt"] }
+
+[project.optional-dependencies]
+dev = ["pytest", "ruff", "pre-commit"]
+
+[project.urls]
+Homepage = "https://github.com/NRCan/geo-deep-learning"
+Repository = "https://github.com/NRCan/geo-deep-learning"
+Issues = "https://github.com/NRCan/geo-deep-learning/issues"
+
+
+# --------------------------
+# Ruff configuration
+# --------------------------
+
 [tool.ruff]
 exclude = [
     ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg",
@@ -6,6 +50,7 @@ exclude = [
     ".vscode", "__pypackages__", "_build", "buck-out", "build", "dist",
     "node_modules", "site-packages", "venv"
 ]
+src = ["geo_deep_learning"]
 line-length = 88
 indent-width = 4
 target-version = "py310"
@@ -18,11 +63,9 @@ ignore = [
     "ANN101", "ANN102", # allow skipping `self`, `cls` annotations
     "EXE002", # ignore missing executable bit on scripts with shebangs
     "ERA001", # ignore commented out code
+    "TC002", # allow third-party imports in type annotations without TYPE_CHECKING
 ]
 
-# You can limit to specific rule groups instead of ALL:
-# select = ["E", "F", "W", "I", "N", "UP", "B", "C4", "SIM", "D", "PT"]
-
 [tool.ruff.lint]
 fixable = ["ALL"]
 unfixable = []
@@ -34,3 +77,17 @@ quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false
 line-ending = "auto"
+
+[tool.ruff.lint.isort]
+# Treat both the package and legacy alias names as first-party
+known-first-party = [
+    "geo_deep_learning",
+    "tools",
+    "models",
+    "datasets",
+    "datamodules",
+    "tasks_with_models",
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["S101"]
```
