
Commit 4f44dd0: Created MC-WBDN Model
1 parent 8cdc813, commit 4f44dd0

26 files changed: +3601 additions, -1784 deletions

.gitignore

Lines changed: 2 additions & 0 deletions

@@ -11,4 +11,6 @@ validation/
 test/
 old_data/
 experiments
+remove_experiment.py
+resample.py

backend/callbacks.py

Lines changed: 1 addition & 1 deletion

@@ -43,7 +43,7 @@ def get_callbacks(config: Dict[str, Any], val_data: ImgSequence, model: Model) -
     checkpoint = ModelCheckpoint(f"checkpoints/{model.name}/{model.name}", save_best_only=False, monitor='val_loss', mode='min', save_weights_only=True)
     prediction_logger = PredictionCallback(val_data, model)
     learning_rate_scheduler = LearningRateScheduler(lr_scheduler)
-    early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.0001, patience=15, verbose=1, mode="min")
+    early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.0001, patience=10, verbose=1, mode="min")
     return [tensorboard, csv, checkpoint, prediction_logger, learning_rate_scheduler, early_stopping] if get_create_logs(config) else [learning_rate_scheduler, early_stopping]
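
Note: the only functional change here is the early-stopping patience dropping from 15 to 10 epochs. As a standalone, hedged illustration (the toy model and data below are not from this commit; only the EarlyStopping arguments mirror the change), the same settings behave like this when handed to Keras:

```python
import numpy as np
import tensorflow as tf

# Toy model and data, purely illustrative; only the EarlyStopping arguments
# mirror the committed change (patience=10, min_delta=0.0001 on val_loss).
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
x = np.random.rand(64, 4).astype("float32")
y = np.random.rand(64, 1).astype("float32")

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=0.0001, patience=10, verbose=1, mode="min"
)

# Training halts once val_loss fails to improve by at least 0.0001
# for 10 consecutive epochs, five fewer than before this commit.
model.fit(x, y, validation_split=0.25, epochs=100, callbacks=[early_stopping])
```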

backend/config.py

Lines changed: 17 additions & 8 deletions

@@ -154,19 +154,28 @@ def get_num_experiments(config: Dict[str, Any]) -> int:
     return config["experiments"]


-def get_water_threshold(config: Dict[str, Any]) -> float:
+def get_random_subsample(config: Dict[str, Any]) -> bool:
     """
-    Get the water threshold that patches must meet to avoid being discarded
+    Get the setting for whether or not the data pipeline should sub-sample 512x512 patches
     :param config: A dictionary storing the project configuration; typically loaded from an external file
-    :returns: The water threshold as a percentage that must be met by a patch to avoid being discarded
+    :returns: Whether or not to randomly sub-sample patches
     """
-    return config["hyperparameters"]["water_threshold"]
+    return config["hyperparameters"]["random_subsample"]


-def get_random_subsample(config: Dict[str, Any]) -> bool:
+def get_water_threshold(config: Dict[str, Any]) -> int:
     """
-    Get the setting for whether or not the data pipeline should sub-sample 512x512 patches
+    Get the water threshold for waterbody transfer
     :param config: A dictionary storing the project configuration; typically loaded from an external file
-    :returns: Whether or not to randomly sub-sample patches
+    :returns: The content threshold (percent) to be applied to waterbody transfer
     """
-    return config["hyperparameters"]["random_subsample"]
+    return config["hyperparameters"]["water_threshold"]
+
+
+def get_mixed_precision(config: Dict[str, Any]) -> bool:
+    """
+    Return True if we want to use mixed precision to speed up trainig/inference at the cost of accuracy
+    :param config: A dictionary storing the project configuration; typically loaded from an external file
+    :returns: A boolean indicating whether or not we want to use mixed precision
+    """
+    return config["use_mixed_precision"]
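
Note: get_mixed_precision reads a new top-level use_mixed_precision key. A minimal sketch of how a caller might act on the flag; the configure_precision helper below is illustrative and is not part of this commit, only the getter and the config key are:

```python
import tensorflow as tf

from backend.config import get_mixed_precision


def configure_precision(config: dict) -> None:
    """Illustrative helper: switch Keras to mixed precision when the config asks for it."""
    if get_mixed_precision(config):
        # Layers compute in float16 while variables stay float32;
        # model outputs should remain float32 for numerically stable losses.
        tf.keras.mixed_precision.set_global_policy("mixed_float16")


# Minimal config fragment containing only the key this getter reads.
configure_precision({"use_mixed_precision": True})
```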

backend/data_loader.py

Lines changed: 14 additions & 3 deletions

@@ -7,11 +7,12 @@

 class DataLoader:
     """A class to save and load images from disk"""
-    def __init__(self, timestamp: int = 1, overlapping_patches: bool = False, random_subsample: bool = False):
+    def __init__(self, timestamp: int = 1, overlapping_patches: bool = False, random_subsample: bool = False, upscale_swir: bool = True):
         self.timestamp = timestamp
         self.folders = {1: "2018.04", 2: "2018.12", 3: "2019.02"}
         self.overlapping_patches = overlapping_patches
         self.random_subsample = random_subsample
+        self.upscale_swir = upscale_swir

     def get_rgb_features(self, tile_number: int, coords: Tuple[int, int] = (0, 0), preprocess_img: bool = True, tile_dir: str = "tiles") -> np.ndarray:
         """

@@ -38,8 +39,10 @@ def get_swir_features(self, tile_number: int, coords: Tuple[int, int] = None, pr
         :return: The SWIR features of the matching patch,
         """
         tile = self.read_image(f"data/{self.folders.get(self.timestamp, 1)}/{tile_dir}/swir/swir.{tile_number}.tif", preprocess_img=preprocess_img)
-        tile = np.resize(cv2.resize(tile, (1024, 1024), interpolation = cv2.INTER_AREA), (1024, 1024, 1))
-        return self.subsample_tile(tile, coords=coords) if coords is not None else tile
+        tile = np.resize(cv2.resize(tile, (1024, 1024), interpolation = cv2.INTER_AREA), (1024, 1024, 1)) if self.upscale_swir else tile
+        if coords is not None:
+            return self.subsample_tile(tile, coords=coords) if self.upscale_swir else self.subsample_swir_tile(tile, coords=coords)
+        return tile

     def get_mask(self, tile_number: int, coords: Tuple[int, int] = None, preprocess_img: bool = True, tile_dir: str = "tiles") -> np.ndarray:
         """

@@ -80,6 +83,14 @@ def subsample_tile(self, tile: np.ndarray, coords: Tuple[int, int] = (0, 0)) ->
         Take a 512X512 sub-patch from a 1024X12024 tile.
         """
         return tile[coords[1]:coords[1]+512, coords[0]:coords[0]+512, :]
+
+    def subsample_swir_tile(self, tile: np.ndarray, coords: Tuple[int, int] = (0, 0)) -> np.ndarray:
+        """
+        Take a 512X512 sub-patch from a 1024X12024 tile.
+        """
+        y_coord = coords[1] // 2
+        x_coord = coords[0] // 2
+        return tile[y_coord:y_coord+256, x_coord:x_coord+256, :]

     def get_patch_coords(self, patch_index: int = 0) -> Tuple[int, int]:
         """Get the coordinates for a patch inside a tile from a given patch_index. If random_subsample is True, the coords will be selected randomly."""

backend/metrics.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@ def MIOU():
     m = tf.keras.metrics.MeanIoU(num_classes=2)
     def MIoU(y_true, y_pred):
         m.reset_states()
-        y_true, y_pred = flatten(y_true), flatten(tf.where(y_pred >= 0.5, 1.1, 0.0))
+        y_true, y_pred = y_true, tf.where(y_pred >= 0.5, 1.1, 0.0)
         _ = m.update_state(y_true, y_pred)
         return m.result()
     return MIoU
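
Note: the explicit flatten() calls were dropped; tf.keras.metrics.MeanIoU flattens its inputs internally, so the closure can now be fed whole (batch, height, width, 1) tensors. A quick, hedged usage check with random data, illustrative only:

```python
import numpy as np

from backend.metrics import MIOU

miou = MIOU()
y_true = np.random.randint(0, 2, size=(4, 128, 128, 1)).astype("float32")
y_pred = np.random.uniform(0.0, 1.0, size=(4, 128, 128, 1)).astype("float32")

# Predictions are thresholded at 0.5 inside the closure before the confusion
# matrix is accumulated; the result is the mean IoU over the two classes.
print(float(miou(y_true, y_pred)))
```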

backend/pipeline.py

Lines changed: 75 additions & 51 deletions

@@ -1,6 +1,7 @@
 import gc
 import os
 import math
+import statistics
 import json
 import random
 import shutil

@@ -14,15 +15,15 @@
 from tensorflow.keras.backend import clear_session
 from backend.utils import adjust_rgb
 from backend.metrics import MIOU
-from backend.config import get_timestamp, get_waterbody_transfer, get_random_subsample
+from backend.config import get_timestamp, get_waterbody_transfer, get_random_subsample, get_fusion_head, get_water_threshold
 from models.utils import evaluate_model
 from backend.data_loader import DataLoader


 class ImgSequence(KerasSequence):
-    def __init__(self, timestamp: int, tiles: List[int], batch_size: int = 32, bands: Sequence[str] = None, is_train: bool = False, random_subsample: bool = False):
+    def __init__(self, timestamp: int, tiles: List[int], batch_size: int = 32, bands: Sequence[str] = None, is_train: bool = False, random_subsample: bool = False, upscale_swir: bool = True):
         # Initialize Member Variables
-        self.data_loader = DataLoader(timestamp, overlapping_patches=is_train, random_subsample=(random_subsample and is_train))
+        self.data_loader = DataLoader(timestamp, overlapping_patches=is_train, random_subsample=(random_subsample and is_train), upscale_swir=upscale_swir)
         self.batch_size = batch_size
         self.bands = ["RGB"] if bands is None else bands
         self.indices = []

@@ -39,7 +40,7 @@ def __init__(self, timestamp: int, tiles: List[int], batch_size: int = 32, bands
     def __len__(self) -> int:
         return math.ceil(len(self.indices) / self.batch_size)

-    def __getitem__(self, idx):
+    def __getitem__(self, idx, normalize_data=True):
         # Create Batch
         feature_batches = {"RGB": [], "NIR": [], "SWIR": [], "mask": []}
         batch = self.indices[idx*self.batch_size:(idx+1)*self.batch_size]

@@ -54,7 +55,10 @@ def __getitem__(self, idx):

         # Add Features To Batch
         for key, val in features.items():
-            feature_batches[key].append(DataLoader.normalize_channels(val.astype("float32")) if key != "mask" else val)
+            if normalize_data:
+                feature_batches[key].append(DataLoader.normalize_channels(val.astype("float32")) if key != "mask" else val)
+            else:
+                feature_batches[key].append(val)

         # Return Batch
         return [np.array(feature_batches[band]).astype("float32") for band in ("RGB", "NIR", "SWIR") if len(feature_batches[band]) > 0], np.array(feature_batches["mask"]).astype("float32")

@@ -90,52 +94,63 @@ def predict_batch(self, model: Model, directory: str):
         os.mkdir(model_directory)

         # Iterate Over All Patches In Batch
-        MIoUs, MIoU = [], MIOU()
-        for patch_index in self.indices:
+        MIoUs, MIoU, i = [], MIOU(), 0
+        for batch in range(len(self)):
+
+            # Get Batch
+            features, masks = self.__getitem__(batch, normalize_data=False)
+            normalized_features, _ = self.__getitem__(batch)
+            rgb_features = features[0] if "RGB" in self.bands else None
+            nir_features = features[1 if "RGB" in self.bands else 0] if "NIR" in self.bands else None
+            swir_features = features[2] if "SWIR" in self.bands else None

-            # Load Features And Mask
-            features = self._get_features(patch_index)
-            mask = features["mask"]
-
             # Get Prediction
-            prediction = model.predict([np.array([DataLoader.normalize_channels(features[band].astype("float32"))]) for band in self.bands])
-            MIoUs.append([patch_index, MIoU(mask.astype("float32"), prediction).numpy()])
-
-            # Plot Features
-            i = 0
-            _, axs = plt.subplots(1, len(self.bands) + 2)
-            for band in self.bands:
-                axs[i].imshow(adjust_rgb(features[band], gamma=0.5) if band == "RGB" else features[band])
-                axs[i].set_title(band, fontsize=6)
-                axs[i].axis("off")
+            predictions = model.predict(normalized_features)
+
+            # Iterate Over Each Prediction In The Batch
+            for p in range(predictions.shape[0]):
+
+                mask = masks[p, ...]
+                prediction = predictions[p, ...]
+                MIoUs.append([self.indices[i], MIoU(mask, prediction).numpy()])
+
+                # Plot Features
+                col = 0
+                _, axs = plt.subplots(1, len(self.bands) + 2)
+                for band, feature in zip(["RGB", "NIR", "SWIR"], [rgb_features, nir_features, swir_features]):
+                    if feature is not None:
+                        axs[col].imshow(adjust_rgb(feature[p, ...], gamma=0.5) if feature.shape[-1] == 3 else feature[p, ...])
+                        axs[col].set_title(band, fontsize=6)
+                        axs[col].axis("off")
+                        col += 1
+
+                # Plot Ground Truth
+                axs[col].imshow(mask)
+                axs[col].set_title("Ground Truth", fontsize=6)
+                axs[col].axis("off")
+                col += 1
+
+                # Plot Prediction
+                axs[col].imshow(np.where(prediction < 0.5, 0, 1))
+                axs[col].set_title(f"Prediction ({MIoUs[-1][1]:.3f})", fontsize=6)
+                axs[col].axis("off")
+                col += 1
+
+                # Save Prediction To Disk
+                plt.tight_layout()
+                plt.savefig(f"{model_directory}/prediction.{self.indices[i]}.png", dpi=300, bbox_inches='tight')
+                plt.cla()
+                plt.close()
+
+                # Housekeeping
+                gc.collect()
+                clear_session()
                 i += 1
-
-            # Plot Ground Truth
-            axs[i].imshow(mask)
-            axs[i].set_title("Ground Truth", fontsize=6)
-            axs[i].axis("off")
-            i += 1
-
-            # Plot Prediction
-            axs[i].imshow(np.where(prediction < 0.5, 0, 1)[0])
-            axs[i].set_title(f"Prediction ({MIoUs[-1][1]:.3f})", fontsize=6)
-            axs[i].axis("off")
-            i += 1
-
-            # Save Prediction To Disk
-            plt.tight_layout()
-            plt.savefig(f"{model_directory}/prediction.{patch_index}.png", dpi=300, bbox_inches='tight')
-            plt.cla()
-            plt.close()
-
-            # Housekeeping
-            gc.collect()
-            clear_session()

         # Save MIoU For Each Patch
-        summary = np.array(MIoUs)
-        df = pandas.DataFrame(summary[:, 1:], columns=["MIoU"], index=summary[:, 0].astype("int32"))
-        df.to_csv(f"{model_directory}/Evaluation.csv", index_label="patch")
+        # summary = np.array(MIoUs)
+        # df = pandas.DataFrame(summary[:, 1:], columns=["MIoU"], index=summary[:, 0].astype("int32"))
+        # df.to_csv(f"{model_directory}/Evaluation.csv", index_label="patch")

         # Evaluate Final Performance
         results = evaluate_model(model, self)

@@ -171,10 +186,14 @@ def _get_features(self, patch: int, subsample: bool = True) -> Dict[str, np.ndar


 class WaterbodyTransferImgSequence(ImgSequence):
+    def __init__(self, timestamp: int, tiles: List[int], batch_size: int = 32, bands: Sequence[str] = None, is_train: bool = False, random_subsample: bool = False, upscale_swir: bool = True, water_threshold: int = 5):
+        super().__init__(timestamp, tiles, batch_size, bands, is_train, random_subsample, upscale_swir)
+        self.water_threshold = water_threshold
+
     """A data pipeline that returns tiles with transplanted waterbodies"""
     def _get_features(self, patch: int, subsample: bool = True) -> Dict[str, np.ndarray]:
         tile_index = patch // 100
-        return self.data_loader.get_features(patch, self.bands, tile_dir="tiles" if tile_index <= 400 else "transplanted_tiles")
+        return self.data_loader.get_features(patch, self.bands, tile_dir="tiles" if tile_index <= 400 else f"transplanted_tiles_{self.water_threshold}")


 def load_dataset(config) -> Tuple[ImgSequence, ImgSequence, ImgSequence]:

@@ -188,15 +207,20 @@ def load_dataset(config) -> Tuple[ImgSequence, ImgSequence, ImgSequence]:
     batch_size = config["hyperparameters"]["batch_size"]

     # Read Batches From JSON File
-    batch_filename = "batches/transplanted.json" if get_waterbody_transfer(config) else "batches/tiles.json"
+    water_threshold = get_water_threshold(config)
+    batch_filename = f"batches/transplanted_tiles_{water_threshold}.json" if get_waterbody_transfer(config) else "batches/tiles.json"
    with open(batch_filename) as f:
        batch_file = json.loads(f.read())

     # Choose Type Of Data Pipeline Based On Project Config
     Constructor = WaterbodyTransferImgSequence if get_waterbody_transfer(config) else ImgSequence

     # Create Train, Validation, And Test Data
-    train_data = Constructor(get_timestamp(config), batch_file["train"], batch_size=batch_size, bands=bands, is_train=True, random_subsample=get_random_subsample(config))
-    val_data = ImgSequence(get_timestamp(config), batch_file["validation"], batch_size=batch_size, bands=bands, is_train=False)
-    test_data = ImgSequence(get_timestamp(config), batch_file["test"], batch_size=batch_size, bands=bands, is_train=False)
+    upscale_swir = get_fusion_head(config) != "paper"
+    if get_waterbody_transfer(config):
+        train_data = WaterbodyTransferImgSequence(get_timestamp(config), batch_file["train"], batch_size=batch_size, bands=bands, is_train=True, random_subsample=get_random_subsample(config), upscale_swir=upscale_swir, water_threshold=water_threshold)
+    else:
+        train_data = ImgSequence(get_timestamp(config), batch_file["train"], batch_size=batch_size, bands=bands, is_train=True, random_subsample=get_random_subsample(config), upscale_swir=upscale_swir)
+    val_data = ImgSequence(get_timestamp(config), batch_file["validation"], batch_size=12, bands=bands, is_train=False, upscale_swir=upscale_swir)
+    test_data = ImgSequence(get_timestamp(config), batch_file["test"], batch_size=12, bands=bands, is_train=False, upscale_swir=upscale_swir)
     return train_data, val_data, test_data
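
Note: predict_batch now fetches each batch twice, once with normalize_data=False for plotting and once normalized for inference, and then splits the returned feature list by band. Because __getitem__ returns only the enabled bands, in the fixed order RGB, NIR, SWIR, the index of each band shifts when RGB is absent. A self-contained sketch of that indexing rule (the list entries below are placeholders, not real batches):

```python
# Mirrors the band-indexing logic used in predict_batch above.
def split_bands(features, bands):
    rgb = features[0] if "RGB" in bands else None
    nir = features[1 if "RGB" in bands else 0] if "NIR" in bands else None
    swir = features[2] if "SWIR" in bands else None
    return rgb, nir, swir


# With RGB disabled, NIR moves to index 0.
print(split_bands(["nir_batch"], ["NIR"]))                      # (None, 'nir_batch', None)
print(split_bands(["rgb_batch", "nir_batch"], ["RGB", "NIR"]))  # ('rgb_batch', 'nir_batch', None)
```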
