1818 compute_twi_whitebox ,
1919 generate_csv_from_tiles ,
2020 rasterize_labels_binary_aoi_mask ,
21+ rasterize_valid_lidar_mask ,
2122 stack_rasters ,
2223 tile_raster_pair ,
2324)
@@ -56,6 +57,8 @@ def __init__( # noqa: PLR0913
5657 include_intensity : bool = False ,
5758 stride : int = 256 ,
5859 test_ratio : float = 0.2 ,
60+ valid_mask_min_ratio : float | None = 0.9 ,
61+ save_rejected_tiles : bool = False ,
5962 ) -> None :
6063 """
6164 Initialize ElevationStackDataModule.
@@ -77,6 +80,11 @@ def __init__( # noqa: PLR0913
7780 Defaults to False.
7881 stride (int): Stride for tiling. Defaults to 256.
7982 test_ratio (float): Ratio for test split. Defaults to 0.2.
83+ valid_mask_min_ratio (float | None): Minimum ratio of valid LiDAR
84+ pixels required per tile (uses valid_mask.tif when present).
85+ Set to None to disable. Defaults to 0.9.
86+ save_rejected_tiles (bool): If True, save tiles filtered out during
87+ tiling for debugging. Defaults to False.
8088
8189 """
8290 super ().__init__ (
@@ -98,6 +106,8 @@ def __init__( # noqa: PLR0913
98106 self .intensity = include_intensity
99107 self .stride = stride
100108 self .test_ratio = test_ratio
109+ self .valid_mask_min_ratio = valid_mask_min_ratio
110+ self .save_rejected_tiles = save_rejected_tiles
101111
102112 def setup (self , stage : str | None = None ) -> None : # noqa: ARG002
103113 """
@@ -165,6 +175,7 @@ def prepare_data(self) -> None:
165175 csv_inference_path = self .csv_infer_path ,
166176 test_ratio = self .test_ratio ,
167177 remove_empty_labels = True ,
178+ valid_mask_min_ratio = self .valid_mask_min_ratio ,
168179 )
169180
170181 # Compute and save statistics
@@ -252,6 +263,7 @@ def _process_aoi(self, aoi_path: str) -> None:
252263 dsm = Path (aoi_path ) / "dsm.tif"
253264 intensity = Path (aoi_path ) / "intensity.tif"
254265 labels_vector = Path (aoi_path ) / "waterbodies.shp"
266+ valid_mask_vector = Path (aoi_path ) / "valid_lidar_mask.gpkg"
255267
256268 # Step 1: Align inputs to DTM
257269 log .info ("Aligning inputs to DTM" )
@@ -299,6 +311,19 @@ def _process_aoi(self, aoi_path: str) -> None:
299311 log .info ("Stacking %d bands: %s" , len (stack_inputs ), stack_inputs )
300312 stack_rasters (stack_inputs , str (stack_path ))
301313
314+ # Optional: rasterize valid LiDAR mask
315+ valid_mask_raster = out_dir / "valid_mask.tif"
316+ if valid_mask_vector .exists ():
317+ if not valid_mask_raster .exists ():
318+ log .info ("Rasterizing valid LiDAR mask: %s" , valid_mask_vector )
319+ rasterize_valid_lidar_mask (
320+ str (valid_mask_vector ),
321+ str (dtm ),
322+ str (valid_mask_raster ),
323+ )
324+ else :
325+ log .info ("Skipping valid mask rasterization (already exists)" )
326+
302327 # Step 4: Rasterize labels
303328 label_raster = out_dir / "labels_aligned.tif"
304329 rasterize_labels_binary_aoi_mask (
@@ -319,6 +344,9 @@ def _process_aoi(self, aoi_path: str) -> None:
319344 output_dir = str (out_dir / "tiles" ),
320345 patch_size = self .patch_size [0 ],
321346 stride = self .stride ,
347+ valid_mask_path = str (valid_mask_raster ) if valid_mask_raster .exists () else None ,
348+ valid_mask_min_ratio = self .valid_mask_min_ratio ,
349+ save_rejected_tiles = self .save_rejected_tiles ,
322350 )
323351
324352 def train_dataloader (self ) -> DataLoader [Any ]:
0 commit comments