Skip to content

Commit c2abc90

Browse files
committed
mosaicing, upsampling, other raking
1 parent 115f499 commit c2abc90

File tree

18 files changed

+517
-392
lines changed

18 files changed

+517
-392
lines changed

src/rra_population_model/cli_options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def with_block_key() -> ClickOption[_P, _T]:
126126
"-b",
127127
type=click.STRING,
128128
required=True,
129-
help="Block key of block to run.",
129+
help="Block key to run.",
130130
)
131131

132132

src/rra_population_model/data.py

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def itu_mask_path(self, iso3: str) -> Path:
333333
return self.itu_masks / f"{iso3}.tif"
334334

335335
def list_itu_iso3s(self) -> list[str]:
336-
return [f.stem for f in self.itu_masks.glob("*")]
336+
return [f.stem for f in self.itu_masks.glob("*.tif")]
337337

338338
def load_itu_mask(self, iso3: str) -> rt.RasterArray:
339339
path = self.itu_mask_path(iso3)
@@ -532,20 +532,15 @@ def save_tile_training_data(
532532
touch(raster_path, clobber=True)
533533
save_raster(raster, raster_path)
534534

535-
def save_summary_training_data(
535+
def save_summary_people_per_structure(
536536
self,
537+
data: pd.DataFrame,
537538
resolution: str,
538-
people_per_structure: gpd.GeoDataFrame,
539-
pixel_area_weights: gpd.GeoDataFrame,
540539
) -> None:
541540
root = self.training_data_root(resolution)
542-
pps_path = root / "people_per_structure.parquet"
543-
touch(pps_path, clobber=True)
544-
people_per_structure.to_parquet(pps_path)
545-
546-
paw_path = root / "pixel_area_weights.parquet"
547-
touch(paw_path, clobber=True)
548-
pixel_area_weights.to_parquet(paw_path)
541+
# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
path = root / "people_per_structure.parquet"
542+
touch(path, clobber=True)
543+
data.to_parquet(path)
549544

550545
def load_people_per_structure(
551546
self, resolution: str, tile_key: str | None = None
@@ -725,6 +720,15 @@ def raked_prediction_path(
725720
/ f"{block_key}.tif"
726721
)
727722

723+
def list_raked_prediction_time_points(
724+
self, resolution: str, version: str
725+
) -> list[str]:
726+
return [
727+
p.name
728+
for p in self.raked_predictions_root(resolution, version).iterdir()
729+
if p.is_dir()
730+
]
731+
728732
def save_raked_prediction(
729733
self,
730734
raster: rt.RasterArray,
@@ -759,6 +763,26 @@ def compiled_prediction_path(
759763
/ f"{group_key}.tif"
760764
)
761765

766+
def compiled_prediction_vrt_path(
767+
self, time_point: str, model_spec: "ModelSpecification"
768+
) -> Path:
769+
resolution = model_spec.resolution
770+
version = model_spec.model_version
771+
return (
772+
self.compiled_predictions_root(resolution, version)
773+
/ time_point
774+
/ "index.vrt"
775+
)
776+
777+
def list_compiled_prediction_time_points(
778+
self, resolution: str, version: str
779+
) -> list[str]:
780+
return [
781+
p.name
782+
for p in self.compiled_predictions_root(resolution, version).iterdir()
783+
if p.is_dir()
784+
]
785+
762786
def save_compiled_prediction(
763787
self,
764788
raster: rt.RasterArray,

src/rra_population_model/model/inference/runner.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,25 @@ def inference_main(
6666

6767
modeling_frame = pm_data.load_modeling_frame(model_spec.resolution)
6868
block_keys = modeling_frame.block_key.unique().tolist()
69+
block_keys = [
70+
block_key
71+
for block_key in block_keys
72+
if not pm_data.raw_prediction_path(block_key, time_point, model_spec).exists()
73+
]
6974

7075
datamodule = InferenceDataModule(
7176
model_spec.model_dump(),
7277
block_keys,
7378
time_point,
74-
num_workers=0,
79+
num_workers=4,
7580
)
7681
pred_writer = CustomWriter(
7782
pm_data, model.specification, time_point, write_interval="batch"
7883
)
7984
trainer = Trainer(
8085
callbacks=[pred_writer],
8186
enable_progress_bar=progress_bar,
87+
devices=2,
8288
)
8389
trainer.predict(model, datamodule, return_predictions=False)
8490

src/rra_population_model/model_prep/training_data/metadata.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,17 @@ def get_training_metadata(
5454
# for at least one tile it causes a GEOS exception.
5555
# This should be investigated, but just going with something
5656
# that appears to work for now.
57-
full_shape = intersecting_admins.union_all()
57+
try:
58+
full_shape = intersecting_admins.union_all()
59+
except shapely.errors.GEOSException:
60+
# Last ditch effort, actually move around the boundaries a little
61+
buffer_size = 0.01 # 1 cm
62+
full_shape = (
63+
intersecting_admins
64+
.buffer(buffer_size)
65+
.buffer(-buffer_size)
66+
.union_all()
67+
)
5868

5969
overlaps = model_frame.intersects(full_shape)
6070
neighborhood_keys = model_frame[overlaps].tile_key.tolist()

src/rra_population_model/model_prep/training_data/runner.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,19 +158,13 @@ def training_data(
158158
"project": "proj_rapidresponse",
159159
},
160160
max_attempts=5,
161+
log_root=pm_data.log_dir("model_prep_training_data"),
161162
)
162163

163164
if status != "D":
164165
msg = f"Workflow failed with status {status}."
165166
raise RuntimeError(msg)
166167

167168
print("Building summary datasets.")
168-
people_per_structure, pixel_area_weight = utils.build_summary_data(
169-
pm_data, resolution
170-
)
171-
172-
pm_data.save_summary_training_data(
173-
resolution,
174-
people_per_structure,
175-
pixel_area_weight,
176-
)
169+
# Pass by keyword: the helper is declared as (pm_data, resolution), so the
# previous positional call (resolution, pm_data) handed it swapped arguments.
people_per_structure = utils.build_summary_people_per_structure(
    pm_data=pm_data, resolution=resolution
)
170+
pm_data.save_summary_people_per_structure(people_per_structure, resolution)

src/rra_population_model/model_prep/training_data/utils.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,17 @@ def build_arg_list(
8585
return to_run
8686

8787

88-
def build_summary_data(
88+
def build_summary_people_per_structure(
8989
pm_data: PopulationModelData,
9090
resolution: str,
91-
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
91+
) -> pd.DataFrame:
9292
tile_dirs = list(pm_data.tile_training_data_root(resolution).iterdir())
93-
model_gdfs = []
94-
for tile_dir in tqdm.tqdm(tile_dirs):
95-
tile_pps = pm_data.load_people_per_structure(resolution, tile_dir.name)
96-
tile_paw = pm_data.load_pixel_area_weights(resolution, tile_dir.name)
97-
model_gdfs.append((tile_pps, tile_paw))
93+
data = pd.concat([
94+
pm_data.load_people_per_structure(resolution, tile_dir.name)
95+
for tile_dir in tqdm.tqdm(tile_dirs)
96+
], ignore_index=True)
97+
return data
9898

99-
pps, paw = zip(*model_gdfs, strict=False)
100-
101-
people_per_structure_gdf = pd.concat(pps, ignore_index=True)
102-
pixel_area_weight_gdf = pd.concat(paw, ignore_index=True)
103-
return people_per_structure_gdf, pixel_area_weight_gdf
10499

105100

106101
def safe_divide(
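(File path header missing from this extract; the imports below suggest src/rra_population_model/postprocess/__init__.py — unconfirmed.)

Lines changed: 10 additions & 6 deletions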
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
from rra_population_model.postprocess.compile_results import (
2-
compile_results,
3-
compile_results_task,
4-
upsample_task,
1+
from rra_population_model.postprocess.mosaic.runner import (
2+
mosaic,
3+
mosaic_task,
54
)
65
from rra_population_model.postprocess.rake.runner import (
76
rake,
@@ -15,18 +14,23 @@
1514
raking_factors,
1615
raking_factors_task,
1716
)
17+
from rra_population_model.postprocess.upsample.runner import (
18+
upsample,
19+
upsample_task,
20+
)
1821

1922
RUNNERS = {
2023
"raking_factors": raking_factors,
2124
"rake": rake,
2225
"rake_itu": rake_itu,
23-
"compile": compile_results,
26+
"mosaic": mosaic,
27+
"upsample": upsample,
2428
}
2529

2630
TASK_RUNNERS = {
2731
"raking_factors": raking_factors_task,
2832
"rake": rake_task,
2933
"rake_itu": rake_itu_task,
30-
"compile": compile_results_task,
34+
"mosaic": mosaic_task,
3135
"upsample": upsample_task,
3236
}

0 commit comments

Comments
 (0)