Skip to content

Commit 4c32f0a

Browse files
authored
Merge pull request #6 from ihmeuw/feature/rmbarber/v7-height-update
Feature/rmbarber/v7 height update
2 parents 0b594eb + 59061a4 commit 4c32f0a

File tree

11 files changed

+73
-33
lines changed

11 files changed

+73
-33
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ MANIFEST
2929
pip-log.txt
3030
pip-delete-this-directory.txt
3131

32+
# Lightning model logs
33+
*lightning_logs/
34+
3235
# Unit test / coverage reports
3336
htmlcov/
3437
.tox/

specifications/model_specification.yaml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
resolution: "100"
22
split: 0
3-
denominator: "ghsl_residential_volume"
3+
denominator: "microsoft_v7_residential_volume"
44
features:
5-
- "log_nighttime_lights"
6-
- "ghsl_residential_volume_250m"
7-
- "ghsl_residential_volume_500m"
8-
- "ghsl_residential_volume_1000m"
9-
- "ghsl_residential_volume_2500m"
10-
- "ghsl_residential_volume_5000m"
11-
- "ghsl_residential_volume_10000m"
5+
# - "microsoft_v7_residential_volume_250m"
6+
# - "microsoft_v7_residential_volume_500m"
7+
# - "microsoft_v7_residential_volume_1000m"
8+
# - "microsoft_v7_residential_volume_2500m"
9+
# - "microsoft_v7_residential_volume_5000m"
10+
# - "microsoft_v7_residential_volume_10000m"
1211
training_target: "admin_log_occupancy_rate"
1312
loss_target: "admin_log_occupancy_rate"
1413
loss_metric: "mse"

src/rra_population_model/constants.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def to_list(cls) -> list[str]:
2727

2828
class BuiltVersion(BaseModel):
2929
provider: Literal["ghsl", "microsoft"]
30-
version: Literal["v6", "r2023a"]
30+
version: Literal["v6", "v7", "r2023a"]
3131
time_points: list[str]
3232
measures: list[str]
3333

@@ -69,6 +69,20 @@ def time_points_float(self) -> list[float]:
6969
][1:],
7070
measures=["density"],
7171
),
72+
"microsoft_v7": BuiltVersion(
73+
provider="microsoft",
74+
version="v7",
75+
# time_points=[
76+
# f"{y}q{q}" for y, q in itertools.product(range(2020, 2025), range(1, 5))
77+
# ][1:-2],
78+
time_points=[
79+
f"{y}q{q}" for y, q in itertools.product(range(2020, 2024), range(1, 5))
80+
][1:],
81+
measures=[
82+
"density",
83+
"height",
84+
],
85+
),
7286
}
7387

7488
DENOMINATORS = []
@@ -93,7 +107,7 @@ def time_points_float(self) -> list[float]:
93107

94108
ALL_TIME_POINTS = sorted(
95109
set.union(*[set(v.time_points) for v in BUILT_VERSIONS.values()])
96-
| {f"{y}q1" for y in range(1975, 2025)}
110+
| {f"{y}q1" for y in range(1975, 2026)}
97111
)
98112

99113

src/rra_population_model/data.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,22 +153,24 @@ def get_shapefile_dir(
153153
iso3: str,
154154
year: str | int,
155155
state: str | None = None,
156+
purpose: str = 'training',
156157
) -> Path:
157-
shapefile_dir = self.shapefiles / iso3 / str(year)
158+
shapefile_dir = self.shapefiles / purpose / iso3 / str(year)
158159
if state:
159160
return shapefile_dir / state
160161
return shapefile_dir
161162

162-
def list_shapefile_years(self) -> list[tuple[str, str]]:
163+
def list_shapefile_years(self, purpose: str = 'training') -> list[tuple[str, str]]:
163164
"""List all available shapefile years by country."""
164-
return self._list_years(self.shapefiles)
165+
return self._list_years(self.shapefiles / purpose)
165166

166167
def load_shapefile(
167168
self,
168169
admin_level: int,
169170
iso3: str,
170171
year: str | int,
171172
state: str | None = None,
173+
purpose: str = 'training',
172174
) -> gpd.GeoDataFrame:
173175
"""Load administrative boundary data from a shapefile.
174176
@@ -182,20 +184,22 @@ def load_shapefile(
182184
The year represented by the shapefile boundaries.
183185
state
184186
State or province name. Optional.
187+
purpose
188+
Which set of shapefiles to read, e.g. 'training' (the default) or 'raking'; selects the sub-directory under the shapefile root.
185189
186190
Returns
187191
-------
188192
gpd.GeoDataFrame
189193
Administrative boundary data.
190194
"""
191-
shape_root = self.get_shapefile_dir(iso3, year, state)
195+
shape_root = self.get_shapefile_dir(iso3, year, state, purpose=purpose)
192196
path = shape_root / f"admin{admin_level}.parquet"
193197
gdf = gpd.read_parquet(path)
194198
return gdf
195199

196-
def list_admin_levels(self, iso3: str, year: str | int) -> list[int]:
200+
def list_admin_levels(self, iso3: str, year: str | int, purpose: str = 'training') -> list[int]:
197201
"""List all available administrative levels for a country and year."""
198-
shapefile_dir = self.get_shapefile_dir(iso3, year)
202+
shapefile_dir = self.get_shapefile_dir(iso3, year, purpose=purpose)
199203
admin_levels = [
200204
admin_level
201205
for admin_level in range(10)

src/rra_population_model/model/inference/runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,12 @@ def inference(
136136
runner="pmtask model",
137137
task_name="inference",
138138
node_args={
139+
# "version": [f"2025_06_21.00{x}" for x in range(1, 5)],
139140
"time-point": time_points,
140141
},
141142
task_args={
142-
"resolution": resolution,
143143
"version": version,
144+
"resolution": resolution,
144145
"output-dir": output_dir,
145146
},
146147
task_resources={

src/rra_population_model/model_prep/features/built.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ def generate_derived_measures(
132132
if self.built_version.name == "ghsl_r2023a":
133133
# No derived measures for GHSL
134134
return {}
135-
elif self.built_version.name == "microsoft_v6":
136-
return _generate_microsoft_derived_measures(pm_data, self.feature_metadata)
135+
elif self.built_version.name in ["microsoft_v6", "microsoft_v7"]:
136+
return _generate_microsoft_derived_measures(pm_data, self.feature_metadata, self.built_version.name)
137137
else:
138138
msg = f"Unknown built version: {self.built_version.name}"
139139
raise ValueError(msg)
@@ -256,18 +256,31 @@ def link_features(
256256
def _generate_microsoft_derived_measures(
257257
pm_data: PopulationModelData,
258258
feature_metadata: FeatureMetadata,
259+
built_version_name: str,
259260
) -> dict[str, Path]:
261+
feature_dict = {
262+
"microsoft_v6": {
263+
"density": "microsoft_v6_density",
264+
"height": "ghsl_r2023a_height",
265+
"p_residential": "ghsl_r2023a_proportion_residential",
266+
},
267+
"microsoft_v7": {
268+
"density": "microsoft_v7_density",
269+
"height": "microsoft_v7_height",
270+
"p_residential": "ghsl_r2023a_proportion_residential",
271+
},
272+
}[built_version_name]
260273
density = pm_data.load_feature(
261-
feature_name="microsoft_v6_density",
274+
feature_name=feature_dict["density"],
262275
**feature_metadata.shared_kwargs,
263276
)
264277
density_arr = density._ndarray # noqa: SLF001
265278
height_arr = pm_data.load_feature( # noqa: SLF001
266-
feature_name="ghsl_r2023a_height",
279+
feature_name=feature_dict["height"],
267280
**feature_metadata.shared_kwargs,
268281
)._ndarray
269282
p_residential_arr = pm_data.load_feature( # noqa: SLF001
270-
feature_name="ghsl_r2023a_proportion_residential",
283+
feature_name=feature_dict["p_residential"],
271284
**feature_metadata.shared_kwargs,
272285
)._ndarray
273286

@@ -298,13 +311,13 @@ def _generate_microsoft_derived_measures(
298311
)
299312
pm_data.save_feature(
300313
out,
301-
feature_name=f"microsoft_v6_{measure}",
314+
feature_name=f"{built_version_name}_{measure}",
302315
**feature_metadata.shared_kwargs,
303316
)
304317

305318
out_paths = {
306-
f"microsoft_v6_{m}": pm_data.feature_path(
307-
feature_name=f"microsoft_v6_{m}",
319+
f"{built_version_name}_{m}": pm_data.feature_path(
320+
feature_name=f"{built_version_name}_{m}",
308321
**feature_metadata.shared_kwargs,
309322
)
310323
for m in out_ops

src/rra_population_model/model_prep/features/runner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
# GHSL first: msft derived measures read the GHSL residential proportion (and, for v6, the GHSL height)
1919
BUILT_VERSIONS = [
20-
pmc.BUILT_VERSIONS["ghsl_r2023a"],
21-
pmc.BUILT_VERSIONS["microsoft_v6"],
20+
# pmc.BUILT_VERSIONS["ghsl_r2023a"],
21+
pmc.BUILT_VERSIONS["microsoft_v7"],
2222
]
2323

2424

src/rra_population_model/postprocess/rake/runner.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def rake(
105105
rf_time_points = pm_data.list_raking_factor_time_points(resolution, version)
106106

107107
time_points = clio.convert_choice(time_point, rf_time_points)
108+
# time_points = sorted([time_point for time_point in time_points if time_point.startswith('202')])
108109

109110
model_frame = pm_data.load_modeling_frame(resolution)
110111
block_keys = model_frame.block_key.unique().tolist()
@@ -121,11 +122,12 @@ def rake(
121122
"project": "proj_rapidresponse",
122123
},
123124
node_args={
124-
"version": [f"2025_04_24.00{x}" for x in range(1, 9)],
125+
# "version": [f"2025_06_21.00{x}" for x in range(1, 5)],
125126
"block-key": block_keys,
126127
"time-point": time_points,
127128
},
128129
task_args={
130+
"version": version,
129131
"resolution": resolution,
130132
"output-dir": output_dir,
131133
},

src/rra_population_model/postprocess/raking_factors/runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ def raking_factors(
224224
"project": "proj_rapidresponse",
225225
},
226226
node_args={
227+
# "version": [f"2025_06_21.00{x}" for x in range(1, 5)],
227228
"time-point": time_points,
228229
},
229230
task_args={

src/rra_population_model/postprocess/upsample/runner.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
"world_cylindrical_2000f": (pmc.CRSES["world_cylindrical"], 2000, "average"),
2121
"world_cylindrical_4000f": (pmc.CRSES["world_cylindrical"], 4000, "average"),
2222
"world_cylindrical_8000f": (pmc.CRSES["world_cylindrical"], 8000, "average"),
23+
"world_cylindrical_10000f": (pmc.CRSES["world_cylindrical"], 10000, "average"),
2324
"world_cylindrical_16000f": (pmc.CRSES["world_cylindrical"], 16000, "average"),
24-
"world_cylindrical_5000": (pmc.CRSES["world_cylindrical"], 5000, "sum"),
2525
"world_cylindrical_1000": (pmc.CRSES["world_cylindrical"], 1000, "sum"),
26+
"world_cylindrical_5000": (pmc.CRSES["world_cylindrical"], 5000, "sum"),
27+
"world_cylindrical_10000": (pmc.CRSES["world_cylindrical"], 10000, "sum"),
2628
"wgs84_0p1": (pmc.CRSES["wgs84"], 0.1, "sum"),
2729
"wgs84_0p01": (pmc.CRSES["wgs84"], 0.01, "sum"),
2830
}
@@ -160,8 +162,9 @@ def upsample(
160162
compiled_time_points = pm_data.list_compiled_prediction_time_points(
161163
resolution, version
162164
)
163-
compiled_time_points = [f"{y}q1" for y in range(1950, 1976)]
165+
# compiled_time_points = [f"{y}q1" for y in range(1950, 1976)]
164166
time_points = clio.convert_choice(time_point, compiled_time_points)
167+
# time_points = [time_point for time_point in time_points if time_point.startswith('202')]
165168

166169
print("Upsampling")
167170

0 commit comments

Comments
 (0)