Skip to content

Commit 6abd215

Browse files
committed
Update GHSL processing to produce height and proportion residential
1 parent d4d0ed0 commit 6abd215

File tree

5 files changed

+374
-196
lines changed

5 files changed

+374
-196
lines changed

src/rra_building_density/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ def process_resources(self, resolution: str) -> tuple[str, str]:
9595
),
9696
}
9797

98+
LATEST_MICROSOFT_VERSION = MICROSOFT_VERSIONS["6"]
99+
98100

99101
class GHSLVersion(BuiltVersion):
100102
measure_map: ClassVar[dict[str, tuple[str, str]]] = {

src/rra_building_density/process/ghsl.py

Lines changed: 60 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,112 @@
11
import click
22
import numpy as np
3+
import rasterra as rt
34
from rra_tools import jobmon
45

56
from rra_building_density import cli_options as clio
67
from rra_building_density import constants as bdc
7-
from rra_building_density import utils
88
from rra_building_density.data import BuildingDensityData
9+
from rra_building_density.process import utils
910

1011

1112
def format_ghsl_main(
1213
block_key: str,
13-
measure: str,
1414
time_point: str,
1515
resolution: int | str,
1616
output_dir: str,
1717
) -> None:
18-
ghsl_version = bdc.GHSL_VERSIONS["r2023a"]
19-
ghsl_measure = ghsl_version.prefix_and_measure(measure)[1]
18+
version = bdc.GHSL_VERSIONS["r2023a"]
2019
crs = bdc.CRSES["wgs84"].to_pyproj()
21-
2220
bd_data = BuildingDensityData(output_dir)
21+
22+
print("Loading the reference block")
23+
mask_version = bdc.LATEST_MICROSOFT_VERSION
24+
reference_block = bd_data.load_tile(
25+
resolution,
26+
provider=mask_version.name,
27+
block_key=block_key,
28+
# This will be the transition time point
29+
time_point=mask_version.time_points[0],
30+
measure="density",
31+
)
32+
2333
print("Loading the tile index")
2434
tile_index = bd_data.load_tile_index(resolution)
25-
tile_index_info = bd_data.load_tile_index_info(resolution)
26-
27-
print("Building template")
2835
block_index = tile_index[tile_index.block_key == block_key]
36+
2937
block_poly_series = block_index.dissolve("block_key").geometry
30-
block_poly = block_poly_series.iloc[0]
3138
block_poly_ghsl = (
3239
utils.bbox_safe_buffer(block_poly_series, 5000).to_crs(crs).iloc[0]
3340
)
3441

35-
block_template = utils.make_raster_template(
36-
block_poly,
37-
resolution=tile_index_info.tile_resolution,
38-
crs=bdc.CRSES["equal_area"],
42+
print("Loading the GHSL data")
43+
density_arr, raw_volume_arr, raw_nonresidential_density_arr = (
44+
utils.load_and_resample_ghsl_data( # noqa: SLF001
45+
measure=measure,
46+
time_point=time_point,
47+
ghsl_version=version,
48+
bounds=block_poly_ghsl,
49+
reference_block=reference_block,
50+
bd_data=bd_data,
51+
)._ndarray
52+
for measure in ["density", "volume", "nonresidential_density"]
3953
)
40-
41-
print("Loading GHSL data")
42-
raw_tile = bd_data.load_provider_tile(
43-
ghsl_version,
44-
bounds=block_poly_ghsl,
45-
measure=ghsl_measure,
46-
time_point=time_point,
47-
year=time_point[:4],
54+
print("Generating height and proportion residential arrays")
55+
height_arr = utils.generate_height_array(density_arr, raw_volume_arr) # type: ignore[arg-type]
56+
proportion_residential_arr = utils.generate_proportion_residential_array(
57+
density_arr, # type: ignore[arg-type]
58+
raw_nonresidential_density_arr, # type: ignore[arg-type]
4859
)
49-
raw_tile = raw_tile.astype(np.float32) / 10000.0
5060

51-
print("Resampling")
52-
tile = raw_tile.set_no_data_value(np.nan).resample_to(block_template, "average")
53-
tile = utils.suppress_noise(tile)
54-
print("Saving")
55-
bd_data.save_tile(
56-
tile,
57-
resolution,
58-
provider="ghsl_r2023a",
59-
block_key=block_key,
60-
time_point=time_point,
61-
measure=measure,
62-
)
61+
print("Generating and saving output arrays")
62+
out_ops = {
63+
"height": lambda _, h, __: h,
64+
"proportion_residential": lambda _, __, p: p,
65+
"density": lambda d, _, __: d,
66+
"residential_density": lambda d, _, p: d * p,
67+
"nonresidential_density": lambda d, _, p: d * (1 - p),
68+
"volume": lambda d, h, _: h * d,
69+
"residential_volume": lambda d, h, p: h * d * p,
70+
"nonresidential_volume": lambda d, h, p: h * d * (1 - p),
71+
}
72+
for measure, op in out_ops.items():
73+
out = rt.RasterArray(
74+
data=op(density_arr, height_arr, proportion_residential_arr), # type: ignore[no-untyped-call]
75+
transform=reference_block.transform,
76+
crs=reference_block.crs,
77+
no_data_value=np.nan,
78+
)
79+
bd_data.save_tile(
80+
out,
81+
resolution,
82+
provider="ghsl_r2023a",
83+
block_key=block_key,
84+
time_point=time_point,
85+
measure=measure,
86+
)
6387

6488

6589
@click.command()
66-
@clio.with_measure(bdc.GHSLVersion.measure_map)
6790
@clio.with_block_key()
6891
@clio.with_time_point()
6992
@clio.with_resolution(bdc.RESOLUTIONS)
7093
@clio.with_output_directory(bdc.MODEL_ROOT)
7194
def format_ghsl_task(
7295
block_key: str,
73-
measure: str,
7496
time_point: str,
7597
resolution: str,
7698
output_dir: str,
7799
) -> None:
78100
"""Build predictors for a given tile and time point."""
79-
format_ghsl_main(block_key, measure, time_point, resolution, output_dir)
101+
format_ghsl_main(block_key, time_point, resolution, output_dir)
80102

81103

82104
@click.command()
83-
@clio.with_measure(bdc.GHSLVersion.measure_map, allow_all=True)
84105
@clio.with_time_point(allow_all=True)
85106
@clio.with_resolution(bdc.RESOLUTIONS)
86107
@clio.with_output_directory(bdc.MODEL_ROOT)
87108
@clio.with_queue()
88109
def format_ghsl(
89-
measure: list[str],
90110
time_point: str,
91111
resolution: str,
92112
output_dir: str,
@@ -100,7 +120,7 @@ def format_ghsl(
100120
print("Loading the tile index")
101121
tile_index = bd_data.load_tile_index(resolution)
102122
block_keys = tile_index.block_key.unique().tolist()
103-
njobs = len(block_keys) * len(time_point) * len(measure)
123+
njobs = len(block_keys) * len(time_points)
104124
print(f"Formating building density for {njobs} block-times")
105125

106126
memory, runtime = ghsl_version.process_resources(resolution)
@@ -114,7 +134,6 @@ def format_ghsl(
114134
},
115135
node_args={
116136
"block-key": block_keys,
117-
"measure": measure,
118137
"time-point": time_points,
119138
},
120139
task_resources={

src/rra_building_density/process/microsoft.py

Lines changed: 8 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import click
22
import numpy as np
33
import rasterra as rt
4-
from affine import Affine
54
from rra_tools import jobmon
65

76
from rra_building_density import cli_options as clio
87
from rra_building_density import constants as bdc
9-
from rra_building_density import utils
108
from rra_building_density.data import BuildingDensityData
11-
12-
USE_WATER_MASK = False
9+
from rra_building_density.process import utils
1310

1411

1512
def format_microsoft_main(
@@ -25,28 +22,19 @@ def format_microsoft_main(
2522
tile_index_info = bd_data.load_tile_index_info(resolution)
2623
msft_index = bd_data.load_provider_index(msft_version, "union")
2724

28-
block_index = tile_index[tile_index.block_key == block_key]
29-
block_poly_series = block_index.dissolve("block_key").geometry
30-
block_poly = block_poly_series.iloc[0]
31-
block_poly_msft = block_poly_series.to_crs(msft_index.crs).iloc[0]
25+
block_poly, block_poly_msft = utils.get_block_polys(
26+
tile_index[tile_index.block_key == block_key], msft_index.crs
27+
)
3228

3329
block_template = utils.make_raster_template(
3430
block_poly,
3531
resolution=tile_index_info.tile_resolution,
3632
crs=bdc.CRSES["equal_area"],
3733
)
3834

39-
overlapping = msft_index.loc[
40-
msft_index.intersects(block_poly_msft), "quad_name"
41-
].tolist()
42-
43-
msft_tile_keys = [
44-
tile_key
45-
for tile_key in overlapping
46-
if bd_data.provider_tile_exists(
47-
msft_version, tile_key=tile_key, time_point=time_point
48-
)
49-
]
35+
msft_tile_keys = utils.get_provider_tile_keys(
36+
msft_index, block_poly_msft, msft_version, bd_data, time_point=time_point
37+
)
5038

5139
if not msft_tile_keys:
5240
print("No overlapping building tiles, likely open ocean.")
@@ -65,31 +53,8 @@ def format_microsoft_main(
6553
bd_tile = bd_data.load_provider_tile(
6654
msft_version, tile_key=tile_key, time_point=time_point
6755
)
68-
69-
# The resolution of the MSFT tiles has too many decimal points.
70-
# This causes tiles slightly west of the antimeridian to cross
71-
# over and really mucks up reprojection. We'll clip the values
72-
# here to 5 decimal places (ie to 100 microns), explicitly
73-
# rounding down. This reduces the width of the tile by
74-
# 512*0.0001 = 0.05m or 50cm, enough to fix roundoff issues.
75-
x_res, y_res = bd_tile.resolution
76-
xmin, xmax, ymin, ymax = bd_tile.bounds
77-
bd_tile._transform = Affine( # noqa: SLF001
78-
a=utils.precise_floor(x_res, 4),
79-
b=0.0,
80-
c=xmin,
81-
d=0.0,
82-
e=-utils.precise_floor(-y_res, 4),
83-
f=ymax,
84-
)
56+
bd_tile = utils.fix_microsoft_tile(bd_tile)
8557
bd_tile = bd_tile.unset_no_data_value().set_no_data_value(np.nan)
86-
if USE_WATER_MASK:
87-
# mask out water
88-
mask_version = bdc.MICROSOFT_VERSIONS["water_mask"]
89-
mask = bd_data.load_provider_tile(
90-
mask_version, tile_key=tile_key
91-
).to_numpy()
92-
bd_tile._ndarray[mask] = np.nan # noqa: SLF001
9358

9459
reprojected_tile = bd_tile.reproject(
9560
dst_resolution=block_template.x_resolution,

0 commit comments

Comments
 (0)