Skip to content

Commit 156e0a5

Browse files
authored
Merge pull request #25 from GEMScienceTools/subset_source
added subsetting to core and improvements to H3 indexing
2 parents 68a47d4 + dc6f372 commit 156e0a5

File tree

5 files changed

+102
-49
lines changed

5 files changed

+102
-49
lines changed

openquake/hme/core/core.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
breakpoint,
3939
get_eq_df_mag_bins,
4040
get_mag_bins_from_cfg,
41+
subset_source,
4142
)
4243

4344
from openquake.hme.utils.results_processing import process_results
@@ -254,11 +255,11 @@ def load_ruptures_from_file(cfg: dict):
254255
logging.info("Reading ruptures from {}".format(rup_file))
255256
if os.path.exists(rup_file):
256257
rupture_gdf = read_rupture_file(
257-
rup_file, h3_res=h3_res, parallel=parallel
258+
rup_file, h3_res=h3_res, parallel=False
258259
)
259260

260261
else:
261-
logging.warn("Rupture file does not exist; reading SSM.")
262+
logging.warning("Rupture file does not exist; reading SSM.")
262263
rupture_gdf = load_ruptures_from_ssm(cfg)
263264

264265
return rupture_gdf
@@ -369,21 +370,23 @@ def load_inputs(cfg: dict) -> dict:
369370
logging.info("grouping ruptures by cell")
370371
cell_groups = rupture_gdf.groupby("cell_id")
371372

372-
logger.info("rupture_gdf shape: {}".format(rupture_gdf.shape))
373+
logger.info(f"{len(rupture_gdf):_} ruptures in model")
373374
logger.debug(
374375
"rupture_gdf memory: {} GB".format(
375376
sum(rupture_gdf.memory_usage(index=True, deep=True)) * 1e-9
376377
)
377378
)
378379

379380
if cfg["input"]["subset"]["file"] is not None:
380-
# logger.info(" Subsetting bin_gdf")
381-
# bin_gdf = subset_source(
382-
# bin_gdf,
383-
# subset_file=cfg["input"]["subset"]["file"],
384-
# buffer=cfg["input"]["subset"]["buffer"],
385-
# )
386-
logger.warning("CANNOT SUBSET SOURCE YET!!!")
381+
logger.info("Subsetting rupture_gdf")
382+
rupture_gdf = subset_source(
383+
rupture_gdf,
384+
cfg["input"]["subset"]["file"],
385+
buffer=cfg["input"]["subset"].get("buffer"),
386+
fid=cfg["input"]["subset"].get("fid"),
387+
feature_number=cfg["input"]["subset"].get("feature_number"),
388+
)
389+
logger.info(f"{len(rupture_gdf):_} ruptures after subset")
387390

388391
if len(rupture_gdf) != len(rupture_gdf.index.unique()):
389392
logging.warning(

openquake/hme/utils/io/io.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from ..simple_rupture import SimpleRupture, rup_to_dict
2121
from openquake.hme.utils.io.source_processing import (
2222
_get_h3_cell_for_rupture_df,
23-
_get_h3_cell_for_rupture_df_parallel,
2423
)
2524

2625

@@ -116,6 +115,7 @@ def oq_rupture_to_json(
116115
def read_rupture_file(
117116
rupture_file, h3_res: int = 3, parallel=False
118117
) -> pd.DataFrame:
118+
# parallel not currently used but leaving for future possibilities
119119
rup_file_type = rupture_file.split(".")[-1]
120120

121121
if rup_file_type == "hdf5":
@@ -127,10 +127,7 @@ def read_rupture_file(
127127
else:
128128
raise ValueError("Cannot read filetype {}".format(rup_file_type))
129129

130-
if parallel is False:
131-
_get_h3_cell_for_rupture_df(rupture_df, h3_res)
132-
else:
133-
_get_h3_cell_for_rupture_df_parallel(rupture_df, h3_res)
130+
_get_h3_cell_for_rupture_df(rupture_df, h3_res)
134131

135132
return rupture_df
136133

openquake/hme/utils/io/source_processing.py

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import warnings
23
from time import sleep
34
from typing import Union, Optional, Tuple
45
from multiprocessing import Pool
@@ -10,6 +11,12 @@
1011
from shapely.geometry import Point, Polygon
1112
from tqdm.autonotebook import tqdm
1213

14+
warnings.filterwarnings(
15+
"ignore",
16+
message="Modules under `h3.unstable` are experimental, and may change at any time.",
17+
)
18+
19+
from h3.unstable import vect as h3_vect # name in 3.x
1320

1421
from openquake.hazardlib.source.rupture import (
1522
NonParametricProbabilisticRupture,
@@ -24,7 +31,7 @@
2431
flatten_list,
2532
get_nonparametric_rupture_occurrence_rate,
2633
_get_class_name,
27-
breakpoint
34+
breakpoint,
2835
)
2936

3037

@@ -401,7 +408,8 @@ def rupture_dict_from_logic_tree_dict(
401408
logic_tree_dict.items()
402409
):
403410
logging.info(
404-
f"processing {branch_name} ({i+1}/{len(logic_tree_dict.keys())})"
411+
f"processing {branch_name} ({i+1}/"
412+
+ f"{len(logic_tree_dict.keys())})"
405413
)
406414
rup_dict[branch_name] = rupture_list_from_source_list_parallel(
407415
source_list,
@@ -417,7 +425,8 @@ def rupture_dict_from_logic_tree_dict(
417425
logic_tree_dict.items()
418426
):
419427
logging.info(
420-
f"processing {branch_name} ({i+1}/{len(logic_tree_dict.keys())})"
428+
f"processing {branch_name} ({i+1}/"
429+
+ f"{len(logic_tree_dict.keys())})"
421430
)
422431
rup_dict[branch_name] = rupture_df_from_source_list(
423432
source_list,
@@ -452,10 +461,11 @@ def rupture_dict_to_gdf(
452461
if df.occurrence_rate.min() <= 0.0:
453462
logging.info("trimming zero-rate ruptures")
454463
df_len = len(df)
455-
df = df[df.occurrence_rate > 0.]
464+
df = df[df.occurrence_rate > 0.0]
456465
df_trim_len = len(df)
457466
logging.info(
458-
f"{df_trim_len} ruptures remaining, {df_len - df_trim_len} removed"
467+
f"{df_trim_len} ruptures remaining, "
468+
+ f"{df_len - df_trim_len} removed"
459469
)
460470

461471
if return_gdf:
@@ -472,35 +482,17 @@ def parse_geometry(row, x="longitude", y="latitude", z="depth"):
472482
return df
473483

474484

475-
def _get_h3_cell_for_rupture_df(rupture_df, h3_res):
476-
logging.info("getting H3 cells")
477-
cell_ids = list(
478-
tqdm(
479-
(
480-
h3.geo_to_h3(row.latitude, row.longitude, h3_res)
481-
for i, row in rupture_df.iterrows()
482-
),
483-
total=len(rupture_df),
484-
)
485-
)
486-
rupture_df["cell_id"] = cell_ids
487-
488-
489485
def _get_h3_cell(args):
490486
return h3.geo_to_h3(*args)
491487

492488

493-
def _get_h3_cell_for_rupture_df_parallel(rupture_df, h3_res):
494-
logging.info("getting H3 cells in parallel")
495-
496-
lons = rupture_df.longitude.values
497-
lats = rupture_df.latitude.values
498-
499-
args = ((lat, lons[i], h3_res) for i, lat in enumerate(lats))
500-
501-
with Pool(_n_procs) as pool:
502-
cell_ids = pool.map(_get_h3_cell, args)
489+
def _get_h3_cell_for_rupture_df(rupture_df, h3_res):
490+
lats = rupture_df["latitude"].to_numpy()
491+
lons = rupture_df["longitude"].to_numpy()
503492

493+
cell_ints = h3_vect.geo_to_h3(lats, lons, h3_res)
494+
h3_to_string = np.frompyfunc(h3.h3_to_string, 1, 1)
495+
cell_ids = h3_to_string(cell_ints)
504496
rupture_df["cell_id"] = cell_ids
505497

506498

openquake/hme/utils/tests/test_utils.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def test_get_mag_duration_from_comp_table(self):
5353
)
5454

5555

56-
def test_subset_source():
56+
def test_subset_source_no_buffer():
5757
rups = pd.read_csv(
5858
os.path.join(test_data_dir, "sm1_rups.csv"), index_col=0
5959
)
@@ -65,3 +65,19 @@ def test_subset_source():
6565
assert "88.0_313_0" not in rups_inside.index
6666

6767
assert len(rups_inside) == 4814
68+
69+
70+
def test_subset_source_buffer():
71+
rups = pd.read_csv(
72+
os.path.join(test_data_dir, "sm1_rups.csv"), index_col=0
73+
)
74+
subset_gj_file = os.path.join(test_data_dir, "data", "phl_subset.geojson")
75+
76+
rups_inside = subset_source(
77+
rups, subset_gj_file, feature_number=0, buffer=0.001
78+
) # small but checks if fns work
79+
80+
assert "88.3_20_0" in rups_inside.index
81+
assert "88.0_313_0" not in rups_inside.index
82+
83+
assert len(rups_inside) == 4814

openquake/hme/utils/utils.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
from h3 import h3
2222

2323
from tqdm.autonotebook import tqdm
24-
from shapely.geometry import Point
24+
25+
from shapely.geometry import Point, shape, mapping
2526

2627
from openquake.hazardlib.geo.point import Point as OQPoint
2728

@@ -992,8 +993,49 @@ def datetime_to_string(dt):
992993
return timestamp.strftime("%Y-%m-%d %H:%M:%S")
993994

994995

996+
def buffer_geojson_polygon(geojson_feature, buffer_distance):
997+
"""
998+
Buffer a GeoJSON polygon and return a new GeoJSON dict.
999+
1000+
Parameters:
1001+
-----------
1002+
geojson_feature : dict
1003+
GeoJSON Feature or Geometry dict with a Polygon
1004+
buffer_distance : float
1005+
Buffer distance in degrees
1006+
1007+
Returns:
1008+
--------
1009+
dict
1010+
GeoJSON geometry dict with buffered polygon
1011+
"""
1012+
is_feature = "geometry" in geojson_feature
1013+
1014+
if is_feature:
1015+
geometry_dict = geojson_feature["geometry"]
1016+
else:
1017+
geometry_dict = geojson_feature
1018+
1019+
polygon = shape(geometry_dict)
1020+
buffered_polygon = polygon.buffer(buffer_distance)
1021+
buffered_geometry = mapping(buffered_polygon)
1022+
1023+
if is_feature:
1024+
return {
1025+
"type": "Feature",
1026+
"geometry": buffered_geometry,
1027+
"properties": geojson_feature.get("properties", {}),
1028+
}
1029+
else:
1030+
return buffered_geometry
1031+
1032+
9951033
def subset_source(
996-
rupture_gdf, subset_geojson_file, fid=None, feature_number=None
1034+
rupture_gdf,
1035+
subset_geojson_file,
1036+
buffer=None,
1037+
fid=None,
1038+
feature_number=None,
9971039
):
9981040

9991041
with open(subset_geojson_file) as f:
@@ -1017,13 +1059,16 @@ def subset_source(
10171059

10181060
if subset_geojson["geometry"]["type"] not in [
10191061
"Polygon",
1020-
"MultiPolygon",
10211062
]:
10221063
raise ValueError(
1023-
"`subset_geojson_file` not Polygon or MultiPolygon: "
1064+
"`subset_geojson_file` not Polygon: "
10241065
+ f"is {subset_geojson['geometry']['type']}"
10251066
)
10261067

1068+
if buffer is not None:
1069+
logging.info(f" Buffering subset polygon at {buffer} degrees.")
1070+
subset_geojson = buffer_geojson_polygon(subset_geojson, buffer)
1071+
10271072
h3_res = h3.h3_get_resolution(rupture_gdf.iloc[0]["cell_id"])
10281073

10291074
subset_h3_cells = h3.polyfill_geojson(

0 commit comments

Comments
 (0)