Skip to content

Commit 016c411 (merge commit; 2 parents: 7977f18 + fb08077)

Merge pull request #9096 from gem/mea_tau_phi:
"Refactored conditioned GMFs a bit more"

File tree

3 files changed: +57 insertions, -41 deletions

openquake/calculators/event_based.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,20 @@ def strip_zeros(gmf_df):
188188
return gmf_df[ok]
189189

190190

191-
def get_computer(cmaker, proxy, sids, complete, station_data, station_sitecol):
191+
def get_computer(cmaker, proxy, rupgeoms, srcfilter,
192+
station_data, station_sitecol):
192193
"""
193194
:returns: GmfComputer or ConditionedGmfComputer
194195
"""
196+
sids = srcfilter.close_sids(proxy, cmaker.trt)
197+
if len(sids) == 0: # filtered away
198+
raise FarAwayRupture
199+
200+
complete = srcfilter.sitecol.complete
201+
proxy.geom = rupgeoms[proxy['geom_id']]
202+
ebr = proxy.to_ebr(cmaker.trt)
195203
oq = cmaker.oq
196-
trt = cmaker.trt
197-
ebr = proxy.to_ebr(trt)
204+
198205
if station_sitecol:
199206
stations = numpy.isin(sids, station_sitecol.sids)
200207
assert stations.sum(), 'There are no stations??'
@@ -246,6 +253,7 @@ def event_based(proxies, cmaker, stations, dstore, monitor):
246253
times = [] # rup_id, nsites, dt
247254
fmon = monitor('filtering ruptures', measuremem=False)
248255
cmon = monitor('computing gmfs', measuremem=False)
256+
rmon = monitor('reading mea,tau,phi', measuremem=False)
249257
max_iml = oq.get_max_iml()
250258
scenario = 'scenario' in oq.calculation_mode
251259
with dstore:
@@ -260,22 +268,17 @@ def event_based(proxies, cmaker, stations, dstore, monitor):
260268
with fmon:
261269
if proxy['mag'] < cmaker.min_mag:
262270
continue
263-
sids = srcfilter.close_sids(proxy, cmaker.trt)
264-
if len(sids) == 0: # filtered away
265-
continue
266-
proxy.geom = rupgeoms[proxy['geom_id']]
267271
try:
268272
computer = get_computer(
269-
cmaker, proxy, sids, sitecol.complete, *stations)
273+
cmaker, proxy, rupgeoms, srcfilter, *stations)
270274
except FarAwayRupture:
271275
# skip this rupture
272276
continue
273-
with cmon:
274-
if hasattr(computer, 'station_data'): # conditioned GMFs
275-
assert scenario
276-
df = computer.compute_all(dstore, sig_eps, max_iml)
277-
else: # regular GMFs
278-
df = computer.compute_all(scenario, sig_eps, max_iml)
277+
if hasattr(computer, 'station_data'): # conditioned GMFs
278+
assert scenario
279+
df = computer.compute_all(dstore, sig_eps, max_iml, cmon, rmon)
280+
else: # regular GMFs
281+
df = computer.compute_all(scenario, sig_eps, max_iml, cmon)
279282
dt = time.time() - t0
280283
times.append((proxy['id'], computer.ctx.rrup.min(), dt))
281284
alldata.append(df)
@@ -326,9 +329,10 @@ def starmap_from_rups(func, oq, full_lt, sitecol, dstore, save_tmp=None):
326329
logging.info('Affected sites = %.1f per rupture', rups['nsites'].mean())
327330
allproxies = [RuptureProxy(rec) for rec in rups]
328331
if "station_data" in oq.inputs:
332+
rupgeoms = dstore['rupgeoms'][:]
329333
trt = full_lt.trts[0]
330334
proxy = allproxies[0]
331-
proxy.geom = dstore['rupgeoms'][proxy['geom_id']]
335+
proxy.geom = rupgeoms[proxy['geom_id']]
332336
rup = proxy.to_ebr(trt).rupture
333337
station_df = dstore.read_df('station_data', 'site_id')
334338
maxdist = (oq.maximum_distance_stations or
@@ -347,11 +351,11 @@ def starmap_from_rups(func, oq, full_lt, sitecol, dstore, save_tmp=None):
347351
cmaker = ContextMaker(trt, rlzs_by_gsim, oq)
348352
maxdist = oq.maximum_distance(cmaker.trt)
349353
srcfilter = SourceFilter(sitecol.complete, maxdist)
350-
sids = srcfilter.close_sids(proxy, cmaker.trt)
351354
computer = get_computer(
352-
cmaker, proxy, sids, sitecol.complete,
355+
cmaker, proxy, rupgeoms, srcfilter,
353356
station_data, station_sites)
354357
ms, sids = computer.get_ms_and_sids()
358+
del proxy.geom # to reduce data transfer
355359
dstore.create_dset('conditioned/sids', sids)
356360
keys = ['mea', 'sig', 'tau', 'phi']
357361
for g, gsim in enumerate(ms):

openquake/hazardlib/calc/conditioned_gmfs.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
import pandas
115115
from openquake.baselib.python3compat import decode
116116
from openquake.baselib.general import AccumDict
117+
from openquake.baselib.performance import Monitor
117118
from openquake.hazardlib import correlation, cross_correlation
118119
from openquake.hazardlib.imt import from_string
119120
from openquake.hazardlib.calc.gmf import GmfComputer, exp
@@ -221,7 +222,8 @@ def get_ms_and_sids(self):
221222
self.cross_correl_between, self.cross_correl_within,
222223
self.cmaker.maximum_distance)
223224

224-
def compute_all(self, dstore, sig_eps=None, max_iml=None):
225+
def compute_all(self, dstore, sig_eps=None, max_iml=None,
226+
mon1=Monitor(), mon2=Monitor()):
225227
"""
226228
:returns: (dict with fields eid, sid, gmv_X, ...), dt
227229
"""
@@ -236,9 +238,13 @@ def compute_all(self, dstore, sig_eps=None, max_iml=None):
236238
# NB: ms is a dictionary gsim -> [imt -> array]
237239
sids = dstore['conditioned/sids'][:]
238240
for g, (gsim, rlzs) in enumerate(rlzs_by_gsim.items()):
239-
mean_covs = [dstore['conditioned/gsim_%d/%s' % (g, key)][:]
240-
for key in ('mea', 'sig', 'tau', 'phi')]
241-
array, sig, eps = self.compute(gsim, num_events, mean_covs, rng)
241+
with mon1:
242+
mea = dstore['conditioned/gsim_%d/mea' % g][:]
243+
tau = dstore['conditioned/gsim_%d/tau' % g][:]
244+
phi = dstore['conditioned/gsim_%d/phi' % g][:]
245+
with mon2:
246+
array, sig, eps = self.compute(
247+
gsim, num_events, mea, tau, phi, rng)
242248
M, N, E = array.shape # sig and eps have shapes (M, E) instead
243249
assert len(sids) == N, (len(sids), N)
244250

@@ -248,7 +254,7 @@ def compute_all(self, dstore, sig_eps=None, max_iml=None):
248254
if (array[m] > max_iml[m]).any():
249255
for n in range(N):
250256
bad = array[m, n] > max_iml[m] # shape E
251-
mean = mean_covs[0][m] # shape (N, 1)
257+
mean = mea[m] # shape (N, 1)
252258
array[m, n, bad] = exp(mean[n, 0], im)
253259

254260
# manage min_iml
@@ -286,29 +292,31 @@ def compute_all(self, dstore, sig_eps=None, max_iml=None):
286292
n += len(eids)
287293
return pandas.DataFrame(data)
288294

289-
def compute(self, gsim, num_events, mean_covs, rng):
295+
def compute(self, gsim, num_events, mea, tau, phi, rng):
290296
"""
291297
:param gsim: GSIM used to compute mean_stds
292298
:param num_events: the number of seismic events
293-
:param mean_covs: array of shape (4, M, N)
299+
:param mea: array of shape (M, N, 1)
300+
:param tau: array of shape (M, N, N)
301+
:param phi: array of shape (M, N, N)
294302
:returns:
295303
a 32 bit array of shape (num_imts, num_sites, num_events) and
296304
two arrays with shape (num_imts, num_events): sig for tau
297305
and eps for the random part
298306
"""
299-
M = len(self.imts)
300-
num_sids = mean_covs[0][0].size
301-
result = numpy.zeros((M, num_sids, num_events), F32)
307+
M, N, _ = mea.shape
308+
result = numpy.zeros((M, N, num_events), F32)
302309
sig = numpy.zeros((M, num_events), F32) # same for all events
303310
eps = numpy.zeros((M, num_events), F32) # not the same
304311

305-
for m, im in enumerate(self.imts):
306-
mu_Y_yD = mean_covs[0][m]
307-
cov_WY_WY_wD = mean_covs[2][m]
308-
cov_BY_BY_yD = mean_covs[3][m]
312+
for m, im in enumerate(self.cmaker.imtls):
313+
mu_Y_yD = mea[m]
314+
cov_WY_WY_wD = tau[m]
315+
cov_BY_BY_yD = phi[m]
309316
try:
310317
result[m], sig[m], eps[m] = self._compute(
311-
mu_Y_yD, cov_WY_WY_wD, cov_BY_BY_yD, im, num_events, rng)
318+
mu_Y_yD, cov_WY_WY_wD, cov_BY_BY_yD,
319+
im, num_events, rng)
312320
except Exception as exc:
313321
raise RuntimeError(
314322
"(%s, %s, source_id=%r) %s: %s"
@@ -683,8 +691,8 @@ def _compute_spatial_cross_correlation_matrix(
683691
return spatial_correlation_matrix * cross_corr_coeff
684692

685693

686-
def clip_evals(x, value=0): # threshold=0, value=0):
687-
evals, evecs = numpy.linalg.eigh(x)
694+
def clip_evals(x, value=0):
695+
evals, evecs = numpy.linalg.eigh(x) # totally dominates the performance
688696
clipped = numpy.any(evals < value)
689697
x_new = evecs * numpy.maximum(evals, value) @ evecs.T
690698
return x_new, clipped

openquake/hazardlib/calc/gmf.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import pandas
2525

2626
from openquake.baselib.general import AccumDict
27+
from openquake.baselib.performance import Monitor
2728
from openquake.hazardlib.const import StdDev
2829
from openquake.hazardlib.cross_correlation import NoCrossCorrelation
2930
from openquake.hazardlib.gsim.base import ContextMaker, FarAwayRupture
@@ -51,7 +52,7 @@ def exp(vals, imt):
5152
"""
5253
Exponentiate the values unless the IMT is MMI
5354
"""
54-
if str(imt) == 'MMI':
55+
if imt == 'MMI':
5556
return vals
5657
return numpy.exp(vals)
5758

@@ -130,7 +131,7 @@ def __init__(self, rupture, sitecol, cmaker, correlation_model=None,
130131
self.cross_correl = cross_correl or NoCrossCorrelation(
131132
cmaker.truncation_level)
132133

133-
def compute_all(self, scenario, sig_eps=None, max_iml=None):
134+
def compute_all(self, scenario, sig_eps=None, max_iml=None, mon=Monitor()):
134135
"""
135136
:returns: DataFrame with fields eid, rlz, sid, gmv_X, ...
136137
"""
@@ -148,7 +149,8 @@ def compute_all(self, scenario, sig_eps=None, max_iml=None):
148149
# NB: the trick for performance is to keep the call to
149150
# .compute outside of the loop over the realizations;
150151
# it is better to have few calls producing big arrays
151-
array, sig, eps = self.compute(gs, num_events, mean_stds[:, g])
152+
with mon:
153+
array, sig, eps = self.compute(gs, num_events, mean_stds[:, g])
152154
M, N, E = array.shape # sig and eps have shapes (M, E) instead
153155

154156
# manage max_iml
@@ -157,7 +159,8 @@ def compute_all(self, scenario, sig_eps=None, max_iml=None):
157159
if (array[m] > max_iml[m]).any():
158160
for n in range(N):
159161
bad = array[m, n] > max_iml[m] # shape E
160-
array[m, n, bad] = exp(mean_stds[0, g, m, n], im)
162+
array[m, n, bad] = exp(
163+
mean_stds[0, g, m, n], im)
161164

162165
# manage min_iml
163166
for n in range(N):
@@ -238,13 +241,14 @@ def compute(self, gsim, num_events, mean_stds):
238241
return result, sig, eps
239242

240243
def _compute(self, mean_stds, imt, gsim, intra_eps, inter_eps):
244+
im = imt.string
241245
if self.cmaker.truncation_level <= 1E-9:
242246
# for truncation_level = 0 there is only mean, no stds
243247
if self.correlation_model:
244248
raise ValueError('truncation_level=0 requires '
245249
'no correlation model')
246250
mean, _, _, _ = mean_stds
247-
gmf = exp(mean, imt)[:, None]
251+
gmf = exp(mean, im)[:, None]
248252
gmf = gmf.repeat(len(inter_eps), axis=1)
249253
inter_sig = 0
250254
elif gsim.DEFINED_FOR_STANDARD_DEVIATION_TYPES == {StdDev.TOTAL}:
@@ -257,7 +261,7 @@ def _compute(self, mean_stds, imt, gsim, intra_eps, inter_eps):
257261
self.correlation_model, gsim)
258262

259263
mean, sig, _, _ = mean_stds
260-
gmf = exp(mean[:, None] + sig[:, None] * intra_eps, imt)
264+
gmf = exp(mean[:, None] + sig[:, None] * intra_eps, im)
261265
inter_sig = numpy.nan
262266
else:
263267
mean, sig, tau, phi = mean_stds
@@ -274,7 +278,7 @@ def _compute(self, mean_stds, imt, gsim, intra_eps, inter_eps):
274278
intra_res = intra_res[:, None]
275279

276280
inter_res = tau[:, None] * inter_eps # shape (N, 1) * E => (N, E)
277-
gmf = exp(mean[:, None] + intra_res + inter_res, imt) # (N, E)
281+
gmf = exp(mean[:, None] + intra_res + inter_res, im) # (N, E)
278282
inter_sig = tau.max() # from shape (N, 1) => scalar
279283
return gmf, inter_sig, inter_eps # shapes (N, E), 1, E
280284

Commit comments: 0