Commit a0f9dd0

fix relative location
1 parent 871cf52 commit a0f9dd0

6 files changed, +147 -203 lines changed


scripts/convert_dtcc.py

Lines changed: 77 additions & 174 deletions
@@ -1,192 +1,95 @@
 # %%
 import json
-import multiprocessing as mp
 import os
-import sys
-from datetime import datetime
-from glob import glob
-from pathlib import Path
+import pickle

-import h5py
 import numpy as np
 import pandas as pd
-import scipy
+from args import parse_args
 from tqdm import tqdm

-os.environ["OMP_NUM_THREADS"] = "8"
-
-
 # %%
-def extract_picks(pair, data, config, tt_memmap, station_df):
-    tt_memmap = np.memmap(
-        tt_memmap,
-        dtype=np.float32,
-        mode="r",
-        shape=tuple(config["traveltime_shape"]),
-    )
-
-    h5, id1 = pair
-
-    x = config["interp"]["x"]
-    x_interp = config["interp"]["x_interp"]
-    dt = config["interp"]["dt"]
-    dt_interp = config["interp"]["dt_interp"]
-    min_cc_score = config["min_cc_score"]
-    min_cc_diff = config["min_cc_diff"]
-    num_channel = config["num_channel"]
-    phase_list = config["phase_list"]
-
-    with h5py.File(h5, "r") as fp:
-        gp = fp[id1]
-        id1 = int(id1)
-
-        for id2 in gp:
-            ds = gp[id2]
-            id2 = int(id2)
-            if id1 > id2:
-                continue
-
-            # TODO: save only the best cc score
-            cc_score = ds["cc_score"][:]  # [nch, nsta, 3]
-            cc_index = ds["cc_index"][:]  # [nch, nsta, 3]
-            cc_diff = ds["cc_diff"][:]  # [nch, nsta]
-            neighbor_score = ds["neighbor_score"][:]  # [nch, nsta, 3]
-            # print(f"{cc_score.shape = }, {cc_index.shape = }, {cc_diff.shape = }, {neighbor_score.shape = }")
-
-            if np.max(cc_score) < min_cc_score or (np.max(cc_diff) < min_cc_diff):
-                continue
-
-            # cubic_score = scipy.interpolate.interp1d(x, neighbor_score, axis=-1, kind="quadratic")(x_interp)
-            # cubic_index = np.argmax(cubic_score, axis=-1, keepdims=True) - len(x_interp) // 2
-            # dt_cc = cc_index * dt + cubic_index * dt_interp
-
-            key = (id1, id2)
-            nch, nsta, npick = cc_score.shape
-            records = []
-            for i in range(nch // num_channel):
-                for j in range(nsta):
-                    dt_ct = tt_memmap[id1][i, j] - tt_memmap[id2][i, j]
-                    best = np.argmax(cc_score[i * num_channel : (i + 1) * num_channel, j, 0]) + i * num_channel
-                    if cc_score[best, j, 0] >= min_cc_score:
-                        cubic_score = scipy.interpolate.interp1d(x, neighbor_score[best, j, :], kind="quadratic")(
-                            x_interp
-                        )
-                        cubic_index = np.argmax(cubic_score) - len(x_interp) // 2
-                        dt_cc = cc_index[best, j, 0] * dt + cubic_index * dt_interp
-
-                        # Shelly (2016) Fluid-faulting evolution in high definition: Connecting fault structure and
-                        # frequency-magnitude variations during the 2014 Long Valley Caldera, California, earthquake swarm
-                        weight = (0.1 + 3 * cc_diff[best, j]) * cc_score[best, j, 0] ** 2
-                        records.append(
-                            [
-                                f"{station_df.loc[j]['station']:<4}",
-                                # dt_ct + dt_cc[best, j, 0],
-                                dt_ct + dt_cc,
-                                weight,
-                                phase_list[i],
-                            ]
-                        )
-
-            if len(records) > 0:
-                data[key] = records
-
-    return 0
-
-
-if __name__ == "__main__":
-    # %%
-    root_path = "local"
-    region = "demo"
-    if len(sys.argv) > 1:
-        root_path = sys.argv[1]
-        region = sys.argv[2]
+args = parse_args()
+root_path = args.root_path
+region = args.region

-    # %%
-    cctorch_path = f"{region}/cctorch"
+with open(f"{root_path}/{region}/config.json", "r") as fp:
+    config = json.load(fp)

-    # %%
-    with open(f"{root_path}/{cctorch_path}/config.json", "r") as fp:
-        config = json.load(fp)
-    config["min_cc_score"] = 0.6
-    config["min_cc_diff"] = 0.0
-
-    # %%
-    event_df = pd.read_csv(f"{root_path}/{cctorch_path}/events.csv", index_col=0)
-
-    # %%
-    station_df = pd.read_csv(f"{root_path}/{cctorch_path}/stations.csv", index_col=0)
-
-    # %%
-    tt_memmap = f"{root_path}/{cctorch_path}/traveltime.dat"
-
-    # %%
-    lines = []
-    for i, row in station_df.iterrows():
-        # tmp = f"{row['network']}{row['station']}"
-        tmp = f"{row['station']}"
-        line = f"{tmp:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n"
-        lines.append(line)
-
-    with open(f"{root_path}/{cctorch_path}/stlist.txt", "w") as fp:
-        fp.writelines(lines)
-
-    h5_list = sorted(list(glob(f"{root_path}/{cctorch_path}/ccpairs/*.h5")))
-
-    # %%
-    dt = 0.01
-    dt_interp = dt / 100
-    x = np.linspace(0, 1, 2 + 1)
-    x_interp = np.linspace(0, 1, 2 * int(dt / dt_interp) + 1)
-    num_channel = 3
-    phase_list = ["P", "S"]
+# %%
+data_path = f"{region}/cctorch"
+result_path = f"{region}/adloc_dd"
+if not os.path.exists(f"{result_path}"):
+    os.makedirs(f"{result_path}")

-    config["interp"] = {"x": x, "x_interp": x_interp, "dt": dt, "dt_interp": dt_interp}
-    config["num_channel"] = num_channel
-    config["phase_list"] = phase_list
+# %%
+stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv")
+stations["station_id"] = stations["station"]
+stations = stations.groupby("station_id").first().reset_index()

-    # %%
-    ctx = mp.get_context("spawn")
-    with ctx.Manager() as manager:
-        data = manager.dict()
-        pair_list = []
-        num_pair = 0
-        for h5 in h5_list:
-            with h5py.File(h5, "r") as fp:
-                for id1 in tqdm(fp, desc=f"Loading {h5.split('/')[-1]}", leave=True):
-                    gp1 = fp[id1]
-                    # for id2 in gp1:
-                    #     pair_list.append((h5, id1, id2))
-                    # pair_list.append([h5, id1, list(gp1.keys())])
-                    pair_list.append([h5, id1])
-                    num_pair += len(gp1.keys())
+# %%
+events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv", dtype={"event_index": str})
+events["time"] = pd.to_datetime(events["event_time"], format="mixed")

-        ncpu = max(1, min(32, mp.cpu_count() - 1))
-        pbar = tqdm(total=len(pair_list), desc="Extracting pairs")
-        print(f"Total pairs: {num_pair}. Using {ncpu} cores.")
+# %%
+stations["idx_sta"] = np.arange(len(stations)) # reindex in case the index does not start from 0 or is not continuous
+events["idx_eve"] = np.arange(len(events)) # reindex in case the index does not start from 0 or is not continuous
+mapping_phase_type_int = {"P": 0, "S": 1}

-        ## Debug
-        # for pair in pair_list:
-        #     extract_picks(pair, data, config, tt_memmap, station_df)
-        #     pbar.update()
+# %%
+with open(f"{root_path}/{data_path}/dt.cc", "r") as f:
+    lines = f.readlines()

-        with ctx.Pool(processes=ncpu) as pool:
-            # with mp.Pool(processes=ncpu) as pool:
-            for pair in pair_list:
-                pool.apply_async(
-                    extract_picks, args=(pair, data, config, tt_memmap, station_df), callback=lambda x: pbar.update()
-                )
-            pool.close()
-            pool.join()
-        pbar.close()
+# %%
+event_index1 = []
+event_index2 = []
+station_index = []
+phase_type = []
+phase_score = []
+phase_dtime = []
+
+stations.set_index("station_id", inplace=True)
+events.set_index("event_index", inplace=True)
+
+for line in tqdm(lines):
+    if line[0] == "#":
+        evid1, evid2, _ = line[1:].split()
+    else:
+        stid, dt, weight, phase = line.split()
+        event_index1.append(events.loc[evid1, "idx_eve"])
+        event_index2.append(events.loc[evid2, "idx_eve"])
+        station_index.append(stations.loc[stid, "idx_sta"])
+        phase_type.append(mapping_phase_type_int[phase])
+        phase_score.append(weight)
+        phase_dtime.append(dt)
+
+
+dtypes = np.dtype(
+    [
+        ("idx_eve1", np.int32),
+        ("idx_eve2", np.int32),
+        ("idx_sta", np.int32),
+        ("phase_type", np.int32),
+        ("phase_score", np.float32),
+        ("phase_dtime", np.float32),
+    ]
+)
+pairs_array = np.memmap(
+    f"{root_path}/{result_path}/pair_dt.dat",
+    mode="w+",
+    shape=(len(phase_dtime),),
+    dtype=dtypes,
+)
+pairs_array["idx_eve1"] = event_index1
+pairs_array["idx_eve2"] = event_index2
+pairs_array["idx_sta"] = station_index
+pairs_array["phase_type"] = phase_type
+pairs_array["phase_score"] = phase_score
+pairs_array["phase_dtime"] = phase_dtime
+with open(f"{root_path}/{result_path}/pair_dtypes.pkl", "wb") as f:
+    pickle.dump(dtypes, f)

-    data = dict(data)
-    print(f"Valid pairs: {len(data)}")

-    # %%
-    with open(f"{root_path}/{cctorch_path}/dt.cc", "w") as fp:
-        for key in tqdm(sorted(data.keys()), desc="Writing dt.cc"):
-            event_index0 = event_df.loc[key[0]]["event_index"]
-            event_index1 = event_df.loc[key[1]]["event_index"]
-            fp.write(f"# {event_index0} {event_index1} 0.000\n")
-            for record in data[key]:
-                fp.write(f"{record[0]} {record[1]: .4f} {record[2]:.4f} {record[3]}\n")
+# %%
+events.to_csv(f"{root_path}/{result_path}/pair_events.csv", index=True, index_label="event_index")
+stations.to_csv(f"{root_path}/{result_path}/pair_stations.csv", index=True, index_label="station_id")
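
The rewritten convert_dtcc.py no longer extracts picks from the CCTorch HDF5 output itself; it reads an existing dt.cc file and packs the differential times into a structured memmap (pair_dt.dat) whose dtype is pickled next to it. Below is a minimal read-back sketch, not part of the commit; the "local"/"demo" values mirror the old script defaults and are only placeholders.

import pickle

import numpy as np

root_path, region = "local", "demo"  # hypothetical values; normally taken from parse_args()
result_path = f"{region}/adloc_dd"

# dt.cc is plain text, e.g.:
# # 1234 5678 0.000       <- pair header: event_index1 event_index2 origin-time correction
# ABC   0.0123 0.8100 P   <- station, differential time (s), weight, phase

with open(f"{root_path}/{result_path}/pair_dtypes.pkl", "rb") as f:
    dtypes = pickle.load(f)  # structured dtype saved by convert_dtcc.py

# shape is inferred from the file size when omitted
pairs = np.memmap(f"{root_path}/{result_path}/pair_dt.dat", mode="r", dtype=dtypes)
print(pairs["idx_eve1"][:5], pairs["phase_dtime"][:5])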

scripts/generate_pairs.py

Lines changed: 6 additions & 6 deletions
@@ -194,9 +194,9 @@ def pairing_picks(event_pairs, picks, config):

     dtypes = np.dtype(
         [
-            ("event_index1", np.int32),
-            ("event_index2", np.int32),
-            ("station_index", np.int32),
+            ("idx_eve1", np.int32),
+            ("idx_eve2", np.int32),
+            ("idx_sta", np.int32),
             ("phase_type", np.int32),
             ("phase_score", np.float32),
             ("phase_dtime", np.float32),
@@ -208,9 +208,9 @@ def pairing_picks(event_pairs, picks, config):
         shape=(len(event_pairs),),
         dtype=dtypes,
     )
-    pairs_array["event_index1"] = event_pairs["idx_eve1"].values
-    pairs_array["event_index2"] = event_pairs["idx_eve2"].values
-    pairs_array["station_index"] = event_pairs["idx_sta"].values
+    pairs_array["idx_eve1"] = event_pairs["idx_eve1"].values
+    pairs_array["idx_eve2"] = event_pairs["idx_eve2"].values
+    pairs_array["idx_sta"] = event_pairs["idx_sta"].values
     pairs_array["phase_type"] = event_pairs["phase_type"].values
     pairs_array["phase_score"] = event_pairs["phase_score"].values
     pairs_array["phase_dtime"] = event_pairs["phase_dtime"].values

scripts/run_adloc_cc.py

Lines changed: 13 additions & 0 deletions
@@ -229,6 +229,9 @@
         "station_index": pairs["idx_sta"],
     }
 )
+
+pairs_df["time_error"] = pred_time - pairs["dt"]
+
 pairs_df = pairs_df[valid_index]
 config["MIN_OBS"] = 8
 pairs_df = pairs_df.groupby(["event_index1", "event_index2"], as_index=False, group_keys=False).filter(
@@ -239,6 +242,16 @@

 phase_dataset.valid_index = valid_index

+## correct origin time
+time_shift = np.zeros(len(travel_time.event_time.weight))
+time_count = np.zeros(len(travel_time.event_time.weight))
+np.add.at(time_shift, pairs_df["event_index1"].values, pairs_df["time_error"].values)
+np.add.at(time_shift, pairs_df["event_index2"].values, -pairs_df["time_error"].values)
+np.add.at(time_count, pairs_df["event_index1"].values, 1)
+np.add.at(time_count, pairs_df["event_index2"].values, 1)
+time_shift[time_count > 0] /= time_count[time_count > 0]
+travel_time.event_time.weight.data -= torch.tensor(time_shift[:, None], dtype=torch.float32)
+
 invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy()
 invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy()
 valid_event_index = np.unique(pairs["idx_eve1"][valid_index])
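
The added block corrects event origin times: for each pair, time_error is the predicted minus observed differential time; it is credited with a plus sign to the first event and a minus sign to the second, averaged per event, and the average is subtracted from the event_time parameters. A self-contained toy sketch of the np.add.at bookkeeping, with made-up values (not from the commit):

import numpy as np

num_events = 4
eve1 = np.array([0, 0, 2])                   # first event of each pair
eve2 = np.array([1, 2, 3])                   # second event of each pair
time_error = np.array([0.10, -0.02, 0.04])   # pred_time - observed dt per pair

time_shift = np.zeros(num_events)
time_count = np.zeros(num_events)
np.add.at(time_shift, eve1, time_error)      # first event accumulates +error
np.add.at(time_shift, eve2, -time_error)     # second event accumulates -error
np.add.at(time_count, eve1, 1)
np.add.at(time_count, eve2, 1)
time_shift[time_count > 0] /= time_count[time_count > 0]
print(time_shift)  # average origin-time shift per event, later subtracted from event_time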

scripts/run_adloc_ct.py

Lines changed: 19 additions & 5 deletions
@@ -223,11 +223,14 @@

 pairs_df = pd.DataFrame(
     {
-        "event_index1": pairs["event_index1"],
-        "event_index2": pairs["event_index2"],
-        "station_index": pairs["station_index"],
+        "event_index1": pairs["idx_eve1"],
+        "event_index2": pairs["idx_eve2"],
+        "station_index": pairs["idx_sta"],
     }
 )
+
+pairs_df["time_error"] = pred_time - pairs["phase_dtime"]
+
 pairs_df = pairs_df[valid_index]
 config["MIN_OBS"] = 8
 pairs_df = pairs_df.groupby(["event_index1", "event_index2"], as_index=False, group_keys=False).filter(
@@ -238,11 +241,22 @@

 phase_dataset.valid_index = valid_index

+## correct origin time
+time_shift = np.zeros(len(travel_time.event_time.weight))
+time_count = np.zeros(len(travel_time.event_time.weight))
+np.add.at(time_shift, pairs_df["event_index1"].values, pairs_df["time_error"].values)
+np.add.at(time_shift, pairs_df["event_index2"].values, -pairs_df["time_error"].values)
+np.add.at(time_count, pairs_df["event_index1"].values, 1)
+np.add.at(time_count, pairs_df["event_index2"].values, 1)
+time_shift[time_count > 0] /= time_count[time_count > 0]
+print(f"{np.mean(time_shift):.3f} {np.std(time_shift):.3f}")
+travel_time.event_time.weight.data -= torch.tensor(time_shift[:, None], dtype=torch.float32)
+
 invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy()
 invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy()
-valid_event_index = np.unique(pairs["event_index1"][valid_index])
+# valid_event_index = np.unique(pairs["event_index1"][valid_index])
 valid_event_index = np.concatenate(
-    [np.unique(pairs["event_index1"][valid_index]), np.unique(pairs["event_index2"][valid_index])]
+    [np.unique(pairs["idx_eve1"][valid_index]), np.unique(pairs["idx_eve2"][valid_index])]
 )
 valid_event_index = np.sort(np.unique(valid_event_index))
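
Besides the field renames, the redundant first assignment of valid_event_index is commented out, and the surviving union over both pair columns keeps events that appear only as the second member of a pair. A toy sketch of why both columns matter, with made-up values (not from the commit):

import numpy as np

idx_eve1 = np.array([0, 0, 1])
idx_eve2 = np.array([1, 2, 3])  # events 2 and 3 never appear in idx_eve1

only_first = np.unique(idx_eve1)
both = np.sort(np.unique(np.concatenate([np.unique(idx_eve1), np.unique(idx_eve2)])))
print(only_first, both)  # [0 1] [0 1 2 3]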

0 commit comments