# %%
"""Convert a CCTorch `dt.cc` differential-time file into the binary pair
format consumed by ADLoc-DD.

Reads `dt.cc` (hypoDD-style cross-correlation differential times), maps
event/station string IDs to dense integer indices, and writes:
  - `pair_dt.dat`     : structured `np.memmap` of pair records
  - `pair_dtypes.pkl` : pickled `np.dtype` describing `pair_dt.dat`
  - `pair_events.csv`, `pair_stations.csv` : the re-indexed catalogs
"""
import json
import os
import pickle

import numpy as np
import pandas as pd
from args import parse_args
from tqdm import tqdm

# %%
args = parse_args()
root_path = args.root_path
region = args.region

with open(f"{root_path}/{region}/config.json", "r") as fp:
    config = json.load(fp)

# %%
data_path = f"{region}/cctorch"
result_path = f"{region}/adloc_dd"
# Create the output directory under root_path: every write below targets
# f"{root_path}/{result_path}/...", so creating bare f"{result_path}"
# (relative to the CWD) would leave the real target directory missing.
os.makedirs(f"{root_path}/{result_path}", exist_ok=True)

# %%
stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv")
stations["station_id"] = stations["station"]
# Collapse duplicate rows (e.g. one row per channel) to one row per station.
stations = stations.groupby("station_id").first().reset_index()

# %%
# event_index is kept as str so it matches the string IDs parsed from dt.cc.
events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv", dtype={"event_index": str})
events["time"] = pd.to_datetime(events["event_time"], format="mixed")

# %%
stations["idx_sta"] = np.arange(len(stations))  # reindex in case the index does not start from 0 or is not continuous
events["idx_eve"] = np.arange(len(events))  # reindex in case the index does not start from 0 or is not continuous
mapping_phase_type_int = {"P": 0, "S": 1}

# %%
with open(f"{root_path}/{data_path}/dt.cc", "r") as f:
    lines = f.readlines()

# %%
event_index1 = []
event_index2 = []
station_index = []
phase_type = []
phase_score = []
phase_dtime = []

stations.set_index("station_id", inplace=True)
events.set_index("event_index", inplace=True)

# dt.cc format: a "# evid1 evid2 otc" header line introduces an event pair,
# followed by "station dt weight phase" observation lines for that pair.
# NOTE(review): a file that starts with an observation line before any "#"
# header would make evid1/evid2 unbound — assumed well-formed here.
for line in tqdm(lines):
    if not line.strip():
        continue  # tolerate blank/trailing lines instead of crashing
    if line.startswith("#"):
        evid1, evid2, _ = line[1:].split()
    else:
        stid, dt, weight, phase = line.split()
        event_index1.append(events.loc[evid1, "idx_eve"])
        event_index2.append(events.loc[evid2, "idx_eve"])
        station_index.append(stations.loc[stid, "idx_sta"])
        phase_type.append(mapping_phase_type_int[phase])
        # Convert at parse time instead of relying on NumPy's implicit
        # str -> float32 cast at assignment below.
        phase_score.append(float(weight))
        phase_dtime.append(float(dt))


# One record per differential-time observation.
dtypes = np.dtype(
    [
        ("idx_eve1", np.int32),
        ("idx_eve2", np.int32),
        ("idx_sta", np.int32),
        ("phase_type", np.int32),
        ("phase_score", np.float32),
        ("phase_dtime", np.float32),
    ]
)
pairs_array = np.memmap(
    f"{root_path}/{result_path}/pair_dt.dat",
    mode="w+",
    shape=(len(phase_dtime),),
    dtype=dtypes,
)
pairs_array["idx_eve1"] = event_index1
pairs_array["idx_eve2"] = event_index2
pairs_array["idx_sta"] = station_index
pairs_array["phase_type"] = phase_type
pairs_array["phase_score"] = phase_score
pairs_array["phase_dtime"] = phase_dtime
pairs_array.flush()  # make sure the memmap contents reach disk
# The reader needs the dtype to reinterpret the raw .dat file.
with open(f"{root_path}/{result_path}/pair_dtypes.pkl", "wb") as f:
    pickle.dump(dtypes, f)


# %%
# Persist the re-indexed catalogs so idx_eve/idx_sta can be mapped back.
events.to_csv(f"{root_path}/{result_path}/pair_events.csv", index=True, index_label="event_index")
stations.to_csv(f"{root_path}/{result_path}/pair_stations.csv", index=True, index_label="station_id")