Skip to content

Commit 0208470

Browse files
committed
run_adloc_v2.py works
1 parent 270f1fd commit 0208470

File tree

5 files changed

+261
-130
lines changed

5 files changed

+261
-130
lines changed

scripts/merge_adloc_picks.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
# %%
"""Merge per-day ADLoc location results into region-level CSV files.

Reads ``{root_path}/{region}/adloc/<year>/<year>.<jday>.{events,picks,stations}_sst_<iter>.csv``,
assigns globally unique event indices across days, accumulates per-station
travel-time / amplitude corrections (station terms) over SST iterations,
writes merged ``adloc_{events,picks,stations}_sst_<iter>.csv`` files, and
saves a 3x3 summary figure.
"""
import json
import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone
from threading import Lock, Thread

import fsspec
import numpy as np
import pandas as pd
import pyproj
from obspy import read_inventory
from obspy.clients.fdsn import Client
from sklearn.cluster import DBSCAN
from tqdm import tqdm
from args import parse_args
from glob import glob
import matplotlib.pyplot as plt
from utils.plotting import plotting_ransac

# %%
if __name__ == "__main__":

    args = parse_args()
    root_path = args.root_path
    region = args.region
    # Renamed from `iter` to avoid shadowing the builtin; presumably the
    # source-specific station-term (SST) iteration number — TODO confirm.
    sst_iter = args.iter

    data_path = f"{region}/adloc"
    result_path = f"{region}/adloc"
    figure_path = f"{region}/adloc/figures"
    # FIX: the summary figure is saved under {root_path}/{figure_path} (see
    # plt.savefig below), so the directory must be created there — the original
    # created it relative to the CWD and the save failed for root_path != ".".
    # exist_ok=True also avoids the exists()/makedirs() race.
    os.makedirs(f"{root_path}/{figure_path}", exist_ok=True)

    # %%
    # protocol = "gs"
    # token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
    # with open(token_json, "r") as fp:
    #     token = json.load(fp)
    # fs = fsspec.filesystem(protocol, token=token)

    # %%
    event_csvs = sorted(glob(f"{root_path}/{data_path}/????/????.???.events_sst_{sst_iter}.csv"))
    # Guard: pd.concat([]) below would raise an obscure ValueError otherwise.
    if not event_csvs:
        raise FileNotFoundError(
            f"No events_sst_{sst_iter}.csv files found under {root_path}/{data_path}"
        )

    # %%
    events = []
    picks = []
    stations = []
    for event_csv in tqdm(event_csvs, desc="Load event csvs"):
        pick_csv = event_csv.replace(f"events_sst_{sst_iter}.csv", f"picks_sst_{sst_iter}.csv")
        station_csv = event_csv.replace(f"events_sst_{sst_iter}.csv", f"stations_sst_{sst_iter}.csv")

        year, jday = event_csv.split("/")[-1].split(".")[:2]
        # dtype=str keeps event_index as a string so the dummy_id string
        # concatenation below works without explicit casts.
        events_ = pd.read_csv(event_csv, dtype=str)
        picks_ = pd.read_csv(pick_csv, dtype=str)
        stations_ = pd.read_csv(station_csv)
        events_["year"] = year
        events_["jday"] = jday
        picks_["year"] = year
        picks_["jday"] = jday
        stations_["year"] = year
        stations_["jday"] = jday
        events.append(events_)
        picks.append(picks_)
        stations.append(stations_)

    events = pd.concat(events, ignore_index=True)
    picks = pd.concat(picks, ignore_index=True)
    stations = pd.concat(stations, ignore_index=True)

    # Pick-count-weighted average of each station's daily terms; 0 when the
    # station has no picks of that phase at all.
    station_terms = (
        stations.groupby(["station_id"])
        .apply(
            lambda x: pd.Series(
                {
                    "station_term_time_p": (
                        (x.station_term_time_p * x.num_pick_p).sum() / x.num_pick_p.sum()
                        if x.num_pick_p.sum() > 0
                        else 0
                    ),
                    "station_term_time_s": (
                        (x.station_term_time_s * x.num_pick_s).sum() / x.num_pick_s.sum()
                        if x.num_pick_s.sum() > 0
                        else 0
                    ),
                    "station_term_amplitude": (
                        (x.station_term_amplitude * x.num_pick).sum() / x.num_pick.sum() if x.num_pick.sum() > 0 else 0
                    ),
                }
            )
        )
        .reset_index()
    )
    if sst_iter > 0:
        # Station terms are incremental per iteration: accumulate onto the
        # terms written by the previous iteration's merged station file.
        stations_prev = pd.read_csv(f"{root_path}/{result_path}/adloc_stations_sst_{sst_iter - 1}.csv")
        stations_prev.set_index("station_id", inplace=True)

        for col in ["station_term_time_p", "station_term_time_s", "station_term_amplitude"]:
            station_terms[col] += station_terms["station_id"].map(stations_prev[col]).fillna(0)

    # Keep one metadata row per station and attach the aggregated terms.
    stations = stations.groupby(["station_id"]).first().reset_index()
    stations.drop(["station_term_time_p", "station_term_time_s", "station_term_amplitude"], axis=1, inplace=True)
    stations = stations.merge(station_terms, on="station_id")

    # dummy_id makes the per-day event_index unique across the whole region
    # so picks can be re-linked to the re-indexed events.
    events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"]
    picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"]

    events["event_index"] = np.arange(len(events))
    picks = picks.drop("event_index", axis=1)
    # how="left" keeps picks without a matching event (NaN event_index) rather
    # than silently dropping them — consistent with merge_gamma_picks.py.
    picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id", how="left")

    events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
    picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
    stations.drop(["year", "jday"], axis=1, inplace=True)

    events.to_csv(f"{root_path}/{result_path}/adloc_events_sst_{sst_iter}.csv", index=False)
    picks.to_csv(f"{root_path}/{result_path}/adloc_picks_sst_{sst_iter}.csv", index=False)
    stations.to_csv(f"{root_path}/{result_path}/adloc_stations_sst_{sst_iter}.csv", index=False)

    # %%
    # Reload from the written CSVs so plotting sees inferred numeric dtypes
    # (the in-memory frames were loaded with dtype=str).
    events = pd.read_csv(f"{root_path}/{result_path}/adloc_events_sst_{sst_iter}.csv")
    picks = pd.read_csv(f"{root_path}/{result_path}/adloc_picks_sst_{sst_iter}.csv")
    stations = pd.read_csv(f"{root_path}/{result_path}/adloc_stations_sst_{sst_iter}.csv")

    fig, ax = plt.subplots(3, 3, figsize=(12, 10))
    ax[0, 0].scatter(events["longitude"], events["latitude"], c=events["depth_km"], s=1, cmap="viridis_r")
    ax[0, 0].set_title(f"Events {len(events)}")
    ax[0, 1].scatter(events["longitude"], events["depth_km"], c=events["depth_km"], s=1, cmap="viridis_r")
    ax[0, 1].invert_yaxis()
    ax[0, 1].set_title(f"Events depth")
    ax[0, 2].scatter(events["latitude"], events["depth_km"], c=events["depth_km"], s=1, cmap="viridis_r")
    ax[0, 2].invert_yaxis()
    ax[0, 2].set_title(f"Events latitude")
    ax[1, 0].scatter(
        stations["longitude"], stations["latitude"], c=stations["station_term_time_p"], marker="^", cmap="viridis_r"
    )
    ax[1, 0].set_title(f"Station term time P {stations['station_term_time_p'].mean():.2f} s")
    ax[1, 1].scatter(
        stations["longitude"], stations["latitude"], c=stations["station_term_time_s"], marker="^", cmap="viridis_r"
    )
    ax[1, 1].set_title(f"Station term time S {stations['station_term_time_s'].mean():.2f} s")
    ax[1, 2].scatter(
        stations["longitude"], stations["latitude"], c=stations["station_term_amplitude"], marker="^", cmap="viridis_r"
    )
    ax[1, 2].set_title(f"Station term amplitude {stations['station_term_amplitude'].mean():.2f} m")
    ax[2, 0].hist(events["adloc_residual_time"], bins=30, edgecolor="white")
    ax[2, 0].set_title(f"Event residual time")
    ax[2, 1].hist(events["adloc_residual_amplitude"], bins=30, edgecolor="white")
    ax[2, 1].set_title(f"Event residual amplitude")
    # Only picks kept by ADLoc (adloc_mask == 1) have meaningful residuals.
    idx = picks["adloc_mask"] == 1
    ax[2, 2].hist(picks.loc[idx, "adloc_residual_time"], bins=30, edgecolor="white")
    ax[2, 2].set_title(f"Pick residual time")
    # ax[2, 2].hist(picks["adloc_residual_amplitude"], bins=30, edgecolor="white")
    # ax[2, 2].set_title(f"Pick residual amplitude")
    plt.tight_layout()
    plt.savefig(f"{root_path}/{figure_path}/adloc_summary_{sst_iter}.png")
    plt.close(fig)

scripts/merge_gamma_picks.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,6 @@
1818
from glob import glob
1919

2020

21-
def load_data(year, jday, data_path, root_path, bucket, protocol, token):
22-
23-
fs = fsspec.filesystem(protocol, token=token)
24-
adloc_events_csv = f"{data_path}/{year:04d}/adloc_events_{jday:03d}.csv"
25-
adloc_picks_csv = f"{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv"
26-
if protocol == "file":
27-
events = pd.read_csv(f"{root_path}/{adloc_events_csv}", parse_dates=["time"])
28-
picks = pd.read_csv(f"{root_path}/{adloc_picks_csv}", parse_dates=["phase_time"])
29-
else:
30-
with fs.open(f"{bucket}/{adloc_events_csv}", "r") as fp:
31-
events = pd.read_csv(fp, parse_dates=["time"])
32-
with fs.open(f"{bucket}/{adloc_picks_csv}", "r") as fp:
33-
picks = pd.read_csv(fp, parse_dates=["phase_time"])
34-
35-
events["year"] = year
36-
events["jday"] = jday
37-
picks["year"] = year
38-
picks["jday"] = jday
39-
40-
return events, picks
41-
42-
4321
# %%
4422
if __name__ == "__main__":
4523

@@ -83,7 +61,7 @@ def load_data(year, jday, data_path, root_path, bucket, protocol, token):
8361

8462
events["event_index"] = np.arange(len(events))
8563
picks = picks.drop("event_index", axis=1)
86-
picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id")
64+
picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id", how="left")
8765

8866
events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
8967
picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)

scripts/run_adloc.py

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ def run_adloc(
2626
config: Dict,
2727
node_rank: int = 0,
2828
num_nodes: int = 1,
29-
picks_csv: str = None,
3029
protocol: str = "file",
3130
bucket: str = "",
3231
token: Dict = None,
@@ -69,13 +68,6 @@ def run_adloc(
6968
config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 60.0
7069
config["use_amplitude"] = True
7170

72-
# ## Eikonal for 1D velocity model
73-
zz = [0.0, 5.5, 16.0, 32.0]
74-
vp = [5.5, 5.5, 6.7, 7.8]
75-
vp_vs_ratio = 1.73
76-
vs = [v / vp_vs_ratio for v in vp]
77-
h = 0.3
78-
7971
# %%
8072
## Automatic region; you can also specify a region
8173
# lon0 = stations["longitude"].median()
@@ -119,6 +111,17 @@ def run_adloc(
119111
# %%
120112
config["eikonal"] = None
121113

114+
# ## Eikonal for 1D velocity model
115+
zz = [0.0, 5.5, 16.0, 32.0]
116+
vp = [5.5, 5.5, 6.7, 7.8]
117+
vp_vs_ratio = 1.73
118+
vs = [v / vp_vs_ratio for v in vp]
119+
# Northern California (Gil7)
120+
# zz = [0.0, 1.0, 3.0, 4.0, 5.0, 17.0, 25.0, 62.0]
121+
# vp = [3.2, 3.2, 4.5, 4.8, 5.51, 6.21, 6.89, 7.83]
122+
# vs = [1.5, 1.5, 2.4, 2.78, 3.18, 3.40, 3.98, 4.52]
123+
h = 0.3
124+
122125
if os.path.exists(f"{root_path}/{region}/obspy/velocity.csv"):
123126
velocity = pd.read_csv(f"{root_path}/{region}/obspy/velocity.csv")
124127
zz = velocity["z_km"].values
@@ -153,17 +156,6 @@ def run_adloc(
153156
(None, None), # t
154157
)
155158

156-
# %%
157-
plt.figure()
158-
plt.scatter(stations["x_km"], stations["y_km"], c=stations["depth_km"], cmap="viridis_r", s=100, marker="^")
159-
plt.colorbar(label="Depth (km)")
160-
plt.xlabel("X (km)")
161-
plt.ylabel("Y (km)")
162-
plt.xlim(config["xlim_km"])
163-
plt.ylim(config["ylim_km"])
164-
plt.title("Stations")
165-
plt.savefig(os.path.join(figure_path, "stations.png"), bbox_inches="tight", dpi=300)
166-
167159
# %%
168160
mapping_phase_type_int = {"P": 0, "S": 1}
169161
config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()}
@@ -207,9 +199,8 @@ def run_adloc(
207199
station_term_amp = (
208200
picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
209201
)
210-
stations["station_term_amplitude"] += (
211-
stations["idx_sta"].map(station_term_amp.set_index("idx_sta")["residual_amplitude"]).fillna(0)
212-
)
202+
station_term_amp.set_index("idx_sta", inplace=True)
203+
stations["station_term_amplitude"] += stations["idx_sta"].map(station_term_amp["residual_amplitude"]).fillna(0)
213204

214205
## Same P and S station term
215206
# station_term_time = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
@@ -224,15 +215,12 @@ def run_adloc(
224215
station_term_time = (
225216
picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_time": "mean"}).reset_index()
226217
)
218+
station_term_time.set_index("idx_sta", inplace=True)
227219
stations["station_term_time_p"] += (
228-
stations["idx_sta"]
229-
.map(station_term_time[station_term_time["phase_type"] == 0].set_index("idx_sta")["residual_time"])
230-
.fillna(0)
220+
stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 0]["residual_time"]).fillna(0)
231221
)
232222
stations["station_term_time_s"] += (
233-
stations["idx_sta"]
234-
.map(station_term_time[station_term_time["phase_type"] == 1].set_index("idx_sta")["residual_time"])
235-
.fillna(0)
223+
stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 1]["residual_time"]).fillna(0)
236224
)
237225

238226
plotting_ransac(stations, figure_path, config, picks, events_init, events, suffix=f"_ransac_sst_{iter}")

0 commit comments

Comments
 (0)