
Commit 270f1fd

add v2 version
1 parent 15bd242 commit 270f1fd

File tree

13 files changed: +1036 additions, -172 deletions

ADLoc
CCTorch
PhaseNet

scripts/args.py

Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,9 @@ def parse_args():
     parser.add_argument("--num_nodes", type=int, default=1, help="number of nodes")
     parser.add_argument("--node_rank", type=int, default=0, help="node rank")
 
+    ## ADLOC
+    parser.add_argument("--iter", type=int, default=0, help="iteration")
+
     ## CCTorch
     parser.add_argument("--dtct_pair", action="store_true", help="run convert_dtcc.py")

scripts/cut_templates_cc.py

Lines changed: 3 additions & 1 deletion
@@ -378,7 +378,9 @@ def cut_templates(root_path, region, config):
 
     xmin, ymin = proj(config["minlongitude"], config["minlatitude"])
     xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"])
-    zmin, zmax = config["mindepth"], config["maxdepth"]
+    # zmin, zmax = config["mindepth"], config["maxdepth"]
+    zmin = config["mindepth"] if "mindepth" in config else 0.0
+    zmax = config["maxdepth"] if "maxdepth" in config else 60.0
     config["xlim_km"] = (xmin, xmax)
     config["ylim_km"] = (ymin, ymax)
     config["zlim_km"] = (zmin, zmax)
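
For reference, the new fallback behaves like a dictionary lookup with a default; a minimal equivalent sketch (not the committed code), assuming config is a plain dict:

    zmin = config.get("mindepth", 0.0)   # fall back to 0 km when "mindepth" is missing
    zmax = config.get("maxdepth", 60.0)  # fall back to 60 km when "maxdepth" is missing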

scripts/download_station.py

Lines changed: 4 additions & 1 deletion
@@ -176,6 +176,7 @@ def parse_inventory_csv(inventory, mseed_ids=[]):
             sensor_description = channel.sensor.description
             channel_list.append(
                 {
+                    "station_id": f"{network.code}.{station.code}.{channel.location_code}.{channel.code[:-1]}",
                     "network": network.code,
                     "station": station.code,
                     "location": channel.location_code,
@@ -247,7 +248,9 @@ def parse_inventory_csv(inventory, mseed_ids=[]):
         tmp["provider"] = provider
         stations.append(tmp)
     stations = pd.concat(stations)
-    stations = stations.groupby(["network", "station", "location", "channel"], dropna=False).first().reset_index()
+    # stations = stations.groupby(["network", "station", "location", "channel"], dropna=False).first().reset_index()
+    stations = stations.sort_values(by=["station_id", "channel"])
+    stations = stations.groupby(["station_id"], dropna=False).first().reset_index()
     print(f"Merged {len(stations)} channels")
     stations.to_csv(f"{root_path}/{result_dir}/stations.csv", index=False)
     if protocol != "file":
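
The new deduplication keys on the station_id added above rather than on (network, station, location, channel), so each station keeps a single representative channel row. A minimal sketch of the effect on made-up rows (column names beyond station_id and channel are assumptions):

    import pandas as pd

    stations = pd.DataFrame(
        {
            "station_id": ["NC.ABC..HH", "NC.ABC..HH", "NC.XYZ.01.EH"],
            "channel": ["HHZ", "HHE", "EHZ"],
            "sensor": ["broadband", "broadband", "short-period"],
        }
    )
    # sort so the kept row per station_id is deterministic, then collapse to one row per station
    stations = stations.sort_values(by=["station_id", "channel"])
    stations = stations.groupby(["station_id"], dropna=False).first().reset_index()
    print(stations)  # two rows: NC.ABC..HH (channel HHE) and NC.XYZ.01.EH (channel EHZ)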

scripts/merge_gamma_picks.py

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
# %%
import json
import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone
from threading import Lock, Thread

import fsspec
import numpy as np
import pandas as pd
import pyproj
from obspy import read_inventory
from obspy.clients.fdsn import Client
from sklearn.cluster import DBSCAN
from tqdm import tqdm
from args import parse_args
from glob import glob


def load_data(year, jday, data_path, root_path, bucket, protocol, token):

    fs = fsspec.filesystem(protocol, token=token)
    adloc_events_csv = f"{data_path}/{year:04d}/adloc_events_{jday:03d}.csv"
    adloc_picks_csv = f"{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv"
    if protocol == "file":
        events = pd.read_csv(f"{root_path}/{adloc_events_csv}", parse_dates=["time"])
        picks = pd.read_csv(f"{root_path}/{adloc_picks_csv}", parse_dates=["phase_time"])
    else:
        with fs.open(f"{bucket}/{adloc_events_csv}", "r") as fp:
            events = pd.read_csv(fp, parse_dates=["time"])
        with fs.open(f"{bucket}/{adloc_picks_csv}", "r") as fp:
            picks = pd.read_csv(fp, parse_dates=["phase_time"])

    events["year"] = year
    events["jday"] = jday
    picks["year"] = year
    picks["jday"] = jday

    return events, picks


# %%
if __name__ == "__main__":

    args = parse_args()
    root_path = args.root_path
    region = args.region

    data_path = f"{region}/gamma"
    result_path = f"{region}/gamma"

    # %%
    # protocol = "gs"
    # token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
    # with open(token_json, "r") as fp:
    #     token = json.load(fp)
    # fs = fsspec.filesystem(protocol, token=token)

    # %%
    event_csvs = sorted(glob(f"{root_path}/{data_path}/????/????.???.events.csv"))

    # %%
    events = []
    picks = []
    for event_csv in tqdm(event_csvs, desc="Load event csvs"):
        pick_csv = event_csv.replace("events.csv", "picks.csv")
        year, jday = event_csv.split("/")[-1].split(".")[:2]
        events_ = pd.read_csv(event_csv, dtype=str)
        picks_ = pd.read_csv(pick_csv, dtype=str)
        events_["year"] = year
        events_["jday"] = jday
        picks_["year"] = year
        picks_["jday"] = jday
        events.append(events_)
        picks.append(picks_)

    events = pd.concat(events, ignore_index=True)
    picks = pd.concat(picks, ignore_index=True)

    events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"]
    picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"]

    events["event_index"] = np.arange(len(events))
    picks = picks.drop("event_index", axis=1)
    picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id")

    events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
    picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)

    events.to_csv(f"{root_path}/{result_path}/gamma_events.csv", index=False)
    picks.to_csv(f"{root_path}/{result_path}/gamma_picks.csv", index=False)

    # %%
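
Note on the re-indexing above: GaMMA event_index values restart every day, so the script builds a temporary dummy_id of the form year.jday.event_index, assigns a new globally unique event_index, and propagates it to the picks via a merge. A toy sketch of that idea (made-up frames, not the committed code):

    import numpy as np
    import pandas as pd

    events = pd.DataFrame({"dummy_id": ["2023.001.0", "2023.001.1", "2023.002.0"]})
    picks = pd.DataFrame({"dummy_id": ["2023.001.0", "2023.002.0", "2023.002.0"]})

    events["event_index"] = np.arange(len(events))  # 0, 1, 2: unique across all days
    picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id")
    print(picks["event_index"].tolist())  # [0, 2, 2]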

scripts/merge_phasenet_picks.py

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
# %%
import json
import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone
from threading import Lock, Thread

import fsspec
import numpy as np
import pandas as pd
import pyproj
from obspy import read_inventory
from obspy.clients.fdsn import Client
from sklearn.cluster import DBSCAN
from tqdm import tqdm
from args import parse_args
from glob import glob


def scan_csv(year, root_path, fs=None, bucket=None, protocol="file"):
    # %%
    csv_list = []
    if protocol != "file":
        jdays = fs.ls(f"{bucket}/{region}/{folder}/{year}")
    else:
        jdays = os.listdir(f"{root_path}/{region}/phasenet/picks/{year}/")

    for jday in jdays:
        if protocol != "file":
            csvs = fs.glob(f"{jday}/??/*.csv")
        else:
            csvs = glob(f"{root_path}/{region}/phasenet/picks/{year}/{jday}/??/*.csv")

        csv_list.extend([[year, jday, csv] for csv in csvs])

    csvs = pd.DataFrame(csv_list, columns=["year", "jday", "csv"])
    csv_file = f"{root_path}/{region}/phasenet/csv_list_{year}.csv"
    csvs.to_csv(csv_file, index=False)

    return csv_file


# %%
def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):

    picks = []
    for i, row in rows.iterrows():
        # if fs.info(row["csv"])["size"] == 0:
        #     continue
        # with fs.open(row["csv"], "r") as f:
        #     picks_ = pd.read_csv(f, dtype=str)
        if os.path.getsize(row["csv"]) == 0:
            continue
        with open(row["csv"], "r") as f:
            picks_ = pd.read_csv(f, dtype=str)
        picks.append(picks_)

    if len(picks) > 0:
        picks = pd.concat(picks, ignore_index=True)
        if not os.path.exists(f"{root_path}/{region}/phasenet/{year}"):
            os.makedirs(f"{root_path}/{region}/phasenet/{year}", exist_ok=True)
        picks.to_csv(f"{root_path}/{region}/phasenet/{year}/{year}.{jday}.csv", index=False)
        # fs.put(
        #     f"{root_path}/{region}/phasenet/{year}/{jday}/{year}.{jday}.csv",
        #     f"{bucket}/{region}/phasenet_merged/{year}/{year}.{jday}.csv",
        # )
    else:
        with open(f"{root_path}/{region}/phasenet/{year}/{year}.{jday}.csv", "w") as f:
            f.write("")


# %%
if __name__ == "__main__":

    args = parse_args()
    root_path = args.root_path
    region = args.region

    data_path = f"{region}/phasenet/picks"
    result_path = f"{region}/phasenet"

    # %%
    # protocol = "gs"
    # token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
    # with open(token_json, "r") as fp:
    #     token = json.load(fp)
    # fs = fsspec.filesystem(protocol, token=token)

    # %%
    years = os.listdir(f"{root_path}/{region}/phasenet/picks")

    for year in years:

        csv_list = scan_csv(year, root_path)

        # %%
        csv_list = pd.read_csv(csv_list, dtype=str)

        # for jday, csvs in csv_list.groupby("jday"):
        #     read_csv(csvs, region, year, jday, root_path)
        #     raise

        # ncpu = os.cpu_count()
        ncpu = 64
        print(f"Number of processors: {ncpu}")
        csv_by_jday = csv_list.groupby("jday")
        pbar = tqdm(total=len(csv_by_jday), desc=f"Loading csv files (year {year})")

        # with mp.Pool(ncpu) as pool:
        ctx = mp.get_context("spawn")
        with ctx.Pool(ncpu) as pool:
            jobs = []
            for jday, csvs in csv_by_jday:
                job = pool.apply_async(
                    read_csv, (csvs, region, year, jday, root_path), callback=lambda _: pbar.update()
                )
                jobs.append(job)
            pool.close()
            pool.join()
            for job in jobs:
                output = job.get()
                if output:
                    print(output)

        pbar.close()

    # %%
    csvs = glob(f"{root_path}/{region}/phasenet/????/????.???.csv")
    picks = []
    for csv in tqdm(csvs, desc="Merge csv files"):
        picks.append(pd.read_csv(csv, dtype=str))
    picks = pd.concat(picks, ignore_index=True)
    picks.to_csv(f"{root_path}/{region}/phasenet/phasenet_picks.csv", index=False)

    # %%
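
The day-level merging above uses a spawn-context multiprocessing pool with apply_async and a tqdm callback. A stripped-down sketch of that pattern (hypothetical work function, not the committed code):

    import multiprocessing as mp
    from tqdm import tqdm

    def work(x):
        # stands in for read_csv(csvs, region, year, jday, root_path)
        return x * x

    if __name__ == "__main__":
        items = list(range(8))
        pbar = tqdm(total=len(items))
        ctx = mp.get_context("spawn")  # spawn avoids inheriting forked state (open files, locks)
        with ctx.Pool(4) as pool:
            jobs = [pool.apply_async(work, (i,), callback=lambda _: pbar.update()) for i in items]
            pool.close()
            pool.join()
            results = [job.get() for job in jobs]
        pbar.close()
        print(results)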

scripts/plot_catalog.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@
 
 
 # %%
-gamma_file = f"{root_path}/{region}/gamma/gamma_events_000_001.csv"
+gamma_file = f"{root_path}/{region}/gamma/gamma_events.csv"
 gamma_exist = False
 if os.path.exists(gamma_file):
     print(f"Reading {gamma_file}")

scripts/run_adloc.py

Lines changed: 4 additions & 3 deletions
@@ -45,8 +45,8 @@ def run_adloc(
 
     # %%
     data_path = f"{root_path}/{region}/gamma"
-    picks_file = os.path.join(data_path, f"gamma_picks_{node_rank:03d}_{num_nodes:03d}.csv")
-    events_file = os.path.join(data_path, f"gamma_events_{node_rank:03d}_{num_nodes:03d}.csv")
+    picks_file = os.path.join(data_path, f"gamma_picks.csv")
+    events_file = os.path.join(data_path, f"gamma_events.csv")
     # stations_file = os.path.join(data_path, "stations.csv")
     stations_file = f"{root_path}/{region}/obspy/stations.json"
 
@@ -66,7 +66,7 @@ def run_adloc(
     stations.reset_index(drop=True, inplace=True)
 
     config["mindepth"] = config["mindepth"] if "mindepth" in config else 0.0
-    config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 30.0
+    config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 60.0
     config["use_amplitude"] = True
 
     # ## Eikonal for 1D velocity model
@@ -203,6 +203,7 @@ def run_adloc(
     for iter in range(MAX_SST_ITER):
         # picks, events = invert_location_iter(picks, stations, config, estimator, events_init=events_init, iter=iter)
         picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter)
+
         station_term_amp = (
             picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
         )
