
Commit da9ee53: update phasenet plus
1 parent da0705a

8 files changed: +488 additions, −322 deletions

scripts/args.py

Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,9 @@ def parse_args():
     parser.add_argument("--num_nodes", type=int, default=1, help="number of nodes")
     parser.add_argument("--node_rank", type=int, default=0, help="node rank")
 
+    ## Model
+    parser.add_argument("--model", type=str, default="phasenet", help="model")
+
     ## ADLOC
     parser.add_argument("--iter", type=int, default=0, help="iteration")
 
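Note: the new --model flag selects which model's output tree the downstream scripts read and write (default "phasenet"; the PhaseNet+ pipeline passes "phasenet_plus"). A minimal sketch of how the flag is consumed, assuming parse_args also defines --root_path and --region as the merge scripts expect:

# hypothetical driver snippet, not part of this commit
from args import parse_args

args = parse_args()  # e.g. python merge_phasenet_picks.py --model phasenet_plus
result_path = f"{args.region}/{args.model}"  # outputs land under <root_path>/<region>/<model>/
print(result_path)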

scripts/merge_phasenet_picks.py

Lines changed: 22 additions & 21 deletions
@@ -18,31 +18,31 @@
 from glob import glob
 
 
-def scan_csv(year, root_path, fs=None, bucket=None, protocol="file"):
+def scan_csv(year, root_path, region, model, fs=None, bucket=None, protocol="file"):
     # %%
     csv_list = []
     if protocol != "file":
-        jdays = fs.ls(f"{bucket}/{region}/{folder}/{year}")
+        jdays = fs.ls(f"{bucket}/{region}/{model}/picks/{year}")
     else:
-        jdays = os.listdir(f"{root_path}/{region}/phasenet/picks/{year}/")
+        jdays = os.listdir(f"{root_path}/{region}/{model}/picks/{year}/")
 
     for jday in jdays:
         if protocol != "file":
             csvs = fs.glob(f"{jday}/??/*.csv")
         else:
-            csvs = glob(f"{root_path}/{region}/phasenet/picks/{year}/{jday}/??/*.csv")
+            csvs = glob(f"{root_path}/{region}/{model}/picks/{year}/{jday}/??/*.csv")
 
         csv_list.extend([[year, jday, csv] for csv in csvs])
 
     csvs = pd.DataFrame(csv_list, columns=["year", "jday", "csv"])
-    csv_file = f"{root_path}/{region}/phasenet/csv_list_{year}.csv"
+    csv_file = f"{root_path}/{region}/{model}/csv_list_{year}.csv"
     csvs.to_csv(csv_file, index=False)
 
     return csv_file
 
 
 # %%
-def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
+def read_csv(rows, region, model, year, jday, root_path, fs=None, bucket=None):
 
     picks = []
     for i, row in rows.iterrows():
@@ -58,15 +58,15 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
 
     if len(picks) > 0:
         picks = pd.concat(picks, ignore_index=True)
-        if not os.path.exists(f"{root_path}/{region}/phasenet/{year}"):
-            os.makedirs(f"{root_path}/{region}/phasenet/{year}", exist_ok=True)
-        picks.to_csv(f"{root_path}/{region}/phasenet/{year}/{year}.{jday}.csv", index=False)
+        if not os.path.exists(f"{root_path}/{region}/{model}/{year}"):
+            os.makedirs(f"{root_path}/{region}/{model}/{year}", exist_ok=True)
+        picks.to_csv(f"{root_path}/{region}/{model}/{year}/{year}.{jday}.csv", index=False)
         # fs.put(
         #     f"{root_path}/{region}/phasenet/{year}/{jday}/{year}.{jday}.csv",
         #     f"{bucket}/{region}/phasenet_merged/{year}/{year}.{jday}.csv",
         # )
     else:
-        with open(f"{root_path}/{region}/phasenet/{year}/{year}.{jday}.csv", "w") as f:
+        with open(f"{root_path}/{region}/{model}/{year}/{year}.{jday}.csv", "w") as f:
             f.write("")
 
 
@@ -76,9 +76,9 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
     args = parse_args()
     root_path = args.root_path
     region = args.region
+    model = args.model
 
-    data_path = f"{region}/phasenet/picks"
-    result_path = f"{region}/phasenet"
+    result_path = f"{region}/{model}"
 
     # %%
     # protocol = "gs"
@@ -88,32 +88,33 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
     # fs = fsspec.filesystem(protocol, token=token)
 
     # %%
-    years = os.listdir(f"{root_path}/{region}/phasenet/picks")
+    # years = os.listdir(f"{root_path}/{region}/{model}/picks_{model}")
+    years = glob(f"{root_path}/{region}/{model}/picks_{model}/????/")
+    years = [year.rstrip("/").split("/")[-1] for year in years]
+    print(f"Years: {years}")
 
     for year in years:
 
-        csv_list = scan_csv(year, root_path)
+        csv_list = scan_csv(year, root_path, region, model)
 
         # %%
        csv_list = pd.read_csv(csv_list, dtype=str)
 
         # for jday, csvs in csv_list.groupby("jday"):
-        #     read_csv(csvs, region, year, jday, root_path)
+        #     read_csv(csvs, region, model, year, jday, root_path)
         #     raise
 
-        # ncpu = os.cpu_count()
-        ncpu = 64
+        ncpu = min(64, mp.cpu_count())
         print(f"Number of processors: {ncpu}")
         csv_by_jday = csv_list.groupby("jday")
         pbar = tqdm(total=len(csv_by_jday), desc=f"Loading csv files (year {year})")
 
-        # with mp.Pool(ncpu) as pool:
         ctx = mp.get_context("spawn")
         with ctx.Pool(ncpu) as pool:
             jobs = []
             for jday, csvs in csv_by_jday:
                 job = pool.apply_async(
-                    read_csv, (csvs, region, year, jday, root_path), callback=lambda _: pbar.update()
+                    read_csv, (csvs, region, model, year, jday, root_path), callback=lambda _: pbar.update()
                 )
                 jobs.append(job)
             pool.close()
@@ -126,11 +127,11 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
         pbar.close()
 
     # %%
-    csvs = glob(f"{root_path}/{region}/phasenet/????/????.???.csv")
+    csvs = glob(f"{root_path}/{region}/{model}/????/????.???.csv")
     picks = []
     for csv in tqdm(csvs, desc="Merge csv files"):
         picks.append(pd.read_csv(csv, dtype=str))
     picks = pd.concat(picks, ignore_index=True)
-    picks.to_csv(f"{root_path}/{region}/phasenet/phasenet_picks.csv", index=False)
+    picks.to_csv(f"{root_path}/{region}/{model}/{model}_picks.csv", index=False)
 
     # %%
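Note: the merge now runs in a spawn-context process pool with apply_async and a tqdm callback. A self-contained sketch of that pattern, with a placeholder work function standing in for read_csv:

# minimal sketch of the pool pattern used above; work() is a stand-in for read_csv
import multiprocessing as mp

from tqdm import tqdm


def work(x):
    return x * x


if __name__ == "__main__":
    items = list(range(8))
    ncpu = min(64, mp.cpu_count())  # cap the worker count, as the script does
    pbar = tqdm(total=len(items))
    ctx = mp.get_context("spawn")  # spawn avoids fork-related hangs with threaded libraries
    with ctx.Pool(ncpu) as pool:
        jobs = [pool.apply_async(work, (x,), callback=lambda _: pbar.update()) for x in items]
        pool.close()
        pool.join()
    results = [job.get() for job in jobs]  # get() also surfaces any worker exceptions
    pbar.close()
    print(results)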
Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
+# %%
+import json
+import multiprocessing as mp
+import os
+
+import fsspec
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+from args import parse_args
+from glob import glob
+
+
+def scan_csv(year, root_path, region, model, data="picks", fs=None, bucket=None, protocol="file"):
+    # %%
+    csv_list = []
+    if protocol != "file":
+        jdays = fs.ls(f"{bucket}/{region}/{model}/{data}_{model}/{year}")
+    else:
+        jdays = os.listdir(f"{root_path}/{region}/{model}/{data}_{model}/{year}/")
+
+    for jday in jdays:
+        if protocol != "file":
+            csvs = fs.glob(f"{jday}/??/*.csv")
+        else:
+            csvs = glob(f"{root_path}/{region}/{model}/{data}_{model}/{year}/{jday}/??/*.csv")
+
+        csv_list.extend([[year, jday, csv] for csv in csvs])
+
+    csvs = pd.DataFrame(csv_list, columns=["year", "jday", "csv"])
+    csv_file = f"{root_path}/{region}/{model}/{data}_list_{year}.csv"
+    csvs.to_csv(csv_file, index=False)
+
+    return csv_file
+
+
+# %%
+def read_csv(rows, region, model, data, year, jday, root_path, fs=None, bucket=None):
+
+    picks = []
+    for i, row in rows.iterrows():
+        # if fs.info(row["csv"])["size"] == 0:
+        #     continue
+        # with fs.open(row["csv"], "r") as f:
+        #     picks_ = pd.read_csv(f, dtype=str)
+        if os.path.getsize(row["csv"]) == 0:
+            continue
+        with open(row["csv"], "r") as f:
+            picks_ = pd.read_csv(f, dtype=str)
+        picks.append(picks_)
+
+    if len(picks) > 0:
+        picks = pd.concat(picks, ignore_index=True)
+        if not os.path.exists(f"{root_path}/{region}/{model}/{year}"):
+            os.makedirs(f"{root_path}/{region}/{model}/{year}", exist_ok=True)
+        picks.to_csv(f"{root_path}/{region}/{model}/{year}/{year}.{jday}.{data}.csv", index=False)
+        # fs.put(
+        #     f"{root_path}/{region}/phasenet/{year}/{jday}/{year}.{jday}.csv",
+        #     f"{bucket}/{region}/phasenet_merged/{year}/{year}.{jday}.csv",
+        # )
+    else:
+        with open(f"{root_path}/{region}/{model}/{year}/{year}.{jday}.{data}.csv", "w") as f:
+            f.write("")
+
+
+# %%
+if __name__ == "__main__":
+
+    args = parse_args()
+    root_path = args.root_path
+    region = args.region
+    # model = args.model
+    model = "phasenet_plus"
+
+    result_path = f"{region}/{model}"
+
+    # %%
+    # protocol = "gs"
+    # token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
+    # with open(token_json, "r") as fp:
+    #     token = json.load(fp)
+    # fs = fsspec.filesystem(protocol, token=token)
+
+    # %%
+    # years = os.listdir(f"{root_path}/{region}/{model}/picks_{model}")
+    years = glob(f"{root_path}/{region}/{model}/picks_{model}/????/")
+    years = [year.rstrip("/").split("/")[-1] for year in years]
+    print(f"Years: {years}")
+
+    for year in years:
+
+        for data in ["picks", "events"]:
+
+            csv_list = scan_csv(year, root_path, region, model, data)
+
+            # %%
+            csv_list = pd.read_csv(csv_list, dtype=str)
+
+            # for jday, csvs in csv_list.groupby("jday"):
+            #     read_csv(csvs, region, model, data, year, jday, root_path)
+            #     raise
+
+            ncpu = min(64, mp.cpu_count())
+            print(f"Number of processors: {ncpu}")
+            csv_by_jday = csv_list.groupby("jday")
+            pbar = tqdm(total=len(csv_by_jday), desc=f"Loading {data} csv files (year {year})")
+
+            ctx = mp.get_context("spawn")
+            with ctx.Pool(ncpu) as pool:
+                jobs = []
+                for jday, csvs in csv_by_jday:
+                    job = pool.apply_async(
+                        read_csv, (csvs, region, model, data, year, jday, root_path), callback=lambda _: pbar.update()
+                    )
+                    jobs.append(job)
+                pool.close()
+                pool.join()
+                for job in jobs:
+                    output = job.get()
+                    if output:
+                        print(output)
+
+            pbar.close()
+
+    # %%
+    for data in ["picks", "events"]:
+        csvs = glob(f"{root_path}/{region}/{model}/????/????.???.{data}.csv")
+        picks = []
+        for csv in tqdm(csvs, desc=f"Merge {data} csv files"):
+            picks.append(pd.read_csv(csv, dtype=str))
+        picks = pd.concat(picks, ignore_index=True)
+        picks.to_csv(f"{root_path}/{region}/{model}/{model}_{data}.csv", index=False)
+
+# %%
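Note: this new script repeats the merge for both picks and events, and skips zero-byte CSVs before concatenation (the script itself writes empty marker files for days with no detections, and pd.read_csv raises EmptyDataError on an empty file). A standalone sketch of that guard, with invented paths and a hypothetical helper name:

# minimal sketch of the zero-byte guard; merge_nonempty is a hypothetical helper
import os

import pandas as pd


def merge_nonempty(csv_paths, out_path):
    # Skip zero-byte placeholders; read the rest as strings, as the script does.
    frames = [pd.read_csv(p, dtype=str) for p in csv_paths if os.path.getsize(p) > 0]
    if frames:
        pd.concat(frames, ignore_index=True).to_csv(out_path, index=False)
    else:
        open(out_path, "w").close()  # keep an empty marker file, mirroring the script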

scripts/run_adloc.py

Lines changed: 4 additions & 1 deletion
@@ -46,6 +46,9 @@ def run_adloc(
     data_path = f"{root_path}/{region}/gamma"
     picks_file = os.path.join(data_path, f"gamma_picks.csv")
     events_file = os.path.join(data_path, f"gamma_events.csv")
+    # picks_file = f"{root_path}/{region}/phasenet_plus/phasenet_plus_picks_associated.csv"
+    # events_file = f"{root_path}/{region}/phasenet_plus/phasenet_plus_events_associated.csv"
+
     # stations_file = os.path.join(data_path, "stations.csv")
     stations_file = f"{root_path}/{region}/obspy/stations.json"
 
@@ -93,7 +96,7 @@ def run_adloc(
     events[["x_km", "y_km"]] = events.apply(
         lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
     )
-    events["z_km"] = events["depth_km"]
+    events["z_km"] = events["depth_km"] if "depth_km" in events.columns else 10.0
 
     ## set up the config; you can also specify the region manually
     if ("xlim_km" not in config) or ("ylim_km" not in config) or ("zlim_km" not in config):
