Skip to content

Commit 7481c7b

Browse files
committed
fix conflicts between an and tm
1 parent 4d7d98c commit 7481c7b

File tree

9 files changed

+416
-116
lines changed

9 files changed

+416
-116
lines changed

scripts/download_waveform.py

Lines changed: 41 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,45 @@
1-
from typing import Dict, List
1+
import json
2+
import os
3+
import sys
4+
from datetime import datetime
5+
from glob import glob
6+
from typing import Dict
7+
8+
import fsspec
9+
import numpy as np
10+
import obspy
11+
import obspy.clients.fdsn
12+
import pandas as pd
13+
from args import parse_args
14+
from obspy.clients.fdsn.mass_downloader import (
15+
CircularDomain,
16+
GlobalDomain,
17+
MassDownloader,
18+
RectangularDomain,
19+
Restrictions,
20+
)
221

3-
from kfp import dsl
422

5-
6-
@dsl.component(base_image="zhuwq0/quakeflow:latest")
723
def download_waveform(
824
root_path: str,
925
region: str,
1026
config: Dict,
11-
rank: int = 0,
27+
node_rank: int = 0,
28+
num_nodes: int = 1,
1229
protocol: str = "file",
1330
bucket: str = "",
1431
token: Dict = None,
15-
) -> List:
16-
import os
17-
from datetime import datetime
18-
from glob import glob
19-
20-
import fsspec
21-
import numpy as np
22-
import obspy
23-
import obspy.clients.fdsn
24-
import pandas as pd
25-
from obspy.clients.fdsn.mass_downloader import (
26-
CircularDomain,
27-
GlobalDomain,
28-
MassDownloader,
29-
RectangularDomain,
30-
Restrictions,
31-
)
32+
):
3233

3334
# %%
3435
fs = fsspec.filesystem(protocol, token=token)
3536

3637
# %%
37-
num_nodes = config["kubeflow"]["num_nodes"] if "kubeflow" in config else 1
38-
3938
waveform_dir = f"{region}/waveforms"
4039
if not os.path.exists(f"{root_path}/{waveform_dir}"):
4140
os.makedirs(f"{root_path}/{waveform_dir}")
4241

43-
DELTATIME = "1H" # "1D"
42+
DELTATIME = "1D" # "1D"
4443
if DELTATIME == "1H":
4544
start = datetime.fromisoformat(config["starttime"]).strftime("%Y-%m-%dT%H")
4645
DELTATIME_SEC = 3600
@@ -49,7 +48,7 @@ def download_waveform(
4948
DELTATIME_SEC = 3600 * 24
5049
starttimes = pd.date_range(start, config["endtime"], freq=DELTATIME, tz="UTC", inclusive="left")
5150
# starttimes = starttimes[rank::num_nodes]
52-
starttimes = np.array_split(starttimes, num_nodes)[rank]
51+
starttimes = np.array_split(starttimes, num_nodes)[node_rank]
5352
if len(starttimes) == 0:
5453
return
5554

@@ -93,9 +92,11 @@ def download_waveform(
9392
print(f"{restrictions = }")
9493

9594
def get_mseed_storage(network, station, location, channel, starttime, endtime):
96-
mseed_name = (
97-
f"{starttime.strftime('%Y-%j')}/{starttime.strftime('%H')}/{network}.{station}.{location}.{channel}.mseed"
98-
)
95+
if DELTATIME == "1H":
96+
mseed_name = f"{starttime.strftime('%Y-%j')}/{starttime.strftime('%H')}/{network}.{station}.{location}.{channel}.mseed"
97+
elif DELTATIME == "1D":
98+
mseed_name = f"{starttime.strftime('%Y/%j')}/{network}.{station}.{location}.{channel}.mseed"
99+
mseed_name = f"{starttime.strftime('%Y/%j')}/{network}.{station}.{location}.{channel}.mseed"
99100
if os.path.exists(f"{root_path}/{waveform_dir}/{mseed_name}"):
100101
print(f"{root_path}/{waveform_dir}/{mseed_name} already exists. Skip.")
101102
return True
@@ -123,30 +124,24 @@ def get_mseed_storage(network, station, location, channel, starttime, endtime):
123124
fs.put(f"{root_path}/{waveform_dir}/", f"{bucket}/{waveform_dir}/", recursive=True)
124125
# fs.put(f"{root_path}/{waveform_dir}/stations/", f"{bucket}/{waveform_dir}/stations/", recursive=True)
125126

126-
mseed_list = glob(f"{root_path}/{waveform_dir}/**/*.mseed", recursive=True)
127-
with open(f"{root_path}/{region}/data/mseed_list_{rank:03d}.csv", "w") as fp:
128-
fp.write("\n".join(mseed_list))
129-
130-
return f"{region}/data/mseed_list_{rank:03d}.csv"
127+
return
131128

132129

133130
if __name__ == "__main__":
134-
import json
135-
import os
136-
import sys
137-
138-
root_path = "local"
139-
region = "demo"
140-
rank = int(os.environ["RANK"]) if "RANK" in os.environ else 0
141-
world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
142-
if len(sys.argv) > 1:
143-
root_path = sys.argv[1]
144-
region = sys.argv[2]
131+
132+
args = parse_args()
133+
root_path = args.root_path
134+
region = args.region
135+
protocol = args.protocol
136+
bucket = args.bucket
137+
token = args.token
138+
node_rank = args.node_rank
139+
num_nodes = args.num_nodes
145140

146141
with open(f"{root_path}/{region}/config.json", "r") as fp:
147142
config = json.load(fp)
148143

149-
download_waveform.execute(root_path, region=region, config=config, rank=rank)
144+
download_waveform(root_path, region=region, config=config, node_rank=node_rank, num_nodes=num_nodes)
150145

151146
# # %%
152147
# bucket = "quakeflow_share"

scripts/download_waveform_v2.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,8 @@ def download_waveform(
182182
root_path: str,
183183
region: str,
184184
config: Dict,
185-
rank: int = 0,
185+
node_rank: int = 0,
186+
num_nodes: int = 1,
186187
protocol: str = "file",
187188
bucket: str = "",
188189
token: Dict = None,
@@ -192,8 +193,7 @@ def download_waveform(
192193
fs = fsspec.filesystem(protocol=protocol, token=token)
193194
# %%
194195
network_dir = f"{region}/results/network"
195-
num_nodes = config["num_nodes"] if "num_nodes" in config else 1
196-
print(f"{num_nodes = }, {rank = }")
196+
print(f"{num_nodes = }, {node_rank = }")
197197
waveform_dir = f"{region}/waveforms"
198198
if not os.path.exists(f"{root_path}/{waveform_dir}"):
199199
os.makedirs(f"{root_path}/{waveform_dir}")
@@ -219,8 +219,8 @@ def download_waveform(
219219
else:
220220
raise ValueError("Invalid interval")
221221
starttimes = pd.date_range(start, config["endtime"], freq=DELTATIME, tz="UTC", inclusive="left").to_list()
222-
starttimes = np.array_split(starttimes, num_nodes)[rank]
223-
print(f"rank {rank}: {len(starttimes) = }, {starttimes[0]}, {starttimes[-1]}")
222+
starttimes = np.array_split(starttimes, num_nodes)[node_rank]
223+
print(f"rank {node_rank}: {len(starttimes) = }, {starttimes[0]}, {starttimes[-1]}")
224224

225225
if provider.lower() in ["scedc", "ncedc"]:
226226
cloud_config = {
@@ -299,9 +299,11 @@ def download_waveform(
299299
protocol = args.protocol
300300
bucket = args.bucket
301301
token = args.token
302+
node_rank = args.node_rank
303+
num_nodes = args.num_nodes
302304

303305
with open(f"{root_path}/{region}/config.json", "r") as fp:
304306
config = json.load(fp)
305307

306-
download_waveform(root_path=root_path, region=region, config=config)
308+
download_waveform(root_path=root_path, region=region, config=config, node_rank=node_rank, num_nodes=num_nodes)
307309
# %%

scripts/plot_catalog.py

Lines changed: 87 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858

5959

6060
# %%
61-
adloc_file = f"{root_path}/{region}/adloc/ransac_events.csv"
61+
adloc_file = f"{root_path}/{region}/adloc/adloc_events.csv"
6262
adloc_exist = False
6363
if os.path.exists(adloc_file):
6464
print(f"Reading {adloc_file}")
@@ -67,6 +67,14 @@
6767
# adloc_catalog["magnitude"] = 0.0
6868
# gamma_catalog["depth_km"] = gamma_catalog["depth(m)"] / 1e3
6969

70+
# %%
71+
qtm_file = f"{root_path}/{region}/qtm/qtm_events.csv"
72+
qtm_exist = False
73+
if os.path.exists(qtm_file):
74+
print(f"Reading {qtm_file}")
75+
qtm_exist = True
76+
qtm_catalog = pd.read_csv(qtm_file, parse_dates=["time"])
77+
7078
# %%
7179
adloc_dt_file = f"{root_path}/{region}/adloc_dd/adloc_dt_events.csv"
7280
adloc_dt_exist = False
@@ -335,9 +343,9 @@
335343

336344
# %%
337345
size_factor = 2600
338-
fig, ax = plt.subplots(4, 2, squeeze=False, figsize=(10, 15), sharex=True, sharey=True)
346+
fig, ax = plt.subplots(4, 3, squeeze=False, figsize=(15, 15), sharex=True, sharey=True)
339347
for i in range(4):
340-
for j in range(2):
348+
for j in range(3):
341349
ax[i, j].set_xlim(xlim)
342350
ax[i, j].set_ylim(ylim)
343351
ax[i, j].set_aspect((ylim[1] - ylim[0]) / ((xlim[1] - xlim[0]) * np.cos(np.mean(ylim) * np.pi / 180)))
@@ -375,16 +383,27 @@
375383
# ylim = ax[0, 1].get_ylim()
376384

377385
if adloc_exist and (len(adloc_catalog) > 0):
378-
ax[0, 1].scatter(
386+
ax[0, 2].scatter(
379387
adloc_catalog["longitude"],
380388
adloc_catalog["latitude"],
381389
s=min(2, size_factor / len(adloc_catalog)),
382390
alpha=1.0,
383391
linewidth=0,
384392
label=f"AdLoc: {len(adloc_catalog)}",
385393
)
386-
# ax[0, 1].legend()
387-
ax[0, 1].set_title(f"AdLoc: {len(adloc_catalog)}")
394+
# ax[0, 2].legend()
395+
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
396+
397+
if qtm_exist and (len(qtm_catalog) > 0):
398+
ax[1, 2].scatter(
399+
qtm_catalog["longitude"],
400+
qtm_catalog["latitude"],
401+
s=min(2, size_factor / len(qtm_catalog)),
402+
alpha=1.0,
403+
linewidth=0,
404+
label=f"QTM: {len(qtm_catalog)}",
405+
)
406+
ax[1, 2].set_title(f"QTM: {len(qtm_catalog)}")
388407

389408
if adloc_dt_exist and (len(adloc_dt_catalog) > 0):
390409
ax[1, 0].scatter(
@@ -459,11 +478,11 @@
459478
plt.show()
460479

461480
# %%
462-
fig, ax = plt.subplots(4, 2, squeeze=False, figsize=(15, 30), sharex=True, sharey=True)
481+
fig, ax = plt.subplots(4, 3, squeeze=False, figsize=(20, 30), sharex=True, sharey=True)
463482
cmin = 0
464483
cmax = 10
465484
for i in range(4):
466-
for j in range(2):
485+
for j in range(3):
467486
ax[i, j].grid()
468487

469488
if routine_exist and (len(routine_catalog) > 0):
@@ -509,7 +528,7 @@
509528
ylim = None
510529

511530
if adloc_exist and (len(adloc_catalog) > 0):
512-
ax[0, 1].scatter(
531+
ax[0, 2].scatter(
513532
adloc_catalog["longitude"],
514533
adloc_catalog["depth_km"],
515534
c=adloc_catalog["depth_km"],
@@ -521,10 +540,27 @@
521540
cmap="viridis_r",
522541
label=f"AdLoc: {len(adloc_catalog)}",
523542
)
524-
# ax[0, 1].legend()
525-
ax[0, 1].set_title(f"AdLoc: {len(adloc_catalog)}")
526-
ax[0, 1].set_xlim(xlim)
527-
ax[0, 1].set_ylim(ylim)
543+
# ax[0, 2].legend()
544+
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
545+
ax[0, 2].set_xlim(xlim)
546+
ax[0, 2].set_ylim(ylim)
547+
548+
if qtm_exist and (len(qtm_catalog) > 0):
549+
ax[1, 2].scatter(
550+
qtm_catalog["longitude"],
551+
qtm_catalog["depth_km"],
552+
c=qtm_catalog["depth_km"],
553+
s=8000 / len(qtm_catalog),
554+
alpha=1.0,
555+
linewidth=0,
556+
vmin=cmin,
557+
vmax=cmax,
558+
cmap="viridis_r",
559+
)
560+
# ax[1, 2].legend()
561+
ax[1, 2].set_title(f"QTM: {len(qtm_catalog)}")
562+
ax[1, 2].set_xlim(xlim)
563+
ax[1, 2].set_ylim(ylim)
528564

529565
if adloc_dt_exist and (len(adloc_dt_catalog) > 0):
530566
ax[1, 0].scatter(
@@ -630,11 +666,11 @@
630666

631667

632668
# %%
633-
fig, ax = plt.subplots(4, 2, squeeze=False, figsize=(15, 30), sharex=True, sharey=True)
669+
fig, ax = plt.subplots(4, 3, squeeze=False, figsize=(20, 30), sharex=True, sharey=True)
634670
cmin = 0
635671
cmax = 10
636672
for i in range(4):
637-
for j in range(2):
673+
for j in range(3):
638674
ax[i, j].grid()
639675

640676
if routine_exist and (len(routine_catalog) > 0):
@@ -680,7 +716,7 @@
680716
ylim = None
681717

682718
if adloc_exist and (len(adloc_catalog) > 0):
683-
ax[0, 1].scatter(
719+
ax[0, 2].scatter(
684720
adloc_catalog["latitude"],
685721
adloc_catalog["depth_km"],
686722
c=adloc_catalog["depth_km"],
@@ -692,10 +728,26 @@
692728
cmap="viridis_r",
693729
label=f"AdLoc: {len(adloc_catalog)}",
694730
)
695-
# ax[0, 1].legend()
696-
ax[0, 1].set_title(f"AdLoc: {len(adloc_catalog)}")
697-
ax[0, 1].set_xlim(xlim)
698-
ax[0, 1].set_ylim(ylim)
731+
# ax[0, 2].legend()
732+
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
733+
ax[0, 2].set_xlim(xlim)
734+
ax[0, 2].set_ylim(ylim)
735+
736+
if qtm_exist and (len(qtm_catalog) > 0):
737+
ax[1, 2].scatter(
738+
qtm_catalog["latitude"],
739+
qtm_catalog["depth_km"],
740+
c=qtm_catalog["depth_km"],
741+
s=8000 / len(qtm_catalog),
742+
alpha=1.0,
743+
linewidth=0,
744+
vmin=cmin,
745+
vmax=cmax,
746+
cmap="viridis_r",
747+
)
748+
ax[1, 2].set_title(f"QTM: {len(qtm_catalog)}")
749+
ax[1, 2].set_xlim(xlim)
750+
ax[1, 2].set_ylim(ylim)
699751

700752
if adloc_dt_exist and (len(adloc_dt_catalog) > 0):
701753
ax[1, 0].scatter(
@@ -815,6 +867,9 @@
815867
if adloc_exist:
816868
ax[0, 0].plot(adloc_catalog["time"], adloc_catalog["magnitude"], "o", markersize=2, alpha=0.5, label="AdLoc")
817869
ax[1, 0].plot(adloc_catalog["time"], adloc_catalog["magnitude"], "o", markersize=2, alpha=0.5, label="AdLoc")
870+
# if qtm_exist:
871+
# ax[0, 0].plot(qtm_catalog["time"], qtm_catalog["magnitude"], "o", markersize=2, alpha=0.5, label="QTM")
872+
# ax[1, 0].plot(qtm_catalog["time"], qtm_catalog["magnitude"], "o", markersize=2, alpha=0.5, label="QTM")
818873
if adloc_dt_exist:
819874
ax[0, 0].plot(
820875
adloc_dt_catalog["time"], adloc_dt_catalog["magnitude"], "o", markersize=2, alpha=0.5, label="AdLoc (CT)"
@@ -862,6 +917,9 @@
862917
if adloc_exist:
863918
ax[0, 0].hist(adloc_catalog["magnitude"], bins=bins, alpha=0.5, label="AdLoc")
864919
ax[1, 0].hist(adloc_catalog["magnitude"], bins=bins, alpha=0.5, label="AdLoc")
920+
# if qtm_exist:
921+
# ax[0, 0].hist(qtm_catalog["magnitude"], bins=bins, alpha=0.5, label="QTM")
922+
# ax[1, 0].hist(qtm_catalog["magnitude"], bins=bins, alpha=0.5, label="QTM")
865923
if adloc_dt_exist:
866924
ax[0, 0].hist(adloc_dt_catalog["magnitude"], bins=bins, alpha=0.5, label="AdLoc (CT)")
867925
if adloc_dtcc_exist:
@@ -954,6 +1012,15 @@ def plot3d(x, y, z, config, fig_name):
9541012
f"{root_path}/{figure_path}/earthquake_location_adloc.html",
9551013
)
9561014

1015+
if qtm_exist and len(qtm_catalog) > 0:
1016+
plot3d(
1017+
qtm_catalog["longitude"],
1018+
qtm_catalog["latitude"],
1019+
qtm_catalog["depth_km"],
1020+
config_plot3d,
1021+
f"{root_path}/{figure_path}/earthquake_location_qtm.html",
1022+
)
1023+
9571024
if adloc_dt_exist and len(adloc_dt_catalog) > 0:
9581025
plot3d(
9591026
adloc_dt_catalog["longitude"],

0 commit comments

Comments
 (0)