Skip to content

Commit a527fdd

Browse files
committed
fix by hour to by day
1 parent 71e70f6 commit a527fdd

File tree

6 files changed

+165
-80
lines changed

6 files changed

+165
-80
lines changed

scripts/cut_templates_cc.py

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,8 @@ def cut_templates(root_path, region, config):
415415
picks = picks[picks["adloc_mask"] == 1]
416416
picks["phase_time"] = pd.to_datetime(picks["phase_time"], utc=True)
417417
min_phase_score = picks["phase_score"].min()
418+
print(f"Number of picks: {len(picks)}")
419+
print(picks.iloc[:5])
418420

419421
picks = picks.merge(events[["event_index", "event_timestamp"]], on="event_index")
420422
# picks = picks.merge(stations[["station_id", "station_term_time"]], on="station_id")
@@ -428,6 +430,11 @@ def cut_templates(root_path, region, config):
428430
picks["phase_timestamp"] - picks["event_timestamp"] - picks["station_term_time"]
429431
) ## Separate P and S station term
430432

433+
# Keep only the pick with highest phase_score for each event-station-phase combination
434+
picks = picks.sort_values("phase_score", ascending=False).drop_duplicates(
435+
subset=["event_index", "station_id", "phase_type"], keep="first"
436+
)
437+
431438
picks = fillin_missing_picks(
432439
picks,
433440
events,
@@ -529,15 +536,60 @@ def cut_templates(root_path, region, config):
529536
key = "/".join(mseed.replace(".mseed", "").split("/")[-subdir - 1 :])
530537
key = key[:-1] ## remove the channel suffix
531538
mseed_3c[key].append(mseed)
532-
print(f"Number of mseed files: {len(mseed_3c)}")
533539

534540
def parse_key(key):
535541
year, jday, name = key.split("/")
536542
network, station, location, instrument = name.split(".")
537543
return [year, jday, network, station, location, instrument]
538544

539545
mseeds = [parse_key(k) + [",".join(sorted(mseed_3c[k]))] for k in mseed_3c]
540-
mseeds = pd.DataFrame(mseeds, columns=["year", "jday", "network", "station", "location", "instrument", "ENZ"])
546+
mseeds_df = pd.DataFrame(mseeds, columns=["year", "jday", "network", "station", "location", "instrument", "ENZ"])
547+
548+
# protocol = "gs"
549+
# bucket = "quakeflow_catalog"
550+
# folder = "SC"
551+
# token_json = "application_default_credentials.json"
552+
# with open(token_json, "r") as fp:
553+
# token = json.load(fp)
554+
# fs = fsspec.filesystem(protocol=protocol, token=token)
555+
# year = 2019
556+
# mseeds_df = []
557+
# # for folder in ["SC", "NC"]:
558+
# for folder in ["SC"]:
559+
# with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
560+
# mseeds = f.readlines()
561+
# mseeds = [x.strip("\n") for x in mseeds]
562+
# mseeds = pd.DataFrame(mseeds, columns=["ENZ"])
563+
# if folder == "SC":
564+
# mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
565+
# mseeds["network"] = mseeds["fname"].apply(lambda x: x[:2])
566+
# mseeds["station"] = mseeds["fname"].apply(lambda x: x[2:7].strip("_"))
567+
# mseeds["instrument"] = mseeds["fname"].apply(lambda x: x[7:9])
568+
# mseeds["location"] = mseeds["fname"].apply(lambda x: x[10:12].strip("_"))
569+
# mseeds["year"] = mseeds["fname"].apply(lambda x: x[13:17])
570+
# mseeds["jday"] = mseeds["fname"].apply(lambda x: x[17:20])
571+
# if folder == "NC":
572+
# mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
573+
# mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
574+
# mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
575+
# mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[2][:-1])
576+
# mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[3])
577+
# mseeds["year"] = mseeds["fname"].apply(lambda x: x.split(".")[5])
578+
# mseeds["jday"] = mseeds["fname"].apply(lambda x: x.split(".")[6])
579+
# mseeds_df.append(mseeds)
580+
# mseeds_df = pd.concat(mseeds_df)
581+
# mseeds_df.drop(columns=["fname"], inplace=True, errors="ignore")
582+
# mseeds_df = mseeds_df[["year", "jday", "network", "station", "location", "instrument", "ENZ"]]
583+
# mseeds_df = mseeds_df.merge(
584+
# picks[["network", "station", "location", "instrument", "year", "jday"]],
585+
# on=["network", "station", "location", "instrument", "year", "jday"],
586+
# )
587+
# mseeds_df = mseeds_df.drop_duplicates(subset=["ENZ"], keep="first")
588+
# mseeds_df.sort_values(by=["year", "jday", "network", "station", "location", "instrument", "ENZ"], inplace=True)
589+
# mseeds_df.to_csv(f"debug_mseed_remote.csv", index=False)
590+
591+
print(f"Number of mseeds: {len(mseeds_df)}")
592+
print(mseeds_df.iloc[:5])
541593

542594
## Match picks with mseed files
543595
picks["network"] = picks["station_id"].apply(lambda x: x.split(".")[0])
@@ -546,7 +598,7 @@ def parse_key(key):
546598
picks["instrument"] = picks["station_id"].apply(lambda x: x.split(".")[3])
547599
picks["year"] = picks["phase_time"].dt.strftime("%Y")
548600
picks["jday"] = picks["phase_time"].dt.strftime("%j")
549-
picks = picks.merge(mseeds, on=["network", "station", "location", "instrument", "year", "jday"])
601+
picks = picks.merge(mseeds_df, on=["network", "station", "location", "instrument", "year", "jday"])
550602
picks.drop(columns=["station_id", "network", "location", "instrument", "year", "jday"], inplace=True)
551603

552604
picks_group = picks.copy()

scripts/merge_phasenet_plus_picks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def scan_csv(year, root_path, region, model, data="picks", fs=None, bucket=None,
2323
if protocol != "file":
2424
csvs = fs.glob(f"{jday}/??/*.csv")
2525
else:
26-
csvs = glob(f"{root_path}/{region}/{model}/{data}_{model}/{year}/{jday}/??/*.csv")
27-
# csvs = glob(f"{root_path}/{region}/{model}/{data}_{model}/{year}/{jday}/*.csv")
26+
# csvs = glob(f"{root_path}/{region}/{model}/{data}_{model}/{year}/{jday}/??/*.csv")
27+
csvs = glob(f"{root_path}/{region}/{model}/{data}_{model}/{year}/{jday}/*.csv")
2828

2929
csv_list.extend([[year, jday, csv] for csv in csvs])
3030

scripts/plot_catalog.py

Lines changed: 97 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@
3636
print(json.dumps(config, indent=4, sort_keys=True))
3737
xlim = [config["minlongitude"], config["maxlongitude"]]
3838
ylim = [config["minlatitude"], config["maxlatitude"]]
39+
if "mindepth" not in config:
40+
config["mindepth"] = 0
41+
if "maxdepth" not in config:
42+
config["maxdepth"] = 60
43+
zlim = [config["mindepth"], config["maxdepth"]]
44+
3945

4046
# %%
4147
# %%
@@ -485,26 +491,30 @@
485491
for j in range(3):
486492
ax[i, j].grid()
487493

488-
if routine_exist and (len(routine_catalog) > 0):
489-
ax[0, 0].scatter(
490-
routine_catalog["longitude"],
491-
routine_catalog["depth_km"],
492-
c=routine_catalog["depth_km"],
493-
s=8000 / len(routine_catalog),
494+
xlim = None
495+
ylim = None
496+
if adloc_exist and (len(adloc_catalog) > 0):
497+
ax[0, 2].scatter(
498+
adloc_catalog["longitude"],
499+
adloc_catalog["depth_km"],
500+
c=adloc_catalog["depth_km"],
501+
s=8000 / len(adloc_catalog),
494502
alpha=1.0,
495503
linewidth=0,
496504
vmin=cmin,
497505
vmax=cmax,
498506
cmap="viridis_r",
499-
label=f"Routine: {len(routine_catalog)}",
507+
label=f"AdLoc: {len(adloc_catalog)}",
500508
)
501-
ax[0, 0].set_title(f"Routine: {len(routine_catalog)}")
502-
# ax[0, 0].invert_yaxis()
503-
xlim = ax[0, 0].get_xlim()
504-
ylim = ax[0, 0].get_ylim()
505-
else:
506-
xlim = None
507-
ylim = None
509+
# ax[0, 2].legend()
510+
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
511+
if (xlim is None) and (ylim is None):
512+
ax[0, 2].invert_yaxis()
513+
xlim = ax[0, 2].get_xlim()
514+
ylim = ax[0, 2].get_ylim()
515+
else:
516+
ax[0, 2].set_xlim(xlim)
517+
ax[0, 2].set_ylim(ylim)
508518

509519
if gamma_exist and (len(gamma_catalog) > 0):
510520
ax[0, 1].scatter(
@@ -520,30 +530,35 @@
520530
label=f"GaMMA: {len(gamma_catalog)}",
521531
)
522532
ax[0, 1].set_title(f"GaMMA: {len(gamma_catalog)}")
523-
ax[0, 1].invert_yaxis()
524-
xlim = ax[0, 1].get_xlim()
525-
ylim = ax[0, 1].get_ylim()
526-
else:
527-
xlim = None
528-
ylim = None
533+
if (xlim is None) and (ylim is None):
534+
ax[0, 1].invert_yaxis()
535+
xlim = ax[0, 1].get_xlim()
536+
ylim = ax[0, 1].get_ylim()
537+
else:
538+
ax[0, 1].set_xlim(xlim)
539+
ax[0, 1].set_ylim(ylim)
529540

530-
if adloc_exist and (len(adloc_catalog) > 0):
531-
ax[0, 2].scatter(
532-
adloc_catalog["longitude"],
533-
adloc_catalog["depth_km"],
534-
c=adloc_catalog["depth_km"],
535-
s=8000 / len(adloc_catalog),
541+
if routine_exist and (len(routine_catalog) > 0):
542+
ax[0, 0].scatter(
543+
routine_catalog["longitude"],
544+
routine_catalog["depth_km"],
545+
c=routine_catalog["depth_km"],
546+
s=8000 / len(routine_catalog),
536547
alpha=1.0,
537548
linewidth=0,
538549
vmin=cmin,
539550
vmax=cmax,
540551
cmap="viridis_r",
541-
label=f"AdLoc: {len(adloc_catalog)}",
552+
label=f"Routine: {len(routine_catalog)}",
542553
)
543-
# ax[0, 2].legend()
544-
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
545-
ax[0, 2].set_xlim(xlim)
546-
ax[0, 2].set_ylim(ylim)
554+
ax[0, 0].set_title(f"Routine: {len(routine_catalog)}")
555+
if (xlim is None) and (ylim is None):
556+
ax[0, 0].invert_yaxis()
557+
xlim = ax[0, 0].get_xlim()
558+
ylim = ax[0, 0].get_ylim()
559+
else:
560+
ax[0, 0].set_xlim(xlim)
561+
ax[0, 0].set_ylim(ylim)
547562

548563
if qtm_exist and (len(qtm_catalog) > 0):
549564
ax[1, 2].scatter(
@@ -669,30 +684,34 @@
669684
fig, ax = plt.subplots(4, 3, squeeze=False, figsize=(20, 30), sharex=True, sharey=True)
670685
cmin = 0
671686
cmax = 10
687+
xlim = None
688+
ylim = None
672689
for i in range(4):
673690
for j in range(3):
674691
ax[i, j].grid()
675692

676-
if routine_exist and (len(routine_catalog) > 0):
677-
ax[0, 0].scatter(
678-
routine_catalog["latitude"],
679-
routine_catalog["depth_km"],
680-
c=routine_catalog["depth_km"],
681-
s=8000 / len(routine_catalog),
693+
if adloc_exist and (len(adloc_catalog) > 0):
694+
ax[0, 2].scatter(
695+
adloc_catalog["latitude"],
696+
adloc_catalog["depth_km"],
697+
c=adloc_catalog["depth_km"],
698+
s=8000 / len(adloc_catalog),
682699
alpha=1.0,
683700
linewidth=0,
684701
vmin=cmin,
685702
vmax=cmax,
686703
cmap="viridis_r",
687-
label=f"Routine: {len(routine_catalog)}",
704+
label=f"AdLoc: {len(adloc_catalog)}",
688705
)
689-
ax[0, 0].set_title(f"Routine: {len(routine_catalog)}")
690-
# ax[0, 0].invert_yaxis()
691-
xlim = ax[0, 0].get_xlim()
692-
ylim = ax[0, 0].get_ylim()
693-
else:
694-
xlim = None
695-
ylim = None
706+
# ax[0, 2].legend()
707+
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
708+
if (xlim is None) and (ylim is None):
709+
ax[0, 2].invert_yaxis()
710+
xlim = ax[0, 2].get_xlim()
711+
ylim = ax[0, 2].get_ylim()
712+
else:
713+
ax[0, 2].set_xlim(xlim)
714+
ax[0, 2].set_ylim(ylim)
696715

697716
if gamma_exist and (len(gamma_catalog) > 0):
698717
ax[0, 1].scatter(
@@ -708,30 +727,35 @@
708727
label=f"GaMMA: {len(gamma_catalog)}",
709728
)
710729
ax[0, 1].set_title(f"GaMMA: {len(gamma_catalog)}")
711-
ax[0, 1].invert_yaxis()
712-
xlim = ax[0, 1].get_xlim()
713-
ylim = ax[0, 1].get_ylim()
714-
else:
715-
xlim = None
716-
ylim = None
730+
if (xlim is None) and (ylim is None):
731+
ax[0, 1].invert_yaxis()
732+
xlim = ax[0, 1].get_xlim()
733+
ylim = ax[0, 1].get_ylim()
734+
else:
735+
ax[0, 1].set_xlim(xlim)
736+
ax[0, 1].set_ylim(ylim)
717737

718-
if adloc_exist and (len(adloc_catalog) > 0):
719-
ax[0, 2].scatter(
720-
adloc_catalog["latitude"],
721-
adloc_catalog["depth_km"],
722-
c=adloc_catalog["depth_km"],
723-
s=8000 / len(adloc_catalog),
738+
if routine_exist and (len(routine_catalog) > 0):
739+
ax[0, 0].scatter(
740+
routine_catalog["latitude"],
741+
routine_catalog["depth_km"],
742+
c=routine_catalog["depth_km"],
743+
s=8000 / len(routine_catalog),
724744
alpha=1.0,
725745
linewidth=0,
726746
vmin=cmin,
727747
vmax=cmax,
728748
cmap="viridis_r",
729-
label=f"AdLoc: {len(adloc_catalog)}",
749+
label=f"Routine: {len(routine_catalog)}",
730750
)
731-
# ax[0, 2].legend()
732-
ax[0, 2].set_title(f"AdLoc: {len(adloc_catalog)}")
733-
ax[0, 2].set_xlim(xlim)
734-
ax[0, 2].set_ylim(ylim)
751+
ax[0, 0].set_title(f"Routine: {len(routine_catalog)}")
752+
if (xlim is None) and (ylim is None):
753+
ax[0, 0].invert_yaxis()
754+
xlim = ax[0, 0].get_xlim()
755+
ylim = ax[0, 0].get_ylim()
756+
else:
757+
ax[0, 0].set_xlim(xlim)
758+
ax[0, 0].set_ylim(ylim)
735759

736760
if qtm_exist and (len(qtm_catalog) > 0):
737761
ax[1, 2].scatter(
@@ -906,8 +930,17 @@
906930

907931
# %%
908932
fig, ax = plt.subplots(2, 1, squeeze=False, figsize=(10, 10))
909-
xlim = [int(np.floor(gamma_catalog["magnitude"].min())), int(np.ceil(gamma_catalog["magnitude"].max()))]
910-
bins = np.arange(xlim[0], xlim[1] + 1, 0.2)
933+
if gamma_exist:
934+
xlim = [int(np.floor(gamma_catalog["magnitude"].min())), int(np.ceil(gamma_catalog["magnitude"].max()))]
935+
bins = np.arange(xlim[0], xlim[1] + 1, 0.2)
936+
elif adloc_exist:
937+
xlim = [int(np.floor(adloc_catalog["magnitude"].min())), int(np.ceil(adloc_catalog["magnitude"].max()))]
938+
bins = np.arange(xlim[0], xlim[1] + 1, 0.2)
939+
elif routine_exist:
940+
xlim = [int(np.floor(routine_catalog["magnitude"].min())), int(np.ceil(routine_catalog["magnitude"].max()))]
941+
bins = np.arange(xlim[0], xlim[1] + 1, 0.2)
942+
else:
943+
raise ValueError("No catalog found")
911944
if routine_exist:
912945
ax[0, 0].hist(routine_catalog["magnitude"], bins=bins, alpha=0.5, label="Routine")
913946
ax[1, 0].hist(routine_catalog["magnitude"], bins=bins, alpha=0.5, label="Routine")

scripts/run_adloc.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ def run_adloc(
5151
# result_path = f"{root_path}/{region}/adloc_gamma_plus"
5252
# figure_path = f"{root_path}/{region}/adloc_gamma_plus/figures"
5353

54-
# picks_file = f"{root_path}/{region}/phasenet_plus/phasenet_plus_picks_associated.csv"
55-
# events_file = f"{root_path}/{region}/phasenet_plus/phasenet_plus_events_associated.csv"
56-
# result_path = f"{root_path}/{region}/adloc_plus"
57-
# figure_path = f"{root_path}/{region}/adloc_plus/figures"
54+
picks_file = f"{root_path}/{region}/phasenet_plus/phasenet_plus_picks_associated.csv"
55+
events_file = f"{root_path}/{region}/phasenet_plus/phasenet_plus_events_associated.csv"
56+
result_path = f"{root_path}/{region}/adloc_plus"
57+
figure_path = f"{root_path}/{region}/adloc_plus/figures"
5858

5959
# %%
6060
if not os.path.exists(result_path):

scripts/run_phasenet_plus.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ def run_phasenet(
2929

3030
# %%
3131
if data_type == "continuous":
32-
subdir = 3
33-
# subdir = 2
32+
# subdir = 3
33+
subdir = 2
3434
elif data_type == "event":
3535
subdir = 1
3636

@@ -49,8 +49,8 @@ def run_phasenet(
4949
# fs.get(f"{bucket}/{waveform_dir}/", f"{root_path}/{waveform_dir}/", recursive=True)
5050

5151
if data_type == "continuous":
52-
mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/??/*.mseed"))
53-
# mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/*.mseed"))
52+
# mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/??/*.mseed"))
53+
mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/*.mseed"))
5454
elif data_type == "event":
5555
mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/*.mseed"))
5656
else:
@@ -68,8 +68,8 @@ def run_phasenet(
6868

6969
# %% skip processed files
7070
if not overwrite:
71-
processed = sorted(glob(f"{root_path}/{result_path}/picks_phasenet_plus/????/???/??/*.csv"))
72-
# processed = sorted(glob(f"{root_path}/{result_path}/picks_phasenet_plus/????/???/*.csv"))
71+
# processed = sorted(glob(f"{root_path}/{result_path}/picks_phasenet_plus/????/???/??/*.csv"))
72+
processed = sorted(glob(f"{root_path}/{result_path}/picks_phasenet_plus/????/???/*.csv"))
7373
processed = ["/".join(f.replace(".csv", "").split("/")[-subdir - 1 :]) for f in processed]
7474
processed = [p[:-1] for p in processed] ## remove the channel suffix
7575
print(f"Number of processed files: {len(processed)}")

scripts/run_qtm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def parse_args():
5858
{
5959
"station_id": lambda x: ",".join(x.unique()),
6060
"begin_time": lambda x: ",".join(x.unique()),
61-
"file_name": lambda x: "_".join(sorted(x)),
61+
"file_name": lambda x: "|".join(sorted(x)),
6262
}
6363
)
6464
.reset_index()

0 commit comments

Comments
 (0)