
Commit 0c613b1
debugging Ridgecrest
1 parent: ac0272d
5 files changed: +144, -97 lines

examples/california/cut_templates_cc.py

Lines changed: 36 additions & 31 deletions
@@ -69,7 +69,10 @@ def fillin_missing_picks(picks, events, stations, config):
     picks_ps.reset_index(inplace=True)
 
     ## add provider
-    picks_ps = picks_ps.merge(picks[["event_index", "station_id", "provider"]].drop_duplicates(), on=["event_index", "station_id"])
+    if "provider" in picks.columns:
+        picks_ps = picks_ps.merge(picks[["event_index", "station_id", "provider"]].drop_duplicates(), on=["event_index", "station_id"])
+    else:
+        picks_ps = picks_ps.merge(picks[["event_index", "station_id"]].drop_duplicates(), on=["event_index", "station_id"])
 
     print(f"Original picks: {len(picks)}, Filled picks: {len(picks_ps)}")
     print(picks_ps.iloc[:10])
@@ -440,8 +443,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # stations["station_id"] = stations.index
     # stations.sort_values(by=["latitude", "longitude"], inplace=True)
 
-    # station_csv = f"{data_path}/adloc_stations.csv"
-    station_csv = f"{data_path}/ransac_stations.csv"
+    station_csv = f"{data_path}/adloc_stations.csv"
+    # station_csv = f"{data_path}/ransac_stations.csv"
     if protocol == "file":
         stations = pd.read_csv(station_csv)
     else:
@@ -468,12 +471,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # events = pd.read_csv("adloc_events.csv", parse_dates=["time"])
     if protocol == "file":
         events = pd.read_csv(
-            # f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
-            f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", parse_dates=["time"]
+            f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
+            # f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", parse_dates=["time"]
         )
     else:
-        # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
-        with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
+        with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
+        # with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
             events = pd.read_csv(fp, parse_dates=["time"])
 
     # # ############### DEBUG ###############
@@ -529,13 +532,17 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # picks = pd.read_csv(f"{root_path}/{data_path}/ransac_picks.csv")
     # picks = pd.read_csv("adloc_picks.csv")
     if protocol == "file":
-        # picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
-        picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv")
+        picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
+        # picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv")
     else:
-        # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
-        with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv", "r") as fp:
+        with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
+        # with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv", "r") as fp:
             picks = pd.read_csv(fp)
 
+    # if "provider" not in picks.columns:
+    #     picks["provider"] = "adloc"
+    #     print(f"No provider in picks; set to adloc as default")
+
     # ############### DEBUG ###############
     # picks = picks[(picks["event_index"].isin(events["event_index"]))]
     # ############### DEBUG ###############
@@ -648,19 +655,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     config["reference_t0"] = reference_t0.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
     events = events[["idx_eve", "x_km", "y_km", "z_km", "event_index", "event_time", "event_timestamp"]]
     stations = stations[["idx_sta", "x_km", "y_km", "z_km", "station_id", "component", "network", "station"]]
-    picks = picks[
-        [
-            "idx_eve",
-            "idx_sta",
-            "phase_type",
-            "phase_score",
-            "phase_time",
-            "phase_timestamp",
-            "phase_source",
-            "station_id",
-            "provider",
-        ]
-    ]
+    columns = ["idx_eve", "idx_sta", "phase_type", "phase_score", "phase_time", "phase_timestamp", "phase_source", "station_id"]
+    if "provider" in picks.columns:
+        columns.append("provider")
+    picks = picks[columns]
     events.set_index("idx_eve", inplace=True)
     stations.set_index("idx_sta", inplace=True)
     picks.sort_values(by=["idx_eve", "idx_sta", "phase_type"], inplace=True)
@@ -700,7 +698,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
            mseeds["location"] = mseeds["fname"].apply(lambda x: x[10:12].strip("_"))
            mseeds["year"] = mseeds["fname"].apply(lambda x: x[13:17])
            mseeds["jday"] = mseeds["fname"].apply(lambda x: x[17:20])
-            mseeds["provider"] = "SC"
+            if "provider" not in picks.columns:
+                mseeds["provider"] = "SC"
        elif folder == "NC":
            mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
            mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
@@ -709,7 +708,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
            mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[3])
            mseeds["year"] = mseeds["fname"].apply(lambda x: x.split(".")[5])
            mseeds["jday"] = mseeds["fname"].apply(lambda x: x.split(".")[6])
-            mseeds["provider"] = "NC"
+            if "provider" not in picks.columns:
+                mseeds["provider"] = "NC"
        elif folder == "IRIS":
            mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
            mseeds["jday"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-2])
@@ -718,7 +718,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
            mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
            mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[2])
            mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[3][:2])
-            mseeds["provider"] = "IRIS"
+            if "provider" not in picks.columns:
+                mseeds["provider"] = "IRIS"
        else:
            raise ValueError(f"Unknown folder: {folder}")
        mseeds_df.append(mseeds)
@@ -737,7 +738,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
    mseeds_df = mseeds_df[(mseeds_df["year"].astype(int) == year) & (mseeds_df["jday"].astype(int) == jday)]
    picks = picks[(picks["year"].astype(int) == year) & (picks["jday"].astype(int) == jday)]
 
-    picks = picks.merge(mseeds_df, on=["network", "station", "location", "instrument", "year", "jday", "provider"])
+    if "provider" in picks.columns:
+        picks = picks.merge(mseeds_df, on=["network", "station", "location", "instrument", "year", "jday", "provider"])
+    else:
+        picks = picks.merge(mseeds_df, on=["network", "station", "location", "instrument", "year", "jday"])
    picks.drop(columns=["fname", "station_id", "network", "location", "instrument", "year", "jday"], inplace=True)
 
    if len(picks) == 0:
@@ -930,11 +934,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
    # jdays = [f"{year}.{i:03d}" for i in range(1, num_jday + 1)]
    jdays = range(1, num_jday + 1)
 
-    # jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/adloc_events_???.csv")
-    jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/ransac_events_???.csv")
+    jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/adloc_events_???.csv")
+    # jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/ransac_events_???.csv")
    jdays = [int(x.split("/")[-1].split(".")[0].split("_")[-1]) for x in jdays]
 
    jdays = np.array_split(jdays, num_nodes)[node_rank]
+
    # jdays = ["2019.185"]
    # jdays = ["2019.186"]
    # jdays = ["2019.185", "2019.186", "2019.187"]
@@ -949,7 +954,7 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
    processed = [x.split("/")[-2] for x in processed]
    print(f"Processed days: {len(processed)}")
 
-    jdays = [jday for jday in jdays if f"{jday:03d}" not in processed]
+    # jdays = [jday for jday in jdays if f"{jday:03d}" not in processed]
    print(f"Remaining days: {len(jdays)}")
 
    if len(jdays) == 0:
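
Note: the recurring change in this file is treating the picks' "provider" column as optional when selecting and merging. A minimal standalone sketch of that pattern follows (toy DataFrames and station IDs, not code from this repository):

# Illustrative only: conditionally include an optional "provider" column,
# mirroring the pattern applied throughout this commit. Data below is synthetic.
import pandas as pd

picks = pd.DataFrame(
    {"event_index": [1, 1, 2], "station_id": ["CI.AAA", "CI.BBB", "CI.AAA"]}
)  # this toy picks table has no "provider" column
picks_ps = pd.DataFrame({"event_index": [1, 2], "station_id": ["CI.AAA", "CI.AAA"]})

merge_keys = ["event_index", "station_id"]
# include "provider" only when the source table actually carries it
columns = merge_keys + (["provider"] if "provider" in picks.columns else [])
picks_ps = picks_ps.merge(picks[columns].drop_duplicates(), on=merge_keys)
print(picks_ps)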

examples/california/cut_templates_merge.py

Lines changed: 2 additions & 2 deletions
@@ -126,8 +126,8 @@ def plot_templates(templates, events, picks):
    # stations = pd.read_json(fp, orient="index")
    # stations["station_id"] = stations.index
 
-    station_csv = f"{region}/adloc/ransac_stations.csv"
-    # station_csv = f"{region}/adloc/adloc_stations.csv"
+    # station_csv = f"{region}/adloc/ransac_stations.csv"
+    station_csv = f"{region}/adloc/adloc_stations.csv"
    if protocol == "file":
        stations = pd.read_csv(f"{root_path}/{station_csv}")
    else:

examples/california/plot_catalog.py

Lines changed: 4 additions & 2 deletions
@@ -115,9 +115,11 @@ def parse_args():
 
 # %%
 if protocol == "file":
-    adloc_file = f"{root_path}/{region}/adloc/ransac_events.csv"
+    # adloc_file = f"{root_path}/{region}/adloc/ransac_events.csv"
+    adloc_file = f"{root_path}/{region}/adloc/adloc_events.csv"
 else:
-    adloc_file = f"{protocol}://{bucket}/{region}/adloc/ransac_events.csv"
+    # adloc_file = f"{protocol}://{bucket}/{region}/adloc/ransac_events.csv"
+    adloc_file = f"{protocol}://{bucket}/{region}/adloc/adloc_events.csv"
 adloc_exist = False
 try:
     adloc_catalog = pd.read_csv(adloc_file, parse_dates=["time"])
