Skip to content

Commit 16eba5b

Browse files
committed
test on Mendocino
1 parent 47dd4cc commit 16eba5b

File tree

10 files changed

+575
-192
lines changed

10 files changed

+575
-192
lines changed

examples/california/cut_templates_cc.py

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -414,17 +414,29 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
414414
os.makedirs(f"{root_path}/{result_path}/{year:04d}/{jday:03d}", exist_ok=True)
415415

416416
# %%
417-
# stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations.csv")
418-
# stations = pd.read_csv("adloc_stations.csv")
419-
station_json = f"{region}/network/stations.json"
417+
# station_json = f"{region}/network/stations.json"
418+
# if protocol == "file":
419+
# stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
420+
# else:
421+
# with fs.open(f"{bucket}/{station_json}", "r") as fp:
422+
# stations = pd.read_json(fp, orient="index")
423+
# stations["station_id"] = stations.index
424+
# stations.sort_values(by=["latitude", "longitude"], inplace=True)
425+
426+
# station_csv = f"{data_path}/adloc_stations.csv"
427+
station_csv = f"{data_path}/ransac_stations.csv"
420428
if protocol == "file":
421-
stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
429+
stations = pd.read_csv(station_csv)
422430
else:
423-
with fs.open(f"{bucket}/{station_json}", "r") as fp:
424-
stations = pd.read_json(fp, orient="index")
425-
stations["station_id"] = stations.index
431+
with fs.open(f"{bucket}/{station_csv}", "r") as fp:
432+
stations = pd.read_csv(fp)
426433
stations.sort_values(by=["latitude", "longitude"], inplace=True)
427434

435+
436+
# stations = stations[stations["network"] == "7D"]
437+
print(f"{len(stations) = }")
438+
print(stations.head())
439+
428440
# # ############### DEBUG ###############
429441
# # "minlatitude": 35.205,
430442
# # "maxlatitude": 36.205,
@@ -439,10 +451,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
439451
# events = pd.read_csv("adloc_events.csv", parse_dates=["time"])
440452
if protocol == "file":
441453
events = pd.read_csv(
442-
f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
454+
# f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
455+
f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", parse_dates=["time"]
443456
)
444457
else:
445-
with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
458+
# with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
459+
with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
446460
events = pd.read_csv(fp, parse_dates=["time"])
447461

448462
# # ############### DEBUG ###############
@@ -498,9 +512,11 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
498512
# picks = pd.read_csv(f"{root_path}/{data_path}/ransac_picks.csv")
499513
# picks = pd.read_csv("adloc_picks.csv")
500514
if protocol == "file":
501-
picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
515+
# picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
516+
picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv")
502517
else:
503-
with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
518+
# with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
519+
with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv", "r") as fp:
504520
picks = pd.read_csv(fp)
505521

506522
# ############### DEBUG ###############
@@ -643,16 +659,19 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
643659
# dirs = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??"), reverse=True)
644660
protocol = "gs"
645661
bucket = "quakeflow_catalog"
646-
folder = "SC"
647662
token_json = "application_default_credentials.json"
648663
with open(token_json, "r") as fp:
649664
token = json.load(fp)
650665
fs = fsspec.filesystem(protocol=protocol, token=token)
651666
# year = 2019
652667
mseeds_df = []
653-
for folder in ["SC", "NC"]:
654-
with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
655-
mseeds = f.readlines()
668+
for folder in ["SC", "NC", "IRIS"]:
669+
try:
670+
with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
671+
mseeds = f.readlines()
672+
except Exception as e:
673+
print(f"Not found {bucket}/{folder}/mseed_list/{year}_3c.txt")
674+
continue
656675
mseeds = [x.strip("\n") for x in mseeds]
657676
mseeds = pd.DataFrame(mseeds, columns=["ENZ"])
658677
if folder == "SC":
@@ -663,14 +682,24 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
663682
mseeds["location"] = mseeds["fname"].apply(lambda x: x[10:12].strip("_"))
664683
mseeds["year"] = mseeds["fname"].apply(lambda x: x[13:17])
665684
mseeds["jday"] = mseeds["fname"].apply(lambda x: x[17:20])
666-
if folder == "NC":
685+
elif folder == "NC":
667686
mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
668687
mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
669688
mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
670689
mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[2][:-1])
671690
mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[3])
672691
mseeds["year"] = mseeds["fname"].apply(lambda x: x.split(".")[5])
673692
mseeds["jday"] = mseeds["fname"].apply(lambda x: x.split(".")[6])
693+
elif folder == "IRIS":
694+
mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
695+
mseeds["jday"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-2])
696+
mseeds["year"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-3])
697+
mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
698+
mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
699+
mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[2])
700+
mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[3][:2])
701+
else:
702+
raise ValueError(f"Unknown folder: {folder}")
674703
mseeds_df.append(mseeds)
675704
mseeds_df = pd.concat(mseeds_df)
676705
print(mseeds_df.head())
@@ -694,6 +723,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
694723
print(f"No picks found for {year:04d}/{jday:03d}")
695724
continue
696725

726+
727+
697728
# ####
698729
# out = picks.drop(columns=["ENZ"])
699730
# out.to_csv(f"{root_path}/{result_path}/{year:04d}/cctorch_picks_{jday:03d}.csv", index=False)
@@ -714,7 +745,7 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
714745
print(f"Using {ncpu} cores")
715746

716747
pbar = tqdm(total=nsplit, desc="Cutting templates")
717-
ctx = mp.get_context("spawn")
748+
ctx = mp.get_context("fork")
718749

719750
with ctx.Manager() as manager:
720751
lock = manager.Lock()
@@ -830,13 +861,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
830861
year = args.year
831862

832863
# %%
833-
# with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
834-
# config = json.load(fp)
835-
# with open("config.json", "r") as fp:
836-
# config = json.load(fp)
837-
# config["world_size"] = num_nodes
838-
with open(args.config, "r") as fp:
864+
with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
839865
config = json.load(fp)
866+
# with open(args.config, "r") as fp:
867+
# config = json.load(fp)
840868
config.update(vars(args))
841869
print(json.dumps(config, indent=4, sort_keys=True))
842870

@@ -876,10 +904,15 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
876904
# num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365
877905
# jdays.extend([f"{year}.{i:03d}" for i in range(1, num_jday + 1)])
878906

907+
879908
num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365
880909
# jdays = [f"{year}.{i:03d}" for i in range(1, num_jday + 1)]
881910
jdays = range(1, num_jday + 1)
882911

912+
# jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/adloc_events_???.csv")
913+
jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/ransac_events_???.csv")
914+
jdays = [int(x.split("/")[-1].split(".")[0].split("_")[-1]) for x in jdays]
915+
883916
jdays = np.array_split(jdays, num_nodes)[node_rank]
884917
# jdays = ["2019.185"]
885918
# jdays = ["2019.186"]

examples/california/cut_templates_merge.py

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -63,18 +63,34 @@ def generate_pairs(picks, events, stations, max_pair_dist=10, max_neighbors=50,
6363
# %%
6464
if __name__ == "__main__":
6565

66+
# %%
67+
protocol = "gs"
68+
token_json = f"application_default_credentials.json"
69+
with open(token_json, "r") as fp:
70+
token = json.load(fp)
71+
72+
fs = fsspec.filesystem(protocol, token=token)
73+
6674
# %%
6775
args = parse_args()
76+
region = args.region
77+
root_path = args.root_path
78+
bucket = args.bucket
79+
num_nodes = args.num_nodes
80+
node_rank = args.node_rank
81+
year = args.year
6882

6983
# %%
70-
region = "Cal"
7184
result_path = f"{region}/cctorch"
85+
folder = result_path
7286
# if not os.path.exists(result_path):
7387
# os.makedirs(result_path)
7488

7589
# %%
76-
with open(args.config, "r") as fp:
90+
with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
7791
config = json.load(fp)
92+
# with open(args.config, "r") as fp:
93+
# config = json.load(fp)
7894
config.update(vars(args))
7995
print(json.dumps(config, indent=4, sort_keys=True))
8096

@@ -88,16 +104,6 @@ def generate_pairs(picks, events, stations, max_pair_dist=10, max_neighbors=50,
88104
# config["mindepth"] = 0
89105
# config["max_epicenter_dist_km"] = 200.0
90106

91-
# %%
92-
protocol = "gs"
93-
bucket = "quakeflow_catalog"
94-
folder = result_path
95-
token_json = f"application_default_credentials.json"
96-
with open(token_json, "r") as fp:
97-
token = json.load(fp)
98-
99-
fs = fsspec.filesystem(protocol, token=token)
100-
101107
# %%
102108
def plot_templates(templates, events, picks):
103109
templates = templates - np.nanmean(templates, axis=(-1), keepdims=True)
@@ -111,18 +117,21 @@ def plot_templates(templates, events, picks):
111117
plt.show()
112118

113119
# %%
114-
115-
region = "Cal"
116-
root_path = "local"
117-
protocol = "gs"
118-
bucket = "quakeflow_catalog"
119-
station_json = f"{region}/network/stations.json"
120+
# station_json = f"{region}/network/stations.json"
121+
# if protocol == "file":
122+
# stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
123+
# else:
124+
# with fs.open(f"{bucket}/{station_json}", "r") as fp:
125+
# stations = pd.read_json(fp, orient="index")
126+
# stations["station_id"] = stations.index
127+
128+
station_csv = f"{region}/adloc/ransac_stations.csv"
129+
# station_csv = f"{region}/adloc/adloc_stations.csv"
120130
if protocol == "file":
121-
stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
131+
stations = pd.read_csv(f"{root_path}/{station_csv}")
122132
else:
123-
with fs.open(f"{bucket}/{station_json}", "r") as fp:
124-
stations = pd.read_json(fp, orient="index")
125-
stations["station_id"] = stations.index
133+
with fs.open(f"{bucket}/{station_csv}", "r") as fp:
134+
stations = pd.read_csv(fp)
126135
stations.sort_values(by=["latitude", "longitude"], inplace=True)
127136
print(f"stations: {len(stations):,} ")
128137
print(stations.iloc[:5])
@@ -152,12 +161,12 @@ def plot_templates(templates, events, picks):
152161
# exit_jdays.append((year, jday))
153162
# selected_years = [2019]
154163
# selected_jdays = [185, 186, 187]
155-
selected_years = [2020, 2021, 2022, 2023]
164+
# selected_years = [2020, 2021, 2022, 2023]
156165
year_jday = [
157166
(year, jday)
158167
for tmp in exit_jdays
159168
for (year, jday) in [tmp.split("/")[-3:-1]]
160-
if (int(year) in selected_years)
169+
# if (int(year) in selected_years)
161170
# if (int(year) in selected_years and int(jday) in selected_jdays)
162171
]
163172
print(f"Selected jdays: {len(year_jday)}")
@@ -417,4 +426,25 @@ def process_day(year_jday):
417426

418427
# break
419428

429+
if protocol == "gs":
430+
print(f"{root_path}/{result_path}/cctorch_picks.csv -> {bucket}/{folder}/cctorch_picks.csv")
431+
fs.put(f"{root_path}/{result_path}/cctorch_picks.csv", f"{bucket}/{folder}/cctorch_picks.csv")
432+
print(f"{root_path}/{result_path}/cctorch_events.csv -> {bucket}/{folder}/cctorch_events.csv")
433+
fs.put(f"{root_path}/{result_path}/cctorch_events.csv", f"{bucket}/{folder}/cctorch_events.csv")
434+
print(f"{root_path}/{result_path}/cctorch_stations.csv -> {bucket}/{folder}/cctorch_stations.csv")
435+
fs.put(f"{root_path}/{result_path}/cctorch_stations.csv", f"{bucket}/{folder}/cctorch_stations.csv")
436+
print(f"{root_path}/{result_path}/config.json -> {bucket}/{folder}/config.json")
437+
fs.put(f"{root_path}/{result_path}/config.json", f"{bucket}/{folder}/config.json")
438+
print(f"{root_path}/{result_path}/pairs.txt -> {bucket}/{folder}/pairs.txt")
439+
fs.put(f"{root_path}/{result_path}/pairs.txt", f"{bucket}/{folder}/pairs.txt")
440+
print(f"{root_path}/{result_path}/template.dat -> {bucket}/{folder}/template.dat")
441+
fs.put(f"{root_path}/{result_path}/template.dat", f"{bucket}/{folder}/template.dat")
442+
print(f"{root_path}/{result_path}/traveltime.dat -> {bucket}/{folder}/traveltime.dat")
443+
fs.put(f"{root_path}/{result_path}/traveltime.dat", f"{bucket}/{folder}/traveltime.dat")
444+
print(f"{root_path}/{result_path}/traveltime_index.dat -> {bucket}/{folder}/traveltime_index.dat")
445+
fs.put(f"{root_path}/{result_path}/traveltime_index.dat", f"{bucket}/{folder}/traveltime_index.dat")
446+
print(f"{root_path}/{result_path}/traveltime_mask.dat -> {bucket}/{folder}/traveltime_mask.dat")
447+
fs.put(f"{root_path}/{result_path}/traveltime_mask.dat", f"{bucket}/{folder}/traveltime_mask.dat")
448+
449+
420450
# %%

0 commit comments

Comments
 (0)