@@ -414,17 +414,29 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     os.makedirs(f"{root_path}/{result_path}/{year:04d}/{jday:03d}", exist_ok=True)
 
     # %%
-    # stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations.csv")
-    # stations = pd.read_csv("adloc_stations.csv")
-    station_json = f"{region}/network/stations.json"
+    # station_json = f"{region}/network/stations.json"
+    # if protocol == "file":
+    #     stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
+    # else:
+    #     with fs.open(f"{bucket}/{station_json}", "r") as fp:
+    #         stations = pd.read_json(fp, orient="index")
+    # stations["station_id"] = stations.index
+    # stations.sort_values(by=["latitude", "longitude"], inplace=True)
+
+    # station_csv = f"{data_path}/adloc_stations.csv"
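+    # Station metadata comes from the RANSAC-filtered station CSV rather than the raw stations.json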
+    station_csv = f"{data_path}/ransac_stations.csv"
     if protocol == "file":
-        stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
+        stations = pd.read_csv(f"{root_path}/{station_csv}")
     else:
-        with fs.open(f"{bucket}/{station_json}", "r") as fp:
-            stations = pd.read_json(fp, orient="index")
-    stations["station_id"] = stations.index
+        with fs.open(f"{bucket}/{station_csv}", "r") as fp:
+            stations = pd.read_csv(fp)
     stations.sort_values(by=["latitude", "longitude"], inplace=True)
 
+
+    # stations = stations[stations["network"] == "7D"]
+    print(f"{len(stations) = }")
+    print(stations.head())
+
     # # ############### DEBUG ###############
     # # "minlatitude": 35.205,
     # # "maxlatitude": 36.205,
@@ -439,10 +451,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # events = pd.read_csv("adloc_events.csv", parse_dates=["time"])
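+    # Load the per-day RANSAC event catalog (ransac_events_{jday:03d}.csv)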
     if protocol == "file":
         events = pd.read_csv(
-            f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
+            # f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
+            f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", parse_dates=["time"]
         )
     else:
-        with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
+        # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
+        with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
             events = pd.read_csv(fp, parse_dates=["time"])
 
     # # ############### DEBUG ###############
@@ -498,9 +512,11 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # picks = pd.read_csv(f"{root_path}/{data_path}/ransac_picks.csv")
     # picks = pd.read_csv("adloc_picks.csv")
     if protocol == "file":
-        picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
+        # picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
+        picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv")
     else:
-        with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
+        # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
+        with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv", "r") as fp:
             picks = pd.read_csv(fp)
 
     # ############### DEBUG ###############
@@ -643,16 +659,19 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # dirs = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??"), reverse=True)
     protocol = "gs"
     bucket = "quakeflow_catalog"
-    folder = "SC"
     token_json = "application_default_credentials.json"
     with open(token_json, "r") as fp:
         token = json.load(fp)
     fs = fsspec.filesystem(protocol=protocol, token=token)
     # year = 2019
     mseeds_df = []
-    for folder in ["SC", "NC"]:
-        with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
-            mseeds = f.readlines()
+    for folder in ["SC", "NC", "IRIS"]:
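+        # Some data centers may not have a 3-component mseed list for this year; skip them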
+        try:
+            with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
+                mseeds = f.readlines()
+        except Exception as e:
+            print(f"Not found {bucket}/{folder}/mseed_list/{year}_3c.txt")
+            continue
         mseeds = [x.strip("\n") for x in mseeds]
         mseeds = pd.DataFrame(mseeds, columns=["ENZ"])
         if folder == "SC":
@@ -663,14 +682,24 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
             mseeds["location"] = mseeds["fname"].apply(lambda x: x[10:12].strip("_"))
             mseeds["year"] = mseeds["fname"].apply(lambda x: x[13:17])
             mseeds["jday"] = mseeds["fname"].apply(lambda x: x[17:20])
-        if folder == "NC":
+        elif folder == "NC":
             mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
             mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
             mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
             mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[2][:-1])
             mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[3])
             mseeds["year"] = mseeds["fname"].apply(lambda x: x.split(".")[5])
             mseeds["jday"] = mseeds["fname"].apply(lambda x: x.split(".")[6])
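+        # Assumes IRIS mseed paths encode year/jday in the directory and NET.STA.LOC.CHA in the file name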
+        elif folder == "IRIS":
+            mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
+            mseeds["jday"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-2])
+            mseeds["year"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-3])
+            mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
+            mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
+            mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[2])
+            mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[3][:2])
+        else:
+            raise ValueError(f"Unknown folder: {folder}")
         mseeds_df.append(mseeds)
     mseeds_df = pd.concat(mseeds_df)
     print(mseeds_df.head())
@@ -694,6 +723,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
             print(f"No picks found for {year:04d}/{jday:03d}")
             continue
 
+
+
     # ####
     # out = picks.drop(columns=["ENZ"])
     # out.to_csv(f"{root_path}/{result_path}/{year:04d}/cctorch_picks_{jday:03d}.csv", index=False)
@@ -714,7 +745,7 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     print(f"Using {ncpu} cores")
 
     pbar = tqdm(total=nsplit, desc="Cutting templates")
-    ctx = mp.get_context("spawn")
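+    # fork workers inherit the parent's in-memory state and start faster than spawn (not available on Windows)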
+    ctx = mp.get_context("fork")
 
     with ctx.Manager() as manager:
         lock = manager.Lock()
@@ -830,13 +861,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     year = args.year
 
     # %%
-    # with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
-    #     config = json.load(fp)
-    # with open("config.json", "r") as fp:
-    #     config = json.load(fp)
-    # config["world_size"] = num_nodes
-    with open(args.config, "r") as fp:
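+    # Read the region config directly from the cloud bucket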
+    with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
         config = json.load(fp)
+    # with open(args.config, "r") as fp:
+    #     config = json.load(fp)
     config.update(vars(args))
     print(json.dumps(config, indent=4, sort_keys=True))
 
@@ -876,10 +904,15 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365
     # jdays.extend([f"{year}.{i:03d}" for i in range(1, num_jday + 1)])
 
+
     num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365
     # jdays = [f"{year}.{i:03d}" for i in range(1, num_jday + 1)]
     jdays = range(1, num_jday + 1)
 
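+    # Keep only the julian days that already have a RANSAC event catalog in the bucket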
+    # jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/adloc_events_???.csv")
+    jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/ransac_events_???.csv")
+    jdays = [int(x.split("/")[-1].split(".")[0].split("_")[-1]) for x in jdays]
+
     jdays = np.array_split(jdays, num_nodes)[node_rank]
     # jdays = ["2019.185"]
     # jdays = ["2019.186"]