@@ -69,7 +69,10 @@ def fillin_missing_picks(picks, events, stations, config):
6969 picks_ps .reset_index (inplace = True )
7070
7171 ## add provider
72- picks_ps = picks_ps .merge (picks [["event_index" , "station_id" , "provider" ]].drop_duplicates (), on = ["event_index" , "station_id" ])
72+ if "provider" in picks .columns :
73+ picks_ps = picks_ps .merge (picks [["event_index" , "station_id" , "provider" ]].drop_duplicates (), on = ["event_index" , "station_id" ])
74+ else :
75+ picks_ps = picks_ps .merge (picks [["event_index" , "station_id" ]].drop_duplicates (), on = ["event_index" , "station_id" ])
7376
7477 print (f"Original picks: { len (picks )} , Filled picks: { len (picks_ps )} " )
7578 print (picks_ps .iloc [:10 ])
@@ -440,8 +443,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
440443 # stations["station_id"] = stations.index
441444 # stations.sort_values(by=["latitude", "longitude"], inplace=True)
442445
443- # station_csv = f"{data_path}/adloc_stations.csv"
444- station_csv = f"{ data_path } /ransac_stations.csv"
446+ station_csv = f"{ data_path } /adloc_stations.csv"
447+ # station_csv = f"{data_path}/ransac_stations.csv"
445448 if protocol == "file" :
446449 stations = pd .read_csv (station_csv )
447450 else :
@@ -468,12 +471,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
468471 # events = pd.read_csv("adloc_events.csv", parse_dates=["time"])
469472 if protocol == "file" :
470473 events = pd .read_csv (
471- # f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
472- f"{ root_path } /{ data_path } /{ year :04d} /ransac_events_{ jday :03d} .csv" , parse_dates = ["time" ]
474+ f"{ root_path } /{ data_path } /{ year :04d} /adloc_events_{ jday :03d} .csv" , parse_dates = ["time" ]
475+ # f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", parse_dates=["time"]
473476 )
474477 else :
475- # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
476- with fs .open (f"{ bucket } /{ data_path } /{ year :04d} /ransac_events_{ jday :03d} .csv" , "r" ) as fp :
478+ with fs .open (f"{ bucket } /{ data_path } /{ year :04d} /adloc_events_{ jday :03d} .csv" , "r" ) as fp :
479+ # with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
477480 events = pd .read_csv (fp , parse_dates = ["time" ])
478481
479482 # # ############### DEBUG ###############
@@ -529,13 +532,17 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
529532 # picks = pd.read_csv(f"{root_path}/{data_path}/ransac_picks.csv")
530533 # picks = pd.read_csv("adloc_picks.csv")
531534 if protocol == "file" :
532- # picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
533- picks = pd .read_csv (f"{ root_path } /{ data_path } /{ year :04d} /ransac_picks_{ jday :03d} .csv" )
535+ picks = pd .read_csv (f"{ root_path } /{ data_path } /{ year :04d} /adloc_picks_{ jday :03d} .csv" )
536+ # picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv")
534537 else :
535- # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
536- with fs .open (f"{ bucket } /{ data_path } /{ year :04d} /ransac_picks_{ jday :03d} .csv" , "r" ) as fp :
538+ with fs .open (f"{ bucket } /{ data_path } /{ year :04d} /adloc_picks_{ jday :03d} .csv" , "r" ) as fp :
539+ # with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv", "r") as fp:
537540 picks = pd .read_csv (fp )
538541
542+ # if "provider" not in picks.columns:
543+ # picks["provider"] = "adloc"
544+ # print(f"No provider in picks; set to adloc as default")
545+
539546 # ############### DEBUG ###############
540547 # picks = picks[(picks["event_index"].isin(events["event_index"]))]
541548 # ############### DEBUG ###############
@@ -648,19 +655,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
648655 config ["reference_t0" ] = reference_t0 .strftime ("%Y-%m-%dT%H:%M:%S.%fZ" )
649656 events = events [["idx_eve" , "x_km" , "y_km" , "z_km" , "event_index" , "event_time" , "event_timestamp" ]]
650657 stations = stations [["idx_sta" , "x_km" , "y_km" , "z_km" , "station_id" , "component" , "network" , "station" ]]
651- picks = picks [
652- [
653- "idx_eve" ,
654- "idx_sta" ,
655- "phase_type" ,
656- "phase_score" ,
657- "phase_time" ,
658- "phase_timestamp" ,
659- "phase_source" ,
660- "station_id" ,
661- "provider" ,
662- ]
663- ]
658+ columns = ["idx_eve" , "idx_sta" , "phase_type" , "phase_score" , "phase_time" , "phase_timestamp" , "phase_source" , "station_id" ]
659+ if "provider" in picks .columns :
660+ columns .append ("provider" )
661+ picks = picks [columns ]
664662 events .set_index ("idx_eve" , inplace = True )
665663 stations .set_index ("idx_sta" , inplace = True )
666664 picks .sort_values (by = ["idx_eve" , "idx_sta" , "phase_type" ], inplace = True )
@@ -700,7 +698,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
700698 mseeds ["location" ] = mseeds ["fname" ].apply (lambda x : x [10 :12 ].strip ("_" ))
701699 mseeds ["year" ] = mseeds ["fname" ].apply (lambda x : x [13 :17 ])
702700 mseeds ["jday" ] = mseeds ["fname" ].apply (lambda x : x [17 :20 ])
703- mseeds ["provider" ] = "SC"
701+ if "provider" not in picks .columns :
702+ mseeds ["provider" ] = "SC"
704703 elif folder == "NC" :
705704 mseeds ["fname" ] = mseeds ["ENZ" ].apply (lambda x : x .split ("/" )[- 1 ])
706705 mseeds ["network" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[1 ])
@@ -709,7 +708,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
709708 mseeds ["location" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[3 ])
710709 mseeds ["year" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[5 ])
711710 mseeds ["jday" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[6 ])
712- mseeds ["provider" ] = "NC"
711+ if "provider" not in picks .columns :
712+ mseeds ["provider" ] = "NC"
713713 elif folder == "IRIS" :
714714 mseeds ["fname" ] = mseeds ["ENZ" ].apply (lambda x : x .split ("/" )[- 1 ])
715715 mseeds ["jday" ] = mseeds ["ENZ" ].apply (lambda x : x .split ("/" )[- 2 ])
@@ -718,7 +718,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
718718 mseeds ["station" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[1 ])
719719 mseeds ["location" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[2 ])
720720 mseeds ["instrument" ] = mseeds ["fname" ].apply (lambda x : x .split ("." )[3 ][:2 ])
721- mseeds ["provider" ] = "IRIS"
721+ if "provider" not in picks .columns :
722+ mseeds ["provider" ] = "IRIS"
722723 else :
723724 raise ValueError (f"Unknown folder: { folder } " )
724725 mseeds_df .append (mseeds )
@@ -737,7 +738,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
737738 mseeds_df = mseeds_df [(mseeds_df ["year" ].astype (int ) == year ) & (mseeds_df ["jday" ].astype (int ) == jday )]
738739 picks = picks [(picks ["year" ].astype (int ) == year ) & (picks ["jday" ].astype (int ) == jday )]
739740
740- picks = picks .merge (mseeds_df , on = ["network" , "station" , "location" , "instrument" , "year" , "jday" , "provider" ])
741+ if "provider" in picks .columns :
742+ picks = picks .merge (mseeds_df , on = ["network" , "station" , "location" , "instrument" , "year" , "jday" , "provider" ])
743+ else :
744+ picks = picks .merge (mseeds_df , on = ["network" , "station" , "location" , "instrument" , "year" , "jday" ])
741745 picks .drop (columns = ["fname" , "station_id" , "network" , "location" , "instrument" , "year" , "jday" ], inplace = True )
742746
743747 if len (picks ) == 0 :
@@ -930,11 +934,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
930934 # jdays = [f"{year}.{i:03d}" for i in range(1, num_jday + 1)]
931935 jdays = range (1 , num_jday + 1 )
932936
933- # jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/adloc_events_???.csv")
934- jdays = fs .glob (f"{ bucket } /{ region } /adloc/{ year :04d} /ransac_events_???.csv" )
937+ jdays = fs .glob (f"{ bucket } /{ region } /adloc/{ year :04d} /adloc_events_???.csv" )
938+ # jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/ransac_events_???.csv")
935939 jdays = [int (x .split ("/" )[- 1 ].split ("." )[0 ].split ("_" )[- 1 ]) for x in jdays ]
936940
937941 jdays = np .array_split (jdays , num_nodes )[node_rank ]
942+
938943 # jdays = ["2019.185"]
939944 # jdays = ["2019.186"]
940945 # jdays = ["2019.185", "2019.186", "2019.187"]
@@ -949,7 +954,7 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
949954 processed = [x .split ("/" )[- 2 ] for x in processed ]
950955 print (f"Processed days: { len (processed )} " )
951956
952- jdays = [jday for jday in jdays if f"{ jday :03d} " not in processed ]
957+ # jdays = [jday for jday in jdays if f"{jday:03d}" not in processed]
953958 print (f"Remaining days: { len (jdays )} " )
954959
955960 if len (jdays ) == 0 :
0 commit comments