1818from glob import glob
1919
2020
def scan_csv(year, root_path, region, model, fs=None, bucket=None, protocol="file"):
    """Scan one year of pick CSV files and write their paths to an index CSV.

    Walks ``{region}/{model}/picks/{year}/{jday}/{hh}/*.csv`` either on the
    local filesystem (``protocol == "file"``) or on a remote fsspec
    filesystem, collects every pick file, and writes a three-column index
    (``year``, ``jday``, ``csv``) to
    ``{root_path}/{region}/{model}/csv_list_{year}.csv``.

    Args:
        year: Year directory name to scan (string or int).
        root_path: Local root directory holding the region tree.
        region: Region sub-directory name.
        model: Model sub-directory name (e.g. ``phasenet``).
        fs: fsspec filesystem object; required when ``protocol != "file"``.
        bucket: Remote bucket name; required when ``protocol != "file"``.
        protocol: ``"file"`` for local scanning, anything else for remote.

    Returns:
        Path of the written index CSV file.
    """
    csv_list = []
    if protocol != "file":
        # Remote listing: fs.ls returns full "bucket/..." prefixed paths,
        # so the per-jday glob below can use jday directly.
        jdays = fs.ls(f"{bucket}/{region}/{model}/picks/{year}")
    else:
        jdays = os.listdir(f"{root_path}/{region}/{model}/picks/{year}/")

    for jday in jdays:
        if protocol != "file":
            csvs = fs.glob(f"{jday}/??/*.csv")
        else:
            # "??" matches the two-character hour sub-directories.
            csvs = glob(f"{root_path}/{region}/{model}/picks/{year}/{jday}/??/*.csv")

        csv_list.extend([[year, jday, csv] for csv in csvs])

    csvs = pd.DataFrame(csv_list, columns=["year", "jday", "csv"])
    # The index is always written locally, even for remote scans; make sure
    # the target directory exists before writing.
    result_dir = f"{root_path}/{region}/{model}"
    os.makedirs(result_dir, exist_ok=True)
    csv_file = f"{result_dir}/csv_list_{year}.csv"
    csvs.to_csv(csv_file, index=False)

    return csv_file
4242
4343
4444# %%
45- def read_csv (rows , region , year , jday , root_path , fs = None , bucket = None ):
45+ def read_csv (rows , region , model , year , jday , root_path , fs = None , bucket = None ):
4646
4747 picks = []
4848 for i , row in rows .iterrows ():
@@ -58,15 +58,15 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
5858
5959 if len (picks ) > 0 :
6060 picks = pd .concat (picks , ignore_index = True )
61- if not os .path .exists (f"{ root_path } /{ region } /phasenet /{ year } " ):
62- os .makedirs (f"{ root_path } /{ region } /phasenet /{ year } " , exist_ok = True )
63- picks .to_csv (f"{ root_path } /{ region } /phasenet /{ year } /{ year } .{ jday } .csv" , index = False )
61+ if not os .path .exists (f"{ root_path } /{ region } /{ model } /{ year } " ):
62+ os .makedirs (f"{ root_path } /{ region } /{ model } /{ year } " , exist_ok = True )
63+ picks .to_csv (f"{ root_path } /{ region } /{ model } /{ year } /{ year } .{ jday } .csv" , index = False )
6464 # fs.put(
6565 # f"{root_path}/{region}/phasenet/{year}/{jday}/{year}.{jday}.csv",
6666 # f"{bucket}/{region}/phasenet_merged/{year}/{year}.{jday}.csv",
6767 # )
6868 else :
69- with open (f"{ root_path } /{ region } /phasenet /{ year } /{ year } .{ jday } .csv" , "w" ) as f :
69+ with open (f"{ root_path } /{ region } /{ model } /{ year } /{ year } .{ jday } .csv" , "w" ) as f :
7070 f .write ("" )
7171
7272
@@ -76,9 +76,9 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
7676 args = parse_args ()
7777 root_path = args .root_path
7878 region = args .region
79+ model = args .model
7980
80- data_path = f"{ region } /phasenet/picks"
81- result_path = f"{ region } /phasenet"
81+ result_path = f"{ region } /{ model } "
8282
8383 # %%
8484 # protocol = "gs"
@@ -88,32 +88,33 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
8888 # fs = fsspec.filesystem(protocol, token=token)
8989
9090 # %%
91- years = os .listdir (f"{ root_path } /{ region } /phasenet/picks" )
91+ # years = os.listdir(f"{root_path}/{region}/{model}/picks_{model}")
92+ years = glob (f"{ root_path } /{ region } /{ model } /picks_{ model } /????/" )
93+ years = [year .rstrip ("/" ).split ("/" )[- 1 ] for year in years ]
94+ print (f"Years: { years } " )
9295
9396 for year in years :
9497
95- csv_list = scan_csv (year , root_path )
98+ csv_list = scan_csv (year , root_path , region , model )
9699
97100 # %%
98101 csv_list = pd .read_csv (csv_list , dtype = str )
99102
100103 # for jday, csvs in csv_list.groupby("jday"):
101- # read_csv(csvs, region, year, jday, root_path)
104+ # read_csv(csvs, region, model, year, jday, root_path)
102105 # raise
103106
104- # ncpu = os.cpu_count()
105- ncpu = 64
107+ ncpu = min (64 , mp .cpu_count ())
106108 print (f"Number of processors: { ncpu } " )
107109 csv_by_jday = csv_list .groupby ("jday" )
108110 pbar = tqdm (total = len (csv_by_jday ), desc = f"Loading csv files (year { year } )" )
109111
110- # with mp.Pool(ncpu) as pool:
111112 ctx = mp .get_context ("spawn" )
112113 with ctx .Pool (ncpu ) as pool :
113114 jobs = []
114115 for jday , csvs in csv_by_jday :
115116 job = pool .apply_async (
116- read_csv , (csvs , region , year , jday , root_path ), callback = lambda _ : pbar .update ()
117+ read_csv , (csvs , region , model , year , jday , root_path ), callback = lambda _ : pbar .update ()
117118 )
118119 jobs .append (job )
119120 pool .close ()
@@ -126,11 +127,11 @@ def read_csv(rows, region, year, jday, root_path, fs=None, bucket=None):
126127 pbar .close ()
127128
128129 # %%
129- csvs = glob (f"{ root_path } /{ region } /phasenet /????/????.???.csv" )
130+ csvs = glob (f"{ root_path } /{ region } /{ model } /????/????.???.csv" )
130131 picks = []
131132 for csv in tqdm (csvs , desc = "Merge csv files" ):
132133 picks .append (pd .read_csv (csv , dtype = str ))
133134 picks = pd .concat (picks , ignore_index = True )
134- picks .to_csv (f"{ root_path } /{ region } /phasenet/phasenet_picks .csv" , index = False )
135+ picks .to_csv (f"{ root_path } /{ region } /{ model } / { model } _picks .csv" , index = False )
135136
136137# %%
0 commit comments