@@ -17,6 +17,7 @@ def run_phasenet(
1717 config : Dict ,
1818 node_rank : int = 0 ,
1919 num_nodes : int = 1 ,
20+ overwrite : bool = False ,
2021 model_path : str = "../PhaseNet/" ,
2122 protocol : str = "file" ,
2223 bucket : str = "" ,
@@ -34,22 +35,32 @@ def run_phasenet(
3435 # %%
3536 waveform_dir = f"{ region } /waveforms"
3637 mseed_list = sorted (glob (f"{ root_path } /{ waveform_dir } /????/???/??/*.mseed" ))
37-
38- # %%
39- processed = sorted (glob (f"{ root_path } /{ result_path } /picks/????/???/??/*.csv" ))
40- processed = [f .replace (f"{ root_path } /{ result_path } /picks/" , "" ).replace (".csv" , "" )[:- 1 ] for f in processed ]
41- print (f"Processed: { len (processed )} " )
38+ subdir = 4
4239
4340 # %%
4441 mseed_3c = defaultdict (list )
4542 for mseed in mseed_list :
46- key = mseed .replace (f"{ root_path } /{ waveform_dir } /" , "" ).replace (".mseed" , "" )[:- 1 ]
47- if key in processed :
48- continue
43+ key = "/" .join (mseed .replace (".mseed" , "" ).split ("/" )[- subdir :])
44+ key = key [:- 1 ] ## remove the channel suffix
4945 mseed_3c [key ].append (mseed )
50- mseed_3c = ["," .join (sorted (v )) for k , v in mseed_3c .items ()]
51- print (f"Unprocessed: { len (mseed_3c )} " )
52- mseed_3c = list (np .array_split (mseed_3c , num_nodes )[node_rank ])
46+ print (f"Number of mseed files: { len (mseed_3c )} " )
47+
48+ # %%
49+ if not overwrite :
50+ processed = sorted (glob (f"{ root_path } /{ result_path } /picks/????/???/??/*.csv" ))
51+ processed = ["/" .join (f .replace (".csv" , "" ).split ("/" )[- subdir :]) for f in processed ]
52+ processed = [p [:- 1 ] for p in processed ] ## remove the channel suffix
53+ print (f"Number of processed files: { len (processed )} " )
54+
55+ keys = sorted (list (set (mseed_3c .keys ()) - set (processed )))
56+ print (f"Number of unprocessed files: { len (keys )} " )
57+ keys = list (np .array_split (keys , num_nodes )[node_rank ])
58+ print (f"Node { node_rank :03d} /{ num_nodes :03d} : processing { len (keys )} files" )
59+
60+ if len (keys ) == 0 : ## nothing left for this node to process; exit early
61+ return 0
62+
63+ mseed_3c = ["," .join (sorted (mseed_3c [k ])) for k in keys ]
5364
5465 # %%
5566 mseed_file = f"{ root_path } /{ result_path } /mseed_list_{ node_rank :03d} _{ num_nodes :03d} .csv"
0 commit comments