@@ -414,17 +414,29 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     os.makedirs(f"{root_path}/{result_path}/{year:04d}/{jday:03d}", exist_ok=True)
 
     # %%
-    # stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations.csv")
-    # stations = pd.read_csv("adloc_stations.csv")
-    station_json = f"{region}/network/stations.json"
+    # station_json = f"{region}/network/stations.json"
+    # if protocol == "file":
+    #     stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
+    # else:
+    #     with fs.open(f"{bucket}/{station_json}", "r") as fp:
+    #         stations = pd.read_json(fp, orient="index")
+    # stations["station_id"] = stations.index
+    # stations.sort_values(by=["latitude", "longitude"], inplace=True)
+
+    # station_csv = f"{data_path}/adloc_stations.csv"
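+    # Station metadata comes from the RANSAC-filtered station CSV rather than the raw stations.json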
+    station_csv = f"{data_path}/ransac_stations.csv"
     if protocol == "file":
-        stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
+        stations = pd.read_csv(f"{root_path}/{station_csv}")
     else:
-        with fs.open(f"{bucket}/{station_json}", "r") as fp:
-            stations = pd.read_json(fp, orient="index")
-    stations["station_id"] = stations.index
+        with fs.open(f"{bucket}/{station_csv}", "r") as fp:
+            stations = pd.read_csv(fp)
     stations.sort_values(by=["latitude", "longitude"], inplace=True)
 
+
+    # stations = stations[stations["network"] == "7D"]
+    print(f"{len(stations) = }")
+    print(stations.head())
+
     # # ############### DEBUG ###############
     # # "minlatitude": 35.205,
     # # "maxlatitude": 36.205,
@@ -439,10 +451,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # events = pd.read_csv("adloc_events.csv", parse_dates=["time"])
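+    # Load the per-day RANSAC event catalog (ransac_events_{jday:03d}.csv)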
     if protocol == "file":
         events = pd.read_csv(
-            f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
+            # f"{root_path}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", parse_dates=["time"]
+            f"{root_path}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", parse_dates=["time"]
         )
     else:
-        with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
+        # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
+        with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_events_{jday:03d}.csv", "r") as fp:
             events = pd.read_csv(fp, parse_dates=["time"])
 
     # # ############### DEBUG ###############
@@ -498,9 +512,11 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # picks = pd.read_csv(f"{root_path}/{data_path}/ransac_picks.csv")
     # picks = pd.read_csv("adloc_picks.csv")
     if protocol == "file":
-        picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
+        # picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv")
+        picks = pd.read_csv(f"{root_path}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv")
     else:
-        with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
+        # with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
+        with fs.open(f"{bucket}/{data_path}/{year:04d}/ransac_picks_{jday:03d}.csv", "r") as fp:
             picks = pd.read_csv(fp)
 
     # ############### DEBUG ###############
@@ -643,16 +659,19 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # dirs = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??"), reverse=True)
     protocol = "gs"
     bucket = "quakeflow_catalog"
-    folder = "SC"
     token_json = "application_default_credentials.json"
     with open(token_json, "r") as fp:
         token = json.load(fp)
     fs = fsspec.filesystem(protocol=protocol, token=token)
     # year = 2019
     mseeds_df = []
-    for folder in ["SC", "NC"]:
-        with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
-            mseeds = f.readlines()
+    for folder in ["SC", "NC", "IRIS"]:
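+        # Some data centers may not have a 3-component mseed list for this year; skip them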
+        try:
+            with fs.open(f"{bucket}/{folder}/mseed_list/{year}_3c.txt", "r") as f:
+                mseeds = f.readlines()
+        except Exception as e:
+            print(f"Not found {bucket}/{folder}/mseed_list/{year}_3c.txt")
+            continue
         mseeds = [x.strip("\n") for x in mseeds]
         mseeds = pd.DataFrame(mseeds, columns=["ENZ"])
         if folder == "SC":
@@ -663,14 +682,24 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
             mseeds["location"] = mseeds["fname"].apply(lambda x: x[10:12].strip("_"))
             mseeds["year"] = mseeds["fname"].apply(lambda x: x[13:17])
             mseeds["jday"] = mseeds["fname"].apply(lambda x: x[17:20])
-        if folder == "NC":
+        elif folder == "NC":
             mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
             mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
             mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
             mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[2][:-1])
             mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[3])
             mseeds["year"] = mseeds["fname"].apply(lambda x: x.split(".")[5])
             mseeds["jday"] = mseeds["fname"].apply(lambda x: x.split(".")[6])
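+        # Assumes IRIS mseed paths encode year/jday in the directory and NET.STA.LOC.CHA in the file name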
+        elif folder == "IRIS":
+            mseeds["fname"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-1])
+            mseeds["jday"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-2])
+            mseeds["year"] = mseeds["ENZ"].apply(lambda x: x.split("/")[-3])
+            mseeds["network"] = mseeds["fname"].apply(lambda x: x.split(".")[0])
+            mseeds["station"] = mseeds["fname"].apply(lambda x: x.split(".")[1])
+            mseeds["location"] = mseeds["fname"].apply(lambda x: x.split(".")[2])
+            mseeds["instrument"] = mseeds["fname"].apply(lambda x: x.split(".")[3][:2])
+        else:
+            raise ValueError(f"Unknown folder: {folder}")
         mseeds_df.append(mseeds)
     mseeds_df = pd.concat(mseeds_df)
     print(mseeds_df.head())
@@ -694,6 +723,8 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
             print(f"No picks found for {year:04d}/{jday:03d}")
             continue
 
+
+
     # ####
     # out = picks.drop(columns=["ENZ"])
     # out.to_csv(f"{root_path}/{result_path}/{year:04d}/cctorch_picks_{jday:03d}.csv", index=False)
@@ -714,7 +745,7 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     print(f"Using {ncpu} cores")
 
     pbar = tqdm(total=nsplit, desc="Cutting templates")
-    ctx = mp.get_context("spawn")
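+    # fork workers inherit the parent's in-memory state and start faster than spawn (not available on Windows)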
+    ctx = mp.get_context("fork")
 
     with ctx.Manager() as manager:
         lock = manager.Lock()
@@ -830,13 +861,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     year = args.year
 
     # %%
-    # with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
-    #     config = json.load(fp)
-    # with open("config.json", "r") as fp:
-    #     config = json.load(fp)
-    # config["world_size"] = num_nodes
-    with open(args.config, "r") as fp:
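+    # Read the region config directly from the cloud bucket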
+    with fs.open(f"{bucket}/{region}/config.json", "r") as fp:
         config = json.load(fp)
+    # with open(args.config, "r") as fp:
+    #     config = json.load(fp)
     config.update(vars(args))
     print(json.dumps(config, indent=4, sort_keys=True))
 
@@ -876,10 +904,15 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     # num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365
     # jdays.extend([f"{year}.{i:03d}" for i in range(1, num_jday + 1)])
 
+
     num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365
     # jdays = [f"{year}.{i:03d}" for i in range(1, num_jday + 1)]
     jdays = range(1, num_jday + 1)
 
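+    # Keep only the julian days that already have a RANSAC event catalog in the bucket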
+    # jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/adloc_events_???.csv")
+    jdays = fs.glob(f"{bucket}/{region}/adloc/{year:04d}/ransac_events_???.csv")
+    jdays = [int(x.split("/")[-1].split(".")[0].split("_")[-1]) for x in jdays]
+
     jdays = np.array_split(jdays, num_nodes)[node_rank]
     # jdays = ["2019.185"]
     # jdays = ["2019.186"]