
Commit 4d7d98c

add cctorch to california
1 parent 436cfce commit 4d7d98c

4 files changed: +265 -34 lines changed

examples/california/cut_templates_cc.py

Lines changed: 31 additions & 2 deletions
@@ -421,8 +421,15 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
         stations = pd.read_json(fp, orient="index")
     stations["station_id"] = stations.index
     stations.sort_values(by=["latitude", "longitude"], inplace=True)
-    print(f"{len(stations) = }")
-    print(stations.iloc[:5])
+
+    # ############### DEBUG ###############
+    # "minlatitude": 35.205,
+    # "maxlatitude": 36.205,
+    # "minlongitude": -118.004,
+    # "maxlongitude": -117.004,
+    minlat, maxlat = 35.205, 36.205
+    minlon, maxlon = -118.004, -117.004
+    # ############### DEBUG ###############
 
     # %%
     # events = pd.read_csv(f"{root_path}/{data_path}/ransac_events.csv", parse_dates=["time"])
@@ -435,6 +442,18 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
         events = pd.read_csv(fp, parse_dates=["time"])
 
+    # ############### DEBUG ###############
+    # events = events[
+    #     (events["latitude"] >= minlat)
+    #     & (events["latitude"] <= maxlat)
+    #     & (events["longitude"] >= minlon)
+    #     & (events["longitude"] <= maxlon)
+    # ]
+    # plt.figure(figsize=(10, 10))
+    # plt.scatter(events["longitude"], events["latitude"], s=1)
+    # plt.savefig(f"events_{year:04d}_{jday:03d}.png")
+    # ############### DEBUG ###############
+
     events.rename(columns={"time": "event_time"}, inplace=True)
     events["event_time"] = pd.to_datetime(events["event_time"], utc=True)
     reference_t0 = events["event_time"].min()
@@ -467,6 +486,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
         picks = pd.read_csv(fp)
 
+    # ############### DEBUG ###############
+    # picks = picks[(picks["event_index"].isin(events["event_index"]))]
+    # ############### DEBUG ###############
+
     picks = picks[picks["adloc_mask"] == 1]
     picks["phase_time"] = pd.to_datetime(picks["phase_time"], utc=True)
     min_phase_score = picks["phase_score"].min()
@@ -507,6 +530,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
     print(f"{len(picks) = }")
     picks = picks[picks["dist_km"] < config["max_epicenter_dist_km"]]
     print(f"{len(picks) = } with dist_km < {config['max_epicenter_dist_km']} km")
+    print(picks.iloc[:5])
+
+    ## filter stations
+    stations = stations[stations["station_id"].isin(picks["station_id"])]
+    print(f"{len(stations) = }")
+    print(stations.iloc[:5])
 
     # %% Reindex event and station to make it continuous
     stations["idx_sta"] = np.arange(len(stations))

examples/california/run_cctorch.py

Lines changed: 44 additions & 32 deletions
@@ -1,15 +1,33 @@
 # %%
+import argparse
 import os
 
 import torch
-from args import parse_args
+
+
+##
+def parse_args():
+    parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data")
+    parser.add_argument("--num_nodes", type=int, default=1)
+    parser.add_argument("--node_rank", type=int, default=0)
+    parser.add_argument("--year", type=int, default=2023)
+    parser.add_argument("--root_path", type=str, default="local")
+    parser.add_argument("--region", type=str, default="Cal")
+    parser.add_argument("--bucket", type=str, default="quakeflow_catalog")
+    return parser.parse_args()
+
 
 args = parse_args()
+
+##
+year = 2019
+jday = 185
+
 # %%
 root_path = args.root_path
 region = args.region
 
-data_path = f"{region}/cctorch"
+data_path = f"{region}/cctorch/{year}"
 result_path = f"{region}/cctorch/ccpairs"
 
 # data_path = f"{region}/cctorch_ca"
@@ -28,31 +46,14 @@
 block_size1 = 1000_000
 block_size2 = 1000_000
 
-if args.dtct_pair:
-    dt_ct = f"{root_path}/{region}/hypodd/dt.ct"
-    pair_list = f"{root_path}/{region}/hypodd/pairs.txt"
-    lines = []
-    with open(dt_ct, "r") as fp:
-        for line in fp:
-            if line.startswith("#"):
-                ev1, ev2 = line.split()[1:3]
-                if ev1 > ev2:
-                    ev1, ev2 = ev2, ev1
-                lines.append(f"{ev1},{ev2}\n")
-
-    print(f"Number of pairs from hypodd dt.ct: {len(lines)}")
-    with open(f"{root_path}/{region}/hypodd/pairs.txt", "w") as fp:
-        fp.writelines(lines)
-    base_cmd = f"../CCTorch/run.py --pair_list={root_path}/{region}/hypodd/pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap --config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}"
 
-else:
-    base_cmd = (
-        f"../CCTorch/run.py --pair_list={root_path}/{data_path}/pairs.txt --data_path1={root_path}/{data_path}/template.dat --data_format1=memmap "
-        f"--data_list1={root_path}/{data_path}/cctorch_picks.csv "
-        f"--events_csv={root_path}/{data_path}/cctorch_events.csv --picks_csv={root_path}/{data_path}/cctorch_picks.csv --stations_csv={root_path}/{data_path}/cctorch_stations.csv "
-        f"--config={root_path}/{data_path}/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} "
-        f"--result_path={root_path}/{result_path}"
-    )
+base_cmd = (
+    f"../../CCTorch/run.py --pair_list={root_path}/{data_path}/pairs_{jday:03d}.txt --data_path1={root_path}/{data_path}/template_{jday:03d}.dat --data_format1=memmap "
+    f"--data_list1={root_path}/{data_path}/cctorch_picks_{jday:03d}.csv "
+    f"--events_csv={root_path}/{data_path}/cctorch_events_{jday:03d}.csv --picks_csv={root_path}/{data_path}/cctorch_picks_{jday:03d}.csv --stations_csv={root_path}/{data_path}/cctorch_stations_{jday:03d}.csv "
+    f"--config={root_path}/{data_path}/config_{jday:03d}.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} "
+    f"--result_path={root_path}/{result_path}"
+)
 
 
 if torch.cuda.is_available():
@@ -70,23 +71,34 @@
 else:
     cmd = f"python {base_cmd} --device={device}"
 print(cmd)
-os.system(cmd)
+# os.system(cmd)
 
 # %%
 for rank in range(num_gpu):
     if not os.path.exists(f"{root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv"):
         continue
     if rank == 0:
-        cmd = f"cat {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv > {root_path}/{data_path}/dtcc.csv"
+        cmd = f"cat {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv > {root_path}/{data_path}/../dtcc.csv"
     else:
-        cmd = (
-            f"tail -n +2 {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv >> {root_path}/{data_path}/dtcc.csv"
-        )
+        cmd = f"tail -n +2 {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv >> {root_path}/{data_path}/../dtcc.csv"
     print(cmd)
     os.system(cmd)
 
 
-cmd = f"cat {root_path}/{result_path}/CC_*_{num_gpu:03d}_dt.cc > {root_path}/{data_path}/dt.cc"
+cmd = f"cat {root_path}/{result_path}/CC_*_{num_gpu:03d}_dt.cc > {root_path}/{data_path}/../dt.cc"
+print(cmd)
+os.system(cmd)
+
+##
+cmd = f"cp {root_path}/{data_path}/cctorch_stations_{jday:03d}.csv {root_path}/{data_path}/../cctorch_stations.csv"
+print(cmd)
+os.system(cmd)
+
+cmd = f"cp {root_path}/{data_path}/cctorch_events_{jday:03d}.csv {root_path}/{data_path}/../cctorch_events.csv"
+print(cmd)
+os.system(cmd)
+
+cmd = f"cp {root_path}/{data_path}/cctorch_picks_{jday:03d}.csv {root_path}/{data_path}/../cctorch_picks.csv"
 print(cmd)
 os.system(cmd)
 
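
Note: the cat/tail -n +2 loop above merges the per-GPU shards CC_{rank:03d}_{num_gpu:03d}.csv into one dtcc.csv, keeping the header row only from the rank-0 shard. An equivalent sketch in pandas, should the shell pipeline ever need replacing (the paths and the shard count of 3 are illustrative, not from this commit):

import glob

import pandas as pd

# Per-rank shards, e.g. CC_000_003.csv, CC_001_003.csv, CC_002_003.csv.
shards = sorted(glob.glob("local/Cal/cctorch/ccpairs/CC_*_003.csv"))

# Each shard carries its own header line; the shell version drops the
# duplicates with tail -n +2, while pd.concat just re-parses each file.
merged = pd.concat((pd.read_csv(f) for f in shards), ignore_index=True)
merged.to_csv("local/Cal/cctorch/dtcc.csv", index=False)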

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+# %%
+# from args import parse_args
+##
+import argparse
+import json
+import os
+
+import numpy as np
+import pandas as pd
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data")
+    parser.add_argument("--num_nodes", type=int, default=1)
+    parser.add_argument("--node_rank", type=int, default=0)
+    parser.add_argument("--year", type=int, default=2023)
+    parser.add_argument("--root_path", type=str, default="local")
+    parser.add_argument("--region", type=str, default="Cal")
+    parser.add_argument("--bucket", type=str, default="quakeflow_catalog")
+    return parser.parse_args()
+
+
+# %%
+args = parse_args()
+root_path = args.root_path
+region = args.region
+
+with open(f"{root_path}/{region}/config.json", "r") as fp:
+    config = json.load(fp)
+
+# %%
+data_path = f"{region}/cctorch"
+result_path = f"{region}/hypodd"
+if not os.path.exists(f"{root_path}/{result_path}"):
+    os.makedirs(f"{root_path}/{result_path}")
+
+# %%
+stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv")
+
+station_lines = {}
+for i, row in stations.iterrows():
+    station_id = row["station_id"]
+    network_code, station_code, comp_code, channel_code = station_id.split(".")
+    # tmp_code = f"{station_code}{channel_code}"
+    tmp_code = f"{station_code}"
+    station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n"
+
+
+with open(f"{root_path}/{result_path}/stations.dat", "w") as f:
+    for line in sorted(station_lines.values()):
+        f.write(line)
+
+# %%
+events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv")
+events["time"] = pd.to_datetime(events["event_time"], format="mixed")
+
+event_lines = []
+
+for i, row in events.iterrows():
+    event_index = row["event_index"]
+    origin = row["time"]
+    magnitude = row["magnitude"]
+    x_err = 0.0
+    z_err = 0.0
+    time_err = 0.0
+    dx, dy, dz = 0.0, 0.0, 0.0
+    # dx = np.random.uniform(-0.01, 0.01)
+    # dy = np.random.uniform(-0.01, 0.01)
+    # dz = np.random.uniform(0, 10)
+    # dz = 0
+    event_lines.append(
+        f"{origin.year:4d}{origin.month:02d}{origin.day:02d} "
+        f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} "
+        # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} "
+        f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} "
+        f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n"
+    )
+
+with open(f"{root_path}/{result_path}/events.dat", "w") as f:
+    f.writelines(event_lines)
+
+# %%
+os.system(f"bash run_hypodd_cc.sh {root_path} {region}")
examples/california/run_hypodd_cc.sh

Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
+#!/bin/bash
+set -x
+WORKING_DIR=$PWD
+if [ $# -eq 2 ]; then
+    root_path=$1
+    region=$2
+else
+    root_path="local"
+    region="demo"
+fi
+
+if [ ! -d "$root_path/$region/hypodd" ]; then
+    mkdir -p $root_path/$region/hypodd
+fi
+
+cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc
+cd $root_path/$region/hypodd
+
+if [ ! -d "HypoDD" ]; then
+    git clone git@github.com:zhuwq0/HypoDD.git
+    export PATH=$PATH:$PWD/HypoDD
+    make -C HypoDD/src/
+fi
+
+cat <<EOF > cc.inp
+* RELOC.INP:
+*--- input file selection
+* cross correlation diff times:
+dt.cc
+*
+*catalog P diff times:
+
+*
+* event file:
+events.dat
+*
+* station file:
+stations.dat
+*
+*--- output file selection
+* original locations:
+hypodd_cc.loc
+* relocations:
+hypodd_cc.reloc
+* station information:
+hypodd.sta
+* residual information:
+hypodd.res
+* source parameter information:
+hypodd.src
+*
+*--- data type selection:
+* IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat
+* IPHA: 1= P; 2= S; 3= P&S
+* DIST: max dist [km] between cluster centroid and station
+* IDAT IPHA DIST
+1 3 120
+*
+*--- event clustering:
+* OBSCC: min # of obs/pair for crosstime data (0= no clustering)
+* OBSCT: min # of obs/pair for network data (0= no clustering)
+* OBSCC OBSCT
+0 0
+*
+*--- solution control:
+* ISTART: 1 = from single source; 2 = from network sources
+* ISOLV: 1 = SVD, 2=lsqr
+* NSET: number of sets of iteration with specifications following
+* ISTART ISOLV NSET
+2 2 4
+*
+*--- data weighting and re-weighting:
+* NITER: last iteration to used the following weights
+* WTCCP, WTCCS: weight cross P, S
+* WTCTP, WTCTS: weight catalog P, S
+* WRCC, WRCT: residual threshold in sec for cross, catalog data
+* WDCC, WDCT: max dist [km] between cross, catalog linked pairs
+* DAMP: damping (for lsqr only)
+* --- CROSS DATA ----- ----CATALOG DATA ----
+* NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP
+4 1 1 -9 -9 -9 -9 -9 -9 70
+4 1 1 6 -9 -9 -9 -9 -9 70
+4 1 0.8 3 4 -9 -9 -9 -9 70
+4 1 0.8 2 2 -9 -9 -9 -9 70
+*
+*--- 1D model:
+* NLAY: number of model layers
+* RATIO: vp/vs ratio
+* TOP: depths of top of layer (km)
+* VEL: layer velocities (km/s)
+* NLAY RATIO
+12 1.73
+* TOP
+0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10
+* VEL
+5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11
+*
+*--- event selection:
+* CID: cluster to be relocated (0 = all)
+* ID: cuspids of event to be relocated (8 per line)
+* CID
+0
+* ID
+EOF
+
+./HypoDD/src/hypoDD/hypoDD cc.inp
+cd $WORKING_DIR
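
Note: the relocated catalog ends up in hypodd_cc.reloc next to cc.inp. A minimal sketch for loading it, assuming the standard whitespace-delimited hypoDD .reloc column layout from the hypoDD user guide (the column list is not part of this commit):

import pandas as pd

# Standard hypoDD .reloc columns (see the hypoDD user guide).
columns = [
    "ID", "LAT", "LON", "DEPTH", "X", "Y", "Z", "EX", "EY", "EZ",
    "YR", "MO", "DY", "HR", "MI", "SC", "MAG", "NCCP", "NCCS",
    "NCTP", "NCTS", "RCC", "RCT", "CID",
]
reloc = pd.read_csv("local/demo/hypodd/hypodd_cc.reloc", sep=r"\s+", header=None, names=columns)
print(reloc[["ID", "LAT", "LON", "DEPTH", "MAG"]].head())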
