1818from tqdm import tqdm
1919
2020
def plotting_debug(xt, hist, topk_index, topk_score, picks, events, stations, config):
    """Save per-time-window debug figures of the phase-association result.

    For each window the top panel shows the histogram of event timestamps
    with the selected top-k peaks overlaid as vertical black lines; the
    bottom panel scatters events (dots) and picks (crosses) per station,
    colored by their assigned event_index (black for unassociated, -1).

    Args:
        xt: 1-D array of histogram bin centers, in seconds.
        hist: 1-D array of histogram values, same length as ``xt``.
        topk_index: integer indices into ``xt``/``hist`` of detected peaks.
        topk_score: peak scores (unused here; kept for interface parity).
        picks: DataFrame with "timestamp", "station_id", "event_index".
        events: DataFrame with "timestamp", "station_id", "event_index".
        stations: DataFrame with "longitude", "latitude", "station_id".
            NOTE: mutated in place (adds an "xy" column and sorts by it).
        config: dict providing "DT" (histogram bin width, seconds).

    Writes ``figures/debug_XXXX.png`` per window; returns None.
    """
    DT = config["DT"]

    # Order stations along a lon-lat diagonal so the scatter y-axis groups
    # geographically nearby stations together.
    stations["xy"] = stations["longitude"] - stations["latitude"]
    stations.sort_values(by="xy", inplace=True)
    mapping_id = {v: i for i, v in enumerate(stations["station_id"])}
    # One matplotlib cycle color per event index; unassociated (-1) is black.
    mapping_color = {v: f"C{i}" if v != -1 else "k" for i, v in enumerate(events["event_index"].unique())}

    NX = 100  # window length in seconds per figure
    # Create the output directory once, race-free (was an exists-check per call).
    os.makedirs("figures", exist_ok=True)

    # Iterate over the actual time span covered by the bins. The original
    # stepped over bin *indices* (range(0, len(hist), NX)) while filtering
    # against xt in *seconds*; those only line up when DT == 1, so with
    # DT = 2.0 half of the time range was never plotted.
    t_begin = int(np.floor(xt[0]))
    t_end = int(np.ceil(xt[-1]))
    for i in tqdm(range(t_begin, t_end, NX)):
        fig, ax = plt.subplots(2, 1, figsize=(15, 10), sharex=True)

        # Top panel: event-timestamp histogram within this window.
        idx = (xt > i) & (xt < i + NX)
        ax[0].bar(xt[idx], hist[idx], width=DT)

        # Overlay the selected peaks as full-height vertical lines.
        ylim = ax[0].get_ylim()
        idx = (xt[topk_index] > i) & (xt[topk_index] < i + NX)
        ax[0].vlines(xt[topk_index][idx], ylim[0], ylim[1], color="k", linewidth=1)

        # Bottom panel: events (dots) vs picks (crosses) per station; a good
        # association shows dot and cross colors agreeing at each station/time.
        idx = (events["timestamp"] > i) & (events["timestamp"] < i + NX)
        ax[1].scatter(
            events["timestamp"][idx],
            events["station_id"][idx].map(mapping_id),
            c=events["event_index"][idx].map(mapping_color),
            marker=".",
            s=30,
        )

        idx = (picks["timestamp"] > i) & (picks["timestamp"] < i + NX)
        ax[1].scatter(
            picks["timestamp"][idx],
            picks["station_id"][idx].map(mapping_id),
            c=picks["event_index"][idx].map(mapping_color),
            marker="x",
            linewidth=0.5,
            s=10,
        )

        plt.savefig(f"figures/debug_{i:04d}.png", dpi=300, bbox_inches="tight")
        # Release the figure; otherwise matplotlib accumulates one per window.
        plt.close(fig)
93+
2194def associate (
2295 picks : pd .DataFrame ,
2396 events : pd .DataFrame ,
@@ -27,63 +100,68 @@ def associate(
27100
28101 VPVS_RATIO = config ["VPVS_RATIO" ]
29102 VP = config ["VP" ]
30- DT = 1 .0 # seconds
103+ DT = 2 .0 # seconds
31104 MIN_STATION = 3
32105
33106 # %%
34- t0 = min (events ["event_time" ].min (), picks ["phase_time" ].min ())
35- events ["timestamp" ] = events ["event_time" ].apply (lambda x : (x - t0 ).total_seconds ())
36- events ["timestamp_center" ] = events ["center_time" ].apply (lambda x : (x - t0 ).total_seconds ())
37- picks ["timestamp" ] = picks ["phase_time" ].apply (lambda x : (x - t0 ).total_seconds ())
107+ timestamp0 = min (events ["event_time" ].min (), picks ["phase_time" ].min ())
38108
39- # proj = Proj(proj="merc", datum="WGS84", units="km")
40- # stations[["x_km", "y_km"]] = stations.apply(lambda x: pd.Series(proj(x.longitude, x.latitude)), axis=1)
109+ events ["timestamp" ] = events ["event_time" ].apply (lambda x : (x - timestamp0 ).total_seconds ())
110+ events ["timestamp_center" ] = events ["center_time" ].apply (lambda x : (x - timestamp0 ).total_seconds ())
111+ picks ["timestamp" ] = picks ["phase_time" ].apply (lambda x : (x - timestamp0 ).total_seconds ())
41112
42- # dist_matrix = squareform(pdist(stations[["x_km", "y_km"]].values))
43- # mst = minimum_spanning_tree(dist_matrix)
44- # dx = np.median(mst.data[mst.data > 0])
45- # print(f"dx: {dx:.3f}")
46- # eps_t = dx / VP * 2.0
47- # eps_t = 6.0
48- # eps_xy = eps_t * VP * 2 / (1.0 + VPVS_RATIO)
49- # print(f"eps_t: {eps_t:.3f}, eps_xy: {eps_xy:.3f}")
50- # eps_xy = 30.0
51- # print(f"eps_xy: {eps_xy:.3f}")
113+ t0 = min (events ["timestamp" ].min (), picks ["timestamp" ].min ())
114+ t1 = max (events ["timestamp" ].max (), picks ["timestamp" ].max ())
52115
53116 # %% Using DBSCAN to cluster events
117+ # proj = Proj(proj="merc", datum="WGS84", units="km")
118+ # stations[["x_km", "y_km"]] = stations.apply(lambda x: pd.Series(proj(x.longitude, x.latitude)), axis=1)
54119 # events = events.merge(stations[["station_id", "x_km", "y_km"]], on="station_id", how="left")
55-
56120 # scaling = np.array([1.0, 1.0 / eps_xy, 1.0 / eps_xy])
57121 # clustering = DBSCAN(eps=2.0, min_samples=4).fit(events[["timestamp", "x_km", "y_km"]] * scaling)
58122 # # clustering = DBSCAN(eps=2.0, min_samples=4).fit(events[["timestamp"]])
59- # # clustering = DBSCAN(eps=3.0, min_samples=3).fit(events[["timestamp"]])
60- # # clustering = DBSCAN(eps=1.0, min_samples=3).fit(events[["timestamp"]])
61123 # events["event_index"] = clustering.labels_
62124
63125 ## Using histogram to cluster events
64126 events ["event_index" ] = - 1
65- t = np .arange (events ["timestamp" ].min (), events ["timestamp" ].max (), DT )
66- hist , _ = np .histogram (events ["timestamp" ], bins = t )
67- # retrieve picks using max_pool of kernel size 5 seconds
127+ t = np .arange (t0 , t1 , DT )
128+ hist , edge = np .histogram (events ["timestamp" ], bins = t , weights = events ["event_score" ])
129+ xt = (edge [:- 1 ] + edge [1 :]) / 2 # center of the bin
130+ # hist_numpy = hist.copy()
131+
68132 hist = torch .from_numpy (hist ).float ().unsqueeze (0 ).unsqueeze (0 )
69- hist_pool = F .max_pool1d (hist , kernel_size = 5 , padding = 2 , stride = 1 )
70- # find the index of the maximum value in hist_pool
133+ hist_pool = F .max_pool1d (hist , kernel_size = 3 , padding = 1 , stride = 1 )
71134 mask = hist_pool == hist
72135 hist = hist * mask
73- K = int ((t [- 1 ] - t [0 ]) / 10 ) # assume max 1 event per 10 seconds on average
136+ hist = hist .squeeze (0 ).squeeze (0 )
137+ K = int ((t [- 1 ] - t [0 ]) / 5 ) # assume max 1 event per 10 seconds on average
74138 topk_score , topk_index = torch .topk (hist , k = K )
75- topk_index = topk_index [topk_score > MIN_STATION ] # min 3 stations
76- topk_index = topk_index .squeeze ().numpy ()
139+ topk_index = topk_index [topk_score >= MIN_STATION ] # min 3 stations
140+ topk_score = topk_score [topk_score >= MIN_STATION ]
141+ topk_index = topk_index .numpy ()
142+ topk_score = topk_score .numpy ()
77143 num_events = len (topk_index )
78- # assign timestamp to events based on the topk_index within 2 DT
79- t0 = (topk_index - 2 ) * DT
80- t1 = (topk_index + 2 ) * DT
144+ t00 = xt [topk_index - 1 ]
145+ t11 = xt [topk_index + 1 ]
81146 timestamp = events ["timestamp" ].values
82147 for i in tqdm (range (num_events ), desc = "Assigning event index" ):
83- mask = (timestamp >= t0 [i ]) & (timestamp <= t1 [i ])
148+ mask = (timestamp >= t00 [i ]) & (timestamp <= t11 [i ])
84149 events .loc [mask , "event_index" ] = i
85-
86- print (f"Number of associated events: { len (events ['event_index' ].unique ())} " )
150+ events ["num_picks" ] = events .groupby ("event_index" ).size ()
151+
152+ # # refine event index using DBSCAN
153+ # events["group_index"] = -1
154+ # for group_id, event in tqdm(events.groupby("event_index"), desc="DBSCAN clustering"):
155+ # if len(event) < MIN_STATION:
156+ # events.loc[event.index, "event_index"] = -1
157+ # clustering = DBSCAN(eps=20, min_samples=MIN_STATION).fit(event[["x_km", "y_km"]])
158+ # events.loc[event.index, "group_index"] = clustering.labels_
159+ # events["dummy_index"] = events["event_index"].astype(str) + "." + events["group_index"].astype(str)
160+ # mapping = {v: i for i, v in enumerate(events["dummy_index"].unique())}
161+ # events["dummy_index"] = events["dummy_index"].map(mapping)
162+ # events.loc[(events["event_index"] == -1) | (events["group_index"] == -1), "dummy_index"] = -1
163+ # events["event_index"] = events["dummy_index"]
164+ # events.drop(columns=["dummy_index"], inplace=True)
87165
88166 # %% link picks to events
89167 picks ["event_index" ] = - 1
@@ -92,6 +170,8 @@ def associate(
92170 for group_id , event in tqdm (events .groupby ("station_id" ), desc = "Linking picks to events" ):
93171 # travel time tt = (tp + ts) / 2 = (1 + ps_ratio)/2 * tp => tp = tt * 2 / (1 + ps_ratio)
94172 # (ts - tp) = (ps_ratio - 1) tp = tt * 2 * (ps_ratio - 1) / (ps_ratio + 1)
173+
174+ event = event .sort_values (by = "num_picks" , ascending = True )
95175 ps_delta = event ["travel_time_s" ].values * 2 * (VPVS_RATIO - 1 ) / (VPVS_RATIO + 1 )
96176 t1 = event ["timestamp_center" ].values - ps_delta * 1.2
97177 t2 = event ["timestamp_center" ].values + ps_delta * 1.2
@@ -107,6 +187,17 @@ def associate(
107187
108188 picks .reset_index (inplace = True )
109189
190+ # plotting_debug(
191+ # xt,
192+ # hist_numpy,
193+ # topk_index,
194+ # topk_score,
195+ # picks,
196+ # events,
197+ # stations,
198+ # {"DT": DT, "MIN_STATION": MIN_STATION, "timestamp0": timestamp0},
199+ # )
200+
110201 picks .drop (columns = ["timestamp" ], inplace = True )
111202 events .drop (columns = ["timestamp" , "timestamp_center" ], inplace = True )
112203
@@ -127,6 +218,9 @@ def associate(
127218 # drop event index -1
128219 events = events [events ["event_index" ] != - 1 ]
129220
221+ print (f"Number of associated events: { len (events ['event_index' ].unique ()):,} " )
222+ print (f"Number of associated picks: { len (picks [picks ['event_index' ] != - 1 ]):,} / { len (picks ):,} " )
223+
130224 return events , picks
131225
132226
0 commit comments