Skip to content

Commit 6f421e2

Browse files
authored
Merge pull request #3 from Donavin97/feature/auto-dataset
Automatic waveform data folder creation script.
2 parents 9b4adf2 + e69dd4d commit 6f421e2

File tree

3 files changed

+205
-1
lines changed

3 files changed

+205
-1
lines changed

eqcctpro/OptimizedEQCCT_Paper.pdf

4.53 MB
Binary file not shown.

eqcctpro/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# **EQCCTPro: powerful seismic event detection toolkit**
22

3-
EQCCTPro is a high-performace seismic event detection and processing framework that leverages DL-pickers, like EQCCT, to process seismic data efficiently. It enables users to fully leverage the computational ability of their computing resources for maximum performance for simultaneous seismic waveform processing, achieving real-time performance by identifying and utilizing the optimal computational configurations for their hardware. More information about the development, capabilities, and real-world applications about EQCCTPro can be read about in our research publication here.
3+
EQCCTPro is a high-performance seismic event detection and processing framework that leverages DL-pickers, like EQCCT, to process seismic data efficiently. It enables users to fully leverage the computational ability of their computing resources for maximum performance for simultaneous seismic waveform processing, achieving real-time performance by identifying and utilizing the optimal computational configurations for their hardware. More information about the development, capabilities, and real-world applications of EQCCTPro can be found in our upcoming research publication [here](https://github.com/ut-beg-texnet/eqcct/blob/main/eqcctpro/OptimizedEQCCT_Paper.pdf) (currently in review).
44

55
## **Features**
66
- Supports both CPU and GPU execution

eqcctpro/create_dataset.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import math
4+
import argparse
5+
import numpy as np
6+
from obspy import UTCDateTime, Stream
7+
from obspy.clients.fdsn import Client
8+
9+
# optional seisbench import
10+
try:
11+
import seisbench.models as sbm
12+
except ImportError:
13+
sbm = None
14+
15+
def ensure_dir(path):
    """Create directory *path* (including parents) if it does not exist."""
    os.makedirs(path, exist_ok=True)
17+
18+
def to_list(val):
    """Parse a comma-separated selector string into a list of codes.

    A value of None, an empty/whitespace string, or "*" yields None,
    which callers interpret as "match everything".
    """
    if val is None:
        return None
    stripped = val.strip()
    if stripped in ("", "*"):
        return None
    parts = (piece.strip() for piece in stripped.split(","))
    return [piece for piece in parts if piece]
29+
30+
def split_windows(t_start, t_end, chunk_min):
    """Yield consecutive (start, end) pairs of chunk_min minutes.

    Windows tile the span [t_start, t_end); the final window is truncated
    so it ends exactly at t_end.
    """
    step_sec = chunk_min * 60
    left = t_start
    while left < t_end:
        right = min(left + step_sec, t_end)
        yield left, right
        left = right
43+
44+
def get_window_count(t_start, t_end, chunk_min):
    """Return how many chunk_min-minute windows cover [t_start, t_end].

    This is the ceiling of the span divided by the chunk length, matching
    the number of pairs produced by split_windows().
    """
    span_sec = t_end - t_start
    return math.ceil(span_sec / (chunk_min * 60))
52+
53+
def main():
    """Download FDSN waveforms in equal-time chunks and save them as MiniSEED.

    Parses command-line options (prompting interactively for missing
    start/end times), splits the requested span into windows, fetches every
    matching network/station/location/channel combination per window,
    optionally denoises with seisbench DeepDenoiser, and writes one file
    per station/channel bundle under <output>/za/<window>/<station>/.
    """
    p = argparse.ArgumentParser(
        description="Download FDSN waveforms in equal-time chunks."
    )
    p.add_argument("--start", help="Start time, e.g. 2024-12-03T00:00:00Z", required=False)
    p.add_argument("--end", help="End time, e.g. 2024-12-03T02:00:00Z", required=False)
    p.add_argument("--networks", default="*", help="Comma-separated network codes or *")
    p.add_argument("--stations", default="*", help="Comma-separated station codes or *")
    p.add_argument("--locations", default="*", help="Comma-separated location codes or *")
    p.add_argument("--channels", default="*", help="Comma-separated channel codes or *")
    p.add_argument("--host", default="http://localhost:8080", help="FDSNWS base URL")
    p.add_argument("--output", default=".", help="Base output directory")
    p.add_argument("--chunk", type=int, default=None,
                   help="Chunk size in minutes. Splits start–end into N windows.")
    p.add_argument("--denoise", action="store_true",
                   help="If set, apply seisbench.DeepDenoiser to each chunk.")
    args = p.parse_args()

    # Fall back to interactive prompts when times were not supplied.
    if not args.start:
        args.start = input("Start time (ISO): ").strip()
    if not args.end:
        args.end = input("End time (ISO): ").strip()

    # Parse times; obspy raises on malformed input.
    try:
        t_start = UTCDateTime(args.start)
        t_end = UTCDateTime(args.end)
    except Exception as e:
        print("Error parsing times:", e)
        return

    # BUG FIX: --chunk 0 previously raised ZeroDivisionError inside
    # get_window_count, and a negative value produced a nonsensical
    # schedule. Reject non-positive values explicitly.
    if args.chunk is not None and args.chunk <= 0:
        print("Error: --chunk must be a positive number of minutes.")
        return

    # Build the list of (start, end) windows to download.
    if args.chunk:
        windows = list(split_windows(t_start, t_end, args.chunk))
        wcount = get_window_count(t_start, t_end, args.chunk)
        # BUG FIX: the two timestamps were printed with no separator
        # between them, producing one unreadable run-together value.
        print(f"Total time: {t_start} → {t_end} ({t_end - t_start} s)")
        print(f"Chunk size: {args.chunk} minute(s) → {wcount} window(s)\n")
    else:
        windows = [(t_start, t_end)]
        wcount = 1

    # Connect to the FDSN web service.
    try:
        client = Client(args.host)
    except Exception as e:
        print("Could not create FDSN client:", e)
        return

    # Load the optional denoiser up front so a missing dependency fails early.
    denoiser = None
    if args.denoise:
        if sbm is None:
            print("seisbench not installed; cannot denoise.")
            return
        print("Loading seisbench DeepDenoiser…")
        denoiser = sbm.DeepDenoiser.from_pretrained("urban")

    # Normalize selectors: to_list() returns None for "*"/empty input,
    # which the FDSN service expresses as a literal "*" wildcard.
    net_iter = to_list(args.networks) or ["*"]
    sta_iter = to_list(args.stations) or ["*"]
    loc_iter = to_list(args.locations) or ["*"]
    cha_iter = to_list(args.channels) or ["*"]

    # Prepare the base output directory.
    base_dir = os.path.abspath(args.output)
    za_dir = os.path.join(base_dir, "za")
    ensure_dir(za_dir)

    for idx, (win_start, win_end) in enumerate(windows, 1):
        start_str = win_start.strftime("%Y%m%dT%H%M%SZ")
        end_str = win_end.strftime("%Y%m%dT%H%M%SZ")
        time_dir = os.path.join(za_dir, f"{start_str}_{end_str}")
        ensure_dir(time_dir)

        # BUG FIX: separator added between the two timestamps (see above).
        print(f"\n=== WINDOW {idx}/{wcount}: {start_str} → {end_str} ===")
        print(f"  Networks: {net_iter}")
        print(f"  Stations: {sta_iter}")
        print(f"  Locations: {loc_iter}")
        print(f"  Channels: {cha_iter}")

        streams_by_key = _fetch_window_traces(
            client, net_iter, sta_iter, loc_iter, cha_iter, win_start, win_end
        )
        if not streams_by_key:
            print("  No data downloaded for this window.")
            continue

        for key, traces in streams_by_key.items():
            _write_bundle(time_dir, key, traces, denoiser, start_str, end_str)

    print("\nAll windows processed. Done.")


def _fetch_window_traces(client, net_iter, sta_iter, loc_iter, cha_iter,
                         win_start, win_end):
    """Fetch all matching traces for one time window, grouped by SEED id.

    Returns a dict mapping (network, station, location, channel) to the
    list of Trace objects downloaded for that combination. Individual
    fetch failures are reported as warnings and skipped.
    """
    streams_by_key = {}
    for net in net_iter:
        for sta in sta_iter:
            for loc in loc_iter:
                for cha in cha_iter:
                    try:
                        st = client.get_waveforms(net, sta, loc, cha,
                                                  win_start, win_end)
                    except Exception as e:
                        print(f"  Warning: can't fetch {net}.{sta}.{loc}.{cha}: {e}")
                        continue
                    # An empty stream simply contributes no traces.
                    for tr in st:
                        key = (tr.stats.network,
                               tr.stats.station,
                               tr.stats.location,
                               tr.stats.channel)
                        streams_by_key.setdefault(key, []).append(tr)
    return streams_by_key


def _write_bundle(time_dir, key, traces, denoiser, start_str, end_str):
    """Optionally denoise one station/channel bundle and write it as MiniSEED.

    Output goes to <time_dir>/<station>/. Samples are cast to int32 to
    satisfy the integer STEIM2 encoding. Write failures are reported,
    not raised, so remaining bundles still get written.
    """
    net, sta, loc, cha = key
    station_dir = os.path.join(time_dir, sta)
    ensure_dir(station_dir)

    if not traces:
        return

    st_out = Stream(traces)

    if denoiser:
        st_out = denoiser.annotate(st_out)

    if len(st_out) == 0:
        return

    for tr in st_out:
        # STEIM2 is an integer encoding; cast (denoised data is float).
        tr.data = tr.data.astype(np.int32)
        # Strip the "DeepDenoiser_" channel prefix, if present.
        tr.stats.channel = tr.stats.channel.replace("DeepDenoiser_", "")

    # All traces in a bundle share the same channel; use the first one.
    out_chan = st_out[0].stats.channel
    loc_str = loc or ""  # simplified: original tautology always yielded loc or ""
    fname = f"{net}.{sta}.{loc_str}.{out_chan}__{start_str}__{end_str}.mseed"
    fpath = os.path.join(station_dir, fname)

    try:
        st_out.write(fpath, format="MSEED",
                     reclen=512, encoding="STEIM2", byteorder=">")
        print(f"  Wrote {fpath} ({len(st_out)} traces)")
    except Exception as e:
        print(f"  Error writing {fpath}: {e}")
# Run only when executed as a script, so the module imports cleanly.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)