Skip to content

Commit 436cfce

Browse files
committed
add cut template
1 parent 27df135 commit 436cfce

File tree

7 files changed

+753
-247
lines changed

7 files changed

+753
-247
lines changed

examples/california/cut_templates_cc.py

Lines changed: 450 additions & 244 deletions
Large diffs are not rendered by default.

examples/california/run_cctorch.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# %%
2+
import os
3+
4+
import torch
5+
from args import parse_args
6+
7+
args = parse_args()
# %%
# Root directory and region sub-directory are taken from the command line.
root_path = args.root_path
region = args.region

# Input data and per-rank result locations, both relative to root_path.
data_path = f"{region}/cctorch"
result_path = f"{region}/cctorch/ccpairs"

# data_path = f"{region}/cctorch_ca"
# result_path = f"{region}/cctorch_ca/ccpairs"

# data_path = f"{region}/cctorch_gamma"
# result_path = f"{region}/cctorch_gamma/ccpairs"

# exist_ok=True replaces the separate exists() check, so there is no window
# between testing for the directory and creating it.
os.makedirs(f"{root_path}/{result_path}", exist_ok=True)

## based on GPU memory

# Values forwarded to CCTorch as --batch_size / --block_size1 / --block_size2.
batch = 1_024
block_size1 = 1000_000
block_size2 = 1000_000
30+
31+
if args.dtct_pair:
    # Derive the pair list from the "#"-prefixed header lines of the hypoDD
    # dt.ct file; fields 1 and 2 of each header are the two event ids.
    dt_ct = f"{root_path}/{region}/hypodd/dt.ct"
    pair_list = f"{root_path}/{region}/hypodd/pairs.txt"
    lines = []
    with open(dt_ct, "r") as fp:
        for line in fp:
            if not line.startswith("#"):
                continue
            ev1, ev2 = line.split()[1:3]
            # Order each pair by numeric id. Comparing the raw strings would
            # sort "100" before "99"; fall back to string order only when the
            # ids are not integers.
            try:
                swap = int(ev1) > int(ev2)
            except ValueError:
                swap = ev1 > ev2
            if swap:
                ev1, ev2 = ev2, ev1
            lines.append(f"{ev1},{ev2}\n")

    print(f"Number of pairs from hypodd dt.ct: {len(lines)}")
    with open(pair_list, "w") as fp:
        fp.writelines(lines)
    base_cmd = (
        f"../CCTorch/run.py --pair_list={pair_list} "
        f"--data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap "
        f"--config={root_path}/{region}/cctorch/config.json "
        f"--batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} "
        f"--result_path={root_path}/{result_path}"
    )

else:
    # Use the pair list and CSV metadata already present under data_path.
    base_cmd = (
        f"../CCTorch/run.py --pair_list={root_path}/{data_path}/pairs.txt --data_path1={root_path}/{data_path}/template.dat --data_format1=memmap "
        f"--data_list1={root_path}/{data_path}/cctorch_picks.csv "
        f"--events_csv={root_path}/{data_path}/cctorch_events.csv --picks_csv={root_path}/{data_path}/cctorch_picks.csv --stations_csv={root_path}/{data_path}/cctorch_stations.csv "
        f"--config={root_path}/{data_path}/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} "
        f"--result_path={root_path}/{result_path}"
    )
56+
57+
58+
# Pick the compute device; only CUDA contributes to the process count.
if torch.cuda.is_available():
    device = "cuda"
    num_gpu = torch.cuda.device_count()
elif torch.backends.mps.is_available():
    device = "mps"
    num_gpu = 0
else:
    device = "cpu"
    num_gpu = 0

# One torchrun worker per CUDA device, otherwise a single python process.
if num_gpu > 0:
    cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd} --device={device}"
else:
    cmd = f"python {base_cmd} --device={device}"
print(cmd)
# Report a failed run instead of silently discarding the exit status; the
# script still continues so partial results can be merged.
exit_status = os.system(cmd)
if exit_status != 0:
    print(f"Warning: command exited with non-zero status {exit_status}")
74+
75+
# %%
76+
# Concatenate the per-rank CSV files into a single dtcc.csv.
merged_csv = f"{root_path}/{data_path}/dtcc.csv"
header_written = False
for rank in range(num_gpu):
    rank_csv = f"{root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv"
    if not os.path.exists(rank_csv):
        continue
    if not header_written:
        # The first file that actually exists starts the merged file and
        # supplies the header row. Keying this on rank == 0 would append
        # header-less rows to a stale dtcc.csv whenever rank 0 is missing.
        cmd = f"cat {rank_csv} > {merged_csv}"
        header_written = True
    else:
        # Later files drop their first (header) line before appending.
        cmd = f"tail -n +2 {rank_csv} >> {merged_csv}"
    print(cmd)
    os.system(cmd)

# NOTE(review): when num_gpu is 0 the loop above is skipped and this glob
# uses "000"; confirm that matches the names written by a single-process run.
cmd = f"cat {root_path}/{result_path}/CC_*_{num_gpu:03d}_dt.cc > {root_path}/{data_path}/dt.cc"
print(cmd)
os.system(cmd)
92+
93+
# # %%
94+
# os.chdir(f"{root_path}/{region}/cctorch")
95+
# source_file = f"ccpairs/CC_{num_gpu:03d}_dt.cc"
96+
# target_file = f"dt.cc"
97+
# print(f"{source_file} -> {target_file}")
98+
# if os.path.lexists(target_file):
99+
# os.remove(target_file)
100+
# os.symlink(source_file, target_file)
101+
102+
# source_file = f"ccpairs/CC_{num_gpu:03d}.csv"
103+
# target_file = f"dtcc.csv"
104+
# print(f"{source_file} -> {target_file}")
105+
# if os.path.lexists(target_file):
106+
# os.remove(target_file)
107+
# os.symlink(source_file, target_file)

examples/california/submit_adloc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def parse_args():
2424
setup="""
2525
echo "Begin setup."
2626
echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc
27-
pip install -U h5py tqdm wandb pandas numpy scipy scikit-learn
27+
pip install -U h5py tqdm wandb pandas scipy scikit-learn numpy==1.26.4
2828
pip install -U fsspec gcsfs
2929
pip install -U obspy pyproj
3030
pip install -e /opt/ADLoc
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import argparse
2+
import time
3+
from concurrent.futures import ThreadPoolExecutor
4+
5+
import sky
6+
7+
8+
# NUM_NODES = 8
9+
def parse_args(argv=None):
    """Parse the command-line options of the submit script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace with ``num_nodes`` (int), ``year`` (int) and
        ``region`` (str).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_nodes", type=int, default=32, help="number of node ranks to launch (default: 32)")
    parser.add_argument("--year", type=int, default=2023, help="year to process (default: 2023)")
    parser.add_argument("--region", type=str, default="CA", help="region code (default: CA)")
    return parser.parse_args(argv)
15+
16+
17+
# Read the command-line options once and expose them as module constants.
args = parse_args()
18+
NUM_NODES = args.num_nodes
19+
YEAR = args.year
20+
# NOTE(review): REGION is not referenced anywhere else in the visible code.
REGION = args.region
21+
22+
# Task definition: `setup` installs the Python dependencies plus an editable
# install of /opt/ADLoc; `run` lists a few directories on node rank 0 and then
# runs cut_templates_cc.py with NUM_NODES, NODE_RANK and YEAR taken from `envs`.
# NOTE(review): the task is named "run_adloc" although it runs
# cut_templates_cc.py; confirm the name is intentional.
task = sky.Task(
23+
name="run_adloc",
24+
setup="""
25+
echo "Begin setup."
26+
echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc
27+
pip install -U h5py tqdm wandb pandas scipy scikit-learn numpy==1.26.4
28+
pip install -U fsspec gcsfs s3fs
29+
pip install -U obspy pyproj
30+
pip install -e /opt/ADLoc
31+
""",
32+
run="""
33+
num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l`
34+
master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1`
35+
if [ "$SKYPILOT_NODE_RANK" == "0" ]; then
36+
ls -al /opt
37+
ls -al /data
38+
ls -al ./
39+
fi
40+
python cut_templates_cc.py --num_node $NUM_NODES --node_rank $NODE_RANK --year $YEAR
41+
""",
42+
workdir=".",
43+
num_nodes=1,
44+
# NODE_RANK starts at 0 here; the launch loop later in this script updates it
# with task.update_envs for each cluster.
envs={"NUM_NODES": NUM_NODES, "NODE_RANK": 0, "YEAR": YEAR},
45+
)
46+
47+
# Map the local ../../ADLoc directory to /opt/ADLoc, the path that the setup
# script installs from.
task.set_file_mounts({"/opt/ADLoc": "../../ADLoc"})

# task.set_storage_mounts({
#     '/remote/imagenet/': sky.Storage(name='my-bucket',
#                                      source='/local/imagenet'),
# })

# Resource request: 16 CPUs, no accelerators, 50 GB low-tier disk, on-demand.
task.set_resources(
    sky.Resources(
        cloud=sky.GCP(),
        region="us-west1",  # GCP
        # region="us-west-2",  # AWS
        cpus=16,
        memory=None,
        accelerators=None,
        disk_size=50,  # GB
        disk_tier="low",
        use_spot=False,
    )
)
69+
70+
# for NODE_RANK in range(NUM_NODES):
71+
# task.update_envs({"NODE_RANK": NODE_RANK})
72+
# cluster_name = f"cctorch-{NODE_RANK:02d}"
73+
# print(f"Launching cluster {cluster_name}-{NUM_NODES}...")
74+
# sky.jobs.launch(
75+
# task,
76+
# name=f"{cluster_name}",
77+
# )
78+
79+
# Futures for every cluster launch submitted to the thread pool.
jobs = []

# Refresh the cluster status once up front; a failure is only printed.
try:
    sky.status(refresh=True)
except Exception as e:
    print(e)

with ThreadPoolExecutor(max_workers=NUM_NODES) as executor:
    for NODE_RANK in range(NUM_NODES):

        # NOTE(review): the same `task` object is mutated here and handed to
        # worker threads below; confirm sky.launch reads the envs before the
        # next iteration changes NODE_RANK.
        task.update_envs({"NODE_RANK": NODE_RANK})
        cluster_name = f"cctorch-{YEAR}-{NODE_RANK:02d}"

        status = sky.status(cluster_names=[cluster_name], refresh=True)
        if status:
            is_init = status[0]["status"].value == "INIT"
            if is_init:
                # A cluster in INIT state is torn down; it is not relaunched
                # in this pass because of the `continue` below.
                sky.down(cluster_name)
            if not status[0]["to_down"] and not is_init:
                # Make sure an existing cluster terminates itself when idle.
                sky.autostop(cluster_name, idle_minutes=10, down=True)
            print(f"Cluster {cluster_name}/{NUM_NODES} already exists.")
            continue

        status = sky.status(cluster_names=[cluster_name])
        if not status:
            print(f"Launching cluster {cluster_name}/{NUM_NODES}...")
            future = executor.submit(
                sky.launch,
                task,
                cluster_name=cluster_name,
                idle_minutes_to_autostop=10,
                down=True,
                detach_setup=True,
                detach_run=True,
            )
            jobs.append(future)
            # Pause between submissions (presumably to stagger the launches).
            time.sleep(5)

# Print the result of every launch; result() re-raises a launch exception.
for job in jobs:
    print(job.result())

examples/california/submit_gamma.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def parse_args():
2424
setup="""
2525
echo "Begin setup."
2626
echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc
27-
pip install -U h5py tqdm wandb pandas numpy scipy
27+
pip install -U h5py tqdm wandb pandas scipy numpy==1.26.4
2828
pip install -U fsspec gcsfs
2929
pip install -U obspy pyproj
3030
pip install -e /opt/GaMMA

examples/california/submit_phasenet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def parse_args():
4444
setup="""
4545
echo "Begin setup."
4646
echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc
47-
pip install h5py tqdm wandb pandas numpy scipy
47+
pip install h5py tqdm wandb pandas scipy numpy==1.26.4
4848
pip install fsspec gcsfs s3fs
4949
pip install obspy pyproj
5050
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

scripts/load_cloud_templates.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# %%
2+
import json
3+
import os
4+
from concurrent.futures import ThreadPoolExecutor
5+
6+
import fsspec
7+
import matplotlib.pyplot as plt
8+
import numpy as np
9+
import pandas as pd
10+
from tqdm import tqdm
11+
12+
# %%
13+
if __name__ == "__main__":

    # %%
    # Local directory that receives the downloaded picks, events and templates.
    result_path = "results/"
    os.makedirs(result_path, exist_ok=True)

    # %%
    # Credentials for the bucket are read from the gcloud default-credentials file.
    protocol = "gs"
    token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
    with open(token_json, "r") as fp:
        token = json.load(fp)

    bucket = "quakeflow_catalog"
    folder = "Cal/cctorch"

    fs = fsspec.filesystem(protocol, token=token)

    # %%
    def plot_templates(templates, events, picks):
        """Show an image of the templates after per-trace normalization.

        Each trace is demeaned and divided by its standard deviation along
        the last axis, ignoring NaN samples; traces with zero standard
        deviation are left unscaled. The slice ``templates[:, -1, 0, :]`` is
        then drawn with ``imshow``.

        Args:
            templates: 4-D array of waveforms (the last axis is presumably
                time; confirm against the writer of template_*.dat).
            events: Unused; kept for interface compatibility.
            picks: Unused; kept for interface compatibility.
        """
        templates = templates - np.nanmean(templates, axis=-1, keepdims=True)
        # nanstd matches the nanmean above: plain np.std would turn a whole
        # trace into NaN as soon as one sample is NaN.
        std = np.nanstd(templates, axis=-1, keepdims=True)
        std[std == 0] = 1.0
        templates = templates / std

        plt.figure(figsize=(10, 10))
        plt.imshow(templates[:, -1, 0, :], origin="lower", aspect="auto", vmin=-0.3, vmax=0.3, cmap="RdBu_r")
        plt.colorbar()
        plt.show()

    # %%
    years = [2023]

    for year in years:
        num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365

        for jday in range(1, num_jday + 1):

            # Only the template file is checked; the matching picks, events
            # and config files are expected to exist alongside it.
            if not fs.exists(f"{bucket}/{folder}/{year}/template_{jday:03d}.dat"):
                continue

            with fs.open(f"{bucket}/{folder}/{year}/cctorch_picks_{jday:03d}.csv", "r") as fp:
                picks = pd.read_csv(fp, dtype=str)
            with fs.open(f"{bucket}/{folder}/{year}/cctorch_events_{jday:03d}.csv", "r") as fp:
                events = pd.read_csv(fp, dtype=str)
            with fs.open(f"{bucket}/{folder}/{year}/config_{jday:03d}.json", "r") as fp:
                config = json.load(fp)
            # The context manager closes the remote file even if read() or
            # reshape() raises.
            with fs.open(f"{bucket}/{folder}/{year}/template_{jday:03d}.dat", "rb") as template_file:
                templates = np.frombuffer(template_file.read(), dtype=np.float32).reshape(
                    tuple(config["template_shape"])
                )

            print(f"events: {len(events):,} ")
            print(f"picks: {len(picks):,} ")
            print(f"templates: {templates.shape}")

            picks.to_csv(f"{result_path}/picks_{year:04d}_{jday:03d}.csv", index=False)
            events.to_csv(f"{result_path}/events_{year:04d}_{jday:03d}.csv", index=False)
            np.save(f"{result_path}/templates_{year:04d}_{jday:03d}.npy", templates)

            plot_templates(templates, events, picks)

            # break
75+
76+
# %%

0 commit comments

Comments
 (0)