Skip to content

Commit 865dd52

Browse files
committed
Script for alt DC2 merging strategy
1 parent 13a5fcb commit 865dd52

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import os
2+
import pickle as pkl
3+
4+
import GCRCatalogs
5+
import healpy as hp
6+
import numpy as np
7+
import pandas as pd
8+
from GCRCatalogs import GCRQuery
9+
10+
GCRCatalogs.set_root_dir("/data/scratch/dc2_nfs/")
11+
12+
file_name = "dc2_lensing_catalog_alt.pkl"
13+
file_path = os.path.join("/data", "scratch", "dc2local", file_name)
14+
file_already_populated = os.path.isfile(file_path)
15+
16+
if file_already_populated:
17+
raise FileExistsError(f"{file_path} already exists.")
18+
19+
20+
print("Loading truth...\n") # noqa: WPS421
21+
22+
truth_cat = GCRCatalogs.load_catalog("desc_dc2_run2.2i_dr6_truth")
23+
24+
truth_df = truth_cat.get_quantities(
25+
quantities=[
26+
"cosmodc2_id",
27+
"id",
28+
"match_objectId",
29+
"truth_type",
30+
"ra",
31+
"dec",
32+
"redshift",
33+
"flux_u",
34+
"flux_g",
35+
"flux_r",
36+
"flux_i",
37+
"flux_z",
38+
"flux_y",
39+
"mag_u",
40+
"mag_g",
41+
"mag_r",
42+
"mag_i",
43+
"mag_z",
44+
"mag_y",
45+
]
46+
)
47+
truth_df = pd.DataFrame(truth_df)
48+
49+
truth_df = truth_df[truth_df["truth_type"] == 1]
50+
51+
truth_df = truth_df[truth_df["flux_r"] >= 50]
52+
53+
max_ra = np.nanmax(truth_df["ra"])
54+
min_ra = np.nanmin(truth_df["ra"])
55+
max_dec = np.nanmax(truth_df["dec"])
56+
min_dec = np.nanmin(truth_df["dec"])
57+
ra_dec_filters = [f"ra >= {min_ra}", f"ra <= {max_ra}", f"dec >= {min_dec}", f"dec <= {max_dec}"]
58+
59+
vertices = hp.ang2vec(
60+
np.array([min_ra, max_ra, max_ra, min_ra]),
61+
np.array([min_dec, min_dec, max_dec, max_dec]),
62+
lonlat=True,
63+
)
64+
ipix = hp.query_polygon(32, vertices, inclusive=True)
65+
healpix_filter = GCRQuery((lambda h: np.isin(h, ipix, assume_unique=True), "healpix_pixel"))
66+
67+
68+
print("Loading object-with-truth-match...\n") # noqa: WPS421
69+
70+
object_truth_cat = GCRCatalogs.load_catalog("desc_dc2_run2.2i_dr6_object_with_truth_match")
71+
72+
object_truth_df = object_truth_cat.get_quantities(
73+
quantities=[
74+
"cosmodc2_id_truth",
75+
"Ixx_pixel",
76+
"Iyy_pixel",
77+
"Ixy_pixel",
78+
"IxxPSF_pixel_u",
79+
"IxxPSF_pixel_g",
80+
"IxxPSF_pixel_r",
81+
"IxxPSF_pixel_i",
82+
"IxxPSF_pixel_z",
83+
"IxxPSF_pixel_y",
84+
"IyyPSF_pixel_u",
85+
"IyyPSF_pixel_g",
86+
"IyyPSF_pixel_r",
87+
"IyyPSF_pixel_i",
88+
"IyyPSF_pixel_z",
89+
"IyyPSF_pixel_y",
90+
"IxyPSF_pixel_u",
91+
"IxyPSF_pixel_g",
92+
"IxyPSF_pixel_r",
93+
"IxyPSF_pixel_i",
94+
"IxyPSF_pixel_z",
95+
"IxyPSF_pixel_y",
96+
"psf_fwhm_u",
97+
"psf_fwhm_g",
98+
"psf_fwhm_r",
99+
"psf_fwhm_i",
100+
"psf_fwhm_z",
101+
"psf_fwhm_y",
102+
]
103+
)
104+
object_truth_df = pd.DataFrame(object_truth_df)
105+
106+
107+
print("Loading CosmoDC2...\n") # noqa: WPS421
108+
109+
config_overwrite = {"catalog_root_dir": "/data/scratch/dc2_nfs/cosmoDC2"}
110+
111+
cosmo_cat = GCRCatalogs.load_catalog("desc_cosmodc2", config_overwrite)
112+
113+
cosmo_df = cosmo_cat.get_quantities(
114+
quantities=[
115+
"galaxy_id",
116+
"ra",
117+
"dec",
118+
"ellipticity_1_true",
119+
"ellipticity_2_true",
120+
"shear_1",
121+
"shear_2",
122+
"convergence",
123+
],
124+
filters=ra_dec_filters,
125+
native_filters=healpix_filter,
126+
)
127+
cosmo_df = pd.DataFrame(cosmo_df)
128+
129+
130+
print("Merging truth with object-with-truth-match...\n") # noqa: WPS421
131+
132+
merge_df1 = truth_df.merge(
133+
object_truth_df, left_on="cosmodc2_id", right_on="cosmodc2_id_truth", how="left"
134+
)
135+
136+
merge_df1.drop_duplicates(subset=["cosmodc2_id"], inplace=True)
137+
138+
merge_df1.drop(columns=["cosmodc2_id_truth"], inplace=True)
139+
140+
141+
print("Merging with CosmoDC2...\n") # noqa: WPS421
142+
143+
merge_df2 = merge_df1.merge(cosmo_df, left_on="cosmodc2_id", right_on="galaxy_id", how="left")
144+
145+
merge_df2 = merge_df2[~merge_df2["galaxy_id"].isna()]
146+
147+
merge_df2.drop(columns=["ra_y", "dec_y"], inplace=True)
148+
149+
merge_df2.rename(columns={"ra_x": "ra", "dec_x": "dec"}, inplace=True)
150+
151+
152+
print("Saving...\n") # noqa: WPS421
153+
154+
with open(file_path, "wb") as f:
155+
pkl.dump(merge_df2, f)
156+
157+
print(f"Catalog has been saved at {file_path}") # noqa: WPS421

0 commit comments

Comments
 (0)