forked from henrysky/stellarium_star_catalogs
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathsimbad_query_hipsaohdhr.py
More file actions
50 lines (46 loc) · 2.22 KB
/
simbad_query_hipsaohdhr.py
File metadata and controls
50 lines (46 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pathlib
import tqdm
from astropy.table import Table, vstack
from py.utils import custom_simbad
# Root directory that holds everything this script writes.
base_path = pathlib.Path("simbad_query_results")

# One sub-directory per catalog; each receives that catalog's per-batch files.
hip_subdir = base_path.joinpath("hip")
sao_subdir = base_path.joinpath("sao")
hd_subdir = base_path.joinpath("hd")
hr_subdir = base_path.joinpath("hr")

# Create the root and every catalog directory up front (no-op if they exist).
for _directory in (base_path, hip_subdir, sao_subdir, hd_subdir, hr_subdir):
    _directory.mkdir(parents=True, exist_ok=True)
del _directory  # keep the module namespace limited to the names used below

# Destination of the merged table for each catalog. The presence of one of
# these files is what makes the query loop skip that catalog.
hip_combined_path = base_path.joinpath("hip_combined.dat")
sao_combined_path = base_path.joinpath("sao_combined.dat")
hd_combined_path = base_path.joinpath("hd_combined.dat")
hr_combined_path = base_path.joinpath("hr_combined.dat")

# Inclusive upper bound of the identifier numbers queried for each catalog
# (identifiers are enumerated starting from 1).
max_hip_id = 120_416
max_sao_id = 258_997
max_hd_id = 272_150
max_hr_id = 9_110

# Number of consecutive identifiers sent in a single query.
query_batch_size = 2_000
def _batch_file(subdir, batch):
    """Return the path of the result file for batch number ``batch`` in ``subdir``."""
    return subdir / f"simbad_{subdir.name}_{batch}.dat"


# For every catalog: query its identifiers in fixed-size batches of consecutive
# numbers, write each batch result to its own file, then stack all batch files
# into a single combined table.
for subdir, max_id, combined_path in zip(
    [hip_subdir, sao_subdir, hd_subdir, hr_subdir],
    [max_hip_id, max_sao_id, max_hd_id, max_hr_id],
    [hip_combined_path, sao_combined_path, hd_combined_path, hr_combined_path],
):
    if combined_path.exists():  # if the combined file already exists, skip
        print(f"Skipping {subdir.name} as the combined file already exists. If you want to re-query, delete the combined file.")
        continue

    # Ceiling division: a plain ``max_id // query_batch_size + 1`` would add an
    # extra, empty final batch whenever max_id is an exact multiple of the
    # batch size, which would send an empty ID list to the query.
    n_batches = (max_id + query_batch_size - 1) // query_batch_size

    # Identifiers are built as "<CATALOG> <number>", e.g. "HIP 1"; the catalog
    # prefix is the upper-cased directory name.
    catalog_prefix = subdir.name.upper()

    for batch in tqdm.tqdm(range(n_batches), desc=f"Querying {subdir.name}"):
        first_id = 1 + batch * query_batch_size
        # The last batch is clipped so it never goes past max_id.
        last_id = min(max_id, first_id + query_batch_size - 1)
        ids = [f"{catalog_prefix} {i}" for i in range(first_id, last_id + 1)]
        result = custom_simbad.query_objects(ids)
        result.write(_batch_file(subdir, batch), format="ascii", overwrite=True)

    # Merge all the tables. The files are read back by batch number (rather
    # than via glob) so the combined rows keep the order of the queries, and
    # only the batches written above are used: stale higher-numbered files from
    # an earlier run are ignored, and a missing file raises instead of silently
    # truncating the combined table.
    table_list = [
        Table.read(_batch_file(subdir, batch), format="ascii")
        for batch in range(n_batches)
    ]
    simbad_table = vstack(table_list)
    simbad_table.write(combined_path, format="ascii", overwrite=True)