Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
cd64298
Refactor create_simulated_catalog.py to account for parquet format.
mairanteodoro May 22, 2025
b5be827
Implement integration test and other updates.
mairanteodoro Jun 2, 2025
ccedb9a
Style check fixes.
mairanteodoro Jun 2, 2025
15f25e8
More style fixes.
mairanteodoro Jun 2, 2025
21a3e2c
Add regtest files
mairanteodoro Jun 2, 2025
34cc863
Ignore fail-under threshold.
mairanteodoro Jun 2, 2025
b222d25
Ignore fail-under in GA workflow.
mairanteodoro Jun 2, 2025
eb0df44
Update files
mairanteodoro Jun 2, 2025
6ae4e86
Refactoring and improvements.
mairanteodoro Jun 30, 2025
baa0e09
Mock catalog data to avoid using real file.
mairanteodoro Jun 30, 2025
e7cced1
Merge branch 'main' into RCAL-1074
mairanteodoro Jun 30, 2025
bf2df44
Add scripts folder.
mairanteodoro Jun 30, 2025
7e83c2f
Bug fix when saving results.
mairanteodoro Jun 30, 2025
1387e51
Update tests configuration to use romancal's.
mairanteodoro Jul 1, 2025
d8250bf
Remove scripts folder.
mairanteodoro Jul 1, 2025
32ff6e7
Added romancal to pyproject.toml
mairanteodoro Jul 1, 2025
31d3578
Use released version of romancal + RDM.
mairanteodoro Jul 1, 2025
f5f7147
Fix duplicate registered name issue.
mairanteodoro Jul 1, 2025
fdd672a
Code refactoring to normalize filters name list.
mairanteodoro Jul 1, 2025
3b5759f
Set default parameters for roman_photoz.
mairanteodoro Jul 1, 2025
5a701f4
Fix filter name list used in unit test.
mairanteodoro Jul 2, 2025
0febdd9
Updated project dependencies.
mairanteodoro Jul 2, 2025
f9410e8
Fix issue when installing dependencies.
mairanteodoro Jul 2, 2025
b3865ed
Bug fix.
mairanteodoro Jul 2, 2025
6d84a88
Fix typo.
mairanteodoro Jul 2, 2025
2c2545b
Remove unnecessary data files.
mairanteodoro Jul 2, 2025
61e0211
Revert removal of sample catalog file.
mairanteodoro Jul 2, 2025
9833fca
Set default column name to segment_flux.
mairanteodoro Jul 3, 2025
c889853
Add reference to romancal's regtest documentation.
mairanteodoro Jul 3, 2025
a34c7fc
Update documentation and configuration.
mairanteodoro Jul 7, 2025
b962b64
Add missing extension to docs configuration file.
mairanteodoro Jul 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run-unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ jobs:
- name: Run unit tests with coverage
run: |
coverage run -m pytest
coverage report --fail-under=70
coverage report
2 changes: 1 addition & 1 deletion docs/roman_catalog_handler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ The following example demonstrates how to use the `RomanCatalogHandler` class to
reg_test_data = Path(test_bigdata)

# Specify the catalog file
test_cat = reg_test_data / "r0000101001001001001_0001_wfi01_cat.asdf"
test_cat = reg_test_data / "r0000101001001001001_0001_wfi01_cat.parquet"

# Create an instance of RomanCatalogHandler
catalog_handler = RomanCatalogHandler(test_cat.as_posix())
Expand Down
13 changes: 10 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "roman_photoz"
description = "Library for photometric redshift determination using data from the Nancy Grace Roman Space Telescope"
readme = "README.md"
requires-python = ">=3.10"
requires-python = ">=3.11"
authors = [
{ name = "Roman calibration pipeline developers", email = "help@stsci.edu" },
]
Expand All @@ -22,7 +22,7 @@ dependencies = [
"photutils >=1.13.0",
"pyparsing >=2.4.7",
"requests >=2.26",
"roman_datamodels>=0.22.0",
"roman_datamodels @ git+https://github.com/spacetelescope/roman_datamodels.git@main",
# "roman_datamodels @ git+https://github.com/spacetelescope/roman_datamodels.git",
"scipy>=1.7.0",
"stcal>=1.10.0",
Expand All @@ -38,6 +38,9 @@ dependencies = [
"pz-rail-lephare @ git+https://git@github.com/mairanteodoro/rail_lephare.git",
"lephare == 0.1.13",
"pz-rail-base == 1.1.5",
"tox>=4.26.0",
"pip>=25.1.1",
"jdaviz>=4.2.3",
]
license-files = ["LICENSE"]
dynamic = ["version"]
Expand Down Expand Up @@ -139,10 +142,14 @@ omit = ["*/tests/*", "*/docs/*"]
show_missing = true
skip_covered = true
precision = 2
fail_under = 70
# fail_under = 70

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[project.scripts]
roman-photoz = "roman_photoz.__main__:main"
roman-photoz-create-simulated-catalog = "roman_photoz.create_simulated_catalog:main"
182 changes: 58 additions & 124 deletions roman_photoz/create_simulated_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@

import lephare as lp
import numpy as np
from astropy.table import Table
from numpy.lib import recfunctions as rfn
from rail.core.stage import RailStage
from roman_datamodels import datamodels as rdm

from roman_photoz import create_roman_filters
from roman_photoz.default_config_file import default_roman_config
from roman_photoz.logger import logger
from roman_photoz.utils import save_catalog

ROMAN_DEFAULT_CONFIG = default_roman_config

Expand All @@ -25,7 +26,7 @@
"LEPHAREWORK", (Path(LEPHAREDIR).parent / "work").as_posix()
)
CWD = os.getcwd()
DEFAULT_OUTPUT_CATALOG_FILENAME = "roman_simulated_catalog.asdf"
DEFAULT_OUTPUT_CATALOG_FILENAME = "roman_simulated_catalog.parquet"


class SimulatedCatalog:
Expand Down Expand Up @@ -56,7 +57,7 @@ class SimulatedCatalog:
Placeholder for simulated data.
"""

def __init__(self):
def __init__(self, include_errors: bool = False):
"""
Initializes the SimulatedCatalog class.
"""
Expand All @@ -71,12 +72,13 @@ def __init__(self):
self.simulated_data_filename = ""
self.simulated_data = None
self.roman_catalog_template = self.read_roman_template_catalog()
self.include_errors = include_errors

def read_roman_template_catalog(self):
input_filename = "roman_catalog_template.asdf"
input_filename = "r00001_p_v01001001001001_270p65x49y70_f158_mbcat_cat.parquet"
this_path = Path(__file__).resolve().parent
input_path = (this_path / "data" / input_filename).as_posix()
return rdm.open(input_path)
return Table.read(input_path)

def is_folder_not_empty(self, folder_path: str, partial_text: str) -> bool:
"""
Expand Down Expand Up @@ -166,6 +168,13 @@ def create_simulated_input_catalog(
):
"""
Create a simulated input catalog from the simulated data.

+ read the ROMAN_SIMULATED_MAGS.dat produced by LePhare's
`prepare` method in `create_simulated_input_catalog`. The
column names are defined by LePhare.

+ format the column names to match the Roman catalog's specifications

"""
catalog_name = Path(
LEPHAREWORK, "lib_mag", f"{self.simulated_data_filename}.dat"
Expand All @@ -184,14 +193,14 @@ def create_simulated_input_catalog(
]

# we're matching the number of objects in the template
num_lines = len(self.roman_catalog_template.source_catalog)
num_lines = -1 # len(self.roman_catalog_template)
random_lines = self.pick_random_lines(num_lines)
catalog = random_lines[cols_to_keep]

final_catalog = self.add_error(catalog)
final_catalog = self.add_ids(final_catalog)

context = np.full((num_lines), 0)
context = np.full((len(catalog)), 0)
# zspec = np.full((num_lines), np.nan)
zspec = final_catalog["redshift"]
string_data = final_catalog["redshift"]
Expand All @@ -206,108 +215,15 @@ def create_simulated_input_catalog(
# remove the redshift column
final_catalog = rfn.drop_fields(final_catalog, ["redshift"])

self.update_roman_catalog_template(final_catalog)
# now that self.roman_catalog_template has been updated, we can get rid of
# the simulated data and the simulated data filename
del final_catalog
final_catalog = Table(final_catalog)

self.save_catalog(
save_catalog(
final_catalog,
output_filename=output_filename,
output_path=output_path,
overwrite=True,
)

def update_roman_catalog_template(self, catalog):
"""
Update the Roman catalog template with the simulated data.

Parameters
----------
catalog : np.ndarray
The catalog data to update the Roman catalog template with.
"""
filter_list = (
default_roman_config["FILTER_LIST"]
.replace("roman/roman_", "")
.replace(".pb", "")
.split(",")
)
# in the asdf template file we only have the flux in
# the F158 filter so we're adding the other filters
roman_filter_params = [
x.replace("F158", "")
for x in self.roman_catalog_template.source_catalog.columns
if "F158" in x
]

# # first, clear the template
# self.roman_catalog_template.source_catalog.remove_rows(slice(None))
self.roman_catalog_template.source_catalog.add_column(catalog["id"], name="id")

# then add the simulated data
for filter_name in filter_list:
for param in roman_filter_params:
new_column = f"{filter_name}{param}"
if new_column not in self.roman_catalog_template.source_catalog.columns:
# add new column
if "flux" in new_column:
# add flux and error columns for each filter
simulated_colname = (
f"magnitude_{filter_name}_err"
if "err" in new_column
else f"magnitude_{filter_name}"
)
self.roman_catalog_template.source_catalog.add_column(
catalog[simulated_colname], name=new_column
)
else:
# copy parameter from F158
simulated_colname = new_column
self.roman_catalog_template.source_catalog.add_column(
self.roman_catalog_template.source_catalog[f"F158{param}"],
name=new_column,
)

else:
# replace column data
if "flux" in new_column:
simulated_colname = (
f"magnitude_{filter_name}_err"
if "err" in new_column
else f"magnitude_{filter_name}"
)
self.roman_catalog_template.source_catalog[new_column] = (
catalog[simulated_colname]
)

self.roman_catalog_template.source_catalog.add_column(
catalog["context"], name="context"
)
self.roman_catalog_template.source_catalog.add_column(
catalog["zspec"], name="zspec"
)
self.roman_catalog_template.source_catalog.add_column(
catalog["z_true"], name="string_data"
)

def save_catalog(
self,
output_path: str = LEPHAREWORK,
output_filename: str = DEFAULT_OUTPUT_CATALOG_FILENAME,
):
"""
Save the simulated input catalog to a file.

Parameters
----------
output_filename : str, optional
The filename to save the catalog to.
"""
output_path = Path(output_path).as_posix()
output_filename = output_filename
logger.info(f"Saving catalog to {output_path}/{output_filename}...")
self.roman_catalog_template.save(output_filename, dir_path=output_path)
logger.info("Catalog saved successfully")

def add_ids(self, catalog):
"""
Add an ID column to the catalog.
Expand All @@ -323,10 +239,12 @@ def add_ids(self, catalog):
The catalog data with an ID column added.
"""
ids = np.arange(1, len(catalog) + 1)
catalog = rfn.append_fields(catalog, "id", ids, usemask=False)
catalog = rfn.append_fields(catalog, "label", ids, usemask=False)

new_dtype = [("id", catalog["id"].dtype)] + [
(name, catalog[name].dtype) for name in catalog.dtype.names if name != "id"
new_dtype = [("label", catalog["label"].dtype)] + [
(name, catalog[name].dtype)
for name in catalog.dtype.names
if name != "label"
]
new_catalog = np.empty(catalog.shape, dtype=new_dtype)
for name in new_catalog.dtype.names:
Expand Down Expand Up @@ -362,6 +280,10 @@ def add_error(
np.ndarray
The catalog data with error columns added.
"""
if not self.include_errors:
logger.info("Skipping error addition to the simulated catalog.")
return catalog

rng = np.random.default_rng(seed=seed)
new_dtype = []
for col in catalog.dtype.names:
Expand All @@ -383,6 +305,7 @@ def add_error(

return new_catalog


def create_header(self, catalog_name: str):
"""
Create the header for the catalog.
Expand Down Expand Up @@ -436,20 +359,23 @@ def pick_random_lines(self, num_lines: int):
np.ndarray
An array containing the randomly picked lines.
"""
if self.simulated_data is None:
raise ValueError(
"Data array is not initialized. Please run create_simulated_input_catalog first."
)
if num_lines > 0:
if self.simulated_data is None:
raise ValueError(
"Data array is not initialized. Please run create_simulated_input_catalog first."
)

total_lines = len(self.simulated_data)
if num_lines > total_lines:
raise ValueError(
f"Requested {num_lines} lines, but only {total_lines} lines are available."
)
total_lines = len(self.simulated_data)
if num_lines > total_lines:
raise ValueError(
f"Requested {num_lines} lines, but only {total_lines} lines are available."
)

rng = np.random.default_rng()
random_indices = rng.choice(total_lines, num_lines, replace=False)
return self.simulated_data[random_indices]
rng = np.random.default_rng()
random_indices = rng.choice(total_lines, num_lines, replace=False)
return self.simulated_data[random_indices]
else:
return self.simulated_data

def process(
self,
Expand All @@ -469,31 +395,39 @@ def process(
logger.info("DONE")


if __name__ == "__main__":

def main():
def parse_args():
parser = argparse.ArgumentParser(
description="Create a simulated catalog using the Roman telescope data."
)
parser.add_argument(
"--output_path",
"--output-path",
type=str,
default=LEPHAREWORK,
help="Path to save the output catalog.",
)
parser.add_argument(
"--output_filename",
"--output-filename",
type=str,
default=DEFAULT_OUTPUT_CATALOG_FILENAME,
help="Filename for the output catalog.",
)
parser.add_argument(
"--add-error",
action="store_true",
help="Optionally add error to the simulated catalog.",
)
return parser.parse_args()

args = parse_args()

logger.info("Starting simulated catalog creation...")
rcp = SimulatedCatalog()
rcp = SimulatedCatalog(include_errors=args.add_error)
rcp.process(args.output_path, args.output_filename)
logger.info("Simulated catalog creation completed successfully")

logger.info("Done.")


if __name__ == "__main__":
main()
Binary file not shown.
Loading