spacetelescope · mairanteodoro · Jul 7, 2025 · May 22, 2025 · Jun 2, 2025 · Jun 2, 2025
diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml
@@ -39,4 +39,4 @@ jobs:
       - name: Run unit tests with coverage
         run: |
           coverage run -m pytest
-          coverage report --fail-under=70
+          coverage report
diff --git a/docs/roman_catalog_handler.rst b/docs/roman_catalog_handler.rst
@@ -29,7 +29,7 @@ The following example demonstrates how to use the `RomanCatalogHandler` class to
      reg_test_data = Path(test_bigdata)
 
      # Specify the catalog file
-     test_cat = reg_test_data / "r0000101001001001001_0001_wfi01_cat.asdf"
+     test_cat = reg_test_data / "r0000101001001001001_0001_wfi01_cat.parquet"
 
      # Create an instance of RomanCatalogHandler
      catalog_handler = RomanCatalogHandler(test_cat.as_posix())

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "roman_photoz"
 description = "Library for photometric redshift determination using data from the Nancy Grace Roman Space Telescope"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 authors = [
   { name = "Roman calibration pipeline developers", email = "help@stsci.edu" },
 ]
@@ -22,7 +22,7 @@ dependencies = [
   "photutils >=1.13.0",
   "pyparsing >=2.4.7",
   "requests >=2.26",
-  "roman_datamodels>=0.22.0",
+  "roman_datamodels @ git+https://github.com/spacetelescope/roman_datamodels.git@main",
   # "roman_datamodels @ git+https://github.com/spacetelescope/roman_datamodels.git",
   "scipy>=1.7.0",
   "stcal>=1.10.0",
@@ -38,6 +38,9 @@ dependencies = [
   "pz-rail-lephare @ git+https://git@github.com/mairanteodoro/rail_lephare.git",
   "lephare == 0.1.13",
   "pz-rail-base == 1.1.5",
+  "tox>=4.26.0",
+  "pip>=25.1.1",
+  "jdaviz>=4.2.3",
 ]
 license-files = ["LICENSE"]
 dynamic = ["version"]
@@ -139,10 +142,14 @@ omit = ["*/tests/*", "*/docs/*"]
 show_missing = true
 skip_covered = true
 precision = 2
-fail_under = 70
+# fail_under = 70
 
 [tool.coverage.html]
 directory = "htmlcov"
 
 [tool.coverage.xml]
 output = "coverage.xml"
+
+[project.scripts]
+roman-photoz = "roman_photoz.__main__:main"
+roman-photoz-create-simulated-catalog = "roman_photoz.create_simulated_catalog:main"
diff --git a/roman_photoz/create_simulated_catalog.py b/roman_photoz/create_simulated_catalog.py
@@ -7,13 +7,14 @@
 
 import lephare as lp
 import numpy as np
+from astropy.table import Table
 from numpy.lib import recfunctions as rfn
 from rail.core.stage import RailStage
-from roman_datamodels import datamodels as rdm
 
 from roman_photoz import create_roman_filters
 from roman_photoz.default_config_file import default_roman_config
 from roman_photoz.logger import logger
+from roman_photoz.utils import save_catalog
 
 ROMAN_DEFAULT_CONFIG = default_roman_config
 
@@ -25,7 +26,7 @@
     "LEPHAREWORK", (Path(LEPHAREDIR).parent / "work").as_posix()
 )
 CWD = os.getcwd()
-DEFAULT_OUTPUT_CATALOG_FILENAME = "roman_simulated_catalog.asdf"
+DEFAULT_OUTPUT_CATALOG_FILENAME = "roman_simulated_catalog.parquet"
 
 
 class SimulatedCatalog:
@@ -56,7 +57,7 @@ class SimulatedCatalog:
         Placeholder for simulated data.
     """
 
-    def __init__(self):
+    def __init__(self, include_errors: bool = False):
         """
         Initializes the SimulatedCatalog class.
         """
@@ -71,12 +72,13 @@ def __init__(self):
         self.simulated_data_filename = ""
         self.simulated_data = None
         self.roman_catalog_template = self.read_roman_template_catalog()
+        self.include_errors = include_errors
 
     def read_roman_template_catalog(self):
-        input_filename = "roman_catalog_template.asdf"
+        input_filename = "r00001_p_v01001001001001_270p65x49y70_f158_mbcat_cat.parquet"
         this_path = Path(__file__).resolve().parent
         input_path = (this_path / "data" / input_filename).as_posix()
-        return rdm.open(input_path)
+        return Table.read(input_path)
 
     def is_folder_not_empty(self, folder_path: str, partial_text: str) -> bool:
         """
@@ -166,6 +168,13 @@ def create_simulated_input_catalog(
     ):
         """
         Create a simulated input catalog from the simulated data.
+
+        + read the ROMAN_SIMULATED_MAGS.dat produced by LePhare's
+            `prepare` method in `create_simulated_input_catalog`. The
+            columns name are defined by LePhare.
+
+        + format the columns name to match Roman catalog's specifications
+
         """
         catalog_name = Path(
             LEPHAREWORK, "lib_mag", f"{self.simulated_data_filename}.dat"
@@ -184,14 +193,14 @@ def create_simulated_input_catalog(
         ]
 
         # we're matching the number of objects in the template
-        num_lines = len(self.roman_catalog_template.source_catalog)
+        num_lines = -1  # len(self.roman_catalog_template)
         random_lines = self.pick_random_lines(num_lines)
         catalog = random_lines[cols_to_keep]
 
         final_catalog = self.add_error(catalog)
         final_catalog = self.add_ids(final_catalog)
 
-        context = np.full((num_lines), 0)
+        context = np.full((len(catalog)), 0)
         # zspec = np.full((num_lines), np.nan)
         zspec = final_catalog["redshift"]
         string_data = final_catalog["redshift"]
@@ -206,108 +215,15 @@ def create_simulated_input_catalog(
         # remove the redshift column
         final_catalog = rfn.drop_fields(final_catalog, ["redshift"])
 
-        self.update_roman_catalog_template(final_catalog)
-        # now that self.roman_catalog_template has been updated, we can get rid of
-        # the simulated data and the simulated data filename
-        del final_catalog
+        final_catalog = Table(final_catalog)
 
-        self.save_catalog(
+        save_catalog(
+            final_catalog,
             output_filename=output_filename,
             output_path=output_path,
+            overwrite=True,
         )
 
-    def update_roman_catalog_template(self, catalog):
-        """
-        Update the Roman catalog template with the simulated data.
-
-        Parameters
-        ----------
-        catalog : np.ndarray
-            The catalog data to update the Roman catalog template with.
-        """
-        filter_list = (
-            default_roman_config["FILTER_LIST"]
-            .replace("roman/roman_", "")
-            .replace(".pb", "")
-            .split(",")
-        )
-        # in the asdf template file we only have the flux in
-        # the F158 filter so we're adding the other filters
-        roman_filter_params = [
-            x.replace("F158", "")
-            for x in self.roman_catalog_template.source_catalog.columns
-            if "F158" in x
-        ]
-
-        # # first, clear the template
-        # self.roman_catalog_template.source_catalog.remove_rows(slice(None))
-        self.roman_catalog_template.source_catalog.add_column(catalog["id"], name="id")
-
-        # then add the simulated data
-        for filter_name in filter_list:
-            for param in roman_filter_params:
-                new_column = f"{filter_name}{param}"
-                if new_column not in self.roman_catalog_template.source_catalog.columns:
-                    # add new column
-                    if "flux" in new_column:
-                        # add flux and error columns for each filter
-                        simulated_colname = (
-                            f"magnitude_{filter_name}_err"
-                            if "err" in new_column
-                            else f"magnitude_{filter_name}"
-                        )
-                        self.roman_catalog_template.source_catalog.add_column(
-                            catalog[simulated_colname], name=new_column
-                        )
-                    else:
-                        # copy parameter from F158
-                        simulated_colname = new_column
-                        self.roman_catalog_template.source_catalog.add_column(
-                            self.roman_catalog_template.source_catalog[f"F158{param}"],
-                            name=new_column,
-                        )
-
-                else:
-                    # replace column data
-                    if "flux" in new_column:
-                        simulated_colname = (
-                            f"magnitude_{filter_name}_err"
-                            if "err" in new_column
-                            else f"magnitude_{filter_name}"
-                        )
-                        self.roman_catalog_template.source_catalog[new_column] = (
-                            catalog[simulated_colname]
-                        )
-
-        self.roman_catalog_template.source_catalog.add_column(
-            catalog["context"], name="context"
-        )
-        self.roman_catalog_template.source_catalog.add_column(
-            catalog["zspec"], name="zspec"
-        )
-        self.roman_catalog_template.source_catalog.add_column(
-            catalog["z_true"], name="string_data"
-        )
-
-    def save_catalog(
-        self,
-        output_path: str = LEPHAREWORK,
-        output_filename: str = DEFAULT_OUTPUT_CATALOG_FILENAME,
-    ):
-        """
-        Save the simulated input catalog to a file.
-
-        Parameters
-        ----------
-        output_filename : str, optional
-            The filename to save the catalog to.
-        """
-        output_path = Path(output_path).as_posix()
-        output_filename = output_filename
-        logger.info(f"Saving catalog to {output_path}/{output_filename}...")
-        self.roman_catalog_template.save(output_filename, dir_path=output_path)
-        logger.info("Catalog saved successfully")
-
     def add_ids(self, catalog):
         """
         Add an ID column to the catalog.
@@ -323,10 +239,12 @@ def add_ids(self, catalog):
             The catalog data with an ID column added.
         """
         ids = np.arange(1, len(catalog) + 1)
-        catalog = rfn.append_fields(catalog, "id", ids, usemask=False)
+        catalog = rfn.append_fields(catalog, "label", ids, usemask=False)
 
-        new_dtype = [("id", catalog["id"].dtype)] + [
-            (name, catalog[name].dtype) for name in catalog.dtype.names if name != "id"
+        new_dtype = [("label", catalog["label"].dtype)] + [
+            (name, catalog[name].dtype)
+            for name in catalog.dtype.names
+            if name != "label"
         ]
         new_catalog = np.empty(catalog.shape, dtype=new_dtype)
         for name in new_catalog.dtype.names:
@@ -362,6 +280,10 @@ def add_error(
         np.ndarray
             The catalog data with error columns added.
         """
+        if not self.include_errors:
+            logger.info("Skipping error addition to the simulated catalog.")
+            return catalog
+
         rng = np.random.default_rng(seed=seed)
         new_dtype = []
         for col in catalog.dtype.names:
@@ -383,6 +305,7 @@ def add_error(
 
         return new_catalog
 
+
     def create_header(self, catalog_name: str):
         """
         Create the header for the catalog.
@@ -436,20 +359,23 @@ def pick_random_lines(self, num_lines: int):
         np.ndarray
             An array containing the randomly picked lines.
         """
-        if self.simulated_data is None:
-            raise ValueError(
-                "Data array is not initialized. Please run create_simulated_input_catalog first."
-            )
+        if num_lines > 0:
+            if self.simulated_data is None:
+                raise ValueError(
+                    "Data array is not initialized. Please run create_simulated_input_catalog first."
+                )
 
-        total_lines = len(self.simulated_data)
-        if num_lines > total_lines:
-            raise ValueError(
-                f"Requested {num_lines} lines, but only {total_lines} lines are available."
-            )
+            total_lines = len(self.simulated_data)
+            if num_lines > total_lines:
+                raise ValueError(
+                    f"Requested {num_lines} lines, but only {total_lines} lines are available."
+                )
 
-        rng = np.random.default_rng()
-        random_indices = rng.choice(total_lines, num_lines, replace=False)
-        return self.simulated_data[random_indices]
+            rng = np.random.default_rng()
+            random_indices = rng.choice(total_lines, num_lines, replace=False)
+            return self.simulated_data[random_indices]
+        else:
+            return self.simulated_data
 
     def process(
         self,
@@ -469,31 +395,39 @@ def process(
         logger.info("DONE")
 
 
-if __name__ == "__main__":
-
+def main():
     def parse_args():
         parser = argparse.ArgumentParser(
             description="Create a simulated catalog using the Roman telescope data."
         )
         parser.add_argument(
-            "--output_path",
+            "--output-path",
             type=str,
             default=LEPHAREWORK,
             help="Path to save the output catalog.",
         )
         parser.add_argument(
-            "--output_filename",
+            "--output-filename",
             type=str,
             default=DEFAULT_OUTPUT_CATALOG_FILENAME,
             help="Filename for the output catalog.",
         )
+        parser.add_argument(
+            "--add-error",
+            action="store_true",
+            help="Optionally add error to the simulated catalog.",
+        )
         return parser.parse_args()
 
     args = parse_args()
 
     logger.info("Starting simulated catalog creation...")
-    rcp = SimulatedCatalog()
+    rcp = SimulatedCatalog(include_errors=args.add_error)
     rcp.process(args.output_path, args.output_filename)
     logger.info("Simulated catalog creation completed successfully")
 
     logger.info("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/roman_photoz/data/r00001_p_v01001001001001_270p65x49y70_f158_mbcat_cat.parquet b/roman_photoz/data/r00001_p_v01001001001001_270p65x49y70_f158_mbcat_cat.parquet