Skip to content

Commit f539100

Browse files
authored
Merge pull request #32 from spacetelescope/ASB-29533_db_outputs
ASB-29533: Adding option to save filename checker output in alternate format (csv, fits, excel, html)
2 parents c91bfab + 5888ff6 commit f539100

9 files changed

Lines changed: 184 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## [Unreleased]
44
### Added
5+
[PR # 32](https://github.com/spacetelescope/mast_contributor_tools/pull/32)
6+
- Adding option to save filename checker output in alternate format (csv, fits, excel, html)
57

68
### Changed
79

TUTORIAL/tutorial_readme.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,16 @@ mct check_filenames mct-tutorial --directory='tutorial-data/' --exclude='*_spec
164164
```
165165
The output message will report that this command only checked 2 files, but both files passed.
166166

167+
### Step 4e. Exporting results to alternate format
168+
169+
If you're not used to working with SQLite `.db` files, you can also save the output to an alternate format using the `--output_format` flag. Currently supported values include "csv", "fits", "html", and "excel". The `.db` file is always written by default, but this option will write an additional file in the chosen format.
170+
171+
For example, this command will save an HTML file with the filename checker results in a color-coded table:
172+
173+
```shell
174+
mct check_filenames mct-tutorial --directory='tutorial-data/' --output_format='html'
175+
```
176+
167177

168178
# Additional Resources
169179
Congratulations! You have completed this tutorial and now know the basic usage of the MAST Contributor's Tools Filename Checker.

docs/filename_check_readme.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ The various options for this command are described below:
2222
| `-e` or `--exclude` | File pattern to exclude from testing, for example '*.jpg' to test all files except the jpgs | None |
2323
| `-n` or `--max_n` | Maximum number of files to check, for testing purposes. | None (all files) |
2424
| `-db` or `--dbFile` | Name of Results database file | `results_<hlsp_name>.db` |
25+
| `-f` or `--output_format` | Write output to alternate format. Currently supports "csv", "fits", "html" or "excel" | `db` |
2526
| `-v` or `--verbose` | Enables verbose output for more information | `False` |
2627
| `--help` | Prints information about this command | |
2728

mast_contributor_tools/filename_check/fc_app.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def get_file_paths(
9292
return file_list
9393

9494

95-
def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str) -> None:
95+
def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str, output_format: str = "db") -> None:
9696
"""Recursively check filenames in a directory tree of HLSP products
9797
9898
Parameters
@@ -103,6 +103,8 @@ def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str) -> None:
103103
List of files to check, typically output from get_file_paths()
104104
dbFile : str, optional
105105
Name of SQLite database file to contain results
106+
output_format : str, optional
107+
Alternate format to save results to: 'csv', 'fits', 'html', or 'excel'. Default: "db"
106108
"""
107109
# Make sure hlsp name is valid
108110
if not FieldRule.match_pattern(hlsp_name, HLSPNAME_REGEX):
@@ -153,6 +155,14 @@ def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str) -> None:
153155
logger.debug(f"Verdict for {f.name}: '{file_rec['final_verdict']}'")
154156

155157
logger.critical(db.print_summary()) # print summary information on how many files passed
158+
logger.critical(f"\nResults written to {dbFile}")
159+
160+
# Write output to alternate format if specified
161+
if output_format != "db":
162+
logger.debug(f"Also writing to alternate format '{output_format}'")
163+
ouput_files = db.write_to_alternate_format(output_format)
164+
logger.critical(f"Written to {ouput_files}")
165+
156166
db.close_db()
157167
logger.critical(f"\nFilename checking complete. Results written to {dbFile}")
158168

mast_contributor_tools/filename_check/fc_db.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import sqlite3
44

5+
import astropy.io.fits as fits
6+
import pandas as pd
7+
from astropy.table import Table
8+
59
# The following SQL will create an SQLite database
610
FILENAME_TABLE = """
711
CREATE TABLE IF NOT EXISTS filename (
@@ -133,3 +137,113 @@ def print_summary(self) -> str:
133137
# Add more detail here later? - could break down by fields, etc.
134138

135139
return summary_message
140+
141+
def write_to_alternate_format(self, save_format: str) -> list[str]:
    """
    Write out the SQLite DB as an alternate format.

    The ``filename`` and ``fields`` tables are read from ``self.db_file`` and
    written to new file(s) whose names are built from the database file name
    with its ``.db`` extension removed.

    Parameters
    ----------
    save_format : str
        Format to save output: 'csv', 'excel', 'html', or 'fits'
        (case-insensitive)

    Returns
    -------
    files_written : list[str]
        List of file names written out

    Raises
    ------
    ValueError
        If ``save_format`` is not one of the supported formats
    """
    # Check that input is a valid option. Normalize the case once so that the
    # validation and the dispatch below always agree (e.g. "CSV" == "csv").
    supported_formats = ["csv", "excel", "fits", "html"]
    fmt = save_format.lower()
    if fmt not in supported_formats:
        msg = f"Save Format '{save_format}' is not supported. "
        msg += f"Please choose from {supported_formats}"
        raise ValueError(msg)

    # Construct new filename root to save to. removesuffix() drops only a
    # trailing ".db"; str.strip(".db") would instead eat any leading/trailing
    # '.', 'd' or 'b' characters (e.g. "bad.db" -> "ba").
    fileroot = self.db_file.removesuffix(".db")

    # Read both tables as pandas dataframes through a short-lived local
    # connection, so any connection already held by this object is not
    # replaced or closed here.
    conn = sqlite3.connect(self.db_file)
    try:
        filename_data = pd.read_sql_query("SELECT * FROM filename", conn)
        fields_data = pd.read_sql_query("SELECT * FROM fields", conn)
    finally:
        conn.close()

    # Save out data in new format
    if fmt == "csv":
        # Two CSV files: one per table
        filenames_csv = f"{fileroot}_filenames.csv"
        fields_csv = f"{fileroot}_fields.csv"
        filename_data.to_csv(filenames_csv)
        fields_data.to_csv(fields_csv)
        return [filenames_csv, fields_csv]

    if fmt == "excel":
        # One Excel document with two sheets, cells color-coded by verdict
        output_filename = f"{fileroot}.xlsx"
        with pd.ExcelWriter(output_filename) as excel_writer:
            filename_data.style.map(color_formatter).to_excel(excel_writer, sheet_name="FileNames")
            fields_data.style.map(color_formatter).to_excel(excel_writer, sheet_name="Fields")
        return [output_filename]

    if fmt == "fits":
        # One FITS file with two table extensions
        output_filename = f"{fileroot}.fits"
        hdu_list = fits.HDUList(
            [
                fits.PrimaryHDU(),  # TODO: add some metadata?
                fits.table_to_hdu(Table.from_pandas(filename_data), name="FILENAMES"),
                fits.table_to_hdu(Table.from_pandas(fields_data), name="FIELDS"),
            ]
        )
        hdu_list.writeto(output_filename, overwrite=True)
        return [output_filename]

    # Only "html" remains: two HTML files, cells color-coded by verdict
    filenames_html = f"{fileroot}_filenames.html"
    fields_html = f"{fileroot}_fields.html"
    filename_data.style.map(color_formatter).to_html(filenames_html, header=True)
    fields_data.style.map(color_formatter).to_html(fields_html, header=True)
    return [filenames_html, fields_html]
230+
231+
232+
def color_formatter(value: str) -> str:
    """Color mapping for use in write_to_alternate_format().

    Color-codes table cells based on verdict: green for "PASS", red for "FAIL",
    yellow for "NEEDS REVIEW". The comparison is case-insensitive and the value
    is converted with str() first, so non-string cells are accepted.

    Parameters
    ----------
    value : str
        Cell value to inspect

    Returns
    -------
    str
        A CSS declaration such as "background-color: red", or an empty string
        (no styling) when the value is not a recognized verdict.
    """
    verdict_colors = {
        "PASS": "lightgreen",
        "FAIL": "red",
        "NEEDS REVIEW": "yellow",
    }
    color = verdict_colors.get(str(value).upper())
    # Return no declaration at all for ordinary cells: "background-color: None"
    # is not valid CSS and would be attached to every non-verdict cell.
    return f"background-color: {color}" if color else ""

mast_contributor_tools/mast_cli.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ def cli() -> None:
4747
@click.option("-e", "--exclude", default="", help="File pattern to exclude from testing, for example '\\*.png'")
4848
@click.option("-n", "--max_n", default=None, help="Maximum number of files to check, for testing purposes.")
4949
@click.option("-db", "--dbFile", default="", help="Results database filename (defaults to: results_<hlsp_name>.db)")
50+
@click.option(
51+
"-f",
52+
"--output_format",
53+
default="db",
54+
help="Write output to alternate format (csv, fits, html, excel)",
55+
)
5056
@click.option("-v", "--verbose", default=False, flag_value=True, help="Enable verbose output")
5157
def filenames_cli(
5258
hlsp_name: str,
@@ -56,6 +62,7 @@ def filenames_cli(
5662
exclude: str = "",
5763
max_n: Union[int, None] = None,
5864
dbfile: str = "",
65+
output_format: str = "db",
5966
verbose: bool = False,
6067
) -> None:
6168
"""
@@ -102,7 +109,7 @@ def filenames_cli(
102109
)
103110

104111
# Perform the file name check
105-
check_filenames(hlsp_name, file_list, dbFile=dbfile)
112+
check_filenames(hlsp_name, file_list, dbFile=dbfile, output_format=output_format)
106113

107114

108115
@cli.command("check_filename", short_help="Check a single file name against MAST HLSP naming standards")

mast_contributor_tools/tests/filename_check/test_fc_db.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,27 @@ def test_add_fields_xfail(field_record) -> None:
171171
test_db.close_db()
172172

173173

174+
# Test write_to_alternate_format() function
175+
@pytest.mark.parametrize(
    "save_format, output_filename",
    [
        ("csv", "test_file_fields.csv"),
        ("fits", "test_file.fits"),
        ("excel", "test_file.xlsx"),
        ("html", "test_file_filenames.html"),
    ],
)
def test_write_to_alternate_format(save_format, output_filename):
    """Test write_to_alternate_format() function"""
    test_db = Hlsp_SQLiteDb(TEST_DB_FILE)
    files_written = test_db.write_to_alternate_format(save_format)
    expected_file = TEST_DB_FILE.replace("test_file.db", output_filename)
    try:
        # Assert new file format was written
        assert os.path.exists(expected_file), f"File {expected_file} not found"
    finally:
        # Delete every file the call reported writing (csv and html produce
        # two files each), so no artifacts are left behind even on failure
        for written in files_written:
            if os.path.exists(written):
                os.remove(written)
193+
194+
174195
# Remove the test.db file once the tests are complete
175196
def test_remove_test_db_file():
176197
"""Delete the test_file.db now that the tests are complete"""

mast_contributor_tools/tests/mast_cli/test_mast_cli.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ def test_filenames_cli_defaults(mock_checkfiles, mock_filepaths) -> None:
5353
# Assert logging level is correct
5454
assert logger.level == logging.getLevelNamesMapping()["INFO"]
5555
# Assert get_file_paths called with right arguments
56-
mock_filepaths.assert_called_with(".", from_file='', search_pattern="*.*", exclude_pattern="", max_n=None)
56+
mock_filepaths.assert_called_with(".", from_file="", search_pattern="*.*", exclude_pattern="", max_n=None)
5757
# Assert check_filenames was called with right arguments
58-
mock_checkfiles.assert_called_with("my-hlsp", mock_filepaths(), dbFile="results_my-hlsp.db")
58+
mock_checkfiles.assert_called_with("my-hlsp", mock_filepaths(), dbFile="results_my-hlsp.db", output_format="db")
5959

6060

6161
def test_filenames_cli_logging(mock_checkfiles, mock_filepaths, mock_singlefile) -> None:
@@ -87,26 +87,32 @@ def test_filenames_cli_fileparams(mock_checkfiles, mock_singlefile, mock_filepat
8787
# Assert it ran successfully
8888
assert output.exit_code == 0
8989
# Assert get_file_paths called with right arguments
90-
mock_filepaths.assert_called_with(".", from_file='', search_pattern="*.fits", exclude_pattern="*.png", max_n="2")
90+
mock_filepaths.assert_called_with(".", from_file="", search_pattern="*.fits", exclude_pattern="*.png", max_n="2")
9191
# Assert check_filenames was called with right arguments
92-
mock_checkfiles.assert_called_with("my-hlsp", mock_filepaths(), dbFile="results_my-hlsp.db")
92+
mock_checkfiles.assert_called_with("my-hlsp", mock_filepaths(), dbFile="results_my-hlsp.db", output_format="db")
9393
# Assert check_single_filename was not called
9494
mock_singlefile.assert_not_called()
9595

96+
9697
def test_filenames_cli_fromfile(mock_checkfiles, mock_singlefile, mock_filepaths) -> None:
9798
# Test multiple file names from a file list
9899
# equivalent to command "mct check_filenames --from_file='file_list.txt'"
99100
runner = CliRunner()
100-
output = runner.invoke(filenames_cli, ["my-hlsp", "--from_file=file_list.txt", "--pattern=*.fits", "--exclude=*.png", "--max_n=2"])
101+
output = runner.invoke(
102+
filenames_cli, ["my-hlsp", "--from_file=file_list.txt", "--pattern=*.fits", "--exclude=*.png", "--max_n=2"]
103+
)
101104
# Assert it ran successfully
102105
assert output.exit_code == 0
103106
# Assert get_file_paths called with right arguments
104-
mock_filepaths.assert_called_with(".", from_file='file_list.txt', search_pattern="*.fits", exclude_pattern="*.png", max_n="2")
107+
mock_filepaths.assert_called_with(
108+
".", from_file="file_list.txt", search_pattern="*.fits", exclude_pattern="*.png", max_n="2"
109+
)
105110
# Assert check_filenames was called with right arguments
106-
mock_checkfiles.assert_called_with("my-hlsp", mock_filepaths(), dbFile="results_my-hlsp.db")
111+
mock_checkfiles.assert_called_with("my-hlsp", mock_filepaths(), dbFile="results_my-hlsp.db", output_format="db")
107112
# Assert check_single_filename was not called
108113
mock_singlefile.assert_not_called()
109114

115+
110116
def test_filenames_cli_singlefile(mock_checkfiles, mock_singlefile, mock_filepaths) -> None:
111117
"""Test different flags are working as expected for the single filename checker CLI"""
112118
# Test single file

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,13 @@ authors = [
88
{ name = "Mikulski Archive for Space Telescopes", email = "mast_contrib@stsci.edu" },
99
]
1010
dependencies = [
11+
"astropy >= 7.2.0",
1112
"click >= 8.1.0",
13+
"openpyxl >= 3.1.5",
14+
"pandas >= 2.3.3",
1215
"pyyaml > 6.0.1",
16+
"setuptools-scm >= 8.3.1",
1317
"tqdm >= 4.67.1",
14-
"setuptools-scm >= 8.3.1"
1518
]
1619
dynamic = ["version"]
1720

0 commit comments

Comments
 (0)