Skip to content

Commit 009773d

Browse files
author
Julie Imig
committed
updating entry point for CCSP/MCCMs
1 parent 6609cc5 commit 009773d

3 files changed

Lines changed: 123 additions & 26 deletions

File tree

mast_contributor_tools/filename_check/fc_app.py

Lines changed: 71 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@
55

66
from tqdm import tqdm
77

8+
from mast_contributor_tools.filename_check.check_filename import (
9+
COLLECTION_NAME_REGEX,
10+
CCSPFileName,
11+
FieldRule,
12+
HlspFileName,
13+
MCCMFileName,
14+
)
815
from mast_contributor_tools.filename_check.fc_db import Hlsp_SQLiteDb
9-
from mast_contributor_tools.filename_check.hlsp_filename import HLSPNAME_REGEX, FieldRule, HlspFileName
1016
from mast_contributor_tools.utils.logger_config import setup_logger
1117

1218
logger = setup_logger(__name__)
@@ -92,35 +98,61 @@ def get_file_paths(
9298
return file_list
9399

94100

95-
def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str, output_format: str = "db") -> None:
96-
"""Recursively check filenames in a directory tree of HLSP products
101+
def identify_collection_type(file_name: str) -> str:
    """
    Identify if a file is an HLSP, CCSP, or MCCM product based on the file name prefix.

    The prefix is the first underscore-delimited field of the base file name and is
    compared case-insensitively.

    Parameters
    ----------
    file_name : str
        File name. Any leading directory components are ignored.

    Returns
    -------
    collection_type : str
        Collection Type - "HLSP", "CCSP", or "MCCM". Logs a warning and defaults to
        "HLSP" if the prefix is not one of these.
    """
    # Only the base name carries the prefix; strip directories so that a value such
    # as "some_dir/ccsp_name_file.fits" is not misread as prefix "SOME".
    prefix = Path(file_name).name.split("_")[0].upper()
    if prefix in ("HLSP", "MCCM", "CCSP"):
        return prefix

    # Default to HLSP, raise warning
    msg = f"WARNING: Could not identify collection type '{prefix}' from filename. Assuming HLSP."
    logger.warning(msg)
    return "HLSP"
122+
123+
124+
def check_filenames(collection_name: str, file_list: list[Path], dbFile: str, output_format: str = "db") -> None:
125+
"""Recursively check filenames in a directory tree of data products
126+
127+
Parameters
128+
----------
129+
collection_name : str
130+
Official identifier (abbreviation/acronym/initialism) for the HLSP/MCCM/CCSP collection
102131
file_list : list[Path]
103132
List of files to check, typically output from get_file_paths()
104133
dbFile : str, optional
105134
Name of SQLite database file to contain results
106135
output_format : str, optional
107136
Alternate format to save results to: 'csv', 'fits', 'html', or 'excel'. Default: "db"
108137
"""
109-
# Make sure hlsp name is valid
110-
if not FieldRule.match_pattern(hlsp_name, HLSPNAME_REGEX):
138+
# Make sure collection name is valid
139+
if not FieldRule.match_pattern(collection_name, COLLECTION_NAME_REGEX):
111140
msg = (
112-
f"Invalid hlsp_name for HLSP collection: '{hlsp_name}'.\n"
113-
"The HLSP name must follow these rules: \n"
141+
f"Invalid collection_name: '{collection_name}'.\n"
142+
"The collection name must follow these rules: \n"
114143
"\t 1. The first character must be a lowercase letter \n"
115144
"\t 2. The middle characters can be lowercase letters, numbers, or a hyphen ‘-‘ \n"
116145
"\t 3. The last character must be a lowercase letter or a number \n"
117-
"\t 4. The hlsp_name must be 20 characters or less in length"
146+
"\t 4. The name must be 20 characters or less in length"
118147
)
119148
logger.error(msg)
120149
raise ValueError(msg)
121150

151+
# Identify if this is an HLSP, CCSP, or MCCM collection
152+
collection_type = identify_collection_type(file_list[0].name)
153+
122154
# Begin file name checking
123-
logger.critical(f"Evaluating {len(file_list)} files for HLSP collection '{hlsp_name}'")
155+
logger.critical(f"Evaluating {len(file_list)} files for {collection_type} collection '{collection_name}'")
124156
if Path(dbFile).is_file():
125157
logger.warning(f"Database file {dbFile} already exists. Overwriting File.")
126158
os.remove(dbFile)
@@ -133,11 +165,19 @@ def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str, output_f
133165
for f in tqdm(file_list):
134166
logger.debug(f"Examining {f.name}")
135167
try:
136-
hfn = HlspFileName(f, hlsp_name)
168+
# Create the filename object
169+
if collection_type == "HLSP":
170+
hfn = HlspFileName(f, collection_name)
171+
elif collection_type == "CCSP":
172+
hfn = CCSPFileName(f, collection_name)
173+
elif collection_type == "MCCM":
174+
hfn = MCCMFileName(f, collection_name)
175+
# Partition into fields
137176
hfn.partition()
138177
except ValueError:
139178
logger.error(f"Invalid name: {f.name}, skipping...")
140179
else:
180+
# Evaluate each field
141181
hfn.create_fields()
142182
elements = hfn.evaluate_fields()
143183
# Link elements to parent filename in db
@@ -167,30 +207,39 @@ def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str, output_f
167207
logger.critical(f"\nFilename checking complete. Results written to {dbFile}")
168208

169209

170-
def check_single_filename(file_name: str, hlsp_name: str = "") -> None:
171-
"""HLSP filename module CLI driver.
210+
def check_single_filename(file_name: str, collection_name: str = "") -> None:
211+
"""Check a single filename against requirements for HLSP/MCCM/CCSP files.
172212
173213
Parameters
174214
----------
175215
file_name : str
176216
File name of an HLSP product to test: for example 'hlsp_my-hlsp_readme.txt'.
177217
This is a string, and does not need to be a real file.
178-
hlsp_name : str, optional
179-
Name of example HLSP collection. For example, 'my-hlsp'.
180-
If not supplied, the hlsp_name is inferred using the second field of the filename.
218+
collection_name : str, optional
219+
Name of example HLSP/MCCM/CCSP collection. For example, 'my-hlsp'.
220+
If not supplied, the collection_name is inferred using the second field of the filename.
181221
"""
182-
# Infer hlsp_name from the file name if it wasn't provided
183-
if not hlsp_name:
222+
# Infer collection type from file name
223+
collection_type = identify_collection_type(file_name)
224+
225+
# Infer collection_name from the file name if it wasn't provided
226+
if not collection_name:
184227
if len(file_name.split("_")) > 2:
185-
hlsp_name = file_name.split("_")[1].lower()
228+
collection_name = file_name.split("_")[1].lower()
186229
else:
187-
msg = f"Could not infer HLSP name from filename '{file_name}'. Not enough parts in filename."
230+
msg = f"Could not infer collection name from filename '{file_name}'. Not enough parts in filename."
188231
logger.error(msg)
189232
raise ValueError(msg)
190233

191234
# Check file name fields
192235
fp = Path(file_name)
193-
hfn = HlspFileName(fp, hlsp_name)
236+
if collection_type == "HLSP":
237+
hfn = HlspFileName(fp, collection_name)
238+
elif collection_type == "CCSP":
239+
hfn = CCSPFileName(fp, collection_name)
240+
elif collection_type == "MCCM":
241+
hfn = MCCMFileName(fp, collection_name)
242+
194243
hfn.partition()
195244
hfn.create_fields()
196245
elements = hfn.evaluate_fields()

mast_contributor_tools/filename_check/fc_config.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@ FieldLength:
33
'extension': 16
44
'filter': 20
55
'hlsp_str': 4
6-
'hlsp_name': 20
6+
'collection_name': 20
77
'instrument': 20
88
'mission': 20
9+
'str_literal': 10
910
'target_name': 30
1011
'product_type': 16
1112
'version_id': 9
@@ -16,6 +17,7 @@ FieldLength:
1617

1718
ExtensionTypes:
1819
- asdf
20+
- asdf.gz
1921
- csv
2022
- dat
2123
- db
@@ -29,6 +31,7 @@ ExtensionTypes:
2931
- jpeg
3032
- jpg
3133
- md
34+
- parquet
3235
- pdf
3336
- png
3437
- ps
@@ -50,6 +53,7 @@ SemanticTypes:
5053
- cat
5154
- cbv
5255
- cube
56+
- cspec
5357
- deflect
5458
- download
5559
- drc
@@ -105,6 +109,7 @@ SemanticTypes:
105109
- sr
106110
- stack
107111
- tds
112+
- thumb
108113
- tp
109114
- tpf
110115
- warp

mast_contributor_tools/tests/filename_check/test_fc_app.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@
55
from pathlib import Path
66
from unittest import mock
77

8-
from mast_contributor_tools.filename_check.fc_app import check_filenames, get_file_paths
8+
import pytest
9+
10+
from mast_contributor_tools.filename_check.fc_app import (
11+
check_filenames,
12+
check_single_filename,
13+
get_file_paths,
14+
identify_collection_type,
15+
)
916

1017

1118
def fake_directory() -> list[Path]:
@@ -36,12 +43,28 @@ def test_get_file_paths(mock_isfile, mock_rglob) -> None:
3643
output = get_file_paths("fake-directory", max_n=2)
3744
assert len(output) == 2
3845
# Test that the search_pattern argument performs as expected
39-
output = get_file_paths("fake-directory", search_pattern='*1.fits')
46+
output = get_file_paths("fake-directory", search_pattern="*1.fits")
4047
assert len(output) == 1
4148
# Test that the exclude_pattern argument performs as expected
42-
output = get_file_paths("fake-directory", exclude_pattern='*1.fits')
49+
output = get_file_paths("fake-directory", exclude_pattern="*1.fits")
4350
assert len(output) == 2
4451

52+
53+
@pytest.mark.parametrize(
    ("test_filename", "expected"),
    [
        ("hlsp_my-hlsp_file.txt", "HLSP"),
        ("ccsp_my-hlsp_file.txt", "CCSP"),
        ("mccm_my-hlsp_file.txt", "MCCM"),
        # Unrecognized prefixes fall back to HLSP
        ("mast_my-hlsp_file.txt", "HLSP"),
    ],
)
def test_identify_collection_type(test_filename, expected) -> None:
    """Check the collection type that identify_collection_type() reports for each file name prefix"""
    detected = identify_collection_type(test_filename)
    assert detected == expected
66+
67+
4568
@mock.patch("mast_contributor_tools.filename_check.fc_app.HlspFileName")
4669
@mock.patch("mast_contributor_tools.filename_check.fc_app.Hlsp_SQLiteDb")
4770
def test_check_filenames(mock_Hlsp_SQLiteDb, mock_HlspFileName) -> None:
@@ -53,3 +76,23 @@ def test_check_filenames(mock_Hlsp_SQLiteDb, mock_HlspFileName) -> None:
5376
mock_Hlsp_SQLiteDb.assert_called_once()
5477
# Assert HlspFileName was called once for each file
5578
assert mock_HlspFileName.call_count == len(fake_directory())
79+
80+
81+
# Test that the right filename class is called for HLSPs, CCSPs, and MCCMs
@pytest.mark.parametrize(
    "test_filename, expected",
    [
        ("hlsp_my-hlsp_file.txt", "HlspFileName"),
        ("ccsp_my-hlsp_file.txt", "CCSPFileName"),
        ("mccm_my-hlsp_file.txt", "MCCMFileName"),
        # Defaults to HLSP when not recognized
        ("mast_my-hlsp_file.txt", "HlspFileName"),
    ],
)
def test_check_single_filename(test_filename, expected) -> None:
    """Test that the check_single_filename() function calls the right classes

    Each case patches the expected filename class in fc_app and verifies that it is
    instantiated exactly once, with the file path and the collection name inferred
    from the second field of the file name.
    """
    with mock.patch(f"mast_contributor_tools.filename_check.fc_app.{expected}") as expected_class:
        # Run function
        check_single_filename(test_filename)
        # Assert correct class was called once, with the path and the inferred collection name
        expected_class.assert_called_once_with(Path(test_filename), "my-hlsp")

0 commit comments

Comments
 (0)