55
66from tqdm import tqdm
77
8+ from mast_contributor_tools .filename_check .check_filename import (
9+ COLLECTION_NAME_REGEX ,
10+ CCSPFileName ,
11+ FieldRule ,
12+ HlspFileName ,
13+ MCCMFileName ,
14+ )
815from mast_contributor_tools .filename_check .fc_db import Hlsp_SQLiteDb
9- from mast_contributor_tools .filename_check .hlsp_filename import HLSPNAME_REGEX , FieldRule , HlspFileName
1016from mast_contributor_tools .utils .logger_config import setup_logger
1117
1218logger = setup_logger (__name__ )
@@ -92,35 +98,61 @@ def get_file_paths(
9298 return file_list
9399
94100
95- def check_filenames (hlsp_name : str , file_list : list [Path ], dbFile : str , output_format : str = "db" ) -> None :
96- """Recursively check filenames in a directory tree of HLSP products
def identify_collection_type(file_name: str) -> str:
    """
    Identify if a file is an HLSP, CCSP, or MCCM product based on the file name prefix.

    The collection type is the text before the first underscore of the file's
    base name, compared case-insensitively. Leading directory components, if
    any, are ignored.

    Parameters
    ----------
    file_name : str
        File name (optionally including a directory path), for example
        'hlsp_my-hlsp_readme.txt'. Does not need to be a real file.

    Returns
    -------
    collection_type : str
        Collection Type - "HLSP", "CCSP", or "MCCM". Logs a warning and defaults
        to "HLSP" if unable to identify.
    """
    # Only the base name carries the prefix; without this a path such as
    # 'dir/ccsp_x_y.fits' would yield 'DIR/CCSP' and fall back to HLSP.
    prefix = Path(file_name).name.split("_")[0].upper()
    if prefix in ("HLSP", "MCCM", "CCSP"):
        return prefix

    # Unknown prefix: default to HLSP, but tell the user what was seen
    msg = f"WARNING: Could not identify collection type '{prefix}' from filename. Assuming HLSP."
    logger.warning(msg)
    return "HLSP"
122+
123+
124+ def check_filenames (collection_name : str , file_list : list [Path ], dbFile : str , output_format : str = "db" ) -> None :
125+ """Recursively check filenames in a directory tree of data products
126+
127+ Parameters
128+ ----------
129+ collection_name : str
130+ Official identifier (abbreviation/acronym/initialism) for the HLSP/MCCM/CCSP collection
102131 file_list: list[Path]
103132 List of files to check, typically output from get_file_paths()
104133 dbFile : str, optional
105134 Name of SQLite database file to contain results
106135 output_format : str, optional
107136 Alternate format to save results to: 'csv', 'fits', 'html', or 'excel'. Default: "db"
108137 """
109- # Make sure hlsp name is valid
110- if not FieldRule .match_pattern (hlsp_name , HLSPNAME_REGEX ):
138+ # Make sure collection name is valid
139+ if not FieldRule .match_pattern (collection_name , COLLECTION_NAME_REGEX ):
111140 msg = (
112- f"Invalid hlsp_name for HLSP collection : '{ hlsp_name } '.\n "
113- "The HLSP name must follow these rules: \n "
141+ f"Invalid collection_name : '{ collection_name } '.\n "
142+ "The collection name must follow these rules: \n "
114143 "\t 1. The first character must be a lowercase letter \n "
115144 "\t 2. The middle characters can be lowercase letters, numbers, or a hyphen ‘-‘ \n "
116145 "\t 3. The last character must be a lowercase letter or a number \n "
117- "\t 4. The hlsp_name must be 20 characters or less in length"
146+ "\t 4. The name must be 20 characters or less in length"
118147 )
119148 logger .error (msg )
120149 raise ValueError (msg )
121150
151+ # Identify if this is an HLSP, CCSP, or MCCM collection
152+ collection_type = identify_collection_type (file_list [0 ].name )
153+
122154 # Begin file name checking
123- logger .critical (f"Evaluating { len (file_list )} files for HLSP collection '{ hlsp_name } '" )
155+ logger .critical (f"Evaluating { len (file_list )} files for { collection_type } collection '{ collection_name } '" )
124156 if Path (dbFile ).is_file ():
125157 logger .warning (f"Database file { dbFile } already exists. Overwriting File." )
126158 os .remove (dbFile )
@@ -133,11 +165,19 @@ def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str, output_f
133165 for f in tqdm (file_list ):
134166 logger .debug (f"Examining { f .name } " )
135167 try :
136- hfn = HlspFileName (f , hlsp_name )
168+ # Create the filename object
169+ if collection_type == "HLSP" :
170+ hfn = HlspFileName (f , collection_name )
171+ elif collection_type == "CCSP" :
172+ hfn = CCSPFileName (f , collection_name )
173+ elif collection_type == "MCCM" :
174+ hfn = MCCMFileName (f , collection_name )
175+ # Partition into fields
137176 hfn .partition ()
138177 except ValueError :
139178 logger .error (f"Invalid name: { f .name } , skipping..." )
140179 else :
180+ # Evaluate each field
141181 hfn .create_fields ()
142182 elements = hfn .evaluate_fields ()
143183 # Link elements to parent filename in db
@@ -167,30 +207,39 @@ def check_filenames(hlsp_name: str, file_list: list[Path], dbFile: str, output_f
167207 logger .critical (f"\n Filename checking complete. Results written to { dbFile } " )
168208
169209
170- def check_single_filename (file_name : str , hlsp_name : str = "" ) -> None :
171- """HLSP filename module CLI driver .
210+ def check_single_filename (file_name : str , collection_name : str = "" ) -> None :
211+ """Check a single filename against requirements for HLSP/MCCM/CCSP files .
172212
173213 Parameters
174214 ----------
175215 file_name : str
176216 File name of an HLSP/MCCM/CCSP product to test: for example 'hlsp_my-hlsp_readme.txt'.
177217 This is a string, and does not need to be a real file.
178- hlsp_name : str, optional
179- Name of example HLSP collection. For example, 'my-hlsp'.
180- If not supplied, the hlsp_name is inferred using the second field of the filename.
218+ collection_name : str, optional
219+ Name of example HLSP/MCCM/CCSP collection. For example, 'my-hlsp'.
220+ If not supplied, the collection_name is inferred using the second field of the filename.
181221 """
182- # Infer hlsp_name from the file name if it wasn't provided
183- if not hlsp_name :
222+ # Infer collection type from file name
223+ collection_type = identify_collection_type (file_name )
224+
225+ # Infer collection_name from the file name if it wasn't provided
226+ if not collection_name :
184227 if len (file_name .split ("_" )) > 2 :
185- hlsp_name = file_name .split ("_" )[1 ].lower ()
228+ collection_name = file_name .split ("_" )[1 ].lower ()
186229 else :
187- msg = f"Could not infer HLSP name from filename '{ file_name } '. Not enough parts in filename."
230+ msg = f"Could not infer collection name from filename '{ file_name } '. Not enough parts in filename."
188231 logger .error (msg )
189232 raise ValueError (msg )
190233
191234 # Check file name fields
192235 fp = Path (file_name )
193- hfn = HlspFileName (fp , hlsp_name )
236+ if collection_type == "HLSP" :
237+ hfn = HlspFileName (fp , collection_name )
238+ elif collection_type == "CCSP" :
239+ hfn = CCSPFileName (fp , collection_name )
240+ elif collection_type == "MCCM" :
241+ hfn = MCCMFileName (fp , collection_name )
242+
194243 hfn .partition ()
195244 hfn .create_fields ()
196245 elements = hfn .evaluate_fields ()
0 commit comments