# Environment variables
# Default bucket; used as the fallback when an incoming event carries no bucket name.
S3_BUCKET = os.getenv("S3_BUCKET", "dibbs-text-to-code")
# S3 key prefixes. Each prefix is concatenated directly with the persistence ID
# to form an object key, so every default ends in "/" and contains no spaces.
EICR_INPUT_PREFIX = os.getenv("EICR_INPUT_PREFIX", "eCRMessageV2/")
SCHEMATRON_ERROR_PREFIX = os.getenv("SCHEMATRON_ERROR_PREFIX", "ValidationResponseV2/")
TTC_INPUT_PREFIX = os.getenv("TTC_INPUT_PREFIX", "TextToCodeSubmissionV2/")
TTC_OUTPUT_PREFIX = os.getenv("TTC_OUTPUT_PREFIX", "TTCAugmentationMetadataV2/")
TTC_METADATA_PREFIX = os.getenv("TTC_METADATA_PREFIX", "TTCMetadataV2/")
# No defaults are supplied for these: each is None when the variable is unset.
AWS_REGION = os.getenv("AWS_REGION")
S3_ENDPOINT_URL = os.getenv("S3_ENDPOINT_URL")
OPENSEARCH_ENDPOINT_URL = os.getenv("OPENSEARCH_ENDPOINT_URL")
@@ -120,7 +120,8 @@ def process_record(record: SQSRecord, s3_client: BaseClient, opensearch_client:
120120 # Parse the EventBridge S3 event from the SQS message body
121121 eventbridge_data = lambda_handler .get_eventbridge_data_from_s3_event (s3_event )
122122 object_key = eventbridge_data ["object_key" ]
123- logger .info (f"Processing S3 Object: s3://{ S3_BUCKET } /{ object_key } " )
123+ bucket_name = eventbridge_data .get ("bucket_name" ) or S3_BUCKET
124+ logger .info (f"Processing S3 Object: s3://{ bucket_name } /{ object_key } " )
124125
125126 # Extract persistence_id from the RR object key
126127 persistence_id = lambda_handler .get_persistence_id (object_key , TTC_INPUT_PREFIX )
@@ -129,7 +130,7 @@ def process_record(record: SQSRecord, s3_client: BaseClient, opensearch_client:
129130 with logger .append_context_keys (
130131 persistence_id = persistence_id ,
131132 ):
132- _process_record_pipeline (persistence_id , s3_client , opensearch_client )
133+ _process_record_pipeline (persistence_id , s3_client , opensearch_client , bucket_name )
133134
134135
135136def _initialize_ttc_outputs (persistence_id : str ) -> tuple [dict , dict ]:
@@ -152,17 +153,20 @@ def _initialize_ttc_outputs(persistence_id: str) -> tuple[dict, dict]:
152153 return ttc_output , ttc_metadata_output
153154
154155
155- def _load_schematron_data_fields (persistence_id : str , s3_client : BaseClient ) -> list :
156+ def _load_schematron_data_fields (
157+ persistence_id : str , s3_client : BaseClient , bucket_name : str
158+ ) -> list :
156159 """Load Schematron errors from S3 and extract relevant fields.
157160
158161 :param persistence_id: The persistence ID extracted from the S3 object key
159162 :param s3_client: The S3 client to use for fetching files.
163+ :param bucket_name: The S3 bucket name to read from.
160164 :return: The relevant Schematron data fields for TTC processing.
161165 """
162166 object_key = f"{ SCHEMATRON_ERROR_PREFIX } { persistence_id } "
163- logger .info ("Loading Schematron errors" , s3_key = f"s3://{ S3_BUCKET } /{ object_key } " )
167+ logger .info ("Loading Schematron errors" , s3_key = f"s3://{ bucket_name } /{ object_key } " )
164168 schematron_errors = lambda_handler .get_file_content_from_s3 (
165- bucket_name = S3_BUCKET ,
169+ bucket_name = bucket_name ,
166170 object_key = object_key ,
167171 s3_client = s3_client ,
168172 )
@@ -172,17 +176,18 @@ def _load_schematron_data_fields(persistence_id: str, s3_client: BaseClient) ->
172176 return schematron_processor .get_data_fields_from_schematron_error (schematron_errors )
173177
174178
def _load_original_eicr(persistence_id: str, s3_client: BaseClient, bucket_name: str) -> str:
    """Load the original eICR from S3.

    The object key is ``EICR_INPUT_PREFIX`` followed directly by the persistence ID.

    :param persistence_id: The persistence ID extracted from the S3 object key
    :param s3_client: The S3 client to use for fetching files.
    :param bucket_name: The S3 bucket name to read from.
    :return: The original eICR content.
    """
    object_key = f"{EICR_INPUT_PREFIX}{persistence_id}"
    logger.info(f"Retrieving eICR from s3://{bucket_name}/{object_key}")
    # Read from the caller-supplied bucket rather than the module-level default.
    original_eicr_content = lambda_handler.get_file_content_from_s3(
        bucket_name=bucket_name, object_key=object_key, s3_client=s3_client
    )
    logger.info(f"Retrieved eICR content for persistence_id {persistence_id}")
    return original_eicr_content
@@ -283,20 +288,25 @@ def _process_schematron_errors(
283288
284289
285290def _save_ttc_outputs (
286- persistence_id : str , ttc_output : dict , ttc_metadata_output : dict , s3_client : BaseClient
291+ persistence_id : str ,
292+ ttc_output : dict ,
293+ ttc_metadata_output : dict ,
294+ s3_client : BaseClient ,
295+ bucket_name : str ,
287296) -> None :
288297 """Save TTC output and metadata output to S3.
289298
290299 :param persistence_id: The persistence ID extracted from the S3 object key
291300 :param ttc_output: The TTC output dictionary.
292301 :param ttc_metadata_output: The TTC metadata output dictionary.
293302 :param s3_client: The S3 client to use for uploading files.
303+ :param bucket_name: The S3 bucket name to write to.
294304 """
295305 # Save the TTC output to S3 for the Augmentation Lambda to consume
296306 logger .info (f"Saving TTC output to S3 for persistence_id { persistence_id } " )
297307 lambda_handler .put_file (
298308 file_obj = io .BytesIO (json .dumps (ttc_output , default = str ).encode ("utf-8" )),
299- bucket_name = S3_BUCKET ,
309+ bucket_name = bucket_name ,
300310 object_key = f"{ TTC_OUTPUT_PREFIX } { persistence_id } " ,
301311 s3_client = s3_client ,
302312 )
@@ -305,7 +315,7 @@ def _save_ttc_outputs(
305315 logger .info (f"Saving TTC metadata output to S3 for persistence_id { persistence_id } " )
306316 lambda_handler .put_file (
307317 file_obj = io .BytesIO (json .dumps (ttc_metadata_output , default = str ).encode ("utf-8" )),
308- bucket_name = S3_BUCKET ,
318+ bucket_name = bucket_name ,
309319 object_key = f"{ TTC_METADATA_PREFIX } { persistence_id } " ,
310320 s3_client = s3_client ,
311321 )
@@ -315,6 +325,7 @@ def _process_record_pipeline(
315325 persistence_id : str ,
316326 s3_client : BaseClient ,
317327 opensearch_client : OpenSearch ,
328+ bucket_name : str ,
318329) -> dict :
319330 """The main pipeline for processing each record.
320331
@@ -333,11 +344,12 @@ def _process_record_pipeline(
333344 :param persistence_id: The persistence ID extracted from the S3 object key
334345 :param s3_client: The S3 client to use for S3 operations.
335346 :param opensearch_client: The OpenSearch client.
347+ :param bucket_name: The S3 bucket name extracted from the event, or the default.
336348 """
337349 ttc_output , ttc_metadata_output = _initialize_ttc_outputs (persistence_id )
338350
339351 logger .info ("Starting TTC processing" )
340- schematron_data_fields = _load_schematron_data_fields (persistence_id , s3_client )
352+ schematron_data_fields = _load_schematron_data_fields (persistence_id , s3_client , bucket_name )
341353
342354 if not schematron_data_fields :
343355 logger .warning (
@@ -348,13 +360,13 @@ def _process_record_pipeline(
348360 logger .info (f"Saving TTC metadata output to S3 for persistence_id { persistence_id } " )
349361 lambda_handler .put_file (
350362 file_obj = io .BytesIO (json .dumps (ttc_metadata_output , default = str ).encode ("utf-8" )),
351- bucket_name = S3_BUCKET ,
363+ bucket_name = bucket_name ,
352364 object_key = f"{ TTC_METADATA_PREFIX } { persistence_id } " ,
353365 s3_client = s3_client ,
354366 )
355367 return ttc_output
356368
357- original_eicr_content = _load_original_eicr (persistence_id , s3_client )
369+ original_eicr_content = _load_original_eicr (persistence_id , s3_client , bucket_name )
358370 _populate_eicr_metadata (original_eicr_content , ttc_output , ttc_metadata_output )
359371 _process_schematron_errors (
360372 original_eicr_content ,
@@ -363,6 +375,6 @@ def _process_record_pipeline(
363375 ttc_output ,
364376 ttc_metadata_output ,
365377 )
366- _save_ttc_outputs (persistence_id , ttc_output , ttc_metadata_output , s3_client )
378+ _save_ttc_outputs (persistence_id , ttc_output , ttc_metadata_output , s3_client , bucket_name )
367379
368380 return {"statusCode" : 200 , "message" : "TTC processed successfully!" }
0 commit comments