77import json
88import os
99from collections import defaultdict
10- from dataclasses import dataclass , field
10+ from dataclasses import asdict , dataclass , field
1111from typing import Any , TypedDict
1212
1313import boto3
2626from app .services .ecr .refine import get_file_size_in_mib
2727from app .services .pipeline import (
2828 AugmentationRun ,
29+ RefinementContext ,
2930 RefinementResult ,
30- RefinementTrace ,
3131 create_augmentation_run_from_xml_files ,
3232 discover_reportable_conditions ,
3333 produce_remainder_rr_for_jurisdiction ,
@@ -83,20 +83,13 @@ class RefinementState:
8383 jurisdiction/per-condition refinement traces, the AIMS-facing
8484 metadata dict, and the set of codes per jurisdiction that were
8585 NOT refined (used to drive remainder RR production).
86-
87- TODO:
88- * skipped_condition_codes_by_jurisdiction holds information that
89- is also present on the per-condition RefinementTrace objects
90- (traces with refinement_outcome == "skipped")
91- * that means skipped codes are tracked in two places
9286 """
9387
9488 output_files : set [str ] = field (default_factory = set )
9589 metadata : RefinerMetadata = field (default_factory = dict )
9690 skipped_condition_codes_by_jurisdiction : dict [str , set [str ]] = field (
9791 default_factory = lambda : defaultdict (set )
9892 )
99- traces : list [RefinementTrace ] = field (default_factory = list )
10093
10194
10295@dataclass
@@ -528,12 +521,6 @@ def run_refinement(input: RefinementInput) -> RefinementOutput:
528521 run = run ,
529522 )
530523
531- log_refinement_summary (
532- persistence_id = input .persistence_id ,
533- output_files = state .output_files ,
534- traces = state .traces ,
535- )
536-
537524 return RefinementOutput (
538525 output_file_keys = list (state .output_files ), metadata = state .metadata
539526 )
@@ -588,10 +575,6 @@ def process_condition(
588575 Process a single reportable condition for a jurisdiction.
589576 """
590577 rsg_code = reportable_condition .code
591- trace = RefinementTrace (
592- jurisdiction_code = jurisdiction_code ,
593- rsg_code = rsg_code ,
594- )
595578
596579 cg_metadata = rsg_cg_payload .mappings .get (rsg_code )
597580
@@ -604,28 +587,23 @@ def process_condition(
604587 operation = "skipped" ,
605588 )
606589 mark_condition_skipped (
607- trace = trace ,
608590 jurisdiction_code = jurisdiction_code ,
609591 condition_code = rsg_code ,
610592 reason = "rsg_not_in_mapping" ,
611593 state = state ,
612594 )
613595 return
614596
615- trace .canonical_url = cg_metadata .canonical_url
616-
617- processed_configuration = load_active_configuration (
597+ active_configuration = load_active_configuration (
618598 s3_client = refiner_input .s3_client ,
619599 config_bucket = refiner_input .config_bucket_name ,
620600 jurisdiction_code = jurisdiction_code ,
621601 cg_metadata = cg_metadata ,
622602 rsg_metadata = reportable_condition ,
623- trace = trace ,
624603 )
625604
626- if processed_configuration is None :
605+ if active_configuration is None :
627606 mark_condition_skipped (
628- trace = trace ,
629607 jurisdiction_code = jurisdiction_code ,
630608 condition_code = rsg_code ,
631609 reason = "no_active_configuration" ,
@@ -635,25 +613,35 @@ def process_condition(
635613
636614 result = refine_for_condition (
637615 xml_files = refiner_input .xml_files ,
638- processed_configuration = processed_configuration ,
639- trace = trace ,
616+ processed_configuration = active_configuration .configuration ,
617+ context = RefinementContext (
618+ canonical_url = cg_metadata .canonical_url ,
619+ jurisdiction_id = jurisdiction_code ,
620+ configuration_version = active_configuration .version ,
621+ ),
640622 run = run ,
641623 )
642624
643- state .traces .append (trace )
644-
645625 write_refined_outputs (
646626 refiner_input = refiner_input ,
647627 jurisdiction_code = jurisdiction_code ,
648628 condition_grouper_name = cg_metadata .name ,
649629 result = result ,
650- trace = trace ,
651630 condition_code = rsg_code ,
652631 state = state ,
653632 )
654633
655634 state .metadata [jurisdiction_code ][rsg_code ] = True
656635
636+ logger .info (
637+ "Refinement complete for condition." ,
638+ rsg_code = rsg_code ,
639+ jurisidiction_code = jurisdiction_code ,
640+ metrics = asdict (result .metrics ),
641+ report = asdict (result .report ),
642+ operation = "log_summary" ,
643+ )
644+
657645
658646def load_condition_mapping_for_jurisdiction (
659647 s3_client ,
@@ -693,21 +681,20 @@ def skip_all_conditions_for_missing_mapping(
693681 Mark every condition in a jurisdiction as skipped when the mapping file is missing.
694682 """
695683 for condition in jurisdiction_group .conditions :
696- trace = RefinementTrace (
684+ logger .info (
685+ "Mapping file not found for condition" ,
697686 jurisdiction_code = jurisdiction_code ,
698687 rsg_code = condition .code ,
699688 refinement_outcome = "skipped" ,
700- skip_reason = "no_mapping_file" ,
689+ reason = "no_mapping_file" ,
701690 )
702- state .traces .append (trace )
703691 state .skipped_condition_codes_by_jurisdiction [jurisdiction_code ].add (
704692 condition .code
705693 )
706694 state .metadata [jurisdiction_code ][condition .code ] = False
707695
708696
709697def mark_condition_skipped (
710- trace : RefinementTrace ,
711698 jurisdiction_code : str ,
712699 condition_code : str ,
713700 reason : str ,
@@ -716,22 +703,31 @@ def mark_condition_skipped(
716703 """
717704 Helper to mark a condition as "skipped" during refinement.
718705 """
719- trace .refinement_outcome = "skipped"
720- trace .skip_reason = reason
706+ logger .info (
707+ "Refiner skipped condition." ,
708+ jurisdiction = jurisdiction_code ,
709+ condition_code = condition_code ,
710+ reason = reason ,
711+ operation = "skipped" ,
712+ )
721713
722- state .traces .append (trace )
723714 state .metadata [jurisdiction_code ][condition_code ] = False
724715 state .skipped_condition_codes_by_jurisdiction [jurisdiction_code ].add (condition_code )
725716
726717
718+ @dataclass
719+ class ActiveConfiguration :
720+ configuration : ProcessedConfiguration
721+ version : int
722+
723+
727724def load_active_configuration (
728725 s3_client ,
729726 config_bucket : str ,
730727 jurisdiction_code : str ,
731728 cg_metadata : ConditionMapValue ,
732729 rsg_metadata : ReportableCondition ,
733- trace : RefinementTrace ,
734- ) -> ProcessedConfiguration | None :
730+ ) -> ActiveConfiguration | None :
735731 """
736732 Attempts to find and load the active configuration.
737733 """
@@ -756,8 +752,6 @@ def load_active_configuration(
756752 )
757753 return None
758754
759- trace .configuration_version = config_version_to_use
760-
761755 serialized_configuration_key = get_active_file_key (
762756 jurisdiction_id = jurisdiction_code ,
763757 canonical_url = cg_metadata .canonical_url ,
@@ -780,7 +774,10 @@ def load_active_configuration(
780774 operation = "activation_file_read" ,
781775 )
782776
783- return ProcessedConfiguration .from_dict (serialized_configuration )
777+ return ActiveConfiguration (
778+ configuration = ProcessedConfiguration .from_dict (serialized_configuration ),
779+ version = config_version_to_use ,
780+ )
784781
785782
786783def write_refined_outputs (
@@ -789,7 +786,6 @@ def write_refined_outputs(
789786 condition_code : str ,
790787 condition_grouper_name : str ,
791788 result : RefinementResult ,
792- trace : RefinementTrace ,
793789 state : RefinementState ,
794790) -> None :
795791 """
@@ -804,7 +800,7 @@ def write_refined_outputs(
804800 refiner_input .s3_client .put_object (
805801 Bucket = refiner_input .output_bucket_name ,
806802 Key = eicr_output_key ,
807- Body = result .refined_eicr .encode ("utf-8" ),
803+ Body = result .documents . eicr .encode ("utf-8" ),
808804 ContentType = "application/xml" ,
809805 )
810806
@@ -814,17 +810,17 @@ def write_refined_outputs(
814810 refiner_input .s3_client .put_object (
815811 Bucket = refiner_input .output_bucket_name ,
816812 Key = rr_output_key ,
817- Body = result .refined_rr .encode ("utf-8" ),
813+ Body = result .documents . rr .encode ("utf-8" ),
818814 ContentType = "application/xml" ,
819815 )
820816
821817 state .output_files .add (rr_output_key )
822818
823819 logger .info (
824- "Condition refinement complete ." ,
820+ "Writing refined output files ." ,
825821 eicr_key = eicr_output_key ,
826822 rr_key = rr_output_key ,
827- eicr_size_reduction_percentage = trace . eicr_size_reduction_percentage ,
823+ eicr_size_reduction_percentage = result . metrics . eicr . size_reduction_percentage ,
828824 jurisdiction_code = jurisdiction_code ,
829825 condition_code = condition_code ,
830826 operation = "condition_refinement_complete" ,
@@ -908,32 +904,3 @@ def write_remainder_rrs(
908904 augmented_rr_doc_id = remainder .augmented_result .augmented_doc_id ,
909905 operation = "remainder_rr_written" ,
910906 )
911-
912-
913- def log_refinement_summary (
914- persistence_id : str ,
915- output_files : set [str ],
916- traces : list [RefinementTrace ],
917- ) -> None :
918- """
919- Log a final summary for the entire refinement run.
920- """
921- logger .info (
922- "Refinement complete." ,
923- persistence_id = persistence_id ,
924- output_file_urls = list (output_files ),
925- traces = [
926- {
927- "jurisdiction" : t .jurisdiction_code ,
928- "rsg_code" : t .rsg_code ,
929- "canonical_url" : t .canonical_url ,
930- "outcome" : t .refinement_outcome ,
931- "skip_reason" : t .skip_reason ,
932- "config_version" : t .configuration_version ,
933- "eicr_size_reduction" : t .eicr_size_reduction_percentage ,
934- "eicr_size_mb" : t .eicr_size_mib ,
935- }
936- for t in traces
937- ],
938- operation = "refinement_complete" ,
939- )
0 commit comments