Skip to content

Commit 88e4ab4

Browse files
authored
Merge branch 'main' into jw/231
2 parents 7ccc103 + 6721cb3 commit 88e4ab4

18 files changed

Lines changed: 223 additions & 383 deletions

File tree

refiner/app/core/exceptions.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,20 @@ class DatabaseDataError(DatabaseError):
221221
"""
222222

223223
pass
224+
225+
226+
class RefinementException(Exception):
227+
"""
228+
Exception raised during a failed refinement run.
229+
"""
230+
231+
def __init__(self, message: str, detail: str):
232+
"""
233+
RefinementException constructor.
234+
235+
Args:
236+
message (str): High-level error message
237+
detail (str): Additional detail describing the issue encountered
238+
"""
239+
super().__init__(message)
240+
self.detail = detail

refiner/app/lambda/lambda_function.py

Lines changed: 44 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import json
88
import os
99
from collections import defaultdict
10-
from dataclasses import dataclass, field
10+
from dataclasses import asdict, dataclass, field
1111
from typing import Any, TypedDict
1212

1313
import boto3
@@ -26,8 +26,8 @@
2626
from app.services.ecr.refine import get_file_size_in_mib
2727
from app.services.pipeline import (
2828
AugmentationRun,
29+
RefinementContext,
2930
RefinementResult,
30-
RefinementTrace,
3131
create_augmentation_run_from_xml_files,
3232
discover_reportable_conditions,
3333
produce_remainder_rr_for_jurisdiction,
@@ -83,20 +83,13 @@ class RefinementState:
8383
jurisdiction/per-condition refinement traces, the AIMS-facing
8484
metadata dict, and the set of codes per jurisdiction that were
8585
NOT refined (used to drive remainder RR production).
86-
87-
TODO:
88-
* skipped_condition_codes_by_jurisdiction holds information that
89-
is also present on the per-condition RefinementTrace objects
90-
(traces with refinement_outcome == "skipped")
91-
* that means skipped codes are tracked in two places
9286
"""
9387

9488
output_files: set[str] = field(default_factory=set)
9589
metadata: RefinerMetadata = field(default_factory=dict)
9690
skipped_condition_codes_by_jurisdiction: dict[str, set[str]] = field(
9791
default_factory=lambda: defaultdict(set)
9892
)
99-
traces: list[RefinementTrace] = field(default_factory=list)
10093

10194

10295
@dataclass
@@ -528,12 +521,6 @@ def run_refinement(input: RefinementInput) -> RefinementOutput:
528521
run=run,
529522
)
530523

531-
log_refinement_summary(
532-
persistence_id=input.persistence_id,
533-
output_files=state.output_files,
534-
traces=state.traces,
535-
)
536-
537524
return RefinementOutput(
538525
output_file_keys=list(state.output_files), metadata=state.metadata
539526
)
@@ -588,10 +575,6 @@ def process_condition(
588575
Process a single reportable condition for a jurisdiction.
589576
"""
590577
rsg_code = reportable_condition.code
591-
trace = RefinementTrace(
592-
jurisdiction_code=jurisdiction_code,
593-
rsg_code=rsg_code,
594-
)
595578

596579
cg_metadata = rsg_cg_payload.mappings.get(rsg_code)
597580

@@ -604,28 +587,23 @@ def process_condition(
604587
operation="skipped",
605588
)
606589
mark_condition_skipped(
607-
trace=trace,
608590
jurisdiction_code=jurisdiction_code,
609591
condition_code=rsg_code,
610592
reason="rsg_not_in_mapping",
611593
state=state,
612594
)
613595
return
614596

615-
trace.canonical_url = cg_metadata.canonical_url
616-
617-
processed_configuration = load_active_configuration(
597+
active_configuration = load_active_configuration(
618598
s3_client=refiner_input.s3_client,
619599
config_bucket=refiner_input.config_bucket_name,
620600
jurisdiction_code=jurisdiction_code,
621601
cg_metadata=cg_metadata,
622602
rsg_metadata=reportable_condition,
623-
trace=trace,
624603
)
625604

626-
if processed_configuration is None:
605+
if active_configuration is None:
627606
mark_condition_skipped(
628-
trace=trace,
629607
jurisdiction_code=jurisdiction_code,
630608
condition_code=rsg_code,
631609
reason="no_active_configuration",
@@ -635,25 +613,35 @@ def process_condition(
635613

636614
result = refine_for_condition(
637615
xml_files=refiner_input.xml_files,
638-
processed_configuration=processed_configuration,
639-
trace=trace,
616+
processed_configuration=active_configuration.configuration,
617+
context=RefinementContext(
618+
canonical_url=cg_metadata.canonical_url,
619+
jurisdiction_id=jurisdiction_code,
620+
configuration_version=active_configuration.version,
621+
),
640622
run=run,
641623
)
642624

643-
state.traces.append(trace)
644-
645625
write_refined_outputs(
646626
refiner_input=refiner_input,
647627
jurisdiction_code=jurisdiction_code,
648628
condition_grouper_name=cg_metadata.name,
649629
result=result,
650-
trace=trace,
651630
condition_code=rsg_code,
652631
state=state,
653632
)
654633

655634
state.metadata[jurisdiction_code][rsg_code] = True
656635

636+
logger.info(
637+
"Refinement complete for condition.",
638+
rsg_code=rsg_code,
639+
jurisidiction_code=jurisdiction_code,
640+
metrics=asdict(result.metrics),
641+
report=asdict(result.report),
642+
operation="log_summary",
643+
)
644+
657645

658646
def load_condition_mapping_for_jurisdiction(
659647
s3_client,
@@ -693,21 +681,20 @@ def skip_all_conditions_for_missing_mapping(
693681
Mark every condition in a jurisdiction as skipped when the mapping file is missing.
694682
"""
695683
for condition in jurisdiction_group.conditions:
696-
trace = RefinementTrace(
684+
logger.info(
685+
"Mapping file not found for condition",
697686
jurisdiction_code=jurisdiction_code,
698687
rsg_code=condition.code,
699688
refinement_outcome="skipped",
700-
skip_reason="no_mapping_file",
689+
reason="no_mapping_file",
701690
)
702-
state.traces.append(trace)
703691
state.skipped_condition_codes_by_jurisdiction[jurisdiction_code].add(
704692
condition.code
705693
)
706694
state.metadata[jurisdiction_code][condition.code] = False
707695

708696

709697
def mark_condition_skipped(
710-
trace: RefinementTrace,
711698
jurisdiction_code: str,
712699
condition_code: str,
713700
reason: str,
@@ -716,22 +703,31 @@ def mark_condition_skipped(
716703
"""
717704
Helper to mark a condition as "skipped" during refinement.
718705
"""
719-
trace.refinement_outcome = "skipped"
720-
trace.skip_reason = reason
706+
logger.info(
707+
"Refiner skipped condition.",
708+
jurisdiction=jurisdiction_code,
709+
condition_code=condition_code,
710+
reason=reason,
711+
operation="skipped",
712+
)
721713

722-
state.traces.append(trace)
723714
state.metadata[jurisdiction_code][condition_code] = False
724715
state.skipped_condition_codes_by_jurisdiction[jurisdiction_code].add(condition_code)
725716

726717

718+
@dataclass
719+
class ActiveConfiguration:
720+
configuration: ProcessedConfiguration
721+
version: int
722+
723+
727724
def load_active_configuration(
728725
s3_client,
729726
config_bucket: str,
730727
jurisdiction_code: str,
731728
cg_metadata: ConditionMapValue,
732729
rsg_metadata: ReportableCondition,
733-
trace: RefinementTrace,
734-
) -> ProcessedConfiguration | None:
730+
) -> ActiveConfiguration | None:
735731
"""
736732
Attempts to find and load the active configuration.
737733
"""
@@ -756,8 +752,6 @@ def load_active_configuration(
756752
)
757753
return None
758754

759-
trace.configuration_version = config_version_to_use
760-
761755
serialized_configuration_key = get_active_file_key(
762756
jurisdiction_id=jurisdiction_code,
763757
canonical_url=cg_metadata.canonical_url,
@@ -780,7 +774,10 @@ def load_active_configuration(
780774
operation="activation_file_read",
781775
)
782776

783-
return ProcessedConfiguration.from_dict(serialized_configuration)
777+
return ActiveConfiguration(
778+
configuration=ProcessedConfiguration.from_dict(serialized_configuration),
779+
version=config_version_to_use,
780+
)
784781

785782

786783
def write_refined_outputs(
@@ -789,7 +786,6 @@ def write_refined_outputs(
789786
condition_code: str,
790787
condition_grouper_name: str,
791788
result: RefinementResult,
792-
trace: RefinementTrace,
793789
state: RefinementState,
794790
) -> None:
795791
"""
@@ -804,7 +800,7 @@ def write_refined_outputs(
804800
refiner_input.s3_client.put_object(
805801
Bucket=refiner_input.output_bucket_name,
806802
Key=eicr_output_key,
807-
Body=result.refined_eicr.encode("utf-8"),
803+
Body=result.documents.eicr.encode("utf-8"),
808804
ContentType="application/xml",
809805
)
810806

@@ -814,17 +810,17 @@ def write_refined_outputs(
814810
refiner_input.s3_client.put_object(
815811
Bucket=refiner_input.output_bucket_name,
816812
Key=rr_output_key,
817-
Body=result.refined_rr.encode("utf-8"),
813+
Body=result.documents.rr.encode("utf-8"),
818814
ContentType="application/xml",
819815
)
820816

821817
state.output_files.add(rr_output_key)
822818

823819
logger.info(
824-
"Condition refinement complete.",
820+
"Writing refined output files.",
825821
eicr_key=eicr_output_key,
826822
rr_key=rr_output_key,
827-
eicr_size_reduction_percentage=trace.eicr_size_reduction_percentage,
823+
eicr_size_reduction_percentage=result.metrics.eicr.size_reduction_percentage,
828824
jurisdiction_code=jurisdiction_code,
829825
condition_code=condition_code,
830826
operation="condition_refinement_complete",
@@ -908,32 +904,3 @@ def write_remainder_rrs(
908904
augmented_rr_doc_id=remainder.augmented_result.augmented_doc_id,
909905
operation="remainder_rr_written",
910906
)
911-
912-
913-
def log_refinement_summary(
914-
persistence_id: str,
915-
output_files: set[str],
916-
traces: list[RefinementTrace],
917-
) -> None:
918-
"""
919-
Log a final summary for the entire refinement run.
920-
"""
921-
logger.info(
922-
"Refinement complete.",
923-
persistence_id=persistence_id,
924-
output_file_urls=list(output_files),
925-
traces=[
926-
{
927-
"jurisdiction": t.jurisdiction_code,
928-
"rsg_code": t.rsg_code,
929-
"canonical_url": t.canonical_url,
930-
"outcome": t.refinement_outcome,
931-
"skip_reason": t.skip_reason,
932-
"config_version": t.configuration_version,
933-
"eicr_size_reduction": t.eicr_size_reduction_percentage,
934-
"eicr_size_mb": t.eicr_size_mib,
935-
}
936-
for t in traces
937-
],
938-
operation="refinement_complete",
939-
)

0 commit comments

Comments
 (0)