@@ -57,6 +57,10 @@ class BaseExtractorConfig:
5757 )
5858 chunk_max_chars : int = 6000
5959 secret_id : Optional [str ] = None
60+ aws_profile : Optional [str ] = None
61+ aws_access_key_id : Optional [str ] = None
62+ aws_secret_access_key : Optional [str ] = None
63+ aws_session_token : Optional [str ] = None
6064
6165
6266# --- Base Extractor Class ---
@@ -65,11 +69,23 @@ class BaseExtractorConfig:
6569class BaseQuoteExtractor :
6670 def __init__ (self , config : BaseExtractorConfig ):
6771 self .config = config
68- self .s3_client = boto3 .client ("s3" , region_name = self .config .region )
72+
73+ # Initialize AWS Session
74+ self .session = boto3 .Session (
75+ profile_name = self .config .aws_profile ,
76+ aws_access_key_id = self .config .aws_access_key_id ,
77+ aws_secret_access_key = self .config .aws_secret_access_key ,
78+ aws_session_token = self .config .aws_session_token ,
79+ region_name = self .config .region ,
80+ )
81+
82+ self .s3_client = self .session .client ("s3" )
83+ self .url_map : Dict [str , str ] = {}
6984
7085 # Initialize Bedrock Agent
86+ bedrock_client = self .session .client ("bedrock-runtime" )
7187 model = BedrockConverseModel (
72- self .config .model_id , provider = BedrockProvider (region_name = self . config . region )
88+ self .config .model_id , provider = BedrockProvider (bedrock_client = bedrock_client )
7389 )
7490 self .agent = Agent (
7591 model ,
@@ -95,9 +111,43 @@ def __init__(self, config: BaseExtractorConfig):
95111 ),
96112 )
97113
def _fetch_url_map(self, s3_uris: List[str]) -> None:
    """Load ``sources.json`` mappings for the given inputs into ``self.url_map``.

    For each URI a candidate ``sources.json`` location is derived:

    * if the URI contains ``/input/``, the candidate is
      ``<text before the first "/input/">/input/sources.json``;
    * otherwise it is ``sources.json`` next to the URI (everything up to the
      last ``/``).

    Candidates are de-duplicated, fetched through ``self.fetch_s3_content`` and
    merged into ``self.url_map``. Missing or empty files are skipped, and files
    that fail to parse (or do not contain a JSON object) are logged and skipped,
    so one bad file does not prevent the others from loading.

    Args:
        s3_uris: URIs of the input files whose directories should be probed.
            An empty list (or ``None``) is a no-op.

    Returns:
        None. The result is the in-place update of ``self.url_map``.
    """
    # Kept function-local, as in the original: the module's import block is
    # not visible from here, so a top-level ``logging`` import cannot be assumed.
    import logging

    logger = logging.getLogger(__name__)

    if not s3_uris:
        return

    sources_locations = set()
    for uri in s3_uris:
        # Skip inputs that already have an entry in the map.
        # NOTE(review): this presumes url_map is keyed by the same URI strings
        # passed in here -- confirm against the contents of sources.json.
        if uri in self.url_map:
            continue

        if "/input/" in uri:
            sources_uri = uri.partition("/input/")[0] + "/input/sources.json"
        else:
            sources_uri = uri.rpartition("/")[0] + "/sources.json"
        sources_locations.add(sources_uri)

    # Iterate in sorted order: set iteration order for strings varies between
    # interpreter runs, which would make the winner of a key collision between
    # two sources.json files non-deterministic.
    for sources_uri in sorted(sources_locations):
        logger.info("Attempting to fetch sources map from %s...", sources_uri)
        content: Optional[str] = self.fetch_s3_content(sources_uri)
        if not content:
            continue

        try:
            new_map = json.loads(content)
        except (TypeError, ValueError) as e:
            # json.JSONDecodeError (and UnicodeDecodeError for bytes input) are
            # ValueError subclasses; TypeError covers a non-str/bytes payload.
            logger.error("Failed to parse %s: %s", sources_uri, e)
            continue

        # dict.update() also accepts iterables of pairs, so a JSON array could
        # otherwise be merged silently as bogus mappings. Require an object.
        if not isinstance(new_map, dict):
            logger.error(
                "Failed to parse %s: expected a JSON object, got %s",
                sources_uri,
                type(new_map).__name__,
            )
            continue

        self.url_map.update(new_map)
        logger.info(
            "Successfully loaded %d mappings from %s.", len(new_map), sources_uri
        )
147+
98148 def get_aws_secret (self , secret_id : str ) -> dict :
99149 """Fetches and parses a JSON secret from AWS Secrets Manager."""
100- client = boto3 . client ("secretsmanager" , region_name = self . config . region )
150+ client = self . session . client ("secretsmanager" )
101151 try :
102152 response = client .get_secret_value (SecretId = secret_id )
103153 if "SecretString" in response :
0 commit comments