@@ -128,17 +128,19 @@ def try_process_gazette_file(
128128
129129 gazette ["source_text" ] = try_to_extract_content (gazette_file , text_extractor )
130130 gazette_txt_path = define_gazette_txt_path (gazette )
131-
131+
132132 # Store relative paths instead of full URLs (controlled by feature flag)
133- use_relative_paths = os .environ .get ("USE_RELATIVE_FILE_PATHS" , "false" ).lower () == "true"
133+ use_relative_paths = (
134+ os .environ .get ("USE_RELATIVE_FILE_PATHS" , "false" ).lower () == "true"
135+ )
134136 if use_relative_paths :
135137 gazette ["url" ] = gazette ["file_path" ] # Relative path only
136138 gazette ["file_raw_txt" ] = gazette_txt_path # Relative path only
137139 else :
138140 # Legacy behavior: store full URLs
139141 gazette ["url" ] = define_file_url (gazette ["file_path" ])
140142 gazette ["file_raw_txt" ] = define_file_url (gazette_txt_path )
141-
143+
142144 upload_raw_text (gazette_txt_path , gazette ["source_text" ], storage )
143145
144146 # Delete file ASAP to free disk space
@@ -152,15 +154,17 @@ def try_process_gazette_file(
152154
153155 for segment in territory_segments :
154156 segment_txt_path = define_segment_txt_path (segment )
155-
157+
156158 # Store relative path for segments (controlled by feature flag)
157- use_relative_paths = os .environ .get ("USE_RELATIVE_FILE_PATHS" , "false" ).lower () == "true"
159+ use_relative_paths = (
160+ os .environ .get ("USE_RELATIVE_FILE_PATHS" , "false" ).lower () == "true"
161+ )
158162 if use_relative_paths :
159163 segment ["file_raw_txt" ] = segment_txt_path # Relative path only
160164 else :
161165 # Legacy behavior: store full URL
162166 segment ["file_raw_txt" ] = define_file_url (segment_txt_path )
163-
167+
164168 upload_raw_text (segment_txt_path , segment ["source_text" ], storage )
165169 index .index_document (segment , document_id = segment ["file_checksum" ])
166170 document_ids .append (segment ["file_checksum" ])
@@ -232,7 +236,7 @@ def define_segment_txt_path(segment: Dict):
232236def define_file_url (path : str ):
233237 """
234238 Joins the storage endpoint with the path to form the URL
235-
239+
236240 DEPRECATED: This function will be removed in a future version.
237241 With USE_RELATIVE_FILE_PATHS=true, paths are stored without endpoints.
238242 The API will handle endpoint concatenation dynamically.
@@ -244,7 +248,7 @@ def define_file_url(path: str):
244248def get_file_endpoint () -> str :
245249 """
246250 Get the endpoint where the gazette files can be downloaded.
247-
251+
248252 DEPRECATED: This function will be removed in a future version.
249253 The QUERIDO_DIARIO_FILES_ENDPOINT should be used in the API layer,
250254 not in data processing.
0 commit comments