66from pathlib import Path
77from typing import Any
88
9- from ..security .policy import default_export_profile , resolve_export_profile
9+ from ..security .findings import ResidualFindingsBlocked , merge_counts
10+ from ..security .policy import default_export_profile , resolve_export_profile , resolve_findings_policy
1011from .projection import prepare_projection
1112
1213
@@ -22,13 +23,15 @@ def export_jsonl(
2223 export_profile : str | None = None ,
2324 db_path : str | None = None ,
2425 config_path : str | None = None ,
26+ findings_policy : str | None = None ,
2527) -> dict [str , Any ]:
2628 jsonl_config = (config .get ("exports" ) or {}).get ("jsonl" ) or {}
2729 include_classification = jsonl_config .get ("include_classification" , True )
2830 security = config .get ("security" ) or {}
2931 config_default = default_export_profile (security .get ("default_export_profile" ))
3032 security_profile = config_default
3133 effective_profile = resolve_export_profile (export_profile , config_default )
34+ policy = resolve_findings_policy (security .get ("on_residual_findings" ), override = findings_policy )
3235
3336 if account_id is not None :
3437 rows = conn .execute (
@@ -88,48 +91,62 @@ def export_jsonl(
8891 "classifier_type" : cr [5 ],
8992 }
9093
91- out_path .parent .mkdir (parents = True , exist_ok = True )
92- written = 0
9394 candidate_message_count = len (rows )
9495 excluded_message_count = 0
9596 any_scrubbed = False
97+ residual_total : dict [str , int ] = {}
98+ projected_records : list [dict [str , Any ]] = []
99+
100+ for row in rows :
101+ record = dict (zip (cols , row ))
102+
103+ per_record_profile = (classifications .get (record ["id" ]) or {}).get ("export_profile" )
104+ projected = prepare_projection (
105+ record ,
106+ config ,
107+ override_profile = export_profile ,
108+ record_profile = per_record_profile ,
109+ clear_body_word_count_for_metadata_only = True ,
110+ )
111+ if projected is None :
112+ excluded_message_count += 1
113+ continue
114+
115+ record = projected .record
116+ merge_counts (residual_total , projected .residual )
117+ if projected .was_scrubbed :
118+ any_scrubbed = True
119+
120+ record ["account_key" ] = account_key
121+ try :
122+ record ["recipients" ] = json .loads (record .pop ("recipients_json" ) or "[]" )
123+ record ["cc" ] = json .loads (record .pop ("cc_json" ) or "[]" )
124+ except Exception :
125+ record ["recipients" ] = []
126+ record ["cc" ] = []
127+
128+ if include_classification and record ["id" ] in classifications :
129+ record ["classification" ] = classifications [record ["id" ]]
130+
131+ projected_records .append (record )
132+
133+ if policy == "block" and residual_total :
134+ raise ResidualFindingsBlocked (residual_total )
135+
136+ warnings : list [str ] = []
137+ if policy == "warn" and residual_total :
138+ warnings .append (f"residual detected-sensitive items in export: { residual_total } " )
139+
140+ out_path .parent .mkdir (parents = True , exist_ok = True )
141+ written = 0
96142 thread_keys : set [str ] = set ()
97143 date_min : str | None = None
98144 date_max : str | None = None
99145 word_count = 0
100146 export_id = _start_export_run (conn , "jsonl" , str (out_path ), effective_profile , account_id )
101147
102148 with out_path .open ("w" , encoding = "utf-8" ) as f :
103- for row in rows :
104- record = dict (zip (cols , row ))
105-
106- # Resolve export profile for this record
107- per_record_profile = (classifications .get (record ["id" ]) or {}).get ("export_profile" )
108- projected = prepare_projection (
109- record ,
110- config ,
111- override_profile = export_profile ,
112- record_profile = per_record_profile ,
113- clear_body_word_count_for_metadata_only = True ,
114- )
115- if projected is None :
116- excluded_message_count += 1
117- continue
118- record = projected .record
119- if projected .was_scrubbed :
120- any_scrubbed = True
121-
122- record ["account_key" ] = account_key
123- try :
124- record ["recipients" ] = json .loads (record .pop ("recipients_json" ) or "[]" )
125- record ["cc" ] = json .loads (record .pop ("cc_json" ) or "[]" )
126- except Exception :
127- record ["recipients" ] = []
128- record ["cc" ] = []
129-
130- if include_classification and record ["id" ] in classifications :
131- record ["classification" ] = classifications [record ["id" ]]
132-
149+ for record in projected_records :
133150 f .write (json .dumps (record , ensure_ascii = False ) + "\n " )
134151 written += 1
135152
@@ -171,6 +188,11 @@ def export_jsonl(
171188 export_format = jsonl_config ,
172189 candidate_message_count = candidate_message_count ,
173190 excluded_message_count = excluded_message_count ,
191+ warnings = warnings ,
192+ residual_scan_performed = True ,
193+ residual_findings_total = sum (residual_total .values ()),
194+ residual_findings_by_type = residual_total ,
195+ residual_findings_policy = policy ,
174196 )
175197 manifest_path = write_jsonl_manifest (out_path , manifest_rows )
176198 source_count = 1 if out_path .exists () else 0
@@ -211,6 +233,11 @@ def export_jsonl(
211233 message_count = written ,
212234 contains_scrubbed_content = any_scrubbed ,
213235 generated_sha256 = manifest_rows [0 ]["generated_sha256" ],
236+ warnings = warnings ,
237+ residual_scan_performed = True ,
238+ residual_findings_total = sum (residual_total .values ()),
239+ residual_findings_by_type = residual_total ,
240+ residual_findings_policy = policy ,
214241 ),
215242 export_id ,
216243 ),
@@ -224,6 +251,10 @@ def export_jsonl(
224251 "contains_scrubbed_content" : any_scrubbed ,
225252 "candidate_message_count" : candidate_message_count ,
226253 "excluded_message_count" : excluded_message_count ,
254+ "residual_findings" : residual_total ,
255+ "residual_findings_total" : sum (residual_total .values ()),
256+ "residual_findings_policy" : policy ,
257+ "warnings" : warnings ,
227258 }
228259
229260
@@ -262,6 +293,11 @@ def _jsonl_export_metadata_json(
262293 message_count : int ,
263294 contains_scrubbed_content : bool ,
264295 generated_sha256 : str ,
296+ warnings : list [str ] | None ,
297+ residual_scan_performed : bool ,
298+ residual_findings_total : int ,
299+ residual_findings_by_type : dict [str , int ],
300+ residual_findings_policy : str ,
265301) -> str :
266302 from .manifest import build_safe_export_run_metadata , security_manifest_posture
267303
@@ -289,5 +325,10 @@ def _jsonl_export_metadata_json(
289325 message_count = message_count ,
290326 contains_scrubbed_content = contains_scrubbed_content ,
291327 generated_sha256 = generated_sha256 ,
328+ warnings = warnings ,
329+ residual_scan_performed = residual_scan_performed ,
330+ residual_findings_total = residual_findings_total ,
331+ residual_findings_by_type = residual_findings_by_type ,
332+ residual_findings_policy = residual_findings_policy ,
292333 )
293334 return json .dumps (metadata , ensure_ascii = False , sort_keys = True )
0 commit comments