uscient
diff --git a/config/mboxer.example.yaml b/config/mboxer.example.yaml
Lines changed: 3 additions & 2 deletions
diff --git a/docs/security-roadmap.md b/docs/security-roadmap.md
Lines changed: 28 additions & 1 deletion
diff --git a/mboxer-current-config.yaml b/mboxer-current-config.yaml
Lines changed: 3 additions & 2 deletions
diff --git a/src/mboxer/cli.py b/src/mboxer/cli.py
Lines changed: 25 additions & 0 deletions
diff --git a/src/mboxer/exporters/jsonl.py b/src/mboxer/exporters/jsonl.py
Lines changed: 74 additions & 33 deletions
@@ -120,13 +120,14 @@ security:
   default_export_profile: scrubbed
   scan_enabled: true
   scrub_enabled: true
+  on_residual_findings: warn  # allow | warn | block
   scan_attachments: true
   quarantine_unsafe_attachments: true
-  redact_email_addresses: false
+  redact_email_addresses: true
   redact_phone_numbers: true
   redact_ssn_like_numbers: true
   redact_credit_card_like_numbers: true
-  redact_physical_addresses: false
+  # reserved/planned detectors (not yet implemented): physical_addresses
 
 exports:
   notebooklm:
 
@@ -21,14 +21,19 @@ exclude
   Do not export.
 ```
 
-## Future scan checks
+## Current scan checks
 
 Message checks:
 
 - email addresses
 - phone numbers
 - SSN-like values
 - credit-card-like values
+
+## Future scan checks
+
+Reserved message detector names (planned only; none of these are detected or scrubbed yet):
+
 - postal addresses
 - medical terms
 - legal terms
@@ -63,3 +68,25 @@ The `security_findings` table should record:
 - excerpt or metadata
 - review status
 - created timestamp
+
+Implemented export support:
+
+- exports can be flagged with residual finding counts by type
+- exports can warn or block when projected export text still contains detected-sensitive items
+- export manifests and run metadata record residual counts, policy, and detector descriptors
+
+## Residual export gate
+
+`on_residual_findings` controls what happens after a record is projected for export and the
+projected body text is scanned again:
+
+- `allow`: write the export and record residual counts in manifest metadata
+- `warn`: write the export, record residual counts, and emit a counts-only warning
+- `block`: abort before export files or export rows are written when residual counts are non-empty
+
+The default is `warn`.
+
+The scanner runs through a deterministic in-process detector registry. The active registry currently
+contains regex detectors for email addresses, phone numbers, SSN-like values, and credit-card-like
+values. Physical-address, medical, legal, financial-account, and credential detectors are reserved
+names for future work; they are not yet detected or scrubbed.
@@ -102,13 +102,14 @@ security:
   default_export_profile: scrubbed
   scan_enabled: true
   scrub_enabled: true
+  on_residual_findings: warn  # allow | warn | block
   scan_attachments: true
   quarantine_unsafe_attachments: true
-  redact_email_addresses: false
+  redact_email_addresses: true
   redact_phone_numbers: true
   redact_ssn_like_numbers: true
   redact_credit_card_like_numbers: true
-  redact_physical_addresses: false
+  # reserved/planned detectors (not yet implemented): physical_addresses
 
 exports:
   notebooklm:
 
@@ -2,6 +2,7 @@
 
 import argparse
 import sqlite3
+import sys
 from pathlib import Path
 
 from .accounts import AccountError
@@ -137,6 +138,7 @@ def build_parser() -> argparse.ArgumentParser:
     p_nlm.add_argument("--out", default=None)
     p_nlm.add_argument("--export-profile",
                        choices=["raw", "reviewed", "scrubbed", "metadata-only"], default=None)
+    p_nlm.add_argument("--findings-policy", choices=["allow", "warn", "block"], default=None)
     p_nlm.add_argument("--profile", default=None, help="NotebookLM limit profile")
     p_nlm.add_argument("--max-sources", type=int)
     p_nlm.add_argument("--reserved-sources", type=int)
@@ -156,6 +158,7 @@ def build_parser() -> argparse.ArgumentParser:
     p_jsonl.add_argument("--out", default=None)
     p_jsonl.add_argument("--export-profile",
                          choices=["raw", "reviewed", "scrubbed", "metadata-only"], default=None)
+    p_jsonl.add_argument("--findings-policy", choices=["allow", "warn", "block"], default=None)
     p_jsonl.set_defaults(func=cmd_export_jsonl)
 
     return parser
@@ -369,6 +372,7 @@ def cmd_security_scan(args: argparse.Namespace) -> None:
 def cmd_export_notebooklm(args: argparse.Namespace) -> None:
     from .accounts import resolve_account
     from .exporters.notebooklm import export_notebooklm
+    from .security.findings import ResidualFindingsBlocked
     config, db_path = load_runtime(args)
     limits = resolve_notebooklm_limits(
         config, args.profile,
@@ -430,10 +434,20 @@ def cmd_export_notebooklm(args: argparse.Namespace) -> None:
                 db_path=str(db_path),
                 config_path=args.config,
                 warnings=warnings,
+                findings_policy=args.findings_policy,
             )
+        except ResidualFindingsBlocked as exc:
+            print(
+                f"BLOCKED: would export residual detected-sensitive items {exc.counts}; "
+                "no files written.",
+                file=sys.stderr,
+            )
+            raise SystemExit(2) from exc
         finally:
             conn.close()
 
+        for warning in stats.get("warnings", [])[len(warnings):]:
+            print(f"WARNING: {warning}")
         if args.dry_run:
             print(f"  [{account['account_key']}] Dry run: {stats.get('groups', 0)} "
                   f"category/band groups would become source files.")
@@ -445,6 +459,7 @@ def cmd_export_notebooklm(args: argparse.Namespace) -> None:
 def cmd_export_jsonl(args: argparse.Namespace) -> None:
     from .accounts import resolve_account
     from .exporters.jsonl import export_jsonl
+    from .security.findings import ResidualFindingsBlocked
     config, db_path = load_runtime(args)
     conn = open_db(db_path)
     try:
@@ -472,9 +487,19 @@ def cmd_export_jsonl(args: argparse.Namespace) -> None:
             export_profile=args.export_profile,
             db_path=str(db_path),
             config_path=args.config,
+            findings_policy=args.findings_policy,
+        )
+    except ResidualFindingsBlocked as exc:
+        print(
+            f"BLOCKED: would export residual detected-sensitive items {exc.counts}; "
+            "no files written.",
+            file=sys.stderr,
         )
+        raise SystemExit(2) from exc
     finally:
         conn.close()
+    if result.get("residual_findings") and result.get("residual_findings_policy") == "warn":
+        print(f"WARNING: residual detected-sensitive items in export: {result['residual_findings']}")
     print(f"[{account_key}] Wrote {result['messages_written']} messages to {out_path}")
 
 
 
@@ -6,7 +6,8 @@
 from pathlib import Path
 from typing import Any
 
-from ..security.policy import default_export_profile, resolve_export_profile
+from ..security.findings import ResidualFindingsBlocked, merge_counts
+from ..security.policy import default_export_profile, resolve_export_profile, resolve_findings_policy
 from .projection import prepare_projection
 
 
@@ -22,13 +23,15 @@ def export_jsonl(
     export_profile: str | None = None,
     db_path: str | None = None,
     config_path: str | None = None,
+    findings_policy: str | None = None,
 ) -> dict[str, Any]:
     jsonl_config = (config.get("exports") or {}).get("jsonl") or {}
     include_classification = jsonl_config.get("include_classification", True)
     security = config.get("security") or {}
     config_default = default_export_profile(security.get("default_export_profile"))
     security_profile = config_default
     effective_profile = resolve_export_profile(export_profile, config_default)
+    policy = resolve_findings_policy(security.get("on_residual_findings"), override=findings_policy)
 
     if account_id is not None:
         rows = conn.execute(
@@ -88,48 +91,62 @@ def export_jsonl(
                     "classifier_type": cr[5],
                 }
 
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    written = 0
     candidate_message_count = len(rows)
     excluded_message_count = 0
     any_scrubbed = False
+    residual_total: dict[str, int] = {}
+    projected_records: list[dict[str, Any]] = []
+
+    for row in rows:
+        record = dict(zip(cols, row))
+
+        per_record_profile = (classifications.get(record["id"]) or {}).get("export_profile")
+        projected = prepare_projection(
+            record,
+            config,
+            override_profile=export_profile,
+            record_profile=per_record_profile,
+            clear_body_word_count_for_metadata_only=True,
+        )
+        if projected is None:
+            excluded_message_count += 1
+            continue
+
+        record = projected.record
+        merge_counts(residual_total, projected.residual)
+        if projected.was_scrubbed:
+            any_scrubbed = True
+
+        record["account_key"] = account_key
+        try:
+            record["recipients"] = json.loads(record.pop("recipients_json") or "[]")
+            record["cc"] = json.loads(record.pop("cc_json") or "[]")
+        except Exception:
+            record["recipients"] = []
+            record["cc"] = []
+
+        if include_classification and record["id"] in classifications:
+            record["classification"] = classifications[record["id"]]
+
+        projected_records.append(record)
+
+    if policy == "block" and residual_total:
+        raise ResidualFindingsBlocked(residual_total)
+
+    warnings: list[str] = []
+    if policy == "warn" and residual_total:
+        warnings.append(f"residual detected-sensitive items in export: {residual_total}")
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    written = 0
     thread_keys: set[str] = set()
     date_min: str | None = None
     date_max: str | None = None
     word_count = 0
     export_id = _start_export_run(conn, "jsonl", str(out_path), effective_profile, account_id)
 
     with out_path.open("w", encoding="utf-8") as f:
-        for row in rows:
-            record = dict(zip(cols, row))
-
-            # Resolve export profile for this record
-            per_record_profile = (classifications.get(record["id"]) or {}).get("export_profile")
-            projected = prepare_projection(
-                record,
-                config,
-                override_profile=export_profile,
-                record_profile=per_record_profile,
-                clear_body_word_count_for_metadata_only=True,
-            )
-            if projected is None:
-                excluded_message_count += 1
-                continue
-            record = projected.record
-            if projected.was_scrubbed:
-                any_scrubbed = True
-
-            record["account_key"] = account_key
-            try:
-                record["recipients"] = json.loads(record.pop("recipients_json") or "[]")
-                record["cc"] = json.loads(record.pop("cc_json") or "[]")
-            except Exception:
-                record["recipients"] = []
-                record["cc"] = []
-
-            if include_classification and record["id"] in classifications:
-                record["classification"] = classifications[record["id"]]
-
+        for record in projected_records:
             f.write(json.dumps(record, ensure_ascii=False) + "\n")
             written += 1
 
@@ -171,6 +188,11 @@ def export_jsonl(
         export_format=jsonl_config,
         candidate_message_count=candidate_message_count,
         excluded_message_count=excluded_message_count,
+        warnings=warnings,
+        residual_scan_performed=True,
+        residual_findings_total=sum(residual_total.values()),
+        residual_findings_by_type=residual_total,
+        residual_findings_policy=policy,
     )
     manifest_path = write_jsonl_manifest(out_path, manifest_rows)
     source_count = 1 if out_path.exists() else 0
@@ -211,6 +233,11 @@ def export_jsonl(
                 message_count=written,
                 contains_scrubbed_content=any_scrubbed,
                 generated_sha256=manifest_rows[0]["generated_sha256"],
+                warnings=warnings,
+                residual_scan_performed=True,
+                residual_findings_total=sum(residual_total.values()),
+                residual_findings_by_type=residual_total,
+                residual_findings_policy=policy,
             ),
             export_id,
         ),
@@ -224,6 +251,10 @@ def export_jsonl(
         "contains_scrubbed_content": any_scrubbed,
         "candidate_message_count": candidate_message_count,
         "excluded_message_count": excluded_message_count,
+        "residual_findings": residual_total,
+        "residual_findings_total": sum(residual_total.values()),
+        "residual_findings_policy": policy,
+        "warnings": warnings,
     }
 
 
@@ -262,6 +293,11 @@ def _jsonl_export_metadata_json(
     message_count: int,
     contains_scrubbed_content: bool,
     generated_sha256: str,
+    warnings: list[str] | None,
+    residual_scan_performed: bool,
+    residual_findings_total: int,
+    residual_findings_by_type: dict[str, int],
+    residual_findings_policy: str,
 ) -> str:
     from .manifest import build_safe_export_run_metadata, security_manifest_posture
 
@@ -289,5 +325,10 @@ def _jsonl_export_metadata_json(
         message_count=message_count,
         contains_scrubbed_content=contains_scrubbed_content,
         generated_sha256=generated_sha256,
+        warnings=warnings,
+        residual_scan_performed=residual_scan_performed,
+        residual_findings_total=residual_findings_total,
+        residual_findings_by_type=residual_findings_by_type,
+        residual_findings_policy=residual_findings_policy,
     )
     return json.dumps(metadata, ensure_ascii=False, sort_keys=True)