Skip to content

Commit a105edc

Browse files
chernistry and claude authored
feat: wire audit integrity check on startup + SOC 2 control mappings (#605)
- Wire verify_on_startup() into orchestrator.run() so the last 100 HMAC-chained audit entries are verified automatically on every start.
- Add SOC 2 Trust Services Criteria control mappings (CC6.1, CC6.8, CC7.2, CC7.3, CC8.1) to evidence export with gap analysis.
- Add Merkle root attestation summary to SOC 2 packages.
- Add PDF-ready Markdown evidence summary with executive overview, artifacts inventory, control mapping table, and integrity status.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1bcf5e9 commit a105edc

2 files changed

Lines changed: 282 additions & 3 deletions

File tree

src/bernstein/core/compliance.py

Lines changed: 263 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,246 @@ def parse_period(period: str) -> tuple[str, str]:
508508
raise ValueError(msg)
509509

510510

511+
# ---------------------------------------------------------------------------
512+
# SOC 2 Trust Services Criteria control mappings
513+
# ---------------------------------------------------------------------------
514+
515+
# SOC 2 control ID -> control metadata.
#
# Each entry carries a human-readable "title" and "description" plus
# "evidence_types": a comma-separated string (no spaces) naming the artifact
# types that count as evidence for the control. _build_soc2_control_mappings
# splits that string on "," and compares it with the collected artifact types.
_SOC2_CONTROL_MAP: dict[str, dict[str, str]] = {
    "CC6.1": {
        "title": "Logical and Physical Access Controls",
        "description": (
            "The entity implements logical access security software, "
            "infrastructure, and architectures over protected information assets."
        ),
        "evidence_types": ",".join(("audit_logs", "compliance_config")),
    },
    "CC6.8": {
        "title": "Controls Against Threats and Vulnerabilities",
        "description": (
            "The entity implements controls to prevent or detect and act upon "
            "the introduction of unauthorized or malicious software."
        ),
        "evidence_types": ",".join(("sbom", "compliance_config")),
    },
    "CC7.2": {
        "title": "System Monitoring",
        "description": (
            "The entity monitors system components and the operation of those "
            "components for anomalies that are indicative of malicious acts."
        ),
        "evidence_types": ",".join(("audit_logs", "wal")),
    },
    "CC7.3": {
        "title": "Detection and Response",
        "description": (
            "The entity evaluates anomalies and incidents to determine whether "
            "they constitute security incidents."
        ),
        "evidence_types": ",".join(("audit_logs", "wal")),
    },
    "CC8.1": {
        "title": "Change Management",
        "description": (
            "The entity authorizes, designs, develops, configures, documents, "
            "tests, approves, and implements changes to infrastructure and software."
        ),
        "evidence_types": ",".join(("audit_logs", "wal", "sbom")),
    },
}
548+
549+
550+
def _build_soc2_control_mappings(
    artifacts: list[dict[str, Any]],
    verification: dict[str, Any],
) -> list[dict[str, Any]]:
    """Map every SOC 2 control to the evidence collected for it.

    A control counts as satisfied only when every artifact type listed in its
    ``evidence_types`` entry of ``_SOC2_CONTROL_MAP`` appears among the
    ``"type"`` values of *artifacts*. The HMAC chain result is reported
    alongside each control but does not influence ``satisfied``.

    Args:
        artifacts: Collected artifact metadata; each dict must have a
            ``"type"`` key.
        verification: Verification results; the ``"valid"`` field of its
            ``"hmac_chain"`` entry (if any) is copied to every mapping.

    Returns:
        One dict per control, in ``_SOC2_CONTROL_MAP`` order, with the keys
        ``control_id``, ``title``, ``description``, ``satisfied``,
        ``evidence_present``, ``evidence_missing`` and ``integrity_verified``.
    """
    collected_types = {artifact["type"] for artifact in artifacts}

    # A missing or empty "hmac_chain" entry is treated as "not verified".
    chain = verification.get("hmac_chain")
    integrity_ok = chain.get("valid", False) if chain else False

    def _map_control(control_id: str, meta: dict[str, str]) -> dict[str, Any]:
        needed = set(meta["evidence_types"].split(","))
        absent = needed - collected_types
        return {
            "control_id": control_id,
            "title": meta["title"],
            "description": meta["description"],
            "satisfied": not absent,
            "evidence_present": sorted(needed & collected_types),
            "evidence_missing": sorted(absent),
            "integrity_verified": integrity_ok,
        }

    return [
        _map_control(control_id, meta)
        for control_id, meta in _SOC2_CONTROL_MAP.items()
    ]
591+
592+
593+
def _build_merkle_attestation(merkle_dir: Path | None) -> dict[str, Any] | None:
594+
"""Build a Merkle root attestation summary from seal files.
595+
596+
Reads all seal JSON files and produces an attestation record with the
597+
latest root hash suitable for anchoring or external verification.
598+
599+
Args:
600+
merkle_dir: Directory containing Merkle seal JSON files, or None.
601+
602+
Returns:
603+
Attestation dict, or None if no seals exist.
604+
"""
605+
if merkle_dir is None or not merkle_dir.is_dir():
606+
return None
607+
608+
seals: list[dict[str, Any]] = []
609+
for seal_file in sorted(merkle_dir.glob("*.json")):
610+
try:
611+
seal_data = json.loads(seal_file.read_text())
612+
seals.append({
613+
"file": seal_file.name,
614+
"root_hash": seal_data.get("root_hash", ""),
615+
"sealed_at": seal_data.get("sealed_at_iso", ""),
616+
"leaf_count": seal_data.get("leaf_count", 0),
617+
"algorithm": seal_data.get("algorithm", "sha256"),
618+
})
619+
except (json.JSONDecodeError, OSError):
620+
continue
621+
622+
if not seals:
623+
return None
624+
625+
latest = seals[-1]
626+
return {
627+
"attestation_type": "merkle_root",
628+
"latest_root_hash": latest["root_hash"],
629+
"latest_sealed_at": latest["sealed_at"],
630+
"total_seals": len(seals),
631+
"seals": seals,
632+
"generated_at": time.strftime(_ISO_TIMESTAMP_FMT, time.gmtime()),
633+
}
634+
635+
636+
def _build_evidence_summary(
    period: str,
    start_date: str,
    end_date: str,
    artifacts: list[dict[str, Any]],
    verification: dict[str, Any],
    control_mappings: list[dict[str, Any]],
    merkle_attestation: dict[str, Any] | None,
) -> str:
    """Generate a PDF-ready Markdown evidence summary for auditors.

    The document has five sections: executive summary, artifact inventory,
    control mapping table, HMAC integrity status and Merkle attestation,
    followed by a footer carrying the generation timestamp.

    Args:
        period: Original period string (used in the title).
        start_date: ISO start date.
        end_date: ISO end date.
        artifacts: Collected artifact metadata; each dict needs ``type`` and
            ``description`` and may carry ``file_count``.
        verification: Verification results; only ``hmac_chain`` is read.
        control_mappings: SOC 2 control mapping results.
        merkle_attestation: Merkle attestation data or None.

    Returns:
        Markdown string suitable for conversion to PDF.
    """

    def _cell(value: Any) -> str:
        """Render *value* so it cannot break out of a Markdown table cell."""
        escaped = str(value).replace("|", "\\|")
        return " ".join(escaped.splitlines())

    generated = time.strftime(_ISO_TIMESTAMP_FMT, time.gmtime())

    # One lookup drives both the executive summary and the integrity section,
    # so the two can never disagree. A chain that was checked and found
    # invalid is reported as FAILED, not as "NOT VERIFIED".
    hmac_info = verification.get("hmac_chain")
    if not hmac_info:
        hmac_status = "NOT VERIFIED"
    elif hmac_info.get("valid"):
        hmac_status = "PASSED"
    else:
        hmac_status = "FAILED"

    lines: list[str] = [
        f"# SOC 2 Evidence Package — {period}",
        "",
        f"**Period**: {start_date} to {end_date}",
        f"**Generated**: {generated}",
        "**Package type**: SOC 2 Type II Evidence Bundle",
        "",
    ]

    # Executive summary
    total_controls = len(control_mappings)
    satisfied = sum(1 for m in control_mappings if m["satisfied"])
    lines += [
        "## Executive Summary",
        "",
        f"This package contains compliance evidence for the period "
        f"{start_date} to {end_date}. "
        f"**{satisfied}/{total_controls}** SOC 2 controls have supporting evidence. "
        f"HMAC chain integrity: **{hmac_status}**.",
        "",
    ]

    # Artifacts inventory
    lines += [
        "## Evidence Artifacts",
        "",
        "| Artifact | Description | Files |",
        "|----------|-------------|-------|",
    ]
    lines.extend(
        f"| {_cell(a['type'])} | {_cell(a['description'])} "
        f"| {_cell(a.get('file_count', 'N/A'))} |"
        for a in artifacts
    )
    if not artifacts:
        lines.append("| *(none collected)* | | |")
    lines.append("")

    # Control mappings
    lines += [
        "## SOC 2 Control Mappings",
        "",
        "| Control | Title | Status | Evidence Present | Missing |",
        "|---------|-------|--------|------------------|---------|",
    ]
    for m in control_mappings:
        status = "Satisfied" if m["satisfied"] else "Gap"
        present = ", ".join(m["evidence_present"]) or "—"
        missing = ", ".join(m["evidence_missing"]) or "—"
        lines.append(
            f"| {_cell(m['control_id'])} | {_cell(m['title'])} | {status} "
            f"| {_cell(present)} | {_cell(missing)} |"
        )
    lines.append("")

    # Integrity verification
    lines += ["## Integrity Verification", ""]
    if hmac_info:
        lines.append(f"**HMAC Chain**: {hmac_status}")
        errors = hmac_info.get("errors")
        if errors:
            lines.append("")
            lines.extend(f"- {err}" for err in errors)
        verified_at = hmac_info.get("verified_at", "")
        if verified_at:
            lines.append("")
            lines.append(f"Verified at: {verified_at}")
    else:
        lines.append("HMAC chain verification: *not performed*")
    lines.append("")

    # Merkle attestation
    lines += ["## Merkle Root Attestation", ""]
    if merkle_attestation:
        lines += [
            f"**Latest root hash**: `{merkle_attestation['latest_root_hash']}`",
            f"**Sealed at**: {merkle_attestation['latest_sealed_at']}",
            f"**Total seals**: {merkle_attestation['total_seals']}",
            "",
            "| Seal File | Root Hash | Sealed At |",
            "|-----------|-----------|-----------|",
        ]
        for s in merkle_attestation["seals"]:
            root_hash = s["root_hash"]
            # Keep the table narrow: show only a 16-character prefix of long hashes.
            root_short = root_hash[:16] + "..." if len(root_hash) > 16 else root_hash
            lines.append(
                f"| {_cell(s['file'])} | `{root_short}` | {_cell(s['sealed_at'])} |"
            )
    else:
        lines.append("*No Merkle seals found for this period.*")
    lines.append("")

    # Footer
    lines += [
        "---",
        f"*Generated by Bernstein Compliance Engine at {generated}*",
        "",
    ]

    return "\n".join(lines)
749+
750+
511751
def export_soc2_package(
512752
sdd_dir: Path,
513753
period: str,
@@ -657,17 +897,35 @@ def export_soc2_package(
657897
}
658898
)
659899

660-
# --- 7. Write verification results -------------------------------------
900+
# --- 7. SOC 2 control mappings ------------------------------------------
901+
control_mappings = _build_soc2_control_mappings(artifacts_collected, verification)
902+
(bundle_dir / "control_mappings.json").write_text(json.dumps(control_mappings, indent=2))
903+
904+
# --- 8. Merkle root attestation ----------------------------------------
905+
merkle_attestation = _build_merkle_attestation(merkle_dir if merkle_dir.is_dir() else None)
906+
if merkle_attestation:
907+
(bundle_dir / "merkle_attestation.json").write_text(
908+
json.dumps(merkle_attestation, indent=2)
909+
)
910+
911+
# --- 9. Evidence summary (PDF-ready Markdown) --------------------------
912+
evidence_summary = _build_evidence_summary(
913+
period, start_date, end_date, artifacts_collected, verification,
914+
control_mappings, merkle_attestation,
915+
)
916+
(bundle_dir / "evidence_summary.md").write_text(evidence_summary)
917+
918+
# --- 10. Write verification results ------------------------------------
661919
(bundle_dir / "verification.json").write_text(json.dumps(verification, indent=2))
662920

663-
# --- 8. Compute package checksum ---------------------------------------
921+
# --- 11. Compute package checksum --------------------------------------
664922
file_checksums: dict[str, str] = {}
665923
for path in sorted(bundle_dir.rglob("*")):
666924
if path.is_file() and path.name != _MANIFEST_JSON:
667925
digest = hashlib.sha256(path.read_bytes()).hexdigest()
668926
file_checksums[str(path.relative_to(bundle_dir))] = digest
669927

670-
# --- 9. Write manifest -------------------------------------------------
928+
# --- 12. Write manifest ------------------------------------------------
671929
manifest = {
672930
"package_type": "soc2-evidence",
673931
"period": period,
@@ -676,6 +934,8 @@ def export_soc2_package(
676934
"exported_at": time.strftime(_ISO_TIMESTAMP_FMT, time.gmtime()),
677935
"artifacts": artifacts_collected,
678936
"verification": verification,
937+
"control_mappings": control_mappings,
938+
"merkle_attestation": merkle_attestation,
679939
"file_checksums": file_checksums,
680940
}
681941
(bundle_dir / _MANIFEST_JSON).write_text(json.dumps(manifest, indent=2))

src/bernstein/core/orchestrator.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,25 @@ def run(self) -> None:
14631463
self._recover_from_wal()
14641464
except Exception:
14651465
logger.exception("WAL recovery failed (non-fatal) — continuing startup")
1466+
# Audit log integrity check: verify the last N HMAC-chained entries.
1467+
try:
1468+
from bernstein.core.audit_integrity import verify_on_startup
1469+
1470+
_integrity = verify_on_startup(self._workdir / ".sdd")
1471+
if not _integrity.valid:
1472+
logger.warning(
1473+
"Audit integrity check found %d error(s) — review with "
1474+
"'bernstein audit verify'",
1475+
len(_integrity.errors),
1476+
)
1477+
elif _integrity.entries_checked > 0:
1478+
logger.info(
1479+
"Audit integrity OK (%d entries verified in %.1fms)",
1480+
_integrity.entries_checked,
1481+
_integrity.duration_ms,
1482+
)
1483+
except Exception:
1484+
logger.exception("Audit integrity check failed (non-fatal) — continuing startup")
14661485
# Zombie cleanup: terminate orphaned agent processes from prior crashed runs.
14671486
try:
14681487
from bernstein.core.zombie_cleanup import scan_and_cleanup_zombies

0 commit comments

Comments
 (0)