diff --git a/packages/agent-mesh/src/agentmesh/trust/cards.py b/packages/agent-mesh/src/agentmesh/trust/cards.py index 0bacce8f..fadb4332 100644 --- a/packages/agent-mesh/src/agentmesh/trust/cards.py +++ b/packages/agent-mesh/src/agentmesh/trust/cards.py @@ -186,18 +186,28 @@ class CardRegistry: Registry for trusted agent cards. Provides discovery and caching of verified cards. + Optionally integrates with a ``RevocationList`` to reject cards + whose agent DID has been revoked. """ - def __init__(self, cache_ttl_seconds: int = 900): + def __init__( + self, + cache_ttl_seconds: int = 900, + revocation_list: Optional["RevocationList"] = None, + ): """Initialise the card registry. Args: cache_ttl_seconds: Time-to-live in seconds for the verification cache. Defaults to 900 (15 minutes). + revocation_list: Optional revocation list to check during + verification. When set, revoked agent DIDs fail + ``is_verified()`` even if their signatures are valid. """ self._cards: Dict[str, TrustedAgentCard] = {} self._verified_cache: Dict[str, tuple[bool, datetime]] = {} self._cache_ttl = timedelta(seconds=cache_ttl_seconds) + self._revocation_list = revocation_list def register(self, card: TrustedAgentCard) -> bool: """ @@ -229,12 +239,29 @@ def get(self, agent_did: str) -> Optional[TrustedAgentCard]: """ return self._cards.get(agent_did) + @property + def revocation_list(self) -> Optional["RevocationList"]: + """The attached revocation list, if any.""" + return self._revocation_list + + @revocation_list.setter + def revocation_list(self, value: Optional["RevocationList"]) -> None: + self._revocation_list = value + self.clear_cache() + def is_verified(self, agent_did: str) -> bool: """ Check if a card is verified (with caching). Uses TTL-based caching to avoid repeated verification. + Returns False if the agent DID is on the revocation list, + even if the cryptographic signature is valid. """ + # Revocation check always runs (not cached — revocations are instant) + if self._revocation_list and self._revocation_list.is_revoked(agent_did): + self._verified_cache.pop(agent_did, None) + return False + if agent_did in self._verified_cache: verified, timestamp = self._verified_cache[agent_did] if datetime.now(timezone.utc) - timestamp < self._cache_ttl: diff --git a/packages/agent-mesh/tests/test_revocation_rotation.py b/packages/agent-mesh/tests/test_revocation_rotation.py new file mode 100644 index 00000000..fa3f2f63 --- /dev/null +++ b/packages/agent-mesh/tests/test_revocation_rotation.py @@ -0,0 +1,148 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Tests for revocation integration with CardRegistry.""" + +import pytest +from datetime import datetime, timezone + +from agentmesh.identity.agent_id import AgentIdentity +from agentmesh.identity.revocation import RevocationList, RevocationEntry +from agentmesh.trust.cards import TrustedAgentCard, CardRegistry + + +@pytest.fixture +def identity(): + return AgentIdentity.create("test-agent", sponsor="test@example.com") + + +@pytest.fixture +def signed_card(identity): + card = TrustedAgentCard.from_identity(identity) + return card + + +class TestRevocationList: + def test_revoke_and_check(self): + rl = RevocationList() + rl.revoke("did:agent:bad", reason="compromised") + assert rl.is_revoked("did:agent:bad") is True + assert rl.is_revoked("did:agent:good") is False + + def test_unrevoke(self): + rl = RevocationList() + rl.revoke("did:agent:temp", reason="testing") + assert rl.unrevoke("did:agent:temp") is True + assert rl.is_revoked("did:agent:temp") is False + + def test_temporary_revocation_expires(self): + rl = RevocationList() + rl.revoke("did:agent:temp", reason="timeout", ttl_seconds=0) + # TTL=0 means it expires immediately + assert rl.is_revoked("did:agent:temp") is False + + def test_list_revoked(self): + rl = RevocationList() + rl.revoke("did:agent:a", reason="r1") + rl.revoke("did:agent:b", reason="r2") + assert len(rl.list_revoked()) == 2 + + def test_cleanup_expired(self): + rl = RevocationList() + rl.revoke("did:agent:a", reason="r", ttl_seconds=0) + removed = rl.cleanup_expired() + assert removed == 1 + assert len(rl) == 0 + + +class TestCardRegistryRevocation: + def test_registry_without_revocation(self, signed_card): + registry = CardRegistry() + assert registry.register(signed_card) is True + assert registry.is_verified(signed_card.agent_did) is True + + def test_registry_blocks_revoked_agent(self, signed_card): + rl = RevocationList() + rl.revoke(signed_card.agent_did, reason="compromised key") + registry = CardRegistry(revocation_list=rl) + registry.register(signed_card) + assert registry.is_verified(signed_card.agent_did) is False + + def test_registry_allows_after_unrevoke(self, signed_card): + rl = RevocationList() + rl.revoke(signed_card.agent_did, reason="temporary") + registry = CardRegistry(revocation_list=rl) + registry.register(signed_card) + assert registry.is_verified(signed_card.agent_did) is False + rl.unrevoke(signed_card.agent_did) + assert registry.is_verified(signed_card.agent_did) is True + + def test_setting_revocation_list_clears_cache(self, signed_card): + registry = CardRegistry() + registry.register(signed_card) + assert registry.is_verified(signed_card.agent_did) is True + rl = RevocationList() + rl.revoke(signed_card.agent_did, reason="late revoke") + registry.revocation_list = rl + assert registry.is_verified(signed_card.agent_did) is False + + def test_revocation_list_property(self): + rl = RevocationList() + registry = CardRegistry(revocation_list=rl) + assert registry.revocation_list is rl + + +class TestKeyRotationManager: + def test_rotate_preserves_did(self): + from agentmesh.identity.rotation import KeyRotationManager + + identity = AgentIdentity.create("rotate-agent", sponsor="test@example.com") + original_did = str(identity.did) + mgr = KeyRotationManager(identity, rotation_ttl_seconds=0) + mgr.rotate() + assert str(identity.did) == original_did + + def test_rotate_changes_public_key(self): + from agentmesh.identity.rotation import KeyRotationManager + + identity = AgentIdentity.create("rotate-agent", sponsor="test@example.com") + original_key = identity.public_key + mgr = KeyRotationManager(identity, rotation_ttl_seconds=0) + mgr.rotate() + assert identity.public_key != original_key + + def test_key_history_tracked(self): + from agentmesh.identity.rotation import KeyRotationManager + + identity = AgentIdentity.create("rotate-agent", sponsor="test@example.com") + mgr = KeyRotationManager(identity, rotation_ttl_seconds=0) + mgr.rotate() + mgr.rotate() + history = mgr.get_key_history() + assert len(history) == 2 + + def test_rotation_proof_valid(self): + from agentmesh.identity.rotation import KeyRotationManager + + identity = AgentIdentity.create("rotate-agent", sponsor="test@example.com") + old_key = identity.public_key + mgr = KeyRotationManager(identity, rotation_ttl_seconds=0) + mgr.rotate() + new_key = identity.public_key + proof = mgr.get_rotation_proof() + assert KeyRotationManager.verify_rotation(old_key, new_key, proof) is True + + def test_needs_rotation_respects_ttl(self): + from agentmesh.identity.rotation import KeyRotationManager + + identity = AgentIdentity.create("rotate-agent", sponsor="test@example.com") + mgr = KeyRotationManager(identity, rotation_ttl_seconds=86400) + assert mgr.needs_rotation() is False + + def test_max_history_trimmed(self): + from agentmesh.identity.rotation import KeyRotationManager + + identity = AgentIdentity.create("rotate-agent", sponsor="test@example.com") + mgr = KeyRotationManager(identity, rotation_ttl_seconds=0, max_history=2) + for _ in range(5): + mgr.rotate() + assert len(mgr.get_key_history()) == 2 diff --git a/packages/agent-os/src/agent_os/integrations/__init__.py b/packages/agent-os/src/agent_os/integrations/__init__.py index 501ff418..7e226665 100644 --- a/packages/agent-os/src/agent_os/integrations/__init__.py +++ b/packages/agent-os/src/agent_os/integrations/__init__.py @@ -96,6 +96,18 @@ ) from .config import AgentOSConfig, get_config, reset_config from .dry_run import DryRunCollector, DryRunDecision, DryRunPolicy, DryRunResult +from .escalation import ( + ApprovalBackend, + DefaultTimeoutAction, + EscalationDecision, + EscalationHandler, + EscalationPolicy, + EscalationRequest, + EscalationResult, + InMemoryApprovalQueue, + WebhookApprovalBackend, +) +from .compat import CompatReport, check_compatibility, doctor, warn_on_import from .health import ComponentHealth, HealthChecker, HealthReport, HealthStatus from .logging import GovernanceLogger, JSONFormatter, get_logger from .policy_compose import PolicyHierarchy, compose_policies, override_policy @@ -167,6 +179,21 @@ "DryRunResult", "DryRunDecision", "DryRunCollector", + # Escalation (Human-in-the-Loop) + "EscalationPolicy", + "EscalationHandler", + "EscalationRequest", + "EscalationResult", + "EscalationDecision", + "DefaultTimeoutAction", + "ApprovalBackend", + "InMemoryApprovalQueue", + "WebhookApprovalBackend", + # Version Compatibility + "doctor", + "check_compatibility", + "CompatReport", + "warn_on_import", # Rate Limiting "RateLimiter", "RateLimitStatus", diff --git a/packages/agent-os/src/agent_os/integrations/compat.py b/packages/agent-os/src/agent_os/integrations/compat.py new file mode 100644 index 00000000..88317b2b --- /dev/null +++ b/packages/agent-os/src/agent_os/integrations/compat.py @@ -0,0 +1,229 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +""" +Inter-package version compatibility validation. + +Detects version skew between governance toolkit packages and warns when +running untested combinations. Ships a machine-readable compatibility +matrix and a ``doctor()`` function that validates the current environment. + +Usage: + from agent_os.integrations.compat import doctor, check_compatibility + + report = doctor() + print(report) + + # Or check specific pair: + ok, msg = check_compatibility("agent-os-kernel", "2.0.2", "agentmesh-platform", "2.0.2") +""" + +from __future__ import annotations + +import importlib.metadata +import logging +from dataclasses import dataclass, field +from typing import Optional + +logger = logging.getLogger(__name__) + +# Machine-readable compatibility matrix. +# Each entry maps (package_a, package_b) -> list of tested version-pair ranges. +# Format: (min_a, max_a, min_b, max_b) — all inclusive. +COMPATIBILITY_MATRIX: dict[tuple[str, str], list[tuple[str, str, str, str]]] = { + ("agent-os-kernel", "agentmesh-platform"): [ + ("2.0.0", "2.99.99", "2.0.0", "2.99.99"), + ("1.2.0", "1.99.99", "1.0.0", "1.99.99"), + ], + ("agent-os-kernel", "agent-sre"): [ + ("2.0.0", "2.99.99", "1.0.0", "1.99.99"), + ], + ("agent-os-kernel", "ai-agent-compliance"): [ + ("2.0.0", "2.99.99", "1.0.0", "1.99.99"), + ("1.0.0", "1.99.99", "1.0.0", "1.99.99"), + ], + ("agentmesh-platform", "ai-agent-compliance"): [ + ("2.0.0", "2.99.99", "1.0.0", "1.99.99"), + ("1.0.0", "1.99.99", "1.0.0", "1.99.99"), + ], + ("agentmesh-platform", "agent-sre"): [ + ("2.0.0", "2.99.99", "1.0.0", "1.99.99"), + ], +} + +KNOWN_PACKAGES = [ + "agent-os-kernel", + "agentmesh-platform", + "agent-sre", + "ai-agent-compliance", + "agent-hypervisor", +] + + +def _parse_version(v: str) -> tuple[int, ...]: + """Parse a version string into a comparable tuple.""" + parts = [] + for p in v.split("."): + try: + parts.append(int(p)) + except ValueError: + parts.append(0) + return tuple(parts) + + +def _version_in_range(version: str, min_v: str, max_v: str) -> bool: + """Check if version is within [min_v, max_v] inclusive.""" + v = _parse_version(version) + return _parse_version(min_v) <= v <= _parse_version(max_v) + + +def _get_installed_version(package_name: str) -> Optional[str]: + """Get the installed version of a package, or None if not installed.""" + try: + return importlib.metadata.version(package_name) + except importlib.metadata.PackageNotFoundError: + return None + + +def check_compatibility( + pkg_a: str, + ver_a: str, + pkg_b: str, + ver_b: str, +) -> tuple[bool, str]: + """Check if two package versions are in a tested-compatible range. + + Args: + pkg_a: Name of the first package. + ver_a: Version of the first package. + pkg_b: Name of the second package. + ver_b: Version of the second package. + + Returns: + Tuple of (compatible, message). ``compatible`` is True if the + versions fall within a known-good range, False if they are known + to be outside tested ranges, or True with a warning if no + compatibility data exists for the pair. + """ + key = (pkg_a, pkg_b) + alt_key = (pkg_b, pkg_a) + + ranges = COMPATIBILITY_MATRIX.get(key) + if ranges is None: + ranges = COMPATIBILITY_MATRIX.get(alt_key) + if ranges is not None: + # Swap versions to match the key order + ver_a, ver_b = ver_b, ver_a + + if ranges is None: + return True, f"No compatibility data for {pkg_a} + {pkg_b} (assumed OK)" + + for min_a, max_a, min_b, max_b in ranges: + if _version_in_range(ver_a, min_a, max_a) and _version_in_range( + ver_b, min_b, max_b + ): + return True, f"{pkg_a}=={ver_a} + {pkg_b}=={ver_b}: compatible" + + return ( + False, + f"WARNING: {pkg_a}=={ver_a} + {pkg_b}=={ver_b} is outside tested ranges", + ) + + +@dataclass +class CompatReport: + """Report from the compatibility doctor.""" + + installed: dict[str, str] = field(default_factory=dict) + not_installed: list[str] = field(default_factory=list) + compatible_pairs: list[str] = field(default_factory=list) + incompatible_pairs: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + @property + def ok(self) -> bool: + """True if no incompatible pairs were found.""" + return len(self.incompatible_pairs) == 0 + + def __str__(self) -> str: + lines = ["=== Governance Toolkit Compatibility Report ===", ""] + + lines.append("Installed packages:") + if self.installed: + for pkg, ver in sorted(self.installed.items()): + lines.append(f" ✓ {pkg}=={ver}") + if self.not_installed: + for pkg in self.not_installed: + lines.append(f" ✗ {pkg} (not installed)") + lines.append("") + + if self.compatible_pairs: + lines.append("Compatible pairs:") + for msg in self.compatible_pairs: + lines.append(f" ✓ {msg}") + lines.append("") + + if self.incompatible_pairs: + lines.append("INCOMPATIBLE pairs:") + for msg in self.incompatible_pairs: + lines.append(f" ✗ {msg}") + lines.append("") + + if self.warnings: + lines.append("Warnings:") + for msg in self.warnings: + lines.append(f" ⚠ {msg}") + lines.append("") + + status = "OK" if self.ok else "ISSUES FOUND" + lines.append(f"Status: {status}") + return "\n".join(lines) + + +def doctor() -> CompatReport: + """Run a full compatibility check on all installed governance packages. + + Discovers installed packages, checks every pair against the + compatibility matrix, and returns a structured report. + + Returns: + A ``CompatReport`` with installed versions and pair-wise results. + """ + report = CompatReport() + + for pkg in KNOWN_PACKAGES: + ver = _get_installed_version(pkg) + if ver: + report.installed[pkg] = ver + else: + report.not_installed.append(pkg) + + installed_list = list(report.installed.items()) + for i, (pkg_a, ver_a) in enumerate(installed_list): + for pkg_b, ver_b in installed_list[i + 1 :]: + ok, msg = check_compatibility(pkg_a, ver_a, pkg_b, ver_b) + if ok: + if "assumed" in msg: + report.warnings.append(msg) + else: + report.compatible_pairs.append(msg) + else: + report.incompatible_pairs.append(msg) + + if report.incompatible_pairs: + logger.warning( + "Version skew detected: %d incompatible package pairs", + len(report.incompatible_pairs), + ) + + return report + + +def warn_on_import() -> None: + """Run at import time to log version skew warnings. + + Call this from a package's ``__init__.py`` to emit a warning + if incompatible peer packages are detected. + """ + report = doctor() + for msg in report.incompatible_pairs: + logger.warning(msg) diff --git a/packages/agent-os/src/agent_os/integrations/escalation.py b/packages/agent-os/src/agent_os/integrations/escalation.py new file mode 100644 index 00000000..30533c3d --- /dev/null +++ b/packages/agent-os/src/agent_os/integrations/escalation.py @@ -0,0 +1,477 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +""" +Human-in-the-Loop Escalation for Governance Policies. + +Adds an ``ESCALATE`` decision tier between ALLOW and DENY. When a policy +requires human approval, the agent is **suspended** and an approval request +is routed to a configurable backend (in-memory queue, webhook, or custom +handler). A timeout with configurable default action ensures the system +never blocks indefinitely. + +Usage: + from agent_os.integrations.escalation import ( + EscalationHandler, + EscalationPolicy, + EscalationRequest, + EscalationDecision, + InMemoryApprovalQueue, + ) + + queue = InMemoryApprovalQueue() + handler = EscalationHandler(backend=queue, timeout_seconds=300) + policy = EscalationPolicy(integration, handler=handler) + + result = policy.evaluate("tool_call", context, input_data) + if result.decision == EscalationDecision.PENDING: + # Agent is suspended — await human decision + queue.approve(result.request_id, approver="admin@corp.com") + final = policy.resolve(result.request_id) +""" + +from __future__ import annotations + +import abc +import logging +import threading +import time +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from enum import Enum +from typing import Any, Callable, Optional + +from .base import BaseIntegration, ExecutionContext, GovernanceEventType + +logger = logging.getLogger(__name__) + + +class EscalationDecision(Enum): + """Possible outcomes of an escalation evaluation.""" + + ALLOW = "ALLOW" + DENY = "DENY" + ESCALATE = "ESCALATE" + PENDING = "PENDING" + TIMEOUT = "TIMEOUT" + + +class DefaultTimeoutAction(Enum): + """Action to take when a human doesn't respond within the SLA.""" + + DENY = "deny" + ALLOW = "allow" + + +@dataclass +class EscalationRequest: + """A request for human approval of an agent action. + + Attributes: + request_id: Unique identifier for this escalation. + agent_id: ID of the agent whose action needs approval. + action: Description of the action being escalated. + reason: Why escalation was triggered. + context_snapshot: Serialisable snapshot of the execution context. + created_at: When the escalation was created. + resolved_at: When a human responded (or timeout). + decision: Final decision from the human (or timeout default). + resolved_by: Identifier of the human who resolved. + """ + + request_id: str = field(default_factory=lambda: str(uuid.uuid4())) + agent_id: str = "" + action: str = "" + reason: str = "" + context_snapshot: dict[str, Any] = field(default_factory=dict) + created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + resolved_at: Optional[datetime] = None + decision: EscalationDecision = EscalationDecision.PENDING + resolved_by: Optional[str] = None + + +class ApprovalBackend(abc.ABC): + """Abstract interface for escalation approval backends.""" + + @abc.abstractmethod + def submit(self, request: EscalationRequest) -> None: + """Submit an escalation request for human review.""" + + @abc.abstractmethod + def get_decision(self, request_id: str) -> EscalationRequest | None: + """Retrieve the current state of an escalation request.""" + + @abc.abstractmethod + def approve(self, request_id: str, approver: str = "") -> bool: + """Approve an escalation request. Returns True if found and updated.""" + + @abc.abstractmethod + def deny(self, request_id: str, approver: str = "") -> bool: + """Deny an escalation request. Returns True if found and updated.""" + + @abc.abstractmethod + def list_pending(self) -> list[EscalationRequest]: + """List all pending escalation requests.""" + + +class InMemoryApprovalQueue(ApprovalBackend): + """Thread-safe in-memory approval queue. + + Suitable for testing, single-process deployments, and development. + For production, implement a backend that uses Redis, a database, + or a webhook-based notification service. + """ + + def __init__(self) -> None: + self._requests: dict[str, EscalationRequest] = {} + self._lock = threading.Lock() + self._events: dict[str, threading.Event] = {} + + def submit(self, request: EscalationRequest) -> None: + with self._lock: + self._requests[request.request_id] = request + self._events[request.request_id] = threading.Event() + + def get_decision(self, request_id: str) -> EscalationRequest | None: + with self._lock: + return self._requests.get(request_id) + + def approve(self, request_id: str, approver: str = "") -> bool: + with self._lock: + req = self._requests.get(request_id) + if req is None or req.decision != EscalationDecision.PENDING: + return False + req.decision = EscalationDecision.ALLOW + req.resolved_by = approver + req.resolved_at = datetime.now(timezone.utc) + event = self._events.get(request_id) + if event: + event.set() + return True + + def deny(self, request_id: str, approver: str = "") -> bool: + with self._lock: + req = self._requests.get(request_id) + if req is None or req.decision != EscalationDecision.PENDING: + return False + req.decision = EscalationDecision.DENY + req.resolved_by = approver + req.resolved_at = datetime.now(timezone.utc) + event = self._events.get(request_id) + if event: + event.set() + return True + + def list_pending(self) -> list[EscalationRequest]: + with self._lock: + return [ + r + for r in self._requests.values() + if r.decision == EscalationDecision.PENDING + ] + + def wait_for_decision( + self, request_id: str, timeout: float | None = None + ) -> EscalationDecision: + """Block until a decision is made or timeout expires. + + Returns: + The final decision, or ``PENDING`` if timeout was reached. + """ + event = self._events.get(request_id) + if event is None: + return EscalationDecision.PENDING + event.wait(timeout=timeout) + req = self._requests.get(request_id) + return req.decision if req else EscalationDecision.PENDING + + +class WebhookApprovalBackend(ApprovalBackend): + """Approval backend that sends webhook notifications for escalations. + + Stores state in-memory but fires an HTTP POST to the configured URL + when a new escalation is submitted. The receiving system is responsible + for calling back via the ``approve``/``deny`` methods (e.g., via an + API endpoint). + + Args: + webhook_url: URL to POST escalation notifications to. + headers: Optional HTTP headers (e.g., auth tokens). + """ + + def __init__( + self, + webhook_url: str, + headers: dict[str, str] | None = None, + ) -> None: + self._inner = InMemoryApprovalQueue() + self._webhook_url = webhook_url + self._headers = headers or {} + + def submit(self, request: EscalationRequest) -> None: + self._inner.submit(request) + self._notify(request) + + def _notify(self, request: EscalationRequest) -> None: + """Fire-and-forget webhook notification.""" + try: + import urllib.request + import json + + payload = json.dumps( + { + "request_id": request.request_id, + "agent_id": request.agent_id, + "action": request.action, + "reason": request.reason, + "created_at": request.created_at.isoformat(), + }, + default=str, + ).encode() + req = urllib.request.Request( + self._webhook_url, + data=payload, + headers={**self._headers, "Content-Type": "application/json"}, + method="POST", + ) + urllib.request.urlopen(req, timeout=10) # noqa: S310 + logger.info("Escalation webhook sent for %s", request.request_id) + except Exception: + logger.warning( + "Failed to send escalation webhook for %s", + request.request_id, + exc_info=True, + ) + + def get_decision(self, request_id: str) -> EscalationRequest | None: + return self._inner.get_decision(request_id) + + def approve(self, request_id: str, approver: str = "") -> bool: + return self._inner.approve(request_id, approver) + + def deny(self, request_id: str, approver: str = "") -> bool: + return self._inner.deny(request_id, approver) + + def list_pending(self) -> list[EscalationRequest]: + return self._inner.list_pending() + + +class EscalationHandler: + """Manages escalation lifecycle: submit, wait, resolve. + + Args: + backend: The approval backend to use. + timeout_seconds: How long to wait for a human decision. + default_action: What to do if the timeout expires. + on_escalate: Optional callback fired when an escalation is created. + """ + + def __init__( + self, + backend: ApprovalBackend | None = None, + timeout_seconds: float = 300, + default_action: DefaultTimeoutAction = DefaultTimeoutAction.DENY, + on_escalate: Callable[[EscalationRequest], None] | None = None, + ) -> None: + self.backend = backend or InMemoryApprovalQueue() + self.timeout_seconds = timeout_seconds + self.default_action = default_action + self._on_escalate = on_escalate + + def escalate( + self, + agent_id: str, + action: str, + reason: str, + context_snapshot: dict[str, Any] | None = None, + ) -> EscalationRequest: + """Create and submit an escalation request. + + Returns: + The ``EscalationRequest`` in PENDING state. + """ + request = EscalationRequest( + agent_id=agent_id, + action=action, + reason=reason, + context_snapshot=context_snapshot or {}, + ) + self.backend.submit(request) + logger.info( + "Escalation %s created for agent %s: %s", + request.request_id, + agent_id, + reason, + ) + if self._on_escalate: + self._on_escalate(request) + return request + + def resolve(self, request_id: str) -> EscalationDecision: + """Check or wait for a resolution. + + For ``InMemoryApprovalQueue``, this blocks up to ``timeout_seconds``. + For other backends, this polls once and returns the current state. + + Returns: + The final decision. If the timeout expires, applies the + ``default_action`` and returns that. + """ + if isinstance(self.backend, InMemoryApprovalQueue): + decision = self.backend.wait_for_decision( + request_id, timeout=self.timeout_seconds + ) + else: + req = self.backend.get_decision(request_id) + decision = req.decision if req else EscalationDecision.PENDING + + if decision == EscalationDecision.PENDING: + # Timeout — apply default + decision = ( + EscalationDecision.ALLOW + if self.default_action == DefaultTimeoutAction.ALLOW + else EscalationDecision.DENY + ) + logger.warning( + "Escalation %s timed out after %.0fs, defaulting to %s", + request_id, + self.timeout_seconds, + decision.value, + ) + return decision + + +@dataclass +class EscalationResult: + """Result of an escalation policy evaluation.""" + + action: str + decision: EscalationDecision + reason: Optional[str] + request: Optional[EscalationRequest] = None + policy_name: str = "" + timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + +class EscalationPolicy: + """Wraps a BaseIntegration with human-in-the-loop escalation. + + When ``require_human_approval`` is True in the policy, instead of + immediately denying the action, this wrapper **suspends** execution + and routes an approval request to the configured handler. + + This is the ``ESCALATE`` tier between ALLOW and DENY. + + Args: + integration: The governance integration to wrap. + handler: The escalation handler managing approvals. + policy_name: Name for audit logging. + """ + + def __init__( + self, + integration: BaseIntegration, + handler: EscalationHandler | None = None, + *, + policy_name: str = "default", + ) -> None: + self._integration = integration + self._handler = handler or EscalationHandler() + self._policy_name = policy_name + + @property + def handler(self) -> EscalationHandler: + return self._handler + + def evaluate( + self, + action: str, + context: ExecutionContext, + input_data: Any = None, + ) -> EscalationResult: + """Evaluate a policy check with escalation support. + + If the policy would deny due to ``require_human_approval``, + this creates an escalation request instead of blocking. + + For all other deny reasons (blocked patterns, timeouts, etc.), + the action is denied immediately — escalation only applies + to the human-approval gate. + + Returns: + An ``EscalationResult`` with the decision and optional + escalation request. + """ + allowed, reason = self._integration.pre_execute(context, input_data) + + if allowed: + return EscalationResult( + action=action, + decision=EscalationDecision.ALLOW, + reason=None, + policy_name=self._policy_name, + ) + + # Check if this denial was due to human approval requirement + if self._integration.policy.require_human_approval and reason and ( + "human approval" in reason.lower() + ): + request = self._handler.escalate( + agent_id=context.agent_id, + action=action, + reason=reason, + context_snapshot={ + "session_id": context.session_id, + "call_count": context.call_count, + "total_tokens": context.total_tokens, + "input_summary": str(input_data)[:500] if input_data else "", + }, + ) + self._integration.emit( + GovernanceEventType.POLICY_CHECK, + { + "agent_id": context.agent_id, + "action": action, + "escalation_id": request.request_id, + "phase": "escalated", + }, + ) + return EscalationResult( + action=action, + decision=EscalationDecision.PENDING, + reason=reason, + request=request, + policy_name=self._policy_name, + ) + + # Hard deny (not an escalation scenario) + return EscalationResult( + action=action, + decision=EscalationDecision.DENY, + reason=reason, + policy_name=self._policy_name, + ) + + def resolve(self, request_id: str) -> EscalationDecision: + """Wait for and return the human decision on an escalation. + + Delegates to the handler's resolve method, which blocks or + polls depending on the backend. + """ + return self._handler.resolve(request_id) + + def evaluate_and_wait( + self, + action: str, + context: ExecutionContext, + input_data: Any = None, + ) -> EscalationResult: + """Evaluate and, if escalated, block until resolved. + + Convenience method that combines ``evaluate()`` and ``resolve()``. + """ + result = self.evaluate(action, context, input_data) + if result.decision == EscalationDecision.PENDING and result.request: + final = self.resolve(result.request.request_id) + result.decision = final + return result diff --git a/packages/agent-os/tests/test_compat.py b/packages/agent-os/tests/test_compat.py new file mode 100644 index 00000000..0e41625c --- /dev/null +++ b/packages/agent-os/tests/test_compat.py @@ -0,0 +1,102 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Tests for version compatibility checker.""" + +import pytest + +from agent_os.integrations.compat import ( + CompatReport, + check_compatibility, + doctor, + _parse_version, + _version_in_range, +) + + +class TestVersionParsing: + def test_simple_version(self): + assert _parse_version("1.2.3") == (1, 2, 3) + + def test_two_part_version(self): + assert _parse_version("2.0") == (2, 0) + + def test_non_numeric(self): + assert _parse_version("1.0.0rc1") == (1, 0, 0) + + +class TestVersionInRange: + def test_in_range(self): + assert _version_in_range("2.0.2", "2.0.0", "2.99.99") is True + + def test_at_minimum(self): + assert _version_in_range("2.0.0", "2.0.0", "2.99.99") is True + + def test_at_maximum(self): + assert _version_in_range("2.99.99", "2.0.0", "2.99.99") is True + + def test_below_range(self): + assert _version_in_range("1.9.9", "2.0.0", "2.99.99") is False + + def test_above_range(self): + assert _version_in_range("3.0.0", "2.0.0", "2.99.99") is False + + +class TestCheckCompatibility: + def test_compatible_pair(self): + ok, msg = check_compatibility( + "agent-os-kernel", "2.0.2", "agentmesh-platform", "2.0.2" + ) + assert ok is True + assert "compatible" in msg + + def test_incompatible_pair(self): + ok, msg = check_compatibility( + "agent-os-kernel", "3.5.0", "agentmesh-platform", "2.0.0" + ) + assert ok is False + assert "WARNING" in msg + + def test_unknown_pair(self): + ok, msg = check_compatibility( + "unknown-pkg", "1.0.0", "other-pkg", "1.0.0" + ) + assert ok is True + assert "assumed" in msg + + def test_reversed_key_order(self): + ok, msg = check_compatibility( + "agentmesh-platform", "2.0.2", "agent-os-kernel", "2.0.2" + ) + assert ok is True + + +class TestCompatReport: + def test_ok_when_no_incompatible(self): + report = CompatReport( + installed={"agent-os-kernel": "2.0.2"}, + compatible_pairs=["some pair"], + ) + assert report.ok is True + + def test_not_ok_when_incompatible(self): + report = CompatReport( + incompatible_pairs=["bad pair"], + ) + assert report.ok is False + + def test_str_contains_status(self): + report = CompatReport( + installed={"agent-os-kernel": "2.0.2"}, + ) + text = str(report) + assert "OK" in text + assert "agent-os-kernel" in text + + +class TestDoctor: + def test_returns_report(self): + report = doctor() + assert isinstance(report, CompatReport) + # Should detect installed or not-installed packages + total = len(report.installed) + len(report.not_installed) + assert total > 0 diff --git a/packages/agent-os/tests/test_escalation.py b/packages/agent-os/tests/test_escalation.py new file mode 100644 index 00000000..ae639ad5 --- /dev/null +++ b/packages/agent-os/tests/test_escalation.py @@ -0,0 +1,248 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Tests for human-in-the-loop escalation policy.""" + +import threading +import time + +import pytest + +from agent_os.integrations.base import BaseIntegration, ExecutionContext, GovernancePolicy +from agent_os.integrations.escalation import ( + ApprovalBackend, + DefaultTimeoutAction, + EscalationDecision, + EscalationHandler, + EscalationPolicy, + EscalationRequest, + EscalationResult, + InMemoryApprovalQueue, +) + + +class _StubIntegration(BaseIntegration): + """Minimal concrete BaseIntegration for testing.""" + + def wrap(self, agent): + return agent + + def unwrap(self, governed): + return governed + + +@pytest.fixture +def policy_requires_approval(): + return GovernancePolicy(name="strict", require_human_approval=True) + + +@pytest.fixture +def policy_no_approval(): + return GovernancePolicy(name="relaxed", require_human_approval=False) + + +@pytest.fixture +def make_context(): + def _make(policy): + return ExecutionContext(agent_id="agent-1", session_id="sess-1", policy=policy) + return _make + + +class TestInMemoryApprovalQueue: + def test_submit_and_get(self): + queue = InMemoryApprovalQueue() + req = EscalationRequest(agent_id="a1", action="write_file", reason="needs review") + queue.submit(req) + retrieved = queue.get_decision(req.request_id) + assert retrieved is not None + assert retrieved.decision == EscalationDecision.PENDING + + def test_approve(self): + queue = InMemoryApprovalQueue() + req = EscalationRequest(agent_id="a1", action="call_api", reason="policy") + queue.submit(req) + assert queue.approve(req.request_id, approver="admin") is True + retrieved = queue.get_decision(req.request_id) + assert retrieved.decision == EscalationDecision.ALLOW + assert retrieved.resolved_by == "admin" + assert retrieved.resolved_at is not None + + def test_deny(self): + queue = InMemoryApprovalQueue() + req = EscalationRequest(agent_id="a1", action="delete", reason="dangerous") + queue.submit(req) + assert queue.deny(req.request_id, approver="sec-team") is True + retrieved = queue.get_decision(req.request_id) + assert retrieved.decision == EscalationDecision.DENY + + def test_double_approve_fails(self): + queue = InMemoryApprovalQueue() + req = EscalationRequest(agent_id="a1", action="x", reason="r") + queue.submit(req) + assert queue.approve(req.request_id) is True + assert queue.approve(req.request_id) is False # Already resolved + + def test_approve_nonexistent(self): + queue = InMemoryApprovalQueue() + assert queue.approve("nonexistent") is False + + def test_list_pending(self): + queue = InMemoryApprovalQueue() + r1 = EscalationRequest(agent_id="a1", action="x", reason="r") + r2 = EscalationRequest(agent_id="a2", action="y", reason="s") + queue.submit(r1) + queue.submit(r2) + queue.approve(r1.request_id) + pending = queue.list_pending() + assert len(pending) == 1 + assert pending[0].request_id == r2.request_id + + def test_wait_for_decision_with_approval(self): + queue = InMemoryApprovalQueue() + req = EscalationRequest(agent_id="a1", action="x", reason="r") + queue.submit(req) + + def approve_later(): + time.sleep(0.1) + queue.approve(req.request_id, approver="user") + + t = threading.Thread(target=approve_later) + t.start() + decision = queue.wait_for_decision(req.request_id, timeout=5) + t.join() + assert decision == EscalationDecision.ALLOW + + def test_wait_for_decision_timeout(self): + queue = InMemoryApprovalQueue() + req = EscalationRequest(agent_id="a1", action="x", reason="r") + queue.submit(req) + decision = queue.wait_for_decision(req.request_id, timeout=0.1) + assert decision == EscalationDecision.PENDING + + +class TestEscalationHandler: + def test_escalate_creates_request(self): + handler = EscalationHandler(timeout_seconds=1) + request = handler.escalate("agent-1", "write_file", "policy requires approval") + assert request.agent_id == "agent-1" + assert request.decision == EscalationDecision.PENDING + + def test_resolve_with_approval(self): + queue = InMemoryApprovalQueue() + handler = EscalationHandler(backend=queue, timeout_seconds=5) + request = handler.escalate("agent-1", "action", "reason") + + def approve(): + time.sleep(0.1) + queue.approve(request.request_id) + + t = threading.Thread(target=approve) + t.start() + decision = handler.resolve(request.request_id) + t.join() + assert decision == EscalationDecision.ALLOW + + def test_resolve_timeout_defaults_to_deny(self): + handler = EscalationHandler( + timeout_seconds=0.1, + default_action=DefaultTimeoutAction.DENY, + ) + request = handler.escalate("agent-1", "action", "reason") + decision = handler.resolve(request.request_id) + assert decision == EscalationDecision.DENY + + def test_resolve_timeout_defaults_to_allow(self): + handler = EscalationHandler( + timeout_seconds=0.1, + default_action=DefaultTimeoutAction.ALLOW, + ) + request = handler.escalate("agent-1", "action", "reason") + decision = handler.resolve(request.request_id) + assert decision == EscalationDecision.ALLOW + + def test_on_escalate_callback(self): + captured = [] + handler = EscalationHandler( + timeout_seconds=1, + on_escalate=lambda req: captured.append(req), + ) + handler.escalate("agent-1", "action", "reason") + assert len(captured) == 1 + assert captured[0].agent_id == "agent-1" + + +class TestEscalationPolicy: + def test_allow_when_no_approval_required(self, policy_no_approval, make_context): + integration = _StubIntegration(policy=policy_no_approval) + ctx = make_context(policy_no_approval) + ep = EscalationPolicy(integration) + result = ep.evaluate("tool_call", ctx) + assert result.decision == EscalationDecision.ALLOW + assert result.request is None + + def test_escalate_when_approval_required(self, policy_requires_approval, make_context): + integration = _StubIntegration(policy=policy_requires_approval) + ctx = make_context(policy_requires_approval) + ep = EscalationPolicy(integration) + result = ep.evaluate("tool_call", ctx) + assert result.decision == EscalationDecision.PENDING + assert result.request is not None + assert result.request.agent_id == "agent-1" + + def test_deny_for_non_approval_reasons(self, make_context): + policy = GovernancePolicy( + name="tight", + max_tool_calls=0, + require_human_approval=False, + ) + integration = _StubIntegration(policy=policy) + ctx = make_context(policy) + ep = EscalationPolicy(integration) + result = ep.evaluate("tool_call", ctx) + assert result.decision == EscalationDecision.DENY + + def test_evaluate_and_wait_approved(self, policy_requires_approval, make_context): + queue = InMemoryApprovalQueue() + handler = EscalationHandler(backend=queue, timeout_seconds=5) + integration = _StubIntegration(policy=policy_requires_approval) + ctx = make_context(policy_requires_approval) + ep = EscalationPolicy(integration, handler=handler) + + def approve_pending(): + time.sleep(0.1) + pending = queue.list_pending() + if pending: + queue.approve(pending[0].request_id, approver="admin") + + t = threading.Thread(target=approve_pending) + t.start() + result = ep.evaluate_and_wait("tool_call", ctx) + t.join() + assert result.decision == EscalationDecision.ALLOW + + def test_evaluate_and_wait_timeout_deny(self, policy_requires_approval, make_context): + handler = EscalationHandler( + timeout_seconds=0.1, + default_action=DefaultTimeoutAction.DENY, + ) + integration = _StubIntegration(policy=policy_requires_approval) + ctx = make_context(policy_requires_approval) + ep = EscalationPolicy(integration, handler=handler) + result = ep.evaluate_and_wait("tool_call", ctx) + assert result.decision == EscalationDecision.DENY + + +class TestEscalationRequest: + def test_default_fields(self): + req = EscalationRequest() + assert req.request_id # UUID generated + assert req.decision == EscalationDecision.PENDING + assert req.resolved_by is None + + def test_custom_fields(self): + req = EscalationRequest( + agent_id="a1", + action="deploy", + reason="production change", + ) + assert req.agent_id == "a1" + assert req.action == "deploy"