veristamp · srimon12 · Jan 10, 2026 · Jan 10, 2026 · Jan 10, 2026 · Jan 24, 2026
diff --git a/.gitignore b/.gitignore
@@ -8,7 +8,6 @@ wheels/
 
 # Virtual environments
 .venv
-docs
 old
 static
 templates
@@ -23,4 +22,22 @@ qdrant_data
 .env
 memory.json
 tool_schema.json
-uv.lock
+uv.lock
+tools_schema.json
+node_modules
+output
+sdk
+legacy
+workflows_gcm
+workflows_test
+skills_test_e2e
+policy/role_permissions_e2e.json
+memory.jsonl
+.opencode
+.ruff_cache
+registry.sqlite
+registry.sqlite-shm
+registry.sqlite-wal
+drizzle
+bun.lock
+docs
diff --git a/Agent.md b/Agent.md
@@ -0,0 +1,55 @@
+# Governed Code Mode: Agent Architecture (DIY & Layered)
+
+## Core Philosophy: The "DIY" Agent
+The fundamental goal of this architecture is to treat Agents not as hardcoded classes or frameworks, but as **composable configurations** running on top of a robust, governed kernel.
+
+We adhere to a **Layered Abstraction** model. As we move up the layers, rigidity decreases and flexibility increases.
+
+### The Stack
+
+| Layer | Component | Responsibility | Properties |
+|-------|-----------|----------------|------------|
+| **L3** | **DIY Agents** | Prompts, tool selections, specialized workflows. | *Ephemeral, Hot-swappable, Defined by text/config* |
+| **L2** | **Runtime** | `runGovernedLoop`, `Mission`, `Session`, `SubAgent`. | *Orchestration, State Management, Composition* |
+| **L1** | **Governance** | `PolicyEngine`, `RuntimeIdentity`, `AuditLogger`. | *Security, Access Control, Visibility* |
+| **L0** | **Kernel** | `MCPClientManager`, `MissionService`, `Registry`, `Engram`. | *Capabilities, Persistence, System Calls, Structural Memory* |
+
+## Key Concepts
+
+### 1. Session vs. Mission
+We strictly separate the **Conversational Context** from the **Execution Container**.
+
+*   **Session (`sessionId`)**: 
+    *   Represents a conversational thread (User ↔ Agent).
+    *   Anchors the **Prompt Cache** (history, context).
+    *   Stores trace events for debugging and user feedback.
+    *   *Lifespan*: Ephemeral or persistent (chat log).
+
+*   **Mission (`missionId`)**:
+    *   Represents a governed unit of work/execution.
+    *   Anchors **Policy**, **Budget**, and **Audit**.
+    *   Can span multiple sessions (e.g., a long-running job checked by multiple users).
+    *   *Lifespan*: Task-defined (until goal is met).
+
+### 2. The DIY Agent Model & Recursive Discovery
+An "Agent" in this system is simply:
+1.  A **System Prompt** (Personality + Strategy).
+2.  A set of **Tools** (Capabilities).
+3.  A **Runtime Identity** (Permissions/Scope).
+
+Crucially, agents do NOT need to have all tools loaded upfront. We use the **Recursive Discovery** pattern (aligned with Anthropic's Tool Search):
+
+*   **Capability Search**: A unified tool (`capability_search`) that allows the agent to find Tools, Skills, and Workflows on demand.
+*   **Deferred Loading**: The agent starts with minimal context and "pages in" capabilities as needed.
+
+### 3. The "Grand Fusion" (Engram + RLM + GCM)
+This architecture fuses three concepts into a single design (the "Grand Fusion"):
+
+*   **Engram (KB Core)**: Exposed as `kb-core` MCP tools (or `registry.*` tools). It allows the agent to navigate the *structure* of code/knowledge (AST, Graph) without reading entire files, preventing context rot.
+*   **RLM (Recursive Language Model)**: The Agent behaves like a Python REPL. It stitches together verified "Skills" (Python functions) and executes them in a sandbox. It does not hallucinate code from scratch; it orchestrates existing blocks.
+*   **GCM (Governed Code Mode)**: The chassis that ensures every `mcp.use()` call is policy-checked against the Mission ID.
+
+## Roadmap to Pure DIY
+1.  **Unified Discovery Tool**: Implement `src/core/capabilities/discovery.ts` to replace hardcoded `searchWorkflows` / `searchSkills` logic.
+2.  **Delete Legacy Wrappers**: Remove `OrchestratorAgent` class logic. The Orchestrator is just a loop with `capability_search` and `spawn_scout`.
+3.  **Engram Integration**: Ensure the `kb-core` (or equivalent) tools are discoverable via the registry so the RLM can "hop" through the codebase structure.
diff --git a/Agent/code_auditor.py b/Agent/code_auditor.py
@@ -0,0 +1,304 @@
+"""
+Static Code Auditor for Governed Code Mode.
+
+This module provides AST-based static analysis of LLM-generated Python code to:
+1. Detect prohibited imports (os, sys, subprocess, etc.)
+2. Detect prohibited builtin calls (eval, exec, open, etc.)
+3. Extract all binding calls (derives the "manifest" automatically)
+4. Validate binding calls against available tools
+5. Extract the PLAN comment for audit purposes
+
+This is "Pillar 4: The Static Auditor" - the machine derives the manifest from code,
+so the LLM only needs to generate ONE artifact (not manifest + code).
+"""
+from __future__ import annotations
+
+import ast
+import re
+import logging
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Set, Tuple, Optional
+
+log = logging.getLogger("code_auditor")
+
+
+@dataclass
+class DerivedManifest:
+    """
+    Auditable summary of a piece of LLM-generated code.
+
+    The manifest is produced by static analysis (it is never written by the
+    LLM itself) and is what gets shown to users for approval.
+    """
+    plan_title: str
+    io_calls: List[str]
+    security_flags: List[str]
+    estimated_call_count: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the manifest fields as a plain dictionary."""
+        keys = (
+            "plan_title",
+            "io_calls",
+            "security_flags",
+            "estimated_call_count",
+        )
+        return {key: getattr(self, key) for key in keys}
+
+
+@dataclass
+class AuditResult:
+    """Outcome of a code audit: the derived manifest plus errors and warnings."""
+    manifest: Optional[DerivedManifest]
+    errors: List[str] = field(default_factory=list)
+    warnings: List[str] = field(default_factory=list)
+
+    @property
+    def is_valid(self) -> bool:
+        """True when no errors were recorded; warnings do not affect validity."""
+        return not self.errors
+
+
+class CodeAuditor:
+    """
+    AST-based static analyzer for LLM-generated Python code.
+
+    This auditor:
+    1. Parses Python code into an AST
+    2. Walks the AST to find all binding calls
+    3. Detects any prohibited imports or calls
+    4. Validates bindings against available tools
+    5. Generates a DerivedManifest for audit
+
+    Limitations (the analysis is purely name-based):
+    - Every ``name.method()`` call is recorded as a binding call, including
+      method calls on local variables (e.g. ``items.append(x)``).
+    - Only direct calls by bare name are matched against PROHIBITED_CALLS;
+      aliases and attribute access are not tracked.
+
+    Example:
+        auditor = CodeAuditor()
+        result = auditor.audit(code, available_bindings={"filesystem.list_directory"})
+        if result.is_valid:
+            print(result.manifest.io_calls)
+    """
+
+    # Modules that must NEVER be imported
+    PROHIBITED_IMPORTS: Set[str] = {
+        'os', 'sys', 'subprocess', 'socket', 'ctypes', 'shutil',
+        'multiprocessing', 'threading', 'signal', 'pty', 'fcntl',
+        'resource', 'syslog', 'grp', 'pwd', 'crypt',
+        'pickle', 'shelve', 'marshal',  # Serialization exploits
+        'importlib', 'pkgutil',  # Dynamic imports
+        'code', 'codeop',  # Interactive interpreter
+        'gc', 'inspect', 'traceback',  # Introspection that could leak info
+        'builtins', '__builtins__',
+    }
+
+    # Imports that ARE allowed (skill modules)
+    ALLOWED_IMPORT_PREFIXES: Set[str] = {
+        'skills',  # Allow: from skills import filesystem
+    }
+
+    # Builtin functions that must NEVER be called
+    PROHIBITED_CALLS: Set[str] = {
+        'eval', 'exec', 'compile', 'open',
+        '__import__',  # Would bypass the import checks entirely
+        'globals', 'locals', 'vars', 'dir',
+        'input',  # No interactive input
+        'breakpoint', 'exit', 'quit',
+    }
+
+    def __init__(self):
+        log.debug("CodeAuditor initialized")
+
+    def audit(
+        self,
+        code: str,
+        available_bindings: Optional[Set[str]] = None
+    ) -> AuditResult:
+        """
+        Audit LLM-generated Python code.
+
+        Args:
+            code: The Python code to audit
+            available_bindings: Set of allowed binding calls (e.g., {"filesystem.list_directory"})
+                              If None, skip binding validation
+
+        Returns:
+            AuditResult with manifest, errors, and warnings. When the code
+            cannot be parsed, the manifest is None and errors holds a single
+            parse error message.
+        """
+        log.info("=== CODE AUDIT START ===")
+        errors: List[str] = []
+        warnings: List[str] = []
+        io_calls: List[str] = []
+
+        # --- 1. Parse the code ---
+        try:
+            tree = ast.parse(code)
+        except SyntaxError as e:
+            log.error("Syntax error: %s", e)
+            return AuditResult(manifest=None, errors=[f"Syntax error at line {e.lineno}: {e.msg}"])
+        except ValueError as e:
+            # Some Python versions raise ValueError instead of SyntaxError
+            # for source that contains null bytes.
+            log.error("Unparseable source: %s", e)
+            return AuditResult(manifest=None, errors=[f"Syntax error: {e}"])
+        log.debug("AST parsing successful")
+
+        # --- 2. Check for async main() ---
+        has_async_main = any(
+            isinstance(node, ast.AsyncFunctionDef) and node.name == "main"
+            for node in ast.walk(tree)
+        )
+        if not has_async_main:
+            errors.append("Code must define 'async def main()'")
+
+        # --- 3. Walk AST and analyze ---
+        # Import and call violations are tracked separately from `errors`
+        # so that each security flag reflects only its own category.
+        has_import_violations = False
+        has_prohibited_calls = False
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                for alias in node.names:
+                    problem = self._import_violation(alias.name, alias.name)
+                    if problem is not None:
+                        has_import_violations = True
+                        errors.append(problem)
+
+            elif isinstance(node, ast.ImportFrom):
+                # A purely relative import (`from . import x`) has no module name.
+                module = node.module or ''
+                problem = self._import_violation(module, f"from {module}")
+                if problem is not None:
+                    has_import_violations = True
+                    errors.append(problem)
+
+            elif isinstance(node, ast.Call):
+                call_info = self._analyze_call(node)
+                if call_info is None:
+                    continue
+                call_type, call_name = call_info
+
+                if call_type == "builtin" and call_name in self.PROHIBITED_CALLS:
+                    has_prohibited_calls = True
+                    errors.append(f"Prohibited call: {call_name}()")
+                elif call_type == "binding":
+                    io_calls.append(call_name)
+                    log.debug("Found binding call: %s", call_name)
+
+        # Deduplicate while keeping first-seen order so the manifest is
+        # deterministic across runs (set ordering is not).
+        unique_calls = list(dict.fromkeys(io_calls))
+
+        # --- 4. Validate bindings against available tools ---
+        if available_bindings is not None:
+            # Report each unknown binding once, however often it is called.
+            for call in unique_calls:
+                if call not in available_bindings:
+                    errors.append(f"Binding '{call}' not in available tools")
+                    log.warning("Unknown binding: %s", call)
+
+        # --- 5. Build security flags ---
+        security_flags: List[str] = []
+        if not has_import_violations:
+            security_flags.append("CLEAN_IMPORTS")
+        if not has_prohibited_calls:
+            security_flags.append("NO_PROHIBITED_CALLS")
+        if has_async_main:
+            security_flags.append("HAS_ASYNC_MAIN")
+
+        # --- 6. Extract plan title from comment ---
+        plan_title = self._extract_plan_comment(code)
+
+        # --- 7. Build manifest ---
+        manifest = DerivedManifest(
+            plan_title=plan_title,
+            io_calls=unique_calls,
+            security_flags=security_flags,
+            # Counts every call site, including repeats of the same binding.
+            estimated_call_count=len(io_calls),
+        )
+
+        log.info("=== CODE AUDIT COMPLETE ===")
+        log.info("Errors: %d, Warnings: %d", len(errors), len(warnings))
+        log.info("IO Calls: %s", manifest.io_calls)
+        log.info("Security Flags: %s", manifest.security_flags)
+
+        return AuditResult(
+            manifest=manifest,
+            errors=errors,
+            warnings=warnings,
+        )
+
+    def _import_violation(self, module: str, display: str) -> Optional[str]:
+        """
+        Classify an imported module name.
+
+        Args:
+            module: Dotted module name being imported ('' for a purely relative import)
+            display: Text used for the import in log and error messages
+
+        Returns:
+            None when the import is allowed, otherwise the error message.
+        """
+        # Match whole dotted components: 'skills' and 'skills.fs' are allowed,
+        # but 'skills_evil' is not.
+        for prefix in self.ALLOWED_IMPORT_PREFIXES:
+            if module == prefix or module.startswith(prefix + '.'):
+                log.debug("Allowed import: %s", display)
+                return None
+
+        if module.split('.')[0] in self.PROHIBITED_IMPORTS:
+            return f"Prohibited import: {display}"
+        return f"Imports not allowed: {display}"
+
+    def _analyze_call(self, node: ast.Call) -> Optional[Tuple[str, str]]:
+        """
+        Analyze a Call node to determine what's being called.
+
+        Returns:
+            Tuple of (call_type, call_name) or None
+            call_type is "builtin" for any bare-name call ``func()`` and
+            "binding" for ``name.method()``; anything else (chained
+            attributes, subscripts, call results) yields None.
+        """
+        func = node.func
+
+        # Simple name call: func()
+        if isinstance(func, ast.Name):
+            return ("builtin", func.id)
+
+        # Binding call pattern: binding.method()
+        if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
+            return ("binding", f"{func.value.id}.{func.attr}")
+
+        # Chained call (obj.attr.method()) or other callee - treat as unknown
+        return None
+
+    def _extract_plan_comment(self, code: str) -> str:
+        """
+        Extract the PLAN comment from code.
+
+        Expected format:
+            # PLAN: Description of what this code does
+
+        The search is a case-insensitive regex over the raw source text, and
+        the first match wins.
+
+        Returns:
+            The text after "PLAN:", or "No description provided" if absent.
+        """
+        match = re.search(r'#\s*PLAN:\s*(.+)', code, re.IGNORECASE)
+        if match:
+            title = match.group(1).strip()
+            log.debug("Extracted plan title: %s", title)
+            return title
+
+        log.warning("No PLAN comment found, using default")
+        return "No description provided"
+
+
+def quick_validate(code: str) -> List[str]:
+    """
+    Audit ``code`` with binding validation skipped and return only the errors.
+
+    Returns:
+        The error messages recorded by the audit (empty when there are none).
+    """
+    return CodeAuditor().audit(code, available_bindings=None).errors
+
+
+if __name__ == "__main__":
+    # Manual smoke test: audit a small sample program and print the outcome.
+    logging.basicConfig(level=logging.DEBUG)
+
+    sample_bindings = {
+        "filesystem.list_directory",
+        "memory.create_entities",
+    }
+    sample_code = '''
+# PLAN: List files and save to memory
+
+async def main():
+    files = await filesystem.list_directory(path=".")
+    await memory.create_entities(entities=[{"name": "files", "data": files}])
+    return files
+'''
+
+    outcome = CodeAuditor().audit(sample_code, available_bindings=sample_bindings)
+
+    report = (
+        ("Valid", outcome.is_valid),
+        ("Errors", outcome.errors),
+        ("Manifest", outcome.manifest),
+    )
+    for label, value in report:
+        print(f"{label}: {value}")