diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 0000000000..d03570d981
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,15 @@
+{
+ "hooks": {
+ "PostToolUse": [
+ {
+ "matcher": "Edit|Write|NotebookEdit",
+ "hooks": [
+ {
+ "type": "command",
+        "command": "f=$(jq -r '.tool_input.file_path // empty'); if [ -n \"$f\" ]; then ruff check --fix \"$f\" 2>/dev/null; ruff format \"$f\" 2>/dev/null; fi; true"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/.claude/skills/building-agents-construction/SKILL.md b/.claude/skills/building-agents-construction/SKILL.md
index f7e4eb9396..8858a25fd5 100644
--- a/.claude/skills/building-agents-construction/SKILL.md
+++ b/.claude/skills/building-agents-construction/SKILL.md
@@ -520,6 +520,8 @@ class RuntimeConfig:
model: str = "cerebras/zai-glm-4.7"
temperature: float = 0.7
max_tokens: int = 4096
+ api_key: str | None = None
+ api_base: str | None = None
default_config = RuntimeConfig()
@@ -972,7 +974,11 @@ class {agent_class_name}:
llm = None
if not mock_mode:
# LiteLLMProvider uses environment variables for API keys
- llm = LiteLLMProvider(model=self.config.model)
+ llm = LiteLLMProvider(
+ model=self.config.model,
+ api_key=self.config.api_key,
+ api_base=self.config.api_base,
+ )
self._graph = GraphSpec(
id="{agent_name}-graph",
diff --git a/.claude/skills/building-agents-construction/examples/online_research_agent/__main__.py b/.claude/skills/building-agents-construction/examples/online_research_agent/__main__.py
index dfee11d7c6..8fa5985a6c 100644
--- a/.claude/skills/building-agents-construction/examples/online_research_agent/__main__.py
+++ b/.claude/skills/building-agents-construction/examples/online_research_agent/__main__.py
@@ -108,8 +108,10 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
- topic = await asyncio.get_event_loop().run_in_executor(None, input, "Topic> ")
- if topic.lower() in ['quit', 'exit', 'q']:
+ topic = await asyncio.get_event_loop().run_in_executor(
+ None, input, "Topic> "
+ )
+ if topic.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -130,7 +132,11 @@ async def _interactive_shell(verbose=False):
click.echo(f"\nReport saved to: {output['file_path']}\n")
if "final_report" in output:
click.echo("\n--- Report Preview ---\n")
- preview = output["final_report"][:500] + "..." if len(output.get("final_report", "")) > 500 else output.get("final_report", "")
+ preview = (
+ output["final_report"][:500] + "..."
+ if len(output.get("final_report", "")) > 500
+ else output.get("final_report", "")
+ )
click.echo(preview)
click.echo("\n")
else:
@@ -142,6 +148,7 @@ async def _interactive_shell(verbose=False):
except Exception as e:
click.echo(f"Error: {e}", err=True)
import traceback
+
traceback.print_exc()
finally:
await agent.stop()
diff --git a/.claude/skills/building-agents-construction/examples/online_research_agent/agent.py b/.claude/skills/building-agents-construction/examples/online_research_agent/agent.py
index 405f3ee46d..c487e9f57d 100644
--- a/.claude/skills/building-agents-construction/examples/online_research_agent/agent.py
+++ b/.claude/skills/building-agents-construction/examples/online_research_agent/agent.py
@@ -1,4 +1,5 @@
"""Agent graph construction for Online Research Agent."""
+
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
@@ -8,6 +9,16 @@
from framework.runner.tool_registry import ToolRegistry
from .config import default_config, metadata
+from .nodes import (
+ parse_query_node,
+ search_sources_node,
+ fetch_content_node,
+ evaluate_sources_node,
+ synthesize_findings_node,
+ write_report_node,
+ quality_check_node,
+ save_report_node,
+)
# Goal definition
goal = Goal(
@@ -78,17 +89,6 @@
),
],
)
-# Import nodes
-from .nodes import (
- parse_query_node,
- search_sources_node,
- fetch_content_node,
- evaluate_sources_node,
- synthesize_findings_node,
- write_report_node,
- quality_check_node,
- save_report_node,
-)
# Node list
nodes = [
@@ -195,13 +195,15 @@ def _build_entry_point_specs(self) -> list[EntryPointSpec]:
trigger_type = "manual"
name = ep_id.replace("-", " ").title()
- specs.append(EntryPointSpec(
- id=ep_id,
- name=name,
- entry_node=node_id,
- trigger_type=trigger_type,
- isolation_level="shared",
- ))
+ specs.append(
+ EntryPointSpec(
+ id=ep_id,
+ name=name,
+ entry_node=node_id,
+ trigger_type=trigger_type,
+ isolation_level="shared",
+ )
+ )
return specs
def _create_runtime(self, mock_mode=False) -> AgentRuntime:
@@ -226,14 +228,21 @@ def _create_runtime(self, mock_mode=False) -> AgentRuntime:
for server_name, server_config in mcp_servers.items():
server_config["name"] = server_name
# Resolve relative cwd paths
- if "cwd" in server_config and not Path(server_config["cwd"]).is_absolute():
+ if (
+ "cwd" in server_config
+ and not Path(server_config["cwd"]).is_absolute()
+ ):
server_config["cwd"] = str(agent_dir / server_config["cwd"])
tool_registry.register_mcp_server(server_config)
llm = None
if not mock_mode:
# LiteLLMProvider uses environment variables for API keys
- llm = LiteLLMProvider(model=self.config.model)
+ llm = LiteLLMProvider(
+ model=self.config.model,
+ api_key=self.config.api_key,
+ api_base=self.config.api_base,
+ )
self._graph = GraphSpec(
id="online-research-agent-graph",
@@ -294,7 +303,9 @@ async def trigger(
"""
if self._runtime is None or not self._runtime.is_running:
raise RuntimeError("Agent runtime not started. Call start() first.")
- return await self._runtime.trigger(entry_point, input_data, correlation_id, session_state=session_state)
+ return await self._runtime.trigger(
+ entry_point, input_data, correlation_id, session_state=session_state
+ )
async def trigger_and_wait(
self,
@@ -317,9 +328,13 @@ async def trigger_and_wait(
"""
if self._runtime is None or not self._runtime.is_running:
raise RuntimeError("Agent runtime not started. Call start() first.")
- return await self._runtime.trigger_and_wait(entry_point, input_data, timeout, session_state=session_state)
+ return await self._runtime.trigger_and_wait(
+ entry_point, input_data, timeout, session_state=session_state
+ )
- async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
+ async def run(
+ self, context: dict, mock_mode=False, session_state=None
+ ) -> ExecutionResult:
"""
Run the agent (convenience method for simple single execution).
@@ -338,7 +353,9 @@ async def run(self, context: dict, mock_mode=False, session_state=None) -> Execu
else:
entry_point = "start"
- result = await self.trigger_and_wait(entry_point, context, session_state=session_state)
+ result = await self.trigger_and_wait(
+ entry_point, context, session_state=session_state
+ )
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -400,7 +417,9 @@ def validate(self):
# Validate entry points
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
- errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
+ errors.append(
+ f"Entry point '{ep_id}' references unknown node '{node_id}'"
+ )
return {
"valid": len(errors) == 0,
diff --git a/.claude/skills/building-agents-construction/examples/online_research_agent/config.py b/.claude/skills/building-agents-construction/examples/online_research_agent/config.py
index b68c30e51c..31f4cf6222 100644
--- a/.claude/skills/building-agents-construction/examples/online_research_agent/config.py
+++ b/.claude/skills/building-agents-construction/examples/online_research_agent/config.py
@@ -1,4 +1,5 @@
"""Runtime configuration."""
+
from dataclasses import dataclass
@@ -7,10 +8,13 @@ class RuntimeConfig:
model: str = "groq/moonshotai/kimi-k2-instruct-0905"
temperature: float = 0.7
max_tokens: int = 16384
+ api_key: str | None = None
+ api_base: str | None = None
default_config = RuntimeConfig()
+
# Agent metadata
@dataclass
class AgentMetadata:
diff --git a/.claude/skills/building-agents-construction/examples/online_research_agent/nodes/__init__.py b/.claude/skills/building-agents-construction/examples/online_research_agent/nodes/__init__.py
index 58d897de46..944d370753 100644
--- a/.claude/skills/building-agents-construction/examples/online_research_agent/nodes/__init__.py
+++ b/.claude/skills/building-agents-construction/examples/online_research_agent/nodes/__init__.py
@@ -1,4 +1,5 @@
"""Node definitions for Online Research Agent."""
+
from framework.graph import NodeSpec
# Node 1: Parse Query
@@ -10,9 +11,21 @@
input_keys=["topic"],
output_keys=["search_queries", "research_focus", "key_aspects"],
output_schema={
- "research_focus": {"type": "string", "required": True, "description": "Brief statement of what we're researching"},
- "key_aspects": {"type": "array", "required": True, "description": "List of 3-5 key aspects to investigate"},
- "search_queries": {"type": "array", "required": True, "description": "List of 3-5 search queries"},
+ "research_focus": {
+ "type": "string",
+ "required": True,
+ "description": "Brief statement of what we're researching",
+ },
+ "key_aspects": {
+ "type": "array",
+ "required": True,
+ "description": "List of 3-5 key aspects to investigate",
+ },
+ "search_queries": {
+ "type": "array",
+ "required": True,
+ "description": "List of 3-5 search queries",
+ },
},
system_prompt="""\
You are a research query strategist. Given a research topic, analyze it and generate search queries.
@@ -50,8 +63,16 @@
input_keys=["search_queries", "research_focus"],
output_keys=["source_urls", "search_results_summary"],
output_schema={
- "source_urls": {"type": "array", "required": True, "description": "List of source URLs found"},
- "search_results_summary": {"type": "string", "required": True, "description": "Brief summary of what was found"},
+ "source_urls": {
+ "type": "array",
+ "required": True,
+ "description": "List of source URLs found",
+ },
+ "search_results_summary": {
+ "type": "string",
+ "required": True,
+ "description": "Brief summary of what was found",
+ },
},
system_prompt="""\
You are a research assistant executing web searches. Use the web_search tool to find sources.
@@ -80,8 +101,16 @@
input_keys=["source_urls", "research_focus"],
output_keys=["fetched_sources", "fetch_errors"],
output_schema={
- "fetched_sources": {"type": "array", "required": True, "description": "List of fetched source objects with url, title, content"},
- "fetch_errors": {"type": "array", "required": True, "description": "List of URLs that failed to fetch"},
+ "fetched_sources": {
+ "type": "array",
+ "required": True,
+ "description": "List of fetched source objects with url, title, content",
+ },
+ "fetch_errors": {
+ "type": "array",
+ "required": True,
+ "description": "List of URLs that failed to fetch",
+ },
},
system_prompt="""\
You are a content fetcher. Use web_scrape tool to retrieve content from URLs.
@@ -113,8 +142,16 @@
input_keys=["fetched_sources", "research_focus", "key_aspects"],
output_keys=["ranked_sources", "source_analysis"],
output_schema={
- "ranked_sources": {"type": "array", "required": True, "description": "List of ranked sources with scores"},
- "source_analysis": {"type": "string", "required": True, "description": "Overview of source quality and coverage"},
+ "ranked_sources": {
+ "type": "array",
+ "required": True,
+ "description": "List of ranked sources with scores",
+ },
+ "source_analysis": {
+ "type": "string",
+ "required": True,
+ "description": "Overview of source quality and coverage",
+ },
},
system_prompt="""\
You are a source evaluator. Assess each source for quality and relevance.
@@ -153,9 +190,21 @@
input_keys=["ranked_sources", "research_focus", "key_aspects"],
output_keys=["key_findings", "themes", "source_citations"],
output_schema={
- "key_findings": {"type": "array", "required": True, "description": "List of key findings with sources and confidence"},
- "themes": {"type": "array", "required": True, "description": "List of themes with descriptions and supporting sources"},
- "source_citations": {"type": "object", "required": True, "description": "Map of facts to supporting URLs"},
+ "key_findings": {
+ "type": "array",
+ "required": True,
+ "description": "List of key findings with sources and confidence",
+ },
+ "themes": {
+ "type": "array",
+ "required": True,
+ "description": "List of themes with descriptions and supporting sources",
+ },
+ "source_citations": {
+ "type": "object",
+ "required": True,
+ "description": "Map of facts to supporting URLs",
+ },
},
system_prompt="""\
You are a research synthesizer. Analyze multiple sources to extract insights.
@@ -192,11 +241,25 @@
name="Write Report",
description="Generate a narrative report with proper citations",
node_type="llm_generate",
- input_keys=["key_findings", "themes", "source_citations", "research_focus", "ranked_sources"],
+ input_keys=[
+ "key_findings",
+ "themes",
+ "source_citations",
+ "research_focus",
+ "ranked_sources",
+ ],
output_keys=["report_content", "references"],
output_schema={
- "report_content": {"type": "string", "required": True, "description": "Full markdown report text with citations"},
- "references": {"type": "array", "required": True, "description": "List of reference objects with number, url, title"},
+ "report_content": {
+ "type": "string",
+ "required": True,
+ "description": "Full markdown report text with citations",
+ },
+ "references": {
+ "type": "array",
+ "required": True,
+ "description": "List of reference objects with number, url, title",
+ },
},
system_prompt="""\
You are a research report writer. Create a well-structured narrative report.
@@ -239,9 +302,21 @@
input_keys=["report_content", "references", "source_citations"],
output_keys=["quality_score", "issues", "final_report"],
output_schema={
- "quality_score": {"type": "number", "required": True, "description": "Quality score 0-1"},
- "issues": {"type": "array", "required": True, "description": "List of issues found and fixed"},
- "final_report": {"type": "string", "required": True, "description": "Corrected full report"},
+ "quality_score": {
+ "type": "number",
+ "required": True,
+ "description": "Quality score 0-1",
+ },
+ "issues": {
+ "type": "array",
+ "required": True,
+ "description": "List of issues found and fixed",
+ },
+ "final_report": {
+ "type": "string",
+ "required": True,
+ "description": "Corrected full report",
+ },
},
system_prompt="""\
You are a quality assurance reviewer. Check the research report for issues.
@@ -278,8 +353,16 @@
input_keys=["final_report", "references", "research_focus"],
output_keys=["file_path", "save_status"],
output_schema={
- "file_path": {"type": "string", "required": True, "description": "Path where report was saved"},
- "save_status": {"type": "string", "required": True, "description": "Status of save operation"},
+ "file_path": {
+ "type": "string",
+ "required": True,
+ "description": "Path where report was saved",
+ },
+ "save_status": {
+ "type": "string",
+ "required": True,
+ "description": "Status of save operation",
+ },
},
system_prompt="""\
You are a file manager. Save the research report to disk.
diff --git a/.cursorrules b/.cursorrules
new file mode 100644
index 0000000000..db7b6d3c4a
--- /dev/null
+++ b/.cursorrules
@@ -0,0 +1,18 @@
+This project uses ruff for Python linting and formatting.
+
+Rules:
+- Line length: 100 characters
+- Python target: 3.11+
+- Use double quotes for strings
+- Sort imports with isort (ruff I rules): stdlib, third-party, first-party (framework), local
+- Combine as-imports
+- Use type hints on all function signatures
+- Use `from __future__ import annotations` for modern type syntax
+- Raise exceptions with `from` in except blocks (B904)
+- No unused imports (F401), no unused variables (F841)
+- Prefer list/dict/set comprehensions over map/filter (C4)
+
+Run `make lint` to auto-fix, `make check` to verify without modifying files.
+Run `make format` to apply ruff formatting.
+
+The ruff config lives in core/pyproject.toml under [tool.ruff].
diff --git a/.editorconfig b/.editorconfig
index 51b5033c13..252d41467d 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -11,6 +11,9 @@ indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
+[*.py]
+indent_size = 4
+
[*.md]
trim_trailing_whitespace = false
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000..3db0e15274
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,124 @@
+# Normalize line endings for all text files
+* text=auto
+
+# Source code
+*.py text diff=python
+*.js text
+*.ts text
+*.jsx text
+*.tsx text
+*.json text
+*.yaml text
+*.yml text
+*.toml text
+*.ini text
+*.cfg text
+
+# Shell scripts (must use LF)
+*.sh text eol=lf
+quickstart.sh text eol=lf
+
+# PowerShell scripts (Windows-friendly)
+*.ps1 text eol=lf
+*.psm1 text eol=lf
+
+# Windows batch files (must use CRLF)
+*.bat text eol=crlf
+*.cmd text eol=crlf
+
+# Documentation
+*.md text
+*.txt text
+*.rst text
+*.tex text
+
+# Configuration files
+.gitignore text
+.gitattributes text
+.editorconfig text
+Dockerfile text
+docker-compose.yml text
+requirements*.txt text
+pyproject.toml text
+setup.py text
+setup.cfg text
+MANIFEST.in text
+LICENSE text
+README* text
+CHANGELOG* text
+CONTRIBUTING* text
+CODE_OF_CONDUCT* text
+
+# Web files
+*.html text
+*.css text
+*.scss text
+*.sass text
+
+# Data files
+*.xml text
+*.csv text
+*.sql text
+
+# Graphics (binary)
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.ico binary
+*.svg binary
+*.eps binary
+*.bmp binary
+*.tif binary
+*.tiff binary
+
+# Archives (binary)
+*.zip binary
+*.tar binary
+*.gz binary
+*.bz2 binary
+*.7z binary
+*.rar binary
+
+# Python compiled (binary)
+*.pyc binary
+*.pyo binary
+*.pyd binary
+*.whl binary
+*.egg binary
+
+# System libraries (binary)
+*.so binary
+*.dll binary
+*.dylib binary
+*.lib binary
+*.a binary
+
+# Documents (binary)
+*.pdf binary
+*.doc binary
+*.docx binary
+*.ppt binary
+*.pptx binary
+*.xls binary
+*.xlsx binary
+
+# Fonts (binary)
+*.ttf binary
+*.otf binary
+*.woff binary
+*.woff2 binary
+*.eot binary
+
+# Audio/Video (binary)
+*.mp3 binary
+*.mp4 binary
+*.wav binary
+*.avi binary
+*.mov binary
+*.flv binary
+
+# Database files (binary)
+*.db binary
+*.sqlite binary
+*.sqlite3 binary
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 71ab6b3f8d..1a60b37340 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -8,7 +8,6 @@
/hive/ @adenhq/maintainers
# Infrastructure
-/docker-compose*.yml @adenhq/maintainers
/.github/ @adenhq/maintainers
# Documentation
diff --git a/.github/workflows/auto-close-duplicates.yml b/.github/workflows/auto-close-duplicates.yml
new file mode 100644
index 0000000000..e809229933
--- /dev/null
+++ b/.github/workflows/auto-close-duplicates.yml
@@ -0,0 +1,31 @@
+name: Auto-close duplicate issues
+# Auto-closes issues that are duplicates of existing issues
+on:
+ schedule:
+ - cron: "0 */6 * * *"
+ workflow_dispatch:
+
+jobs:
+ auto-close-duplicates:
+ runs-on: ubuntu-latest
+ timeout-minutes: 10
+ permissions:
+ contents: read
+ issues: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Setup Bun
+ uses: oven-sh/setup-bun@v2
+ with:
+ bun-version: latest
+
+ - name: Auto-close duplicate issues
+ run: bun run scripts/auto-close-duplicates.ts
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
+ GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}
+ STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3f5205e464..c50e83c2d5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,10 +29,15 @@ jobs:
pip install -e .
pip install -r requirements-dev.txt
- - name: Run ruff
+ - name: Ruff lint
run: |
- cd core
- ruff check .
+ ruff check core/
+ ruff check tools/
+
+ - name: Ruff format
+ run: |
+ ruff format --check core/
+ ruff format --check tools/
test:
name: Test Python Framework
@@ -79,9 +84,31 @@ jobs:
- name: Validate exported agents
run: |
# Check that agent exports have valid structure
- for agent_dir in exports/*/; do
+ if [ ! -d "exports" ]; then
+ echo "No exports/ directory found, skipping validation"
+ exit 0
+ fi
+
+ shopt -s nullglob
+ agent_dirs=(exports/*/)
+ shopt -u nullglob
+
+ if [ ${#agent_dirs[@]} -eq 0 ]; then
+ echo "No agent directories in exports/, skipping validation"
+ exit 0
+ fi
+
+ validated=0
+ for agent_dir in "${agent_dirs[@]}"; do
if [ -f "$agent_dir/agent.json" ]; then
echo "Validating $agent_dir"
python -c "import json; json.load(open('$agent_dir/agent.json'))"
+ validated=$((validated + 1))
fi
done
+
+ if [ "$validated" -eq 0 ]; then
+ echo "No agent.json files found in exports/, skipping validation"
+ else
+ echo "Validated $validated agent(s)"
+ fi
diff --git a/.github/workflows/claude-issue-triage.yml b/.github/workflows/claude-issue-triage.yml
new file mode 100644
index 0000000000..2567674492
--- /dev/null
+++ b/.github/workflows/claude-issue-triage.yml
@@ -0,0 +1,83 @@
+name: Issue Triage
+
+on:
+ issues:
+ types: [opened]
+
+jobs:
+ triage:
+ runs-on: ubuntu-latest
+ timeout-minutes: 10
+ permissions:
+ contents: read
+ issues: write
+ id-token: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Triage and check for duplicates
+ uses: anthropics/claude-code-action@v1
+ with:
+ anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ allowed_non_write_users: "*"
+ prompt: |
+ Analyze this new issue and perform triage tasks.
+
+ Issue: #${{ github.event.issue.number }}
+ Repository: ${{ github.repository }}
+
+ ## Your Tasks:
+
+ ### 1. Get issue details
+ Use mcp__github__get_issue to get the full details of issue #${{ github.event.issue.number }}
+
+ ### 2. Check for duplicates
+ Search for similar existing issues using mcp__github__search_issues with relevant keywords from the issue title and body.
+
+ Criteria for duplicates:
+ - Same bug or error being reported
+ - Same feature request (even if worded differently)
+ - Same question being asked
+ - Issues describing the same root problem
+
+ If you find a duplicate:
+ - Add a comment using EXACTLY this format (required for auto-close to work):
+      "Found a possible duplicate of #<issue_number>: <issue_title>"
+ - Do NOT apply the "duplicate" label yet (the auto-close script will add it after 12 hours if no objections)
+ - Suggest the user react with a thumbs-down if they disagree
+
+ ### 3. Check for invalid issues
+ If the issue lacks sufficient information, is spam, or doesn't make sense:
+ - Add the "invalid" label
+ - Comment asking for clarification or explaining why it's invalid
+
+ ### 4. Categorize with labels (if NOT a duplicate)
+ Apply appropriate labels based on the issue content. Use ONLY these labels:
+ - bug: Something isn't working
+ - enhancement: New feature or request
+ - question: Further information is requested
+ - documentation: Improvements or additions to documentation
+ - good first issue: Good for newcomers (if issue is well-defined and small scope)
+ - help wanted: Extra attention is needed (if issue needs community input)
+ - backlog: Tracked for the future, but not currently planned or prioritized
+
+ You may apply multiple labels if appropriate (e.g., "bug" and "help wanted").
+
+ ## Tools Available:
+ - mcp__github__get_issue: Get issue details
+ - mcp__github__search_issues: Search for similar issues
+ - mcp__github__list_issues: List recent issues if needed
+ - mcp__github__add_issue_comment: Add a comment
+ - mcp__github__update_issue: Add labels
+ - mcp__github__get_issue_comments: Get existing comments
+
+ Be thorough but efficient. Focus on accurate categorization and finding true duplicates.
+
+ claude_args: |
+ --model claude-haiku-4-5-20251001
+ --allowedTools "mcp__github__get_issue,mcp__github__search_issues,mcp__github__list_issues,mcp__github__add_issue_comment,mcp__github__update_issue,mcp__github__get_issue_comments"
diff --git a/.github/workflows/pr-check-command.yml b/.github/workflows/pr-check-command.yml
new file mode 100644
index 0000000000..1b5f30a424
--- /dev/null
+++ b/.github/workflows/pr-check-command.yml
@@ -0,0 +1,204 @@
+name: PR Check Command
+
+on:
+ issue_comment:
+ types: [created]
+
+jobs:
+ check-pr:
+ # Only run on PR comments that start with /check
+ if: github.event.issue.pull_request && startsWith(github.event.comment.body, '/check')
+ runs-on: ubuntu-latest
+ permissions:
+ pull-requests: write
+ issues: write
+ checks: write
+ statuses: write
+
+ steps:
+ - name: Check PR requirements
+ uses: actions/github-script@v7
+ with:
+ script: |
+ const prNumber = context.payload.issue.number;
+ console.log(`Triggered by /check comment on PR #${prNumber}`);
+
+ // Fetch PR data
+ const { data: pr } = await github.rest.pulls.get({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ });
+
+ const prBody = pr.body || '';
+ const prTitle = pr.title || '';
+ const prAuthor = pr.user.login;
+ const headSha = pr.head.sha;
+
+ // Create a check run in progress
+ const { data: checkRun } = await github.rest.checks.create({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ name: 'check-requirements',
+ head_sha: headSha,
+ status: 'in_progress',
+ started_at: new Date().toISOString(),
+ });
+
+ // Extract issue numbers
+ const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+ const allText = `${prTitle} ${prBody}`;
+ const matches = [...allText.matchAll(issuePattern)];
+ const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+ console.log(`PR #${prNumber}:`);
+ console.log(` Author: ${prAuthor}`);
+ console.log(` Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+ if (issueNumbers.length === 0) {
+ const message = `## PR Closed - Requirements Not Met
+
+ This PR has been automatically closed because it doesn't meet the requirements.
+
+ **Missing:** No linked issue found.
+
+ **To fix:**
+ 1. Create or find an existing issue for this work
+ 2. Assign yourself to the issue
+ 3. Re-open this PR and add \`Fixes #123\` in the description
+
+ **Why is this required?** See #472 for details.`;
+
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: message,
+ });
+
+ await github.rest.pulls.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ state: 'closed',
+ });
+
+ // Update check run to failure
+ await github.rest.checks.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ check_run_id: checkRun.id,
+ status: 'completed',
+ conclusion: 'failure',
+ completed_at: new Date().toISOString(),
+ output: {
+ title: 'Missing linked issue',
+ summary: 'PR must reference an issue (e.g., `Fixes #123`)',
+ },
+ });
+
+ core.setFailed('PR must reference an issue');
+ return;
+ }
+
+ // Check if PR author is assigned to any linked issue
+ let issueWithAuthorAssigned = null;
+ let issuesWithoutAuthor = [];
+
+ for (const issueNum of issueNumbers) {
+ try {
+ const { data: issue } = await github.rest.issues.get({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issueNum,
+ });
+
+ const assigneeLogins = (issue.assignees || []).map(a => a.login);
+ if (assigneeLogins.includes(prAuthor)) {
+ issueWithAuthorAssigned = issueNum;
+ console.log(` Issue #${issueNum} has PR author ${prAuthor} as assignee`);
+ break;
+ } else {
+ issuesWithoutAuthor.push({
+ number: issueNum,
+ assignees: assigneeLogins
+ });
+ console.log(` Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'}`);
+ }
+ } catch (error) {
+ console.log(` Issue #${issueNum} not found`);
+ }
+ }
+
+ if (!issueWithAuthorAssigned) {
+ const issueList = issuesWithoutAuthor.map(i =>
+ `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+ ).join(', ');
+
+ const message = `## PR Closed - Requirements Not Met
+
+ This PR has been automatically closed because it doesn't meet the requirements.
+
+ **PR Author:** @${prAuthor}
+ **Found issues:** ${issueList}
+ **Problem:** The PR author must be assigned to the linked issue.
+
+ **To fix:**
+ 1. Assign yourself (@${prAuthor}) to one of the linked issues
+ 2. Re-open this PR
+
+ **Why is this required?** See #472 for details.`;
+
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: message,
+ });
+
+ await github.rest.pulls.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ state: 'closed',
+ });
+
+ // Update check run to failure
+ await github.rest.checks.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ check_run_id: checkRun.id,
+ status: 'completed',
+ conclusion: 'failure',
+ completed_at: new Date().toISOString(),
+ output: {
+ title: 'PR author not assigned to issue',
+ summary: `PR author @${prAuthor} must be assigned to one of the linked issues: ${issueList}`,
+ },
+ });
+
+ core.setFailed('PR author must be assigned to the linked issue');
+ } else {
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: `✅ PR requirements met! Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
+ });
+
+ // Update check run to success
+ await github.rest.checks.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ check_run_id: checkRun.id,
+ status: 'completed',
+ conclusion: 'success',
+ completed_at: new Date().toISOString(),
+ output: {
+ title: 'Requirements met',
+ summary: `Issue #${issueWithAuthorAssigned} has @${prAuthor} as assignee.`,
+ },
+ });
+
+ console.log(`PR requirements met!`);
+ }
diff --git a/.github/workflows/pr-requirements-backfill.yml b/.github/workflows/pr-requirements-backfill.yml
new file mode 100644
index 0000000000..40319df4bf
--- /dev/null
+++ b/.github/workflows/pr-requirements-backfill.yml
@@ -0,0 +1,138 @@
+name: PR Requirements Backfill
+
+on:
+ workflow_dispatch:
+
+jobs:
+ check-all-open-prs:
+ runs-on: ubuntu-latest
+ permissions:
+ pull-requests: write
+ issues: write
+
+ steps:
+ - name: Check all open PRs
+ uses: actions/github-script@v7
+ with:
+ script: |
+ const { data: pullRequests } = await github.rest.pulls.list({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ state: 'open',
+ per_page: 100,
+ });
+
+ console.log(`Found ${pullRequests.length} open PRs`);
+
+ for (const pr of pullRequests) {
+ const prNumber = pr.number;
+ const prBody = pr.body || '';
+ const prTitle = pr.title || '';
+ const prAuthor = pr.user.login;
+
+ console.log(`\nChecking PR #${prNumber}: ${prTitle}`);
+
+ // Extract issue numbers from body and title
+ const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+ const allText = `${prTitle} ${prBody}`;
+ const matches = [...allText.matchAll(issuePattern)];
+ const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+ console.log(` Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+ if (issueNumbers.length === 0) {
+ console.log(` ❌ No linked issue - closing PR`);
+
+ const message = `## PR Closed - Requirements Not Met
+
+ This PR has been automatically closed because it doesn't meet the requirements.
+
+ **Missing:** No linked issue found.
+
+ **To fix:**
+ 1. Create or find an existing issue for this work
+ 2. Assign yourself to the issue
+ 3. Re-open this PR and add \`Fixes #123\` in the description`;
+
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: message,
+ });
+
+ await github.rest.pulls.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ state: 'closed',
+ });
+
+ continue;
+ }
+
+ // Check if any linked issue has the PR author as assignee
+ let issueWithAuthorAssigned = null;
+ let issuesWithoutAuthor = [];
+
+ for (const issueNum of issueNumbers) {
+ try {
+ const { data: issue } = await github.rest.issues.get({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issueNum,
+ });
+
+ const assigneeLogins = (issue.assignees || []).map(a => a.login);
+ if (assigneeLogins.includes(prAuthor)) {
+ issueWithAuthorAssigned = issueNum;
+ break;
+ } else {
+ issuesWithoutAuthor.push({
+ number: issueNum,
+ assignees: assigneeLogins
+ });
+ }
+ } catch (error) {
+ console.log(` Issue #${issueNum} not found or inaccessible`);
+ }
+ }
+
+ if (!issueWithAuthorAssigned) {
+ const issueList = issuesWithoutAuthor.map(i =>
+ `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+ ).join(', ');
+
+ console.log(` ❌ PR author not assigned to any linked issue - closing PR`);
+
+ const message = `## PR Closed - Requirements Not Met
+
+ This PR has been automatically closed because it doesn't meet the requirements.
+
+ **PR Author:** @${prAuthor}
+ **Found issues:** ${issueList}
+ **Problem:** The PR author must be assigned to the linked issue.
+
+ **To fix:**
+ 1. Assign yourself (@${prAuthor}) to one of the linked issues
+ 2. Re-open this PR`;
+
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: message,
+ });
+
+ await github.rest.pulls.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ state: 'closed',
+ });
+ } else {
+ console.log(` ✅ PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
+ }
+ }
+
+ console.log('\nBackfill complete!');
diff --git a/.github/workflows/pr-requirements.yml b/.github/workflows/pr-requirements.yml
new file mode 100644
index 0000000000..0b4be8cf4a
--- /dev/null
+++ b/.github/workflows/pr-requirements.yml
@@ -0,0 +1,175 @@
+name: PR Requirements Check
+
+on:
+ pull_request_target:
+ types: [opened, reopened, edited, synchronize]
+
+jobs:
+ check-requirements:
+ runs-on: ubuntu-latest
+ permissions:
+ pull-requests: write
+ issues: write
+
+ steps:
+ - name: Check PR has linked issue with assignee
+ uses: actions/github-script@v7
+ with:
+ script: |
+ const pr = context.payload.pull_request;
+ const prNumber = pr.number;
+ const prBody = pr.body || '';
+ const prTitle = pr.title || '';
+ const prLabels = (pr.labels || []).map(l => l.name);
+
+ // Allow micro-fix and documentation PRs without a linked issue
+ const isMicroFix = prLabels.includes('micro-fix') || /micro-fix/i.test(prTitle);
+ const isDocumentation = prLabels.includes('documentation') || /\bdocs?\b/i.test(prTitle);
+ if (isMicroFix || isDocumentation) {
+ const reason = isMicroFix ? 'micro-fix' : 'documentation';
+ console.log(`PR #${prNumber} is a ${reason}, skipping issue requirement.`);
+ return;
+ }
+
+ // Extract issue numbers from body and title
+ // Matches: fixes #123, closes #123, resolves #123, or plain #123
+ const issuePattern = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)?\s*#(\d+)/gi;
+
+ const allText = `${prTitle} ${prBody}`;
+ const matches = [...allText.matchAll(issuePattern)];
+ const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))];
+
+ console.log(`PR #${prNumber}:`);
+ console.log(` Found issue references: ${issueNumbers.length > 0 ? issueNumbers.join(', ') : 'none'}`);
+
+ if (issueNumbers.length === 0) {
+ const message = `## PR Closed - Requirements Not Met
+
+ This PR has been automatically closed because it doesn't meet the requirements.
+
+ **Missing:** No linked issue found.
+
+ **To fix:**
+ 1. Create or find an existing issue for this work
+ 2. Assign yourself to the issue
+ 3. Re-open this PR and add \`Fixes #123\` in the description
+
+ **Exception:** To bypass this requirement, you can:
+ - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
+ - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
+
+ **Why is this required?** See #472 for details.`;
+
+ const comments = await github.rest.issues.listComments({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ });
+
+ const botComment = comments.data.find(
+ (c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
+ );
+
+ if (!botComment) {
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: message,
+ });
+ }
+
+ await github.rest.pulls.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ state: 'closed',
+ });
+
+ core.setFailed('PR must reference an issue');
+ return;
+ }
+
+ // Check if any linked issue has the PR author as assignee
+ const prAuthor = pr.user.login;
+ let issueWithAuthorAssigned = null;
+ let issuesWithoutAuthor = [];
+
+ for (const issueNum of issueNumbers) {
+ try {
+ const { data: issue } = await github.rest.issues.get({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issueNum,
+ });
+
+ const assigneeLogins = (issue.assignees || []).map(a => a.login);
+ if (assigneeLogins.includes(prAuthor)) {
+ issueWithAuthorAssigned = issueNum;
+ console.log(` Issue #${issueNum} has PR author ${prAuthor} as assignee`);
+ break;
+ } else {
+ issuesWithoutAuthor.push({
+ number: issueNum,
+ assignees: assigneeLogins
+ });
+ console.log(` Issue #${issueNum} assignees: ${assigneeLogins.length > 0 ? assigneeLogins.join(', ') : 'none'} (PR author: ${prAuthor})`);
+ }
+ } catch (error) {
+ console.log(` Issue #${issueNum} not found or inaccessible`);
+ }
+ }
+
+ if (!issueWithAuthorAssigned) {
+ const issueList = issuesWithoutAuthor.map(i =>
+ `#${i.number} (assignees: ${i.assignees.length > 0 ? i.assignees.join(', ') : 'none'})`
+ ).join(', ');
+
+ const message = `## PR Closed - Requirements Not Met
+
+ This PR has been automatically closed because it doesn't meet the requirements.
+
+ **PR Author:** @${prAuthor}
+ **Found issues:** ${issueList}
+ **Problem:** The PR author must be assigned to the linked issue.
+
+ **To fix:**
+ 1. Assign yourself (@${prAuthor}) to one of the linked issues
+ 2. Re-open this PR
+
+ **Exception:** To bypass this requirement, you can:
+ - Add the \`micro-fix\` label or include \`micro-fix\` in your PR title for trivial fixes
+ - Add the \`documentation\` label or include \`doc\`/\`docs\` in your PR title for documentation changes
+
+ **Why is this required?** See #472 for details.`;
+
+ const comments = await github.rest.issues.listComments({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ });
+
+ const botComment = comments.data.find(
+ (c) => c.user.type === 'Bot' && c.body.includes('PR Closed - Requirements Not Met')
+ );
+
+ if (!botComment) {
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: prNumber,
+ body: message,
+ });
+ }
+
+ await github.rest.pulls.update({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: prNumber,
+ state: 'closed',
+ });
+
+ core.setFailed('PR author must be assigned to the linked issue');
+ } else {
+ console.log(`PR requirements met! Issue #${issueWithAuthorAssigned} has ${prAuthor} as assignee.`);
+ }
diff --git a/.gitignore b/.gitignore
index 8be154f4ca..7761552cf1 100644
Binary files a/.gitignore and b/.gitignore differ
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000..93f5fa0388
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.8.6
+ hooks:
+ - id: ruff
+ name: ruff lint (core)
+ args: [--fix]
+ files: ^core/
+ - id: ruff
+ name: ruff lint (tools)
+ args: [--fix]
+ files: ^tools/
+ - id: ruff-format
+ name: ruff format (core)
+ files: ^core/
+ - id: ruff-format
+ name: ruff format (tools)
+ files: ^tools/
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 0000000000..88ae26a180
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,7 @@
+{
+ "recommendations": [
+ "charliermarsh.ruff",
+ "editorconfig.editorconfig",
+ "ms-python.python"
+ ]
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 90a7b86bda..96038df792 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,8 +25,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Removed
- N/A
+
### Fixed
-- N/A
+- tools: Fixed web_scrape tool attempting to parse non-HTML content (PDF, JSON) as HTML (#487)
### Security
- N/A
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a83094bf04..02f84ab553 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,6 +6,36 @@ Thank you for your interest in contributing to the Aden Agent Framework! This do
By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
+## Contributor License Agreement
+
+By submitting a Pull Request, you agree that your contributions will be licensed under the Aden Agent Framework license.
+
+## Issue Assignment Policy
+
+To prevent duplicate work and respect contributors' time, we require issue assignment before submitting PRs.
+
+### How to Claim an Issue
+
+1. **Find an Issue:** Browse existing issues or create a new one
+2. **Claim It:** Leave a comment (e.g., *"I'd like to work on this!"*)
+3. **Wait for Assignment:** A maintainer will assign you within 24 hours
+4. **Submit Your PR:** Once assigned, you're ready to contribute
+
+> **Note:** PRs for unassigned issues may be delayed or closed if someone else was already assigned.
+
+### The 5-Day Momentum Rule
+
+To keep the project moving, issues with **no activity for 5 days** (no PR or status update) will be unassigned. If you need more time, just drop a quick comment!
+
+### Exceptions (No Assignment Needed)
+
+You may submit PRs without prior assignment for:
+- **Documentation:** Fixing typos or clarifying instructions — add the `documentation` label or include `doc`/`docs` in your PR title to bypass the linked issue requirement
+- **Micro-fixes:** Minor tweaks or obvious linting errors — add the `micro-fix` label or include `micro-fix` in your PR title to bypass the linked issue requirement
+- **Small Refactors:** Tiny improvements that don't change core logic
+
+If a high-quality PR is submitted for a "stale" assigned issue (no activity for 7+ days), we may proceed with the submitted code.
+
## Getting Started
1. Fork the repository
@@ -29,6 +59,12 @@ python -c "import framework; import aden_tools; print('✓ Setup complete')"
./quickstart.sh
```
+> **Windows Users:**
+> If you are on native Windows, it is recommended to use **WSL (Windows Subsystem for Linux)**.
+> Alternatively, make sure to run PowerShell or Git Bash with Python 3.11+ installed, and disable "App Execution Aliases" in Windows settings.
+
+> **Tip:** Installing Claude Code skills is optional for running existing agents, but required if you plan to **build new agents**.
+
## Commit Convention
We follow [Conventional Commits](https://www.conventionalcommits.org/):
@@ -59,11 +95,12 @@ docs(readme): update installation instructions
## Pull Request Process
-1. Update documentation if needed
-2. Add tests for new functionality
-3. Ensure all tests pass
-4. Update the CHANGELOG.md if applicable
-5. Request review from maintainers
+1. **Get assigned to the issue first** (see [Issue Assignment Policy](#issue-assignment-policy))
+2. Update documentation if needed
+3. Add tests for new functionality
+4. Ensure all tests pass
+5. Update the CHANGELOG.md if applicable
+6. Request review from maintainers
### PR Title Format
@@ -92,6 +129,12 @@ feat(component): add new feature description
## Testing
+> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
+>
+> ```bash
+> PYTHONPATH=core:exports python -m agent_name test
+> ```
+
```bash
# Run all tests for the framework
cd core && python -m pytest
@@ -107,4 +150,4 @@ PYTHONPATH=core:exports python -m agent_name test
Feel free to open an issue for questions or join our [Discord community](https://discord.com/invite/MXE49hrKDk).
-Thank you for contributing!
+Thank you for contributing!
\ No newline at end of file
diff --git a/DEVELOPER.md b/DEVELOPER.md
index 862d9b8a9e..3f3a049ad0 100644
--- a/DEVELOPER.md
+++ b/DEVELOPER.md
@@ -213,7 +213,7 @@ The fastest way to build agents is using the Claude Code skills:
./quickstart.sh
# Build a new agent
-claude> /building-agents
+claude> /building-agents-construction
# Test the agent
claude> /testing-agent
@@ -224,7 +224,7 @@ claude> /testing-agent
1. **Define Your Goal**
```
- claude> /building-agents
+ claude> /building-agents-construction
Enter goal: "Build an agent that processes customer support tickets"
```
@@ -594,12 +594,13 @@ pip install -e .
```bash
# Option 1: Use Claude Code skill (recommended)
-claude> /building-agents
+claude> /building-agents-construction
-# Option 2: Copy from example
-cp -r exports/support_ticket_agent exports/my_new_agent
+# Option 2: Create manually
+# Note: exports/ is initially empty (gitignored). Create your agent directory:
+mkdir -p exports/my_new_agent
cd exports/my_new_agent
-# Edit agent.json, tools.py, README.md
+# Create agent.json, tools.py, README.md (see Agent Package Structure below)
# Option 3: Use the agent builder MCP tools (advanced)
# See core/MCP_BUILDER_TOOLS_GUIDE.md
diff --git a/ENVIRONMENT_SETUP.md b/ENVIRONMENT_SETUP.md
index 8e1cb30d36..47b084e701 100644
--- a/ENVIRONMENT_SETUP.md
+++ b/ENVIRONMENT_SETUP.md
@@ -9,6 +9,10 @@ Complete setup guide for building and running goal-driven agents with the Aden A
./scripts/setup-python.sh
```
+> **Note for Windows Users:**
+> Running the setup script on native Windows shells (PowerShell / Git Bash) may sometimes fail due to Python App Execution Aliases.
+> It is **strongly recommended to use WSL (Windows Subsystem for Linux)** for a smoother setup experience.
+
This will:
- Check Python version (requires 3.11+)
@@ -50,6 +54,9 @@ python -c "import aden_tools; print('✓ aden_tools OK')"
python -c "import litellm; print('✓ litellm OK')"
```
+> **Windows Tip:**
+> On Windows, if the verification commands fail, ensure you are running them in **WSL** or after **disabling Python App Execution Aliases** in Windows Settings → Apps → App Execution Aliases.
+
## Requirements
### Python Version
@@ -63,6 +70,7 @@ python -c "import litellm; print('✓ litellm OK')"
- pip (latest version)
- 2GB+ RAM
- Internet connection (for LLM API calls)
+- For Windows users: WSL 2 is recommended for full compatibility.
### API Keys (Optional)
@@ -132,7 +140,7 @@ This installs:
### 2. Build an Agent
```
-claude> /building-agents
+claude> /building-agents-construction
```
Follow the prompts to:
@@ -152,6 +160,31 @@ Creates comprehensive test suites for your agent.
## Troubleshooting
+### "externally-managed-environment" error (PEP 668)
+
+**Cause:** Python 3.12+ on macOS/Homebrew, WSL, or some Linux distros prevents system-wide pip installs.
+
+**Solution:** Create and use a virtual environment:
+
+```bash
+# Create virtual environment
+python3 -m venv .venv
+
+# Activate it
+source .venv/bin/activate # macOS/Linux
+# .venv\Scripts\activate # Windows
+
+# Then run setup
+./scripts/setup-python.sh
+```
+
+Always activate the venv before running agents:
+
+```bash
+source .venv/bin/activate
+PYTHONPATH=core:exports python -m your_agent_name demo
+```
+
### "ModuleNotFoundError: No module named 'framework'"
**Solution:** Install the core package:
@@ -188,7 +221,7 @@ pip install --upgrade "openai>=1.0.0"
**Cause:** Not running from project root or missing PYTHONPATH
-**Solution:** Ensure you're in `/home/timothy/oss/hive/` and use:
+**Solution:** Ensure you're in the project root directory and use:
```bash
PYTHONPATH=core:exports python -m support_ticket_agent validate
@@ -256,7 +289,7 @@ This design allows agents in `exports/` to be:
### 2. Build Agent (Claude Code)
```
-claude> /building-agents
+claude> /building-agents-construction
Enter goal: "Build an agent that processes customer support tickets"
```
@@ -343,4 +376,4 @@ When contributing agent packages:
- **Issues:** https://github.com/adenhq/hive/issues
- **Discord:** https://discord.com/invite/MXE49hrKDk
-- **Documentation:** https://docs.adenhq.com/
+- **Documentation:** https://docs.adenhq.com/
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000..1ad3a08a75
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+.PHONY: lint format check test install-hooks help
+
+help: ## Show this help
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
+ awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
+
+lint: ## Run ruff linter (with auto-fix)
+ cd core && ruff check --fix .
+ cd tools && ruff check --fix .
+
+format: ## Run ruff formatter
+ cd core && ruff format .
+ cd tools && ruff format .
+
+check: ## Run all checks without modifying files (CI-safe)
+ cd core && ruff check .
+ cd tools && ruff check .
+ cd core && ruff format --check .
+ cd tools && ruff format --check .
+
+test: ## Run all tests
+ cd core && python -m pytest tests/ -v
+
+install-hooks: ## Install pre-commit hooks
+ pip install pre-commit
+ pre-commit install
diff --git a/README.es.md b/README.es.md
index 0ebf5aa5ea..e7d50b0d52 100644
--- a/README.es.md
+++ b/README.es.md
@@ -8,7 +8,8 @@
Español |
Português |
日本語 |
- Русский
+ Русский |
+ 한국어
[](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -90,7 +91,7 @@ Esto instala:
./quickstart.sh
# Construir un agente usando Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Probar tu agente
claude> /testing-agent
@@ -236,7 +237,7 @@ Para construir y ejecutar agentes orientados a objetivos con el framework:
# - Todas las dependencias
# Construir nuevos agentes usando habilidades de Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Probar agentes
claude> /testing-agent
@@ -288,11 +289,14 @@ Usamos [Discord](https://discord.com/invite/MXE49hrKDk) para soporte, solicitude
¡Damos la bienvenida a las contribuciones! Por favor consulta [CONTRIBUTING.md](CONTRIBUTING.md) para las directrices.
-1. Haz fork del repositorio
-2. Crea tu rama de funcionalidad (`git checkout -b feature/amazing-feature`)
-3. Haz commit de tus cambios (`git commit -m 'Add amazing feature'`)
-4. Haz push a la rama (`git push origin feature/amazing-feature`)
-5. Abre un Pull Request
+**Importante:** Por favor, solicita que se te asigne un issue antes de enviar un PR. Comenta en el issue para reclamarlo y un mantenedor te lo asignará en 24 horas. Esto ayuda a evitar trabajo duplicado.
+
+1. Encuentra o crea un issue y solicita asignación
+2. Haz fork del repositorio
+3. Crea tu rama de funcionalidad (`git checkout -b feature/amazing-feature`)
+4. Haz commit de tus cambios (`git commit -m 'Add amazing feature'`)
+5. Haz push a la rama (`git push origin feature/amazing-feature`)
+6. Abre un Pull Request
## Únete a Nuestro Equipo
diff --git a/README.ja.md b/README.ja.md
index 12e095086a..5170470374 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -8,7 +8,8 @@
Español |
Português |
日本語 |
- Русский
+ Русский |
+ 한국어
[](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -90,7 +91,7 @@ cd hive
./quickstart.sh
# Claude Codeを使用してエージェントを構築
-claude> /building-agents
+claude> /building-agents-construction
# エージェントをテスト
claude> /testing-agent
@@ -236,7 +237,7 @@ hive/
# - すべての依存関係
# Claude Codeスキルを使用して新しいエージェントを構築
-claude> /building-agents
+claude> /building-agents-construction
# エージェントをテスト
claude> /testing-agent
@@ -288,11 +289,14 @@ timeline
貢献を歓迎します!ガイドラインについては[CONTRIBUTING.md](CONTRIBUTING.md)をご覧ください。
-1. リポジトリをフォーク
-2. 機能ブランチを作成 (`git checkout -b feature/amazing-feature`)
-3. 変更をコミット (`git commit -m 'Add amazing feature'`)
-4. ブランチにプッシュ (`git push origin feature/amazing-feature`)
-5. プルリクエストを開く
+**重要:** PRを提出する前に、まずIssueにアサインされてください。Issueにコメントして担当を申請すると、メンテナーが24時間以内にアサインします。これにより重複作業を防ぐことができます。
+
+1. Issueを見つけるか作成し、アサインを受ける
+2. リポジトリをフォーク
+3. 機能ブランチを作成 (`git checkout -b feature/amazing-feature`)
+4. 変更をコミット (`git commit -m 'Add amazing feature'`)
+5. ブランチにプッシュ (`git push origin feature/amazing-feature`)
+6. プルリクエストを開く
## チームに参加
diff --git a/README.ko.md b/README.ko.md
new file mode 100644
index 0000000000..2c67e8d860
--- /dev/null
+++ b/README.ko.md
@@ -0,0 +1,397 @@
+
+
+
+
+
+ English |
+ 简体中文 |
+ Español |
+ Português |
+ 日本語 |
+ Русский |
+ 한국어
+
+
+[](https://github.com/adenhq/hive/blob/main/LICENSE)
+[](https://www.ycombinator.com/companies/aden)
+[](https://hub.docker.com/u/adenhq)
+[](https://discord.com/invite/MXE49hrKDk)
+[](https://x.com/aden_hq)
+[](https://www.linkedin.com/company/teamaden/)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## 개요
+
+워크플로우를 하드코딩할 필요 없이 안정적이고 자체 개선 기능을 갖춘 AI 에이전트를 구축하세요. 코딩 에이전트와의 대화를 통해 목표를 정의하면, 프레임워크가 동적으로 생성된 연결 코드로 구성된 노드 그래프를 자동으로 생성합니다. 문제가 발생하면 프레임워크는 실패 데이터를 수집하고, 코딩 에이전트를 통해 에이전트를 진화시킨 뒤 다시 배포합니다. 사람이 개입할 수 있는(human-in-the-loop) 노드, 자격 증명 관리, 실시간 모니터링 기능이 기본으로 제공되어, 유연성을 유지하면서도 제어권을 잃지 않도록 합니다.
+
+자세한 문서, 예제, 가이드는 [adenhq.com](https://adenhq.com)에서 확인할 수 있습니다.
+
+## Aden이란 무엇인가
+
+
+
+
+
+Aden은 AI 에이전트를 구축, 배포, 운영, 적응시키기 위한 플랫폼입니다:
+
+- **Build** - 코딩 에이전트가 자연어로 정의된 목표를 기반으로 특화된 워커 에이전트(Sales, Marketing, Ops 등)를 생성
+- **Deploy** - CI/CD 통합과 전체 API 라이프사이클 관리를 포함한 헤드리스 배포 지원
+- **Operate** - 실시간 모니터링, 관측성(observability), 런타임 가드레일을 통해 에이전트를 안정적으로 유지
+- **Adapt** - 지속적인 평가, 감독, 적응 과정을 통해 에이전트가 시간이 지날수록 개선되도록 보장
+- **Infra** - 공유 메모리, LLM 연동, 도구, 스킬 등 모든 에이전트를 구동하는 인프라 제공
+
+## Quick Links
+
+- **[문서](https://docs.adenhq.com/)** - 전체 가이드와 API 레퍼런스
+- **[셀프 호스팅 가이드](https://docs.adenhq.com/getting-started/quickstart)** - 자체 인프라에 Hive 배포하기
+- **[변경 사항(Changelog)](https://github.com/adenhq/hive/releases)** - 최신 업데이트 및 릴리스 내역
+
+- **[이슈 신고](https://github.com/adenhq/hive/issues)** - 버그 리포트 및 기능 요청
+
+## 빠른 시작
+
+### 사전 요구 사항
+
+- 에이전트 개발을 위한 [Python 3.11+](https://www.python.org/downloads/)
+- 컨테이너 기반 도구 사용 시 선택 사항: [Docker](https://docs.docker.com/get-docker/) (v20.10+)
+
+### 설치
+
+```bash
+# 저장소 클론
+git clone https://github.com/adenhq/hive.git
+cd hive
+
+# Python 환경 설정 실행
+./scripts/setup-python.sh
+```
+
+다음 요소들이 설치됩니다:
+- **framework** - 핵심 에이전트 런타임 및 그래프 실행기
+- **aden_tools** - 에이전트 기능을 위한 19개의 MCP 도구
+- 필요한 모든 의존성
+
+### 첫 번째 에이전트 만들기
+
+```bash
+# Claude Code 스킬 설치 (최소 1회)
+./quickstart.sh
+
+# Claude Code를 사용해 에이전트 빌드
+claude> /building-agents-construction
+
+# 에이전트 테스트
+claude> /testing-agent
+
+# 에이전트 실행
+PYTHONPATH=core:exports python -m your_agent_name run --input '{...}'
+```
+
+**[📖 전체 설정 가이드](ENVIRONMENT_SETUP.md)** - 에이전트 개발을 위한 상세한 설명
+
+## 주요 기능
+
+- **목표 기반 개발** - 자연어로 목표를 정의하면, 코딩 에이전트가 이를 달성하기 위한 에이전트 그래프와 연결 코드를 생성
+- **자기 적응형 에이전트** - 프레임워크가 실패를 수집하고, 목표를 갱신하며, 에이전트 그래프를 업데이트
+- **동적 노드 연결** - 사전에 정의된 엣지 없음. 목표에 따라 어떤 역량을 갖춘 LLM이든 연결 코드를 생성
+- **SDK 래핑 노드** - 모든 노드는 기본적으로 공유 메모리, 로컬 RLM 메모리, 모니터링, 도구, LLM 접근 권한 제공
+- **사람 개입형(Human-in-the-Loop)** - 실행을 일시 중지하고 사람의 입력을 받는 개입 노드 제공 (타임아웃 및 에스컬레이션 설정 가능)
+- **실시간 관측성** - WebSocket 스트리밍을 통해 에이전트 실행, 의사결정, 노드 간 통신을 실시간으로 모니터링
+- **비용 및 예산 제어** - 지출 한도, 호출 제한, 자동 모델 다운그레이드 정책 설정 가능
+- **프로덕션 대응** - 셀프 호스팅 가능하며, 확장성과 안정성을 고려해 설계됨
+
+## 왜 Aden인가
+
+기존의 에이전트 프레임워크는 워크플로를 직접 설계하고, 에이전트 간 상호작용을 정의하며, 실패를 사후적으로 처리해야 합니다. Aden은 이 패러다임을 뒤집어 — **결과만 설명하면, 시스템이 스스로를 구축합니다.**
+
+```mermaid
+flowchart LR
+ subgraph BUILD["🏗️ BUILD"]
+ GOAL["Define Goal
+ Success Criteria"] --> NODES["Add Nodes
LLM/Router/Function"]
+ NODES --> EDGES["Connect Edges
on_success/failure/conditional"]
+ EDGES --> TEST["Test & Validate"] --> APPROVE["Approve & Export"]
+ end
+
+ subgraph EXPORT["📦 EXPORT"]
+ direction TB
+ JSON["agent.json
(GraphSpec)"]
+ TOOLS["tools.py
(Functions)"]
+ MCP["mcp_servers.json
(Integrations)"]
+ end
+
+ subgraph RUN["🚀 RUNTIME"]
+ LOAD["AgentRunner
Load + Parse"] --> SETUP["Setup Runtime
+ ToolRegistry"]
+ SETUP --> EXEC["GraphExecutor
Execute Nodes"]
+
+ subgraph DECISION["Decision Recording"]
+ DEC1["runtime.decide()
intent → options → choice"]
+ DEC2["runtime.record_outcome()
success, result, metrics"]
+ end
+ end
+
+ subgraph INFRA["⚙️ INFRASTRUCTURE"]
+ CTX["NodeContext
memory • llm • tools"]
+ STORE[("FileStorage
Runs & Decisions")]
+ end
+
+ APPROVE --> EXPORT
+ EXPORT --> LOAD
+ EXEC --> DECISION
+ EXEC --> CTX
+ DECISION --> STORE
+ STORE -.->|"Analyze & Improve"| NODES
+
+ style BUILD fill:#ffbe42,stroke:#cc5d00,stroke-width:3px,color:#333
+ style EXPORT fill:#fff59d,stroke:#ed8c00,stroke-width:2px,color:#333
+ style RUN fill:#ffb100,stroke:#cc5d00,stroke-width:3px,color:#333
+ style DECISION fill:#ffcc80,stroke:#ed8c00,stroke-width:2px,color:#333
+ style INFRA fill:#e8763d,stroke:#cc5d00,stroke-width:3px,color:#fff
+ style STORE fill:#ed8c00,stroke:#cc5d00,stroke-width:2px,color:#fff
+```
+
+### Aden의 강점
+
+| 기존 프레임워크 | Aden |
+| -------------- |---------------------|
+| 에이전트 워크플로 하드코딩 | 자연어로 목표를 설명 |
+| 수동 그래프 정의 | 에이전트 그래프 자동 생성 |
+| 사후 대응식 에러 처리 | 선제적 자기 진화 |
+| 정적인 도구 설정 | 동적인 SDK 래핑 노드 |
+| 별도의 모니터링 구성 | 내장된 실시간 관측성 |
+| 수동 예산 관리 | 비용 제어 및 모델 다운그레이드 통합 |
+
+### 작동 방식
+
+1. **목표 정의** → 달성하고 싶은 결과를 평범한 영어 문장으로 설명
+2. **코딩 에이전트 생성** → 에이전트 그래프, 연결 코드, 테스트 케이스를 생성
+3. **워커 실행** → SDK로 래핑된 노드가 완전한 관측성과 도구 접근 권한을 갖고 실행
+4. **컨트롤 플레인 모니터링** → 실시간 메트릭, 예산 집행, 정책 관리
+5. **자기 개선** → 실패 시 그래프를 진화시키고 자동으로 재배포
+
+## How Aden Compares
+
+Aden은 에이전트 개발에 대해 근본적으로 다른 접근 방식을 취합니다. 대부분의 프레임워크가 워크플로를 하드코딩하거나 에이전트 그래프를 수동으로 정의하도록 요구하는 반면, Aden은 **코딩 에이전트를 사용해 자연어 목표로부터 전체 에이전트 시스템을 생성**합니다. 에이전트가 실패했을 때도 단순히 에러를 기록하는 데서 끝나지 않고, **에이전트 그래프를 자동으로 진화시킨 뒤 다시 배포**합니다.
+
+### 비교 표
+
+| 프레임워크 | 분류 | 접근 방식 | Aden의 차별점 |
+| ----------------------------------- | --------------- | ---------------------------------------------- | ----------------------------- |
+| **LangChain, LlamaIndex, Haystack** | 컴포넌트 라이브러리 | RAG/LLM 앱용 사전 정의 컴포넌트, 수동 연결 로직 | 전체 그래프와 연결 코드를 처음부터 자동 생성 |
+| **CrewAI, AutoGen, Swarm** | 멀티 에이전트 오케스트레이션 | 역할 기반 에이전트와 사전 정의된 협업 패턴 | 동적으로 에이전트/연결 생성, 실패 시 적응 |
+| **PydanticAI, Mastra, Agno** | 타입 안전 프레임워크 | 알려진 워크플로를 위한 구조화된 출력 및 검증 | 반복을 통해 구조가 형성되는 진화형 워크플로 |
+| **Agent Zero, Letta** | 개인 AI 어시스턴트 | 메모리와 학습 중심, OS-as-tool 또는 상태 기반 메모리 | 자기 복구가 가능한 프로덕션용 멀티 에이전트 시스템 |
+| **CAMEL** | 연구용 프레임워크 | 대규모 시뮬레이션에서의 창발적 행동 연구 (최대 100만 에이전트) | 신뢰 가능한 실행과 복구를 중시한 프로덕션 지향 |
+| **TEN Framework, Genkit** | 인프라 프레임워크 | 실시간 멀티모달(TEN) 또는 풀스택 AI(Genkit) | 더 높은 추상화 수준에서 에이전트 로직 생성 및 진화 |
+| **GPT Engineer, Motia** | 코드 생성 | 명세 기반 코드 생성(GPT Engineer) 또는 Step 프리미티브(Motia) | 자동 실패 복구가 포함된 자기 적응형 그래프 |
+| **Trading Agents** | 도메인 특화 | LangGraph 기반, 트레이딩 회사 역할을 하드코딩 | 도메인 독립적, 모든 사용 사례에 맞는 구조 생성 |
+
+### Aden을 선택해야 할 때
+
+다음이 필요하다면 Aden을 선택:
+
+- 수동 개입 없이 **실패로부터 스스로 개선되는 에이전트**
+- 워크플로가 아닌 **결과 중심의 목표 기반 개발**
+- 자동 복구와 재배포를 포함한 **프로덕션 수준의 안정성**
+- 코드를 다시 쓰지 않고도 가능한 **빠른 에이전트 구조 반복**
+- 실시간 모니터링과 사람 개입이 가능한 **완전한 관측성**
+
+다음이 목적이라면 다른 프레임워크가 더 적합:
+
+- **타입 안전하고 예측 가능한 워크플로** (PydanticAI, Mastra)
+- **RAG 및 문서 처리** (LlamaIndex, Haystack)
+- **에이전트 창발성 연구** (CAMEL)
+- **실시간 음성·멀티모달 처리** (TEN Framework)
+- **단순한 컴포넌트 체이닝** (LangChain, Swarm)
+
+## Project Structure
+
+```
+hive/
+├── core/ # 핵심 프레임워크 – 에이전트 런타임, 그래프 실행기, 프로토콜
+├── tools/ # MCP 도구 패키지 – 에이전트 기능을 위한 19개 도구
+├── exports/ # 에이전트 패키지 – 사전 제작된 에이전트 및 예제
+├── docs/ # 문서 및 가이드
+├── scripts/ # 빌드 및 유틸리티 스크립트
+├── .claude/ # 에이전트 생성을 위한 Claude Code 스킬
+├── ENVIRONMENT_SETUP.md # 에이전트 개발을 위한 Python 환경 설정 가이드
+├── DEVELOPER.md # 개발자 가이드
+├── CONTRIBUTING.md # 기여 가이드라인
+└── ROADMAP.md # 제품 로드맵
+```
+
+## 개발
+
+### Python 에이전트 개발
+
+프레임워크를 사용해 목표 기반 에이전트를 구축하고 실행하기 위한 절차입니다:
+
+```bash
+# 최초 1회 설정
+./scripts/setup-python.sh
+
+# 다음 항목들이 설치됨:
+# - framework 패키지 (핵심 런타임)
+# - aden_tools 패키지 (19개의 MCP 도구)
+# - 모든 의존성
+
+# Claude Code 스킬을 사용해 새 에이전트 생성
+claude> /building-agents-construction
+
+# 에이전트 테스트
+claude> /testing-agent
+
+# 에이전트 실행
+PYTHONPATH=core:exports python -m agent_name run --input '{...}'
+```
+
+전체 설정 방법은 [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) 를 참고하세요.
+
+## 문서
+
+- **[개발자 가이드](DEVELOPER.md)** - 개발자를 위한 종합 가이드
+- [시작하기](docs/getting-started.md) - 빠른 설정 방법
+- [설정 가이드](docs/configuration.md) - 모든 설정 옵션 안내
+- [아키텍처 개요](docs/architecture.md) - 시스템 설계 및 구조
+
+## 로드맵
+
+Aden Agent Framework는 개발자가 결과 중심(outcome-oriented) 이며 자기 적응형(self-adaptive) 에이전트를 구축할 수 있도록 돕는 것을 목표로 합니다.
+자세한 로드맵은 아래 문서에서 확인할 수 있습니다.
+
+[ROADMAP.md](ROADMAP.md)
+
+```mermaid
+timeline
+ title Aden Agent Framework Roadmap
+ section Foundation
+ Architecture : Node-Based Architecture : Python SDK : LLM Integration (OpenAI, Anthropic, Google) : Communication Protocol
+ Coding Agent : Goal Creation Session : Worker Agent Creation : MCP Tools Integration
+ Worker Agent : Human-in-the-Loop : Callback Handlers : Intervention Points : Streaming Interface
+ Tools : File Use : Memory (STM/LTM) : Web Search : Web Scraper : Audit Trail
+ Core : Eval System : Pydantic Validation : Docker Deployment : Documentation : Sample Agents
+ section Expansion
+ Intelligence : Guardrails : Streaming Mode : Semantic Search
+ Platform : JavaScript SDK : Custom Tool Integrator : Credential Store
+ Deployment : Self-Hosted : Cloud Services : CI/CD Pipeline
+ Templates : Sales Agent : Marketing Agent : Analytics Agent : Training Agent : Smart Form Agent
+```
+
+## 커뮤니티 및 지원
+
+Aden은 지원, 기능 요청, 커뮤니티 토론을 위해 [Discord](https://discord.com/invite/MXE49hrKDk)를 사용합니다.
+
+- Discord - [커뮤니티 참여하기](https://discord.com/invite/MXE49hrKDk)
+- Twitter/X - [@adenhq](https://x.com/aden_hq)
+- LinkedIn - [회사 페이지](https://www.linkedin.com/company/teamaden/)
+
+## 기여하기
+
+기여를 환영합니다. 기여 가이드라인은 [CONTRIBUTING.md](CONTRIBUTING.md)를 참고해 주세요.
+
+**중요:** PR을 제출하기 전에 먼저 Issue에 할당받으세요. Issue에 댓글을 달아 담당을 요청하면 유지관리자가 24시간 내에 할당해 드립니다. 이는 중복 작업을 방지하는 데 도움이 됩니다.
+
+1. Issue를 찾거나 생성하고 할당받습니다
+2. 저장소를 포크합니다
+3. 기능 브랜치를 생성합니다 (`git checkout -b feature/amazing-feature`)
+4. 변경 사항을 커밋합니다 (`git commit -m 'Add amazing feature'`)
+5. 브랜치에 푸시합니다 (`git push origin feature/amazing-feature`)
+6. Pull Request를 생성합니다
+
+## 팀에 합류하세요
+
+**채용 중입니다!** 엔지니어링, 연구, 그리고 Go-To-Market 분야에서 함께하실 분을 찾고 있습니다.
+
+[채용 공고 보기](https://jobs.adenhq.com/a8cec478-cdbc-473c-bbd4-f4b7027ec193/applicant)
+
+## 보안
+
+보안 관련 문의 사항은 [SECURITY.md](SECURITY.md)를 참고해 주세요.
+
+## 라이선스
+
+본 프로젝트는 Apache License 2.0 하에 배포됩니다. 자세한 내용은 [LICENSE](LICENSE)를 참고해 주세요.
+
+## Frequently Asked Questions (FAQ)
+
+**Q: Aden은 LangChain이나 다른 에이전트 프레임워크에 의존하나요?**
+
+아니요. Aden은 LangChain, CrewAI, 또는 기타 에이전트 프레임워크에 전혀 의존하지 않고 처음부터 새롭게 구축되었습니다. 사전에 정의된 컴포넌트에 의존하는 대신, 에이전트 그래프를 동적으로 생성하도록 설계된 가볍고 유연한 프레임워크입니다.
+
+**Q: Aden은 어떤 LLM 제공자를 지원하나요?**
+
+Aden은 LiteLLM 연동을 통해 100개 이상의 LLM 제공자를 지원합니다. 여기에는 OpenAI(GPT-4, GPT-4o), Anthropic(Claude 모델), Google Gemini, Mistral, Groq 등이 포함됩니다. 적절한 API 키 환경 변수를 설정하고 모델 이름만 지정하면 바로 사용할 수 있습니다.
+
+**Q: Ollama 같은 로컬 AI 모델과 함께 Aden을 사용할 수 있나요?**
+
+네, 가능합니다. Aden은 LiteLLM을 통해 로컬 모델을 지원합니다. `ollama/model-name` 형식(예: `ollama/llama3`, `ollama/mistral`)으로 모델 이름을 지정하고, Ollama가 로컬에서 실행 중이면 됩니다.
+
+**Q: Aden이 다른 에이전트 프레임워크와 다른 점은 무엇인가요?**
+
+Aden은 코딩 에이전트를 사용해 자연어 목표로부터 전체 에이전트 시스템을 생성합니다. 워크플로를 하드코딩하거나 그래프를 수동으로 정의할 필요가 없습니다. 에이전트가 실패하면 프레임워크가 실패 데이터를 자동으로 수집하고, 에이전트 그래프를 진화시킨 뒤 다시 배포합니다. 이러한 자기 개선 루프는 Aden만의 고유한 특징입니다.
+
+**Q: Aden은 오픈소스인가요?**
+
+네. Aden은 Apache License 2.0 하에 배포되는 완전한 오픈소스 프로젝트입니다. 커뮤니티의 기여와 협업을 적극적으로 장려하고 있습니다.
+
+**Q: Aden은 사용자 데이터를 수집하나요?**
+
+Aden은 모니터링과 관측성을 위해 토큰 사용량, 지연 시간 메트릭, 비용 추적과 같은 텔레메트리 데이터를 수집합니다. 프롬프트 및 응답과 같은 콘텐츠 수집은 설정 가능하며, 팀 단위로 격리된 상태로 저장됩니다. 셀프 호스팅 환경에서는 모든 데이터가 사용자의 인프라 내부에만 저장됩니다.
+
+**Q: Aden은 어떤 배포 방식을 지원하나요?**
+
+Aden은 Python 패키지를 통한 셀프 호스팅 배포를 지원합니다. 설치 방법은 [환경 설정 가이드](ENVIRONMENT_SETUP.md)를 참조하세요. 클라우드 배포 옵션과 Kubernetes 대응 설정은 로드맵에 포함되어 있습니다.
+
+**Q: Aden은 복잡한 프로덕션 규모의 사용 사례도 처리할 수 있나요?**
+
+네. Aden은 자동 실패 복구, 실시간 관측성, 비용 제어, 수평 확장 지원 등 프로덕션 환경을 명확히 목표로 설계되었습니다. 단순한 자동화부터 복잡한 멀티 에이전트 워크플로까지 모두 처리할 수 있습니다.
+
+**Q: Aden은 Human-in-the-Loop 워크플로를 지원하나요?**
+
+네. Aden은 사람의 입력을 받기 위해 실행을 일시 중지하는 개입 노드를 통해 Human-in-the-Loop 워크플로를 완전히 지원합니다. 타임아웃과 에스컬레이션 정책을 설정할 수 있어, 인간 전문가와 AI 에이전트 간의 원활한 협업이 가능합니다.
+
+**Q: Aden은 어떤 모니터링 및 디버깅 도구를 제공하나요?**
+
+Aden은 다음과 같은 포괄적인 관측성 기능을 제공합니다. 실시간 에이전트 실행 모니터링을 위한 WebSocket 스트리밍, TimescaleDB 기반의 비용 및 성능 메트릭 분석, Kubernetes 연동을 위한 헬스 체크 엔드포인트, 예산 관리, 에이전트 상태, 정책 제어를 위한 19개의 MCP 도구
+
+**Q: Aden은 어떤 프로그래밍 언어를 지원하나요?**
+
+Aden은 Python과 JavaScript/TypeScript SDK를 모두 제공합니다. Python SDK에는 LangGraph, LangFlow, LiveKit 연동 템플릿이 포함되어 있습니다. 백엔드는 Node.js/TypeScript로 구현되어 있으며, 프론트엔드는 React/TypeScript를 사용합니다.
+
+**Q: Aden 에이전트는 외부 도구나 API와 연동할 수 있나요?**
+
+네. Aden의 SDK로 래핑된 노드는 기본적인 도구 접근 기능을 제공하며, 유연한 도구 생태계를 지원합니다. 노드 아키텍처를 통해 외부 API, 데이터베이스, 다양한 서비스와 연동할 수 있습니다.
+
+**Q: Aden에서 비용 제어는 어떻게 이루어지나요?**
+
+Aden은 지출 한도, 호출 제한, 자동 모델 다운그레이드 정책 등 세밀한 예산 제어 기능을 제공합니다. 팀, 에이전트, 워크플로 단위로 예산을 설정할 수 있으며, 실시간 비용 추적과 알림 기능을 제공합니다.
+
+**Q: 예제와 문서는 어디에서 확인할 수 있나요?**
+
+전체 가이드, API 레퍼런스, 시작 튜토리얼은 [docs.adenhq.com](https://docs.adenhq.com/) 에서 확인하실 수 있습니다. 또한 저장소의 `docs/` 디렉터리와 종합적인 [DEVELOPER.md](DEVELOPER.md) 가이드도 함께 제공됩니다.
+
+**Q: Aden에 기여하려면 어떻게 해야 하나요?**
+
+기여를 환영합니다. 저장소를 포크하고 기능 브랜치를 생성한 뒤 변경 사항을 구현하여 Pull Request를 제출해 주세요. 자세한 내용은 [CONTRIBUTING.md](CONTRIBUTING.md)를 참고해 주세요.
+
+**Q: Aden은 엔터프라이즈 지원을 제공하나요?**
+
+엔터프라이즈 관련 문의는 [adenhq.com](https://adenhq.com)을 통해 Aden 팀에 연락하시거나, 지원을 위해 [Discord community](https://discord.com/invite/MXE49hrKDk)에 참여해 주시기 바랍니다.
+
+---
+
+
+ Made with 🔥 Passion in San Francisco
+
diff --git a/README.md b/README.md
index 932a98bc59..faccb00811 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,8 @@
Español |
Português |
日本語 |
- Русский
+ Русский |
+ 한국어
[](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -90,7 +91,7 @@ This installs:
./quickstart.sh
# Build an agent using Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Test your agent
claude> /testing-agent
@@ -247,7 +248,7 @@ For building and running goal-driven agents with the framework:
# - All dependencies
# Build new agents using Claude Code skills
-claude> /building-agents
+claude> /building-agents-construction
# Test agents
claude> /testing-agent
@@ -263,7 +264,7 @@ See [ENVIRONMENT_SETUP.md](ENVIRONMENT_SETUP.md) for complete setup instructions
- **[Developer Guide](DEVELOPER.md)** - Comprehensive guide for developers
- [Getting Started](docs/getting-started.md) - Quick setup instructions
- [Configuration Guide](docs/configuration.md) - All configuration options
-- [Architecture Overview](docs/architecture.md) - System design and structure
+- [Architecture Overview](docs/architecture/README.md) - System design and structure
## Roadmap
@@ -299,11 +300,14 @@ We use [Discord](https://discord.com/invite/MXE49hrKDk) for support, feature req
We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
-1. Fork the repository
-2. Create your feature branch (`git checkout -b feature/amazing-feature`)
-3. Commit your changes (`git commit -m 'Add amazing feature'`)
-4. Push to the branch (`git push origin feature/amazing-feature`)
-5. Open a Pull Request
+**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you within 24 hours. This helps prevent duplicate work.
+
+1. Find or create an issue and get assigned
+2. Fork the repository
+3. Create your feature branch (`git checkout -b feature/amazing-feature`)
+4. Commit your changes (`git commit -m 'Add amazing feature'`)
+5. Push to the branch (`git push origin feature/amazing-feature`)
+6. Open a Pull Request
## Join Our Team
@@ -327,7 +331,7 @@ No. Aden is built from the ground up with no dependencies on LangChain, CrewAI,
**Q: What LLM providers does Aden support?**
-Aden supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
+Aden supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
**Q: Can I use Aden with local AI models like Ollama?**
@@ -347,7 +351,7 @@ Aden collects telemetry data for monitoring and observability purposes, includin
**Q: What deployment options does Aden support?**
-Aden supports Docker Compose deployment out of the box, with both production and development configurations. Self-hosted deployments work on any infrastructure supporting Docker. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.
+Aden supports self-hosted deployments via Python packages. See the [Environment Setup Guide](ENVIRONMENT_SETUP.md) for installation instructions. Cloud deployment options and Kubernetes-ready configurations are on the roadmap.
**Q: Can Aden handle complex, production-scale use cases?**
diff --git a/README.pt.md b/README.pt.md
index 6725de43e0..5a4544b2d9 100644
--- a/README.pt.md
+++ b/README.pt.md
@@ -8,7 +8,8 @@
Español |
Português |
日本語 |
- Русский
+ Русский |
+ 한국어
[](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -90,7 +91,7 @@ Isto instala:
./quickstart.sh
# Construir um agente usando Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Testar seu agente
claude> /testing-agent
@@ -236,7 +237,7 @@ Para construir e executar agentes orientados a objetivos com o framework:
# - Todas as dependências
# Construir novos agentes usando habilidades Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Testar agentes
claude> /testing-agent
@@ -288,11 +289,14 @@ Usamos [Discord](https://discord.com/invite/MXE49hrKDk) para suporte, solicitaç
Aceitamos contribuições! Por favor, consulte [CONTRIBUTING.md](CONTRIBUTING.md) para diretrizes.
-1. Faça fork do repositório
-2. Crie sua branch de funcionalidade (`git checkout -b feature/amazing-feature`)
-3. Faça commit das suas alterações (`git commit -m 'Add amazing feature'`)
-4. Faça push para a branch (`git push origin feature/amazing-feature`)
-5. Abra um Pull Request
+**Importante:** Por favor, seja atribuído a uma issue antes de enviar um PR. Comente na issue para reivindicá-la e um mantenedor irá atribuí-la a você em 24 horas. Isso ajuda a evitar trabalho duplicado.
+
+1. Encontre ou crie uma issue e seja atribuído
+2. Faça fork do repositório
+3. Crie sua branch de funcionalidade (`git checkout -b feature/amazing-feature`)
+4. Faça commit das suas alterações (`git commit -m 'Add amazing feature'`)
+5. Faça push para a branch (`git push origin feature/amazing-feature`)
+6. Abra um Pull Request
## Junte-se ao Nosso Time
diff --git a/README.ru.md b/README.ru.md
index 524af454dc..a3fd8497b7 100644
--- a/README.ru.md
+++ b/README.ru.md
@@ -8,7 +8,8 @@
Español |
Português |
日本語 |
- Русский
+ Русский |
+ 한국어
[](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -90,7 +91,7 @@ cd hive
./quickstart.sh
# Создать агента с помощью Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Протестировать агента
claude> /testing-agent
@@ -236,7 +237,7 @@ hive/
# - Все зависимости
# Создать новых агентов с помощью навыков Claude Code
-claude> /building-agents
+claude> /building-agents-construction
# Протестировать агентов
claude> /testing-agent
@@ -288,11 +289,14 @@ timeline
Мы приветствуем вклад! Пожалуйста, ознакомьтесь с [CONTRIBUTING.md](CONTRIBUTING.md) для руководств.
-1. Сделайте форк репозитория
-2. Создайте ветку функции (`git checkout -b feature/amazing-feature`)
-3. Зафиксируйте изменения (`git commit -m 'Add amazing feature'`)
-4. Отправьте в ветку (`git push origin feature/amazing-feature`)
-5. Откройте Pull Request
+**Важно:** Пожалуйста, получите назначение на issue перед отправкой PR. Оставьте комментарий в issue, чтобы заявить о своём желании работать над ним, и мейнтейнер назначит вас в течение 24 часов. Это помогает избежать дублирования работы.
+
+1. Найдите или создайте issue и получите назначение
+2. Сделайте форк репозитория
+3. Создайте ветку функции (`git checkout -b feature/amazing-feature`)
+4. Зафиксируйте изменения (`git commit -m 'Add amazing feature'`)
+5. Отправьте в ветку (`git push origin feature/amazing-feature`)
+6. Откройте Pull Request
## Присоединяйтесь к команде
diff --git a/README.zh-CN.md b/README.zh-CN.md
index 5608e199c7..8fa32e3690 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -8,7 +8,8 @@
Español |
Português |
日本語 |
- Русский
+ Русский |
+ 한국어
[](https://github.com/adenhq/hive/blob/main/LICENSE)
@@ -90,7 +91,7 @@ cd hive
./quickstart.sh
# 使用 Claude Code 构建智能体
-claude> /building-agents
+claude> /building-agents-construction
# 测试您的智能体
claude> /testing-agent
@@ -236,7 +237,7 @@ hive/
# - 所有依赖项
# 使用 Claude Code 技能构建新智能体
-claude> /building-agents
+claude> /building-agents-construction
# 测试智能体
claude> /testing-agent
@@ -288,11 +289,14 @@ timeline
我们欢迎贡献!请参阅 [CONTRIBUTING.md](CONTRIBUTING.md) 了解指南。
-1. Fork 仓库
-2. 创建功能分支 (`git checkout -b feature/amazing-feature`)
-3. 提交更改 (`git commit -m 'Add amazing feature'`)
-4. 推送到分支 (`git push origin feature/amazing-feature`)
-5. 创建 Pull Request
+**重要提示:** 请在提交 PR 之前先认领 Issue。在 Issue 下评论认领,维护者将在 24 小时内分配给您。我们致力于避免重复工作,让大家的努力不被浪费。
+
+1. 找到或创建 Issue 并获得分配
+2. Fork 仓库
+3. 创建功能分支 (`git checkout -b feature/amazing-feature`)
+4. 提交更改 (`git commit -m 'Add amazing feature'`)
+5. 推送到分支 (`git push origin feature/amazing-feature`)
+6. 创建 Pull Request
## 加入我们的团队
diff --git a/ROADMAP.md b/ROADMAP.md
index d5e888b25f..78fb468332 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,4 +1,4 @@
-Product Roadmap
+# Product Roadmap
Aden Agent Framework aims to help developers build outcome oriented, self-adaptive agents. Please find our roadmap here
diff --git a/core/examples/manual_agent.py b/core/examples/manual_agent.py
new file mode 100644
index 0000000000..49e6687372
--- /dev/null
+++ b/core/examples/manual_agent.py
@@ -0,0 +1,123 @@
+"""
+Minimal Manual Agent Example
+----------------------------
+This example demonstrates how to build and run an agent programmatically
+without using the Claude Code CLI or external LLM APIs.
+
+It uses 'function' nodes to define logic in pure Python, making it perfect
+for understanding the core runtime loop:
+Setup -> Graph definition -> Execution -> Result
+
+Run with:
+ PYTHONPATH=core python core/examples/manual_agent.py
+"""
+
+import asyncio
+
+from framework.graph import EdgeCondition, EdgeSpec, Goal, GraphSpec, NodeSpec
+from framework.graph.executor import GraphExecutor
+from framework.runtime.core import Runtime
+
+
+# 1. Define Node Logic (Pure Python Functions)
+def greet(name: str) -> str:
+ """Generate a simple greeting."""
+ return f"Hello, {name}!"
+
+
+def uppercase(greeting: str) -> str:
+ """Convert text to uppercase."""
+ return greeting.upper()
+
+
+async def main():
+ print("🚀 Setting up Manual Agent...")
+
+ # 2. Define the Goal
+ # Every agent needs a goal with success criteria
+ goal = Goal(
+ id="greet-user",
+ name="Greet User",
+ description="Generate a friendly uppercase greeting",
+ success_criteria=[
+ {
+ "id": "greeting_generated",
+ "description": "Greeting produced",
+ "metric": "custom",
+ "target": "any",
+ }
+ ],
+ )
+
+ # 3. Define Nodes
+ # Nodes describe steps in the process
+ node1 = NodeSpec(
+ id="greeter",
+ name="Greeter",
+ description="Generates a simple greeting",
+ node_type="function",
+ function="greet", # Matches the registered function name
+ input_keys=["name"],
+ output_keys=["greeting"],
+ )
+
+ node2 = NodeSpec(
+ id="uppercaser",
+ name="Uppercaser",
+ description="Converts greeting to uppercase",
+ node_type="function",
+ function="uppercase",
+ input_keys=["greeting"],
+ output_keys=["final_greeting"],
+ )
+
+ # 4. Define Edges
+ # Edges define the flow between nodes
+ edge1 = EdgeSpec(
+ id="greet-to-upper",
+ source="greeter",
+ target="uppercaser",
+ condition=EdgeCondition.ON_SUCCESS,
+ )
+
+ # 5. Create Graph
+ # The graph works like a blueprint connecting nodes and edges
+ graph = GraphSpec(
+ id="greeting-agent",
+ goal_id="greet-user",
+ entry_node="greeter",
+ terminal_nodes=["uppercaser"],
+ nodes=[node1, node2],
+ edges=[edge1],
+ )
+
+ # 6. Initialize Runtime & Executor
+ # Runtime handles state/memory; Executor runs the graph
+ from pathlib import Path
+
+ runtime = Runtime(storage_path=Path("./agent_logs"))
+ executor = GraphExecutor(runtime=runtime)
+
+ # 7. Register Function Implementations
+ # Connect string names in NodeSpecs to actual Python functions
+ executor.register_function("greeter", greet)
+ executor.register_function("uppercaser", uppercase)
+
+ # 8. Execute Agent
+ print("▶ Executing agent with input: name='Alice'...")
+
+ result = await executor.execute(graph=graph, goal=goal, input_data={"name": "Alice"})
+
+ # 9. Verify Results
+ if result.success:
+ print("\n✅ Success!")
+ print(f"Path taken: {' -> '.join(result.path)}")
+ print(f"Final output: {result.output.get('final_greeting')}")
+ else:
+ print(f"\n❌ Failed: {result.error}")
+
+
+if __name__ == "__main__":
+ # Optional: Enable logging to see internal decision flow
+ # logging.basicConfig(level=logging.INFO)
+ asyncio.run(main())
diff --git a/core/examples/mcp_integration_example.py b/core/examples/mcp_integration_example.py
index 53acc5d583..ec7c8440e5 100644
--- a/core/examples/mcp_integration_example.py
+++ b/core/examples/mcp_integration_example.py
@@ -37,9 +37,9 @@ async def example_1_programmatic_registration():
print(f"\nAvailable tools: {list(tools.keys())}")
# Run the agent with MCP tools available
- result = await runner.run({
- "objective": "Search for 'Claude AI' and summarize the top 3 results"
- })
+ result = await runner.run(
+ {"objective": "Search for 'Claude AI' and summarize the top 3 results"}
+ )
print(f"\nAgent result: {result}")
@@ -78,10 +78,8 @@ async def example_3_config_file():
# Copy example config (in practice, you'd place this in your agent folder)
import shutil
- shutil.copy(
- "examples/mcp_servers.json",
- test_agent_path / "mcp_servers.json"
- )
+
+ shutil.copy("examples/mcp_servers.json", test_agent_path / "mcp_servers.json")
# Load agent - MCP servers will be auto-discovered
runner = AgentRunner.load(test_agent_path)
@@ -110,18 +108,14 @@ async def example_4_custom_agent_with_mcp_tools():
builder.set_goal(
goal_id="web-researcher",
name="Web Research Agent",
- description="Search the web and summarize findings"
+ description="Search the web and summarize findings",
)
# Add success criteria
builder.add_success_criterion(
- "search-results",
- "Successfully retrieve at least 3 web search results"
- )
- builder.add_success_criterion(
- "summary",
- "Provide a clear, concise summary of the findings"
+ "search-results", "Successfully retrieve at least 3 web search results"
)
+ builder.add_success_criterion("summary", "Provide a clear, concise summary of the findings")
# Add nodes that will use MCP tools
builder.add_node(
@@ -192,6 +186,7 @@ async def main():
except Exception as e:
print(f"\nError running example: {e}")
import traceback
+
traceback.print_exc()
diff --git a/core/framework/__init__.py b/core/framework/__init__.py
index 4c0088e8a5..4bc274eeaa 100644
--- a/core/framework/__init__.py
+++ b/core/framework/__init__.py
@@ -22,22 +22,22 @@
See `framework.testing` for details.
"""
-from framework.schemas.decision import Decision, Option, Outcome, DecisionEvaluation
-from framework.schemas.run import Run, RunSummary, Problem
-from framework.runtime.core import Runtime
from framework.builder.query import BuilderQuery
-from framework.llm import LLMProvider, AnthropicProvider
-from framework.runner import AgentRunner, AgentOrchestrator
+from framework.llm import AnthropicProvider, LLMProvider
+from framework.runner import AgentOrchestrator, AgentRunner
+from framework.runtime.core import Runtime
+from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
+from framework.schemas.run import Problem, Run, RunSummary
# Testing framework
from framework.testing import (
+ ApprovalStatus,
+ DebugTool,
+ ErrorCategory,
Test,
TestResult,
- TestSuiteResult,
TestStorage,
- ApprovalStatus,
- ErrorCategory,
- DebugTool,
+ TestSuiteResult,
)
__all__ = [
diff --git a/core/framework/builder/__init__.py b/core/framework/builder/__init__.py
index 7a3c4a3e09..5e17b1c526 100644
--- a/core/framework/builder/__init__.py
+++ b/core/framework/builder/__init__.py
@@ -2,12 +2,12 @@
from framework.builder.query import BuilderQuery
from framework.builder.workflow import (
- GraphBuilder,
- BuildSession,
BuildPhase,
- ValidationResult,
+ BuildSession,
+ GraphBuilder,
TestCase,
TestResult,
+ ValidationResult,
)
__all__ = [
diff --git a/core/framework/builder/query.py b/core/framework/builder/query.py
index aeffc98538..1509c59193 100644
--- a/core/framework/builder/query.py
+++ b/core/framework/builder/query.py
@@ -8,12 +8,12 @@
4. What should we change? (suggestions)
"""
-from typing import Any
from collections import defaultdict
from pathlib import Path
+from typing import Any
from framework.schemas.decision import Decision
-from framework.schemas.run import Run, RunSummary, RunStatus
+from framework.schemas.run import Run, RunStatus, RunSummary
from framework.storage.backend import FileStorage
@@ -196,10 +196,7 @@ def analyze_failure(self, run_id: str) -> FailureAnalysis | None:
break
# Extract problems
- problems = [
- f"[{p.severity}] {p.description}"
- for p in run.problems
- ]
+ problems = [f"[{p.severity}] {p.description}" for p in run.problems]
# Generate suggestions based on the failure
suggestions = self._generate_suggestions(run, failed_decisions)
@@ -253,11 +250,7 @@ def find_patterns(self, goal_id: str) -> PatternAnalysis | None:
error = decision.outcome.error or "Unknown error"
failure_counts[error] += 1
- common_failures = sorted(
- failure_counts.items(),
- key=lambda x: x[1],
- reverse=True
- )[:5]
+ common_failures = sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[:5]
# Find problematic nodes
node_stats: dict[str, dict[str, int]] = defaultdict(lambda: {"total": 0, "failed": 0})
@@ -328,34 +321,45 @@ def suggest_improvements(self, goal_id: str) -> list[dict[str, Any]]:
# Suggestion: Fix problematic nodes
for node_id, failure_rate in patterns.problematic_nodes:
- suggestions.append({
- "type": "node_improvement",
- "target": node_id,
- "reason": f"Node has {failure_rate:.1%} failure rate",
- "recommendation": f"Review and improve node '{node_id}' - high failure rate suggests prompt or tool issues",
- "priority": "high" if failure_rate > 0.3 else "medium",
- })
+ suggestions.append(
+ {
+ "type": "node_improvement",
+ "target": node_id,
+ "reason": f"Node has {failure_rate:.1%} failure rate",
+ "recommendation": (
+ f"Review and improve node '{node_id}' - "
+ "high failure rate suggests prompt or tool issues"
+ ),
+ "priority": "high" if failure_rate > 0.3 else "medium",
+ }
+ )
# Suggestion: Address common failures
for failure, count in patterns.common_failures:
if count >= 2:
- suggestions.append({
- "type": "error_handling",
- "target": failure,
- "reason": f"Error occurred {count} times",
- "recommendation": f"Add handling for: {failure}",
- "priority": "high" if count >= 5 else "medium",
- })
+ suggestions.append(
+ {
+ "type": "error_handling",
+ "target": failure,
+ "reason": f"Error occurred {count} times",
+ "recommendation": f"Add handling for: {failure}",
+ "priority": "high" if count >= 5 else "medium",
+ }
+ )
# Suggestion: Overall success rate
if patterns.success_rate < 0.8:
- suggestions.append({
- "type": "architecture",
- "target": goal_id,
- "reason": f"Goal success rate is only {patterns.success_rate:.1%}",
- "recommendation": "Consider restructuring the agent graph or improving goal definition",
- "priority": "high",
- })
+ suggestions.append(
+ {
+ "type": "architecture",
+ "target": goal_id,
+ "reason": f"Goal success rate is only {patterns.success_rate:.1%}",
+ "recommendation": (
+ "Consider restructuring the agent graph or improving goal definition"
+ ),
+ "priority": "high",
+ }
+ )
return suggestions
@@ -408,21 +412,22 @@ def _generate_suggestions(
alternatives = [o for o in decision.options if o.id != decision.chosen_option_id]
if alternatives:
alt_desc = alternatives[0].description
+ chosen_desc = chosen.description if chosen else "unknown"
suggestions.append(
- f"Consider alternative: '{alt_desc}' instead of '{chosen.description if chosen else 'unknown'}'"
+ f"Consider alternative: '{alt_desc}' instead of '{chosen_desc}'"
)
# Check for missing context
if not decision.input_context:
suggestions.append(
- f"Decision '{decision.intent}' had no input context - ensure relevant data is passed"
+ f"Decision '{decision.intent}' had no input context - "
+ "ensure relevant data is passed"
)
# Check for constraint issues
if decision.active_constraints:
- suggestions.append(
- f"Review constraints: {', '.join(decision.active_constraints)} - may be too restrictive"
- )
+ constraints = ", ".join(decision.active_constraints)
+ suggestions.append(f"Review constraints: {constraints} - may be too restrictive")
# Check for reported problems with suggestions
for problem in run.problems:
@@ -471,15 +476,14 @@ def _find_differences(self, run1: Run, run2: Run) -> list[str]:
# Decision count difference
if len(run1.decisions) != len(run2.decisions):
- differences.append(
- f"Decision count: {len(run1.decisions)} vs {len(run2.decisions)}"
- )
+ differences.append(f"Decision count: {len(run1.decisions)} vs {len(run2.decisions)}")
# Find first divergence point
- for i, (d1, d2) in enumerate(zip(run1.decisions, run2.decisions)):
+ for i, (d1, d2) in enumerate(zip(run1.decisions, run2.decisions, strict=False)):
if d1.chosen_option_id != d2.chosen_option_id:
differences.append(
- f"Diverged at decision {i}: chose '{d1.chosen_option_id}' vs '{d2.chosen_option_id}'"
+ f"Diverged at decision {i}: "
+ f"chose '{d1.chosen_option_id}' vs '{d2.chosen_option_id}'"
)
break
diff --git a/core/framework/builder/workflow.py b/core/framework/builder/workflow.py
index baf1e5b5ac..2c1c0f45c8 100644
--- a/core/framework/builder/workflow.py
+++ b/core/framework/builder/workflow.py
@@ -13,32 +13,35 @@
You cannot skip steps or bypass validation.
"""
+from collections.abc import Callable
+from datetime import datetime
from enum import Enum
from pathlib import Path
-from datetime import datetime
-from typing import Any, Callable
+from typing import Any
from pydantic import BaseModel, Field
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import NodeSpec
-from framework.graph.edge import EdgeSpec, EdgeCondition, GraphSpec
class BuildPhase(str, Enum):
"""Current phase of the build process."""
- INIT = "init" # Just started
- GOAL_DRAFT = "goal_draft" # Drafting goal
+
+ INIT = "init" # Just started
+ GOAL_DRAFT = "goal_draft" # Drafting goal
GOAL_APPROVED = "goal_approved" # Goal approved
- ADDING_NODES = "adding_nodes" # Adding nodes
- ADDING_EDGES = "adding_edges" # Adding edges
- TESTING = "testing" # Running tests
- APPROVED = "approved" # Fully approved
- EXPORTED = "exported" # Exported to file
+ ADDING_NODES = "adding_nodes" # Adding nodes
+ ADDING_EDGES = "adding_edges" # Adding edges
+ TESTING = "testing" # Running tests
+ APPROVED = "approved" # Fully approved
+ EXPORTED = "exported" # Exported to file
class ValidationResult(BaseModel):
"""Result of a validation check."""
+
valid: bool
errors: list[str] = Field(default_factory=list)
warnings: list[str] = Field(default_factory=list)
@@ -47,6 +50,7 @@ class ValidationResult(BaseModel):
class TestCase(BaseModel):
"""A test case for validating agent behavior."""
+
id: str
description: str
input: dict[str, Any]
@@ -56,6 +60,7 @@ class TestCase(BaseModel):
class TestResult(BaseModel):
"""Result of running a test case."""
+
test_id: str
passed: bool
actual_output: Any = None
@@ -69,6 +74,7 @@ class BuildSession(BaseModel):
Saved after each approved step so you can resume later.
"""
+
id: str
name: str
phase: BuildPhase = BuildPhase.INIT
@@ -457,11 +463,14 @@ def run_test(
# Run the test
import asyncio
- result = asyncio.run(executor.execute(
- graph=graph,
- goal=self.session.goal,
- input_data=test.input,
- ))
+
+ result = asyncio.run(
+ executor.execute(
+ graph=graph,
+ goal=self.session.goal,
+ input_data=test.input,
+ )
+ )
# Check result
passed = result.success
@@ -515,12 +524,14 @@ def approve(self, comment: str) -> bool:
if not self._pending_validation.valid:
return False
- self.session.approvals.append({
- "phase": self.session.phase.value,
- "comment": comment,
- "timestamp": datetime.now().isoformat(),
- "validation": self._pending_validation.model_dump(),
- })
+ self.session.approvals.append(
+ {
+ "phase": self.session.phase.value,
+ "comment": comment,
+ "timestamp": datetime.now().isoformat(),
+ "validation": self._pending_validation.model_dump(),
+ }
+ )
# Advance phase if appropriate
if self.session.phase == BuildPhase.GOAL_DRAFT:
@@ -554,11 +565,13 @@ def final_approve(self, comment: str) -> bool:
return False
self.session.phase = BuildPhase.APPROVED
- self.session.approvals.append({
- "phase": "final",
- "comment": comment,
- "timestamp": datetime.now().isoformat(),
- })
+ self.session.approvals.append(
+ {
+ "phase": "final",
+ "comment": comment,
+ "timestamp": datetime.now().isoformat(),
+ }
+ )
self._save_session()
return True
@@ -630,69 +643,75 @@ def _generate_code(self, graph: GraphSpec) -> str:
"""Generate Python code for the graph."""
lines = [
'"""',
- f'Generated agent: {self.session.name}',
- f'Generated at: {datetime.now().isoformat()}',
+ f"Generated agent: {self.session.name}",
+ f"Generated at: {datetime.now().isoformat()}",
'"""',
- '',
- 'from framework.graph import (',
- ' Goal, SuccessCriterion, Constraint,',
- ' NodeSpec, EdgeSpec, EdgeCondition,',
- ')',
- 'from framework.graph.edge import GraphSpec',
- 'from framework.graph.goal import GoalStatus',
- '',
- '',
- '# Goal',
+ "",
+ "from framework.graph import (",
+ " Goal, SuccessCriterion, Constraint,",
+ " NodeSpec, EdgeSpec, EdgeCondition,",
+ ")",
+ "from framework.graph.edge import GraphSpec",
+ "from framework.graph.goal import GoalStatus",
+ "",
+ "",
+ "# Goal",
]
if self.session.goal:
goal_json = self.session.goal.model_dump_json(indent=4)
- lines.append('GOAL = Goal.model_validate_json(\'\'\'')
+ lines.append("GOAL = Goal.model_validate_json('''")
lines.append(goal_json)
lines.append("''')")
else:
- lines.append('GOAL = None')
+ lines.append("GOAL = None")
- lines.extend([
- '',
- '',
- '# Nodes',
- 'NODES = [',
- ])
+ lines.extend(
+ [
+ "",
+ "",
+ "# Nodes",
+ "NODES = [",
+ ]
+ )
for node in self.session.nodes:
node_json = node.model_dump_json(indent=4)
- lines.append(' NodeSpec.model_validate_json(\'\'\'')
+ lines.append(" NodeSpec.model_validate_json('''")
lines.append(node_json)
lines.append(" '''),")
- lines.extend([
- ']',
- '',
- '',
- '# Edges',
- 'EDGES = [',
- ])
+ lines.extend(
+ [
+ "]",
+ "",
+ "",
+ "# Edges",
+ "EDGES = [",
+ ]
+ )
for edge in self.session.edges:
edge_json = edge.model_dump_json(indent=4)
- lines.append(' EdgeSpec.model_validate_json(\'\'\'')
+ lines.append(" EdgeSpec.model_validate_json('''")
lines.append(edge_json)
lines.append(" '''),")
- lines.extend([
- ']',
- '',
- '',
- '# Graph',
- ])
+ lines.extend(
+ [
+ "]",
+ "",
+ "",
+ "# Graph",
+ ]
+ )
graph_json = graph.model_dump_json(indent=4)
- lines.append('GRAPH = GraphSpec.model_validate_json(\'\'\'')
+ lines.append("GRAPH = GraphSpec.model_validate_json('''")
lines.append(graph_json)
lines.append("''')")
- return '\n'.join(lines)
+ return "\n".join(lines)
# =========================================================================
# SESSION MANAGEMENT
@@ -743,7 +762,9 @@ def status(self) -> dict[str, Any]:
"tests": len(self.session.test_cases),
"tests_passed": sum(1 for t in self.session.test_results if t.passed),
"approvals": len(self.session.approvals),
- "pending_validation": self._pending_validation.model_dump() if self._pending_validation else None,
+ "pending_validation": self._pending_validation.model_dump()
+ if self._pending_validation
+ else None,
}
def show(self) -> str:
@@ -755,11 +776,13 @@ def show(self) -> str:
]
if self.session.goal:
- lines.extend([
- f"Goal: {self.session.goal.name}",
- f" {self.session.goal.description}",
- "",
- ])
+ lines.extend(
+ [
+ f"Goal: {self.session.goal.name}",
+ f" {self.session.goal.description}",
+ "",
+ ]
+ )
if self.session.nodes:
lines.append("Nodes:")
diff --git a/core/framework/cli.py b/core/framework/cli.py
index 5c52d54df9..0538d271c3 100644
--- a/core/framework/cli.py
+++ b/core/framework/cli.py
@@ -21,9 +21,7 @@
def main():
- parser = argparse.ArgumentParser(
- description="Goal Agent - Build and run goal-driven agents"
- )
+ parser = argparse.ArgumentParser(description="Goal Agent - Build and run goal-driven agents")
parser.add_argument(
"--model",
default="claude-haiku-4-5-20251001",
@@ -34,10 +32,12 @@ def main():
# Register runner commands (run, info, validate, list, dispatch, shell)
from framework.runner.cli import register_commands
+
register_commands(subparsers)
# Register testing commands (test-run, test-debug, test-list, test-stats)
from framework.testing.cli import register_testing_commands
+
register_testing_commands(subparsers)
args = parser.parse_args()
diff --git a/core/framework/credentials/__init__.py b/core/framework/credentials/__init__.py
new file mode 100644
index 0000000000..de8c203282
--- /dev/null
+++ b/core/framework/credentials/__init__.py
@@ -0,0 +1,92 @@
+"""
+Credential Store - Production-ready credential management for Hive.
+
+This module provides secure credential storage with:
+- Key-vault structure: Credentials as objects with multiple keys
+- Template-based usage: {{cred.key}} patterns for injection
+- Bipartisan model: Store stores values, tools define usage
+- Provider system: Extensible lifecycle management (refresh, validate)
+- Multiple backends: Encrypted files, env vars, HashiCorp Vault
+
+Quick Start:
+ from core.framework.credentials import CredentialStore, CredentialObject
+
+ # Create store with encrypted storage
+ store = CredentialStore.with_encrypted_storage("/var/hive/credentials")
+
+ # Get a credential
+ api_key = store.get("brave_search")
+
+ # Resolve templates in headers
+ headers = store.resolve_headers({
+ "Authorization": "Bearer {{github_oauth.access_token}}"
+ })
+
+ # Save a new credential
+ store.save_credential(CredentialObject(
+ id="my_api",
+ keys={"api_key": CredentialKey(name="api_key", value=SecretStr("xxx"))}
+ ))
+
+For OAuth2 support:
+ from core.framework.credentials.oauth2 import BaseOAuth2Provider, OAuth2Config
+
+For Vault integration:
+ from core.framework.credentials.vault import HashiCorpVaultStorage
+"""
+
+from .models import (
+ CredentialDecryptionError,
+ CredentialError,
+ CredentialKey,
+ CredentialKeyNotFoundError,
+ CredentialNotFoundError,
+ CredentialObject,
+ CredentialRefreshError,
+ CredentialType,
+ CredentialUsageSpec,
+ CredentialValidationError,
+)
+from .provider import (
+ BearerTokenProvider,
+ CredentialProvider,
+ StaticProvider,
+)
+from .storage import (
+ CompositeStorage,
+ CredentialStorage,
+ EncryptedFileStorage,
+ EnvVarStorage,
+ InMemoryStorage,
+)
+from .store import CredentialStore
+from .template import TemplateResolver
+
+__all__ = [
+ # Main store
+ "CredentialStore",
+ # Models
+ "CredentialObject",
+ "CredentialKey",
+ "CredentialType",
+ "CredentialUsageSpec",
+ # Providers
+ "CredentialProvider",
+ "StaticProvider",
+ "BearerTokenProvider",
+ # Storage backends
+ "CredentialStorage",
+ "EncryptedFileStorage",
+ "EnvVarStorage",
+ "InMemoryStorage",
+ "CompositeStorage",
+ # Template resolution
+ "TemplateResolver",
+ # Exceptions
+ "CredentialError",
+ "CredentialNotFoundError",
+ "CredentialKeyNotFoundError",
+ "CredentialRefreshError",
+ "CredentialValidationError",
+ "CredentialDecryptionError",
+]
diff --git a/core/framework/credentials/models.py b/core/framework/credentials/models.py
new file mode 100644
index 0000000000..02a49b9a5c
--- /dev/null
+++ b/core/framework/credentials/models.py
@@ -0,0 +1,293 @@
+"""
+Core data models for the credential store.
+
+This module defines the key-vault structure where credentials are objects
+containing one or more keys (e.g., api_key, access_token, refresh_token).
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field, SecretStr
+
+
+def _utc_now() -> datetime:
+ """Get current UTC time as timezone-aware datetime."""
+ return datetime.now(UTC)
+
+
class CredentialType(str, Enum):
    """Enumerates the kinds of credentials the store can manage."""

    API_KEY = "api_key"  # simple API key (e.g., Brave Search, OpenAI)
    OAUTH2 = "oauth2"  # OAuth2 with refresh-token support
    BASIC_AUTH = "basic_auth"  # username/password pair
    BEARER_TOKEN = "bearer_token"  # JWT or bearer token without refresh
    CUSTOM = "custom"  # user-defined credential type
+
+
class CredentialKey(BaseModel):
    """
    One named secret inside a credential object.

    Example: the 'api_key' entry of a 'brave_search' credential.

    Attributes:
        name: Key name (e.g., 'api_key', 'access_token')
        value: Secret value; SecretStr keeps it out of logs and reprs
        expires_at: Optional expiration timestamp
        metadata: Arbitrary key-specific metadata
    """

    name: str
    value: SecretStr
    expires_at: datetime | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)

    model_config = {"extra": "allow"}

    @property
    def is_expired(self) -> bool:
        """True once the expiration time (if any) has been reached."""
        expiry = self.expires_at
        return expiry is not None and datetime.now(UTC) >= expiry

    def get_secret_value(self) -> str:
        """Return the raw secret string (handle with care)."""
        return self.value.get_secret_value()
+
+
class CredentialObject(BaseModel):
    """
    A credential: a named bundle of one or more secret keys.

    This is the key-vault structure — a single credential may hold several
    related keys (e.g., access_token, refresh_token, expires_at).

    Example:
        CredentialObject(
            id="github_oauth",
            credential_type=CredentialType.OAUTH2,
            keys={
                "access_token": CredentialKey(name="access_token", value=SecretStr("ghp_xxx")),
                "refresh_token": CredentialKey(name="refresh_token", value=SecretStr("ghr_xxx")),
            },
            provider_id="oauth2"
        )

    Attributes:
        id: Unique identifier (e.g., 'brave_search', 'github_oauth')
        credential_type: Kind of credential (API_KEY, OAUTH2, ...)
        keys: Mapping of key name -> CredentialKey
        provider_id: Provider responsible for lifecycle management
        auto_refresh: Whether to automatically refresh when expired
    """

    id: str = Field(description="Unique identifier (e.g., 'brave_search', 'github_oauth')")
    credential_type: CredentialType = CredentialType.API_KEY
    keys: dict[str, CredentialKey] = Field(default_factory=dict)

    # Lifecycle management
    provider_id: str | None = Field(
        default=None,
        description="ID of provider responsible for lifecycle (e.g., 'oauth2', 'static')",
    )
    last_refreshed: datetime | None = None
    auto_refresh: bool = False

    # Usage tracking
    last_used: datetime | None = None
    use_count: int = 0

    # Metadata
    description: str = ""
    tags: list[str] = Field(default_factory=list)
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)

    model_config = {"extra": "allow"}

    def get_key(self, key_name: str) -> str | None:
        """
        Return the secret value of a specific key.

        Args:
            key_name: Name of the key to retrieve

        Returns:
            The key's secret value, or None when the key is absent
        """
        entry = self.keys.get(key_name)
        return None if entry is None else entry.get_secret_value()

    def set_key(
        self,
        key_name: str,
        value: str,
        expires_at: datetime | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """
        Create or replace a key and bump ``updated_at``.

        Args:
            key_name: Name of the key
            value: Secret value
            expires_at: Optional expiration time
            metadata: Optional key-specific metadata
        """
        entry = CredentialKey(
            name=key_name,
            value=SecretStr(value),
            expires_at=expires_at,
            metadata=metadata or {},
        )
        self.keys[key_name] = entry
        self.updated_at = datetime.now(UTC)

    def has_key(self, key_name: str) -> bool:
        """True when a key with this name exists."""
        return key_name in self.keys

    @property
    def needs_refresh(self) -> bool:
        """True when at least one key has expired."""
        return any(key.is_expired for key in self.keys.values())

    @property
    def is_valid(self) -> bool:
        """True when at least one non-expired key exists."""
        # any() over an empty mapping is False, covering the no-keys case.
        return any(not key.is_expired for key in self.keys.values())

    def record_usage(self) -> None:
        """Update usage bookkeeping (timestamp and counter)."""
        self.last_used = datetime.now(UTC)
        self.use_count += 1

    def get_default_key(self) -> str | None:
        """
        Return the most natural key value for this credential.

        Priority: 'value' > 'api_key' > 'access_token' > first key.

        Returns:
            The chosen key's secret value, or None when no keys exist
        """
        for candidate in ("value", "api_key", "access_token"):
            if candidate in self.keys:
                return self.get_key(candidate)

        if self.keys:
            return self.get_key(next(iter(self.keys)))

        return None
+
+
class CredentialUsageSpec(BaseModel):
    """
    Declares how a tool consumes a credential.

    The credential store only holds secret values; each tool describes,
    through this spec, where those values are injected into an HTTP
    request (headers, query parameters, or body) — the "bipartisan" model.

    Example:
        CredentialUsageSpec(
            credential_id="brave_search",
            required_keys=["api_key"],
            headers={"X-Subscription-Token": "{{api_key}}"}
        )

        CredentialUsageSpec(
            credential_id="github_oauth",
            required_keys=["access_token"],
            headers={"Authorization": "Bearer {{access_token}}"}
        )

    Attributes:
        credential_id: ID of credential to use
        required_keys: Keys that must be present
        headers: Header templates with {{key}} placeholders
        query_params: Query parameter templates
        body_fields: Request body field templates
    """

    credential_id: str = Field(description="ID of credential to use (e.g., 'brave_search')")
    required_keys: list[str] = Field(default_factory=list, description="Keys that must be present")

    # Injection templates (bipartisan model)
    headers: dict[str, str] = Field(
        default_factory=dict,
        description="Header templates (e.g., {'Authorization': 'Bearer {{access_token}}'})",
    )
    query_params: dict[str, str] = Field(
        default_factory=dict,
        description="Query param templates (e.g., {'api_key': '{{api_key}}'})",
    )
    body_fields: dict[str, str] = Field(
        default_factory=dict,
        description="Request body field templates",
    )

    # Metadata
    required: bool = True
    description: str = ""
    help_url: str = ""

    model_config = {"extra": "allow"}
+
+
class CredentialError(Exception):
    """Base exception for credential-related errors."""


class CredentialNotFoundError(CredentialError):
    """Raised when a referenced credential doesn't exist."""


class CredentialKeyNotFoundError(CredentialError):
    """Raised when a referenced key doesn't exist in a credential."""


class CredentialRefreshError(CredentialError):
    """Raised when credential refresh fails."""


class CredentialValidationError(CredentialError):
    """Raised when credential validation fails."""


class CredentialDecryptionError(CredentialError):
    """Raised when credential decryption fails."""
diff --git a/core/framework/credentials/oauth2/__init__.py b/core/framework/credentials/oauth2/__init__.py
new file mode 100644
index 0000000000..b5492aaa18
--- /dev/null
+++ b/core/framework/credentials/oauth2/__init__.py
@@ -0,0 +1,91 @@
+"""
+OAuth2 support for the credential store.
+
+This module provides OAuth2 credential management with:
+- Token types and configuration (OAuth2Token, OAuth2Config)
+- Generic OAuth2 provider (BaseOAuth2Provider)
+- Token lifecycle management (TokenLifecycleManager)
+
+Quick Start:
+ from core.framework.credentials import CredentialStore
+ from core.framework.credentials.oauth2 import BaseOAuth2Provider, OAuth2Config
+
+ # Configure OAuth2 provider
+ provider = BaseOAuth2Provider(OAuth2Config(
+ token_url="https://oauth2.example.com/token",
+ client_id="your-client-id",
+ client_secret="your-client-secret",
+ default_scopes=["read", "write"],
+ ))
+
+ # Create store with OAuth2 provider
+ store = CredentialStore.with_encrypted_storage(
+ "/var/hive/credentials",
+ providers=[provider]
+ )
+
+ # Get token using client credentials
+ token = provider.client_credentials_grant()
+
+ # Save to store
+ from core.framework.credentials import CredentialObject, CredentialKey, CredentialType
+ from pydantic import SecretStr
+
    keys = {
        "access_token": CredentialKey(
            name="access_token",
            value=SecretStr(token.access_token),
            expires_at=token.expires_at,
        ),
    }
    if token.refresh_token:
        # Only add the key when a refresh token was issued; a None
        # dict value would fail CredentialKey validation.
        keys["refresh_token"] = CredentialKey(
            name="refresh_token",
            value=SecretStr(token.refresh_token),
        )

    store.save_credential(CredentialObject(
        id="my_api",
        credential_type=CredentialType.OAUTH2,
        keys=keys,
        provider_id="oauth2",
        auto_refresh=True,
    ))
+
+For advanced lifecycle management:
+ from core.framework.credentials.oauth2 import TokenLifecycleManager
+
+ manager = TokenLifecycleManager(
+ provider=provider,
+ credential_id="my_api",
+ store=store,
+ )
+
+ # Get valid token (auto-refreshes if needed)
+ token = manager.sync_get_valid_token()
+ headers = manager.get_request_headers()
+"""
+
+from .base_provider import BaseOAuth2Provider
+from .lifecycle import TokenLifecycleManager, TokenRefreshResult
+from .provider import (
+ OAuth2Config,
+ OAuth2Error,
+ OAuth2Token,
+ RefreshTokenInvalidError,
+ TokenExpiredError,
+ TokenPlacement,
+)
+
+__all__ = [
+ # Types
+ "OAuth2Token",
+ "OAuth2Config",
+ "TokenPlacement",
+ # Provider
+ "BaseOAuth2Provider",
+ # Lifecycle
+ "TokenLifecycleManager",
+ "TokenRefreshResult",
+ # Errors
+ "OAuth2Error",
+ "TokenExpiredError",
+ "RefreshTokenInvalidError",
+]
diff --git a/core/framework/credentials/oauth2/base_provider.py b/core/framework/credentials/oauth2/base_provider.py
new file mode 100644
index 0000000000..ad0b6c2fd8
--- /dev/null
+++ b/core/framework/credentials/oauth2/base_provider.py
@@ -0,0 +1,486 @@
+"""
+Base OAuth2 provider implementation.
+
+This module provides a generic OAuth2 provider that works with standard
+OAuth2 servers. OSS users can extend this class for custom providers.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import Any
+from urllib.parse import urlencode
+
+from ..models import CredentialObject, CredentialRefreshError, CredentialType
+from ..provider import CredentialProvider
+from .provider import (
+ OAuth2Config,
+ OAuth2Error,
+ OAuth2Token,
+ TokenPlacement,
+)
+
+logger = logging.getLogger(__name__)
+
+
class BaseOAuth2Provider(CredentialProvider):
    """
    Generic OAuth2 provider implementation.

    Works with standard OAuth2 servers (RFC 6749). Override methods for
    provider-specific behavior.

    Supported grant types:
    - Client Credentials: For server-to-server authentication
    - Refresh Token: For refreshing expired access tokens
    - Authorization Code: For user-authorized access (requires callback handling)

    OSS users can extend this class for custom providers:

        class GitHubOAuth2Provider(BaseOAuth2Provider):
            def __init__(self, client_id: str, client_secret: str):
                super().__init__(OAuth2Config(
                    token_url="https://github.com/login/oauth/access_token",
                    authorization_url="https://github.com/login/oauth/authorize",
                    client_id=client_id,
                    client_secret=client_secret,
                    default_scopes=["repo", "user"],
                ))

            def exchange_code(self, code: str, redirect_uri: str, **kwargs) -> OAuth2Token:
                # GitHub returns data as form-encoded by default
                # Override to handle this
                ...

    Example usage:
        provider = BaseOAuth2Provider(OAuth2Config(
            token_url="https://oauth2.example.com/token",
            client_id="my-client-id",
            client_secret="my-client-secret",
        ))

        # Get token using client credentials
        token = provider.client_credentials_grant()

        # Refresh an expired token
        new_token = provider.refresh_access_token(old_token.refresh_token)
    """

    def __init__(self, config: OAuth2Config, provider_id: str = "oauth2"):
        """
        Initialize the OAuth2 provider.

        Args:
            config: OAuth2 configuration
            provider_id: Unique identifier for this provider instance
        """
        self.config = config
        self._provider_id = provider_id
        # Lazily-created httpx.Client; see _get_client()
        self._client: Any | None = None

    @property
    def provider_id(self) -> str:
        """Unique identifier for this provider instance."""
        return self._provider_id

    @property
    def supported_types(self) -> list[CredentialType]:
        """Credential types this provider can manage."""
        return [CredentialType.OAUTH2, CredentialType.BEARER_TOKEN]

    def _get_client(self) -> Any:
        """Get or create the HTTP client (httpx is an optional dependency)."""
        if self._client is None:
            try:
                import httpx

                self._client = httpx.Client(timeout=self.config.request_timeout)
            except ImportError as e:
                raise ImportError(
                    "OAuth2 provider requires 'httpx'. Install with: pip install httpx"
                ) from e
        return self._client

    def _close_client(self) -> None:
        """Close the HTTP client."""
        if self._client is not None:
            self._client.close()
            self._client = None

    def __del__(self) -> None:
        """Cleanup HTTP client on deletion."""
        # BUGFIX: guard against partially-initialized instances (__init__ may
        # have raised before _client was assigned) and never let an exception
        # escape __del__ — during interpreter shutdown that is noisy and
        # unrecoverable anyway.
        try:
            if getattr(self, "_client", None) is not None:
                self._close_client()
        except Exception:
            pass

    # --- Grant Types ---

    def get_authorization_url(
        self,
        state: str,
        redirect_uri: str,
        scopes: list[str] | None = None,
        **kwargs: Any,
    ) -> str:
        """
        Generate authorization URL for user consent (Authorization Code flow).

        Args:
            state: Anti-CSRF state parameter (should be random and verified)
            redirect_uri: Callback URL to receive the authorization code
            scopes: Requested scopes (defaults to config.default_scopes)
            **kwargs: Additional provider-specific parameters

        Returns:
            URL to redirect user for authorization

        Raises:
            ValueError: If authorization_url is not configured
        """
        if not self.config.authorization_url:
            raise ValueError("authorization_url not configured for this provider")

        params = {
            "client_id": self.config.client_id,
            "redirect_uri": redirect_uri,
            "response_type": "code",
            "state": state,
            "scope": " ".join(scopes or self.config.default_scopes),
            **kwargs,
        }

        return f"{self.config.authorization_url}?{urlencode(params)}"

    def exchange_code(
        self,
        code: str,
        redirect_uri: str,
        **kwargs: Any,
    ) -> OAuth2Token:
        """
        Exchange authorization code for tokens (Authorization Code flow).

        Args:
            code: Authorization code from callback
            redirect_uri: Same redirect_uri used in authorization request
            **kwargs: Additional provider-specific parameters

        Returns:
            OAuth2Token with access_token and optional refresh_token

        Raises:
            OAuth2Error: If token exchange fails
        """
        data = {
            "grant_type": "authorization_code",
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "code": code,
            "redirect_uri": redirect_uri,
            **self.config.extra_token_params,
            **kwargs,
        }

        return self._token_request(data)

    def client_credentials_grant(
        self,
        scopes: list[str] | None = None,
        **kwargs: Any,
    ) -> OAuth2Token:
        """
        Obtain token using client credentials (Client Credentials flow).

        This is for server-to-server authentication where no user is involved.

        Args:
            scopes: Requested scopes (defaults to config.default_scopes)
            **kwargs: Additional provider-specific parameters

        Returns:
            OAuth2Token (typically without refresh_token)

        Raises:
            OAuth2Error: If token request fails
        """
        data = {
            "grant_type": "client_credentials",
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            **self.config.extra_token_params,
            **kwargs,
        }

        if scopes or self.config.default_scopes:
            data["scope"] = " ".join(scopes or self.config.default_scopes)

        return self._token_request(data)

    def refresh_access_token(
        self,
        refresh_token: str,
        scopes: list[str] | None = None,
        **kwargs: Any,
    ) -> OAuth2Token:
        """
        Refresh an expired access token (Refresh Token flow).

        Args:
            refresh_token: The refresh token
            scopes: Scopes to request (defaults to original scopes)
            **kwargs: Additional provider-specific parameters

        Returns:
            New OAuth2Token (may include new refresh_token)

        Raises:
            OAuth2Error: If refresh fails
            RefreshTokenInvalidError: If refresh token is revoked/invalid
        """
        data = {
            "grant_type": "refresh_token",
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "refresh_token": refresh_token,
            **self.config.extra_token_params,
            **kwargs,
        }

        if scopes:
            data["scope"] = " ".join(scopes)

        return self._token_request(data)

    def revoke_token(
        self,
        token: str,
        token_type_hint: str = "access_token",
    ) -> bool:
        """
        Revoke a token (RFC 7009).

        Args:
            token: The token to revoke
            token_type_hint: "access_token" or "refresh_token"

        Returns:
            True if revocation succeeded
        """
        if not self.config.revocation_url:
            logger.warning("revocation_url not configured, cannot revoke token")
            return False

        try:
            client = self._get_client()
            response = client.post(
                self.config.revocation_url,
                data={
                    "token": token,
                    "token_type_hint": token_type_hint,
                    "client_id": self.config.client_id,
                    "client_secret": self.config.client_secret,
                },
                headers={"Accept": "application/json", **self.config.extra_headers},
            )
            # RFC 7009: 200 indicates success (even if token was already invalid)
            return response.status_code == 200
        except Exception as e:
            logger.error(f"Token revocation failed: {e}")
            return False

    # --- CredentialProvider Interface ---

    def refresh(self, credential: CredentialObject) -> CredentialObject:
        """
        Refresh a credential using its refresh token.

        Implements CredentialProvider.refresh().

        Args:
            credential: The credential to refresh

        Returns:
            Updated credential with new access_token

        Raises:
            CredentialRefreshError: If refresh fails
        """
        refresh_tok = credential.get_key("refresh_token")
        if not refresh_tok:
            raise CredentialRefreshError(f"Credential '{credential.id}' has no refresh_token")

        try:
            new_token = self.refresh_access_token(refresh_tok)
        except OAuth2Error as e:
            if e.error == "invalid_grant":
                raise CredentialRefreshError(
                    f"Refresh token for '{credential.id}' is invalid or revoked. "
                    "Re-authorization required."
                ) from e
            raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e

        # Update credential
        credential.set_key("access_token", new_token.access_token, expires_at=new_token.expires_at)

        # Update refresh token if a new one was issued (token rotation)
        if new_token.refresh_token and new_token.refresh_token != refresh_tok:
            credential.set_key("refresh_token", new_token.refresh_token)

        credential.last_refreshed = datetime.now(UTC)
        logger.info(f"Refreshed OAuth2 credential '{credential.id}'")

        return credential

    def validate(self, credential: CredentialObject) -> bool:
        """
        Validate that credential has a valid (non-expired) access_token.

        Args:
            credential: The credential to validate

        Returns:
            True if credential has valid access_token
        """
        access_key = credential.keys.get("access_token")
        if access_key is None:
            return False
        return not access_key.is_expired

    def should_refresh(self, credential: CredentialObject) -> bool:
        """
        Check if credential should be refreshed.

        Returns True if access_token is expired or within 5 minutes of expiry.
        """
        access_key = credential.keys.get("access_token")
        if access_key is None:
            return False

        if access_key.expires_at is None:
            return False

        buffer = timedelta(minutes=5)
        return datetime.now(UTC) >= (access_key.expires_at - buffer)

    def revoke(self, credential: CredentialObject) -> bool:
        """
        Revoke all tokens in a credential.

        Args:
            credential: The credential to revoke

        Returns:
            True if all revocations succeeded
        """
        success = True

        # Revoke access token
        access_token = credential.get_key("access_token")
        if access_token:
            if not self.revoke_token(access_token, "access_token"):
                success = False

        # Revoke refresh token
        refresh_token = credential.get_key("refresh_token")
        if refresh_token:
            if not self.revoke_token(refresh_token, "refresh_token"):
                success = False

        return success

    # --- Token Request Helpers ---

    def _token_request(self, data: dict[str, Any]) -> OAuth2Token:
        """
        Make a token request to the OAuth2 server.

        Args:
            data: Form data for the token request

        Returns:
            OAuth2Token from the response

        Raises:
            OAuth2Error: If request fails or returns an error
        """
        client = self._get_client()

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/x-www-form-urlencoded",
            **self.config.extra_headers,
        }

        response = client.post(self.config.token_url, data=data, headers=headers)

        # Parse response
        content_type = response.headers.get("content-type", "")
        if "application/json" in content_type:
            response_data = response.json()
        else:
            # Some providers (like GitHub) may return form-encoded
            response_data = self._parse_form_response(response.text)

        # Check for error
        if response.status_code != 200 or "error" in response_data:
            error = response_data.get("error", "unknown_error")
            description = response_data.get("error_description", response.text)
            raise OAuth2Error(
                error=error, description=description, status_code=response.status_code
            )

        return OAuth2Token.from_token_response(response_data)

    def _parse_form_response(self, text: str) -> dict[str, str]:
        """Parse form-encoded response (some providers use this instead of JSON)."""
        from urllib.parse import parse_qs

        parsed = parse_qs(text)
        return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}

    # --- Token Formatting for Requests ---

    def format_for_request(self, token: OAuth2Token) -> dict[str, Any]:
        """
        Format token for use in HTTP requests (bipartisan model).

        Args:
            token: The OAuth2 token

        Returns:
            Dict with 'headers', 'params', or 'data' keys as appropriate
        """
        placement = self.config.token_placement

        if placement == TokenPlacement.HEADER_BEARER:
            return {"headers": {"Authorization": f"{token.token_type} {token.access_token}"}}

        elif placement == TokenPlacement.HEADER_CUSTOM:
            header_name = self.config.custom_header_name or "X-Access-Token"
            return {"headers": {header_name: token.access_token}}

        elif placement == TokenPlacement.QUERY_PARAM:
            return {"params": {self.config.query_param_name: token.access_token}}

        elif placement == TokenPlacement.BODY_PARAM:
            return {"data": {"access_token": token.access_token}}

        return {}

    def format_credential_for_request(self, credential: CredentialObject) -> dict[str, Any]:
        """
        Format a credential for use in HTTP requests.

        Args:
            credential: The credential containing access_token

        Returns:
            Dict with 'headers', 'params', or 'data' keys as appropriate
        """
        access_token = credential.get_key("access_token")
        if not access_token:
            return {}

        # BUGFIX: credential.keys.get("token_type", ...) returns a
        # CredentialKey object, not the secret string, which would put a
        # model instance into OAuth2Token.token_type. Use get_key() to
        # extract the raw value and fall back to "Bearer".
        token = OAuth2Token(
            access_token=access_token,
            token_type=credential.get_key("token_type") or "Bearer",
        )

        return self.format_for_request(token)
diff --git a/core/framework/credentials/oauth2/lifecycle.py b/core/framework/credentials/oauth2/lifecycle.py
new file mode 100644
index 0000000000..89ac2c7edd
--- /dev/null
+++ b/core/framework/credentials/oauth2/lifecycle.py
@@ -0,0 +1,363 @@
+"""
+Token lifecycle management for OAuth2 credentials.
+
+This module provides the TokenLifecycleManager which coordinates
+automatic token refresh with the credential store.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from collections.abc import Callable
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING
+
+from pydantic import SecretStr
+
+from ..models import CredentialKey, CredentialObject, CredentialType
+from .base_provider import BaseOAuth2Provider
+from .provider import OAuth2Token
+
+if TYPE_CHECKING:
+ from ..store import CredentialStore
+
+logger = logging.getLogger(__name__)
+
+
@dataclass
class TokenRefreshResult:
    """Outcome of a single token refresh attempt."""

    success: bool  # did the refresh complete?
    token: OAuth2Token | None = None  # the new token, when success is True
    error: str | None = None  # failure description, when success is False
    needs_reauthorization: bool = False  # refresh token missing/invalid; user must re-consent
+
+
class TokenLifecycleManager:
    """
    Manages the complete lifecycle of OAuth2 tokens.

    Responsibilities:
    - Coordinate with CredentialStore for persistence
    - Automatically refresh expired tokens
    - Handle refresh failures gracefully
    - Provide callbacks for monitoring

    This class is useful when you need more control over token management
    than the basic auto-refresh in CredentialStore provides.

    Usage:
        manager = TokenLifecycleManager(
            provider=github_provider,
            credential_id="github_oauth",
            store=credential_store,
        )

        # Get valid token (auto-refreshes if needed)
        token = await manager.get_valid_token()

        # Use token
        headers = provider.format_for_request(token)

    Synchronous usage:
        # For synchronous code, use sync_ methods
        token = manager.sync_get_valid_token()
    """

    def __init__(
        self,
        provider: BaseOAuth2Provider,
        credential_id: str,
        store: CredentialStore,
        refresh_buffer_minutes: int = 5,
        on_token_refreshed: Callable[[OAuth2Token], None] | None = None,
        on_refresh_failed: Callable[[str], None] | None = None,
    ):
        """
        Initialize the lifecycle manager.

        Args:
            provider: OAuth2 provider for token operations
            credential_id: ID of the credential in the store
            store: Credential store for persistence
            refresh_buffer_minutes: Minutes before expiry to trigger refresh
            on_token_refreshed: Callback when token is refreshed
            on_refresh_failed: Callback when refresh fails
        """
        self.provider = provider
        self.credential_id = credential_id
        self.store = store
        self.refresh_buffer = timedelta(minutes=refresh_buffer_minutes)
        self.on_token_refreshed = on_token_refreshed
        self.on_refresh_failed = on_refresh_failed

        # In-memory cache for performance
        self._cached_token: OAuth2Token | None = None
        self._cache_time: datetime | None = None

    # --- Async Token Access ---

    async def get_valid_token(self) -> OAuth2Token | None:
        """
        Get a valid access token, refreshing if necessary.

        This is the main entry point for async code.

        Returns:
            Valid OAuth2Token or None if unavailable
        """
        # Check cache first
        if self._cached_token and not self._needs_refresh(self._cached_token):
            return self._cached_token

        # Load from store
        credential = self.store.get_credential(self.credential_id, refresh_if_needed=False)
        if credential is None:
            return None

        # Convert to OAuth2Token
        token = self._credential_to_token(credential)
        if token is None:
            return None

        # Refresh if needed
        if self._needs_refresh(token):
            result = await self._async_refresh_token(credential)
            if result.success and result.token:
                token = result.token
            elif result.needs_reauthorization:
                logger.warning(f"Token for {self.credential_id} needs reauthorization")
                return None
            else:
                # Use existing token if still technically valid
                if token.is_expired:
                    return None
                logger.warning(f"Refresh failed for {self.credential_id}, using existing token")

        self._cached_token = token
        self._cache_time = datetime.now(UTC)
        return token

    async def acquire_token_client_credentials(
        self,
        scopes: list[str] | None = None,
    ) -> OAuth2Token:
        """
        Acquire a new token using client credentials flow.

        For service-to-service authentication.

        Args:
            scopes: Scopes to request

        Returns:
            New OAuth2Token
        """
        # Run the blocking HTTP call in an executor. BUGFIX: use
        # get_running_loop() — get_event_loop() inside a coroutine is
        # deprecated since Python 3.10.
        loop = asyncio.get_running_loop()
        token = await loop.run_in_executor(
            None, lambda: self.provider.client_credentials_grant(scopes=scopes)
        )

        self._save_token_to_store(token)
        self._cached_token = token
        return token

    async def revoke(self) -> bool:
        """
        Revoke tokens and clear from store.

        Returns:
            True if revocation succeeded (or there was nothing to revoke)
        """
        # BUGFIX: propagate the provider's revocation result instead of
        # unconditionally returning True, so the documented contract holds.
        revoked = True
        credential = self.store.get_credential(self.credential_id, refresh_if_needed=False)
        if credential:
            revoked = self.provider.revoke(credential)

        self.store.delete_credential(self.credential_id)
        self._cached_token = None
        return revoked

    # --- Synchronous Token Access ---

    def sync_get_valid_token(self) -> OAuth2Token | None:
        """
        Synchronous version of get_valid_token().

        For use in synchronous code.
        """
        # Check cache
        if self._cached_token and not self._needs_refresh(self._cached_token):
            return self._cached_token

        # Load from store
        credential = self.store.get_credential(self.credential_id, refresh_if_needed=False)
        if credential is None:
            return None

        token = self._credential_to_token(credential)
        if token is None:
            return None

        # Refresh if needed
        if self._needs_refresh(token):
            result = self._sync_refresh_token(credential)
            if result.success and result.token:
                token = result.token
            elif result.needs_reauthorization:
                logger.warning(f"Token for {self.credential_id} needs reauthorization")
                return None
            else:
                if token.is_expired:
                    return None
                # Consistency with get_valid_token(): log the fallback.
                logger.warning(f"Refresh failed for {self.credential_id}, using existing token")

        self._cached_token = token
        self._cache_time = datetime.now(UTC)
        return token

    def sync_acquire_token_client_credentials(
        self,
        scopes: list[str] | None = None,
    ) -> OAuth2Token:
        """Synchronous version of acquire_token_client_credentials()."""
        token = self.provider.client_credentials_grant(scopes=scopes)
        self._save_token_to_store(token)
        self._cached_token = token
        return token

    # --- Helper Methods ---

    def _needs_refresh(self, token: OAuth2Token) -> bool:
        """Check if token needs refresh (expired or within the buffer window)."""
        if token.expires_at is None:
            return False
        return datetime.now(UTC) >= (token.expires_at - self.refresh_buffer)

    def _credential_to_token(self, credential: CredentialObject) -> OAuth2Token | None:
        """Convert credential to OAuth2Token; None when no access_token exists."""
        access_token = credential.get_key("access_token")
        if not access_token:
            return None

        expires_at = None
        access_key = credential.keys.get("access_token")
        if access_key:
            expires_at = access_key.expires_at

        return OAuth2Token(
            access_token=access_token,
            token_type="Bearer",
            expires_at=expires_at,
            refresh_token=credential.get_key("refresh_token"),
            scope=credential.get_key("scope"),
        )

    def _save_token_to_store(self, token: OAuth2Token) -> None:
        """Persist a token to the credential store as a fresh CredentialObject."""
        credential = CredentialObject(
            id=self.credential_id,
            credential_type=CredentialType.OAUTH2,
            keys={
                "access_token": CredentialKey(
                    name="access_token",
                    value=SecretStr(token.access_token),
                    expires_at=token.expires_at,
                ),
            },
            provider_id=self.provider.provider_id,
            auto_refresh=True,
        )

        if token.refresh_token:
            credential.keys["refresh_token"] = CredentialKey(
                name="refresh_token",
                value=SecretStr(token.refresh_token),
            )

        if token.scope:
            credential.keys["scope"] = CredentialKey(
                name="scope",
                value=SecretStr(token.scope),
            )

        self.store.save_credential(credential)

    async def _async_refresh_token(self, credential: CredentialObject) -> TokenRefreshResult:
        """Async wrapper for token refresh."""
        # BUGFIX: get_running_loop() instead of deprecated get_event_loop().
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: self._sync_refresh_token(credential))

    def _sync_refresh_token(self, credential: CredentialObject) -> TokenRefreshResult:
        """Synchronously refresh token."""
        refresh_token = credential.get_key("refresh_token")
        if not refresh_token:
            return TokenRefreshResult(
                success=False,
                error="No refresh token available",
                needs_reauthorization=True,
            )

        try:
            new_token = self.provider.refresh_access_token(refresh_token)

            # Save to store
            self._save_token_to_store(new_token)

            # Notify callback
            if self.on_token_refreshed:
                self.on_token_refreshed(new_token)

            logger.info(f"Token refreshed for {self.credential_id}")
            return TokenRefreshResult(success=True, token=new_token)

        except Exception as e:
            error_msg = str(e)

            # Check for refresh token revocation
            if "invalid_grant" in error_msg.lower():
                return TokenRefreshResult(
                    success=False,
                    error=error_msg,
                    needs_reauthorization=True,
                )

            if self.on_refresh_failed:
                self.on_refresh_failed(error_msg)

            logger.error(f"Token refresh failed for {self.credential_id}: {e}")
            return TokenRefreshResult(success=False, error=error_msg)

    def invalidate_cache(self) -> None:
        """Clear cached token."""
        self._cached_token = None
        self._cache_time = None

    # --- Convenience Methods ---

    def get_request_headers(self) -> dict[str, str]:
        """
        Get headers for HTTP request with current token.

        Returns empty dict if no valid token.
        """
        token = self.sync_get_valid_token()
        if token is None:
            return {}

        result = self.provider.format_for_request(token)
        return result.get("headers", {})

    def get_request_kwargs(self) -> dict:
        """
        Get kwargs for HTTP request (headers, params, etc.).

        Returns empty dict if no valid token.
        """
        token = self.sync_get_valid_token()
        if token is None:
            return {}

        return self.provider.format_for_request(token)
diff --git a/core/framework/credentials/oauth2/provider.py b/core/framework/credentials/oauth2/provider.py
new file mode 100644
index 0000000000..c94ea530eb
--- /dev/null
+++ b/core/framework/credentials/oauth2/provider.py
@@ -0,0 +1,213 @@
+"""
+OAuth2 types and configuration.
+
+This module defines the core OAuth2 data structures:
+- OAuth2Token: Represents an access token with metadata
+- OAuth2Config: Configuration for OAuth2 endpoints
+- TokenPlacement: Where to place tokens in requests
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import UTC, datetime, timedelta
+from enum import Enum
+from typing import Any
+
+
class TokenPlacement(str, Enum):
    """Strategies for attaching the access token to an HTTP request."""

    # Authorization: Bearer <token> (most common)
    HEADER_BEARER = "header_bearer"

    # Custom header name (e.g. X-Access-Token)
    HEADER_CUSTOM = "header_custom"

    # Query parameter (e.g. ?access_token=<token>)
    QUERY_PARAM = "query_param"

    # Form body parameter
    BODY_PARAM = "body_param"
+
+
+@dataclass
+class OAuth2Token:
+ """
+ Represents an OAuth2 token with metadata.
+
+ Attributes:
+ access_token: The access token string
+ token_type: Token type (usually "Bearer")
+ expires_at: When the token expires
+ refresh_token: Optional refresh token
+ scope: Granted scopes (space-separated)
+ raw_response: Original token response from server
+ """
+
+ access_token: str
+ token_type: str = "Bearer"
+ expires_at: datetime | None = None
+ refresh_token: str | None = None
+ scope: str | None = None
+ raw_response: dict[str, Any] = field(default_factory=dict)
+
+ @property
+ def is_expired(self) -> bool:
+ """
+ Check if token is expired.
+
+ Uses a 5-minute buffer to account for clock skew and
+ request latency.
+ """
+ if self.expires_at is None:
+ return False
+ buffer = timedelta(minutes=5)
+ return datetime.now(UTC) >= (self.expires_at - buffer)
+
+ @property
+ def can_refresh(self) -> bool:
+ """Check if token can be refreshed (has refresh_token)."""
+ return self.refresh_token is not None and self.refresh_token.strip() != ""
+
+ @property
+ def expires_in_seconds(self) -> int | None:
+ """Get seconds until expiration, or None if no expiration."""
+ if self.expires_at is None:
+ return None
+ delta = self.expires_at - datetime.now(UTC)
+ return max(0, int(delta.total_seconds()))
+
+ @classmethod
+ def from_token_response(cls, data: dict[str, Any]) -> OAuth2Token:
+ """
+ Create OAuth2Token from an OAuth2 token endpoint response.
+
+ Args:
+ data: Token response JSON (access_token, token_type, expires_in, etc.)
+
+ Returns:
+ OAuth2Token instance
+ """
+ expires_at = None
+ if "expires_in" in data:
+ expires_at = datetime.now(UTC) + timedelta(seconds=data["expires_in"])
+
+ return cls(
+ access_token=data["access_token"],
+ token_type=data.get("token_type", "Bearer"),
+ expires_at=expires_at,
+ refresh_token=data.get("refresh_token"),
+ scope=data.get("scope"),
+ raw_response=data,
+ )
+
+
@dataclass
class OAuth2Config:
    """Configuration describing one OAuth2 provider.

    Carries everything needed to run OAuth2 operations against a specific
    provider (GitHub, Google, Salesforce, etc.).

    Attributes:
        token_url: URL for the token endpoint (required).
        authorization_url: Authorization endpoint (optional, for auth code flow).
        revocation_url: Token revocation endpoint (optional).
        introspection_url: Token introspection endpoint (optional).
        client_id: OAuth2 client ID.
        client_secret: OAuth2 client secret.
        default_scopes: Scopes requested when none are given explicitly.
        token_placement: How the token is attached to API requests.
        custom_header_name: Header name for HEADER_CUSTOM placement.
        query_param_name: Query parameter name for QUERY_PARAM placement.
        extra_token_params: Additional parameters for token requests.
        request_timeout: HTTP request timeout in seconds.
        extra_headers: Additional headers for token-endpoint requests.

    Example:
        config = OAuth2Config(
            token_url="https://github.com/login/oauth/access_token",
            authorization_url="https://github.com/login/oauth/authorize",
            client_id="your-client-id",
            client_secret="your-client-secret",
            default_scopes=["repo", "user"],
        )
    """

    # Endpoints -- only the token endpoint is mandatory.
    token_url: str
    authorization_url: str | None = None
    revocation_url: str | None = None
    introspection_url: str | None = None

    # Client credentials.
    client_id: str = ""
    client_secret: str = ""

    # Scopes requested by default.
    default_scopes: list[str] = field(default_factory=list)

    # How the access token is attached to outgoing API calls.
    token_placement: TokenPlacement = TokenPlacement.HEADER_BEARER
    custom_header_name: str | None = None
    query_param_name: str = "access_token"

    # Request tuning.
    extra_token_params: dict[str, str] = field(default_factory=dict)
    request_timeout: float = 30.0

    # Extra headers sent with token-endpoint requests.
    extra_headers: dict[str, str] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Reject configurations that cannot work at runtime."""
        if not self.token_url:
            raise ValueError("token_url is required")

        missing_header_name = (
            self.token_placement == TokenPlacement.HEADER_CUSTOM
            and not self.custom_header_name
        )
        if missing_header_name:
            raise ValueError("custom_header_name is required when using HEADER_CUSTOM placement")
+
+
class OAuth2Error(Exception):
    """OAuth2 protocol error.

    Attributes:
        error: OAuth2 error code (e.g. 'invalid_grant', 'invalid_client').
        description: Human-readable error description.
        status_code: HTTP status code from the response (0 if unknown).
    """

    def __init__(
        self,
        error: str,
        description: str = "",
        status_code: int = 0,
    ):
        self.error = error
        self.description = description
        self.status_code = status_code
        # "code: description" when a description exists, bare code otherwise.
        message = f"{error}: {description}" if description else error
        super().__init__(message)
+
+
class TokenExpiredError(OAuth2Error):
    """Raised when a token has expired and cannot be used."""

    def __init__(self, credential_id: str):
        # Record which credential expired so callers can target recovery.
        self.credential_id = credential_id
        super().__init__(
            error="token_expired",
            description=f"Token for '{credential_id}' has expired",
        )
+
+
class RefreshTokenInvalidError(OAuth2Error):
    """Raised when the refresh token is invalid or revoked."""

    def __init__(self, credential_id: str, reason: str = ""):
        base = f"Refresh token for '{credential_id}' is invalid"
        # Append the provider-supplied reason when one is given.
        super().__init__(
            error="invalid_grant",
            description=f"{base}: {reason}" if reason else base,
        )
        self.credential_id = credential_id
diff --git a/core/framework/credentials/provider.py b/core/framework/credentials/provider.py
new file mode 100644
index 0000000000..0227f5e209
--- /dev/null
+++ b/core/framework/credentials/provider.py
@@ -0,0 +1,283 @@
+"""
+Provider interface for credential lifecycle management.
+
+Providers handle credential lifecycle operations:
+- Refresh: Obtain new tokens when expired
+- Validate: Check if credentials are still working
+- Revoke: Invalidate credentials when no longer needed
+
+OSS users can implement custom providers by subclassing CredentialProvider.
+"""
+
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+from datetime import UTC, datetime, timedelta
+
+from .models import CredentialObject, CredentialRefreshError, CredentialType
+
+logger = logging.getLogger(__name__)
+
+
+class CredentialProvider(ABC):
+ """
+ Abstract base class for credential providers.
+
+ Providers handle credential lifecycle operations:
+ - refresh(): Obtain new tokens when expired
+ - validate(): Check if credentials are still working
+ - should_refresh(): Determine if a credential needs refresh
+ - revoke(): Invalidate credentials (optional)
+
+ Example custom provider:
+ class MyCustomProvider(CredentialProvider):
+ @property
+ def provider_id(self) -> str:
+ return "my_custom"
+
+ @property
+ def supported_types(self) -> List[CredentialType]:
+ return [CredentialType.CUSTOM]
+
+ def refresh(self, credential: CredentialObject) -> CredentialObject:
+ # Custom refresh logic
+ new_token = my_api.refresh(credential.get_key("api_key"))
+ credential.set_key("access_token", new_token)
+ return credential
+
+ def validate(self, credential: CredentialObject) -> bool:
+ token = credential.get_key("access_token")
+ return my_api.validate(token)
+ """
+
+ @property
+ @abstractmethod
+ def provider_id(self) -> str:
+ """
+ Unique identifier for this provider.
+
+ Examples: 'static', 'oauth2', 'my_custom_auth'
+ """
+ pass
+
+ @property
+ @abstractmethod
+ def supported_types(self) -> list[CredentialType]:
+ """
+ Credential types this provider can manage.
+
+ Returns:
+ List of CredentialType enums this provider supports
+ """
+ pass
+
+ @abstractmethod
+ def refresh(self, credential: CredentialObject) -> CredentialObject:
+ """
+ Refresh the credential (e.g., use refresh_token to get new access_token).
+
+ This method should:
+ 1. Use existing credential data to obtain new values
+ 2. Update the credential object with new values
+ 3. Set appropriate expiration times
+ 4. Update last_refreshed timestamp
+
+ Args:
+ credential: The credential to refresh
+
+ Returns:
+ Updated credential with new values
+
+ Raises:
+ CredentialRefreshError: If refresh fails
+ """
+ pass
+
+ @abstractmethod
+ def validate(self, credential: CredentialObject) -> bool:
+ """
+ Validate that a credential is still working.
+
+ This might involve:
+ - Checking expiration times
+ - Making a test API call
+ - Validating token signatures
+
+ Args:
+ credential: The credential to validate
+
+ Returns:
+ True if credential is valid, False otherwise
+ """
+ pass
+
+ def should_refresh(self, credential: CredentialObject) -> bool:
+ """
+ Determine if a credential should be refreshed.
+
+ Default implementation: refresh if any key is expired or within
+ 5 minutes of expiry. Override for custom logic.
+
+ Args:
+ credential: The credential to check
+
+ Returns:
+ True if credential should be refreshed
+ """
+ buffer = timedelta(minutes=5)
+ now = datetime.now(UTC)
+
+ for key in credential.keys.values():
+ if key.expires_at is not None:
+ if key.expires_at <= now + buffer:
+ return True
+ return False
+
+ def revoke(self, credential: CredentialObject) -> bool:
+ """
+ Revoke a credential (optional operation).
+
+ Not all providers support revocation. The default implementation
+ logs a warning and returns False.
+
+ Args:
+ credential: The credential to revoke
+
+ Returns:
+ True if revocation succeeded, False otherwise
+ """
+ logger.warning(f"Provider '{self.provider_id}' does not support revocation")
+ return False
+
+ def can_handle(self, credential: CredentialObject) -> bool:
+ """
+ Check if this provider can handle a credential.
+
+ Args:
+ credential: The credential to check
+
+ Returns:
+ True if this provider can manage the credential
+ """
+ return credential.credential_type in self.supported_types
+
+
class StaticProvider(CredentialProvider):
    """Provider for static credentials that never need refresh.

    Intended for long-lived secrets such as:
    - Brave Search API key
    - OpenAI API key
    - Basic auth credentials

    A static credential is considered valid whenever at least one of its
    keys holds a non-empty value.
    """

    @property
    def provider_id(self) -> str:
        return "static"

    @property
    def supported_types(self) -> list[CredentialType]:
        return [CredentialType.API_KEY, CredentialType.BASIC_AUTH, CredentialType.CUSTOM]

    def refresh(self, credential: CredentialObject) -> CredentialObject:
        """No-op: static credentials never need refreshing.

        Returns:
            The credential, unchanged.
        """
        logger.debug(f"Static credential '{credential.id}' does not need refresh")
        return credential

    def validate(self, credential: CredentialObject) -> bool:
        """Report whether at least one key exposes a non-blank secret value.

        Without an API round-trip we cannot prove the key actually works,
        so existence of a value is the validity criterion.
        """
        for key in credential.keys.values():
            try:
                secret = key.get_secret_value()
            except Exception:
                # Unreadable key -- try the next one.
                continue
            if secret and secret.strip():
                return True
        return False

    def should_refresh(self, credential: CredentialObject) -> bool:
        """Static credentials never need refresh."""
        return False
+
+
class BearerTokenProvider(CredentialProvider):
    """Provider for bearer tokens without refresh capability.

    Intended for JWTs or similar tokens that expire but carry no refresh
    token; once expired, a new token must be obtained out of band and
    saved back to the credential store. Validation is based purely on the
    expiration time.
    """

    @property
    def provider_id(self) -> str:
        return "bearer_token"

    @property
    def supported_types(self) -> list[CredentialType]:
        return [CredentialType.BEARER_TOKEN]

    def refresh(self, credential: CredentialObject) -> CredentialObject:
        """Always fails: there is no refresh token to exchange.

        Raises:
            CredentialRefreshError: Unconditionally.
        """
        raise CredentialRefreshError(
            f"Bearer token '{credential.id}' cannot be refreshed. "
            "Obtain a new token and save it to the credential store."
        )

    def validate(self, credential: CredentialObject) -> bool:
        """Report whether a token key exists and has not expired.

        Looks for an "access_token" key first, then "token".
        """
        token_key = credential.keys.get("access_token") or credential.keys.get("token")
        return token_key is not None and not token_key.is_expired

    def should_refresh(self, credential: CredentialObject) -> bool:
        """Report whether the token is expired or within 5 minutes of it.

        Note: refresh() will still fail; a True result merely tells the
        store that this credential needs attention.
        """
        deadline = datetime.now(UTC) + timedelta(minutes=5)
        for name in ("access_token", "token"):
            key = credential.keys.get(name)
            if key and key.expires_at and key.expires_at <= deadline:
                return True
        return False
diff --git a/core/framework/credentials/storage.py b/core/framework/credentials/storage.py
new file mode 100644
index 0000000000..bee7f8dfd8
--- /dev/null
+++ b/core/framework/credentials/storage.py
@@ -0,0 +1,516 @@
+"""
+Storage backends for the credential store.
+
+This module provides abstract and concrete storage implementations:
+- CredentialStorage: Abstract base class
+- EncryptedFileStorage: Fernet-encrypted JSON files (default for production)
+- EnvVarStorage: Environment variable reading (backward compatibility)
+- InMemoryStorage: For testing
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from abc import ABC, abstractmethod
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from pydantic import SecretStr
+
+from .models import CredentialDecryptionError, CredentialKey, CredentialObject, CredentialType
+
+logger = logging.getLogger(__name__)
+
+
class CredentialStorage(ABC):
    """Abstract storage backend for credentials.

    Concrete backends implement the five primitives below and are
    responsible for serializing SecretStr values securely.
    """

    @abstractmethod
    def save(self, credential: CredentialObject) -> None:
        """Persist a credential.

        Args:
            credential: The credential object to persist.
        """
        ...

    @abstractmethod
    def load(self, credential_id: str) -> CredentialObject | None:
        """Fetch a credential by ID.

        Args:
            credential_id: The ID of the credential to load.

        Returns:
            The CredentialObject if found, None otherwise.
        """
        ...

    @abstractmethod
    def delete(self, credential_id: str) -> bool:
        """Remove a credential.

        Args:
            credential_id: The ID of the credential to delete.

        Returns:
            True if the credential existed and was removed, False otherwise.
        """
        ...

    @abstractmethod
    def list_all(self) -> list[str]:
        """Enumerate every credential ID held by this backend.

        Returns:
            List of credential IDs.
        """
        ...

    @abstractmethod
    def exists(self, credential_id: str) -> bool:
        """Test whether a credential is present.

        Args:
            credential_id: The ID to check.

        Returns:
            True if the credential exists, False otherwise.
        """
        ...
+
+
class EncryptedFileStorage(CredentialStorage):
    """
    Encrypted file-based credential storage.

    Uses Fernet symmetric encryption (AES-128-CBC + HMAC) for at-rest encryption.
    Each credential is stored as a separate encrypted JSON file.

    Directory structure:
        {base_path}/
            credentials/
                {credential_id}.enc  # Encrypted credential JSON (owner-only, 0o600)
            metadata/
                index.json           # Index of all credentials (unencrypted)

    The encryption key is read from the HIVE_CREDENTIAL_KEY environment variable.
    If not set, a new key is generated (and must be persisted for data recovery).

    Security note: the credentials directory is restricted to 0o700 and each
    encrypted file to 0o600 so other local users cannot read the ciphertext
    (defense in depth on top of the encryption itself).

    Example:
        storage = EncryptedFileStorage("/var/hive/credentials")
        storage.save(credential)
        credential = storage.load("brave_search")
    """

    def __init__(
        self,
        base_path: str | Path,
        encryption_key: bytes | None = None,
        key_env_var: str = "HIVE_CREDENTIAL_KEY",
    ):
        """
        Initialize encrypted storage.

        Args:
            base_path: Directory for credential files
            encryption_key: 32-byte Fernet key. If None, reads from env var.
            key_env_var: Environment variable containing encryption key

        Raises:
            ImportError: If the 'cryptography' package is not installed.
        """
        try:
            from cryptography.fernet import Fernet
        except ImportError as e:
            raise ImportError(
                "Encrypted storage requires 'cryptography'. Install with: pip install cryptography"
            ) from e

        self.base_path = Path(base_path)
        self._ensure_dirs()
        self._key_env_var = key_env_var

        # Key precedence: explicit argument > environment variable > generated.
        if encryption_key:
            self._key = encryption_key
        else:
            key_str = os.environ.get(key_env_var)
            if key_str:
                self._key = key_str.encode()
            else:
                # Generated keys are ephemeral: without persisting the key,
                # previously saved credentials cannot be decrypted later.
                self._key = Fernet.generate_key()
                logger.warning(
                    f"Generated new encryption key. To persist credentials across restarts, "
                    f"set {key_env_var}={self._key.decode()}"
                )

        self._fernet = Fernet(self._key)

    def _ensure_dirs(self) -> None:
        """Create the directory structure with restrictive permissions."""
        cred_dir = self.base_path / "credentials"
        cred_dir.mkdir(parents=True, exist_ok=True)
        # mkdir's mode argument is filtered by the process umask, so set the
        # owner-only permission explicitly; this directory holds secrets.
        os.chmod(cred_dir, 0o700)
        (self.base_path / "metadata").mkdir(parents=True, exist_ok=True)

    def _cred_path(self, credential_id: str) -> Path:
        """Get the file path for a credential."""
        # Sanitize credential_id to prevent path traversal
        safe_id = credential_id.replace("/", "_").replace("\\", "_").replace("..", "_")
        return self.base_path / "credentials" / f"{safe_id}.enc"

    def save(self, credential: CredentialObject) -> None:
        """Encrypt and save a credential, restricting the file to the owner.

        Args:
            credential: The credential to persist.
        """
        # Serialize credential (default=str handles datetime values).
        data = self._serialize_credential(credential)
        json_bytes = json.dumps(data, default=str).encode()

        # Encrypt
        encrypted = self._fernet.encrypt(json_bytes)

        # Write to file, then clamp permissions to owner read/write only so
        # the ciphertext is not exposed to other local users.
        cred_path = self._cred_path(credential.id)
        with open(cred_path, "wb") as f:
            f.write(encrypted)
        os.chmod(cred_path, 0o600)

        # Update index
        self._update_index(credential.id, "save", credential.credential_type.value)
        logger.debug(f"Saved encrypted credential '{credential.id}'")

    def load(self, credential_id: str) -> CredentialObject | None:
        """Load and decrypt a credential.

        Args:
            credential_id: ID of the credential to load.

        Returns:
            The credential, or None when no file exists for the ID.

        Raises:
            CredentialDecryptionError: If decryption or parsing fails
                (wrong key, corrupted file).
        """
        cred_path = self._cred_path(credential_id)
        if not cred_path.exists():
            return None

        # Read encrypted data
        with open(cred_path, "rb") as f:
            encrypted = f.read()

        # Decrypt
        try:
            json_bytes = self._fernet.decrypt(encrypted)
            data = json.loads(json_bytes.decode())
        except Exception as e:
            raise CredentialDecryptionError(
                f"Failed to decrypt credential '{credential_id}': {e}"
            ) from e

        # Deserialize
        return self._deserialize_credential(data)

    def delete(self, credential_id: str) -> bool:
        """Delete a credential file.

        Returns:
            True if a file existed and was removed, False otherwise.
        """
        cred_path = self._cred_path(credential_id)
        if cred_path.exists():
            cred_path.unlink()
            self._update_index(credential_id, "delete")
            logger.debug(f"Deleted credential '{credential_id}'")
            return True
        return False

    def list_all(self) -> list[str]:
        """List all credential IDs recorded in the metadata index."""
        index_path = self.base_path / "metadata" / "index.json"
        if not index_path.exists():
            return []
        with open(index_path) as f:
            index = json.load(f)
        return list(index.get("credentials", {}).keys())

    def exists(self, credential_id: str) -> bool:
        """Check if an encrypted file exists for the credential."""
        return self._cred_path(credential_id).exists()

    def _serialize_credential(self, credential: CredentialObject) -> dict[str, Any]:
        """Convert credential to JSON-serializable dict, extracting secret values.

        SecretStr serializes as a masked placeholder, so the real values
        must be pulled out explicitly before encryption.
        """
        data = credential.model_dump(mode="json")

        for key_name, key_data in data.get("keys", {}).items():
            if "value" in key_data:
                # SecretStr serializes as "**********", need actual value
                actual_key = credential.keys.get(key_name)
                if actual_key:
                    key_data["value"] = actual_key.get_secret_value()

        return data

    def _deserialize_credential(self, data: dict[str, Any]) -> CredentialObject:
        """Reconstruct credential from dict, wrapping values in SecretStr."""
        # Convert plain values back to SecretStr
        for key_data in data.get("keys", {}).values():
            if "value" in key_data and isinstance(key_data["value"], str):
                key_data["value"] = SecretStr(key_data["value"])

        return CredentialObject.model_validate(data)

    def _update_index(
        self,
        credential_id: str,
        operation: str,
        credential_type: str | None = None,
    ) -> None:
        """Update the metadata index after a save or delete.

        Args:
            credential_id: ID whose entry is being updated.
            operation: "save" to upsert the entry, "delete" to drop it.
            credential_type: Type string recorded on save.
        """
        index_path = self.base_path / "metadata" / "index.json"

        if index_path.exists():
            with open(index_path) as f:
                index = json.load(f)
        else:
            index = {"credentials": {}, "version": "1.0"}

        if operation == "save":
            index["credentials"][credential_id] = {
                "updated_at": datetime.now(UTC).isoformat(),
                "type": credential_type,
            }
        elif operation == "delete":
            index["credentials"].pop(credential_id, None)

        index["last_modified"] = datetime.now(UTC).isoformat()

        with open(index_path, "w") as f:
            json.dump(index, f, indent=2)
+
+
class EnvVarStorage(CredentialStorage):
    """Read-only storage that sources credentials from environment variables.

    A credential ID resolves to an environment variable either through an
    explicit mapping or the default ``{CREDENTIAL_ID}_API_KEY`` pattern.
    When python-dotenv is installed, values are also picked up from a .env
    file, enabling hot-reload without restarting the process.

    Saving or deleting is not supported at runtime.

    Example:
        storage = EnvVarStorage(
            env_mapping={"brave_search": "BRAVE_SEARCH_API_KEY"},
            dotenv_path=Path(".env")
        )
        credential = storage.load("brave_search")
    """

    def __init__(
        self,
        env_mapping: dict[str, str] | None = None,
        dotenv_path: Path | None = None,
    ):
        """Set up env var storage.

        Args:
            env_mapping: Map of credential_id -> env_var_name,
                e.g. {"brave_search": "BRAVE_SEARCH_API_KEY"}.
                IDs not in the map use the {CREDENTIAL_ID}_API_KEY pattern.
            dotenv_path: .env file consulted when the variable is absent
                from os.environ; defaults to ./.env in the current directory.
        """
        self._env_mapping = env_mapping or {}
        self._dotenv_path = dotenv_path or Path.cwd() / ".env"

    def _get_env_var_name(self, credential_id: str) -> str:
        """Resolve the env var name for a credential (explicit mapping wins)."""
        mapped = self._env_mapping.get(credential_id)
        if mapped is not None:
            return mapped
        # Default pattern: CREDENTIAL_ID_API_KEY
        return f"{credential_id.upper().replace('-', '_')}_API_KEY"

    def _read_env_value(self, env_var: str) -> str | None:
        """Read a value from os.environ, falling back to the .env file."""
        # Real environment variables take precedence.
        value = os.environ.get(env_var)
        if value:
            return value

        # Fallback: re-read the .env file each time (hot-reload).
        if not self._dotenv_path.exists():
            return None
        try:
            from dotenv import dotenv_values
        except ImportError:
            logger.debug("python-dotenv not installed, skipping .env file")
            return None
        return dotenv_values(self._dotenv_path).get(env_var)

    def save(self, credential: CredentialObject) -> None:
        """Always fails: environment variables cannot be set from here."""
        raise NotImplementedError(
            "EnvVarStorage is read-only. Set environment variables "
            "externally or use EncryptedFileStorage."
        )

    def load(self, credential_id: str) -> CredentialObject | None:
        """Build an API-key credential from the mapped environment variable."""
        env_var = self._get_env_var_name(credential_id)
        value = self._read_env_value(env_var)
        if not value:
            return None

        return CredentialObject(
            id=credential_id,
            credential_type=CredentialType.API_KEY,
            keys={"api_key": CredentialKey(name="api_key", value=SecretStr(value))},
            description=f"Loaded from {env_var}",
        )

    def delete(self, credential_id: str) -> bool:
        """Always fails: environment variables cannot be unset from here."""
        raise NotImplementedError(
            "EnvVarStorage is read-only. Unset environment variables externally."
        )

    def list_all(self) -> list[str]:
        """IDs from the explicit mapping whose variables are actually set."""
        return [cred_id for cred_id in self._env_mapping if self.exists(cred_id)]

    def exists(self, credential_id: str) -> bool:
        """Whether the mapped environment variable currently has a value."""
        return self._read_env_value(self._get_env_var_name(credential_id)) is not None

    def add_mapping(self, credential_id: str, env_var: str) -> None:
        """Register a credential ID -> environment variable mapping.

        Args:
            credential_id: The credential identifier.
            env_var: The environment variable name.
        """
        self._env_mapping[credential_id] = env_var
+
+
class InMemoryStorage(CredentialStorage):
    """Dict-backed storage for testing.

    Contents live only in process memory and vanish on exit.

    Example:
        storage = InMemoryStorage()
        storage.save(credential)
        credential = storage.load("test_cred")
    """

    def __init__(self, initial_data: dict[str, CredentialObject] | None = None):
        """Set up in-memory storage.

        Args:
            initial_data: Optional credential_id -> CredentialObject dict
                used to seed the store.
        """
        self._data: dict[str, CredentialObject] = initial_data or {}

    def save(self, credential: CredentialObject) -> None:
        """Store (or overwrite) the credential under its ID."""
        self._data[credential.id] = credential

    def load(self, credential_id: str) -> CredentialObject | None:
        """Return the stored credential, or None when absent."""
        return self._data.get(credential_id)

    def delete(self, credential_id: str) -> bool:
        """Remove the credential; True when an entry was actually removed."""
        return self._data.pop(credential_id, None) is not None

    def list_all(self) -> list[str]:
        """All stored credential IDs."""
        return list(self._data)

    def exists(self, credential_id: str) -> bool:
        """Whether an entry exists for the ID."""
        return credential_id in self._data

    def clear(self) -> None:
        """Drop every stored credential."""
        self._data.clear()
+
+
class CompositeStorage(CredentialStorage):
    """Layered storage: write to one backend, read from many.

    Writes (save/delete) go only to the primary backend; reads consult the
    primary first and then each fallback in order. Useful for combining an
    encrypted file store with an env-var fallback:

        storage = CompositeStorage(
            primary=EncryptedFileStorage("/var/hive/credentials"),
            fallbacks=[EnvVarStorage({"brave_search": "BRAVE_SEARCH_API_KEY"})]
        )
    """

    def __init__(
        self,
        primary: CredentialStorage,
        fallbacks: list[CredentialStorage] | None = None,
    ):
        """Set up composite storage.

        Args:
            primary: Backend used for writes and checked first on reads.
            fallbacks: Additional read-only backends, consulted in order
                when the primary has no entry.
        """
        self._primary = primary
        self._fallbacks = fallbacks or []

    def _backends(self) -> tuple[CredentialStorage, ...]:
        """All backends in read order: primary first, then fallbacks."""
        return (self._primary, *self._fallbacks)

    def save(self, credential: CredentialObject) -> None:
        """Write the credential to the primary backend only."""
        self._primary.save(credential)

    def load(self, credential_id: str) -> CredentialObject | None:
        """Return the first hit across primary then fallbacks, else None."""
        for backend in self._backends():
            found = backend.load(credential_id)
            if found is not None:
                return found
        return None

    def delete(self, credential_id: str) -> bool:
        """Delete from the primary backend only."""
        return self._primary.delete(credential_id)

    def list_all(self) -> list[str]:
        """Union of credential IDs across every backend (deduplicated)."""
        all_ids: set[str] = set()
        for backend in self._backends():
            all_ids.update(backend.list_all())
        return list(all_ids)

    def exists(self, credential_id: str) -> bool:
        """Whether any backend holds the credential."""
        return any(backend.exists(credential_id) for backend in self._backends())
diff --git a/core/framework/credentials/store.py b/core/framework/credentials/store.py
new file mode 100644
index 0000000000..8202b6d959
--- /dev/null
+++ b/core/framework/credentials/store.py
@@ -0,0 +1,614 @@
+"""
+Main credential store orchestrating storage, providers, and template resolution.
+
+The CredentialStore is the primary interface for credential management, providing:
+- Multi-backend storage (file, env, vault)
+- Provider-based lifecycle management (refresh, validate)
+- Template resolution for {{cred.key}} patterns
+- Caching with TTL for performance
+- Thread-safe operations
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from datetime import UTC, datetime
+from typing import Any
+
+from pydantic import SecretStr
+
+from .models import (
+ CredentialKey,
+ CredentialObject,
+ CredentialRefreshError,
+ CredentialUsageSpec,
+)
+from .provider import CredentialProvider, StaticProvider
+from .storage import CredentialStorage, EnvVarStorage, InMemoryStorage
+from .template import TemplateResolver
+
+logger = logging.getLogger(__name__)
+
+
+class CredentialStore:
+ """
+ Main credential store orchestrating storage, providers, and template resolution.
+
+ Features:
+ - Multi-backend storage (file, env, vault)
+ - Provider-based lifecycle management (refresh, validate)
+ - Template resolution for {{cred.key}} patterns
+ - Caching with TTL for performance
+ - Thread-safe operations
+
+ Usage:
+ # Basic usage
+ store = CredentialStore(
+ storage=EncryptedFileStorage("/path/to/creds"),
+ providers=[OAuth2Provider(), StaticProvider()]
+ )
+
+ # Get a credential
+ cred = store.get_credential("github_oauth")
+
+ # Resolve templates in headers
+ headers = store.resolve_headers({
+ "Authorization": "Bearer {{github_oauth.access_token}}"
+ })
+
+ # Register a tool's credential requirements
+ store.register_usage(CredentialUsageSpec(
+ credential_id="brave_search",
+ required_keys=["api_key"],
+ headers={"X-Subscription-Token": "{{brave_search.api_key}}"}
+ ))
+ """
+
+ def __init__(
+ self,
+ storage: CredentialStorage | None = None,
+ providers: list[CredentialProvider] | None = None,
+ cache_ttl_seconds: int = 300,
+ auto_refresh: bool = True,
+ ):
+ """
+ Initialize the credential store.
+
+ Args:
+ storage: Storage backend. Defaults to EnvVarStorage for compatibility.
+ providers: List of credential providers. Defaults to [StaticProvider()].
+ cache_ttl_seconds: How long to cache credentials in memory (default: 5 minutes).
+ auto_refresh: Whether to auto-refresh expired credentials on access.
+ """
+ self._storage = storage or EnvVarStorage()
+ self._providers: dict[str, CredentialProvider] = {}
+ self._usage_specs: dict[str, CredentialUsageSpec] = {}
+
+ # Cache: credential_id -> (CredentialObject, cached_at)
+ self._cache: dict[str, tuple[CredentialObject, datetime]] = {}
+ self._cache_ttl = cache_ttl_seconds
+ self._lock = threading.RLock()
+
+ self._auto_refresh = auto_refresh
+
+ # Register providers
+ for provider in providers or [StaticProvider()]:
+ self.register_provider(provider)
+
+ # Template resolver
+ self._resolver = TemplateResolver(self)
+
+ # --- Provider Management ---
+
+ def register_provider(self, provider: CredentialProvider) -> None:
+ """
+ Register a credential provider.
+
+ Args:
+ provider: The provider to register
+ """
+ self._providers[provider.provider_id] = provider
+ logger.debug(f"Registered credential provider: {provider.provider_id}")
+
+ def get_provider(self, provider_id: str) -> CredentialProvider | None:
+ """
+ Get a provider by ID.
+
+ Args:
+ provider_id: The provider identifier
+
+ Returns:
+ The provider if found, None otherwise
+ """
+ return self._providers.get(provider_id)
+
+ def get_provider_for_credential(
+ self, credential: CredentialObject
+ ) -> CredentialProvider | None:
+ """
+ Get the appropriate provider for a credential.
+
+ Args:
+ credential: The credential to find a provider for
+
+ Returns:
+ The provider if found, None otherwise
+ """
+ # First, check if credential specifies a provider
+ if credential.provider_id:
+ provider = self._providers.get(credential.provider_id)
+ if provider:
+ return provider
+
+ # Fall back to finding a provider that supports this type
+ for provider in self._providers.values():
+ if provider.can_handle(credential):
+ return provider
+
+ return None
+
+ # --- Usage Spec Management ---
+
+ def register_usage(self, spec: CredentialUsageSpec) -> None:
+ """
+ Register how a tool uses credentials.
+
+ Args:
+ spec: The usage specification
+ """
+ self._usage_specs[spec.credential_id] = spec
+
+ def get_usage_spec(self, credential_id: str) -> CredentialUsageSpec | None:
+ """
+ Get the usage spec for a credential.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ The usage spec if registered, None otherwise
+ """
+ return self._usage_specs.get(credential_id)
+
+ # --- Credential Access ---
+
+ def get_credential(
+ self,
+ credential_id: str,
+ refresh_if_needed: bool = True,
+ ) -> CredentialObject | None:
+ """
+ Get a credential by ID.
+
+ Args:
+ credential_id: The credential identifier
+ refresh_if_needed: If True, refresh expired credentials
+
+ Returns:
+ CredentialObject or None if not found
+ """
+ with self._lock:
+ # Check cache
+ cached = self._get_from_cache(credential_id)
+ if cached is not None:
+ if refresh_if_needed and self._should_refresh(cached):
+ return self._refresh_credential(cached)
+ return cached
+
+ # Load from storage
+ credential = self._storage.load(credential_id)
+ if credential is None:
+ return None
+
+ # Refresh if needed
+ if refresh_if_needed and self._should_refresh(credential):
+ credential = self._refresh_credential(credential)
+
+ # Cache
+ self._add_to_cache(credential)
+
+ return credential
+
+ def get_key(self, credential_id: str, key_name: str) -> str | None:
+ """
+ Convenience method to get a specific key value.
+
+ Args:
+ credential_id: The credential identifier
+ key_name: The key within the credential
+
+ Returns:
+ The key value or None if not found
+ """
+ credential = self.get_credential(credential_id)
+ if credential is None:
+ return None
+ return credential.get_key(key_name)
+
+ def get(self, credential_id: str) -> str | None:
+ """
+ Legacy compatibility: get the primary key value.
+
+ For single-key credentials, returns that key.
+ For multi-key, returns 'value', 'api_key', or 'access_token'.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ The primary key value or None
+ """
+ credential = self.get_credential(credential_id)
+ if credential is None:
+ return None
+ return credential.get_default_key()
+
+ # --- Template Resolution ---
+
+ def resolve(self, template: str) -> str:
+ """
+ Resolve credential templates in a string.
+
+ Args:
+ template: String containing {{cred.key}} patterns
+
+ Returns:
+ Template with all references resolved
+
+ Example:
+ >>> store.resolve("Bearer {{github.access_token}}")
+ "Bearer ghp_xxxxxxxxxxxx"
+ """
+ return self._resolver.resolve(template)
+
+ def resolve_headers(self, headers: dict[str, str]) -> dict[str, str]:
+ """
+ Resolve credential templates in headers dictionary.
+
+ Args:
+ headers: Dict of header name to template value
+
+ Returns:
+ Dict with all templates resolved
+
+ Example:
+ >>> store.resolve_headers({
+ ... "Authorization": "Bearer {{github.access_token}}"
+ ... })
+ {"Authorization": "Bearer ghp_xxx"}
+ """
+ return self._resolver.resolve_headers(headers)
+
+ def resolve_params(self, params: dict[str, str]) -> dict[str, str]:
+ """
+ Resolve credential templates in query parameters dictionary.
+
+ Args:
+ params: Dict of param name to template value
+
+ Returns:
+ Dict with all templates resolved
+ """
+ return self._resolver.resolve_params(params)
+
+ def resolve_for_usage(self, credential_id: str) -> dict[str, Any]:
+ """
+ Get resolved request kwargs for a registered usage spec.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ Dict with 'headers', 'params', etc. keys as appropriate
+
+ Raises:
+ ValueError: If no usage spec is registered for the credential
+ """
+ spec = self._usage_specs.get(credential_id)
+ if spec is None:
+ raise ValueError(f"No usage spec registered for '{credential_id}'")
+
+ result: dict[str, Any] = {}
+
+ if spec.headers:
+ result["headers"] = self.resolve_headers(spec.headers)
+
+ if spec.query_params:
+ result["params"] = self.resolve_params(spec.query_params)
+
+ if spec.body_fields:
+ result["data"] = {key: self.resolve(value) for key, value in spec.body_fields.items()}
+
+ return result
+
+ # --- Credential Management ---
+
+ def save_credential(self, credential: CredentialObject) -> None:
+ """
+ Save a credential to storage.
+
+ Args:
+ credential: The credential to save
+ """
+ with self._lock:
+ self._storage.save(credential)
+ self._add_to_cache(credential)
+ logger.info(f"Saved credential '{credential.id}'")
+
+ def delete_credential(self, credential_id: str) -> bool:
+ """
+ Delete a credential from storage.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ True if the credential existed and was deleted
+ """
+ with self._lock:
+ self._remove_from_cache(credential_id)
+ result = self._storage.delete(credential_id)
+ if result:
+ logger.info(f"Deleted credential '{credential_id}'")
+ return result
+
+ def list_credentials(self) -> list[str]:
+ """
+ List all available credential IDs.
+
+ Returns:
+ List of credential IDs
+ """
+ return self._storage.list_all()
+
+ def is_available(self, credential_id: str) -> bool:
+ """
+ Check if a credential is available.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ True if credential exists and is accessible
+ """
+ return self.get_credential(credential_id, refresh_if_needed=False) is not None
+
+ # --- Validation ---
+
+ def validate_for_usage(self, credential_id: str) -> list[str]:
+ """
+ Validate that a credential meets its usage spec requirements.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ List of missing keys or errors. Empty list if valid.
+ """
+ spec = self._usage_specs.get(credential_id)
+ if spec is None:
+ return [] # No requirements registered
+
+ credential = self.get_credential(credential_id)
+ if credential is None:
+ return [f"Credential '{credential_id}' not found"]
+
+ errors = []
+ for key_name in spec.required_keys:
+ if not credential.has_key(key_name):
+ errors.append(f"Missing required key '{key_name}'")
+
+ return errors
+
+ def validate_all(self) -> dict[str, list[str]]:
+ """
+ Validate all registered usage specs.
+
+ Returns:
+ Dict mapping credential_id to list of errors.
+ Only includes credentials with errors.
+ """
+ errors = {}
+ for cred_id in self._usage_specs.keys():
+ cred_errors = self.validate_for_usage(cred_id)
+ if cred_errors:
+ errors[cred_id] = cred_errors
+ return errors
+
+ def validate_credential(self, credential_id: str) -> bool:
+ """
+ Validate a credential using its provider.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ True if credential is valid
+ """
+ credential = self.get_credential(credential_id, refresh_if_needed=False)
+ if credential is None:
+ return False
+
+ provider = self.get_provider_for_credential(credential)
+ if provider is None:
+ # No provider, assume valid if has keys
+ return bool(credential.keys)
+
+ return provider.validate(credential)
+
+ # --- Lifecycle Management ---
+
+ def _should_refresh(self, credential: CredentialObject) -> bool:
+ """Check if credential should be refreshed."""
+ if not self._auto_refresh:
+ return False
+
+ if not credential.auto_refresh:
+ return False
+
+ provider = self.get_provider_for_credential(credential)
+ if provider is None:
+ return False
+
+ return provider.should_refresh(credential)
+
+ def _refresh_credential(self, credential: CredentialObject) -> CredentialObject:
+ """Refresh a credential using its provider."""
+ provider = self.get_provider_for_credential(credential)
+ if provider is None:
+ logger.warning(f"No provider found for credential '{credential.id}'")
+ return credential
+
+ try:
+ refreshed = provider.refresh(credential)
+ refreshed.last_refreshed = datetime.now(UTC)
+
+ # Persist the refreshed credential
+ self._storage.save(refreshed)
+ self._add_to_cache(refreshed)
+
+ logger.info(f"Refreshed credential '{credential.id}'")
+ return refreshed
+
+ except CredentialRefreshError as e:
+ logger.error(f"Failed to refresh credential '{credential.id}': {e}")
+ return credential
+
+ def refresh_credential(self, credential_id: str) -> CredentialObject | None:
+ """
+ Manually refresh a credential.
+
+ Args:
+ credential_id: The credential identifier
+
+ Returns:
+ The refreshed credential, or None if not found
+
+ Raises:
+ CredentialRefreshError: If refresh fails
+ """
+ credential = self.get_credential(credential_id, refresh_if_needed=False)
+ if credential is None:
+ return None
+
+ return self._refresh_credential(credential)
+
+ # --- Caching ---
+
+ def _get_from_cache(self, credential_id: str) -> CredentialObject | None:
+ """Get credential from cache if not expired."""
+ if credential_id not in self._cache:
+ return None
+
+ credential, cached_at = self._cache[credential_id]
+ age = (datetime.now(UTC) - cached_at).total_seconds()
+
+ if age > self._cache_ttl:
+ del self._cache[credential_id]
+ return None
+
+ return credential
+
+ def _add_to_cache(self, credential: CredentialObject) -> None:
+ """Add credential to cache."""
+ self._cache[credential.id] = (credential, datetime.now(UTC))
+
+ def _remove_from_cache(self, credential_id: str) -> None:
+ """Remove credential from cache."""
+ self._cache.pop(credential_id, None)
+
+ def clear_cache(self) -> None:
+ """Clear the credential cache."""
+ with self._lock:
+ self._cache.clear()
+
+ # --- Factory Methods ---
+
+ @classmethod
+ def for_testing(
+ cls,
+ credentials: dict[str, dict[str, str]],
+ ) -> CredentialStore:
+ """
+ Create a credential store for testing with mock credentials.
+
+ Args:
+ credentials: Dict mapping credential_id to {key_name: value}
+ e.g., {"brave_search": {"api_key": "test-key"}}
+
+ Returns:
+ CredentialStore with in-memory credentials
+
+ Example:
+ store = CredentialStore.for_testing({
+ "brave_search": {"api_key": "test-brave-key"},
+ "github_oauth": {
+ "access_token": "test-token",
+ "refresh_token": "test-refresh"
+ }
+ })
+ """
+ # Convert test data to CredentialObjects
+ cred_objects: dict[str, CredentialObject] = {}
+
+ for cred_id, keys in credentials.items():
+ cred_objects[cred_id] = CredentialObject(
+ id=cred_id,
+ keys={k: CredentialKey(name=k, value=SecretStr(v)) for k, v in keys.items()},
+ )
+
+ return cls(
+ storage=InMemoryStorage(cred_objects),
+ auto_refresh=False,
+ )
+
+ @classmethod
+ def with_encrypted_storage(
+ cls,
+ base_path: str,
+ providers: list[CredentialProvider] | None = None,
+ **kwargs: Any,
+ ) -> CredentialStore:
+ """
+ Create a credential store with encrypted file storage.
+
+ Args:
+ base_path: Directory for credential files
+ providers: List of credential providers
+ **kwargs: Additional arguments passed to CredentialStore
+
+ Returns:
+ CredentialStore with EncryptedFileStorage
+ """
+ from .storage import EncryptedFileStorage
+
+ return cls(
+ storage=EncryptedFileStorage(base_path),
+ providers=providers,
+ **kwargs,
+ )
+
+ @classmethod
+ def with_env_storage(
+ cls,
+ env_mapping: dict[str, str] | None = None,
+ providers: list[CredentialProvider] | None = None,
+ **kwargs: Any,
+ ) -> CredentialStore:
+ """
+ Create a credential store with environment variable storage.
+
+ Args:
+ env_mapping: Map of credential_id -> env_var_name
+ providers: List of credential providers
+ **kwargs: Additional arguments passed to CredentialStore
+
+ Returns:
+ CredentialStore with EnvVarStorage
+ """
+ return cls(
+ storage=EnvVarStorage(env_mapping),
+ providers=providers,
+ **kwargs,
+ )
diff --git a/core/framework/credentials/template.py b/core/framework/credentials/template.py
new file mode 100644
index 0000000000..dd441da388
--- /dev/null
+++ b/core/framework/credentials/template.py
@@ -0,0 +1,219 @@
+"""
+Template resolution system for credential injection.
+
+This module handles {{cred.key}} patterns, enabling the bipartisan model
+where tools specify how credentials are used in HTTP requests.
+
+Template Syntax:
+ {{credential_id.key_name}} - Access specific key
+ {{credential_id}} - Access default key (value, api_key, or access_token)
+
+Examples:
+ "Bearer {{github_oauth.access_token}}" -> "Bearer ghp_xxx"
+ "X-API-Key: {{brave_search.api_key}}" -> "X-API-Key: BSAKxxx"
+ "{{brave_search}}" -> "BSAKxxx" (uses default key)
+"""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+from .models import CredentialKeyNotFoundError, CredentialNotFoundError
+
+if TYPE_CHECKING:
+ from .store import CredentialStore
+
+
class TemplateResolver:
    """
    Resolves credential templates like {{cred.key}} into actual values.

    Usage:
        resolver = TemplateResolver(credential_store)

        # Resolve single template string
        auth_header = resolver.resolve("Bearer {{github_oauth.access_token}}")

        # Resolve all headers at once
        headers = resolver.resolve_headers({
            "Authorization": "Bearer {{github_oauth.access_token}}",
            "X-API-Key": "{{brave_search.api_key}}"
        })
    """

    # Matches {{credential_id}} or {{credential_id.key_name}}
    # (identifiers limited to [a-zA-Z0-9_-]; the ".key" group is optional)
    TEMPLATE_PATTERN = re.compile(r"\{\{([a-zA-Z0-9_-]+)(?:\.([a-zA-Z0-9_-]+))?\}\}")

    def __init__(self, credential_store: CredentialStore):
        """
        Initialize the template resolver.

        Args:
            credential_store: The credential store to resolve references against
        """
        self._store = credential_store

    def resolve(self, template: str, fail_on_missing: bool = True) -> str:
        """
        Resolve all credential references in a template string.

        Args:
            template: String containing {{cred.key}} patterns
            fail_on_missing: If True, raise error on missing credentials

        Returns:
            Template with all references replaced with actual values

        Raises:
            CredentialNotFoundError: If credential doesn't exist and fail_on_missing=True
            CredentialKeyNotFoundError: If key doesn't exist in credential

        Example:
            >>> resolver.resolve("Bearer {{github_oauth.access_token}}")
            "Bearer ghp_xxxxxxxxxxxx"
        """

        # NOTE: fail_on_missing only governs the *credential* lookup below.
        # When the credential exists but the referenced key does not,
        # CredentialKeyNotFoundError is raised regardless of the flag.
        def replace_match(match: re.Match) -> str:
            cred_id = match.group(1)
            key_name = match.group(2)  # May be None

            # refresh_if_needed=True: resolution may transparently refresh
            # an expired credential via the store
            credential = self._store.get_credential(cred_id, refresh_if_needed=True)
            if credential is None:
                if fail_on_missing:
                    raise CredentialNotFoundError(f"Credential '{cred_id}' not found")
                return match.group(0)  # Return original template

            # Get specific key or default
            if key_name:
                value = credential.get_key(key_name)
                if value is None:
                    raise CredentialKeyNotFoundError(
                        f"Key '{key_name}' not found in credential '{cred_id}'"
                    )
            else:
                # Use default key
                value = credential.get_default_key()
                if value is None:
                    raise CredentialKeyNotFoundError(f"Credential '{cred_id}' has no keys")

            # Record usage (side effect on the credential object, once per
            # resolved reference)
            credential.record_usage()

            return value

        return self.TEMPLATE_PATTERN.sub(replace_match, template)

    def resolve_headers(
        self,
        header_templates: dict[str, str],
        fail_on_missing: bool = True,
    ) -> dict[str, str]:
        """
        Resolve templates in a headers dictionary.

        Args:
            header_templates: Dict of header name to template value
            fail_on_missing: If True, raise error on missing credentials

        Returns:
            Dict with all templates resolved to actual values

        Example:
            >>> resolver.resolve_headers({
            ...     "Authorization": "Bearer {{github_oauth.access_token}}",
            ...     "X-API-Key": "{{brave_search.api_key}}"
            ... })
            {"Authorization": "Bearer ghp_xxx", "X-API-Key": "BSAKxxx"}
        """
        return {
            key: self.resolve(value, fail_on_missing) for key, value in header_templates.items()
        }

    def resolve_params(
        self,
        param_templates: dict[str, str],
        fail_on_missing: bool = True,
    ) -> dict[str, str]:
        """
        Resolve templates in a query parameters dictionary.

        Args:
            param_templates: Dict of param name to template value
            fail_on_missing: If True, raise error on missing credentials

        Returns:
            Dict with all templates resolved to actual values
        """
        return {key: self.resolve(value, fail_on_missing) for key, value in param_templates.items()}

    def has_templates(self, text: str) -> bool:
        """
        Check if text contains any credential templates.

        Args:
            text: String to check

        Returns:
            True if text contains {{...}} patterns
        """
        return bool(self.TEMPLATE_PATTERN.search(text))

    def extract_references(self, text: str) -> list[tuple[str, str | None]]:
        """
        Extract all credential references from text.

        Args:
            text: String to extract references from

        Returns:
            List of (credential_id, key_name) tuples.
            key_name is None if only credential_id was specified.

        Example:
            >>> resolver.extract_references("{{github.token}} and {{brave_search.api_key}}")
            [("github", "token"), ("brave_search", "api_key")]
        """
        return [(match.group(1), match.group(2)) for match in self.TEMPLATE_PATTERN.finditer(text)]

    def validate_references(self, text: str) -> list[str]:
        """
        Validate all credential references in text without resolving.

        Args:
            text: String containing template references

        Returns:
            List of error messages for invalid references.
            Empty list if all references are valid.
        """
        errors = []
        references = self.extract_references(text)

        for cred_id, key_name in references:
            # refresh_if_needed=False: validation must not trigger refreshes
            credential = self._store.get_credential(cred_id, refresh_if_needed=False)

            if credential is None:
                errors.append(f"Credential '{cred_id}' not found")
                continue

            if key_name:
                if not credential.has_key(key_name):
                    errors.append(f"Key '{key_name}' not found in credential '{cred_id}'")
            elif not credential.keys:
                errors.append(f"Credential '{cred_id}' has no keys")

        return errors

    def get_required_credentials(self, text: str) -> list[str]:
        """
        Get list of credential IDs required by a template string.

        Args:
            text: String containing template references

        Returns:
            List of unique credential IDs referenced in the text
        """
        references = self.extract_references(text)
        # dict.fromkeys dedupes while preserving first-seen order
        return list(dict.fromkeys(cred_id for cred_id, _ in references))
diff --git a/core/framework/credentials/tests/__init__.py b/core/framework/credentials/tests/__init__.py
new file mode 100644
index 0000000000..22b0c4cba6
--- /dev/null
+++ b/core/framework/credentials/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for the credential store module."""
diff --git a/core/framework/credentials/tests/test_credential_store.py b/core/framework/credentials/tests/test_credential_store.py
new file mode 100644
index 0000000000..6a1462d249
--- /dev/null
+++ b/core/framework/credentials/tests/test_credential_store.py
@@ -0,0 +1,707 @@
+"""
+Comprehensive tests for the credential store module.
+
+Tests cover:
+- Core models (CredentialObject, CredentialKey, CredentialUsageSpec)
+- Template resolution
+- Storage backends (InMemoryStorage, EnvVarStorage, EncryptedFileStorage)
+- Providers (StaticProvider, BearerTokenProvider)
+- Main CredentialStore
+- OAuth2 module
+"""
+
+import os
+import tempfile
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from core.framework.credentials import (
+ CompositeStorage,
+ CredentialKey,
+ CredentialKeyNotFoundError,
+ CredentialNotFoundError,
+ CredentialObject,
+ CredentialStore,
+ CredentialType,
+ CredentialUsageSpec,
+ EncryptedFileStorage,
+ EnvVarStorage,
+ InMemoryStorage,
+ StaticProvider,
+ TemplateResolver,
+)
+from pydantic import SecretStr
+
+
class TestCredentialKey:
    """Unit tests for the CredentialKey model."""

    def test_create_basic_key(self):
        """A key without expiry exposes its secret and never reports expired."""
        key = CredentialKey(name="api_key", value=SecretStr("test-value"))
        assert key.name == "api_key"
        assert key.get_secret_value() == "test-value"
        assert key.expires_at is None
        assert not key.is_expired

    def test_key_with_expiration(self):
        """A key expiring in the future is not yet expired."""
        one_hour_ahead = datetime.now(UTC) + timedelta(hours=1)
        key = CredentialKey(name="token", value=SecretStr("xxx"), expires_at=one_hour_ahead)
        assert not key.is_expired

    def test_expired_key(self):
        """A key whose expiry lies in the past reports expired."""
        one_hour_ago = datetime.now(UTC) - timedelta(hours=1)
        key = CredentialKey(name="token", value=SecretStr("xxx"), expires_at=one_hour_ago)
        assert key.is_expired

    def test_key_with_metadata(self):
        """Arbitrary metadata attached to a key is retrievable."""
        key = CredentialKey(
            name="token",
            value=SecretStr("xxx"),
            metadata={"client_id": "abc", "scope": "read"},
        )
        assert key.metadata["client_id"] == "abc"
+
+
class TestCredentialObject:
    """Tests for CredentialObject model."""

    def test_create_simple_credential(self):
        """Test creating a simple API key credential."""
        cred = CredentialObject(
            id="brave_search",
            credential_type=CredentialType.API_KEY,
            keys={"api_key": CredentialKey(name="api_key", value=SecretStr("test-key"))},
        )
        assert cred.id == "brave_search"
        assert cred.credential_type == CredentialType.API_KEY
        # get_key unwraps the SecretStr and returns the plain string
        assert cred.get_key("api_key") == "test-key"

    def test_create_multi_key_credential(self):
        """Test creating a credential with multiple keys."""
        cred = CredentialObject(
            id="github_oauth",
            credential_type=CredentialType.OAUTH2,
            keys={
                "access_token": CredentialKey(name="access_token", value=SecretStr("ghp_xxx")),
                "refresh_token": CredentialKey(name="refresh_token", value=SecretStr("ghr_xxx")),
            },
        )
        assert cred.get_key("access_token") == "ghp_xxx"
        assert cred.get_key("refresh_token") == "ghr_xxx"
        # Unknown key names yield None rather than raising
        assert cred.get_key("nonexistent") is None

    def test_set_key(self):
        """Test setting a key on a credential."""
        cred = CredentialObject(id="test", keys={})
        cred.set_key("new_key", "new_value")
        assert cred.get_key("new_key") == "new_value"

    def test_set_key_with_expiration(self):
        """Test setting a key with expiration."""
        cred = CredentialObject(id="test", keys={})
        expires = datetime.now(UTC) + timedelta(hours=1)
        cred.set_key("token", "xxx", expires_at=expires)
        assert cred.keys["token"].expires_at == expires

    def test_needs_refresh(self):
        """Test needs_refresh property."""
        # A credential holding an already-expired key should report needs_refresh
        past = datetime.now(UTC) - timedelta(hours=1)
        cred = CredentialObject(
            id="test",
            keys={"token": CredentialKey(name="token", value=SecretStr("xxx"), expires_at=past)},
        )
        assert cred.needs_refresh

    def test_get_default_key(self):
        """Test get_default_key returns appropriate default."""
        # With api_key
        cred = CredentialObject(
            id="test",
            keys={"api_key": CredentialKey(name="api_key", value=SecretStr("key-value"))},
        )
        assert cred.get_default_key() == "key-value"

        # With access_token
        cred2 = CredentialObject(
            id="test",
            keys={
                "access_token": CredentialKey(name="access_token", value=SecretStr("token-value"))
            },
        )
        assert cred2.get_default_key() == "token-value"

    def test_record_usage(self):
        """Test recording credential usage."""
        cred = CredentialObject(id="test", keys={})
        assert cred.use_count == 0
        assert cred.last_used is None

        # record_usage bumps the counter and stamps last_used
        cred.record_usage()
        assert cred.use_count == 1
        assert cred.last_used is not None
+
+
class TestCredentialUsageSpec:
    """Unit tests for the CredentialUsageSpec model."""

    def test_create_usage_spec(self):
        """A spec records its credential id, required keys, and header templates."""
        usage = CredentialUsageSpec(
            credential_id="brave_search",
            required_keys=["api_key"],
            headers={"X-Subscription-Token": "{{api_key}}"},
        )
        assert usage.credential_id == "brave_search"
        assert "api_key" in usage.required_keys
        assert "{{api_key}}" in usage.headers.values()
+
+
class TestInMemoryStorage:
    """Tests for InMemoryStorage."""

    def test_save_and_load(self):
        """Test saving and loading a credential."""
        storage = InMemoryStorage()
        cred = CredentialObject(
            id="test",
            keys={"key": CredentialKey(name="key", value=SecretStr("value"))},
        )

        storage.save(cred)
        loaded = storage.load("test")

        assert loaded is not None
        assert loaded.id == "test"
        assert loaded.get_key("key") == "value"

    def test_load_nonexistent(self):
        """Test loading a nonexistent credential."""
        storage = InMemoryStorage()
        assert storage.load("nonexistent") is None

    def test_delete(self):
        """Test deleting a credential."""
        storage = InMemoryStorage()
        cred = CredentialObject(id="test", keys={})
        storage.save(cred)

        assert storage.delete("test")
        assert storage.load("test") is None
        # Deleting again is a no-op and reports False
        assert not storage.delete("test")

    def test_list_all(self):
        """Test listing all credentials."""
        storage = InMemoryStorage()
        storage.save(CredentialObject(id="a", keys={}))
        storage.save(CredentialObject(id="b", keys={}))

        ids = storage.list_all()
        assert "a" in ids
        assert "b" in ids

    def test_exists(self):
        """Test checking if credential exists."""
        storage = InMemoryStorage()
        storage.save(CredentialObject(id="test", keys={}))

        assert storage.exists("test")
        assert not storage.exists("nonexistent")

    def test_clear(self):
        """Test clearing all credentials."""
        storage = InMemoryStorage()
        storage.save(CredentialObject(id="test", keys={}))
        storage.clear()

        assert storage.list_all() == []
+
+
class TestEnvVarStorage:
    """Tests for EnvVarStorage."""

    def test_load_from_env(self):
        """Test loading credential from environment variable."""
        # patch.dict restores os.environ when the with-block exits
        with patch.dict(os.environ, {"TEST_API_KEY": "test-value"}):
            storage = EnvVarStorage(env_mapping={"test": "TEST_API_KEY"})
            cred = storage.load("test")

            assert cred is not None
            assert cred.get_key("api_key") == "test-value"

    def test_load_nonexistent(self):
        """Test loading when env var is not set."""
        storage = EnvVarStorage(env_mapping={"test": "NONEXISTENT_VAR"})
        assert storage.load("test") is None

    def test_default_env_var_pattern(self):
        """Test default env var naming pattern."""
        # Without an explicit mapping, id "my_service" resolves to MY_SERVICE_API_KEY
        with patch.dict(os.environ, {"MY_SERVICE_API_KEY": "value"}):
            storage = EnvVarStorage()
            cred = storage.load("my_service")

            assert cred is not None
            assert cred.get_key("api_key") == "value"

    def test_save_raises(self):
        """Test that save raises NotImplementedError."""
        # EnvVarStorage is read-only: environment variables can't be persisted
        storage = EnvVarStorage()
        with pytest.raises(NotImplementedError):
            storage.save(CredentialObject(id="test", keys={}))

    def test_delete_raises(self):
        """Test that delete raises NotImplementedError."""
        storage = EnvVarStorage()
        with pytest.raises(NotImplementedError):
            storage.delete("test")
+
+
class TestEncryptedFileStorage:
    """Tests for EncryptedFileStorage."""

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for tests."""
        # TemporaryDirectory cleans itself up after the yield
        with tempfile.TemporaryDirectory() as tmpdir:
            yield Path(tmpdir)

    @pytest.fixture
    def storage(self, temp_dir):
        """Create EncryptedFileStorage for tests."""
        return EncryptedFileStorage(temp_dir)

    def test_save_and_load(self, storage):
        """Test saving and loading encrypted credential."""
        cred = CredentialObject(
            id="test",
            credential_type=CredentialType.API_KEY,
            keys={"api_key": CredentialKey(name="api_key", value=SecretStr("secret-value"))},
        )

        storage.save(cred)
        loaded = storage.load("test")

        assert loaded is not None
        assert loaded.id == "test"
        assert loaded.get_key("api_key") == "secret-value"

    def test_encryption_key_from_env(self, temp_dir):
        """Test using encryption key from environment variable."""
        from cryptography.fernet import Fernet

        key = Fernet.generate_key().decode()
        with patch.dict(os.environ, {"HIVE_CREDENTIAL_KEY": key}):
            storage = EncryptedFileStorage(temp_dir)
            cred = CredentialObject(
                id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
            )
            storage.save(cred)

            # Create new storage instance with same key; a successful round-trip
            # shows the key comes from the env var, not per-instance state
            storage2 = EncryptedFileStorage(temp_dir)
            loaded = storage2.load("test")
            assert loaded is not None
            assert loaded.get_key("k") == "v"

    def test_list_all(self, storage):
        """Test listing all credentials."""
        storage.save(CredentialObject(id="cred1", keys={}))
        storage.save(CredentialObject(id="cred2", keys={}))

        ids = storage.list_all()
        assert "cred1" in ids
        assert "cred2" in ids

    def test_delete(self, storage):
        """Test deleting a credential."""
        storage.save(CredentialObject(id="test", keys={}))
        assert storage.delete("test")
        assert storage.load("test") is None
+
+
class TestCompositeStorage:
    """Tests for CompositeStorage read/write routing."""

    @staticmethod
    def _seeded(cred_id, secret):
        """Build an InMemoryStorage holding one single-key credential."""
        backend = InMemoryStorage()
        backend.save(
            CredentialObject(
                id=cred_id, keys={"k": CredentialKey(name="k", value=SecretStr(secret))}
            )
        )
        return backend

    def test_read_from_primary(self):
        """When both stores hold the id, the primary's value wins."""
        composite = CompositeStorage(
            self._seeded("test", "primary"), [self._seeded("test", "fallback")]
        )

        loaded = composite.load("test")
        assert loaded.get_key("k") == "primary"

    def test_fallback_when_not_in_primary(self):
        """Misses in the primary fall through to the fallback stores."""
        composite = CompositeStorage(InMemoryStorage(), [self._seeded("test", "fallback")])

        loaded = composite.load("test")
        assert loaded.get_key("k") == "fallback"

    def test_write_to_primary_only(self):
        """save touches only the primary store, never the fallbacks."""
        primary = InMemoryStorage()
        fallback = InMemoryStorage()

        composite = CompositeStorage(primary, [fallback])
        composite.save(CredentialObject(id="test", keys={}))

        assert primary.exists("test")
        assert not fallback.exists("test")
+
+
class TestStaticProvider:
    """Tests for the pass-through StaticProvider."""

    def test_provider_id(self):
        """The provider identifies itself as 'static'."""
        assert StaticProvider().provider_id == "static"

    def test_supported_types(self):
        """API_KEY and CUSTOM credential types are both supported."""
        supported = StaticProvider().supported_types
        assert CredentialType.API_KEY in supported
        assert CredentialType.CUSTOM in supported

    def test_refresh_returns_unchanged(self):
        """refresh is a no-op: the credential comes back with the same secret."""
        original = CredentialObject(
            id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
        )
        assert StaticProvider().refresh(original).get_key("k") == "v"

    def test_validate_with_keys(self):
        """A credential holding at least one key validates."""
        with_keys = CredentialObject(
            id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))}
        )
        assert StaticProvider().validate(with_keys)

    def test_validate_without_keys(self):
        """An empty credential fails validation."""
        empty = CredentialObject(id="test", keys={})
        assert not StaticProvider().validate(empty)

    def test_should_refresh(self):
        """Static credentials never require a refresh."""
        cred = CredentialObject(id="test", keys={})
        assert not StaticProvider().should_refresh(cred)
+
+
class TestTemplateResolver:
    """Tests for TemplateResolver substitution and error paths."""

    @pytest.fixture
    def store(self):
        """Credential store seeded with a key-style and an OAuth-style entry."""
        return CredentialStore.for_testing(
            {
                "brave_search": {"api_key": "test-brave-key"},
                "github_oauth": {"access_token": "ghp_xxx", "refresh_token": "ghr_xxx"},
            }
        )

    @pytest.fixture
    def resolver(self, store):
        """Resolver bound to the seeded store."""
        return TemplateResolver(store)

    def test_resolve_simple(self, resolver):
        """A single {{cred.key}} placeholder is replaced inline."""
        assert resolver.resolve("Bearer {{github_oauth.access_token}}") == "Bearer ghp_xxx"

    def test_resolve_multiple(self, resolver):
        """Every placeholder in the text is substituted."""
        resolved = resolver.resolve("{{github_oauth.access_token}} and {{brave_search.api_key}}")
        assert "ghp_xxx" in resolved
        assert "test-brave-key" in resolved

    def test_resolve_default_key(self, resolver):
        """Omitting the key selects the credential's default key."""
        assert "test-brave-key" in resolver.resolve("Key: {{brave_search}}")

    def test_resolve_headers(self, resolver):
        """resolve_headers substitutes placeholders in every header value."""
        raw = {
            "Authorization": "Bearer {{github_oauth.access_token}}",
            "X-API-Key": "{{brave_search.api_key}}",
        }
        resolved = resolver.resolve_headers(raw)
        assert resolved["Authorization"] == "Bearer ghp_xxx"
        assert resolved["X-API-Key"] == "test-brave-key"

    def test_resolve_missing_credential(self, resolver):
        """Unknown credential ids raise CredentialNotFoundError."""
        with pytest.raises(CredentialNotFoundError):
            resolver.resolve("{{nonexistent.key}}")

    def test_resolve_missing_key(self, resolver):
        """A known credential with an unknown key raises CredentialKeyNotFoundError."""
        with pytest.raises(CredentialKeyNotFoundError):
            resolver.resolve("{{github_oauth.nonexistent}}")

    def test_has_templates(self, resolver):
        """has_templates detects placeholder syntax anywhere in the text."""
        assert resolver.has_templates("{{cred.key}}")
        assert resolver.has_templates("Bearer {{token}}")
        assert not resolver.has_templates("no templates here")

    def test_extract_references(self, resolver):
        """extract_references yields (credential_id, key) tuples."""
        found = resolver.extract_references("{{github.token}} and {{brave.key}}")
        assert ("github", "token") in found
        assert ("brave", "key") in found
+
+
class TestCredentialStore:
    """Tests for the CredentialStore facade."""

    def test_for_testing_factory(self):
        """for_testing seeds credentials retrievable via get/get_key."""
        store = CredentialStore.for_testing({"test": {"api_key": "value"}})

        assert store.get("test") == "value"
        assert store.get_key("test", "api_key") == "value"

    def test_get_credential(self):
        """get_credential returns the full credential object."""
        store = CredentialStore.for_testing({"test": {"key": "value"}})

        fetched = store.get_credential("test")
        assert fetched is not None
        assert fetched.get_key("key") == "value"

    def test_get_nonexistent(self):
        """Unknown ids yield None from both get_credential and get."""
        store = CredentialStore.for_testing({})

        assert store.get_credential("nonexistent") is None
        assert store.get("nonexistent") is None

    def test_save_and_load(self):
        """A saved credential is retrievable with its secret intact."""
        store = CredentialStore.for_testing({})

        store.save_credential(
            CredentialObject(id="new", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
        )

        reloaded = store.get_credential("new")
        assert reloaded is not None
        assert reloaded.get_key("k") == "v"

    def test_delete_credential(self):
        """Deleting removes the credential from subsequent lookups."""
        store = CredentialStore.for_testing({"test": {"k": "v"}})

        assert store.delete_credential("test")
        assert store.get_credential("test") is None

    def test_list_credentials(self):
        """list_credentials reports every seeded id."""
        store = CredentialStore.for_testing({"a": {"k": "v"}, "b": {"k": "v"}})

        listed = store.list_credentials()
        assert "a" in listed
        assert "b" in listed

    def test_is_available(self):
        """is_available is True only for ids the store can resolve."""
        store = CredentialStore.for_testing({"test": {"k": "v"}})

        assert store.is_available("test")
        assert not store.is_available("nonexistent")

    def test_resolve_templates(self):
        """The store resolves {{id.key}} templates directly."""
        store = CredentialStore.for_testing({"test": {"api_key": "value"}})

        assert store.resolve("Key: {{test.api_key}}") == "Key: value"

    def test_resolve_headers(self):
        """Header dicts are resolved through the store."""
        store = CredentialStore.for_testing({"test": {"token": "xxx"}})

        resolved = store.resolve_headers({"Authorization": "Bearer {{test.token}}"})
        assert resolved["Authorization"] == "Bearer xxx"

    def test_register_provider(self):
        """Registered providers are retrievable by their provider_id."""
        store = CredentialStore.for_testing({})
        provider = StaticProvider()

        store.register_provider(provider)
        assert store.get_provider("static") is provider

    def test_register_usage_spec(self):
        """Registered usage specs are retrievable by credential id."""
        store = CredentialStore.for_testing({})
        spec = CredentialUsageSpec(
            credential_id="test",
            required_keys=["api_key"],
            headers={"X-Key": "{{api_key}}"},
        )

        store.register_usage(spec)
        assert store.get_usage_spec("test") is spec

    def test_validate_for_usage(self):
        """Validation passes when every required key is present."""
        store = CredentialStore.for_testing({"test": {"api_key": "value"}})
        store.register_usage(CredentialUsageSpec(credential_id="test", required_keys=["api_key"]))

        assert store.validate_for_usage("test") == []

    def test_validate_for_usage_missing_key(self):
        """Validation reports each missing required key by name."""
        store = CredentialStore.for_testing({"test": {"other_key": "value"}})
        store.register_usage(CredentialUsageSpec(credential_id="test", required_keys=["api_key"]))

        problems = store.validate_for_usage("test")
        assert "api_key" in problems[0]

    def test_caching(self):
        """Within the TTL a loaded credential is served from cache."""
        backend = InMemoryStorage()
        store = CredentialStore(storage=backend, cache_ttl_seconds=60)

        backend.save(
            CredentialObject(id="test", keys={"k": CredentialKey(name="k", value=SecretStr("v"))})
        )

        store.get_credential("test")  # populate the cache
        backend.delete("test")  # remove from the backend only

        # The cached copy must still be served.
        assert store.get_credential("test") is not None

    def test_clear_cache(self):
        """clear_cache forces the next lookup back to the backend."""
        backend = InMemoryStorage()
        store = CredentialStore(storage=backend)

        backend.save(CredentialObject(id="test", keys={}))
        store.get_credential("test")  # populate the cache

        backend.delete("test")
        store.clear_cache()

        assert store.get_credential("test") is None
+
+
class TestOAuth2Module:
    """Tests for the OAuth2 token and config models."""

    def test_oauth2_token_from_response(self):
        """from_token_response maps all standard token-endpoint fields."""
        from core.framework.credentials.oauth2 import OAuth2Token

        payload = {
            "access_token": "xxx",
            "token_type": "Bearer",
            "expires_in": 3600,
            "refresh_token": "yyy",
            "scope": "read write",
        }

        token = OAuth2Token.from_token_response(payload)
        assert token.access_token == "xxx"
        assert token.token_type == "Bearer"
        assert token.refresh_token == "yyy"
        assert token.scope == "read write"
        assert token.expires_at is not None

    def test_token_is_expired(self):
        """is_expired reflects whether expires_at lies in the past."""
        from core.framework.credentials.oauth2 import OAuth2Token

        live = OAuth2Token(access_token="xxx", expires_at=datetime.now(UTC) + timedelta(hours=1))
        assert not live.is_expired

        stale = OAuth2Token(access_token="xxx", expires_at=datetime.now(UTC) - timedelta(hours=1))
        assert stale.is_expired

    def test_token_can_refresh(self):
        """can_refresh requires a refresh_token to be present."""
        from core.framework.credentials.oauth2 import OAuth2Token

        assert OAuth2Token(access_token="xxx", refresh_token="yyy").can_refresh
        assert not OAuth2Token(access_token="xxx").can_refresh

    def test_oauth2_config_validation(self):
        """OAuth2Config rejects an empty token_url and HEADER_CUSTOM without a header name."""
        from core.framework.credentials.oauth2 import OAuth2Config, TokenPlacement

        valid = OAuth2Config(
            token_url="https://example.com/token", client_id="id", client_secret="secret"
        )
        assert valid.token_url == "https://example.com/token"

        with pytest.raises(ValueError):
            OAuth2Config(token_url="")

        with pytest.raises(ValueError):
            OAuth2Config(
                token_url="https://example.com/token",
                token_placement=TokenPlacement.HEADER_CUSTOM,
            )
+
+
# Allow running this test module directly (python <file>); "-v" gives the
# same verbose per-test output as invoking pytest from the command line.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
diff --git a/core/framework/credentials/vault/__init__.py b/core/framework/credentials/vault/__init__.py
new file mode 100644
index 0000000000..8e31862f86
--- /dev/null
+++ b/core/framework/credentials/vault/__init__.py
@@ -0,0 +1,55 @@
"""
HashiCorp Vault integration for the credential store.

This module provides enterprise-grade secret management through
HashiCorp Vault integration.

Quick Start:
    from core.framework.credentials import CredentialStore
    from core.framework.credentials.vault import HashiCorpVaultStorage

    # Configure Vault storage
    storage = HashiCorpVaultStorage(
        url="https://vault.example.com:8200",
        # token read from VAULT_TOKEN env var
        mount_point="secret",
        path_prefix="hive/credentials",
    )

    # Create credential store with Vault backend
    store = CredentialStore(storage=storage)

    # Use normally - credentials are stored in Vault
    credential = store.get_credential("my_api")

Requirements:
    pip install hvac

Authentication:
    Set the VAULT_TOKEN environment variable or pass the token directly:

        export VAULT_TOKEN="hvs.xxxxxxxxxxxxx"

    For production, consider using Vault auth methods:
    - Kubernetes auth
    - AppRole auth
    - AWS IAM auth

Vault Configuration:
    Ensure KV v2 secrets engine is enabled:

        vault secrets enable -path=secret kv-v2

    Grant a policy whose paths match your chosen path_prefix (shown here
    for the default "hive/credentials" — adjust both rules if you pass a
    different prefix):

        path "secret/data/hive/credentials/*" {
            capabilities = ["create", "read", "update", "delete", "list"]
        }
        path "secret/metadata/hive/credentials/*" {
            capabilities = ["list", "delete"]
        }
"""

from .hashicorp import HashiCorpVaultStorage

__all__ = ["HashiCorpVaultStorage"]
diff --git a/core/framework/credentials/vault/hashicorp.py b/core/framework/credentials/vault/hashicorp.py
new file mode 100644
index 0000000000..5984d5277a
--- /dev/null
+++ b/core/framework/credentials/vault/hashicorp.py
@@ -0,0 +1,394 @@
+"""
+HashiCorp Vault storage adapter.
+
+Provides integration with HashiCorp Vault for enterprise secret management.
+Requires the 'hvac' package: pip install hvac
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from datetime import datetime
+from typing import Any
+
+from pydantic import SecretStr
+
+from ..models import CredentialKey, CredentialObject, CredentialType
+from ..storage import CredentialStorage
+
+logger = logging.getLogger(__name__)
+
+
class HashiCorpVaultStorage(CredentialStorage):
    """
    HashiCorp Vault storage adapter.

    Features:
    - KV v2 secrets engine support
    - Namespace support (Enterprise)
    - Automatic secret versioning
    - Audit logging via Vault

    The adapter stores credentials in Vault's KV v2 secrets engine with
    the following structure:

        {mount_point}/data/{path_prefix}/{credential_id}
            data:
                _type: "oauth2"
                access_token: "xxx"
                refresh_token: "yyy"
                _expires_access_token: "2024-01-26T12:00:00"
                _provider_id: "oauth2"

    Example:
        storage = HashiCorpVaultStorage(
            url="https://vault.example.com:8200",
            token="hvs.xxx",  # Or use VAULT_TOKEN env var
            mount_point="secret",
            path_prefix="hive/credentials",
        )

        store = CredentialStore(storage=storage)

        # Credentials are now stored in Vault
        store.save_credential(credential)
        credential = store.get_credential("my_api")

    Authentication:
        The adapter uses token-based authentication. The token can be provided:
        1. Directly via the 'token' parameter
        2. Via the VAULT_TOKEN environment variable

        For production, consider using:
        - Kubernetes auth method
        - AppRole auth method
        - AWS IAM auth method

    Requirements:
        pip install hvac
    """

    def __init__(
        self,
        url: str,
        token: str | None = None,
        mount_point: str = "secret",
        path_prefix: str = "hive/credentials",
        namespace: str | None = None,
        verify_ssl: bool = True,
    ):
        """
        Initialize Vault storage.

        Args:
            url: Vault server URL (e.g., https://vault.example.com:8200)
            token: Vault token. If None, reads from VAULT_TOKEN env var
            mount_point: KV secrets engine mount point (default: "secret")
            path_prefix: Path prefix for all credentials
            namespace: Vault namespace (Enterprise feature)
            verify_ssl: Whether to verify SSL certificates

        Raises:
            ImportError: If hvac is not installed
            ValueError: If authentication fails
        """
        try:
            import hvac
            from hvac.exceptions import InvalidPath
        except ImportError as e:
            raise ImportError(
                "HashiCorp Vault support requires 'hvac'. Install with: pip install hvac"
            ) from e

        self._url = url
        self._token = token or os.environ.get("VAULT_TOKEN")
        self._mount = mount_point
        self._prefix = path_prefix
        self._namespace = namespace
        # hvac signals a missing KV v2 secret with InvalidPath. Keeping a
        # reference lets the CRUD methods separate "not found" from real
        # failures by type instead of only string-matching exception text.
        self._not_found_exc: type[Exception] = InvalidPath

        if not self._token:
            raise ValueError(
                "Vault token required. Set VAULT_TOKEN env var or pass token parameter."
            )

        self._client = hvac.Client(
            url=url,
            token=self._token,
            namespace=namespace,
            verify=verify_ssl,
        )

        if not self._client.is_authenticated():
            raise ValueError("Vault authentication failed. Check token and server URL.")

        logger.info(f"Connected to HashiCorp Vault at {url}")

    def _path(self, credential_id: str) -> str:
        """Build the Vault path for a credential id."""
        # Slashes would create nested Vault paths; flatten them so one
        # credential always maps to exactly one secret under the prefix.
        safe_id = credential_id.replace("/", "_").replace("\\", "_")
        return f"{self._prefix}/{safe_id}"

    def _is_not_found(self, exc: Exception) -> bool:
        """Return True when *exc* indicates a missing secret rather than a real failure."""
        if isinstance(exc, self._not_found_exc):
            return True
        # Fallback for wrapped or HTTP-level errors that only carry a message.
        error_str = str(exc).lower()
        return "not found" in error_str or "404" in error_str

    def save(self, credential: CredentialObject) -> None:
        """Save credential to Vault KV v2 (creates a new secret version)."""
        path = self._path(credential.id)
        data = self._serialize_for_vault(credential)

        try:
            self._client.secrets.kv.v2.create_or_update_secret(
                path=path,
                secret=data,
                mount_point=self._mount,
            )
            logger.debug(f"Saved credential '{credential.id}' to Vault at {path}")
        except Exception as e:
            logger.error(f"Failed to save credential '{credential.id}' to Vault: {e}")
            raise

    def load(self, credential_id: str) -> CredentialObject | None:
        """Load the latest version of a credential, or None if it does not exist."""
        path = self._path(credential_id)

        try:
            response = self._client.secrets.kv.v2.read_secret_version(
                path=path,
                mount_point=self._mount,
            )
            data = response["data"]["data"]
            return self._deserialize_from_vault(credential_id, data)
        except Exception as e:
            if self._is_not_found(e):
                logger.debug(f"Credential '{credential_id}' not found in Vault")
                return None
            logger.error(f"Failed to load credential '{credential_id}' from Vault: {e}")
            raise

    def delete(self, credential_id: str) -> bool:
        """Delete credential from Vault (all versions). Returns False if absent."""
        path = self._path(credential_id)

        try:
            self._client.secrets.kv.v2.delete_metadata_and_all_versions(
                path=path,
                mount_point=self._mount,
            )
            logger.debug(f"Deleted credential '{credential_id}' from Vault")
            return True
        except Exception as e:
            if self._is_not_found(e):
                return False
            logger.error(f"Failed to delete credential '{credential_id}' from Vault: {e}")
            raise

    def list_all(self) -> list[str]:
        """List all credential ids under the prefix (empty if none exist yet)."""
        try:
            response = self._client.secrets.kv.v2.list_secrets(
                path=self._prefix,
                mount_point=self._mount,
            )
            keys = response.get("data", {}).get("keys", [])
            # Vault marks sub-folders with a trailing slash; normalize them.
            return [k.rstrip("/") for k in keys]
        except Exception as e:
            if self._is_not_found(e):
                return []
            logger.error(f"Failed to list credentials from Vault: {e}")
            raise

    def exists(self, credential_id: str) -> bool:
        """Check if credential exists in Vault.

        Returns False both for a missing secret and (for backward
        compatibility) on unexpected errors, but logs the latter so a
        connectivity problem is not silently read as "does not exist".
        """
        try:
            self._client.secrets.kv.v2.read_secret_version(
                path=self._path(credential_id),
                mount_point=self._mount,
            )
            return True
        except Exception as e:
            if not self._is_not_found(e):
                logger.warning(f"exists() check failed for '{credential_id}': {e}")
            return False

    def _serialize_for_vault(self, credential: CredentialObject) -> dict[str, Any]:
        """Convert credential to Vault secret format.

        Metadata fields are prefixed with "_" to keep them distinguishable
        from the credential's own keys on deserialization.
        """
        data: dict[str, Any] = {
            "_type": credential.credential_type.value,
        }

        if credential.provider_id:
            data["_provider_id"] = credential.provider_id

        if credential.description:
            data["_description"] = credential.description

        if credential.auto_refresh:
            data["_auto_refresh"] = "true"

        # Store each key's secret plus optional expiry/metadata side-channels.
        for key_name, key in credential.keys.items():
            data[key_name] = key.get_secret_value()

            if key.expires_at:
                data[f"_expires_{key_name}"] = key.expires_at.isoformat()

            if key.metadata:
                data[f"_metadata_{key_name}"] = str(key.metadata)

        return data

    def _deserialize_from_vault(self, credential_id: str, data: dict[str, Any]) -> CredentialObject:
        """Reconstruct a CredentialObject from a Vault secret payload."""
        # Work on a copy: the caller's response dict should not be mutated.
        data = dict(data)

        cred_type = CredentialType(data.pop("_type", "api_key"))
        provider_id = data.pop("_provider_id", None)
        description = data.pop("_description", "")
        auto_refresh = data.pop("_auto_refresh", "") == "true"

        keys: dict[str, CredentialKey] = {}

        # Everything not "_"-prefixed is a credential key; the "_expires_*"
        # and "_metadata_*" entries are per-key side-channels written by
        # _serialize_for_vault.
        for key_name in [k for k in data if not k.startswith("_")]:
            value = data[key_name]

            expires_at = None
            expires_key = f"_expires_{key_name}"
            if expires_key in data:
                try:
                    expires_at = datetime.fromisoformat(data[expires_key])
                except (ValueError, TypeError):
                    # Tolerate a corrupt timestamp rather than losing the key.
                    pass

            metadata: dict[str, Any] = {}
            metadata_key = f"_metadata_{key_name}"
            if metadata_key in data:
                try:
                    import ast

                    metadata = ast.literal_eval(data[metadata_key])
                except (ValueError, SyntaxError):
                    pass

            keys[key_name] = CredentialKey(
                name=key_name,
                value=SecretStr(value),
                expires_at=expires_at,
                metadata=metadata,
            )

        return CredentialObject(
            id=credential_id,
            credential_type=cred_type,
            keys=keys,
            provider_id=provider_id,
            description=description,
            auto_refresh=auto_refresh,
        )

    # --- Vault-Specific Operations ---

    def get_secret_metadata(self, credential_id: str) -> dict[str, Any] | None:
        """
        Get Vault metadata for a secret (version info, timestamps, etc.).

        Args:
            credential_id: The credential identifier

        Returns:
            Metadata dict or None if not found
        """
        try:
            response = self._client.secrets.kv.v2.read_secret_metadata(
                path=self._path(credential_id),
                mount_point=self._mount,
            )
            return response.get("data", {})
        except Exception:
            return None

    def soft_delete(self, credential_id: str, versions: list[int] | None = None) -> bool:
        """
        Soft delete specific versions (can be recovered via undelete).

        Args:
            credential_id: The credential identifier
            versions: Version numbers to delete. If None, deletes latest.

        Returns:
            True if successful
        """
        path = self._path(credential_id)

        try:
            if versions:
                self._client.secrets.kv.v2.delete_secret_versions(
                    path=path,
                    versions=versions,
                    mount_point=self._mount,
                )
            else:
                self._client.secrets.kv.v2.delete_latest_version_of_secret(
                    path=path,
                    mount_point=self._mount,
                )
            return True
        except Exception as e:
            logger.error(f"Soft delete failed for '{credential_id}': {e}")
            return False

    def undelete(self, credential_id: str, versions: list[int]) -> bool:
        """
        Recover soft-deleted versions.

        Args:
            credential_id: The credential identifier
            versions: Version numbers to recover

        Returns:
            True if successful
        """
        try:
            self._client.secrets.kv.v2.undelete_secret_versions(
                path=self._path(credential_id),
                versions=versions,
                mount_point=self._mount,
            )
            return True
        except Exception as e:
            logger.error(f"Undelete failed for '{credential_id}': {e}")
            return False

    def load_version(self, credential_id: str, version: int) -> CredentialObject | None:
        """
        Load a specific version of a credential.

        Args:
            credential_id: The credential identifier
            version: Version number to load

        Returns:
            CredentialObject or None if the version does not exist
        """
        try:
            response = self._client.secrets.kv.v2.read_secret_version(
                path=self._path(credential_id),
                version=version,
                mount_point=self._mount,
            )
            data = response["data"]["data"]
            return self._deserialize_from_vault(credential_id, data)
        except Exception:
            return None
diff --git a/core/framework/graph/__init__.py b/core/framework/graph/__init__.py
index 361567d3ff..620a93b383 100644
--- a/core/framework/graph/__init__.py
+++ b/core/framework/graph/__init__.py
@@ -1,32 +1,32 @@
"""Graph structures: Goals, Nodes, Edges, and Flexible Execution."""
-from framework.graph.goal import Goal, SuccessCriterion, Constraint, GoalStatus
-from framework.graph.node import NodeSpec, NodeContext, NodeResult, NodeProtocol
-from framework.graph.edge import EdgeSpec, EdgeCondition
+from framework.graph.code_sandbox import CodeSandbox, safe_eval, safe_exec
+from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.executor import GraphExecutor
+from framework.graph.flexible_executor import ExecutorConfig, FlexibleGraphExecutor
+from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
+from framework.graph.judge import HybridJudge, create_default_judge
+from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
# Flexible execution (Worker-Judge pattern)
from framework.graph.plan import (
- Plan,
- PlanStep,
ActionSpec,
ActionType,
- StepStatus,
- Judgment,
- JudgmentAction,
- EvaluationRule,
- PlanExecutionResult,
- ExecutionStatus,
- load_export,
# HITL (Human-in-the-loop)
ApprovalDecision,
ApprovalRequest,
ApprovalResult,
+ EvaluationRule,
+ ExecutionStatus,
+ Judgment,
+ JudgmentAction,
+ Plan,
+ PlanExecutionResult,
+ PlanStep,
+ StepStatus,
+ load_export,
)
-from framework.graph.judge import HybridJudge, create_default_judge
-from framework.graph.worker_node import WorkerNode, StepExecutionResult
-from framework.graph.flexible_executor import FlexibleGraphExecutor, ExecutorConfig
-from framework.graph.code_sandbox import CodeSandbox, safe_exec, safe_eval
+from framework.graph.worker_node import StepExecutionResult, WorkerNode
__all__ = [
# Goal
@@ -42,6 +42,7 @@
# Edge
"EdgeSpec",
"EdgeCondition",
+ "GraphSpec",
# Executor (fixed graph)
"GraphExecutor",
# Plan (flexible execution)
diff --git a/core/framework/graph/code_sandbox.py b/core/framework/graph/code_sandbox.py
index 28a4c231b8..ee399586aa 100644
--- a/core/framework/graph/code_sandbox.py
+++ b/core/framework/graph/code_sandbox.py
@@ -13,11 +13,11 @@
"""
import ast
-import sys
import signal
-from typing import Any
-from dataclasses import dataclass, field
+import sys
from contextlib import contextmanager
+from dataclasses import dataclass, field
+from typing import Any
# Safe builtins whitelist
SAFE_BUILTINS = {
@@ -25,7 +25,6 @@
"True": True,
"False": False,
"None": None,
-
# Type constructors
"bool": bool,
"int": int,
@@ -36,7 +35,6 @@
"set": set,
"tuple": tuple,
"frozenset": frozenset,
-
# Basic functions
"abs": abs,
"all": all,
@@ -97,22 +95,26 @@
class CodeSandboxError(Exception):
"""Error during sandboxed code execution."""
+
pass
class TimeoutError(CodeSandboxError):
"""Code execution timed out."""
+
pass
class SecurityError(CodeSandboxError):
"""Code contains potentially dangerous operations."""
+
pass
@dataclass
class SandboxResult:
"""Result of sandboxed code execution."""
+
success: bool
result: Any = None
error: str | None = None
@@ -134,6 +136,7 @@ def __call__(self, name: str, *args, **kwargs):
if name not in self._cache:
import importlib
+
self._cache[name] = importlib.import_module(name)
return self._cache[name]
@@ -161,9 +164,8 @@ def validate(self, code: str) -> list[str]:
for node in ast.walk(tree):
# Check for blocked node types
if type(node) in self.blocked_nodes:
- issues.append(
- f"Blocked operation: {type(node).__name__} at line {getattr(node, 'lineno', '?')}"
- )
+ lineno = getattr(node, "lineno", "?")
+ issues.append(f"Blocked operation: {type(node).__name__} at line {lineno}")
# Check for dangerous attribute access
if isinstance(node, ast.Attribute):
@@ -212,11 +214,12 @@ def __init__(
@contextmanager
def _timeout_context(self, seconds: int):
"""Context manager for timeout enforcement."""
+
def handler(signum, frame):
raise TimeoutError(f"Code execution timed out after {seconds} seconds")
# Only works on Unix-like systems
- if hasattr(signal, 'SIGALRM'):
+ if hasattr(signal, "SIGALRM"):
old_handler = signal.signal(signal.SIGALRM, handler)
signal.alarm(seconds)
try:
@@ -275,6 +278,7 @@ def execute(
# Capture stdout
import io
+
old_stdout = sys.stdout
sys.stdout = captured_stdout = io.StringIO()
@@ -296,11 +300,7 @@ def execute(
# Also extract any new variables (not in inputs or builtins)
for key, value in namespace.items():
- if (
- key not in inputs
- and key not in self.safe_builtins
- and not key.startswith("_")
- ):
+ if key not in inputs and key not in self.safe_builtins and not key.startswith("_"):
extracted[key] = value
return SandboxResult(
diff --git a/core/framework/graph/edge.py b/core/framework/graph/edge.py
index f94688c788..886daa3075 100644
--- a/core/framework/graph/edge.py
+++ b/core/framework/graph/edge.py
@@ -11,9 +11,9 @@
 Edge Types:
 - always: Always traverse after source completes
- on_success: Traverse only if source succeeds
- on_failure: Traverse only if source fails
-- conditional: Traverse based on expression evaluation
+- conditional: Traverse based on expression evaluation (SAFE SUBSET ONLY)
- llm_decide: Let LLM decide based on goal and context (goal-aware routing)
The llm_decide condition is particularly powerful for goal-driven agents,
@@ -21,19 +22,22 @@
given the current goal, context, and execution state.
"""
-from typing import Any
from enum import Enum
+from typing import Any
from pydantic import BaseModel, Field
+from framework.graph.safe_eval import safe_eval
+
class EdgeCondition(str, Enum):
"""When an edge should be traversed."""
- ALWAYS = "always" # Always after source completes
- ON_SUCCESS = "on_success" # Only if source succeeds
- ON_FAILURE = "on_failure" # Only if source fails
- CONDITIONAL = "conditional" # Based on expression
- LLM_DECIDE = "llm_decide" # Let LLM decide based on goal and context
+
+ ALWAYS = "always" # Always after source completes
+ ON_SUCCESS = "on_success" # Only if source succeeds
+ ON_FAILURE = "on_failure" # Only if source fails
+ CONDITIONAL = "conditional" # Based on expression
+ LLM_DECIDE = "llm_decide" # Let LLM decide based on goal and context
class EdgeSpec(BaseModel):
@@ -68,6 +72,7 @@ class EdgeSpec(BaseModel):
description="Only filter if results need refinement to meet goal",
)
"""
+
id: str
source: str = Field(description="Source node ID")
target: str = Field(description="Target node ID")
@@ -76,20 +81,17 @@ class EdgeSpec(BaseModel):
condition: EdgeCondition = EdgeCondition.ALWAYS
condition_expr: str | None = Field(
default=None,
- description="Expression for CONDITIONAL edges, e.g., 'output.confidence > 0.8'"
+ description="Expression for CONDITIONAL edges, e.g., 'output.confidence > 0.8'",
)
# Data flow
input_mapping: dict[str, str] = Field(
default_factory=dict,
- description="Map source outputs to target inputs: {target_key: source_key}"
+ description="Map source outputs to target inputs: {target_key: source_key}",
)
# Priority for multiple outgoing edges
- priority: int = Field(
- default=0,
- description="Higher priority edges are evaluated first"
- )
+ priority: int = Field(default=0, description="Higher priority edges are evaluated first")
# Metadata
description: str = ""
@@ -164,17 +166,18 @@ def _evaluate_condition(
"output": output,
"memory": memory,
"result": output.get("result"),
- "true": True, # Allow lowercase true/false in conditions
+ "true": True, # Allow lowercase true/false in conditions
"false": False,
**memory, # Unpack memory keys directly into context
}
try:
- # Safe evaluation (in production, use a proper expression evaluator)
- return bool(eval(self.condition_expr, {"__builtins__": {}}, context))
+ # Safe evaluation using AST-based whitelist
+ return bool(safe_eval(self.condition_expr, context))
except Exception as e:
# Log the error for debugging
import logging
+
logger = logging.getLogger(__name__)
logger.warning(f" ⚠ Condition evaluation failed: {self.condition_expr}")
logger.warning(f" Error: {e}")
@@ -235,7 +238,8 @@ def _llm_decide(
# Parse response
import re
- json_match = re.search(r'\{[^{}]*\}', response.content, re.DOTALL)
+
+ json_match = re.search(r"\{[^{}]*\}", response.content, re.DOTALL)
if json_match:
data = json.loads(json_match.group())
proceed = data.get("proceed", False)
@@ -243,6 +247,7 @@ def _llm_decide(
# Log the decision (using basic print for now)
import logging
+
logger = logging.getLogger(__name__)
logger.info(f" 🤔 LLM routing decision: {'PROCEED' if proceed else 'SKIP'}")
logger.info(f" Reason: {reasoning}")
@@ -252,6 +257,7 @@ def _llm_decide(
except Exception as e:
# Fallback: proceed on success
import logging
+
logger = logging.getLogger(__name__)
logger.warning(f" ⚠ LLM routing failed, defaulting to on_success: {e}")
return source_success
@@ -304,28 +310,24 @@ class AsyncEntryPointSpec(BaseModel):
isolation_level="shared",
)
"""
+
id: str = Field(description="Unique identifier for this entry point")
name: str = Field(description="Human-readable name")
entry_node: str = Field(description="Node ID to start execution from")
trigger_type: str = Field(
default="manual",
- description="How this entry point is triggered: webhook, api, timer, event, manual"
+ description="How this entry point is triggered: webhook, api, timer, event, manual",
)
trigger_config: dict[str, Any] = Field(
default_factory=dict,
- description="Trigger-specific configuration (e.g., webhook URL, timer interval)"
+ description="Trigger-specific configuration (e.g., webhook URL, timer interval)",
)
isolation_level: str = Field(
- default="shared",
- description="State isolation: isolated, shared, or synchronized"
- )
- priority: int = Field(
- default=0,
- description="Execution priority (higher = more priority)"
+ default="shared", description="State isolation: isolated, shared, or synchronized"
)
+ priority: int = Field(default=0, description="Execution priority (higher = more priority)")
max_concurrent: int = Field(
- default=10,
- description="Maximum concurrent executions for this entry point"
+ default=10, description="Maximum concurrent executions for this entry point"
)
model_config = {"extra": "allow"}
@@ -370,6 +372,7 @@ class GraphSpec(BaseModel):
edges=[...],
)
"""
+
id: str
goal_id: str
version: str = "1.0.0"
@@ -378,46 +381,43 @@ class GraphSpec(BaseModel):
entry_node: str = Field(description="ID of the first node to execute")
entry_points: dict[str, str] = Field(
default_factory=dict,
- description="Named entry points for resuming execution. Format: {name: node_id}"
+ description="Named entry points for resuming execution. Format: {name: node_id}",
)
async_entry_points: list[AsyncEntryPointSpec] = Field(
default_factory=list,
- description="Asynchronous entry points for concurrent execution streams (used with AgentRuntime)"
+ description=(
+ "Asynchronous entry points for concurrent execution streams (used with AgentRuntime)"
+ ),
)
terminal_nodes: list[str] = Field(
- default_factory=list,
- description="IDs of nodes that end execution"
+ default_factory=list, description="IDs of nodes that end execution"
)
pause_nodes: list[str] = Field(
- default_factory=list,
- description="IDs of nodes that pause execution for HITL input"
+ default_factory=list, description="IDs of nodes that pause execution for HITL input"
)
# Components
nodes: list[Any] = Field( # NodeSpec, but avoiding circular import
- default_factory=list,
- description="All node specifications"
- )
- edges: list[EdgeSpec] = Field(
- default_factory=list,
- description="All edge specifications"
+ default_factory=list, description="All node specifications"
)
+ edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications")
# Shared memory keys
memory_keys: list[str] = Field(
- default_factory=list,
- description="Keys available in shared memory"
+ default_factory=list, description="Keys available in shared memory"
)
# Default LLM settings
default_model: str = "claude-haiku-4-5-20251001"
max_tokens: int = 1024
+ # Cleanup LLM for JSON extraction fallback (fast/cheap model preferred)
+ # If not set, uses CEREBRAS_API_KEY -> cerebras/llama-3.3-70b or
+ # ANTHROPIC_API_KEY -> claude-3-5-haiku as fallback
+ cleanup_llm_model: str | None = None
+
# Execution limits
- max_steps: int = Field(
- default=100,
- description="Maximum node executions before timeout"
- )
+ max_steps: int = Field(default=100, description="Maximum node executions before timeout")
max_retries_per_node: int = 3
# Metadata
@@ -453,6 +453,42 @@ def get_incoming_edges(self, node_id: str) -> list[EdgeSpec]:
"""Get all edges entering a node."""
return [e for e in self.edges if e.target == node_id]
+ def detect_fan_out_nodes(self) -> dict[str, list[str]]:
+ """
+ Detect nodes that fan-out to multiple targets.
+
+ A fan-out occurs when a node has multiple outgoing edges with the same
+ condition (typically ON_SUCCESS) that should execute in parallel.
+
+ Returns:
+ Dict mapping source_node_id -> list of parallel target_node_ids
+ """
+ fan_outs: dict[str, list[str]] = {}
+ for node in self.nodes:
+ outgoing = self.get_outgoing_edges(node.id)
+ # Fan-out: multiple edges with ON_SUCCESS condition
+ success_edges = [e for e in outgoing if e.condition == EdgeCondition.ON_SUCCESS]
+ if len(success_edges) > 1:
+ fan_outs[node.id] = [e.target for e in success_edges]
+ return fan_outs
+
+ def detect_fan_in_nodes(self) -> dict[str, list[str]]:
+ """
+ Detect nodes that receive from multiple sources (fan-in / convergence).
+
+ A fan-in occurs when a node has multiple incoming edges, meaning
+ it should wait for all predecessor branches to complete.
+
+ Returns:
+ Dict mapping target_node_id -> list of source_node_ids
+ """
+ fan_ins: dict[str, list[str]] = {}
+ for node in self.nodes:
+ incoming = self.get_incoming_edges(node.id)
+ if len(incoming) > 1:
+ fan_ins[node.id] = [e.source for e in incoming]
+ return fan_ins
+
def get_entry_point(self, session_state: dict | None = None) -> str:
"""
Get the appropriate entry point based on session state.
@@ -504,7 +540,8 @@ def validate(self) -> list[str]:
# Check entry node exists
if not self.get_node(entry_point.entry_node):
errors.append(
- f"Async entry point '{entry_point.id}' references missing node '{entry_point.entry_node}'"
+ f"Async entry point '{entry_point.id}' references "
+ f"missing node '{entry_point.entry_node}'"
)
# Validate isolation level
@@ -562,11 +599,13 @@ def validate(self) -> list[str]:
for node in self.nodes:
if node.id not in reachable:
- # Skip this error if the node is a pause node, entry point target, or async entry point
- # (pause/resume architecture and async entry points make these reachable)
- if (node.id in self.pause_nodes or
- node.id in self.entry_points.values() or
- node.id in async_entry_nodes):
+ # Skip if node is a pause node, entry point target, or async entry
+ # (pause/resume architecture and async entry points make reachable)
+ if (
+ node.id in self.pause_nodes
+ or node.id in self.entry_points.values()
+ or node.id in async_entry_nodes
+ ):
continue
errors.append(f"Node '{node.id}' is unreachable from entry")
diff --git a/core/framework/graph/executor.py b/core/framework/graph/executor.py
index 4f89ac78a4..eac54d37c8 100644
--- a/core/framework/graph/executor.py
+++ b/core/framework/graph/executor.py
@@ -9,31 +9,34 @@
5. Returns the final result
"""
+import asyncio
import logging
-from typing import Any, Callable
+from collections.abc import Callable
from dataclasses import dataclass, field
+from typing import Any
-from framework.runtime.core import Runtime
+from framework.graph.edge import EdgeSpec, GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import (
- NodeSpec,
+ FunctionNode,
+ LLMNode,
NodeContext,
- NodeResult,
NodeProtocol,
- SharedMemory,
- LLMNode,
+ NodeResult,
+ NodeSpec,
RouterNode,
- FunctionNode,
+ SharedMemory,
)
-from framework.graph.edge import GraphSpec
+from framework.graph.output_cleaner import CleansingConfig, OutputCleaner
from framework.graph.validator import OutputValidator
-from framework.graph.output_cleaner import OutputCleaner, CleansingConfig
from framework.llm.provider import LLMProvider, Tool
+from framework.runtime.core import Runtime
@dataclass
class ExecutionResult:
"""Result of executing a graph."""
+
success: bool
output: dict[str, Any] = field(default_factory=dict)
error: str | None = None
@@ -45,6 +48,35 @@ class ExecutionResult:
session_state: dict[str, Any] = field(default_factory=dict) # State to resume from
+@dataclass
+class ParallelBranch:
+ """Tracks a single branch in parallel fan-out execution."""
+
+ branch_id: str
+ node_id: str
+ edge: EdgeSpec
+ result: "NodeResult | None" = None
+ status: str = "pending" # pending, running, completed, failed
+ retry_count: int = 0
+ error: str | None = None
+
+
+@dataclass
+class ParallelExecutionConfig:
+ """Configuration for parallel execution behavior."""
+
+ # Error handling: "fail_all" cancels all on first failure,
+ # "continue_others" lets remaining branches complete,
+ # "wait_all" waits for all and reports all failures
+ on_branch_failure: str = "fail_all"
+
+ # Memory conflict handling when branches write same key
+ memory_conflict_strategy: str = "last_wins" # "last_wins", "first_wins", "error"
+
+ # Timeout per branch in seconds
+ branch_timeout_seconds: float = 300.0
+
+
class GraphExecutor:
"""
Executes agent graphs.
@@ -73,6 +105,8 @@ def __init__(
node_registry: dict[str, NodeProtocol] | None = None,
approval_callback: Callable | None = None,
cleansing_config: CleansingConfig | None = None,
+ enable_parallel_execution: bool = True,
+ parallel_config: ParallelExecutionConfig | None = None,
):
"""
Initialize the executor.
@@ -85,6 +119,8 @@ def __init__(
node_registry: Custom node implementations by ID
approval_callback: Optional callback for human-in-the-loop approval
cleansing_config: Optional output cleansing configuration
+ enable_parallel_execution: Enable parallel fan-out execution (default True)
+ parallel_config: Configuration for parallel execution behavior
"""
self.runtime = runtime
self.llm = llm
@@ -102,6 +138,10 @@ def __init__(
llm_provider=llm,
)
+ # Parallel execution settings
+ self.enable_parallel_execution = enable_parallel_execution
+ self._parallel_config = parallel_config or ParallelExecutionConfig()
+
def _validate_tools(self, graph: GraphSpec) -> list[str]:
"""
Validate that all tools declared by nodes are available.
@@ -116,14 +156,15 @@ def _validate_tools(self, graph: GraphSpec) -> list[str]:
if node.tools:
missing = set(node.tools) - available_tool_names
if missing:
+ available = sorted(available_tool_names) if available_tool_names else "none"
errors.append(
- f"Node '{node.name}' (id={node.id}) requires tools {sorted(missing)} "
- f"but they are not registered. Available tools: {sorted(available_tool_names) if available_tool_names else 'none'}"
+ f"Node '{node.name}' (id={node.id}) requires tools "
+ f"{sorted(missing)} but they are not registered. "
+ f"Available tools: {available}"
)
return errors
-
async def execute(
self,
graph: GraphSpec,
@@ -159,7 +200,10 @@ async def execute(
self.logger.error(f" • {err}")
return ExecutionResult(
success=False,
- error=f"Missing tools: {'; '.join(tool_errors)}. Register tools via ToolRegistry or remove tool declarations from nodes.",
+ error=(
+ f"Missing tools: {'; '.join(tool_errors)}. "
+ "Register tools via ToolRegistry or remove tool declarations from nodes."
+ ),
)
# Initialize execution state
@@ -167,10 +211,18 @@ async def execute(
# Restore session state if provided
if session_state and "memory" in session_state:
- # Restore memory from previous session
- for key, value in session_state["memory"].items():
- memory.write(key, value)
- self.logger.info(f"📥 Restored session state with {len(session_state['memory'])} memory keys")
+ memory_data = session_state["memory"]
+ # [RESTORED] Type safety check
+ if not isinstance(memory_data, dict):
+ self.logger.warning(
+ f"⚠️ Invalid memory data type in session state: "
+ f"{type(memory_data).__name__}, expected dict"
+ )
+ else:
+ # Restore memory from previous session
+ for key, value in memory_data.items():
+ memory.write(key, value)
+ self.logger.info(f"📥 Restored session state with {len(memory_data)} memory keys")
# Write new input data to memory (each key individually)
if input_data:
@@ -181,7 +233,6 @@ async def execute(
total_tokens = 0
total_latency = 0
node_retry_counts: dict[str, int] = {} # Track retries per node
- max_retries_per_node = 3
# Determine entry point (may differ if resuming)
current_node_id = graph.get_entry_point(session_state)
@@ -228,6 +279,7 @@ async def execute(
memory=memory,
goal=goal,
input_data=input_data or {},
+ max_tokens=graph.max_tokens,
)
# Log actual input data being read
@@ -243,7 +295,7 @@ async def execute(
self.logger.info(f" {key}: {value_str}")
# Get or create node implementation
- node_impl = self._get_node_implementation(node_spec)
+ node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
# Validate inputs
validation_errors = node_impl.validate_input(ctx)
@@ -277,7 +329,10 @@ async def execute(
)
if result.success:
- self.logger.info(f" ✓ Success (tokens: {result.tokens_used}, latency: {result.latency_ms}ms)")
+ self.logger.info(
+ f" ✓ Success (tokens: {result.tokens_used}, "
+ f"latency: {result.latency_ms}ms)"
+ )
# Generate and log human-readable summary
summary = result.to_summary(node_spec)
@@ -300,28 +355,55 @@ async def execute(
# Handle failure
if not result.success:
# Track retries per node
- node_retry_counts[current_node_id] = node_retry_counts.get(current_node_id, 0) + 1
+ node_retry_counts[current_node_id] = (
+ node_retry_counts.get(current_node_id, 0) + 1
+ )
- if node_retry_counts[current_node_id] < max_retries_per_node:
+ # [CORRECTED] Use node_spec.max_retries instead of hardcoded 3
+ max_retries = getattr(node_spec, "max_retries", 3)
+
+ if node_retry_counts[current_node_id] < max_retries:
# Retry - don't increment steps for retries
steps -= 1
- self.logger.info(f" ↻ Retrying ({node_retry_counts[current_node_id]}/{max_retries_per_node})...")
+
+ # --- EXPONENTIAL BACKOFF ---
+ retry_count = node_retry_counts[current_node_id]
+ # Backoff formula: 1.0 * (2^(retry - 1)) -> 1s, 2s, 4s...
+ delay = 1.0 * (2 ** (retry_count - 1))
+ self.logger.info(f" Using backoff: Sleeping {delay}s before retry...")
+ await asyncio.sleep(delay)
+ # --------------------------------------
+
+ self.logger.info(
+ f" ↻ Retrying ({node_retry_counts[current_node_id]}/{max_retries})..."
+ )
continue
else:
# Max retries exceeded - fail the execution
- self.logger.error(f" ✗ Max retries ({max_retries_per_node}) exceeded for node {current_node_id}")
+ self.logger.error(
+ f" ✗ Max retries ({max_retries}) exceeded for node {current_node_id}"
+ )
self.runtime.report_problem(
severity="critical",
- description=f"Node {current_node_id} failed after {max_retries_per_node} attempts: {result.error}",
+ description=(
+ f"Node {current_node_id} failed after "
+ f"{max_retries} attempts: {result.error}"
+ ),
)
self.runtime.end_run(
success=False,
output_data=memory.read_all(),
- narrative=f"Failed at {node_spec.name} after {max_retries_per_node} retries: {result.error}",
+ narrative=(
+ f"Failed at {node_spec.name} after "
+ f"{max_retries} retries: {result.error}"
+ ),
)
return ExecutionResult(
success=False,
- error=f"Node '{node_spec.name}' failed after {max_retries_per_node} attempts: {result.error}",
+ error=(
+ f"Node '{node_spec.name}' failed after "
+ f"{max_retries} attempts: {result.error}"
+ ),
output=memory.read_all(),
steps_executed=steps,
total_tokens=total_tokens,
@@ -369,8 +451,8 @@ async def execute(
self.logger.info(f" → Router directing to: {result.next_node}")
current_node_id = result.next_node
else:
- # Follow edges
- next_node = self._follow_edges(
+ # Get all traversable edges for fan-out detection
+ traversable_edges = self._get_all_traversable_edges(
graph=graph,
goal=goal,
current_node_id=current_node_id,
@@ -378,12 +460,59 @@ async def execute(
result=result,
memory=memory,
)
- if next_node is None:
+
+ if not traversable_edges:
self.logger.info(" → No more edges, ending execution")
break # No valid edge, end execution
- next_spec = graph.get_node(next_node)
- self.logger.info(f" → Next: {next_spec.name if next_spec else next_node}")
- current_node_id = next_node
+
+ # Check for fan-out (multiple traversable edges)
+ if self.enable_parallel_execution and len(traversable_edges) > 1:
+ # Find convergence point (fan-in node)
+ targets = [e.target for e in traversable_edges]
+ fan_in_node = self._find_convergence_node(graph, targets)
+
+ # Execute branches in parallel
+ (
+ _branch_results,
+ branch_tokens,
+ branch_latency,
+ ) = await self._execute_parallel_branches(
+ graph=graph,
+ goal=goal,
+ edges=traversable_edges,
+ memory=memory,
+ source_result=result,
+ source_node_spec=node_spec,
+ path=path,
+ )
+
+ total_tokens += branch_tokens
+ total_latency += branch_latency
+
+ # Continue from fan-in node
+ if fan_in_node:
+ self.logger.info(f" ⑃ Fan-in: converging at {fan_in_node}")
+ current_node_id = fan_in_node
+ else:
+ # No convergence point - branches are terminal
+ self.logger.info(" → Parallel branches completed (no convergence)")
+ break
+ else:
+ # Sequential: follow single edge (existing logic via _follow_edges)
+ next_node = self._follow_edges(
+ graph=graph,
+ goal=goal,
+ current_node_id=current_node_id,
+ current_node_spec=node_spec,
+ result=result,
+ memory=memory,
+ )
+ if next_node is None:
+ self.logger.info(" → No more edges, ending execution")
+ break
+ next_spec = graph.get_node(next_node)
+ self.logger.info(f" → Next: {next_spec.name if next_spec else next_node}")
+ current_node_id = next_node
# Update input_data for next node
input_data = result.output
@@ -434,6 +563,7 @@ def _build_context(
memory: SharedMemory,
goal: Goal,
input_data: dict[str, Any],
+ max_tokens: int = 4096,
) -> NodeContext:
"""Build execution context for a node."""
# Filter tools to those available to this node
@@ -457,12 +587,15 @@ def _build_context(
available_tools=available_tools,
goal_context=goal.to_prompt_context(),
goal=goal, # Pass Goal object for LLM-powered routers
+ max_tokens=max_tokens,
)
# Valid node types - no ambiguous "llm" type allowed
VALID_NODE_TYPES = {"llm_tool_use", "llm_generate", "router", "function", "human_input"}
- def _get_node_implementation(self, node_spec: NodeSpec) -> NodeProtocol:
+ def _get_node_implementation(
+ self, node_spec: NodeSpec, cleanup_llm_model: str | None = None
+ ) -> NodeProtocol:
"""Get or create a node implementation."""
# Check registry first
if node_spec.id in self.node_registry:
@@ -483,10 +616,18 @@ def _get_node_implementation(self, node_spec: NodeSpec) -> NodeProtocol:
f"Node '{node_spec.id}' is type 'llm_tool_use' but declares no tools. "
"Either add tools to the node or change type to 'llm_generate'."
)
- return LLMNode(tool_executor=self.tool_executor, require_tools=True)
+ return LLMNode(
+ tool_executor=self.tool_executor,
+ require_tools=True,
+ cleanup_llm_model=cleanup_llm_model,
+ )
if node_spec.node_type == "llm_generate":
- return LLMNode(tool_executor=None, require_tools=False)
+ return LLMNode(
+ tool_executor=None,
+ require_tools=False,
+ cleanup_llm_model=cleanup_llm_model,
+ )
if node_spec.node_type == "router":
return RouterNode()
@@ -494,13 +635,16 @@ def _get_node_implementation(self, node_spec: NodeSpec) -> NodeProtocol:
if node_spec.node_type == "function":
# Function nodes need explicit registration
raise RuntimeError(
- f"Function node '{node_spec.id}' not registered. "
- "Register with node_registry."
+ f"Function node '{node_spec.id}' not registered. Register with node_registry."
)
if node_spec.node_type == "human_input":
# Human input nodes are handled specially by HITL mechanism
- return LLMNode(tool_executor=None, require_tools=False)
+ return LLMNode(
+ tool_executor=None,
+ require_tools=False,
+ cleanup_llm_model=cleanup_llm_model,
+ )
# Should never reach here due to validation above
raise RuntimeError(f"Unhandled node type: {node_spec.node_type}")
@@ -540,9 +684,7 @@ def _follow_edges(
)
if not validation.valid:
- self.logger.warning(
- f"⚠ Output validation failed: {validation.errors}"
- )
+ self.logger.warning(f"⚠ Output validation failed: {validation.errors}")
# Clean the output
cleaned_output = self.output_cleaner.clean_output(
@@ -555,9 +697,9 @@ def _follow_edges(
# Update result with cleaned output
result.output = cleaned_output
- # Write cleaned output back to memory
+ # Write cleaned output back to memory (skip validation for LLM output)
for key, value in cleaned_output.items():
- memory.write(key, value)
+ memory.write(key, value, validate=False)
# Revalidate
revalidation = self.output_cleaner.validate_output(
@@ -574,15 +716,249 @@ def _follow_edges(
)
# Continue anyway if fallback_to_raw is True
- # Map inputs
+ # Map inputs (skip validation for processed LLM output)
mapped = edge.map_inputs(result.output, memory.read_all())
for key, value in mapped.items():
- memory.write(key, value)
+ memory.write(key, value, validate=False)
return edge.target
return None
+ def _get_all_traversable_edges(
+ self,
+ graph: GraphSpec,
+ goal: Goal,
+ current_node_id: str,
+ current_node_spec: Any,
+ result: NodeResult,
+ memory: SharedMemory,
+ ) -> list[EdgeSpec]:
+ """
+ Get ALL edges that should be traversed (for fan-out detection).
+
+ Unlike _follow_edges which returns the first match, this returns
+ all matching edges to enable parallel execution.
+ """
+ edges = graph.get_outgoing_edges(current_node_id)
+ traversable = []
+
+ for edge in edges:
+ target_node_spec = graph.get_node(edge.target)
+ if edge.should_traverse(
+ source_success=result.success,
+ source_output=result.output,
+ memory=memory.read_all(),
+ llm=self.llm,
+ goal=goal,
+ source_node_name=current_node_spec.name if current_node_spec else current_node_id,
+ target_node_name=target_node_spec.name if target_node_spec else edge.target,
+ ):
+ traversable.append(edge)
+
+ return traversable
+
+ def _find_convergence_node(
+ self,
+ graph: GraphSpec,
+ parallel_targets: list[str],
+ ) -> str | None:
+ """
+ Find the common target node where parallel branches converge (fan-in).
+
+ Args:
+ graph: The graph specification
+ parallel_targets: List of node IDs that are running in parallel
+
+ Returns:
+ Node ID where all branches converge, or None if no convergence
+ """
+ # Get all nodes that parallel branches lead to
+ next_nodes: dict[str, int] = {} # node_id -> count of branches leading to it
+
+ for target in parallel_targets:
+ outgoing = graph.get_outgoing_edges(target)
+ for edge in outgoing:
+ next_nodes[edge.target] = next_nodes.get(edge.target, 0) + 1
+
+ # Convergence node is where ALL branches lead
+ for node_id, count in next_nodes.items():
+ if count == len(parallel_targets):
+ return node_id
+
+ # Fallback: return most common target if any
+ if next_nodes:
+ return max(next_nodes.keys(), key=lambda k: next_nodes[k])
+
+ return None
+
+ async def _execute_parallel_branches(
+ self,
+ graph: GraphSpec,
+ goal: Goal,
+ edges: list[EdgeSpec],
+ memory: SharedMemory,
+ source_result: NodeResult,
+ source_node_spec: Any,
+ path: list[str],
+ ) -> tuple[dict[str, NodeResult], int, int]:
+ """
+ Execute multiple branches in parallel using asyncio.gather.
+
+ Args:
+ graph: The graph specification
+ goal: The execution goal
+ edges: List of edges to follow in parallel
+ memory: Shared memory instance
+ source_result: Result from the source node
+ source_node_spec: Spec of the source node
+ path: Execution path list to update
+
+ Returns:
+ Tuple of (branch_results dict, total_tokens, total_latency)
+ """
+ branches: dict[str, ParallelBranch] = {}
+
+ # Create branches for each edge
+ for edge in edges:
+ branch_id = f"{edge.source}_to_{edge.target}"
+ branches[branch_id] = ParallelBranch(
+ branch_id=branch_id,
+ node_id=edge.target,
+ edge=edge,
+ )
+
+ self.logger.info(f" ⑂ Fan-out: executing {len(branches)} branches in parallel")
+ for branch in branches.values():
+ target_spec = graph.get_node(branch.node_id)
+ self.logger.info(f" • {target_spec.name if target_spec else branch.node_id}")
+
+ async def execute_single_branch(
+ branch: ParallelBranch,
+ ) -> tuple[ParallelBranch, NodeResult | Exception]:
+ """Execute a single branch with retry logic."""
+ node_spec = graph.get_node(branch.node_id)
+ if node_spec is None:
+ branch.status = "failed"
+ branch.error = f"Node {branch.node_id} not found in graph"
+ return branch, RuntimeError(branch.error)
+ branch.status = "running"
+
+ try:
+ # Validate and clean output before mapping inputs (same as _follow_edges)
+ if self.cleansing_config.enabled and node_spec:
+ validation = self.output_cleaner.validate_output(
+ output=source_result.output,
+ source_node_id=source_node_spec.id if source_node_spec else "unknown",
+ target_node_spec=node_spec,
+ )
+
+ if not validation.valid:
+ self.logger.warning(
+ f"⚠ Output validation failed for branch "
+ f"{branch.node_id}: {validation.errors}"
+ )
+ cleaned_output = self.output_cleaner.clean_output(
+ output=source_result.output,
+ source_node_id=source_node_spec.id if source_node_spec else "unknown",
+ target_node_spec=node_spec,
+ validation_errors=validation.errors,
+ )
+ # Write cleaned output to memory
+ for key, value in cleaned_output.items():
+ await memory.write_async(key, value)
+
+ # Map inputs via edge
+ mapped = branch.edge.map_inputs(source_result.output, memory.read_all())
+ for key, value in mapped.items():
+ await memory.write_async(key, value)
+
+ # Execute with retries
+ last_result = None
+ for attempt in range(node_spec.max_retries):
+ branch.retry_count = attempt
+
+ # Build context for this branch
+ ctx = self._build_context(node_spec, memory, goal, mapped, graph.max_tokens)
+ node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
+
+ self.logger.info(
+ f" ▶ Branch {node_spec.name}: executing (attempt {attempt + 1})"
+ )
+ result = await node_impl.execute(ctx)
+ last_result = result
+
+ if result.success:
+ # Write outputs to shared memory using async write
+ for key, value in result.output.items():
+ await memory.write_async(key, value)
+
+ branch.result = result
+ branch.status = "completed"
+ self.logger.info(
+ f" ✓ Branch {node_spec.name}: success "
+ f"(tokens: {result.tokens_used}, latency: {result.latency_ms}ms)"
+ )
+ return branch, result
+
+ self.logger.warning(
+ f" ↻ Branch {node_spec.name}: "
+ f"retry {attempt + 1}/{node_spec.max_retries}"
+ )
+
+ # All retries exhausted
+ branch.status = "failed"
+ branch.error = last_result.error if last_result else "Unknown error"
+ branch.result = last_result
+ self.logger.error(
+ f" ✗ Branch {node_spec.name}: "
+ f"failed after {node_spec.max_retries} attempts"
+ )
+ return branch, last_result
+
+ except Exception as e:
+ branch.status = "failed"
+ branch.error = str(e)
+ self.logger.error(f" ✗ Branch {branch.node_id}: exception - {e}")
+ return branch, e
+
+ # Execute all branches concurrently
+ tasks = [execute_single_branch(b) for b in branches.values()]
+ results = await asyncio.gather(*tasks, return_exceptions=False)
+
+ # Process results
+ total_tokens = 0
+ total_latency = 0
+ branch_results: dict[str, NodeResult] = {}
+ failed_branches: list[ParallelBranch] = []
+
+ for branch, result in results:
+ path.append(branch.node_id)
+
+ if isinstance(result, Exception):
+ failed_branches.append(branch)
+ elif result is None or not result.success:
+ failed_branches.append(branch)
+ else:
+ total_tokens += result.tokens_used
+ total_latency += result.latency_ms
+ branch_results[branch.branch_id] = result
+
+ # Handle failures based on config
+ if failed_branches:
+ failed_names = [graph.get_node(b.node_id).name for b in failed_branches]
+ if self._parallel_config.on_branch_failure == "fail_all":
+ raise RuntimeError(f"Parallel execution failed: branches {failed_names} failed")
+ elif self._parallel_config.on_branch_failure == "continue_others":
+ self.logger.warning(
+ f"⚠ Some branches failed ({failed_names}), continuing with successful ones"
+ )
+
+ self.logger.info(
+ f" ⑃ Fan-out complete: {len(branch_results)}/{len(branches)} branches succeeded"
+ )
+ return branch_results, total_tokens, total_latency
+
def register_node(self, node_id: str, implementation: NodeProtocol) -> None:
"""Register a custom node implementation."""
self.node_registry[node_id] = implementation
diff --git a/core/framework/graph/flexible_executor.py b/core/framework/graph/flexible_executor.py
index 238b127c50..c3a5659158 100644
--- a/core/framework/graph/flexible_executor.py
+++ b/core/framework/graph/flexible_executor.py
@@ -15,28 +15,29 @@
This keeps planning external while execution/evaluation is internal.
"""
-from typing import Any, Callable
+from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime
+from typing import Any
-from framework.runtime.core import Runtime
+from framework.graph.code_sandbox import CodeSandbox
from framework.graph.goal import Goal
+from framework.graph.judge import HybridJudge, create_default_judge
from framework.graph.plan import (
- Plan,
- PlanStep,
- PlanExecutionResult,
+ ApprovalDecision,
+ ApprovalRequest,
+ ApprovalResult,
ExecutionStatus,
- StepStatus,
Judgment,
JudgmentAction,
- ApprovalRequest,
- ApprovalResult,
- ApprovalDecision,
+ Plan,
+ PlanExecutionResult,
+ PlanStep,
+ StepStatus,
)
-from framework.graph.judge import HybridJudge, create_default_judge
-from framework.graph.worker_node import WorkerNode, StepExecutionResult
-from framework.graph.code_sandbox import CodeSandbox
+from framework.graph.worker_node import StepExecutionResult, WorkerNode
from framework.llm.provider import LLMProvider, Tool
+from framework.runtime.core import Runtime
# Type alias for approval callback
ApprovalCallback = Callable[[ApprovalRequest], ApprovalResult]
@@ -45,6 +46,7 @@
@dataclass
class ExecutorConfig:
"""Configuration for FlexibleGraphExecutor."""
+
max_retries_per_step: int = 3
max_total_steps: int = 100
timeout_seconds: int = 300
@@ -165,7 +167,10 @@ async def execute_plan(
status=ExecutionStatus.NEEDS_REPLAN,
plan=plan,
context=context,
- feedback="No executable steps available but plan not complete. Check dependencies.",
+ feedback=(
+ "No executable steps available but plan not complete. "
+ "Check dependencies."
+ ),
steps_executed=steps_executed,
total_tokens=total_tokens,
total_latency=total_latency,
@@ -174,7 +179,8 @@ async def execute_plan(
# Execute next step (for now, sequential; could be parallel)
step = ready_steps[0]
# Debug: show ready steps
- # print(f" [DEBUG] Ready steps: {[s.id for s in ready_steps]}, executing: {step.id}")
+ # ready_ids = [s.id for s in ready_steps]
+ # print(f" [DEBUG] Ready steps: {ready_ids}, executing: {step.id}")
# APPROVAL CHECK - before execution
if step.requires_approval:
@@ -360,7 +366,10 @@ async def _handle_judgment(
status=ExecutionStatus.NEEDS_REPLAN,
plan=plan,
context=context,
- feedback=f"Step '{step.id}' failed after {step.attempts} attempts: {judgment.feedback}",
+ feedback=(
+ f"Step '{step.id}' failed after {step.attempts} attempts: "
+ f"{judgment.feedback}"
+ ),
steps_executed=steps_executed,
total_tokens=total_tokens,
total_latency=total_latency,
@@ -450,12 +459,17 @@ async def _request_approval(
preview_parts.append(f"Tool: {step.action.tool_name}")
if step.action.tool_args:
import json
+
args_preview = json.dumps(step.action.tool_args, indent=2, default=str)
if len(args_preview) > 500:
args_preview = args_preview[:500] + "..."
preview_parts.append(f"Args: {args_preview}")
elif step.action.prompt:
- prompt_preview = step.action.prompt[:300] + "..." if len(step.action.prompt) > 300 else step.action.prompt
+ prompt_preview = (
+ step.action.prompt[:300] + "..."
+ if len(step.action.prompt) > 300
+ else step.action.prompt
+ )
preview_parts.append(f"Prompt: {prompt_preview}")
# Include step inputs resolved from context (what will be sent/used)
diff --git a/core/framework/graph/goal.py b/core/framework/graph/goal.py
index bddf7ff72e..f66cb58187 100644
--- a/core/framework/graph/goal.py
+++ b/core/framework/graph/goal.py
@@ -12,20 +12,21 @@
"""
from datetime import datetime
-from typing import Any
from enum import Enum
+from typing import Any
from pydantic import BaseModel, Field
class GoalStatus(str, Enum):
"""Lifecycle status of a goal."""
- DRAFT = "draft" # Being defined
- READY = "ready" # Ready for agent creation
- ACTIVE = "active" # Has an agent graph, can execute
- COMPLETED = "completed" # Achieved
- FAILED = "failed" # Could not be achieved
- SUSPENDED = "suspended" # Paused for revision
+
+ DRAFT = "draft" # Being defined
+ READY = "ready" # Ready for agent creation
+ ACTIVE = "active" # Has an agent graph, can execute
+ COMPLETED = "completed" # Achieved
+ FAILED = "failed" # Could not be achieved
+ SUSPENDED = "suspended" # Paused for revision
class SuccessCriterion(BaseModel):
@@ -37,22 +38,14 @@ class SuccessCriterion(BaseModel):
- Measurable: Can be evaluated programmatically or by LLM
- Achievable: Within the agent's capabilities
"""
+
id: str
- description: str = Field(
- description="Human-readable description of what success looks like"
- )
+ description: str = Field(description="Human-readable description of what success looks like")
metric: str = Field(
description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'"
)
- target: Any = Field(
- description="The target value or condition"
- )
- weight: float = Field(
- default=1.0,
- ge=0.0,
- le=1.0,
- description="Relative importance (0-1)"
- )
+ target: Any = Field(description="The target value or condition")
+ weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)")
met: bool = False
model_config = {"extra": "allow"}
@@ -66,18 +59,17 @@ class Constraint(BaseModel):
- Hard: Violation means failure
- Soft: Violation is discouraged but allowed
"""
+
id: str
description: str
constraint_type: str = Field(
description="Type: 'hard' (must not violate) or 'soft' (prefer not to violate)"
)
category: str = Field(
- default="general",
- description="Category: 'time', 'cost', 'safety', 'scope', 'quality'"
+ default="general", description="Category: 'time', 'cost', 'safety', 'scope', 'quality'"
)
check: str = Field(
- default="",
- description="How to check: expression, function name, or 'llm_judge'"
+ default="", description="How to check: expression, function name, or 'llm_judge'"
)
model_config = {"extra": "allow"}
@@ -119,6 +111,7 @@ class Goal(BaseModel):
]
)
"""
+
id: str
name: str
description: str
@@ -133,23 +126,19 @@ class Goal(BaseModel):
# Context for the agent
context: dict[str, Any] = Field(
default_factory=dict,
- description="Additional context: domain knowledge, user preferences, etc."
+ description="Additional context: domain knowledge, user preferences, etc.",
)
# Capabilities required
required_capabilities: list[str] = Field(
default_factory=list,
- description="What the agent needs: 'llm', 'web_search', 'code_execution', etc."
+ description="What the agent needs: 'llm', 'web_search', 'code_execution', etc.",
)
# Input/output schema
- input_schema: dict[str, Any] = Field(
- default_factory=dict,
- description="Expected input format"
- )
+ input_schema: dict[str, Any] = Field(default_factory=dict, description="Expected input format")
output_schema: dict[str, Any] = Field(
- default_factory=dict,
- description="Expected output format"
+ default_factory=dict, description="Expected output format"
)
# Versioning for evolution
diff --git a/core/framework/graph/hitl.py b/core/framework/graph/hitl.py
index 0f88f8f68c..78e41a8ecb 100644
--- a/core/framework/graph/hitl.py
+++ b/core/framework/graph/hitl.py
@@ -12,6 +12,7 @@
class HITLInputType(str, Enum):
"""Type of input expected from human."""
+
FREE_TEXT = "free_text" # Open-ended text response
STRUCTURED = "structured" # Specific fields to fill
SELECTION = "selection" # Choose from options
@@ -22,6 +23,7 @@ class HITLInputType(str, Enum):
@dataclass
class HITLQuestion:
"""A single question to ask the human."""
+
id: str
question: str
input_type: HITLInputType = HITLInputType.FREE_TEXT
@@ -44,6 +46,7 @@ class HITLRequest:
This is what the agent produces when it needs human input.
"""
+
# Context
objective: str # What we're trying to accomplish
current_state: str # Where we are in the process
@@ -92,6 +95,7 @@ class HITLResponse:
This is what gets passed back when resuming from a pause.
"""
+
# Original request reference
request_id: str
@@ -170,13 +174,13 @@ def parse_response(
# Use Haiku to extract answers
try:
- import anthropic
import json
- questions_str = "\n".join([
- f"{i+1}. {q.question} (id: {q.id})"
- for i, q in enumerate(request.questions)
- ])
+ import anthropic
+
+ questions_str = "\n".join(
+ [f"{i + 1}. {q.question} (id: {q.id})" for i, q in enumerate(request.questions)]
+ )
prompt = f"""Parse the user's response and extract answers for each question.
@@ -195,13 +199,14 @@ def parse_response(
message = client.messages.create(
model="claude-3-5-haiku-20241022",
max_tokens=500,
- messages=[{"role": "user", "content": prompt}]
+ messages=[{"role": "user", "content": prompt}],
)
# Parse Haiku's response
import re
+
response_text = message.content[0].text.strip()
- json_match = re.search(r'\{[^{}]*\}', response_text, re.DOTALL)
+ json_match = re.search(r"\{[^{}]*\}", response_text, re.DOTALL)
if json_match:
parsed = json.loads(json_match.group())
diff --git a/core/framework/graph/judge.py b/core/framework/graph/judge.py
index ab0c69d440..1c7e87c984 100644
--- a/core/framework/graph/judge.py
+++ b/core/framework/graph/judge.py
@@ -8,23 +8,24 @@
Escalation path: rules → LLM → human
"""
-from typing import Any
from dataclasses import dataclass, field
+from typing import Any
+from framework.graph.code_sandbox import safe_eval
+from framework.graph.goal import Goal
from framework.graph.plan import (
- PlanStep,
+ EvaluationRule,
Judgment,
JudgmentAction,
- EvaluationRule,
+ PlanStep,
)
-from framework.graph.goal import Goal
-from framework.graph.code_sandbox import safe_eval
from framework.llm.provider import LLMProvider
@dataclass
class RuleEvaluationResult:
"""Result of rule-based evaluation."""
+
is_definitive: bool # True if a rule matched definitively
judgment: Judgment | None = None
context: dict[str, Any] = field(default_factory=dict)
@@ -136,9 +137,9 @@ def _evaluate_rules(
# Build evaluation context
eval_context = {
- "step": step.model_dump() if hasattr(step, 'model_dump') else step,
+ "step": step.model_dump() if hasattr(step, "model_dump") else step,
"result": result,
- "goal": goal.model_dump() if hasattr(goal, 'model_dump') else goal,
+ "goal": goal.model_dump() if hasattr(goal, "model_dump") else goal,
"context": context,
"success": isinstance(result, dict) and result.get("success", False),
"error": isinstance(result, dict) and result.get("error"),
@@ -216,7 +217,10 @@ async def _evaluate_llm(
# Low confidence - escalate
return Judgment(
action=JudgmentAction.ESCALATE,
- reasoning=f"LLM confidence ({judgment.confidence:.2f}) below threshold ({self.llm_confidence_threshold})",
+ reasoning=(
+ f"LLM confidence ({judgment.confidence:.2f}) "
+ f"below threshold ({self.llm_confidence_threshold})"
+ ),
feedback=judgment.feedback,
confidence=judgment.confidence,
llm_used=True,
@@ -338,52 +342,65 @@ def create_default_judge(llm: LLMProvider | None = None) -> HybridJudge:
judge = HybridJudge(llm=llm)
# Rule: Accept on explicit success flag
- judge.add_rule(EvaluationRule(
- id="explicit_success",
- description="Step explicitly marked as successful",
- condition="isinstance(result, dict) and result.get('success') == True",
- action=JudgmentAction.ACCEPT,
- priority=100,
- ))
+ judge.add_rule(
+ EvaluationRule(
+ id="explicit_success",
+ description="Step explicitly marked as successful",
+ condition="isinstance(result, dict) and result.get('success') == True",
+ action=JudgmentAction.ACCEPT,
+ priority=100,
+ )
+ )
# Rule: Retry on transient errors
- judge.add_rule(EvaluationRule(
- id="transient_error_retry",
- description="Transient error that can be retried",
- condition="isinstance(result, dict) and result.get('error_type') in ['timeout', 'rate_limit', 'connection_error']",
- action=JudgmentAction.RETRY,
- feedback_template="Transient error: {result[error]}. Please retry.",
- priority=90,
- ))
+ judge.add_rule(
+ EvaluationRule(
+ id="transient_error_retry",
+ description="Transient error that can be retried",
+ condition=(
+ "isinstance(result, dict) and "
+ "result.get('error_type') in ['timeout', 'rate_limit', 'connection_error']"
+ ),
+ action=JudgmentAction.RETRY,
+ feedback_template="Transient error: {result[error]}. Please retry.",
+ priority=90,
+ )
+ )
# Rule: Replan on missing data
- judge.add_rule(EvaluationRule(
- id="missing_data_replan",
- description="Required data not available",
- condition="isinstance(result, dict) and result.get('error_type') == 'missing_data'",
- action=JudgmentAction.REPLAN,
- feedback_template="Missing required data: {result[error]}. Plan needs adjustment.",
- priority=80,
- ))
+ judge.add_rule(
+ EvaluationRule(
+ id="missing_data_replan",
+ description="Required data not available",
+ condition="isinstance(result, dict) and result.get('error_type') == 'missing_data'",
+ action=JudgmentAction.REPLAN,
+ feedback_template="Missing required data: {result[error]}. Plan needs adjustment.",
+ priority=80,
+ )
+ )
# Rule: Escalate on security issues
- judge.add_rule(EvaluationRule(
- id="security_escalate",
- description="Security issue detected",
- condition="isinstance(result, dict) and result.get('error_type') == 'security'",
- action=JudgmentAction.ESCALATE,
- feedback_template="Security issue detected: {result[error]}",
- priority=200,
- ))
+ judge.add_rule(
+ EvaluationRule(
+ id="security_escalate",
+ description="Security issue detected",
+ condition="isinstance(result, dict) and result.get('error_type') == 'security'",
+ action=JudgmentAction.ESCALATE,
+ feedback_template="Security issue detected: {result[error]}",
+ priority=200,
+ )
+ )
# Rule: Fail on max retries exceeded
- judge.add_rule(EvaluationRule(
- id="max_retries_fail",
- description="Maximum retries exceeded",
- condition="step.get('attempts', 0) >= step.get('max_retries', 3)",
- action=JudgmentAction.REPLAN,
- feedback_template="Step '{step[id]}' failed after {step[attempts]} attempts",
- priority=150,
- ))
+ judge.add_rule(
+ EvaluationRule(
+ id="max_retries_fail",
+ description="Maximum retries exceeded",
+ condition="step.get('attempts', 0) >= step.get('max_retries', 3)",
+ action=JudgmentAction.REPLAN,
+ feedback_template="Step '{step[id]}' failed after {step[attempts]} attempts",
+ priority=150,
+ )
+ )
return judge
diff --git a/core/framework/graph/node.py b/core/framework/graph/node.py
index f33d87c505..9e86ec599a 100644
--- a/core/framework/graph/node.py
+++ b/core/framework/graph/node.py
@@ -15,25 +15,83 @@
The framework provides NodeContext with everything the node needs.
"""
+import asyncio
import logging
from abc import ABC, abstractmethod
-from typing import Any, Callable
+from collections.abc import Callable
from dataclasses import dataclass, field
+from typing import Any
from pydantic import BaseModel, Field
-from framework.runtime.core import Runtime
from framework.llm.provider import LLMProvider, Tool
+from framework.runtime.core import Runtime
logger = logging.getLogger(__name__)
+def _fix_unescaped_newlines_in_json(json_str: str) -> str:
+ """Fix unescaped newlines inside JSON string values.
+
+ LLMs sometimes output actual newlines inside JSON strings instead of \\n.
+ This function fixes that by properly escaping newlines within string values.
+ """
+ result = []
+ in_string = False
+ escape_next = False
+ i = 0
+
+ while i < len(json_str):
+ char = json_str[i]
+
+ if escape_next:
+ result.append(char)
+ escape_next = False
+ i += 1
+ continue
+
+ if char == "\\" and in_string:
+ escape_next = True
+ result.append(char)
+ i += 1
+ continue
+
+ if char == '"' and not escape_next:
+ in_string = not in_string
+ result.append(char)
+ i += 1
+ continue
+
+ # Fix unescaped newlines inside strings
+ if in_string and char == "\n":
+ result.append("\\n")
+ i += 1
+ continue
+
+ # Fix unescaped carriage returns inside strings
+ if in_string and char == "\r":
+ result.append("\\r")
+ i += 1
+ continue
+
+ # Fix unescaped tabs inside strings
+ if in_string and char == "\t":
+ result.append("\\t")
+ i += 1
+ continue
+
+ result.append(char)
+ i += 1
+
+ return "".join(result)
+
+
def find_json_object(text: str) -> str | None:
"""Find the first valid JSON object in text using balanced brace matching.
This handles nested objects correctly, unlike simple regex like r'\\{[^{}]*\\}'.
"""
- start = text.find('{')
+ start = text.find("{")
if start == -1:
return None
@@ -46,7 +104,7 @@ def find_json_object(text: str) -> str | None:
escape_next = False
continue
- if char == '\\' and in_string:
+ if char == "\\" and in_string:
escape_next = True
continue
@@ -57,12 +115,12 @@ def find_json_object(text: str) -> str | None:
if in_string:
continue
- if char == '{':
+ if char == "{":
depth += 1
- elif char == '}':
+ elif char == "}":
depth -= 1
if depth == 0:
- return text[start:i + 1]
+ return text[start : i + 1]
return None
@@ -87,6 +145,7 @@ class NodeSpec(BaseModel):
system_prompt="You are a calculator..."
)
"""
+
id: str
name: str
description: str
@@ -94,67 +153,73 @@ class NodeSpec(BaseModel):
# Node behavior type
node_type: str = Field(
default="llm_tool_use",
- description="Type: 'llm_tool_use', 'llm_generate', 'function', 'router', 'human_input'"
+ description="Type: 'llm_tool_use', 'llm_generate', 'function', 'router', 'human_input'",
)
# Data flow
input_keys: list[str] = Field(
- default_factory=list,
- description="Keys this node reads from shared memory or input"
+ default_factory=list, description="Keys this node reads from shared memory or input"
)
output_keys: list[str] = Field(
- default_factory=list,
- description="Keys this node writes to shared memory or output"
+ default_factory=list, description="Keys this node writes to shared memory or output"
)
# Optional schemas for validation and cleansing
input_schema: dict[str, dict] = Field(
default_factory=dict,
- description="Optional schema for input validation. Format: {key: {type: 'string', required: True, description: '...'}}"
+ description=(
+ "Optional schema for input validation. "
+ "Format: {key: {type: 'string', required: True, description: '...'}}"
+ ),
)
output_schema: dict[str, dict] = Field(
default_factory=dict,
- description="Optional schema for output validation. Format: {key: {type: 'dict', required: True, description: '...'}}"
+ description=(
+ "Optional schema for output validation. "
+ "Format: {key: {type: 'dict', required: True, description: '...'}}"
+ ),
)
# For LLM nodes
- system_prompt: str | None = Field(
- default=None,
- description="System prompt for LLM nodes"
- )
- tools: list[str] = Field(
- default_factory=list,
- description="Tool names this node can use"
- )
+ system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
+ tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
model: str | None = Field(
- default=None,
- description="Specific model to use (defaults to graph default)"
+ default=None, description="Specific model to use (defaults to graph default)"
)
# For function nodes
function: str | None = Field(
- default=None,
- description="Function name or path for function nodes"
+ default=None, description="Function name or path for function nodes"
)
# For router nodes
routes: dict[str, str] = Field(
- default_factory=dict,
- description="Condition -> target_node_id mapping for routers"
+ default_factory=dict, description="Condition -> target_node_id mapping for routers"
)
# Retry behavior
max_retries: int = Field(default=3)
- retry_on: list[str] = Field(
- default_factory=list,
- description="Error types to retry on"
+ retry_on: list[str] = Field(default_factory=list, description="Error types to retry on")
+
+ # Pydantic model for output validation
+ output_model: type[BaseModel] | None = Field(
+ default=None,
+ description=(
+ "Optional Pydantic model class for validating and parsing LLM output. "
+ "When set, the LLM response will be validated against this model."
+ ),
+ )
+ max_validation_retries: int = Field(
+ default=2,
+ description="Maximum retries when Pydantic validation fails (with feedback to LLM)",
)
- model_config = {"extra": "allow"}
+ model_config = {"extra": "allow", "arbitrary_types_allowed": True}
class MemoryWriteError(Exception):
"""Raised when an invalid value is written to memory."""
+
pass
@@ -165,10 +230,22 @@ class SharedMemory:
Nodes read and write to shared memory using typed keys.
The memory is scoped to a single run.
+
+ For parallel execution, use write_async() which provides per-key locking
+ to prevent race conditions when multiple nodes write concurrently.
"""
+
_data: dict[str, Any] = field(default_factory=dict)
_allowed_read: set[str] = field(default_factory=set)
_allowed_write: set[str] = field(default_factory=set)
+ # Locks for thread-safe parallel execution
+ _lock: asyncio.Lock | None = field(default=None, repr=False)
+ _key_locks: dict[str, asyncio.Lock] = field(default_factory=dict, repr=False)
+
+ def __post_init__(self) -> None:
+ """Initialize the main lock if not provided."""
+ if self._lock is None:
+ self._lock = asyncio.Lock()
def read(self, key: str) -> Any:
"""Read a value from shared memory."""
@@ -196,8 +273,7 @@ def write(self, key: str, value: Any, validate: bool = True) -> None:
# Check for obviously hallucinated content
if len(value) > 5000:
# Long strings that look like code are suspicious
- code_indicators = ["```python", "def ", "class ", "import ", "async def "]
- if any(indicator in value[:500] for indicator in code_indicators):
+ if self._contains_code_indicators(value):
logger.warning(
f"⚠ Suspicious write to key '{key}': appears to be code "
f"({len(value)} chars). Consider using validate=False if intended."
@@ -210,6 +286,109 @@ def write(self, key: str, value: Any, validate: bool = True) -> None:
self._data[key] = value
+ async def write_async(self, key: str, value: Any, validate: bool = True) -> None:
+ """
+ Thread-safe async write with per-key locking.
+
+ Use this method when multiple nodes may write concurrently during
+ parallel execution. Each key has its own lock to minimize contention.
+
+ Args:
+ key: The memory key to write to
+ value: The value to write
+ validate: If True, check for suspicious content (default True)
+
+ Raises:
+ PermissionError: If node doesn't have write permission
+ MemoryWriteError: If value appears to be hallucinated content
+ """
+ # Check permissions first (no lock needed)
+ if self._allowed_write and key not in self._allowed_write:
+ raise PermissionError(f"Node not allowed to write key: {key}")
+
+ # Ensure key has a lock (double-checked locking pattern)
+ if key not in self._key_locks:
+ async with self._lock:
+ if key not in self._key_locks:
+ self._key_locks[key] = asyncio.Lock()
+
+ # Acquire per-key lock and write
+ async with self._key_locks[key]:
+ if validate and isinstance(value, str):
+ if len(value) > 5000:
+ if self._contains_code_indicators(value):
+ logger.warning(
+ f"⚠ Suspicious write to key '{key}': appears to be code "
+ f"({len(value)} chars). Consider using validate=False if intended."
+ )
+ raise MemoryWriteError(
+ f"Rejected suspicious content for key '{key}': "
+ f"appears to be hallucinated code ({len(value)} chars). "
+ "If this is intentional, use validate=False."
+ )
+ self._data[key] = value
+
+ def _contains_code_indicators(self, value: str) -> bool:
+ """
+ Check for code patterns in a string using sampling for efficiency.
+
+ For strings under 10KB, checks the entire content.
+ For longer strings, samples at strategic positions to balance
+ performance with detection accuracy.
+
+ Args:
+ value: The string to check for code indicators
+
+ Returns:
+ True if code indicators are found, False otherwise
+ """
+ code_indicators = [
+ # Python
+ "```python",
+ "def ",
+ "class ",
+ "import ",
+ "async def ",
+ "from ",
+ # JavaScript/TypeScript
+ "function ",
+ "const ",
+ "let ",
+ "=> {",
+ "require(",
+ "export ",
+ # SQL
+ "SELECT ",
+ "INSERT ",
+ "UPDATE ",
+ "DELETE ",
+ "DROP ",
+ # HTML/Script injection
+ "\n"
+ padding_end = "B" * 5000
+ content = padding + code + padding_end
+
+ with pytest.raises(MemoryWriteError) as exc_info:
+ memory.write("output", content)
+
+ assert "hallucinated code" in str(exc_info.value)
+
+ def test_allows_short_strings_without_validation(self):
+ """Strings under 5000 chars should not trigger validation."""
+ memory = SharedMemory()
+ content = "def hello(): pass" # Contains code indicator but short
+
+ # Should not raise - too short to validate
+ memory.write("output", content)
+ assert memory.read("output") == content
+
+ def test_allows_long_strings_without_code(self):
+ """Long strings without code indicators should be allowed."""
+ memory = SharedMemory()
+ content = "This is a long text document. " * 500 # ~15000 chars, no code
+
+ memory.write("output", content)
+ assert memory.read("output") == content
+
+ def test_validate_false_bypasses_check(self):
+ """Using validate=False should bypass the check."""
+ memory = SharedMemory()
+ code_content = "```python\nimport os\n```" + "A" * 6000
+
+ # Should not raise when validate=False
+ memory.write("output", code_content, validate=False)
+ assert memory.read("output") == code_content
+
+ def test_sampling_for_very_long_strings(self):
+ """Very long strings (>10KB) should be sampled at multiple positions."""
+ memory = SharedMemory()
+ # Create a 50KB string with code at the 75% mark
+ size = 50000
+ code_position = int(size * 0.75)
+ content = (
+ "A" * code_position + "def hidden_code(): pass" + "B" * (size - code_position - 25)
+ )
+
+ with pytest.raises(MemoryWriteError) as exc_info:
+ memory.write("output", content)
+
+ assert "hallucinated code" in str(exc_info.value)
+
+
+class TestOutputValidatorHallucinationDetection:
+ """Test the OutputValidator hallucination detection."""
+
+ def test_detects_code_anywhere_in_output(self):
+ """Code anywhere in the output value should trigger a warning."""
+ validator = OutputValidator()
+ padding = "Normal text content. " * 50
+ code = "\ndef suspicious_function():\n pass\n"
+ output = {"result": padding + code}
+
+ # The method logs a warning but doesn't fail
+ result = validator.validate_no_hallucination(output)
+ # The warning is logged - we can't easily test logging, but the method should work
+ assert isinstance(result, ValidationResult)
+
+ def test_contains_code_indicators_full_check(self):
+ """_contains_code_indicators should check the entire string."""
+ validator = OutputValidator()
+
+ # Code at position 600 (was previously missed with [:500] check)
+ padding = "A" * 600
+ code = "import os"
+ content = padding + code
+
+ assert validator._contains_code_indicators(content) is True
+
+ def test_contains_code_indicators_sampling(self):
+ """_contains_code_indicators should sample for very long strings."""
+ validator = OutputValidator()
+
+ # 50KB string with code at 75% position
+ size = 50000
+ code_position = int(size * 0.75)
+ content = "A" * code_position + "class HiddenClass:" + "B" * (size - code_position - 18)
+
+ assert validator._contains_code_indicators(content) is True
+
+ def test_no_false_positive_for_clean_text(self):
+ """Clean text without code should not trigger false positives."""
+ validator = OutputValidator()
+
+ # Long text without any code indicators
+ content = "This is a perfectly normal document. " * 300
+
+ assert validator._contains_code_indicators(content) is False
+
+ def test_detects_multiple_languages(self):
+ """Should detect code patterns from multiple programming languages."""
+ validator = OutputValidator()
+
+ test_cases = [
+ "function test() {}", # JavaScript
+ "const x = 5;", # JavaScript
+ "SELECT * FROM users", # SQL
+ "DROP TABLE data", # SQL
+ "