win4r
diff --git a/clawteam/cli/commands.py b/clawteam/cli/commands.py
Lines changed: 22 additions & 5 deletions
diff --git a/clawteam/spawn/__init__.py b/clawteam/spawn/__init__.py
Lines changed: 33 additions & 1 deletion
diff --git a/clawteam/spawn/registry.py b/clawteam/spawn/registry.py
Lines changed: 122 additions & 0 deletions
diff --git a/clawteam/store/file.py b/clawteam/store/file.py
Lines changed: 21 additions & 0 deletions
@@ -1008,6 +1008,7 @@ def cost_report(
     provider: str = typer.Option("", "--provider", help="Provider name (e.g. anthropic)"),
     model: str = typer.Option("", "--model", help="Model name"),
     agent: Optional[str] = typer.Option(None, "--agent", "-a", help="Agent name (default: from env)"),
+    task_id: str = typer.Option("", "--task-id", help="Associated task ID"),
 ):
     """Report token usage and cost for an agent."""
     from clawteam.identity import AgentIdentity
@@ -1023,6 +1024,7 @@ def cost_report(
         input_tokens=input_tokens,
         output_tokens=output_tokens,
         cost_cents=cost_cents,
+        task_id=task_id,
     )
     data = _dump(event)
 
@@ -1049,6 +1051,7 @@ def _human(d):
 def cost_show(
     team: str = typer.Argument(..., help="Team name"),
     agent: Optional[str] = typer.Option(None, "--agent", "-a", help="Filter by agent"),
+    by: Optional[str] = typer.Option(None, "--by", "-b", help="Breakdown dimension: agent, task, or model"),
 ):
     """Show cost summary and event history."""
     from clawteam.team.costs import CostStore
@@ -1059,10 +1062,12 @@ def cost_show(
     events = store.list_events(agent_name=agent or "")
     config = TeamManager.get_team(team)
     budget = config.budget_cents if config else 0.0
+    rate = store.cost_rate()
 
     data = {
         "summary": _dump(summary),
         "budget_cents": budget,
+        "cost_rate_per_min": rate,
         "events": [_dump(e) for e in events],
     }
 
@@ -1077,11 +1082,21 @@ def _human(d):
         console.print(f"  Input tokens:  {s.get('totalInputTokens', 0):,}")
         console.print(f"  Output tokens: {s.get('totalOutputTokens', 0):,}")
         console.print(f"  Events: {s.get('eventCount', 0)}")
-        by_agent = s.get("byAgent", {})
-        if by_agent:
-            console.print("  By agent:")
-            for a, c in sorted(by_agent.items()):
-                console.print(f"    {a}: ${c / 100:.4f}")
+        if rate > 0:
+            console.print(f"  Rate: ${rate / 100:.4f}/min")
+
+        # Dimension breakdown
+        dimension = by or "agent"
+        dimension_key = {
+            "agent": "byAgent",
+            "model": "byModel",
+            "task": "byTask",
+        }.get(dimension, "byAgent")
+        breakdown = s.get(dimension_key, {})
+        if breakdown:
+            console.print(f"  By {dimension}:")
+            for k, c in sorted(breakdown.items()):
+                console.print(f"    {k}: ${c / 100:.4f}")
 
         evts = d["events"]
         if evts:
@@ -1092,6 +1107,7 @@ def _human(d):
             table.add_column("Out Tokens", justify="right")
             table.add_column("Cost", justify="right")
             table.add_column("Model", style="dim")
+            table.add_column("Task", style="dim")
             for e in evts[-20:]:  # show last 20
                 table.add_row(
                     (e.get("reportedAt") or "")[:19],
@@ -1100,6 +1116,7 @@ def _human(d):
                     f"{e.get('outputTokens', 0):,}",
                     f"${e.get('costCents', 0) / 100:.4f}",
                     e.get("model", ""),
+                    e.get("taskId", ""),
                 )
             console.print(table)
 
 
@@ -2,8 +2,13 @@
 
 from __future__ import annotations
 
+import logging
+import time
+
 from clawteam.spawn.base import SpawnBackend
 
+logger = logging.getLogger(__name__)
+
 
 def get_backend(name: str = "tmux") -> SpawnBackend:
     """Factory function to get a spawn backend by name."""
@@ -17,4 +22,31 @@ def get_backend(name: str = "tmux") -> SpawnBackend:
         raise ValueError(f"Unknown spawn backend: {name}. Available: subprocess, tmux")
 
 
-__all__ = ["SpawnBackend", "get_backend"]
+def spawn_with_retry(
+    backend: SpawnBackend,
+    max_retries: int = 3,
+    backoff_base: float = 1.0,
+    backoff_max: float = 30.0,
+    **spawn_kwargs,
+) -> str:
+    """Call ``backend.spawn(**spawn_kwargs)``, retrying failures with backoff.
+
+    A spawn result counts as a failure when it starts with ``"Error"``.
+    After failed attempt ``i`` (0-based) the function sleeps
+    ``min(backoff_base * 2 ** i, backoff_max)`` seconds before trying again.
+    Exceptions raised by ``backend.spawn`` are not caught and not retried.
+
+    Args:
+        backend: Object whose ``spawn`` method is invoked.
+        max_retries: Retries allowed after the first attempt. Negative values
+            are treated as 0 so the backend is always tried at least once.
+        backoff_base: Delay in seconds before the first retry.
+        backoff_max: Upper bound in seconds for any single delay.
+        **spawn_kwargs: Forwarded unchanged to ``backend.spawn``.
+
+    Returns:
+        The first result that does not start with ``"Error"``, or the last
+        error result once every attempt has failed.
+    """
+    # Clamp the budget: with a negative value the loop would never run and the
+    # function would return "", which does not start with "Error" and would
+    # therefore look like a successful spawn to the caller.
+    retries = max(max_retries, 0)
+    last_result = ""
+    for attempt in range(retries + 1):
+        result = backend.spawn(**spawn_kwargs)
+        if not result.startswith("Error"):
+            return result
+        last_result = result
+        # Only sleep when another attempt will actually follow.
+        if attempt < retries:
+            delay = min(backoff_base * (2 ** attempt), backoff_max)
+            logger.warning(
+                "Spawn attempt %d/%d failed: %s — retrying in %.1fs",
+                attempt + 1, retries + 1, result, delay,
+            )
+            time.sleep(delay)
+    return last_result
+
+
+__all__ = ["SpawnBackend", "get_backend", "spawn_with_retry"]
@@ -5,12 +5,134 @@
 import json
 import subprocess
 import time
+from enum import Enum
 from pathlib import Path
 
+from pydantic import BaseModel, Field
+
 from clawteam.fileutil import atomic_write_text, file_locked
 from clawteam.paths import ensure_within_root, validate_identifier
 from clawteam.team.models import get_data_dir
 
+# ---------------------------------------------------------------------------
+# Circuit Breaker — agent health tracking
+# ---------------------------------------------------------------------------
+
+class HealthState(str, Enum):
+    """Circuit-breaker state stored for an agent (string-valued enum)."""
+
+    # Set by record_outcome() on any successful outcome.
+    healthy = "healthy"
+    # Set by record_outcome() on a failure while the consecutive-failure
+    # count is still below the failure threshold.
+    degraded = "degraded"
+    # Set by record_outcome() once consecutive failures reach the threshold;
+    # AgentHealth.is_accepting_tasks rejects work until the cooldown elapses.
+    open = "open"
+
+
+class AgentHealth(BaseModel):
+    """Persisted circuit-breaker record for one spawned agent.
+
+    Every field except ``state`` carries a camelCase alias; the model config
+    additionally allows population by the Python field names.
+    """
+
+    model_config = {"populate_by_name": True}
+
+    agent_name: str = Field(alias="agentName")
+    state: HealthState = HealthState.healthy
+    quality_score: float = Field(default=1.0, alias="qualityScore")
+    consecutive_failures: int = Field(default=0, alias="consecutiveFailures")
+    total_successes: int = Field(default=0, alias="totalSuccesses")
+    total_failures: int = Field(default=0, alias="totalFailures")
+    last_failure_at: float = Field(default=0.0, alias="lastFailureAt")
+    cooldown_seconds: float = Field(default=60.0, alias="cooldownSeconds")
+
+    @property
+    def is_accepting_tasks(self) -> bool:
+        """Whether new tasks may be handed to this agent.
+
+        Any state other than ``open`` accepts tasks. An ``open`` agent accepts
+        them again only once ``cooldown_seconds`` have elapsed since
+        ``last_failure_at`` (the half-open probe window).
+        """
+        if self.state != HealthState.open:
+            return True
+        if not self.last_failure_at:
+            # Open circuit without a recorded failure time: keep rejecting.
+            return False
+        elapsed = time.time() - self.last_failure_at
+        return elapsed >= self.cooldown_seconds
+
+
+# Default number of consecutive failures at which record_outcome() moves an
+# agent to HealthState.open.
+DEFAULT_FAILURE_THRESHOLD = 3
+# Default cooldown (seconds) that record_outcome() writes onto the agent's
+# health record; same value as the AgentHealth.cooldown_seconds field default.
+DEFAULT_COOLDOWN_SECONDS = 60.0
+
+
+def _health_path(team_name: str) -> Path:
+    """Return the location of ``agent_health.json`` for *team_name*.
+
+    The team name is passed through ``validate_identifier`` and the final
+    path is built by ``ensure_within_root`` under ``<data dir>/teams``.
+    """
+    teams_root = get_data_dir() / "teams"
+    checked_team = validate_identifier(team_name, "team name")
+    return ensure_within_root(teams_root, checked_team, "agent_health.json")
+
+
+def _load_health(team_name: str) -> dict[str, dict]:
+    """Load the persisted agent-health mapping for *team_name*.
+
+    Returns:
+        The JSON object stored in the team's health file, or an empty dict
+        when the file is missing, unreadable, undecodable, not valid JSON, or
+        holds something other than a JSON object at the top level.
+    """
+    # Resolved outside the try block so that errors from path validation
+    # still propagate to the caller instead of being reported as "no data".
+    path = _health_path(team_name)
+    try:
+        # A missing file raises FileNotFoundError (an OSError), so no separate
+        # exists() check is needed. _save_health() writes json.dumps() output
+        # with the default ensure_ascii=True, so decoding as UTF-8 is safe.
+        loaded = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    # Callers look entries up by agent name; a list or scalar would break them.
+    return loaded if isinstance(loaded, dict) else {}
+
+
+def _save_health(team_name: str, data: dict[str, dict]) -> None:
+    """Serialize *data* as 2-space-indented JSON into the team's health file.
+
+    The write is delegated to ``atomic_write_text``.
+    """
+    target = _health_path(team_name)
+    serialized = json.dumps(data, indent=2)
+    atomic_write_text(target, serialized)
+
+
+def get_agent_health(team_name: str, agent_name: str) -> AgentHealth:
+    """Look up the health record of *agent_name* within *team_name*.
+
+    An agent with no stored entry gets a fresh default ``AgentHealth``; that
+    default is returned only and is not written back to disk.
+    """
+    stored = _load_health(team_name)
+    if agent_name not in stored:
+        return AgentHealth(agent_name=agent_name)
+    return AgentHealth.model_validate(stored[agent_name])
+
+
+def get_all_health(team_name: str) -> dict[str, AgentHealth]:
+    """Return a mapping of agent name to ``AgentHealth`` for every stored entry."""
+    records: dict[str, AgentHealth] = {}
+    for agent, raw in _load_health(team_name).items():
+        records[agent] = AgentHealth.model_validate(raw)
+    return records
+
+
+def record_outcome(
+    team_name: str,
+    agent_name: str,
+    success: bool,
+    failure_threshold: int = DEFAULT_FAILURE_THRESHOLD,
+    cooldown_seconds: float = DEFAULT_COOLDOWN_SECONDS,
+) -> AgentHealth:
+    """Apply one task outcome to an agent's health record and persist it.
+
+    The load / update / save cycle runs inside ``file_locked`` on the team's
+    health file. *cooldown_seconds* overwrites the stored cooldown each call.
+
+    On success: consecutive failures reset to 0, ``total_successes`` grows by
+    one, ``quality_score`` rises by 0.1 (capped at 1.0) and the state becomes
+    ``healthy`` regardless of the previous state.
+
+    On failure: both failure counters grow by one, ``last_failure_at`` is set
+    to the current time, ``quality_score`` drops by 0.2 (floored at 0.0) and
+    the state becomes ``open`` when consecutive failures reach
+    *failure_threshold*, otherwise ``degraded``.
+
+    Returns:
+        The updated ``AgentHealth`` that was written to disk.
+    """
+    lock_target = _health_path(team_name)
+    with file_locked(lock_target):
+        all_health = _load_health(team_name)
+        stored = all_health.get(agent_name, {"agentName": agent_name})
+        health = AgentHealth.model_validate(stored)
+        health.cooldown_seconds = cooldown_seconds
+
+        if not success:
+            failures = health.consecutive_failures + 1
+            health.consecutive_failures = failures
+            health.total_failures += 1
+            health.last_failure_at = time.time()
+            health.quality_score = max(0.0, health.quality_score - 0.2)
+            if failures >= failure_threshold:
+                health.state = HealthState.open
+            elif failures >= 1:
+                health.state = HealthState.degraded
+        else:
+            health.consecutive_failures = 0
+            health.total_successes += 1
+            health.quality_score = min(1.0, health.quality_score + 0.1)
+            health.state = HealthState.healthy
+
+        # Round-trip through JSON text so the stored entry holds only plain
+        # JSON types, keyed by the camelCase aliases.
+        serialized = health.model_dump_json(by_alias=True)
+        all_health[agent_name] = json.loads(serialized)
+        _save_health(team_name, all_health)
+    return health
+
 
 def _registry_path(team_name: str) -> Path:
     return ensure_within_root(
 
@@ -83,7 +83,14 @@ def create(
         blocks: list[str] | None = None,
         blocked_by: list[str] | None = None,
         metadata: dict[str, Any] | None = None,
+        idempotency_key: str | None = None,
     ) -> TaskItem:
+        # Idempotency: return existing task if key matches
+        if idempotency_key:
+            existing = self._find_by_idempotency_key(idempotency_key)
+            if existing is not None:
+                return existing
+
         task = TaskItem(
             subject=subject,
             description=description,
@@ -92,6 +99,7 @@ def create(
             blocks=blocks or [],
             blocked_by=blocked_by or [],
             metadata=metadata or {},
+            idempotency_key=idempotency_key,
         )
         self._validate_blocked_by_unlocked(task.id, task.blocked_by)
         if task.blocked_by:
@@ -100,6 +108,19 @@ def create(
             self._save_unlocked(task)
         return task
 
+    def _find_by_idempotency_key(self, key: str) -> TaskItem | None:
+        """Scan this team's ``task-*.json`` files for a task stored under *key*.
+
+        Returns the first task whose ``idempotency_key`` equals *key*, or
+        ``None`` when no file matches. Files that cannot be read, parsed or
+        validated are skipped silently.
+        """
+        for task_file in _tasks_root(self.team_name).glob("task-*.json"):
+            try:
+                raw_text = task_file.read_text(encoding="utf-8")
+                candidate = TaskItem.model_validate(json.loads(raw_text))
+                if candidate.idempotency_key == key:
+                    return candidate
+            except Exception:
+                # Best-effort lookup: a single bad file must not abort the scan.
+                continue
+        return None
+
     def get(self, task_id: str) -> TaskItem | None:
         return self._get_unlocked(task_id)