fix(evaluate): resolve project working dir fallback

Q00 · Q00 · commit b4e0f2461028 · 2026-06-20T22:25:36.000+09:00
diff --git a/skills/evaluate/SKILL.md b/skills/evaluate/SKILL.md
@@ -82,9 +82,12 @@ fallback instead of retrying the failing call.
      seed_content: <original seed YAML, if available>
      acceptance_criterion: <specific AC to check, optional>
      artifact_type: "code"  (or "docs", "config")
+     working_dir: <absolute project root, recommended>
      trigger_consensus: false  (true if user requests Stage 3)
    ```
 
+   `working_dir` controls both Stage 1 command execution and Stage 2 source-file visibility. Pass the absolute project root whenever available; if it is omitted or cannot be resolved, the MCP handler falls back, in order, to the registered brownfield default, the project directory declared in the seed (metadata or brownfield context references), and finally the MCP server cwd.
+
 4. Present results clearly:
    - Show each stage's pass/fail status
    - Highlight the final approval decision
diff --git a/src/ouroboros/mcp/tools/evaluation_handlers.py b/src/ouroboros/mcp/tools/evaluation_handlers.py
@@ -18,6 +18,7 @@
 
 from ouroboros.config import get_llm_backend_for_role, get_llm_model_for_role
 from ouroboros.core.errors import ValidationError
+from ouroboros.core.project_paths import resolve_path_against_base
 from ouroboros.core.seed import Seed
 from ouroboros.core.types import Result
 from ouroboros.mcp.errors import MCPServerError, MCPToolError
@@ -56,6 +57,93 @@
 log = structlog.get_logger(__name__)
 
 
+async def _default_brownfield_project_dir() -> Path | None:
+    """Return the registered default brownfield dir; None when unset or lookup fails."""
+    from ouroboros.persistence.brownfield import BrownfieldStore
+
+    store = BrownfieldStore()
+    try:
+        try:
+            await store.initialize()
+            default_repo = await store.get_default()
+        finally:
+            await store.close()  # nested so a close() error is logged, not propagated
+    except Exception as exc:  # noqa: BLE001 - fallback discovery must be best-effort
+        log.warning("mcp.tool.evaluate.brownfield_default_lookup_failed", error=str(exc))
+        return None
+    if default_repo is None or not default_repo.path:
+        return None
+    return Path(default_repo.path).expanduser().resolve()
+
+
+def _seed_project_dir(seed: Seed | None, *, stable_base: Path) -> Path | None:
+    """Return the first usable project directory declared by the seed, else None.
+
+    Candidates are tried in order: metadata ``project_dir`` (falling back to
+    ``working_directory``), then references whose role is "primary", then
+    the remaining references. A candidate that resolves to a file yields its
+    parent directory.
+    """
+    if seed is None:
+        return None
+
+    meta = getattr(seed, "metadata", None)
+    declared = None
+    if meta is not None:
+        declared = getattr(meta, "project_dir", None)
+        declared = declared or getattr(meta, "working_directory", None)
+    # Only a non-empty string counts as a declared metadata path.
+    ordered: list[str] = [declared] if isinstance(declared, str) and declared else []
+
+    references = getattr(getattr(seed, "brownfield_context", None), "context_references", ())
+    references = references or ()
+    # Two passes over the same references: "primary" roles first, then any role.
+    for wanted_role in ("primary", None):
+        for ref in references:
+            ref_path, ref_role = getattr(ref, "path", None), getattr(ref, "role", None)
+            usable = isinstance(ref_path, str) and ref_path and ref_path not in ordered
+            if usable and (wanted_role is None or ref_role == wanted_role):
+                ordered.append(ref_path)
+
+    for raw in ordered:
+        location = resolve_path_against_base(raw, stable_base=stable_base)
+        if location is None:
+            continue
+        if location.is_file():
+            return location.parent  # a file reference stands for its containing directory
+        if not location.exists() or location.is_dir():
+            return location  # a directory, or a path that does not exist on disk
+    return None
+
+
+async def _resolve_evaluate_working_dir(
+    explicit_working_dir: str | None,
+    seed: Seed | None,
+) -> Path:
+    """Choose the project root used by Stage 1 and Stage 2 evaluation.
+
+    Sources are consulted in order and the first hit wins: the explicit tool
+    argument, the registered brownfield default, a directory declared by the
+    seed, and finally the MCP server cwd. An explicit value that cannot be
+    resolved falls through to the remaining sources.
+    """
+    server_cwd = Path.cwd().resolve()
+
+    # 1. Explicit tool argument; resolution may yield None.
+    chosen: Path | None = None
+    if explicit_working_dir:
+        chosen = resolve_path_against_base(explicit_working_dir, stable_base=server_cwd)
+    # 2. Registered brownfield default.
+    if chosen is None:
+        chosen = await _default_brownfield_project_dir()
+    # 3. Directory declared by the seed.
+    if chosen is None:
+        chosen = _seed_project_dir(seed, stable_base=server_cwd)
+
+    # 4. Server cwd, which preserves the historical behavior.
+    return server_cwd if chosen is None else chosen
+
+
 def _evaluation_allowed_tools(runtime_backend: str | None) -> list[str]:
     """Return the policy-derived read-only tool envelope for evaluation."""
     return allowed_runtime_builtin_tool_names(
@@ -340,7 +428,8 @@ def definition(self) -> MCPToolDefinition:
                     type=ToolInputType.STRING,
                     description=(
                         "Project root used to resolve Stage 1 mechanical verification "
-                        "commands. Commands are read from .ouroboros/mechanical.toml; "
+                        "commands and Stage 2 source-file visibility. Commands are "
+                        "read from .ouroboros/mechanical.toml; "
                         "when the file is missing, the evaluator makes one AI detect "
                         "call that inspects manifests (package.json, pyproject.toml, "
                         "Cargo.toml, Makefile, ...) and authors the toml. Stage 1 "
@@ -363,8 +452,6 @@ async def handle(
         Returns:
             Result containing evaluation results or error.
         """
-        from pathlib import Path
-
         from ouroboros.evaluation import (
             EvaluationContext,
             EvaluationPipeline,
@@ -424,14 +511,34 @@ async def handle(
             trigger_consensus=trigger_consensus,
         )
 
+        # Parse seed before dispatch so working_dir fallback is available for
+        # both plugin/subagent and in-process evaluation paths.
+        goal = ""
+        constraints: tuple[str, ...] = ()
+        seed_id = session_id  # fallback
+        seed: Seed | None = None
+
+        if seed_content:
+            try:
+                seed_dict = yaml.safe_load(seed_content)
+                seed = Seed.from_dict(seed_dict)
+                goal = seed.goal
+                constraints = tuple(seed.constraints)
+                seed_id = seed.metadata.seed_id
+            except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
+                log.warning("mcp.tool.evaluate.seed_parse_warning", error=str(e))
+                # Continue without seed data - not fatal
+
+        working_dir = await _resolve_evaluate_working_dir(arguments.get("working_dir"), seed)
+
         # --- Subagent dispatch: gate on runtime + opencode_mode ---
         payload = build_evaluate_subagent(
             session_id=session_id,
             artifact=artifact,
             artifact_type=artifact_type,
             seed_content=seed_content,
             acceptance_criterion=acceptance_criterion,
-            working_dir=arguments.get("working_dir"),
+            working_dir=str(working_dir),
             trigger_consensus=trigger_consensus,
         )
         if should_dispatch_via_plugin(self.agent_runtime_backend, self.opencode_mode):
@@ -456,22 +563,6 @@ async def handle(
         owns_event_store = False
 
         try:
-            # Extract goal/constraints from seed if provided
-            goal = ""
-            constraints: tuple[str, ...] = ()
-            seed_id = session_id  # fallback
-
-            if seed_content:
-                try:
-                    seed_dict = yaml.safe_load(seed_content)
-                    seed = Seed.from_dict(seed_dict)
-                    goal = seed.goal
-                    constraints = tuple(seed.constraints)
-                    seed_id = seed.metadata.seed_id
-                except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
-                    log.warning("mcp.tool.evaluate.seed_parse_warning", error=str(e))
-                    # Continue without seed data - not fatal
-
             # Try to enrich from session repository if event_store available
             if not goal:
                 if store is None:
@@ -508,8 +599,6 @@ async def handle(
                 allowed_tools=_evaluation_allowed_tools(backend),
                 max_turns=20,
             )
-            working_dir_str = arguments.get("working_dir")
-            working_dir = Path(working_dir_str).resolve() if working_dir_str else Path.cwd()
             log.info(
                 "mcp.tool.evaluate.started",
                 session_id=session_id,
diff --git a/tests/unit/mcp/tools/test_evaluate_multi_ac.py b/tests/unit/mcp/tools/test_evaluate_multi_ac.py
@@ -8,6 +8,8 @@
 
 from __future__ import annotations
 
+from pathlib import Path
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -20,7 +22,7 @@
     MechanicalResult,
     SemanticResult,
 )
-from ouroboros.mcp.tools.evaluation_handlers import EvaluateHandler
+from ouroboros.mcp.tools.evaluation_handlers import EvaluateHandler, _resolve_evaluate_working_dir
 from ouroboros.mcp.types import ToolInputType
 
 
@@ -72,6 +74,67 @@ def _failing_eval(execution_id: str, *, reason: str) -> EvaluationResult:
     )
 
 
+class TestEvaluateWorkingDirResolution:
+    """Precedence checks for ``_resolve_evaluate_working_dir``.
+
+    Each test patches the brownfield lookup and asserts which source wins:
+    explicit argument, brownfield default, seed metadata, then the cwd.
+    """
+
+    # Patch target that replaces the brownfield store lookup in every test.
+    _DEFAULT_LOOKUP = "ouroboros.mcp.tools.evaluation_handlers._default_brownfield_project_dir"
+
+    async def test_explicit_working_dir_wins(self, tmp_path: Path) -> None:
+        """An explicit argument beats a registered brownfield default."""
+        project_root = tmp_path / "project"
+        project_root.mkdir()
+        lookup = AsyncMock(return_value=tmp_path / "default")
+
+        with patch(self._DEFAULT_LOOKUP, new=lookup):
+            outcome = await _resolve_evaluate_working_dir(str(project_root), None)
+
+        assert outcome == project_root.resolve()
+
+    async def test_brownfield_default_used_before_cwd(self, tmp_path: Path, monkeypatch) -> None:
+        """With no explicit argument, the brownfield default beats the cwd."""
+        server_cwd = tmp_path / "hermes"
+        registered = tmp_path / "repo"
+        server_cwd.mkdir()
+        registered.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        with patch(self._DEFAULT_LOOKUP, new=AsyncMock(return_value=registered)):
+            outcome = await _resolve_evaluate_working_dir(None, None)
+
+        assert outcome == registered.resolve()
+
+    async def test_seed_metadata_used_when_no_default(self, tmp_path: Path, monkeypatch) -> None:
+        """Seed metadata supplies the directory when no default is registered."""
+        server_cwd = tmp_path / "hermes"
+        project_root = tmp_path / "project"
+        server_cwd.mkdir()
+        project_root.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        # Stand-in seed exposing only the attributes the resolver reads via getattr.
+        seed_meta = SimpleNamespace(project_dir=str(project_root), working_directory=None)
+        seed = SimpleNamespace(metadata=seed_meta, brownfield_context=None)
+
+        with patch(self._DEFAULT_LOOKUP, new=AsyncMock(return_value=None)):
+            outcome = await _resolve_evaluate_working_dir(None, seed)
+
+        assert outcome == project_root.resolve()
+
+    async def test_cwd_fallback_last(self, tmp_path: Path, monkeypatch) -> None:
+        """With every other source empty, the server cwd is returned."""
+        monkeypatch.chdir(tmp_path)
+
+        with patch(self._DEFAULT_LOOKUP, new=AsyncMock(return_value=None)):
+            outcome = await _resolve_evaluate_working_dir(None, None)
+
+        assert outcome == tmp_path.resolve()
+
+
 class TestDefinitionAcceptsMultiAC:
     """The tool schema must advertise the new acceptance_criteria parameter."""