Q00 · Q00 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/skills/evaluate/SKILL.md b/skills/evaluate/SKILL.md
@@ -82,9 +82,12 @@ fallback instead of retrying the failing call.
      seed_content: <original seed YAML, if available>
      acceptance_criterion: <specific AC to check, optional>
      artifact_type: "code"  (or "docs", "config")
+     working_dir: <absolute project root, recommended>
      trigger_consensus: false  (true if user requests Stage 3)
    ```
 
+   `working_dir` controls both Stage 1 command execution and Stage 2 source-file visibility. Pass the absolute project root whenever available; if it is omitted, the MCP handler falls back, in order, to the registered brownfield default, the project directory declared in the seed metadata, and finally the MCP server's cwd.
+
 4. Present results clearly:
    - Show each stage's pass/fail status
    - Highlight the final approval decision

diff --git a/src/ouroboros/mcp/tools/evaluation_handlers.py b/src/ouroboros/mcp/tools/evaluation_handlers.py
@@ -18,6 +18,7 @@
 
 from ouroboros.config import get_llm_backend_for_role, get_llm_model_for_role
 from ouroboros.core.errors import ValidationError
+from ouroboros.core.project_paths import resolve_path_against_base, resolve_seed_project_path
 from ouroboros.core.seed import Seed
 from ouroboros.core.types import Result
 from ouroboros.mcp.errors import MCPServerError, MCPToolError
@@ -56,6 +57,80 @@
 log = structlog.get_logger(__name__)
 
 
+async def _default_brownfield_project_dir() -> Path | None:
+    """Return the registered default brownfield project directory, if any.
+
+    Discovery is strictly best-effort: a failure while constructing,
+    initializing, querying, or closing the brownfield store, or while
+    normalizing the stored path, is logged and reported as ``None`` (or, for a
+    close failure after a successful lookup, does not discard the lookup
+    result) so the caller can move on to its next fallback.
+
+    Returns:
+        The user-expanded, resolved path of the default repo when it names an
+        existing directory; otherwise ``None``.
+    """
+    from ouroboros.persistence.brownfield import BrownfieldStore
+
+    store = None
+    try:
+        store = BrownfieldStore()
+        await store.initialize()
+        default_repo = await store.get_default()
+    except Exception as exc:  # noqa: BLE001 - fallback discovery must be best-effort
+        log.warning("mcp.tool.evaluate.brownfield_default_lookup_failed", error=str(exc))
+        return None
+    finally:
+        # An exception raised inside ``finally`` would replace the lookup
+        # outcome, so cleanup errors are logged instead of propagated.
+        if store is not None:
+            try:
+                await store.close()
+            except Exception as exc:  # noqa: BLE001 - cleanup must stay best-effort
+                log.warning(
+                    "mcp.tool.evaluate.brownfield_default_close_failed",
+                    error=str(exc),
+                )
+
+    if default_repo is None or not default_repo.path:
+        return None
+
+    try:
+        resolved = Path(default_repo.path).expanduser().resolve()
+    except (OSError, RuntimeError, ValueError) as exc:
+        # expanduser() raises RuntimeError when the home directory cannot be
+        # determined; resolve() can fail on malformed or looping paths.
+        log.warning(
+            "mcp.tool.evaluate.brownfield_default_unusable",
+            path=str(default_repo.path),
+            error=str(exc),
+        )
+        return None
+
+    if not resolved.is_dir():
+        log.warning(
+            "mcp.tool.evaluate.brownfield_default_unusable",
+            path=str(resolved),
+        )
+        return None
+    return resolved
+
+
+def _seed_project_dir(seed: Seed | None, *, stable_base: Path) -> Path | None:
+    """Derive a project directory from the seed's metadata/context.
+
+    The seed is handed to ``resolve_seed_project_path`` together with
+    ``stable_base``. The resulting path is then normalized:
+
+    * no path resolved -> ``None``
+    * path is a regular file -> its parent directory
+    * path is a directory, or does not exist -> returned unchanged
+    * path exists but is neither file nor directory -> ``None``
+    """
+    candidate = resolve_seed_project_path(seed, stable_base=stable_base).path
+    if candidate is None:
+        return None
+
+    if candidate.is_file():
+        return candidate.parent
+
+    if not candidate.exists() or candidate.is_dir():
+        return candidate
+    return None
+
+
+async def _resolve_evaluate_working_dir(
+    explicit_working_dir: str | None,
+    seed: Seed | None,
+) -> Path:
+    """Resolve the project root that gates Stage 1 and Stage 2 evaluation.
+
+    Precedence is explicit tool argument, registered brownfield default,
+    seed-declared project directory, then the MCP server cwd. The last
+    fallback preserves the historical behavior, but only after project-aware
+    sources have been exhausted.
+
+    Args:
+        explicit_working_dir: The ``working_dir`` tool argument, if supplied.
+            It is passed to ``resolve_path_against_base`` with the server cwd
+            as the stable base; a ``None`` result is logged and the next
+            source is tried.
+        seed: Parsed seed, or ``None`` when no seed was provided or parsing
+            failed.
+
+    Returns:
+        The directory selected by the first source that yields a usable path.
+    """
+    stable_base = Path.cwd().resolve()
+    if explicit_working_dir:
+        resolved = resolve_path_against_base(explicit_working_dir, stable_base=stable_base)
+        if resolved is not None:
+            return resolved
+        # The caller asked for a specific root; record that it was rejected
+        # rather than silently evaluating somewhere else.
+        log.warning(
+            "mcp.tool.evaluate.explicit_working_dir_unusable",
+            working_dir=explicit_working_dir,
+        )
+
+    brownfield_default = await _default_brownfield_project_dir()
+    if brownfield_default is not None:
+        # Re-validated here so a stale or non-directory default falls through
+        # to the seed/cwd fallbacks instead of being returned.
+        if brownfield_default.is_dir():
+            return brownfield_default.resolve()
+        log.warning(
+            "mcp.tool.evaluate.brownfield_default_unusable",
+            path=str(brownfield_default),
+        )
+
+    seed_dir = _seed_project_dir(seed, stable_base=stable_base)
+    if seed_dir is not None:
+        return seed_dir
+
+    return stable_base
+
+
 def _evaluation_allowed_tools(runtime_backend: str | None) -> list[str]:
     """Return the policy-derived read-only tool envelope for evaluation."""
     return allowed_runtime_builtin_tool_names(
@@ -340,7 +415,8 @@ def definition(self) -> MCPToolDefinition:
                     type=ToolInputType.STRING,
                     description=(
                         "Project root used to resolve Stage 1 mechanical verification "
-                        "commands. Commands are read from .ouroboros/mechanical.toml; "
+                        "commands and Stage 2 source-file visibility. Commands are "
+                        "read from .ouroboros/mechanical.toml; "
                         "when the file is missing, the evaluator makes one AI detect "
                         "call that inspects manifests (package.json, pyproject.toml, "
                         "Cargo.toml, Makefile, ...) and authors the toml. Stage 1 "
@@ -363,8 +439,6 @@ async def handle(
         Returns:
             Result containing evaluation results or error.
         """
-        from pathlib import Path
-
         from ouroboros.evaluation import (
             EvaluationContext,
             EvaluationPipeline,
@@ -424,14 +498,34 @@ async def handle(
             trigger_consensus=trigger_consensus,
         )
 
+        # Parse seed before dispatch so working_dir fallback is available for
+        # both plugin/subagent and in-process evaluation paths.
+        goal = ""
+        constraints: tuple[str, ...] = ()
+        seed_id = session_id  # fallback
+        seed: Seed | None = None
+
+        if seed_content:
+            try:
+                seed_dict = yaml.safe_load(seed_content)
+                seed = Seed.from_dict(seed_dict)
+                goal = seed.goal
+                constraints = tuple(seed.constraints)
+                seed_id = seed.metadata.seed_id
+            except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
+                log.warning("mcp.tool.evaluate.seed_parse_warning", error=str(e))
+                # Continue without seed data - not fatal
+
+        working_dir = await _resolve_evaluate_working_dir(arguments.get("working_dir"), seed)
+
         # --- Subagent dispatch: gate on runtime + opencode_mode ---
         payload = build_evaluate_subagent(
             session_id=session_id,
             artifact=artifact,
             artifact_type=artifact_type,
             seed_content=seed_content,
             acceptance_criterion=acceptance_criterion,
-            working_dir=arguments.get("working_dir"),
+            working_dir=str(working_dir),
             trigger_consensus=trigger_consensus,
         )
         if should_dispatch_via_plugin(self.agent_runtime_backend, self.opencode_mode):
@@ -456,22 +550,6 @@ async def handle(
         owns_event_store = False
 
         try:
-            # Extract goal/constraints from seed if provided
-            goal = ""
-            constraints: tuple[str, ...] = ()
-            seed_id = session_id  # fallback
-
-            if seed_content:
-                try:
-                    seed_dict = yaml.safe_load(seed_content)
-                    seed = Seed.from_dict(seed_dict)
-                    goal = seed.goal
-                    constraints = tuple(seed.constraints)
-                    seed_id = seed.metadata.seed_id
-                except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
-                    log.warning("mcp.tool.evaluate.seed_parse_warning", error=str(e))
-                    # Continue without seed data - not fatal
-
             # Try to enrich from session repository if event_store available
             if not goal:
                 if store is None:
@@ -508,8 +586,6 @@ async def handle(
                 allowed_tools=_evaluation_allowed_tools(backend),
                 max_turns=20,
             )
-            working_dir_str = arguments.get("working_dir")
-            working_dir = Path(working_dir_str).resolve() if working_dir_str else Path.cwd()
             log.info(
                 "mcp.tool.evaluate.started",
                 session_id=session_id,
@@ -1786,13 +1862,27 @@ async def handle(
             else:
                 ac_for_payload = None
 
+            seed: Seed | None = None
+            seed_content = arguments.get("seed_content")
+            if seed_content:
+                try:
+                    seed_dict = yaml.safe_load(seed_content)
+                    seed = Seed.from_dict(seed_dict)
+                except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
+                    log.warning("mcp.tool.start_evaluate.seed_parse_warning", error=str(e))
+
+            working_dir = await _resolve_evaluate_working_dir(
+                arguments.get("working_dir"),
+                seed,
+            )
+
             payload = build_evaluate_subagent(
                 session_id=session_id,
                 artifact=artifact,
                 artifact_type=arguments.get("artifact_type", "code"),
-                seed_content=arguments.get("seed_content"),
+                seed_content=seed_content,
                 acceptance_criterion=ac_for_payload,
-                working_dir=arguments.get("working_dir"),
+                working_dir=str(working_dir),
                 trigger_consensus=arguments.get("trigger_consensus", False),
             )
             return await dispatch_plugin_terminal(

diff --git a/tests/unit/mcp/tools/test_evaluate_multi_ac.py b/tests/unit/mcp/tools/test_evaluate_multi_ac.py
@@ -8,6 +8,8 @@
 
 from __future__ import annotations
 
+from pathlib import Path
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -20,7 +22,7 @@
     MechanicalResult,
     SemanticResult,
 )
-from ouroboros.mcp.tools.evaluation_handlers import EvaluateHandler
+from ouroboros.mcp.tools.evaluation_handlers import EvaluateHandler, _resolve_evaluate_working_dir
 from ouroboros.mcp.types import ToolInputType
 
 
@@ -72,6 +74,155 @@ def _failing_eval(execution_id: str, *, reason: str) -> EvaluationResult:
     )
 
 
+class TestEvaluateWorkingDirResolution:
+    """Working dir fallback keeps Stage 2 pointed at the project root."""
+
+    _LOOKUP_TARGET = (
+        "ouroboros.mcp.tools.evaluation_handlers._default_brownfield_project_dir"
+    )
+
+    @classmethod
+    def _brownfield_default(cls, value: Path | None):
+        """Patch the brownfield default lookup so it resolves to *value*."""
+        return patch(cls._LOOKUP_TARGET, new=AsyncMock(return_value=value))
+
+    @staticmethod
+    def _seed_declaring(project_dir: str) -> SimpleNamespace:
+        """Build a minimal seed stand-in whose metadata names *project_dir*."""
+        return SimpleNamespace(
+            metadata=SimpleNamespace(project_dir=project_dir, working_directory=None),
+            brownfield_context=None,
+        )
+
+    async def test_explicit_working_dir_wins(self, tmp_path: Path) -> None:
+        requested = tmp_path / "project"
+        requested.mkdir()
+
+        with self._brownfield_default(tmp_path / "default"):
+            outcome = await _resolve_evaluate_working_dir(str(requested), None)
+
+        assert outcome == requested.resolve()
+
+    async def test_brownfield_default_used_before_cwd(self, tmp_path: Path, monkeypatch) -> None:
+        server_cwd = tmp_path / "hermes"
+        registered = tmp_path / "repo"
+        server_cwd.mkdir()
+        registered.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        with self._brownfield_default(registered):
+            outcome = await _resolve_evaluate_working_dir(None, None)
+
+        assert outcome == registered.resolve()
+
+    async def test_seed_metadata_used_when_no_default(self, tmp_path: Path, monkeypatch) -> None:
+        server_cwd = tmp_path / "hermes"
+        declared = server_cwd / "project"
+        declared.mkdir(parents=True)
+        monkeypatch.chdir(server_cwd)
+
+        with self._brownfield_default(None):
+            outcome = await _resolve_evaluate_working_dir(
+                None, self._seed_declaring("project")
+            )
+
+        assert outcome == declared.resolve()
+
+    async def test_brownfield_default_wins_over_seed_metadata(
+        self, tmp_path: Path, monkeypatch
+    ) -> None:
+        server_cwd = tmp_path / "hermes"
+        registered = tmp_path / "repo-default"
+        declared = server_cwd / "seed-project"
+        server_cwd.mkdir()
+        registered.mkdir()
+        declared.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        with self._brownfield_default(registered):
+            outcome = await _resolve_evaluate_working_dir(
+                None, self._seed_declaring("seed-project")
+            )
+
+        assert outcome == registered.resolve()
+
+    async def test_stale_brownfield_default_falls_back_to_seed(
+        self, tmp_path: Path, monkeypatch
+    ) -> None:
+        server_cwd = tmp_path / "hermes"
+        vanished = tmp_path / "missing-default"
+        declared = server_cwd / "seed-project"
+        server_cwd.mkdir()
+        declared.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        with self._brownfield_default(vanished):
+            outcome = await _resolve_evaluate_working_dir(
+                None, self._seed_declaring("seed-project")
+            )
+
+        assert outcome == declared.resolve()
+
+    async def test_non_directory_brownfield_default_falls_back_to_seed(
+        self, tmp_path: Path, monkeypatch
+    ) -> None:
+        server_cwd = tmp_path / "hermes"
+        plain_file = tmp_path / "default-file"
+        declared = server_cwd / "seed-project"
+        server_cwd.mkdir()
+        plain_file.write_text("not a directory")
+        declared.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        with self._brownfield_default(plain_file):
+            outcome = await _resolve_evaluate_working_dir(
+                None, self._seed_declaring("seed-project")
+            )
+
+        assert outcome == declared.resolve()
+
+    async def test_seed_metadata_escape_falls_back_to_cwd(
+        self, tmp_path: Path, monkeypatch
+    ) -> None:
+        server_cwd = tmp_path / "hermes"
+        elsewhere = tmp_path / "outside"
+        server_cwd.mkdir()
+        elsewhere.mkdir()
+        monkeypatch.chdir(server_cwd)
+
+        with self._brownfield_default(None):
+            outcome = await _resolve_evaluate_working_dir(
+                None, self._seed_declaring(str(elsewhere))
+            )
+
+        assert outcome == server_cwd.resolve()
+
+    async def test_cwd_fallback_last(self, tmp_path: Path, monkeypatch) -> None:
+        monkeypatch.chdir(tmp_path)
+
+        with self._brownfield_default(None):
+            outcome = await _resolve_evaluate_working_dir(None, None)
+
+        assert outcome == tmp_path.resolve()
+
+
 class TestDefinitionAcceptsMultiAC:
     """The tool schema must advertise the new acceptance_criteria parameter."""