Skip to content

Commit b4e0f24

Browse files
committed
fix(evaluate): resolve project working dir fallback
1 parent b62b565 commit b4e0f24

3 files changed

Lines changed: 178 additions & 23 deletions

File tree

skills/evaluate/SKILL.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,12 @@ fallback instead of retrying the failing call.
8282
seed_content: <original seed YAML, if available>
8383
acceptance_criterion: <specific AC to check, optional>
8484
artifact_type: "code" (or "docs", "config")
85+
working_dir: <absolute project root, recommended>
8586
trigger_consensus: false (true if user requests Stage 3)
8687
```
8788

89+
`working_dir` controls both Stage 1 command execution and Stage 2 source-file visibility. Pass the absolute project root whenever it is available. If it is omitted, the MCP handler falls back, in order, to the registered brownfield default, then the project directory declared in the seed metadata, and finally the MCP server's cwd.
90+
8891
4. Present results clearly:
8992
- Show each stage's pass/fail status
9093
- Highlight the final approval decision

src/ouroboros/mcp/tools/evaluation_handlers.py

Lines changed: 111 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from ouroboros.config import get_llm_backend_for_role, get_llm_model_for_role
2020
from ouroboros.core.errors import ValidationError
21+
from ouroboros.core.project_paths import resolve_path_against_base
2122
from ouroboros.core.seed import Seed
2223
from ouroboros.core.types import Result
2324
from ouroboros.mcp.errors import MCPServerError, MCPToolError
@@ -56,6 +57,93 @@
5657
log = structlog.get_logger(__name__)
5758

5859

60+
async def _default_brownfield_project_dir() -> Path | None:
    """Return the registered default brownfield project directory, if any.

    Discovery is strictly best-effort: a failure while constructing,
    initializing, querying, or closing the store is logged and never
    propagated, so the caller can move on to its remaining fallbacks.

    Returns:
        The default repository path with ``~`` expanded and passed through
        ``Path.resolve()``, or ``None`` when no default is registered, its
        path is empty, or the lookup failed.
    """
    from ouroboros.persistence.brownfield import BrownfieldStore

    store = None
    default_repo = None
    try:
        # Construction is inside the guarded region so a failing constructor
        # is handled the same way as a failing initialize()/get_default().
        store = BrownfieldStore()
        await store.initialize()
        default_repo = await store.get_default()
    except Exception as exc:  # noqa: BLE001 - fallback discovery must be best-effort
        log.warning("mcp.tool.evaluate.brownfield_default_lookup_failed", error=str(exc))
        return None
    finally:
        # An exception raised here would replace the pending return value and
        # escape the helper, so cleanup errors are logged instead of raised.
        if store is not None:
            try:
                await store.close()
            except Exception as exc:  # noqa: BLE001 - cleanup must stay best-effort
                log.warning(
                    "mcp.tool.evaluate.brownfield_default_close_failed",
                    error=str(exc),
                )

    if default_repo is None or not default_repo.path:
        return None
    return Path(default_repo.path).expanduser().resolve()
77+
78+
79+
def _seed_project_dir(seed: Seed | None, *, stable_base: Path) -> Path | None:
    """Derive a project directory from seed metadata or brownfield context.

    Candidate paths are gathered in priority order:

    1. ``seed.metadata.project_dir`` or, when that is falsy,
       ``seed.metadata.working_directory`` (only if the value is a non-empty
       string).
    2. Brownfield context references whose role equals ``"primary"``.
    3. Every remaining brownfield context reference.

    Each candidate is resolved with ``resolve_path_against_base`` and the first
    usable one wins.

    Args:
        seed: Parsed seed, or ``None`` when no seed is available.
        stable_base: Base directory handed to ``resolve_path_against_base``.

    Returns:
        The parent directory when the candidate resolves to a file, the
        resolved path itself when it is a directory or does not exist, or
        ``None`` when there is no seed or no candidate is usable.
    """
    if seed is None:
        return None

    ordered_paths: list[str] = []

    metadata = getattr(seed, "metadata", None)
    if metadata is not None:
        declared = getattr(metadata, "project_dir", None)
        if not declared:
            declared = getattr(metadata, "working_directory", None)
        if isinstance(declared, str) and declared:
            ordered_paths.append(declared)

    context = getattr(seed, "brownfield_context", None)
    references = getattr(context, "context_references", ()) or ()

    # Two passes over the references: primary-role entries first, then all
    # others, skipping any path that has already been collected.
    for accept_any_role in (False, True):
        for ref in references:
            ref_path = getattr(ref, "path", None)
            ref_role = getattr(ref, "role", None)
            if not isinstance(ref_path, str) or not ref_path:
                continue
            if ref_path in ordered_paths:
                continue
            if accept_any_role or ref_role == "primary":
                ordered_paths.append(ref_path)

    for raw_path in ordered_paths:
        location = resolve_path_against_base(raw_path, stable_base=stable_base)
        if location is None:
            continue
        if location.is_file():
            # A file reference points into the project; use its directory.
            return location.parent
        if not location.exists() or location.is_dir():
            return location
        # Exists but is neither a regular file nor a directory: try the next one.

    return None
117+
118+
119+
async def _resolve_evaluate_working_dir(
    explicit_working_dir: str | None,
    seed: Seed | None,
) -> Path:
    """Resolve the project root that gates Stage 1 and Stage 2 evaluation.

    Precedence is explicit tool argument, registered brownfield default,
    seed-declared project directory, then the MCP server cwd. The last
    fallback preserves the historical behavior, but only after project-aware
    sources have been exhausted.

    Args:
        explicit_working_dir: ``working_dir`` tool argument, if supplied.
        seed: Parsed seed, or ``None`` when none was provided or parsing failed.

    Returns:
        The directory chosen by the first source in the precedence list that
        yields a value; the resolved server cwd when none does.
    """
    stable_base = Path.cwd().resolve()

    if explicit_working_dir:
        resolved = resolve_path_against_base(explicit_working_dir, stable_base=stable_base)
        if resolved is not None:
            return resolved
        # The caller asked for a specific directory and will not get it; make
        # the substitution visible rather than silently using another root.
        log.warning(
            "mcp.tool.evaluate.working_dir_unresolved",
            working_dir=explicit_working_dir,
        )

    brownfield_default = await _default_brownfield_project_dir()
    if brownfield_default is not None:
        return brownfield_default

    seed_dir = _seed_project_dir(seed, stable_base=stable_base)
    if seed_dir is not None:
        return seed_dir

    return stable_base
145+
146+
59147
def _evaluation_allowed_tools(runtime_backend: str | None) -> list[str]:
60148
"""Return the policy-derived read-only tool envelope for evaluation."""
61149
return allowed_runtime_builtin_tool_names(
@@ -340,7 +428,8 @@ def definition(self) -> MCPToolDefinition:
340428
type=ToolInputType.STRING,
341429
description=(
342430
"Project root used to resolve Stage 1 mechanical verification "
343-
"commands. Commands are read from .ouroboros/mechanical.toml; "
431+
"commands and Stage 2 source-file visibility. Commands are "
432+
"read from .ouroboros/mechanical.toml; "
344433
"when the file is missing, the evaluator makes one AI detect "
345434
"call that inspects manifests (package.json, pyproject.toml, "
346435
"Cargo.toml, Makefile, ...) and authors the toml. Stage 1 "
@@ -363,8 +452,6 @@ async def handle(
363452
Returns:
364453
Result containing evaluation results or error.
365454
"""
366-
from pathlib import Path
367-
368455
from ouroboros.evaluation import (
369456
EvaluationContext,
370457
EvaluationPipeline,
@@ -424,14 +511,34 @@ async def handle(
424511
trigger_consensus=trigger_consensus,
425512
)
426513

514+
# Parse seed before dispatch so working_dir fallback is available for
515+
# both plugin/subagent and in-process evaluation paths.
516+
goal = ""
517+
constraints: tuple[str, ...] = ()
518+
seed_id = session_id # fallback
519+
seed: Seed | None = None
520+
521+
if seed_content:
522+
try:
523+
seed_dict = yaml.safe_load(seed_content)
524+
seed = Seed.from_dict(seed_dict)
525+
goal = seed.goal
526+
constraints = tuple(seed.constraints)
527+
seed_id = seed.metadata.seed_id
528+
except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
529+
log.warning("mcp.tool.evaluate.seed_parse_warning", error=str(e))
530+
# Continue without seed data - not fatal
531+
532+
working_dir = await _resolve_evaluate_working_dir(arguments.get("working_dir"), seed)
533+
427534
# --- Subagent dispatch: gate on runtime + opencode_mode ---
428535
payload = build_evaluate_subagent(
429536
session_id=session_id,
430537
artifact=artifact,
431538
artifact_type=artifact_type,
432539
seed_content=seed_content,
433540
acceptance_criterion=acceptance_criterion,
434-
working_dir=arguments.get("working_dir"),
541+
working_dir=str(working_dir),
435542
trigger_consensus=trigger_consensus,
436543
)
437544
if should_dispatch_via_plugin(self.agent_runtime_backend, self.opencode_mode):
@@ -456,22 +563,6 @@ async def handle(
456563
owns_event_store = False
457564

458565
try:
459-
# Extract goal/constraints from seed if provided
460-
goal = ""
461-
constraints: tuple[str, ...] = ()
462-
seed_id = session_id # fallback
463-
464-
if seed_content:
465-
try:
466-
seed_dict = yaml.safe_load(seed_content)
467-
seed = Seed.from_dict(seed_dict)
468-
goal = seed.goal
469-
constraints = tuple(seed.constraints)
470-
seed_id = seed.metadata.seed_id
471-
except (yaml.YAMLError, ValidationError, PydanticValidationError) as e:
472-
log.warning("mcp.tool.evaluate.seed_parse_warning", error=str(e))
473-
# Continue without seed data - not fatal
474-
475566
# Try to enrich from session repository if event_store available
476567
if not goal:
477568
if store is None:
@@ -508,8 +599,6 @@ async def handle(
508599
allowed_tools=_evaluation_allowed_tools(backend),
509600
max_turns=20,
510601
)
511-
working_dir_str = arguments.get("working_dir")
512-
working_dir = Path(working_dir_str).resolve() if working_dir_str else Path.cwd()
513602
log.info(
514603
"mcp.tool.evaluate.started",
515604
session_id=session_id,

tests/unit/mcp/tools/test_evaluate_multi_ac.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
from __future__ import annotations
1010

11+
from pathlib import Path
12+
from types import SimpleNamespace
1113
from unittest.mock import AsyncMock, MagicMock, patch
1214

1315
import pytest
@@ -20,7 +22,7 @@
2022
MechanicalResult,
2123
SemanticResult,
2224
)
23-
from ouroboros.mcp.tools.evaluation_handlers import EvaluateHandler
25+
from ouroboros.mcp.tools.evaluation_handlers import EvaluateHandler, _resolve_evaluate_working_dir
2426
from ouroboros.mcp.types import ToolInputType
2527

2628

@@ -72,6 +74,67 @@ def _failing_eval(execution_id: str, *, reason: str) -> EvaluationResult:
7274
)
7375

7476

77+
class TestEvaluateWorkingDirResolution:
    """Working dir fallback keeps Stage 2 pointed at the project root."""

    @staticmethod
    def _stub_brownfield_default(value: Path | None):
        """Patch the brownfield-default lookup so it yields ``value``."""
        return patch(
            "ouroboros.mcp.tools.evaluation_handlers._default_brownfield_project_dir",
            new=AsyncMock(return_value=value),
        )

    async def test_explicit_working_dir_wins(self, tmp_path: Path) -> None:
        # An explicit argument must beat a registered brownfield default.
        explicit = tmp_path / "project"
        explicit.mkdir()

        with self._stub_brownfield_default(tmp_path / "default"):
            resolved = await _resolve_evaluate_working_dir(str(explicit), None)

        assert resolved == explicit.resolve()

    async def test_brownfield_default_used_before_cwd(self, tmp_path: Path, monkeypatch) -> None:
        # With no explicit argument, the registered default beats the cwd.
        cwd = tmp_path / "hermes"
        default = tmp_path / "repo"
        for directory in (cwd, default):
            directory.mkdir()
        monkeypatch.chdir(cwd)

        with self._stub_brownfield_default(default):
            resolved = await _resolve_evaluate_working_dir(None, None)

        assert resolved == default.resolve()

    async def test_seed_metadata_used_when_no_default(self, tmp_path: Path, monkeypatch) -> None:
        # Without a registered default, seed metadata supplies the directory.
        cwd = tmp_path / "hermes"
        project = tmp_path / "project"
        for directory in (cwd, project):
            directory.mkdir()
        monkeypatch.chdir(cwd)

        seed_metadata = SimpleNamespace(project_dir=str(project), working_directory=None)
        seed = SimpleNamespace(metadata=seed_metadata, brownfield_context=None)

        with self._stub_brownfield_default(None):
            resolved = await _resolve_evaluate_working_dir(None, seed)

        assert resolved == project.resolve()

    async def test_cwd_fallback_last(self, tmp_path: Path, monkeypatch) -> None:
        # With nothing else available, the process cwd is the result.
        monkeypatch.chdir(tmp_path)

        with self._stub_brownfield_default(None):
            resolved = await _resolve_evaluate_working_dir(None, None)

        assert resolved == tmp_path.resolve()
136+
137+
75138
class TestDefinitionAcceptsMultiAC:
76139
"""The tool schema must advertise the new acceptance_criteria parameter."""
77140

0 commit comments

Comments
 (0)