diff --git a/src/fast_agent/agents/mcp_agent.py b/src/fast_agent/agents/mcp_agent.py index 640249c96..cedb774f0 100644 --- a/src/fast_agent/agents/mcp_agent.py +++ b/src/fast_agent/agents/mcp_agent.py @@ -72,6 +72,10 @@ NamespacedTool, ServerStatus, ) +from fast_agent.mcp.mcp_skills_loader import ( + load_mcp_skill_manifests, + merge_filesystem_and_mcp_manifests, +) from fast_agent.mcp.provider_management import ( ProviderManagedMCPState, build_provider_managed_mcp_state, @@ -229,15 +233,16 @@ def __init__( else: self._shell_runtime_activation_reason = "via " + " and ".join(reasons) - # Derive skills directory from this agent's manifests (respects per-agent config) + # Derive skills directory from this agent's manifests (respects per-agent config). + # URI-backed (Skills-over-MCP) manifests have no filesystem path, so pick + # the first filesystem-backed manifest rather than indexing [0] blindly. skills_directory = None - if self._skill_manifests: - # Get the skills directory from the first manifest's path - # Path structure: /skills/skill-name/SKILL.md - # So we need parent.parent of the manifest path - first_manifest = self._skill_manifests[0] - if first_manifest.path: - skills_directory = first_manifest.path.parent.parent + first_fs_manifest = next( + (m for m in self._skill_manifests if m.path is not None), None + ) + if first_fs_manifest is not None: + # Path structure: /skills/skill-name/SKILL.md -> parent.parent + skills_directory = first_fs_manifest.path.parent.parent self._shell_access_modes: tuple[str, ...] = () if self._shell_runtime_activation_reason is not None: @@ -316,6 +321,11 @@ async def initialize(self) -> None: """ await self.__aenter__() + # Discover Skills-over-MCP skills from connected servers and merge + # them with any filesystem manifests before the instruction template + # is built (so the frontmatter lands in {{agentSkills}}). + await self._load_mcp_skill_manifests() + # Apply template substitution to the instruction with server instructions await self._apply_instruction_templates() @@ -492,7 +502,9 @@ def has_filesystem_read_text_file_tool(self) -> bool: @property def skill_read_tool_name(self) -> str: - """Return the tool name that should be referenced for reading skill content.""" + """Tool the model uses to read skill content. Forced to ``read_skill`` when any manifest is URI-backed (only it accepts both filesystem paths and resource URIs).""" + if any(manifest.uri for manifest in self._skill_manifests): + return "read_skill" return "read_text_file" if self.has_filesystem_read_text_file_tool else "read_skill" @property @@ -583,11 +595,68 @@ def _warn_if_invalid_shell_working_directory(self, working_directory: Path | Non surface="startup_once", ) + async def _load_mcp_skill_manifests(self) -> None: + """Fetch skills served by connected MCP servers per the Skills-over-MCP SEP. + + Merges discovered MCP manifests with any pre-existing filesystem + manifests and updates the skill reader. On name collision, the + filesystem manifest wins (consistent with SkillRegistry dedup). + Disabled per-server via MCPServerSettings.mcp_skills. + """ + server_names = tuple(self._aggregator.server_names or ()) + if not server_names: + return + + # Collect per-server opt-out from config (default: enabled). + enabled_servers: set[str] | None = None + if self._context and self._context.config and self._context.config.mcp: + server_settings = self._context.config.mcp.servers or {} + enabled_servers = { + name + for name in server_names + if getattr(server_settings.get(name), "mcp_skills", True) + } + if not enabled_servers: + return + + try: + mcp_manifests = await load_mcp_skill_manifests( + self._aggregator, + server_names, + enabled_servers=enabled_servers, + ) + except Exception as exc: + # Discovery must not break agent startup. + self.logger.error( + "Failed to load MCP skills", + data={"error": str(exc)}, + exc_info=True, + ) + return + + if not mcp_manifests: + return + + merged, warnings = merge_filesystem_and_mcp_manifests( + self._skill_manifests, mcp_manifests + ) + for message in warnings: + self._record_warning(f"[dim]{message}[/dim]", surface="startup_once") + + self.set_skill_manifests(merged) + def set_skill_manifests(self, manifests: Sequence[SkillManifest]) -> None: self._skill_manifests = list(manifests) self._skill_map = {manifest.name: manifest for manifest in self._skill_manifests} if self._skill_manifests: - self._skill_reader = SkillReader(self._skill_manifests, self.logger) + # The aggregator is only needed when any manifest is URI-backed + # (Skills-over-MCP), but passing it unconditionally is cheap and + # keeps the reader uniform. + self._skill_reader = SkillReader( + self._skill_manifests, + self.logger, + aggregator=self._aggregator, + ) self._ensure_shell_runtime_for_skills() else: self._skill_reader = None @@ -598,12 +667,14 @@ def _ensure_shell_runtime_for_skills(self) -> None: if self._external_runtime is not None: return - # Derive skills directory from manifests (respects per-agent config) + # Derive skills directory from manifests (respects per-agent config). + # Skip URI-backed manifests — they have no filesystem root to anchor the shell runtime. skills_directory = None - if self._skill_manifests: - first_manifest = self._skill_manifests[0] - if first_manifest.path: - skills_directory = first_manifest.path.parent.parent + first_fs_manifest = next( + (m for m in self._skill_manifests if m.path is not None), None + ) + if first_fs_manifest is not None: + skills_directory = first_fs_manifest.path.parent.parent self._activate_shell_runtime( "because agent skills are configured", diff --git a/src/fast_agent/config.py b/src/fast_agent/config.py index 3364c1f62..5e7af58e6 100644 --- a/src/fast_agent/config.py +++ b/src/fast_agent/config.py @@ -380,6 +380,14 @@ class MCPServerSettings(BaseModel): include_instructions: bool = True """Whether to include this server's instructions in the system prompt (default: True).""" + mcp_skills: bool = True + """Whether to discover and load Skills-over-MCP (io.modelcontextprotocol/skills) + skills from this server (default: True). Set False to suppress reading + `skill://index.json` and its entries from this server. Independent of + `include_instructions`: disabling server instructions does not suppress + Skills-over-MCP discovery — set `mcp_skills: false` separately if you + want that too.""" + reconnect_on_disconnect: bool = True """Whether to automatically reconnect when the server session is terminated (e.g., 404). diff --git a/src/fast_agent/mcp/mcp_skills_loader.py b/src/fast_agent/mcp/mcp_skills_loader.py new file mode 100644 index 000000000..365590980 --- /dev/null +++ b/src/fast_agent/mcp/mcp_skills_loader.py @@ -0,0 +1,292 @@ +"""Load skills served by connected MCP servers per the Skills-over-MCP SEP. + +Implements the `io.modelcontextprotocol/skills` extension: fetches the +well-known `skill://index.json` resource from each connected server, parses +its entries, and builds `SkillManifest` objects backed by the URIs listed +in the index. Entry URIs may use any scheme (`skill://` is the SEP's +default but servers MAY use `github://`, `repo://`, etc.). + +SEP: https://github.com/modelcontextprotocol/experimental-ext-skills/pull/69 +""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Iterable, Sequence + +from mcp.types import TextResourceContents + +from fast_agent.core.logging.logger import get_logger +from fast_agent.mcp.skill_uri import skill_name_from_uri +from fast_agent.skills.registry import SkillManifest, SkillRegistry + +if TYPE_CHECKING: + from fast_agent.mcp.mcp_aggregator import MCPAggregator + +logger = get_logger(__name__) + +INDEX_URI = "skill://index.json" + +# Soft ceilings on server-returned text to keep a hostile or misbehaving +# server from pinning megabytes of memory per discovery pass. An index +# listing thousands of skills still fits comfortably under 1MB; a single +# SKILL.md is conventionally a short instruction document. +MAX_INDEX_BYTES = 1_048_576 # 1 MiB +MAX_SKILL_MD_BYTES = 262_144 # 256 KiB + + +def merge_filesystem_and_mcp_manifests( + filesystem_manifests: Sequence[SkillManifest], + mcp_manifests: Sequence[SkillManifest], +) -> tuple[list[SkillManifest], list[str]]: + """Merge MCP-discovered manifests into the filesystem set. + + Filesystem manifests win on name collision (consistent with + `SkillRegistry` dedup semantics). Within the MCP batch, the first + manifest with a given name wins; later ones are hidden with a + warning. Returns the merged list and a list of human-readable + warnings for hidden manifests. + """ + filesystem_names = {m.name.lower() for m in filesystem_manifests} + mcp_winner_by_name: dict[str, str | None] = {} + merged: list[SkillManifest] = list(filesystem_manifests) + warnings: list[str] = [] + for mcp_manifest in mcp_manifests: + key = mcp_manifest.name.lower() + if key in filesystem_names: + warnings.append( + f"MCP-served skill '{mcp_manifest.name}' from server " + f"'{mcp_manifest.server_name}' hidden by local filesystem skill." + ) + continue + if key in mcp_winner_by_name: + winner = mcp_winner_by_name[key] or "" + warnings.append( + f"MCP-served skill '{mcp_manifest.name}' from server " + f"'{mcp_manifest.server_name}' hidden by an earlier MCP-served " + f"skill of the same name from server '{winner}'." + ) + continue + merged.append(mcp_manifest) + mcp_winner_by_name[key] = mcp_manifest.server_name + return merged, warnings + + +async def load_mcp_skill_manifests( + aggregator: "MCPAggregator", + server_names: Sequence[str], + *, + enabled_servers: set[str] | None = None, +) -> list[SkillManifest]: + """Fetch and parse skill manifests from connected MCP servers. + + For each server, reads `skill://index.json` (optional; missing index is + silently skipped), then fetches each listed `SKILL.md` resource and parses + its frontmatter. Returns one `SkillManifest` per concrete `skill-md` index + entry. `mcp-resource-template` entries are logged and skipped (future work). + + Errors from a single server or entry are logged as warnings; a failure + never aborts the whole batch. + """ + + manifests: list[SkillManifest] = [] + for server_name in server_names: + if enabled_servers is not None and server_name not in enabled_servers: + logger.debug( + "MCP skill discovery disabled for server", + data={"server": server_name}, + ) + continue + + entries = await _read_index(aggregator, server_name) + if not entries: + continue + + for entry in entries: + entry_type = entry.get("type") + if entry_type == "mcp-resource-template": + logger.debug( + "Skipping MCP skill template entry (not yet supported)", + data={ + "server": server_name, + "url": entry.get("url"), + }, + ) + continue + if entry_type != "skill-md": + logger.debug( + "Skipping MCP skill entry with unrecognized type", + data={"server": server_name, "type": entry_type}, + ) + continue + manifest = await _load_concrete_entry(aggregator, server_name, entry) + if manifest is not None: + manifests.append(manifest) + + return manifests + + +async def _read_index( + aggregator: "MCPAggregator", server_name: str +) -> list[dict] | None: + """Read and parse `skill://index.json` from a server; returns None if absent.""" + try: + result = await aggregator.get_resource(INDEX_URI, server_name=server_name) + except Exception as exc: + # The SEP treats the index as optional. Absence / lack of resources + # support / network error all fall through to "no indexed skills". + logger.debug( + "No skill index from server", + data={"server": server_name, "error": str(exc)}, + ) + return None + + text = _first_text(result.contents) + if not text: + logger.warning( + "Skill index has no text content", + data={"server": server_name}, + ) + return None + + byte_len = len(text.encode("utf-8")) + if byte_len > MAX_INDEX_BYTES: + logger.warning( + "Skill index exceeds size limit; refusing to parse", + data={ + "server": server_name, + "bytes": byte_len, + "limit": MAX_INDEX_BYTES, + }, + ) + return None + + try: + parsed = json.loads(text) + except Exception as exc: + logger.warning( + "Failed to parse skill index JSON", + data={"server": server_name, "error": str(exc)}, + ) + return None + + skills = parsed.get("skills") if isinstance(parsed, dict) else None + if not isinstance(skills, list): + logger.warning( + "Skill index missing `skills` array", + data={"server": server_name, "top_level_type": type(parsed).__name__}, + ) + return None + return [entry for entry in skills if isinstance(entry, dict)] + + +async def _load_concrete_entry( + aggregator: "MCPAggregator", + server_name: str, + entry: dict, +) -> SkillManifest | None: + url = entry.get("url") + if not isinstance(url, str) or not url: + logger.warning( + "Skill entry missing `url`", + data={"server": server_name, "entry": entry}, + ) + return None + + # Reject `file://` skill URIs: the trust model for Skills-over-MCP is + # "the MCP server is the authority for content under its published + # roots". A `file://` root collapses that to "whatever the server's + # process can read from disk" — something the user thought they were + # delegating through MCP ends up reading the local filesystem without + # the usual ACP / filesystem-runtime path guardrails. Keep the root + # out of the reader's allow-list entirely. + if url.lower().startswith("file://"): + logger.warning( + "Rejecting `file://` skill URI: not allowed for Skills-over-MCP", + data={"server": server_name, "url": url}, + ) + return None + + try: + result = await aggregator.get_resource(url, server_name=server_name) + except Exception as exc: + logger.warning( + "Failed to read MCP skill SKILL.md", + data={"server": server_name, "url": url, "error": str(exc)}, + ) + return None + + text = _first_text(result.contents) + if not text: + logger.warning( + "MCP skill SKILL.md has no text content", + data={"server": server_name, "url": url}, + ) + return None + + byte_len = len(text.encode("utf-8")) + if byte_len > MAX_SKILL_MD_BYTES: + logger.warning( + "MCP skill SKILL.md exceeds size limit; refusing to parse", + data={ + "server": server_name, + "url": url, + "bytes": byte_len, + "limit": MAX_SKILL_MD_BYTES, + }, + ) + return None + + manifest, parse_error = SkillRegistry.parse_manifest_text(text) + if manifest is None: + logger.warning( + "Failed to parse MCP skill frontmatter", + data={"server": server_name, "url": url, "error": parse_error}, + ) + return None + + # SEP: the final segment of MUST equal the frontmatter name. + # A URI with no skill-path segment (e.g. `skill://SKILL.md`) is rejected + # outright: stripping `/SKILL.md` would yield `skill:/`, which the reader + # would seed into its allowed-roots set and then admit every `skill://...` + # URI via the `startswith(root + "/")` check — the trust boundary must + # not rely on server-published URIs being well-formed. + url_name = skill_name_from_uri(url) + if not url_name: + logger.warning( + "Skill entry URI has no path segment before `/SKILL.md`; refusing to register", + data={"server": server_name, "url": url}, + ) + return None + if url_name != manifest.name: + # If index or URI disagree with the frontmatter, frontmatter wins — the + # spec says the skill's identity is its `name` field — but log it. + logger.warning( + "MCP skill URI final segment differs from frontmatter name", + data={ + "server": server_name, + "url": url, + "url_name": url_name, + "frontmatter_name": manifest.name, + }, + ) + + return SkillManifest( + name=manifest.name, + description=manifest.description, + body=manifest.body, + path=None, + license=manifest.license, + compatibility=manifest.compatibility, + metadata=manifest.metadata, + allowed_tools=manifest.allowed_tools, + uri=url, + server_name=server_name, + ) + + +def _first_text(contents: Iterable) -> str | None: + for item in contents: + if isinstance(item, TextResourceContents): + return item.text + return None diff --git a/src/fast_agent/mcp/skill_uri.py b/src/fast_agent/mcp/skill_uri.py new file mode 100644 index 000000000..08fa5cf23 --- /dev/null +++ b/src/fast_agent/mcp/skill_uri.py @@ -0,0 +1,47 @@ +"""Shared helpers for Skills-over-MCP resource URIs. + +Centralizes the `/SKILL.md` suffix handling used by the discovery loader, +the skill registry (prompt formatting), and the read-tool dispatcher. +Kept scheme-agnostic per the SEP — `skill://` is the default but any +scheme listed in `skill://index.json` is valid. +""" + +from __future__ import annotations + +SKILL_MD_SUFFIX = "/SKILL.md" + + +def strip_skill_md(uri: str) -> str: + """Return the skill root URI — the SKILL.md URI minus `/SKILL.md`. + + Used both to derive the `` URI for prompt formatting and + the root prefix for the read-tool trust boundary. Returns the URI + unchanged if the suffix is absent. Tolerates a buggy trailing slash + (`.../SKILL.md/`) so a misbehaving server can't seed an oversized + root into the reader's allow-list. + """ + trimmed = uri.rstrip("/") + if trimmed.endswith(SKILL_MD_SUFFIX): + return trimmed[: -len(SKILL_MD_SUFFIX)] + return uri + + +def skill_name_from_uri(uri: str) -> str | None: + """Return the final `` segment from a `://.../SKILL.md` URI. + + `skill://git-workflow/SKILL.md` yields `git-workflow`; + `github://owner/repo/skills/refunds/SKILL.md` yields `refunds`. + Returns None if the URI doesn't end in `/SKILL.md` or has no + skill-path segment before the suffix (e.g. `skill://SKILL.md`). + """ + trimmed = uri.rstrip("/") + if not trimmed.endswith(SKILL_MD_SUFFIX): + return None + stem = trimmed[: -len(SKILL_MD_SUFFIX)] + scheme_sep = stem.find("://") + if scheme_sep != -1: + stem = stem[scheme_sep + 3 :] + if "/" in stem: + tail = stem.rsplit("/", 1)[-1] + return tail or None + return stem or None diff --git a/src/fast_agent/skills/registry.py b/src/fast_agent/skills/registry.py index faa1a1d17..449e43882 100644 --- a/src/fast_agent/skills/registry.py +++ b/src/fast_agent/skills/registry.py @@ -7,6 +7,7 @@ import frontmatter from fast_agent.core.logging.logger import get_logger +from fast_agent.mcp.skill_uri import strip_skill_md from fast_agent.paths import default_skill_paths logger = get_logger(__name__) @@ -14,17 +15,38 @@ @dataclass(frozen=True) class SkillManifest: - """Represents a single skill description loaded from SKILL.md.""" + """Represents a single skill description loaded from SKILL.md. + + A manifest is backed by either a local filesystem ``path`` or, when + served by an MCP server per the Skills-over-MCP SEP, a resource ``uri`` + (any scheme — `skill://` is the SEP default but `github://`, `repo://`, + etc. are valid). Exactly one backing must be set; URI-backed manifests + additionally require ``server_name`` so the reader can route reads to + the publishing server rather than falling through to whichever + connected server happens to answer. + """ name: str description: str body: str - path: Path # Absolute path to SKILL.md - # Optional fields from the Agent Skills specification + path: Path | None = None license: str | None = None compatibility: str | None = None metadata: dict[str, str] | None = None allowed_tools: list[str] | None = None + uri: str | None = None + server_name: str | None = None + + def __post_init__(self) -> None: + if self.path is None and self.uri is None: + raise ValueError( + f"SkillManifest '{self.name}' must have a filesystem path or a resource URI." + ) + if self.uri is not None and not self.server_name: + raise ValueError( + f"SkillManifest '{self.name}' has a resource URI but no server_name; " + "URI-backed manifests must name the MCP server that published them." + ) class SkillRegistry: @@ -269,9 +291,9 @@ def format_skills_for_prompt( return "" formatted_parts: list[str] = [] + has_mcp_skill = False for manifest in manifests: - skill_dir = manifest.path.parent lines: list[str] = [""] lines.append(f" {manifest.name}") @@ -279,14 +301,24 @@ def format_skills_for_prompt( if description: lines.append(f" {description}") - # Use absolute path per Agent Skills specification - lines.append(f" {manifest.path}") - lines.append(f" {skill_dir}") - - for tag_name in ("scripts", "references", "assets"): - subdir = skill_dir / tag_name - if subdir.is_dir(): - lines.append(f" <{tag_name}>{subdir}") + if manifest.uri: + # Skills-over-MCP SEP: location is the resource URI (any scheme), + # directory is the URI with the trailing /SKILL.md stripped (the + # skill root). + has_mcp_skill = True + skill_root = strip_skill_md(manifest.uri) + lines.append(f" {manifest.uri}") + lines.append(f" {skill_root}") + elif manifest.path is not None: + # Filesystem skill: location is the absolute SKILL.md path. + skill_dir = manifest.path.parent + lines.append(f" {manifest.path}") + lines.append(f" {skill_dir}") + + for tag_name in ("scripts", "references", "assets"): + subdir = skill_dir / tag_name + if subdir.is_dir(): + lines.append(f" <{tag_name}>{subdir}") lines.append("") formatted_parts.append("\n".join(lines)) @@ -296,6 +328,18 @@ def format_skills_for_prompt( if not include_preamble: return skills_xml + mcp_note = ( + "Some skills are served by connected MCP servers: their is a " + "URI (typically `skill://...`, but other schemes such as `github://` or " + "`repo://` are also valid per the SEP) rather than an absolute path. " + "The same read tool accepts both forms — pass the URI verbatim. " + "Relative references inside an MCP-served skill resolve against its " + " URI (e.g. `references/GUIDE.md` inside `skill://acme/billing/refunds/SKILL.md` " + "becomes `skill://acme/billing/refunds/references/GUIDE.md`).\n" + if has_mcp_skill + else "" + ) + preamble = ( "Skills provide specialized capabilities and domain knowledge. Use a Skill if it seems " "relevant to the user's task, intent, or would increase your effectiveness.\n" @@ -308,6 +352,7 @@ def format_skills_for_prompt( "for standard skill resource directories.\n" "When a skill references relative paths, resolve them against the skill's " "directory (the parent of SKILL.md) and use absolute paths in tool calls.\n" + f"{mcp_note}" "Only use Skills listed in below.\n\n" ) diff --git a/src/fast_agent/tools/skill_reader.py b/src/fast_agent/tools/skill_reader.py index d96c54272..27a2e97e6 100644 --- a/src/fast_agent/tools/skill_reader.py +++ b/src/fast_agent/tools/skill_reader.py @@ -4,6 +4,11 @@ This provides a dedicated 'read_skill' tool for reading SKILL.md files and associated resources when not running in an ACP context (where read_text_file is provided by ACPFilesystemRuntime). + +Also handles Skills-over-MCP URIs (any `://...` that descends from +a discovered manifest root — `skill://` is the SEP's default but not +required) by dispatching through the MCP aggregator, so filesystem-backed +and MCP-backed skills share one tool. """ from __future__ import annotations @@ -11,9 +16,12 @@ from pathlib import Path from typing import TYPE_CHECKING, Any -from mcp.types import CallToolResult, TextContent, Tool +from mcp.types import BlobResourceContents, CallToolResult, TextContent, TextResourceContents, Tool + +from fast_agent.mcp.skill_uri import strip_skill_md if TYPE_CHECKING: + from fast_agent.mcp.mcp_aggregator import MCPAggregator from fast_agent.skills.registry import SkillManifest @@ -24,6 +32,8 @@ def __init__( self, skill_manifests: list[SkillManifest], logger, + *, + aggregator: "MCPAggregator | None" = None, ) -> None: """ Initialize the skill reader. @@ -31,16 +41,25 @@ def __init__( Args: skill_manifests: List of available skill manifests (for path validation) logger: Logger instance for debugging + aggregator: MCP aggregator for reading Skills-over-MCP resources. + Required when any manifest is URI-backed; optional otherwise. """ self._skill_manifests = skill_manifests self._logger = logger + self._aggregator = aggregator - # Build set of allowed skill directories for security + # Build set of allowed filesystem skill directories (for path reads) self._allowed_directories: set[Path] = set() + # Build set of allowed URI roots (for URI reads). A read is allowed + # if the requested URI begins with one of these roots — same trust + # boundary as the filesystem allowed-directories check. Any scheme + # is accepted per the SEP (`skill://`, `github://`, `repo://`, ...). + self._allowed_uri_roots: set[str] = set() for manifest in skill_manifests: if manifest.path: - # Allow reading from the skill's directory and subdirectories self._allowed_directories.add(manifest.path.parent.resolve()) + if manifest.uri: + self._allowed_uri_roots.add(strip_skill_md(manifest.uri)) self._tool = Tool( name="read_skill", @@ -53,7 +72,13 @@ def __init__( "properties": { "path": { "type": "string", - "description": "Absolute path to the file to read (from the in available_skills)", + "description": ( + "Absolute filesystem path or resource URI of the file to " + "read. Pass whatever appears in verbatim — most " + "often a `skill://...` URI for MCP-served skills, though " + "other schemes (`github://`, `repo://`) are valid per the " + "SEP. Filesystem skills use absolute paths." + ), } }, "required": ["path"], @@ -82,8 +107,65 @@ def _is_path_allowed(self, path: Path) -> bool: continue return False + def _is_uri_allowed(self, uri: str) -> bool: + """Check the URI is under a known skill root (trust boundary). + + Rejects URIs containing `..` or `.` path segments (raw or + percent-encoded) so `skill://good/../evil/SKILL.md` and + `skill://good/%2E%2E/evil/SKILL.md` cannot slip past the + prefix check. Also rejects query (`?`) and fragment (`#`) + suffixes — neither is meaningful for skill resource reads and + leaving them in would let a caller sidestep the exact-match + path by appending junk. The filesystem guard relies on + `Path.resolve()` for the same normalization; URIs get these + explicit rejects instead of a full URL normalize to keep the + trust boundary independent of server URI semantics. + + Case handling follows RFC 3986: scheme and traversal-marker + checks operate on a lowercased copy (scheme is case-insensitive, + and we want `%2E` / `%2e` both caught); the root-prefix check + compares raw URIs because the path component is case-sensitive. + A server publishing `skill://Acme/...` will not match a model + call for `skill://acme/...` — publish consistently. + """ + if "?" in uri or "#" in uri: + return False + lowered = uri.lower() + # Defense in depth: even if a manifest somehow registered a `file://` + # root, refuse to honor it here. The loader already rejects `file://` + # entries; this is the fallback trust-boundary check. + if lowered.startswith("file://"): + return False + # Check each path segment (after the scheme) for raw or + # percent-encoded traversal markers. + scheme_sep = lowered.find("://") + tail = lowered[scheme_sep + 3 :] if scheme_sep != -1 else lowered + for segment in tail.split("/"): + decoded = segment.replace("%2e", ".") + if decoded in ("..", "."): + return False + for root in self._allowed_uri_roots: + if uri == root or uri.startswith(f"{root}/"): + return True + return False + + def _find_server_for_uri(self, uri: str) -> str | None: + """Return the MCP server that serves the skill covering this URI.""" + best_len = -1 + best_server: str | None = None + for manifest in self._skill_manifests: + if not manifest.uri or not manifest.server_name: + continue + root = strip_skill_md(manifest.uri) + if uri == root or uri.startswith(f"{root}/") or uri == manifest.uri: + # Prefer the most specific (longest) match when roots overlap. + if len(root) > best_len: + best_len = len(root) + best_server = manifest.server_name + return best_server + async def execute(self, arguments: dict[str, Any] | None = None) -> CallToolResult: - """Read a skill file.""" + """Read a skill file (filesystem path or any resource URI).""" path_str = (arguments or {}).get("path") if arguments else None if not isinstance(path_str, str) or not path_str.strip(): return CallToolResult( @@ -96,7 +178,116 @@ async def execute(self, arguments: dict[str, Any] | None = None) -> CallToolResu ], ) - path = Path(path_str.strip()) + target = path_str.strip() + if _looks_like_uri(target): + return await self._read_mcp_uri(target) + return await self._read_filesystem(target) + + async def _read_mcp_uri(self, uri: str) -> CallToolResult: + if self._aggregator is None: + return CallToolResult( + isError=True, + content=[ + TextContent( + type="text", + text=( + "No MCP aggregator is configured to resolve URI-based " + "skill resources for this agent." + ), + ) + ], + ) + + if not self._is_uri_allowed(uri): + return CallToolResult( + isError=True, + content=[ + TextContent( + type="text", + text=f"Access denied: {uri} is not within an allowed skill root.", + ) + ], + ) + + server_name = self._find_server_for_uri(uri) + if server_name is None: + # Defense in depth: the manifest invariant should guarantee every + # URI has a publishing server, but if that invariant is ever + # violated the aggregator would iterate every connected server + # and return the first one that happens to serve the URI — + # crossing the per-server trust boundary silently. + return CallToolResult( + isError=True, + content=[ + TextContent( + type="text", + text=( + f"Access denied: {uri} is not mapped to a known " + "MCP server." + ), + ) + ], + ) + try: + result = await self._aggregator.get_resource(uri, server_name=server_name) + except Exception as exc: + self._logger.error( + "Failed to read MCP skill resource", + data={"uri": uri, "server": server_name, "error": str(exc)}, + ) + return CallToolResult( + isError=True, + content=[TextContent(type="text", text=f"Error reading resource: {exc}")], + ) + + text_parts: list[str] = [] + binary_mimes: list[str] = [] + for item in result.contents: + if isinstance(item, TextResourceContents): + text_parts.append(item.text) + elif isinstance(item, BlobResourceContents): + binary_mimes.append(item.mimeType or "application/octet-stream") + + if not text_parts: + if binary_mimes: + # read_skill exists to load skill *text* (SKILL.md, references, + # scripts). A blob-only resource has no text the model can act + # on — return an error rather than synthesizing a fake text + # placeholder the model would treat as content. + mimes = ", ".join(sorted(set(binary_mimes))) + return CallToolResult( + isError=True, + content=[ + TextContent( + type="text", + text=( + f"Resource {uri} is binary ({mimes}); " + f"read_skill only returns text content." + ), + ) + ], + ) + return CallToolResult( + isError=True, + content=[ + TextContent( + type="text", + text=f"Resource returned no content: {uri}", + ) + ], + ) + + self._logger.debug( + "Read MCP skill resource", + data={"uri": uri, "chars": sum(len(p) for p in text_parts)}, + ) + return CallToolResult( + isError=False, + content=[TextContent(type="text", text="\n".join(text_parts))], + ) + + async def _read_filesystem(self, path_str: str) -> CallToolResult: + path = Path(path_str) # Security: ensure path is absolute if not path.is_absolute(): @@ -147,7 +338,10 @@ async def execute(self, arguments: dict[str, Any] | None = None) -> CallToolResu try: content = path.read_text(encoding="utf-8") - self._logger.debug(f"Read skill file: {path} ({len(content)} bytes)") + self._logger.debug( + "Read skill file", + data={"path": str(path), "bytes": len(content)}, + ) return CallToolResult( isError=False, @@ -159,7 +353,10 @@ async def execute(self, arguments: dict[str, Any] | None = None) -> CallToolResu ], ) except Exception as exc: - self._logger.error(f"Failed to read skill file: {exc}") + self._logger.error( + "Failed to read skill file", + data={"path": str(path), "error": str(exc)}, + ) return CallToolResult( isError=True, content=[ @@ -169,3 +366,27 @@ async def execute(self, arguments: dict[str, Any] | None = None) -> CallToolResu ) ], ) + + +def _looks_like_uri(value: str) -> bool: + """Detect a URI by `://` shape. + + Per the SEP, `skill://` is a SHOULD: servers MAY publish skills under + any scheme (`github://`, `repo://`, etc.) so long as they're listed in + `skill://index.json`. The reader routes any URI through the aggregator + and lets `_is_uri_allowed` enforce that it descends from a discovered + skill root. + """ + sep = value.find("://") + if sep <= 0: + return False + # The scheme part must be a plain identifier (alpha + a few specials per + # RFC 3986). Reject Windows drive paths like `C://...` is not an issue + # since drive letters are single chars (`C:\\` not `C://`), but an + # over-eager `://` substring on something like `https//x` shouldn't match + # either — `find` returns -1 for that. This guard is defensive: only + # treat as a URI if the scheme contains alphanumerics/+/-/. + scheme = value[:sep] + if not scheme or not all(c.isalnum() or c in "+-." for c in scheme): + return False + return True diff --git a/tests/unit/fast_agent/skills/test_format_skills_for_prompt_uri.py b/tests/unit/fast_agent/skills/test_format_skills_for_prompt_uri.py new file mode 100644 index 000000000..c7abd9677 --- /dev/null +++ b/tests/unit/fast_agent/skills/test_format_skills_for_prompt_uri.py @@ -0,0 +1,72 @@ +"""Tests for URI-backed manifests in format_skills_for_prompt.""" + +from pathlib import Path + +from fast_agent.skills.registry import SkillManifest, format_skills_for_prompt + + +def _mcp_manifest(name: str = "git-workflow") -> SkillManifest: + return SkillManifest( + name=name, + description=f"The {name} skill", + body="", + path=None, + uri=f"skill://{name}/SKILL.md", + server_name="test-server", + ) + + +def _fs_manifest(tmp_path: Path, name: str = "git-workflow") -> SkillManifest: + skill_dir = tmp_path / name + skill_dir.mkdir(parents=True, exist_ok=True) + manifest_path = skill_dir / "SKILL.md" + manifest_path.write_text( + f"---\nname: {name}\ndescription: The {name} skill\n---\nBody\n", + encoding="utf-8", + ) + return SkillManifest( + name=name, + description=f"The {name} skill", + body="Body", + path=manifest_path, + ) + + +def test_mcp_manifest_emits_uri_location() -> None: + output = format_skills_for_prompt([_mcp_manifest()], include_preamble=False) + assert "skill://git-workflow/SKILL.md" in output + assert "skill://git-workflow" in output + # MCP manifests must NOT render filesystem subdir tags + assert "" not in output + assert "" not in output + assert "" not in output + + +def test_filesystem_manifest_regression(tmp_path: Path) -> None: + """Filesystem manifests render exactly as before (regression guard).""" + manifest = _fs_manifest(tmp_path) + output = format_skills_for_prompt([manifest], include_preamble=False) + assert f"{manifest.path}" in output + assert f"{manifest.path.parent}" in output + assert "skill://" not in output + + +def test_mixed_manifests_preamble_mentions_mcp(tmp_path: Path) -> None: + output = format_skills_for_prompt( + [_fs_manifest(tmp_path), _mcp_manifest()], + include_preamble=True, + ) + # Preamble note should mention URIs scheme-agnostically per the SEP + # (skill:// is SHOULD, not MUST; github:// / repo:// also valid). + assert "URI" in output + assert "skill://" in output + assert "skill://git-workflow/SKILL.md" in output + + +def test_preamble_omits_mcp_note_when_no_uri_skills(tmp_path: Path) -> None: + output = format_skills_for_prompt( + [_fs_manifest(tmp_path)], + include_preamble=True, + ) + # MCP-specific relative-path guidance only appears when relevant. + assert "skill://acme" not in output diff --git a/tests/unit/fast_agent/skills/test_mcp_skills_loader.py b/tests/unit/fast_agent/skills/test_mcp_skills_loader.py new file mode 100644 index 000000000..d994d0362 --- /dev/null +++ b/tests/unit/fast_agent/skills/test_mcp_skills_loader.py @@ -0,0 +1,424 @@ +"""Tests for mcp_skills_loader — Skills-over-MCP discovery.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest +from mcp.types import ReadResourceResult, TextResourceContents +from pydantic import AnyUrl + +from fast_agent.mcp.mcp_skills_loader import ( + INDEX_URI, + MAX_INDEX_BYTES, + MAX_SKILL_MD_BYTES, + load_mcp_skill_manifests, + merge_filesystem_and_mcp_manifests, +) +from fast_agent.skills.registry import SkillManifest + + +def _read_result(text: str, uri: str, mime: str = "application/json") -> ReadResourceResult: + return ReadResourceResult( + contents=[ + TextResourceContents(uri=AnyUrl(uri), mimeType=mime, text=text), + ] + ) + + +def _index(skills: list[dict]) -> str: + return json.dumps({"$schema": "https://schemas.agentskills.io/discovery/0.2.0/schema.json", + "skills": skills}) + + +def _skill_md(name: str, description: str = "desc", body: str = "body") -> str: + return f"---\nname: {name}\ndescription: {description}\n---\n{body}\n" + + +def _make_aggregator(responses: dict[tuple[str, str], ReadResourceResult | Exception]) -> Any: + agg = MagicMock() + + async def get_resource(uri: str, *, server_name: str | None = None) -> ReadResourceResult: + key = (server_name or "", uri) + if key not in responses: + raise ValueError(f"Resource not found: {key}") + result = responses[key] + if isinstance(result, Exception): + raise result + return result + + agg.get_resource = get_resource + agg.get_capabilities = AsyncMock(return_value=None) + return agg + + +@pytest.mark.asyncio +async def test_loads_concrete_skill_md_entries() -> None: + responses = { + ("srv", INDEX_URI): _read_result( + _index( + [ + { + "name": "git-workflow", + "type": "skill-md", + "description": "Follow git conventions", + "url": "skill://git-workflow/SKILL.md", + } + ] + ), + INDEX_URI, + ), + ("srv", "skill://git-workflow/SKILL.md"): _read_result( + _skill_md("git-workflow", description="Follow git conventions"), + "skill://git-workflow/SKILL.md", + mime="text/markdown", + ), + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + + assert len(manifests) == 1 + m = manifests[0] + assert m.name == "git-workflow" + assert m.description == "Follow git conventions" + assert m.uri == "skill://git-workflow/SKILL.md" + assert m.server_name == "srv" + assert m.path is None + + +@pytest.mark.asyncio +async def test_missing_index_yields_empty() -> None: + agg = _make_aggregator({}) # get_resource on index raises + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + + assert manifests == [] + + +@pytest.mark.asyncio +async def test_malformed_index_yields_empty() -> None: + responses = { + ("srv", INDEX_URI): _read_result("not json at all", INDEX_URI), + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + assert manifests == [] + + +@pytest.mark.asyncio +async def test_index_without_skills_key_yields_empty() -> None: + responses = { + ("srv", INDEX_URI): _read_result(json.dumps({"other": 1}), INDEX_URI), + } + agg = _make_aggregator(responses) + assert await load_mcp_skill_manifests(agg, ["srv"]) == [] + + +@pytest.mark.asyncio +async def test_template_entries_skipped() -> None: + responses = { + ("srv", INDEX_URI): _read_result( + _index( + [ + { + "type": "mcp-resource-template", + "description": "Per-product docs", + "url": "skill://docs/{product}/SKILL.md", + } + ] + ), + INDEX_URI, + ), + } + agg = _make_aggregator(responses) + + assert await load_mcp_skill_manifests(agg, ["srv"]) == [] + + +@pytest.mark.asyncio +async def test_partial_skill_md_failure_does_not_poison_batch() -> None: + responses = { + ("srv", INDEX_URI): _read_result( + _index( + [ + { + "name": "good", + "type": "skill-md", + "description": "ok", + "url": "skill://good/SKILL.md", + }, + { + "name": "bad", + "type": "skill-md", + "description": "fails", + "url": "skill://bad/SKILL.md", + }, + ] + ), + INDEX_URI, + ), + ("srv", "skill://good/SKILL.md"): _read_result( + _skill_md("good", description="ok"), + "skill://good/SKILL.md", + ), + ("srv", "skill://bad/SKILL.md"): RuntimeError("boom"), + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + + assert [m.name for m in manifests] == ["good"] + + +@pytest.mark.asyncio +async def test_degenerate_url_without_skill_path_segment_rejected() -> None: + """A URL like `skill://SKILL.md` (no skill-path segment) must be rejected. + + Otherwise `strip_skill_md` would produce `skill:/`, which the reader + would add to its allowed-URI-roots set, admitting every `skill://...` + URI via the `startswith(root + "/")` trust-boundary check. + """ + responses = { + ("srv", INDEX_URI): _read_result( + _index( + [ + { + "name": "good", + "type": "skill-md", + "description": "ok", + "url": "skill://good/SKILL.md", + }, + { + "name": "malformed", + "type": "skill-md", + "description": "no path segment", + "url": "skill://SKILL.md", + }, + ] + ), + INDEX_URI, + ), + ("srv", "skill://good/SKILL.md"): _read_result( + _skill_md("good"), + "skill://good/SKILL.md", + ), + ("srv", "skill://SKILL.md"): _read_result( + _skill_md("malformed"), + "skill://SKILL.md", + mime="text/markdown", + ), + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + + assert [m.name for m in manifests] == ["good"] + + +@pytest.mark.asyncio +async def test_file_uri_entry_rejected() -> None: + """`file://` skill URIs are rejected at load: the MCP-server-is-authority + trust model breaks down if local filesystem paths enter the allow list. + """ + responses = { + ("srv", INDEX_URI): _read_result( + _index( + [ + { + "name": "good", + "type": "skill-md", + "description": "ok", + "url": "skill://good/SKILL.md", + }, + { + "name": "local", + "type": "skill-md", + "description": "local file", + "url": "file:///tmp/local/SKILL.md", + }, + ] + ), + INDEX_URI, + ), + ("srv", "skill://good/SKILL.md"): _read_result( + _skill_md("good"), + "skill://good/SKILL.md", + ), + # No response wired for the file:// URI — rejection must happen + # before the aggregator is asked. + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + assert [m.name for m in manifests] == ["good"] + + +@pytest.mark.asyncio +async def test_oversize_index_rejected() -> None: + """A hostile server returning a huge `skill://index.json` must be rejected + before parsing — don't pin arbitrary memory on `json.loads`.""" + huge = " " * (MAX_INDEX_BYTES + 1) # padding; json.loads would still try + responses = { + ("srv", INDEX_URI): _read_result(huge, INDEX_URI), + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + assert manifests == [] + + +@pytest.mark.asyncio +async def test_oversize_skill_md_rejected() -> None: + """A SKILL.md above the soft limit must be skipped but not abort the batch.""" + big_body = "x" * (MAX_SKILL_MD_BYTES + 1) + responses = { + ("srv", INDEX_URI): _read_result( + _index( + [ + { + "name": "good", + "type": "skill-md", + "description": "ok", + "url": "skill://good/SKILL.md", + }, + { + "name": "huge", + "type": "skill-md", + "description": "too big", + "url": "skill://huge/SKILL.md", + }, + ] + ), + INDEX_URI, + ), + ("srv", "skill://good/SKILL.md"): _read_result( + _skill_md("good"), + "skill://good/SKILL.md", + ), + ("srv", "skill://huge/SKILL.md"): _read_result( + _skill_md("huge", body=big_body), + "skill://huge/SKILL.md", + ), + } + agg = _make_aggregator(responses) + + manifests = await load_mcp_skill_manifests(agg, ["srv"]) + assert [m.name for m in manifests] == ["good"] + + +@pytest.mark.asyncio +async def test_enabled_servers_filter() -> None: + """Per-server opt-out: servers absent from enabled_servers are skipped.""" + responses = { + ("a", INDEX_URI): _read_result( + _index( + [ + { + "name": "alpha", + "type": "skill-md", + "description": "a", + "url": "skill://alpha/SKILL.md", + } + ] + ), + INDEX_URI, + ), + ("a", "skill://alpha/SKILL.md"): _read_result( + _skill_md("alpha"), + "skill://alpha/SKILL.md", + ), + } + agg = _make_aggregator(responses) + + # Only server "b" enabled — "a" is suppressed even though its index exists. + manifests = await load_mcp_skill_manifests(agg, ["a"], enabled_servers={"b"}) + assert manifests == [] + + +def _fs(tmp_path: Path, name: str) -> SkillManifest: + skill_dir = tmp_path / name + skill_dir.mkdir(parents=True, exist_ok=True) + manifest_path = skill_dir / "SKILL.md" + manifest_path.write_text( + f"---\nname: {name}\ndescription: d\n---\nbody\n", encoding="utf-8" + ) + return SkillManifest(name=name, description="d", body="body", path=manifest_path) + + +def _mcp(name: str, server: str = "srv") -> SkillManifest: + return SkillManifest( + name=name, + description="d", + body="", + path=None, + uri=f"skill://{name}/SKILL.md", + server_name=server, + ) + + +class TestMergeFilesystemAndMcpManifests: + def test_no_collisions_appends_all(self, tmp_path: Path) -> None: + fs = [_fs(tmp_path, "local-only")] + mcp = [_mcp("mcp-only")] + + merged, warnings = merge_filesystem_and_mcp_manifests(fs, mcp) + + assert [m.name for m in merged] == ["local-only", "mcp-only"] + assert warnings == [] + + def test_filesystem_wins_on_collision(self, tmp_path: Path) -> None: + """Filesystem manifest must take precedence over an MCP manifest + with the same name — consistent with SkillRegistry dedup semantics. + Without this rule, a malicious or misconfigured server could shadow + a locally-curated skill.""" + fs = [_fs(tmp_path, "refunds")] + mcp = [_mcp("refunds", server="acme")] + + merged, warnings = merge_filesystem_and_mcp_manifests(fs, mcp) + + assert len(merged) == 1 + assert merged[0].path is not None # the filesystem one + assert merged[0].uri is None + assert len(warnings) == 1 + assert "refunds" in warnings[0] + assert "acme" in warnings[0] + assert "hidden by local filesystem skill" in warnings[0] + + def test_case_insensitive_collision(self, tmp_path: Path) -> None: + fs = [_fs(tmp_path, "Refunds")] + mcp = [_mcp("refunds")] + + merged, warnings = merge_filesystem_and_mcp_manifests(fs, mcp) + + assert len(merged) == 1 + assert merged[0].name == "Refunds" + assert len(warnings) == 1 + + def test_first_mcp_wins_within_batch(self) -> None: + """When two MCP servers publish the same skill name, the first one + discovered wins and the second is logged. Deterministic behavior + matters — otherwise the active skill would depend on server-discovery + ordering.""" + mcp = [_mcp("refunds", server="acme"), _mcp("refunds", server="globex")] + + merged, warnings = merge_filesystem_and_mcp_manifests([], mcp) + + assert [m.server_name for m in merged] == ["acme"] + assert len(warnings) == 1 + # The warning must name BOTH the loser and the winning server so + # an operator can act on it without correlating against the index + # log stream. + assert "globex" in warnings[0] + assert "acme" in warnings[0] + assert "earlier MCP-served skill" in warnings[0] + + def test_empty_mcp_list_is_noop(self, tmp_path: Path) -> None: + fs = [_fs(tmp_path, "alpha")] + merged, warnings = merge_filesystem_and_mcp_manifests(fs, []) + assert [m.name for m in merged] == ["alpha"] + assert warnings == [] diff --git a/tests/unit/fast_agent/skills/test_registry.py b/tests/unit/fast_agent/skills/test_registry.py index a6dfc7dc1..794053d14 100644 --- a/tests/unit/fast_agent/skills/test_registry.py +++ b/tests/unit/fast_agent/skills/test_registry.py @@ -2,8 +2,32 @@ from contextlib import contextmanager from pathlib import Path +import pytest + from fast_agent.paths import default_skill_paths -from fast_agent.skills.registry import SkillRegistry +from fast_agent.skills.registry import SkillManifest, SkillRegistry + + +def test_skill_manifest_requires_path_or_uri() -> None: + """Dataclass invariant: a manifest with neither path nor uri has no way + to be read and would silently render as an empty block. Fail + loudly at construction instead.""" + with pytest.raises(ValueError, match="path or a resource URI"): + SkillManifest(name="broken", description="d", body="") + + +def test_skill_manifest_uri_requires_server_name() -> None: + """A URI-backed manifest without server_name collapses the trust boundary: + SkillReader._find_server_for_uri returns None, and the aggregator falls + through every connected server until one responds. Fail at construction + so this invariant holds structurally, not just by loader convention.""" + with pytest.raises(ValueError, match="server_name"): + SkillManifest( + name="orphan", + description="d", + body="", + uri="skill://orphan/SKILL.md", + ) @contextmanager diff --git a/tests/unit/fast_agent/skills/test_skill_reader_uri.py b/tests/unit/fast_agent/skills/test_skill_reader_uri.py new file mode 100644 index 000000000..b864c9bc7 --- /dev/null +++ b/tests/unit/fast_agent/skills/test_skill_reader_uri.py @@ -0,0 +1,316 @@ +"""Tests for SkillReader URI handling (Skills-over-MCP).""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock + +import pytest +from mcp.types import ReadResourceResult, TextResourceContents +from pydantic import AnyUrl + +from fast_agent.skills.registry import SkillManifest +from fast_agent.tools.skill_reader import SkillReader + + +def _text_result(text: str, uri: str) -> ReadResourceResult: + return ReadResourceResult( + contents=[TextResourceContents(uri=AnyUrl(uri), mimeType="text/markdown", text=text)] + ) + + +def _mcp_manifest(name: str = "git-workflow", server: str = "srv") -> SkillManifest: + return SkillManifest( + name=name, + description=f"The {name} skill", + body="", + path=None, + uri=f"skill://{name}/SKILL.md", + server_name=server, + ) + + +def _fake_aggregator(responses: dict[str, ReadResourceResult | Exception]) -> Any: + agg = MagicMock() + + async def get_resource(uri: str, *, server_name: str | None = None) -> ReadResourceResult: + result = responses.get(uri) + if result is None: + raise ValueError(f"unknown uri {uri}") + if isinstance(result, Exception): + raise result + return result + + agg.get_resource = get_resource + return agg + + +@pytest.mark.asyncio +async def test_uri_read_dispatches_to_aggregator() -> None: + manifest = _mcp_manifest() + agg = _fake_aggregator( + {"skill://git-workflow/SKILL.md": _text_result("# body", "skill://git-workflow/SKILL.md")} + ) + reader = SkillReader([manifest], logger=MagicMock(), aggregator=agg) + + result = await reader.execute({"path": "skill://git-workflow/SKILL.md"}) + + assert not result.isError + assert result.content[0].text == "# body" + + +@pytest.mark.asyncio +async def test_uri_read_allows_descendant_of_skill_root() -> None: + manifest = _mcp_manifest() + agg = _fake_aggregator( + { + "skill://git-workflow/references/GUIDE.md": _text_result( + "refs", + "skill://git-workflow/references/GUIDE.md", + ) + } + ) + reader = SkillReader([manifest], logger=MagicMock(), aggregator=agg) + + result = await reader.execute( + {"path": "skill://git-workflow/references/GUIDE.md"} + ) + + assert not result.isError + assert result.content[0].text == "refs" + + +@pytest.mark.asyncio +async def test_uri_outside_known_skill_root_denied() -> None: + manifest = _mcp_manifest("git-workflow") + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + + result = await reader.execute({"path": "skill://unknown/SKILL.md"}) + + assert result.isError + assert "Access denied" in result.content[0].text + + +@pytest.mark.asyncio +async def test_uri_read_with_no_aggregator_errors_clearly() -> None: + manifest = _mcp_manifest() + reader = SkillReader([manifest], logger=MagicMock(), aggregator=None) + + result = await reader.execute({"path": "skill://git-workflow/SKILL.md"}) + + assert result.isError + assert "aggregator" in result.content[0].text.lower() + + +@pytest.mark.asyncio +async def test_non_skill_scheme_uri_dispatches_via_aggregator() -> None: + """SEP allows servers to publish skills under any scheme (e.g. github://). + + My host MUST route those URIs through the aggregator, not the local + filesystem — otherwise a `github://...` argument would be Path()-mangled + into `github:\\...` under cwd on Windows. + """ + manifest = SkillManifest( + name="refunds", + description="d", + body="", + path=None, + uri="github://acme/billing/refunds/SKILL.md", + server_name="acme-srv", + ) + agg = _fake_aggregator( + { + "github://acme/billing/refunds/SKILL.md": _text_result( + "# refunds skill", "github://acme/billing/refunds/SKILL.md" + ) + } + ) + reader = SkillReader([manifest], logger=MagicMock(), aggregator=agg) + + result = await reader.execute( + {"path": "github://acme/billing/refunds/SKILL.md"} + ) + + assert not result.isError + assert "refunds skill" in result.content[0].text + + +@pytest.mark.asyncio +async def test_unknown_uri_scheme_outside_allowed_roots_denied() -> None: + """A URI-shaped argument that doesn't match any discovered manifest's + root must be rejected (security: don't read arbitrary URIs).""" + manifest = SkillManifest( + name="known", + description="d", + body="", + path=None, + uri="skill://known/SKILL.md", + server_name="srv", + ) + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + + result = await reader.execute({"path": "https://evil.example/anything"}) + + assert result.isError + assert "Access denied" in result.content[0].text + + +@pytest.mark.asyncio +async def test_uri_with_parent_traversal_denied() -> None: + """`skill://good/../evil/SKILL.md` must not slip past the prefix check. + + Defense in depth: the filesystem guard normalizes via Path.resolve(); + the URI guard doesn't resolve URIs (that's server semantics), so it + rejects any path containing a `..` or `.` segment outright. + """ + manifest = _mcp_manifest("good") + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + + traversal = await reader.execute({"path": "skill://good/../evil/SKILL.md"}) + dot_segment = await reader.execute({"path": "skill://good/./SKILL.md"}) + trailing = await reader.execute({"path": "skill://good/.."}) + + assert traversal.isError and "Access denied" in traversal.content[0].text + assert dot_segment.isError and "Access denied" in dot_segment.content[0].text + assert trailing.isError and "Access denied" in trailing.content[0].text + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "uri", + [ + "skill://good/%2e%2e/evil/SKILL.md", + "skill://good/%2E%2E/evil/SKILL.md", + "skill://good/%2E/SKILL.md", + ], +) +async def test_uri_with_percent_encoded_traversal_denied(uri: str) -> None: + """Percent-encoded `..` / `.` segments must be rejected too. + + The aggregator forwards the URI to the server as-is and the server + is the ultimate authority, but the host's trust boundary should not + rely on that. Decoding just `%2E` (the only RFC-3986 unreserved dot + encoding) is enough to catch the common bypass. + """ + manifest = _mcp_manifest("good") + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + + result = await reader.execute({"path": uri}) + assert result.isError + assert "Access denied" in result.content[0].text + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "uri", + [ + "skill://good/SKILL.md?redirect=evil", + "skill://good/SKILL.md#frag", + ], +) +async def test_uri_with_query_or_fragment_denied(uri: str) -> None: + """Queries and fragments aren't meaningful for skill reads and would + let a caller pass the exact-match allow-check with trailing junk.""" + manifest = _mcp_manifest("good") + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + + result = await reader.execute({"path": uri}) + assert result.isError + assert "Access denied" in result.content[0].text + + +@pytest.mark.asyncio +async def test_binary_only_resource_returns_error() -> None: + """A blob-only resource must error rather than fake a text placeholder. + + The old behavior synthesized `` + and returned it as TextContent — the model would treat that string as the + actual skill content. + """ + from mcp.types import BlobResourceContents, ReadResourceResult + from pydantic import AnyUrl + + manifest = _mcp_manifest("good") + uri = "skill://good/diagram.png" + blob_result = ReadResourceResult( + contents=[ + BlobResourceContents(uri=AnyUrl(uri), mimeType="image/png", blob="AAAA") + ] + ) + agg = _fake_aggregator({uri: blob_result}) + reader = SkillReader([manifest], logger=MagicMock(), aggregator=agg) + + result = await reader.execute({"path": uri}) + assert result.isError + assert "binary" in result.content[0].text.lower() + assert "image/png" in result.content[0].text + + +@pytest.mark.asyncio +async def test_file_uri_rejected_even_if_registered() -> None: + """Defense in depth: even if a manifest somehow declared a `file://` root, + the trust-boundary check must still refuse the URI. The loader blocks + file:// at discovery time; this test simulates the invariant being + violated (e.g. a test fixture or direct construction) and verifies the + reader refuses regardless.""" + manifest = SkillManifest( + name="local", + description="d", + body="", + path=None, + uri="file:///tmp/local/SKILL.md", + server_name="srv", + ) + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + + result = await reader.execute({"path": "file:///tmp/local/SKILL.md"}) + + assert result.isError + assert "Access denied" in result.content[0].text + + +@pytest.mark.asyncio +async def test_unmapped_uri_denied() -> None: + """Defense in depth for the SkillManifest invariant: if a URI somehow + lands in allowed-roots without a matching server_name, the aggregator + would fall through every connected server. The reader must refuse + before dispatch. + """ + # Construct by bypassing __post_init__ so we can simulate an invariant + # violation. In normal use the registry validates this at construction. + manifest = SkillManifest.__new__(SkillManifest) + object.__setattr__(manifest, "name", "orphan") + object.__setattr__(manifest, "description", "d") + object.__setattr__(manifest, "body", "") + object.__setattr__(manifest, "path", None) + object.__setattr__(manifest, "uri", "skill://orphan/SKILL.md") + object.__setattr__(manifest, "server_name", None) + object.__setattr__(manifest, "license", None) + object.__setattr__(manifest, "compatibility", None) + object.__setattr__(manifest, "metadata", None) + object.__setattr__(manifest, "allowed_tools", None) + + reader = SkillReader([manifest], logger=MagicMock(), aggregator=MagicMock()) + result = await reader.execute({"path": "skill://orphan/SKILL.md"}) + + assert result.isError + assert "not mapped to a known" in result.content[0].text + + +@pytest.mark.asyncio +async def test_filesystem_read_still_works(tmp_path) -> None: + skill_dir = tmp_path / "git-workflow" + skill_dir.mkdir() + md = skill_dir / "SKILL.md" + md.write_text("---\nname: git-workflow\ndescription: d\n---\nbody\n", encoding="utf-8") + manifest = SkillManifest( + name="git-workflow", + description="d", + body="body", + path=md, + ) + reader = SkillReader([manifest], logger=MagicMock()) + + result = await reader.execute({"path": str(md)}) + assert not result.isError + assert "body" in result.content[0].text diff --git a/tests/unit/fast_agent/skills/test_skill_uri.py b/tests/unit/fast_agent/skills/test_skill_uri.py new file mode 100644 index 000000000..bbc82a923 --- /dev/null +++ b/tests/unit/fast_agent/skills/test_skill_uri.py @@ -0,0 +1,64 @@ +"""Direct tests for the `skill_uri` helpers. + +Covers degenerate URI shapes that could otherwise compromise the reader's +allowed-URI-roots trust boundary if the loader ever forgets to validate. +""" + +from __future__ import annotations + +import pytest + +from fast_agent.mcp.skill_uri import skill_name_from_uri, strip_skill_md + + +class TestStripSkillMd: + def test_strips_trailing_skill_md(self) -> None: + assert strip_skill_md("skill://acme/refunds/SKILL.md") == "skill://acme/refunds" + + def test_returns_unchanged_when_suffix_absent(self) -> None: + assert strip_skill_md("skill://acme/refunds/README.md") == "skill://acme/refunds/README.md" + + def test_scheme_agnostic(self) -> None: + assert strip_skill_md("github://o/r/s/SKILL.md") == "github://o/r/s" + + def test_degenerate_no_path_segment_strips_literally(self) -> None: + # strip_skill_md is a pure lexical helper; it does NOT guard against + # URIs without a skill-path segment. That guard lives at the loader. + # The invariant callers rely on is: if strip_skill_md is given a URI + # that skill_name_from_uri accepts as non-empty, the result is a + # well-formed root. + assert strip_skill_md("skill://SKILL.md") == "skill:/" + + def test_tolerates_trailing_slash(self) -> None: + # A buggy server publishing `.../SKILL.md/` (non-conformant but + # possible) must not seed a root that includes the SKILL.md segment + # into the reader's allow-list. + assert strip_skill_md("skill://acme/refunds/SKILL.md/") == "skill://acme/refunds" + assert skill_name_from_uri("skill://acme/refunds/SKILL.md/") == "refunds" + + +class TestSkillNameFromUri: + def test_single_segment(self) -> None: + assert skill_name_from_uri("skill://git-workflow/SKILL.md") == "git-workflow" + + def test_nested_segments_returns_final(self) -> None: + assert ( + skill_name_from_uri("github://owner/repo/skills/refunds/SKILL.md") == "refunds" + ) + + def test_returns_none_when_suffix_absent(self) -> None: + assert skill_name_from_uri("skill://acme/refunds/README.md") is None + + @pytest.mark.parametrize( + "uri", + [ + "skill://SKILL.md", # no path segment between :// and /SKILL.md + "skill:///SKILL.md", # empty first segment + ], + ) + def test_returns_none_for_degenerate_shapes(self, uri: str) -> None: + # This is the contract the loader depends on: no skill-path segment + # means no registrable manifest. Returning "" here instead of None + # previously let `skill://SKILL.md` slip past the `if url_name and ...` + # check and seed `skill:/` into the reader's allowed-roots set. + assert skill_name_from_uri(uri) is None