Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 102 additions & 10 deletions src/mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ const tools_list =
\\{"name":"codedb_status","description":"Get current codedb status: number of indexed files and current sequence number.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
\\{"name":"codedb_snapshot","description":"Get the full pre-rendered snapshot of the codebase as a single JSON blob. Contains tree, all outlines, symbol index, and dependency graph. Ideal for caching or deploying to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
\\{"name":"codedb_bundle","description":"Batch multiple queries in one call. Max 20 ops. WARNING: Avoid bundling multiple codedb_read calls on large files — use codedb_outline + codedb_symbol instead. Bundle outline+symbol+search, not full file reads. Total response is not size-capped, so large bundles can exceed token limits.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}},
\\{"name":"codedb_remote","description":"Query any GitHub repo via codedb.codegraff.com cloud intelligence. Gets file tree, symbol outlines, or searches code in external repos without cloning. Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta"],"description":"What to query: tree (file list), outline (symbols), search (text search), meta (repo info)"},"query":{"type":"string","description":"Search query (required when action=search)"}},"required":["repo","action"]}},
\\{"name":"codedb_remote","description":"Query any GitHub repo via cloud intelligence. Default backend 'codegraff' (codedb.codegraff.com) gets file tree, symbol outlines, searches, repo meta. Backend 'wiki' (wiki.codes) fronts the Hetzner parquet router and adds exact-identifier lookup (action=symbol) and hot-pin policy (action=policy). Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta","symbol","policy"],"description":"What to query. codegraff backend: tree, outline, search, meta. wiki backend: tree, outline, search, symbol, policy."},"query":{"type":"string","description":"Action-specific argument. search: text query. symbol: identifier name. outline: file path. tree/meta/policy: unused."},"backend":{"type":"string","enum":["codegraff","wiki"],"description":"Which remote indexer to query. Default: codegraff. Use 'wiki' for symbol/policy actions or richer parquet-backed results."}},"required":["repo","action"]}},
\\{"name":"codedb_projects","description":"List all locally indexed projects on this machine. Shows project paths, data directory hashes, and whether a snapshot exists. Use to discover what codebases are available.","inputSchema":{"type":"object","properties":{},"required":[]}},
\\{"name":"codedb_index","description":"Index a local folder on this machine. Scans all source files, builds outlines/trigrams/word indexes, and creates a codedb.snapshot in the target directory. After indexing, the folder is queryable via the project param on any tool.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"Absolute path to the folder to index (e.g. /Users/you/myproject)"}},"required":["path"]}},
\\{"name":"codedb_find","description":"Fuzzy file search — finds files by approximate name. Typo-tolerant subsequence matching with word-boundary and filename bonuses. Use when you know roughly what file you're looking for but not the exact path. Much faster than codedb_tree + manual scan.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Fuzzy search query (e.g. 'authmidlware', 'test_auth', 'main.zig')"},"max_results":{"type":"integer","description":"Maximum results to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}},
Expand Down Expand Up @@ -1328,24 +1328,51 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
return;
};
const action = getStr(args, "action") orelse {
out.appendSlice(alloc, "error: missing 'action' (tree, outline, search, meta)") catch {};
out.appendSlice(alloc, "error: missing 'action' (tree, outline, search, meta, symbol, policy)") catch {};
return;
};
// Validate action against whitelist to prevent SSRF/path injection
const valid_actions = [_][]const u8{ "tree", "outline", "search", "meta" };

// Backend selection: default preserves existing behavior. "wiki" routes
// through the codedb-cloud / wiki.codes Vercel proxy, which fronts the
// Hetzner parquet router and exposes a superset of actions (adds
// `symbol` and `policy`).
const backend = getStr(args, "backend") orelse "codegraff";
const is_wiki = std.mem.eql(u8, backend, "wiki");
const is_codegraff = std.mem.eql(u8, backend, "codegraff");
if (!is_wiki and !is_codegraff) {
out.appendSlice(alloc, "error: invalid backend, must be one of: codegraff, wiki") catch {};
return;
}

// Per-backend action allowlists. Wiki adds symbol + policy, drops meta;
// codegraff stays as shipped.
const codegraff_actions = [_][]const u8{ "tree", "outline", "search", "meta" };
const wiki_actions = [_][]const u8{ "tree", "outline", "search", "symbol", "policy" };
const allowed: []const []const u8 = if (is_wiki) &wiki_actions else &codegraff_actions;
var action_valid = false;
for (valid_actions) |va| {
for (allowed) |va| {
if (std.mem.eql(u8, action, va)) {
action_valid = true;
break;
}
}
if (!action_valid) {
out.appendSlice(alloc, "error: invalid action, must be one of: tree, outline, search, meta") catch {};
out.appendSlice(alloc, "error: action '") catch {};
out.appendSlice(alloc, action) catch {};
out.appendSlice(alloc, "' not supported on backend '") catch {};
out.appendSlice(alloc, backend) catch {};
out.appendSlice(alloc, "' (") catch {};
if (is_wiki) {
out.appendSlice(alloc, "wiki supports: tree, outline, search, symbol, policy)") catch {};
} else {
out.appendSlice(alloc, "codegraff supports: tree, outline, search, meta)") catch {};
}
return;
}

// Validate repo format: must be "owner/name" with no path traversal
// Validate repo format: must be "owner/name" with no path traversal.
// (Same rule for both backends — slug derivation below just replaces
// the single '/' with '-'.)
if (std.mem.indexOf(u8, repo, "..") != null or
std.mem.indexOf(u8, repo, "//") != null or
repo[0] == '/' or
Expand All @@ -1354,17 +1381,83 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
out.appendSlice(alloc, "error: invalid repo format, use owner/repo (e.g. justrach/merjs)") catch {};
return;
}
// Ensure exactly one slash (owner/repo, not owner/repo/extra/path)
const slash_pos = std.mem.indexOfScalar(u8, repo, '/').?;
if (std.mem.indexOfScalarPos(u8, repo, slash_pos + 1, '/') != null) {
out.appendSlice(alloc, "error: invalid repo format, use owner/repo (e.g. justrach/merjs)") catch {};
return;
}

// Build URL and curl args
var url_buf: [512]u8 = undefined;
const query = getStr(args, "query");

if (is_wiki) {
// wiki.codes uses flat slugs: owner/repo → owner-repo. The Vercel
// /api/query proxy takes slug+endpoint+q and server-side-auths to
// the Hetzner router. No client secrets.
var slug_buf: [256]u8 = undefined;
if (repo.len >= slug_buf.len) {
out.appendSlice(alloc, "error: repo too long") catch {};
return;
}
@memcpy(slug_buf[0..repo.len], repo);
for (slug_buf[0..repo.len]) |*c| {
if (c.* == '/') c.* = '-';
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve unambiguous repo identity in wiki slug mapping

The wiki path derivation flattens owner/repo by replacing only / with -, which makes distinct valid repos collide (for example, foo/bar-baz and foo-bar/baz both map to foo-bar-baz). When both are indexed upstream, codedb_remote can return results for the wrong repository with no error, which can silently corrupt analysis. Use an unambiguous encoding (e.g., separate owner/repo params or escaping component dashes) before building slug.

Useful? React with 👍 / 👎.

}
const slug = slug_buf[0..repo.len];

const base_url = std.fmt.bufPrint(&url_buf, "https://www.wiki.codes/api/query", .{}) catch {
out.appendSlice(alloc, "error: URL too long") catch {};
return;
};
var slug_param_buf: [320]u8 = undefined;
const slug_param = std.fmt.bufPrint(&slug_param_buf, "slug={s}", .{slug}) catch {
out.appendSlice(alloc, "error: slug too long") catch {};
return;
};
var ep_param_buf: [64]u8 = undefined;
const ep_param = std.fmt.bufPrint(&ep_param_buf, "endpoint={s}", .{action}) catch {
out.appendSlice(alloc, "error: endpoint too long") catch {};
return;
};
var q_param_buf: [1024]u8 = undefined;
const q_param = std.fmt.bufPrint(&q_param_buf, "q={s}", .{query orelse ""}) catch {
out.appendSlice(alloc, "error: query too long") catch {};
return;
};

const result = cio.runCapture(.{
.allocator = alloc,
.argv = &.{
"curl", "-sf",
"--max-time", "30",
"-G",
"--data-urlencode", slug_param,
"--data-urlencode", ep_param,
"--data-urlencode", q_param,
base_url,
},
}) catch {
out.appendSlice(alloc, "error: failed to fetch from wiki.codes") catch {};
return;
};
defer alloc.free(result.stdout);
defer alloc.free(result.stderr);
if (result.term.Exited != 0) {
out.appendSlice(alloc, "error: wiki.codes returned error for ") catch {};
out.appendSlice(alloc, slug) catch {};
out.appendSlice(alloc, "/") catch {};
out.appendSlice(alloc, action) catch {};
if (result.stderr.len > 0) {
out.appendSlice(alloc, " — ") catch {};
out.appendSlice(alloc, result.stderr[0..@min(result.stderr.len, 200)]) catch {};
}
return;
}
out.appendSlice(alloc, result.stdout) catch {};
return;
}

// codegraff backend — unchanged from the shipping behavior.
if (std.mem.eql(u8, action, "search")) {
const base_url = std.fmt.bufPrint(&url_buf, "https://codedb.codegraff.com/{s}/search", .{repo}) catch {
out.appendSlice(alloc, "error: URL too long") catch {};
Expand All @@ -1375,7 +1468,6 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
out.appendSlice(alloc, "error: query too long") catch {};
return;
};
// -G + --data-urlencode lets curl handle encoding spaces etc.
const result = cio.runCapture(.{
.allocator = alloc,
.argv = &.{ "curl", "-sf", "--max-time", "30", "-G", "--data-urlencode", q_param, base_url },
Expand Down
Loading