Skip to content
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
c4cc763
Restore `codedb serve --port` on Zig 0.16
justrach Apr 22, 2026
2fbc66c
Route MCP status output to stderr
justrach Apr 22, 2026
aaba92e
Make `codedb serve` port configurable via CODEDB_PORT
justrach Apr 22, 2026
c9d773c
O(1) `findSymbol` via complete symbol_index
justrach Apr 22, 2026
14c3160
Make `codedb serve` opt-in; require CODEDB_PORT with no default
justrach Apr 22, 2026
8b43e89
Revert `codedb serve` gate; default port 6767
justrach Apr 22, 2026
74ba881
Harden `server.isPathSafe` against backslash and null-byte paths
justrach Apr 22, 2026
6ef7185
Resolve `/file/read` paths against the indexed root
justrach Apr 22, 2026
fbb8b49
findAllSymbols: always merge index + outline scan
justrach Apr 22, 2026
814c03a
Merge feat/local-server-trial into release/0.2.579
justrach Apr 22, 2026
3233de4
Bump semver to 0.2.579
justrach Apr 22, 2026
1f04fad
codedb_remote: add 'wiki' backend (wiki.codes) alongside codegraff (#…
justrach Apr 22, 2026
56af2f6
codedb_remote: reject empty query on actions that consume it
justrach Apr 22, 2026
3988c1f
mcp: refresh last_activity during long bundle processing (#278)
justrach Apr 22, 2026
e7c9fd4
Add native C outline parser
justrach Apr 25, 2026
3e6dae9
Merge pull request #320 from justrach/fix/native-c-parser
justrach Apr 25, 2026
a514ea8
Merge pull request #316 from justrach/fix/remote-require-query
justrach Apr 25, 2026
d58e284
Merge pull request #317 from justrach/fix/278-bundle-activity-refresh
justrach Apr 25, 2026
ad52783
Add language detection for common extensions
justrach Apr 25, 2026
654ff5c
Merge pull request #321 from justrach/fix/extension-language-coverage
justrach Apr 25, 2026
27b8d81
Parse outlines for common detected extensions
justrach Apr 25, 2026
4e5864f
Merge pull request #322 from justrach/fix/common-extension-parsers
justrach Apr 25, 2026
3ca698b
Add golden parser checks for common extensions
justrach Apr 26, 2026
c83533b
Merge pull request #323 from justrach/fix/extension-parser-golden-tests
justrach Apr 26, 2026
5b76d9c
Fix benchmark noise status reporting
justrach Apr 26, 2026
07ac438
Merge pull request #324 from justrach/fix/bench-report-noise-status
justrach Apr 26, 2026
f6f12b6
Speed up snapshot JSON generation
justrach Apr 26, 2026
47a2f1a
Merge pull request #325 from justrach/fix/snapshot-json-performance
justrach Apr 26, 2026
d99041b
Cache snapshot responses by store sequence
justrach Apr 26, 2026
ae587b5
Merge pull request #326 from justrach/fix/snapshot-response-cache
justrach Apr 26, 2026
32b5e3f
Point codedb_remote at api.wiki.codes
justrach Apr 26, 2026
944e4ad
Merge pull request #327 from justrach/fix/wiki-api-host
justrach Apr 26, 2026
9a70fb4
Extend MCP idle timeout to one hour
justrach Apr 26, 2026
4e38a29
Merge pull request #328 from justrach/fix/mcp-hour-timeout
justrach Apr 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 54 additions & 14 deletions src/explore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1256,22 +1256,63 @@ pub const Explorer = struct {
var result_list: std.ArrayList(SymbolResult) = .empty;
errdefer result_list.deinit(allocator);

// Scan outlines for all symbols by name (catches all kinds including imports).
// Track (path, line_start) pairs already appended. symbol_index can be
// incomplete after fast-snapshot restore (outlines are populated before
// rebuildSymbolIndexFor runs on every file), so we must still fall
// through to the outline scan — and dedupe against what the index
// already supplied. Keys are "<path>:<line>" allocated from the caller
// allocator, freed at end of call.
var seen = std.StringHashMap(void).init(allocator);
defer {
var sit = seen.keyIterator();
while (sit.next()) |k| allocator.free(k.*);
seen.deinit();
}

if (self.symbol_index.get(name)) |locs| {
for (locs.items) |loc| {
var detail: ?[]const u8 = null;
if (self.outlines.getPtr(loc.path)) |outline| {
for (outline.symbols.items) |sym| {
if (sym.line_start == loc.line_start and std.mem.eql(u8, sym.name, name)) {
detail = if (sym.detail) |d| try allocator.dupe(u8, d) else null;
break;
}
}
}
try result_list.append(allocator, .{
.path = try allocator.dupe(u8, loc.path),
.symbol = .{
.name = try allocator.dupe(u8, name),
.kind = loc.kind,
.line_start = loc.line_start,
.line_end = loc.line_end,
.detail = detail,
},
});
const key = try std.fmt.allocPrint(allocator, "{s}:{d}", .{ loc.path, loc.line_start });
seen.put(key, {}) catch allocator.free(key);
}
}

// Safety scan: append any outline symbols the index missed.
var iter = self.outlines.iterator();
while (iter.next()) |entry| {
for (entry.value_ptr.symbols.items) |sym| {
if (std.mem.eql(u8, sym.name, name)) {
try result_list.append(allocator, .{
.path = try allocator.dupe(u8, entry.key_ptr.*),
.symbol = .{
.name = try allocator.dupe(u8, sym.name),
.kind = sym.kind,
.line_start = sym.line_start,
.line_end = sym.line_end,
.detail = if (sym.detail) |d| try allocator.dupe(u8, d) else null,
},
});
}
if (!std.mem.eql(u8, sym.name, name)) continue;
var key_buf: [std.fs.max_path_bytes + 32]u8 = undefined;
const key = std.fmt.bufPrint(&key_buf, "{s}:{d}", .{ entry.key_ptr.*, sym.line_start }) catch continue;
if (seen.contains(key)) continue;
try result_list.append(allocator, .{
.path = try allocator.dupe(u8, entry.key_ptr.*),
.symbol = .{
.name = try allocator.dupe(u8, sym.name),
.kind = sym.kind,
.line_start = sym.line_start,
.line_end = sym.line_end,
.detail = if (sym.detail) |d| try allocator.dupe(u8, d) else null,
},
});
}
}
return result_list.toOwnedSlice(allocator);
Expand Down Expand Up @@ -2684,7 +2725,6 @@ pub const Explorer = struct {
fn rebuildSymbolIndexFor(self: *Explorer, path: []const u8, outline: *FileOutline) void {
self.removeSymbolIndexFor(path);
for (outline.symbols.items) |sym| {
if (sym.kind == .import or sym.kind == .comment_block) continue;
const gop = self.symbol_index.getOrPut(sym.name) catch continue;
if (!gop.found_existing) {
gop.value_ptr.* = std.ArrayList(SymbolLocation).empty;
Expand Down
14 changes: 12 additions & 2 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ fn mainImpl() !void {
const stdout = cio.File.stdout();
const use_color = stdout.isTty();
const s = sty.style(use_color);
const out = Out{ .file = stdout, .alloc = allocator };
var out = Out{ .file = stdout, .alloc = allocator };

const args = try cio.argsAlloc(allocator);
defer cio.argsFree(allocator, args);
Expand Down Expand Up @@ -106,6 +106,13 @@ fn mainImpl() !void {
std.process.exit(1);
}

// MCP stdio reserves stdout for JSON-RPC — route status/error output to
// stderr so startup/failure paths don't corrupt the protocol stream.
// See #304.
if (std.mem.eql(u8, cmd, "mcp")) {
out.file = cio.File.stderr();
}

// Handle --version early (no root needed)
if (std.mem.eql(u8, cmd, "--version") or std.mem.eql(u8, cmd, "-v") or std.mem.eql(u8, cmd, "version")) {
out.p("codedb {s}\n", .{release_info.semver});
Expand Down Expand Up @@ -550,7 +557,10 @@ fn mainImpl() !void {
s.reset,
});
} else if (std.mem.eql(u8, cmd, "serve")) {
const port: u16 = 7719;
const port: u16 = blk: {
const raw = cio.posixGetenv("CODEDB_PORT") orelse break :blk 6767;
break :blk std.fmt.parseInt(u16, raw, 10) catch 6767;
};
var agents = AgentRegistry.init(allocator);
defer agents.deinit();
_ = try agents.register("__filesystem__");
Expand Down
112 changes: 102 additions & 10 deletions src/mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ const tools_list =
\\{"name":"codedb_status","description":"Get current codedb status: number of indexed files and current sequence number.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
\\{"name":"codedb_snapshot","description":"Get the full pre-rendered snapshot of the codebase as a single JSON blob. Contains tree, all outlines, symbol index, and dependency graph. Ideal for caching or deploying to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
\\{"name":"codedb_bundle","description":"Batch multiple queries in one call. Max 20 ops. WARNING: Avoid bundling multiple codedb_read calls on large files — use codedb_outline + codedb_symbol instead. Bundle outline+symbol+search, not full file reads. Total response is not size-capped, so large bundles can exceed token limits.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}},
\\{"name":"codedb_remote","description":"Query any GitHub repo via codedb.codegraff.com cloud intelligence. Gets file tree, symbol outlines, or searches code in external repos without cloning. Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta"],"description":"What to query: tree (file list), outline (symbols), search (text search), meta (repo info)"},"query":{"type":"string","description":"Search query (required when action=search)"}},"required":["repo","action"]}},
\\{"name":"codedb_remote","description":"Query any GitHub repo via cloud intelligence. Default backend 'codegraff' (codedb.codegraff.com) gets file tree, symbol outlines, searches, repo meta. Backend 'wiki' (wiki.codes) fronts the Hetzner parquet router and adds exact-identifier lookup (action=symbol) and hot-pin policy (action=policy). Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta","symbol","policy"],"description":"What to query. codegraff backend: tree, outline, search, meta. wiki backend: tree, outline, search, symbol, policy."},"query":{"type":"string","description":"Action-specific argument. search: text query. symbol: identifier name. outline: file path. tree/meta/policy: unused."},"backend":{"type":"string","enum":["codegraff","wiki"],"description":"Which remote indexer to query. Default: codegraff. Use 'wiki' for symbol/policy actions or richer parquet-backed results."}},"required":["repo","action"]}},
\\{"name":"codedb_projects","description":"List all locally indexed projects on this machine. Shows project paths, data directory hashes, and whether a snapshot exists. Use to discover what codebases are available.","inputSchema":{"type":"object","properties":{},"required":[]}},
\\{"name":"codedb_index","description":"Index a local folder on this machine. Scans all source files, builds outlines/trigrams/word indexes, and creates a codedb.snapshot in the target directory. After indexing, the folder is queryable via the project param on any tool.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"Absolute path to the folder to index (e.g. /Users/you/myproject)"}},"required":["path"]}},
\\{"name":"codedb_find","description":"Fuzzy file search — finds files by approximate name. Typo-tolerant subsequence matching with word-boundary and filename bonuses. Use when you know roughly what file you're looking for but not the exact path. Much faster than codedb_tree + manual scan.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Fuzzy search query (e.g. 'authmidlware', 'test_auth', 'main.zig')"},"max_results":{"type":"integer","description":"Maximum results to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}},
Expand Down Expand Up @@ -1328,24 +1328,51 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
return;
};
const action = getStr(args, "action") orelse {
out.appendSlice(alloc, "error: missing 'action' (tree, outline, search, meta)") catch {};
out.appendSlice(alloc, "error: missing 'action' (tree, outline, search, meta, symbol, policy)") catch {};
return;
};
// Validate action against whitelist to prevent SSRF/path injection
const valid_actions = [_][]const u8{ "tree", "outline", "search", "meta" };

// Backend selection: default preserves existing behavior. "wiki" routes
// through the codedb-cloud / wiki.codes Vercel proxy, which fronts the
// Hetzner parquet router and exposes a superset of actions (adds
// `symbol` and `policy`).
const backend = getStr(args, "backend") orelse "codegraff";
const is_wiki = std.mem.eql(u8, backend, "wiki");
const is_codegraff = std.mem.eql(u8, backend, "codegraff");
if (!is_wiki and !is_codegraff) {
out.appendSlice(alloc, "error: invalid backend, must be one of: codegraff, wiki") catch {};
return;
}

// Per-backend action allowlists. Wiki adds symbol + policy, drops meta;
// codegraff stays as shipped.
const codegraff_actions = [_][]const u8{ "tree", "outline", "search", "meta" };
const wiki_actions = [_][]const u8{ "tree", "outline", "search", "symbol", "policy" };
const allowed: []const []const u8 = if (is_wiki) &wiki_actions else &codegraff_actions;
var action_valid = false;
for (valid_actions) |va| {
for (allowed) |va| {
if (std.mem.eql(u8, action, va)) {
action_valid = true;
break;
}
}
if (!action_valid) {
out.appendSlice(alloc, "error: invalid action, must be one of: tree, outline, search, meta") catch {};
out.appendSlice(alloc, "error: action '") catch {};
out.appendSlice(alloc, action) catch {};
out.appendSlice(alloc, "' not supported on backend '") catch {};
out.appendSlice(alloc, backend) catch {};
out.appendSlice(alloc, "' (") catch {};
if (is_wiki) {
out.appendSlice(alloc, "wiki supports: tree, outline, search, symbol, policy)") catch {};
} else {
out.appendSlice(alloc, "codegraff supports: tree, outline, search, meta)") catch {};
}
return;
}

// Validate repo format: must be "owner/name" with no path traversal
// Validate repo format: must be "owner/name" with no path traversal.
// (Same rule for both backends — slug derivation below just replaces
// the single '/' with '-'.)
if (std.mem.indexOf(u8, repo, "..") != null or
std.mem.indexOf(u8, repo, "//") != null or
repo[0] == '/' or
Expand All @@ -1354,17 +1381,83 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
out.appendSlice(alloc, "error: invalid repo format, use owner/repo (e.g. justrach/merjs)") catch {};
return;
}
// Ensure exactly one slash (owner/repo, not owner/repo/extra/path)
const slash_pos = std.mem.indexOfScalar(u8, repo, '/').?;
if (std.mem.indexOfScalarPos(u8, repo, slash_pos + 1, '/') != null) {
out.appendSlice(alloc, "error: invalid repo format, use owner/repo (e.g. justrach/merjs)") catch {};
return;
}

// Build URL and curl args
var url_buf: [512]u8 = undefined;
const query = getStr(args, "query");

if (is_wiki) {
// wiki.codes uses flat slugs: owner/repo → owner-repo. The Vercel
// /api/query proxy takes slug+endpoint+q and authenticates to the
// Hetzner router server-side, so no client secrets are needed here.
var slug_buf: [256]u8 = undefined;
if (repo.len >= slug_buf.len) {
out.appendSlice(alloc, "error: repo too long") catch {};
return;
}
@memcpy(slug_buf[0..repo.len], repo);
for (slug_buf[0..repo.len]) |*c| {
if (c.* == '/') c.* = '-';
}
const slug = slug_buf[0..repo.len];

const base_url = std.fmt.bufPrint(&url_buf, "https://www.wiki.codes/api/query", .{}) catch {
out.appendSlice(alloc, "error: URL too long") catch {};
return;
};
var slug_param_buf: [320]u8 = undefined;
const slug_param = std.fmt.bufPrint(&slug_param_buf, "slug={s}", .{slug}) catch {
out.appendSlice(alloc, "error: slug too long") catch {};
return;
};
var ep_param_buf: [64]u8 = undefined;
const ep_param = std.fmt.bufPrint(&ep_param_buf, "endpoint={s}", .{action}) catch {
out.appendSlice(alloc, "error: endpoint too long") catch {};
return;
};
var q_param_buf: [1024]u8 = undefined;
const q_param = std.fmt.bufPrint(&q_param_buf, "q={s}", .{query orelse ""}) catch {
out.appendSlice(alloc, "error: query too long") catch {};
return;
};

const result = cio.runCapture(.{
.allocator = alloc,
.argv = &.{
"curl", "-sf",
"--max-time", "30",
"-G",
"--data-urlencode", slug_param,
"--data-urlencode", ep_param,
"--data-urlencode", q_param,
base_url,
},
}) catch {
out.appendSlice(alloc, "error: failed to fetch from wiki.codes") catch {};
return;
};
defer alloc.free(result.stdout);
defer alloc.free(result.stderr);
if (result.term.Exited != 0) {
out.appendSlice(alloc, "error: wiki.codes returned error for ") catch {};
out.appendSlice(alloc, slug) catch {};
out.appendSlice(alloc, "/") catch {};
out.appendSlice(alloc, action) catch {};
if (result.stderr.len > 0) {
out.appendSlice(alloc, " — ") catch {};
out.appendSlice(alloc, result.stderr[0..@min(result.stderr.len, 200)]) catch {};
}
return;
}
out.appendSlice(alloc, result.stdout) catch {};
return;
}

// codegraff backend — unchanged from the shipping behavior.
if (std.mem.eql(u8, action, "search")) {
const base_url = std.fmt.bufPrint(&url_buf, "https://codedb.codegraff.com/{s}/search", .{repo}) catch {
out.appendSlice(alloc, "error: URL too long") catch {};
Expand All @@ -1375,7 +1468,6 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
out.appendSlice(alloc, "error: query too long") catch {};
return;
};
// -G + --data-urlencode lets curl handle encoding spaces etc.
const result = cio.runCapture(.{
.allocator = alloc,
.argv = &.{ "curl", "-sf", "--max-time", "30", "-G", "--data-urlencode", q_param, base_url },
Expand Down
2 changes: 1 addition & 1 deletion src/release_info.zig
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pub const semver = "0.2.578";
pub const semver = "0.2.579";
Loading
Loading