Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 51 additions & 10 deletions src/explore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1010,19 +1010,59 @@ pub const Explorer = struct {
}
}

/// Rebuild the inverted word index from stored contents.
/// Used after fast snapshot restore, which intentionally avoids per-file tokenization.
/// Rebuild the inverted word index from cached contents when complete, or
/// by streaming source files from the project root when the content cache
/// was capped during fast snapshot restore.
pub fn rebuildWordIndex(self: *Explorer) !void {
self.mu.lock();
defer self.mu.unlock();
const source_paths = blk: {
self.mu.lockShared();
defer self.mu.unlockShared();

self.word_index.deinit();
self.word_index = WordIndex.init(self.allocator);
if (self.contents.count() == self.outlines.count()) break :blk null;
if (self.io == null or self.root_dir == null) return error.WordIndexIncomplete;

var iter = self.contents.iterator();
while (iter.next()) |entry| {
try self.word_index.indexFile(entry.key_ptr.*, entry.value_ptr.*);
var paths: std.ArrayList([]u8) = .empty;
errdefer {
for (paths.items) |path| self.allocator.free(path);
paths.deinit(self.allocator);
}
try paths.ensureTotalCapacity(self.allocator, self.outlines.count());
var iter = self.outlines.keyIterator();
while (iter.next()) |path_ptr| {
paths.appendAssumeCapacity(try self.allocator.dupe(u8, path_ptr.*));
}
break :blk try paths.toOwnedSlice(self.allocator);
};
defer if (source_paths) |paths| {
for (paths) |path| self.allocator.free(path);
self.allocator.free(paths);
};

var rebuilt = WordIndex.init(self.allocator);
errdefer rebuilt.deinit();

if (source_paths) |paths| {
const io = self.io orelse return error.WordIndexIncomplete;
const dir = self.root_dir orelse return error.WordIndexIncomplete;
for (paths) |path| {
const content = try dir.readFileAlloc(io, path, self.allocator, .limited(64 * 1024 * 1024));
errdefer self.allocator.free(content);
try rebuilt.indexFile(path, content);
self.allocator.free(content);
}
} else {
self.mu.lockShared();
defer self.mu.unlockShared();
var iter = self.contents.iterator();
while (iter.next()) |entry| {
try rebuilt.indexFile(entry.key_ptr.*, entry.value_ptr.*);
}
}

self.mu.lock();
defer self.mu.unlock();
self.word_index.deinit();
self.word_index = rebuilt;
Comment on lines +1062 to +1065
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Rebuild word index under writer lock to avoid lost updates

rebuildWordIndex now does most of its work without holding the write lock, then swaps self.word_index at the end. In MCP/serve mode, queries can run concurrently with watcher.incrementalLoop updates (commitParsedFileOwnedOutline/removeFile), so updates applied while rebuild is in progress can be overwritten by this final assignment, leaving word_index_complete = true but missing recent file changes until another mutation happens. The previous implementation rebuilt while holding the writer lock, so this is a correctness regression under concurrent query + file-change workloads.

Useful? React with 👍 / 👎.

self.word_index_generation +%= 1;
self.word_index_complete = true;
self.word_index_can_load_from_disk = false;
Expand Down Expand Up @@ -1623,7 +1663,8 @@ pub const Explorer = struct {
/// Search for a word using the inverted word index. O(1) lookup.
pub fn searchWord(self: *Explorer, word: []const u8, allocator: std.mem.Allocator) ![]const idx.WordHit {
self.mu.lockShared();
const needs_rebuild = !self.word_index_complete and self.contents.count() > 0;
const needs_rebuild = !self.word_index_complete and
(self.contents.count() > 0 or (self.io != null and self.root_dir != null));
self.mu.unlockShared();
if (needs_rebuild) {
try self.rebuildWordIndex();
Expand Down
17 changes: 17 additions & 0 deletions src/tests.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3657,14 +3657,31 @@ test "snapshot: writer streams uncached file contents for large repos" {
defer testing.allocator.free(snap_path);
try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator);

var loaded_without_root = Explorer.init(testing.allocator);
defer loaded_without_root.deinit();
var store_without_root = Store.init(testing.allocator);
defer store_without_root.deinit();

try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &loaded_without_root, &store_without_root, testing.allocator));
try testing.expectEqual(@as(usize, 1002), loaded_without_root.outlines.count());
try testing.expect(loaded_without_root.contents.count() < loaded_without_root.outlines.count());
try testing.expectError(error.WordIndexIncomplete, loaded_without_root.searchWord("func_1001", testing.allocator));

var loaded = Explorer.init(testing.allocator);
loaded.setRoot(io, dir_path);
defer loaded.deinit();
var store = Store.init(testing.allocator);
defer store.deinit();

try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &loaded, &store, testing.allocator));
try testing.expectEqual(@as(usize, 1002), loaded.outlines.count());
try testing.expect(loaded.contents.count() < loaded.outlines.count());

const hits = try loaded.searchWord("func_1001", testing.allocator);
defer testing.allocator.free(hits);
try testing.expectEqual(@as(usize, 1), hits.len);
try testing.expectEqualStrings("src/file_1001.zig", loaded.word_index.hitPath(hits[0]));
try testing.expect(loaded.wordIndexIsComplete());
}

test "issue-220: partial word index state rebuilds before search" {
Expand Down
Loading