diff --git a/src/explore.zig b/src/explore.zig index a1049db..dab1b06 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -1010,19 +1010,59 @@ pub const Explorer = struct { } } - /// Rebuild the inverted word index from stored contents. - /// Used after fast snapshot restore, which intentionally avoids per-file tokenization. + /// Rebuild the inverted word index from cached contents when complete, or + /// by streaming source files from the project root when the content cache + /// was capped during fast snapshot restore. pub fn rebuildWordIndex(self: *Explorer) !void { - self.mu.lock(); - defer self.mu.unlock(); + const source_paths = blk: { + self.mu.lockShared(); + defer self.mu.unlockShared(); - self.word_index.deinit(); - self.word_index = WordIndex.init(self.allocator); + if (self.contents.count() == self.outlines.count()) break :blk null; + if (self.io == null or self.root_dir == null) return error.WordIndexIncomplete; - var iter = self.contents.iterator(); - while (iter.next()) |entry| { - try self.word_index.indexFile(entry.key_ptr.*, entry.value_ptr.*); + var paths: std.ArrayList([]u8) = .empty; + errdefer { + for (paths.items) |path| self.allocator.free(path); + paths.deinit(self.allocator); + } + try paths.ensureTotalCapacity(self.allocator, self.outlines.count()); + var iter = self.outlines.keyIterator(); + while (iter.next()) |path_ptr| { + paths.appendAssumeCapacity(try self.allocator.dupe(u8, path_ptr.*)); + } + break :blk try paths.toOwnedSlice(self.allocator); + }; + defer if (source_paths) |paths| { + for (paths) |path| self.allocator.free(path); + self.allocator.free(paths); + }; + + var rebuilt = WordIndex.init(self.allocator); + errdefer rebuilt.deinit(); + + if (source_paths) |paths| { + const io = self.io orelse return error.WordIndexIncomplete; + const dir = self.root_dir orelse return error.WordIndexIncomplete; + for (paths) |path| { + const content = try dir.readFileAlloc(io, path, self.allocator, .limited(64 * 1024 * 1024)); + errdefer self.allocator.free(content); + try rebuilt.indexFile(path, content); + self.allocator.free(content); + } + } else { + self.mu.lockShared(); + defer self.mu.unlockShared(); + var iter = self.contents.iterator(); + while (iter.next()) |entry| { + try rebuilt.indexFile(entry.key_ptr.*, entry.value_ptr.*); + } } + + self.mu.lock(); + defer self.mu.unlock(); + self.word_index.deinit(); + self.word_index = rebuilt; self.word_index_generation +%= 1; self.word_index_complete = true; self.word_index_can_load_from_disk = false; @@ -1623,7 +1663,8 @@ pub const Explorer = struct { /// Search for a word using the inverted word index. O(1) lookup. pub fn searchWord(self: *Explorer, word: []const u8, allocator: std.mem.Allocator) ![]const idx.WordHit { self.mu.lockShared(); - const needs_rebuild = !self.word_index_complete and self.contents.count() > 0; + const needs_rebuild = !self.word_index_complete and + (self.contents.count() > 0 or (self.io != null and self.root_dir != null)); self.mu.unlockShared(); if (needs_rebuild) { try self.rebuildWordIndex(); diff --git a/src/tests.zig b/src/tests.zig index 6361b21..ae23043 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -3657,7 +3657,18 @@ test "snapshot: writer streams uncached file contents for large repos" { defer testing.allocator.free(snap_path); try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + var loaded_without_root = Explorer.init(testing.allocator); + defer loaded_without_root.deinit(); + var store_without_root = Store.init(testing.allocator); + defer store_without_root.deinit(); + + try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &loaded_without_root, &store_without_root, testing.allocator)); + try testing.expectEqual(@as(usize, 1002), loaded_without_root.outlines.count()); + try testing.expect(loaded_without_root.contents.count() < loaded_without_root.outlines.count()); + try testing.expectError(error.WordIndexIncomplete, loaded_without_root.searchWord("func_1001", testing.allocator)); + var loaded = Explorer.init(testing.allocator); + loaded.setRoot(io, dir_path); defer loaded.deinit(); var store = Store.init(testing.allocator); defer store.deinit(); @@ -3665,6 +3676,12 @@ test "snapshot: writer streams uncached file contents for large repos" { try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &loaded, &store, testing.allocator)); try testing.expectEqual(@as(usize, 1002), loaded.outlines.count()); try testing.expect(loaded.contents.count() < loaded.outlines.count()); + + const hits = try loaded.searchWord("func_1001", testing.allocator); + defer testing.allocator.free(hits); + try testing.expectEqual(@as(usize, 1), hits.len); + try testing.expectEqualStrings("src/file_1001.zig", loaded.word_index.hitPath(hits[0])); + try testing.expect(loaded.wordIndexIsComplete()); } test "issue-220: partial word index state rebuilds before search" {