Skip to content

Commit 7caa69d

Browse files
twaugh and claude committed
Replace context hashing with timestamp-based change detection
Use the block and page `updated-at` timestamps from Logseq to detect changes instead of computing content hashes. When a block's or page's timestamp differs from the stored value, it and all its descendants are marked dirty and re-embedded. This avoids a per-block normalizeContent + hash computation on every scan.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9f7638e commit 7caa69d

5 files changed

Lines changed: 132 additions & 145 deletions

File tree

src/__tests__/indexer.test.ts

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ function mockQueries(blocks: any[][], pages: any[][]) {
4040
.mockResolvedValueOnce(pages); // page query
4141
}
4242

43-
const defaultPage = { id: 10, name: "test-page", originalName: "Test Page", properties: {} };
43+
const defaultPage = { id: 10, name: "test-page", originalName: "Test Page", properties: {}, "updated-at": 1000 };
4444

4545
beforeEach(async () => {
4646
vi.clearAllMocks();
@@ -58,7 +58,7 @@ beforeEach(async () => {
5858
describe("indexBlocks", () => {
5959
it("skips unchanged blocks", async () => {
6060
const blocks = [
61-
[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 } }],
61+
[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
6262
];
6363
const pages = [[defaultPage]];
6464

@@ -69,7 +69,7 @@ describe("indexBlocks", () => {
6969
await indexBlocks();
7070
expect(mockEmbedTexts).toHaveBeenCalledTimes(1);
7171

72-
// Second index - same content, should skip
72+
// Second index - same timestamps, should skip
7373
mockEmbedTexts.mockClear();
7474
mockQueries(blocks, pages);
7575
await indexBlocks();
@@ -80,16 +80,16 @@ describe("indexBlocks", () => {
8080
const pages = [[defaultPage]];
8181

8282
mockQueries(
83-
[[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 } }]],
83+
[[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }]],
8484
pages,
8585
);
8686
mockEmbedTexts.mockResolvedValue([[0.1, 0.2, 0.3]]);
8787

8888
await indexBlocks();
8989

90-
// Change content
90+
// Change content — updated-at changes
9191
mockQueries(
92-
[[{ id: 1, uuid: "u1", content: "Changed content that is different", page: { id: 10 }, parent: { id: 10 } }]],
92+
[[{ id: 1, uuid: "u1", content: "Changed content that is different", page: { id: 10 }, parent: { id: 10 }, "updated-at": 2000 }]],
9393
pages,
9494
);
9595
mockEmbedTexts.mockClear();
@@ -103,11 +103,11 @@ describe("indexBlocks", () => {
103103
// batchSize is 2
104104
mockQueries(
105105
[
106-
[{ id: 1, uuid: "u1", content: "First block with enough content", page: { id: 10 }, parent: { id: 10 } }],
107-
[{ id: 2, uuid: "u2", content: "Second block with enough content", page: { id: 10 }, parent: { id: 10 } }],
108-
[{ id: 3, uuid: "u3", content: "Third block with enough content", page: { id: 11 }, parent: { id: 11 } }],
106+
[{ id: 1, uuid: "u1", content: "First block with enough content", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
107+
[{ id: 2, uuid: "u2", content: "Second block with enough content", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
108+
[{ id: 3, uuid: "u3", content: "Third block with enough content", page: { id: 11 }, parent: { id: 11 }, "updated-at": 1000 }],
109109
],
110-
[[defaultPage], [{ id: 11, name: "page-2", originalName: "Page 2", properties: {} }]],
110+
[[defaultPage], [{ id: 11, name: "page-2", originalName: "Page 2", properties: {}, "updated-at": 1000 }]],
111111
);
112112

113113
mockEmbedTexts
@@ -124,8 +124,8 @@ describe("indexBlocks", () => {
124124
it("tracks progress", async () => {
125125
mockQueries(
126126
[
127-
[{ id: 1, uuid: "u1", content: "Block one with enough content", page: { id: 10 }, parent: { id: 10 } }],
128-
[{ id: 2, uuid: "u2", content: "Block two with enough content", page: { id: 10 }, parent: { id: 10 } }],
127+
[{ id: 1, uuid: "u1", content: "Block one with enough content", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
128+
[{ id: 2, uuid: "u2", content: "Block two with enough content", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
129129
],
130130
[[defaultPage]],
131131
);
@@ -141,7 +141,7 @@ describe("indexBlocks", () => {
141141

142142
it("clears embeddings on model change", async () => {
143143
const blocks = [
144-
[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 } }],
144+
[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
145145
];
146146
const pages = [[defaultPage]];
147147

@@ -170,12 +170,13 @@ describe("indexBlocks", () => {
170170
name: "meeting-notes",
171171
originalName: "Meeting Notes",
172172
properties: { tags: ["project-x", "planning"] },
173+
"updated-at": 1000,
173174
};
174175

175176
mockQueries(
176177
[
177-
[{ id: 4, uuid: "u4", content: "## Project X Updates", page: { id: 10 }, parent: { id: 10 } }],
178-
[{ id: 5, uuid: "u5", content: "Discussed timeline and budget", page: { id: 10 }, parent: { id: 4 } }],
178+
[{ id: 4, uuid: "u4", content: "## Project X Updates", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
179+
[{ id: 5, uuid: "u5", content: "Discussed timeline and budget", page: { id: 10 }, parent: { id: 4 }, "updated-at": 1000 }],
179180
],
180181
[[meetingPage]],
181182
);
@@ -199,18 +200,18 @@ describe("indexBlocks", () => {
199200

200201
it("re-embeds when page is renamed", async () => {
201202
const blocks = [
202-
[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 } }],
203+
[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
203204
];
204205

205-
mockQueries(blocks, [[{ id: 10, name: "old-name", originalName: "Old Name", properties: {} }]]);
206+
mockQueries(blocks, [[{ id: 10, name: "old-name", originalName: "Old Name", properties: {}, "updated-at": 1000 }]]);
206207
mockEmbedTexts.mockResolvedValue([[0.1, 0.2, 0.3]]);
207208

208209
await indexBlocks();
209210
expect(mockEmbedTexts).toHaveBeenCalledTimes(1);
210211

211-
// Same block, but page renamed
212+
// Same block, but page renamed — page updated-at changes
212213
mockEmbedTexts.mockClear();
213-
mockQueries(blocks, [[{ id: 10, name: "new-name", originalName: "New Name", properties: {} }]]);
214+
mockQueries(blocks, [[{ id: 10, name: "new-name", originalName: "New Name", properties: {}, "updated-at": 2000 }]]);
214215
mockEmbedTexts.mockResolvedValue([[0.4, 0.5, 0.6]]);
215216

216217
await indexBlocks();
@@ -223,8 +224,8 @@ describe("indexBlocks", () => {
223224
// Initial: block 5 is child of block 4
224225
mockQueries(
225226
[
226-
[{ id: 4, uuid: "u4", content: "Original parent content here", page: { id: 10 }, parent: { id: 10 } }],
227-
[{ id: 5, uuid: "u5", content: "Child block content unchanged", page: { id: 10 }, parent: { id: 4 } }],
227+
[{ id: 4, uuid: "u4", content: "Original parent content here", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }],
228+
[{ id: 5, uuid: "u5", content: "Child block content unchanged", page: { id: 10 }, parent: { id: 4 }, "updated-at": 1000 }],
228229
],
229230
pages,
230231
);
@@ -233,12 +234,12 @@ describe("indexBlocks", () => {
233234
await indexBlocks();
234235
expect(mockEmbedTexts).toHaveBeenCalledTimes(1);
235236

236-
// Parent content changes, child stays the same
237+
// Parent content changes (updated-at changes), child stays the same
237238
mockEmbedTexts.mockClear();
238239
mockQueries(
239240
[
240-
[{ id: 4, uuid: "u4", content: "Updated parent content here", page: { id: 10 }, parent: { id: 10 } }],
241-
[{ id: 5, uuid: "u5", content: "Child block content unchanged", page: { id: 10 }, parent: { id: 4 } }],
241+
[{ id: 4, uuid: "u4", content: "Updated parent content here", page: { id: 10 }, parent: { id: 10 }, "updated-at": 2000 }],
242+
[{ id: 5, uuid: "u5", content: "Child block content unchanged", page: { id: 10 }, parent: { id: 4 }, "updated-at": 1000 }],
242243
],
243244
pages,
244245
);
@@ -251,9 +252,9 @@ describe("indexBlocks", () => {
251252
expect(texts).toHaveLength(2);
252253
});
253254

254-
it("stores contextHashes in embedding records", async () => {
255+
it("stores timestamps in embedding records", async () => {
255256
mockQueries(
256-
[[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 } }]],
257+
[[{ id: 1, uuid: "u1", content: "Hello world this is a test block", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }]],
257258
[[defaultPage]],
258259
);
259260
mockEmbedTexts.mockResolvedValue([[0.1, 0.2, 0.3]]);
@@ -262,14 +263,13 @@ describe("indexBlocks", () => {
262263

263264
const stored = await getAllEmbeddings();
264265
expect(stored).toHaveLength(1);
265-
expect(stored[0].contextHashes).toBeDefined();
266-
expect(Array.isArray(stored[0].contextHashes)).toBe(true);
267-
expect(stored[0].contextHashes.length).toBeGreaterThan(0);
266+
expect(stored[0].blockUpdatedAt).toBe(1000);
267+
expect(stored[0].pageUpdatedAt).toBe(1000);
268268
});
269269

270270
it("handles cancellation", async () => {
271271
mockQueries(
272-
[[{ id: 1, uuid: "u1", content: "Block one with enough content", page: { id: 10 }, parent: { id: 10 } }]],
272+
[[{ id: 1, uuid: "u1", content: "Block one with enough content", page: { id: 10 }, parent: { id: 10 }, "updated-at": 1000 }]],
273273
[[defaultPage]],
274274
);
275275

src/__tests__/search.test.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@ describe("dotProduct", () => {
1818

1919
describe("searchEmbeddings", () => {
2020
const records: EmbeddingRecord[] = [
21-
{ blockId: "a", contextHashes: [], embedding: [1, 0, 0], pageId: 1, timestamp: 0 },
22-
{ blockId: "b", contextHashes: [], embedding: [0, 1, 0], pageId: 1, timestamp: 0 },
23-
{ blockId: "c", contextHashes: [], embedding: [0.7, 0.7, 0], pageId: 2, timestamp: 0 },
21+
{ blockId: "a", embedding: [1, 0, 0], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
22+
{ blockId: "b", embedding: [0, 1, 0], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
23+
{ blockId: "c", embedding: [0.7, 0.7, 0], pageId: 2, blockUpdatedAt: 0, pageUpdatedAt: 0 },
2424
];
2525

2626
it("returns results sorted by similarity", () => {

src/__tests__/storage.test.ts

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -25,15 +25,16 @@ describe("embeddings CRUD", () => {
2525
it("stores and retrieves embeddings", async () => {
2626
const record = {
2727
blockId: "uuid-1",
28-
contextHashes: ["h1", "h2"],
28+
blockUpdatedAt: 1000,
29+
pageUpdatedAt: 2000,
2930
embedding: [0.1, 0.2, 0.3],
3031
pageId: 1,
31-
timestamp: Date.now(),
3232
};
3333
await putEmbeddings([record]);
3434
const result = await getEmbedding("uuid-1");
3535
expect(result).toBeDefined();
36-
expect(result!.contextHashes).toEqual(["h1", "h2"]);
36+
expect(result!.blockUpdatedAt).toBe(1000);
37+
expect(result!.pageUpdatedAt).toBe(2000);
3738
expect(result!.embedding).toEqual([0.1, 0.2, 0.3]);
3839
});
3940

@@ -44,17 +45,17 @@ describe("embeddings CRUD", () => {
4445

4546
it("gets all embeddings", async () => {
4647
await putEmbeddings([
47-
{ blockId: "a", contextHashes: ["h1"], embedding: [1], pageId: 1, timestamp: 0 },
48-
{ blockId: "b", contextHashes: ["h2"], embedding: [2], pageId: 1, timestamp: 0 },
48+
{ blockId: "a", embedding: [1], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
49+
{ blockId: "b", embedding: [2], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
4950
]);
5051
const all = await getAllEmbeddings();
5152
expect(all).toHaveLength(2);
5253
});
5354

5455
it("deletes specific embeddings", async () => {
5556
await putEmbeddings([
56-
{ blockId: "a", contextHashes: ["h1"], embedding: [1], pageId: 1, timestamp: 0 },
57-
{ blockId: "b", contextHashes: ["h2"], embedding: [2], pageId: 1, timestamp: 0 },
57+
{ blockId: "a", embedding: [1], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
58+
{ blockId: "b", embedding: [2], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
5859
]);
5960
await deleteEmbeddings(["a"]);
6061
const all = await getAllEmbeddings();
@@ -64,7 +65,7 @@ describe("embeddings CRUD", () => {
6465

6566
it("clears all embeddings", async () => {
6667
await putEmbeddings([
67-
{ blockId: "a", contextHashes: ["h1"], embedding: [1], pageId: 1, timestamp: 0 },
68+
{ blockId: "a", embedding: [1], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
6869
]);
6970
await clearAllEmbeddings();
7071
const count = await getEmbeddingCount();
@@ -73,9 +74,9 @@ describe("embeddings CRUD", () => {
7374

7475
it("counts embeddings", async () => {
7576
await putEmbeddings([
76-
{ blockId: "a", contextHashes: ["h1"], embedding: [1], pageId: 1, timestamp: 0 },
77-
{ blockId: "b", contextHashes: ["h2"], embedding: [2], pageId: 1, timestamp: 0 },
78-
{ blockId: "c", contextHashes: ["h3"], embedding: [3], pageId: 2, timestamp: 0 },
77+
{ blockId: "a", embedding: [1], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
78+
{ blockId: "b", embedding: [2], pageId: 1, blockUpdatedAt: 0, pageUpdatedAt: 0 },
79+
{ blockId: "c", embedding: [3], pageId: 2, blockUpdatedAt: 0, pageUpdatedAt: 0 },
7980
]);
8081
expect(await getEmbeddingCount()).toBe(3);
8182
});

0 commit comments

Comments
 (0)