Skip to content

Commit 67c9114

Browse files
committed
perf(index): reduce embedding batch long-tail stalls
1 parent a282c6b commit 67c9114

3 files changed

Lines changed: 12 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
- Full-vault indexing now embeds in smaller batches (embedding batch size reduced from 32 to 16) to reduce long-tail embedding stalls on real Markdown vaults.
12+
1013
## [0.4.0] - 2026-04-29
1114

1215
### Added

seeklink/ingest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
# Non-hidden top-level dirs excluded from indexing (mirrors freshness._SKIP_DIRS)
2424
_SKIP_DIRS = {"todo", "archive"}
25-
_EMBED_BATCH_SIZE = 32
25+
_EMBED_BATCH_SIZE = 16
2626

2727
# Regex for YAML frontmatter block (handles empty frontmatter too).
2828
# Public — search.py imports this to map body-relative chunk offsets back

tests/test_ingest.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212

1313
from seeklink.db import Database
1414
from seeklink.embedder import Embedder
15-
from seeklink.ingest import _parse_frontmatter, ingest_file, ingest_vault
15+
from seeklink.ingest import (
16+
_EMBED_BATCH_SIZE,
17+
_parse_frontmatter,
18+
ingest_file,
19+
ingest_vault,
20+
)
1621

1722

1823
@pytest.fixture(scope="session")
@@ -314,8 +319,8 @@ def test_batches_embeddings_across_files(self, db: Database, vault: Path):
314319
assert stats["ingested"] == 40
315320
assert stats["errors"] == 0
316321
call_sizes = [len(call) for call in fake.calls]
317-
assert len(call_sizes) == 2
318-
assert max(call_sizes) <= 32
322+
assert len(call_sizes) == (40 + _EMBED_BATCH_SIZE - 1) // _EMBED_BATCH_SIZE
323+
assert max(call_sizes) <= _EMBED_BATCH_SIZE
319324
assert sum(call_sizes) == 40
320325

321326
def test_batch_vault_resolves_forward_refs(self, db: Database, vault: Path):

0 commit comments

Comments
 (0)