Skip to content

Commit 652b418

Browse files
aeneasr and claude committed
fix(store,index): auto-recover from SQLite database corruption
When SQLite reports "database disk image is malformed" or "disk I/O error", the index is permanently broken until manually purged. Every subsequent semantic_search call would fail with the same error because touchChecked is never set and each retry hits the same corrupted file.

This change adds automatic recovery at two layers:

- store.New: if open/schema-setup fails with a corruption error, delete the DB file and its WAL/SHM sidecars and retry once from a clean state. In-memory databases are never deleted.
- Indexer.EnsureFresh / Index: if indexWithTree returns a corruption error mid-operation, log ERROR "corrupted database detected, rebuilding", call rebuildStore() (close → delete files → reopen), then retry with an empty stored hash so the fresh DB receives a full index pass.

Adds IsCorruptionErr(err) to the store package as the single source of truth for what constitutes a SQLite corruption error.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent bd237e3 commit 652b418

File tree

2 files changed

+91
-2
lines changed

2 files changed

+91
-2
lines changed

internal/index/index.go

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ type Indexer struct {
8787
chunker chunker.Chunker
8888
maxChunkTokens int
8989
logger *slog.Logger
90+
dsn string // path to the SQLite database file; used for corruption recovery
9091
}
9192

9293
// SetLogger attaches a logger to the indexer for structured diagnostic output.
@@ -107,9 +108,28 @@ func NewIndexer(dsn string, emb embedder.Embedder, maxChunkTokens int) (*Indexer
107108
emb: emb,
108109
chunker: chunker.NewMultiChunker(chunker.DefaultLanguages(maxChunkTokens)),
109110
maxChunkTokens: maxChunkTokens,
111+
dsn: dsn,
110112
}, nil
111113
}
112114

115+
// rebuildStore closes the current store, deletes the database files, and
116+
// opens a fresh store. Must be called while holding idx.mu.Lock() or before
117+
// the Indexer is shared with other goroutines.
118+
func (idx *Indexer) rebuildStore() error {
119+
_ = idx.store.Close()
120+
if idx.dsn != "" && idx.dsn != ":memory:" {
121+
for _, suffix := range []string{"", "-wal", "-shm"} {
122+
_ = os.Remove(idx.dsn + suffix)
123+
}
124+
}
125+
s, err := store.New(idx.dsn, idx.emb.Dimensions())
126+
if err != nil {
127+
return fmt.Errorf("open fresh store: %w", err)
128+
}
129+
idx.store = s
130+
return nil
131+
}
132+
113133
// Close closes the underlying store.
114134
func (idx *Indexer) Close() error {
115135
return idx.store.Close()
@@ -146,7 +166,25 @@ func (idx *Indexer) Index(ctx context.Context, projectDir string, force bool, pr
146166

147167
stats, indexErr := idx.indexWithTree(ctx, projectDir, storedHash, force, curTree, progress)
148168
if indexErr != nil {
149-
return stats, indexErr
169+
if !store.IsCorruptionErr(indexErr) {
170+
return stats, indexErr
171+
}
172+
if idx.logger != nil {
173+
idx.logger.Error("corrupted database detected during index, rebuilding",
174+
"project", projectDir, "err", indexErr)
175+
}
176+
if rebuildErr := idx.rebuildStore(); rebuildErr != nil {
177+
return Stats{}, fmt.Errorf("rebuild corrupted db: %w", rebuildErr)
178+
}
179+
// Retry with force=true so the fresh DB gets a full index pass.
180+
stats, indexErr = idx.indexWithTree(ctx, projectDir, "", true, curTree, progress)
181+
if indexErr != nil {
182+
return stats, fmt.Errorf("reindex after rebuild: %w", indexErr)
183+
}
184+
stats.OldRootHash = storedHash
185+
stats.NewRootHash = curTree.RootHash
186+
stats.Reason = "rebuilt after corruption"
187+
return stats, nil
150188
}
151189

152190
if force {
@@ -191,7 +229,23 @@ func (idx *Indexer) EnsureFresh(ctx context.Context, projectDir string, progress
191229

192230
stats, err := idx.indexWithTree(ctx, projectDir, storedHash, false, curTree, progress)
193231
if err != nil {
194-
return false, stats, err
232+
if !store.IsCorruptionErr(err) {
233+
return false, stats, err
234+
}
235+
if idx.logger != nil {
236+
idx.logger.Error("corrupted database detected during reindex, rebuilding",
237+
"project", projectDir, "err", err)
238+
}
239+
if rebuildErr := idx.rebuildStore(); rebuildErr != nil {
240+
return false, Stats{}, fmt.Errorf("rebuild corrupted db: %w", rebuildErr)
241+
}
242+
// Retry with empty storedHash so the fresh DB gets a full index pass.
243+
stats, err = idx.indexWithTree(ctx, projectDir, "", false, curTree, progress)
244+
if err != nil {
245+
return false, stats, fmt.Errorf("reindex after rebuild: %w", err)
246+
}
247+
reason = "rebuilt after corruption"
248+
storedHash = ""
195249
}
196250
stats.Reason = reason
197251
stats.OldRootHash = storedHash

internal/store/store.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package store
1818
import (
1919
"database/sql"
2020
"fmt"
21+
"os"
2122
"strings"
2223

2324
sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo"
@@ -30,6 +31,26 @@ func init() {
3031
sqlite_vec.Auto()
3132
}
3233

34+
// IsCorruptionErr reports whether err should be treated as SQLite database
// corruption. The decision is made purely on the error text: err matches when
// its message contains "database disk image is malformed" or
// "disk I/O error". A nil error never matches. Callers use a true result as
// the signal to delete the database and rebuild it.
func IsCorruptionErr(err error) bool {
	if err == nil {
		return false
	}
	text := err.Error()
	for _, marker := range []string{
		"database disk image is malformed",
		"disk I/O error",
	} {
		if strings.Contains(text, marker) {
			return true
		}
	}
	return false
}
45+
46+
// deleteDBFiles makes a best-effort attempt to remove the database file at
// path together with its "-wal" and "-shm" sidecar files, in that order.
// Removal errors are deliberately discarded: a file may already be gone or
// may not be removable, and the caller proceeds either way.
func deleteDBFiles(path string) {
	targets := [...]string{path, path + "-wal", path + "-shm"}
	for i := range targets {
		_ = os.Remove(targets[i])
	}
}
53+
3354
// SearchResult represents a single result from a vector search.
3455
type SearchResult struct {
3556
FilePath string
@@ -56,7 +77,21 @@ type Store struct {
5677
// New opens (or creates) a SQLite database at dsn, enables WAL mode and
5778
// foreign keys, and creates the schema tables if they do not exist.
5879
// dimensions specifies the size of the embedding vectors.
80+
//
81+
// If the database file is corrupted (SQLite returns a corruption error during
82+
// open or schema setup), New deletes the file and its WAL/SHM sidecars and
83+
// retries once from a clean state. In-memory databases (dsn == ":memory:")
84+
// are never deleted.
5985
func New(dsn string, dimensions int) (*Store, error) {
86+
s, err := openStore(dsn, dimensions)
87+
if err != nil && IsCorruptionErr(err) && dsn != ":memory:" {
88+
deleteDBFiles(dsn)
89+
s, err = openStore(dsn, dimensions)
90+
}
91+
return s, err
92+
}
93+
94+
func openStore(dsn string, dimensions int) (*Store, error) {
6095
db, err := sql.Open("sqlite3", dsn)
6196
if err != nil {
6297
return nil, fmt.Errorf("open db: %w", err)

0 commit comments

Comments
 (0)