diff --git a/cmd/remindb/serve.go b/cmd/remindb/serve.go index 5ea8f1a..90ea5ee 100644 --- a/cmd/remindb/serve.go +++ b/cmd/remindb/serve.go @@ -13,6 +13,7 @@ import ( "github.com/radimsem/remindb/pkg/config" "github.com/radimsem/remindb/pkg/logbuf" remindb "github.com/radimsem/remindb/pkg/mcp" + "github.com/radimsem/remindb/pkg/mcp/rescanlog" "github.com/radimsem/remindb/pkg/mcp/rescanstat" "github.com/radimsem/remindb/pkg/mcp/sessionlog" "github.com/radimsem/remindb/pkg/store" @@ -33,6 +34,7 @@ var ( const ( defaultLogBufferSize = 1000 defaultSessionLogMaxFileSize = 10 << 20 // 10 MiB + defaultRescanLogMaxFileSize = 10 << 20 // 10 MiB ) var serveCmd = &cobra.Command{ @@ -167,7 +169,12 @@ func runServe(cmd *cobra.Command, _ []string) error { }) if sourceDir != "" { - rescan, err := remindb.NewRescanLoop(st, sourceDir, rescanInterval, workspaceCfg.Compile, logger, rescanStatus) + rescanLog, err := newRescanLog(sourceDir, workspaceCfg.Server.RescanFiles) + if err != nil { + return err + } + + rescan, err := remindb.NewRescanLoop(st, sourceDir, rescanInterval, workspaceCfg.Compile, logger, rescanStatus, rescanLog) if err != nil { return err } @@ -274,6 +281,20 @@ func withSessionLogs(logger *slog.Logger, workspace string, sl config.SessionFil return slog.New(sessionlog.NewHandler(logger.Handler(), sink)), nil } +// newRescanLog builds the durable rescan-tick sink when enabled; nil otherwise. 
+func newRescanLog(workspace string, rf config.RescanFilesConfig) (*rescanlog.Sink, error) { + if rf.Enabled == nil || !*rf.Enabled { + return nil, nil + } + + maxFileSize := int64(defaultRescanLogMaxFileSize) + if rf.MaxFileSize != nil { + maxFileSize = int64(*rf.MaxFileSize) + } + + return rescanlog.New(workspace, maxFileSize) +} + func parseLogLevel(s string) slog.Level { switch s { case "debug": diff --git a/docs/configuration.md b/docs/configuration.md index 0b7c4c2..8c93530 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -13,6 +13,7 @@ remindb keeps its workspace-level state in a `.remindb/` directory at the source | `.remindb/temperatures.json` | Per-path initial-temperature overrides. | | `.remindb/sessions/` | Machine-managed per-client session ledger ([below](#session-ledger-remindbsessions)). | | `.remindb/logs/` | Opt-in per-session tool-call/error logfiles ([below](#session-logfiles-remindblogs)). | +| `.remindb/rescan.jsonl` | Opt-in durable source-rescan tick history ([below](#rescan-history-remindbrescanjsonl)). | The three files are optional; missing → defaults. The whole directory is skipped during source walks, so its contents never end up as memory nodes. @@ -74,6 +75,10 @@ A single JSON object of feature blocks. Unknown top-level or nested keys are rej }, "sessions": { "flush_interval": "30s" + }, + "rescan_files": { + "enabled": true, + "max_file_size": "10MB" } } } @@ -91,7 +96,7 @@ Every field in every block is optional — only the keys you set override the de **`budgets`** sets the default token budget for the four read tools that take one — `MemorySearch`, `MemoryFetch`, `MemoryFetchBatch`, `MemoryRelated`. Resolution is per-tool and local: an explicit positive `budget` on the call always wins; otherwise the configured default; otherwise the built-in. `MemoryRelated`'s built-in is 1000; the other three treat an unset budget as **unlimited** (no trimming). Write tools are unaffected. -**`server`** configures `serve` itself. 
`transport` (`stdio`|`http`) and `listen` mirror the flags of the same name; the nested `logging` object sets `level` (`debug`|`info`|`warn`|`error`), `format` (`text`|`json`), `output_path` (a file; absent → stderr), and `buffer_size` (the capacity of the in-memory ring buffer behind the `remindb://logs` resource; must be > 0, absent → 1000). Absent → today's behavior (stdio, info-level text to stderr, 1000-record buffer). `--verbose` is sugar for `logging.level=debug`. `buffer_size` only sizes the `remindb://logs` mirror — it never affects what reaches stderr/the file. The nested `session_files` object opts into per-session tool-call/error logfiles ([below](#session-logfiles-remindblogs)) — a third sink of the same captured records, alongside the shared stream and the `buffer_size` ring buffer: `enabled` (absent/`false` → off, zero behavior change) and `max_file_size` (a size string, absent → `"10MB"`, must be positive). It only takes effect when `serve` has a source workspace. The nested `resources` object tunes resource-update notification coalescing (see [resources](./resources.md#live-updates--subscriptions)): `debounce` is the global trailing-edge window applied to every subscribable resource (absent → `"500ms"`), and `overrides` maps a short resource name (`graph`, `snapshots`, `tree`, `files`, `temperature`, `logs`) to its own window. Absent overrides fall back to built-in floors of `"1s"` for `logs` and `"2s"` for `temperature` (so the two high-frequency streams never flood); every other resource uses the global default. A negative duration, or an `overrides` key naming a resource that isn't subscribable, fails startup with the offending field named. The nested `sessions` object has one knob, `flush_interval` (absent → `"30s"`), the cadence at which `serve` checkpoints the session ledger ([below](#session-ledger-remindbsessions)); it doubles as the crash-recovery granularity. Must be positive. +**`server`** configures `serve` itself. 
`transport` (`stdio`|`http`) and `listen` mirror the flags of the same name; the nested `logging` object sets `level` (`debug`|`info`|`warn`|`error`), `format` (`text`|`json`), `output_path` (a file; absent → stderr), and `buffer_size` (the capacity of the in-memory ring buffer behind the `remindb://logs` resource; must be > 0, absent → 1000). Absent → today's behavior (stdio, info-level text to stderr, 1000-record buffer). `--verbose` is sugar for `logging.level=debug`. `buffer_size` only sizes the `remindb://logs` mirror — it never affects what reaches stderr/the file. The nested `session_files` object opts into per-session tool-call/error logfiles ([below](#session-logfiles-remindblogs)) — a third sink of the same captured records, alongside the shared stream and the `buffer_size` ring buffer: `enabled` (absent/`false` → off, zero behavior change) and `max_file_size` (a size string, absent → `"10MB"`, must be positive). It only takes effect when `serve` has a source workspace. The nested `resources` object tunes resource-update notification coalescing (see [resources](./resources.md#live-updates--subscriptions)): `debounce` is the global trailing-edge window applied to every subscribable resource (absent → `"500ms"`), and `overrides` maps a short resource name (`graph`, `snapshots`, `tree`, `files`, `temperature`, `logs`) to its own window. Absent overrides fall back to built-in floors of `"1s"` for `logs` and `"2s"` for `temperature` (so the two high-frequency streams never flood); every other resource uses the global default. A negative duration, or an `overrides` key naming a resource that isn't subscribable, fails startup with the offending field named. The nested `sessions` object has one knob, `flush_interval` (absent → `"30s"`), the cadence at which `serve` checkpoints the session ledger ([below](#session-ledger-remindbsessions)); it doubles as the crash-recovery granularity. Must be positive. 
The nested `rescan_files` object opts into a durable source-rescan tick history ([below](#rescan-history-remindbrescanjsonl)), the persistence counterpart of the in-process `remindb://rescan` holder: `enabled` (absent/`false` → off, zero behavior change vs. the in-process-only holder) and `max_file_size` (a size string, absent → `"10MB"`, must be positive). It only takes effect when `serve` has a source workspace. This is independent of the top-level `rescan` block, which tunes the loop's timing, not its persistence. **Precedence**, highest first: **explicit CLI flag → `.remindb/config.json` → environment variable → built-in default**. The committed workspace config is authoritative — an env var only fills a key the config leaves *unset*, it never overrides one the config sets. In CI/automation, override a committed value with the explicit flag, not `REMINDB_*`. (`logging` has no flag/env tier beyond `--verbose`, which forces `debug` and wins.) @@ -151,3 +156,9 @@ The ledger never stores payloads, summaries, or node bodies — only connection Off by default. Set `server.logging.session_files.enabled: true` (and run `serve` with a source workspace) and each connected MCP client session gets its own append-only `.remindb/logs/<id>.log`, keyed by the same id `remindb://sessions` reports — the SDK session id, or the synthesized `contentid` fallback for the lone stdio session. Each file is **JSONL**: one record per line (`{time, level, msg, fields}`), serialized from a single shared `sessionlog.Record` definition so the read-back resource deserializes the exact shape `serve` writes (no second hand-rolled parser that could drift). Each file captures that session's `Memory*` tool-call trace (tool name, elapsed, error) and its `Warn`/`Error` records, **distinct from** the shared stderr stream and the `remindb://logs` ring buffer, so an operator can audit one client's activity in isolation. 
The session trace is captured even when the shared stream sits at `info` (it has its own threshold). Like the ledger, these files **never** contain payloads, summaries, or node bodies — only the same payload-free fields the shared log carries (see [logging-conventions](../.claude/rules/logging-conventions.md)). Each file is bounded by `server.logging.session_files.max_file_size` (default `"10MB"`, must be positive): when an append would cross the cap the file rotates once to `<id>.log.1` (replacing any prior rotation) and a fresh file starts. These logs are read back over MCP as the passive resources `remindb://sessions/logs` (index) and `remindb://sessions/logs/{id}` (one session's structured records, active file only) — see [resources](./resources.md#the-sessionslogs-envelope). Like every `.remindb/` entry the directory is excluded from `compile`, the `serve` rescan loop, `MemoryCompile`, and `bench`, so session logs never become memory nodes. Disabled or unconfigured ⇒ no files written and the logger chain is byte-identical to today. + +## Rescan history: `.remindb/rescan.jsonl` + +Off by default. Set `server.rescan_files.enabled: true` (and run `serve` with a source workspace) and every source-rescan tick appends exactly one JSON line to `.remindb/rescan.jsonl` — the same per-tick shape the `remindb://rescan` resource projects (run timestamp, error string, add/modify/remove counts, per-file purge list). This is the durable counterpart of the in-process `rescanstat` holder #111 added: the resource exposes only the *latest* tick and is lost on restart, whereas this file accumulates across reconnects and `serve` restarts (append, never truncate). + +Like the ledger and session logfiles, it **never** contains payloads, summaries, or node bodies — only the counts, paths, and per-tick metadata the holder already carries. 
The file is bounded by `server.rescan_files.max_file_size` (default `"10MB"`, must be positive): when an append would cross the cap the file rotates once to `rescan.jsonl.1` (replacing any prior rotation) and a fresh file starts. Reading it back (a resource or CLI) is out of scope for now — file separately if a consumer need appears. Like every `.remindb/` entry it's excluded from `compile`, the `serve` rescan loop, `MemoryCompile`, and `bench`, so the history never becomes a memory node. Disabled or unconfigured ⇒ no file written and behavior is identical to the in-process-only holder. diff --git a/docs/resources.md b/docs/resources.md index c7cbbde..2e14b90 100644 --- a/docs/resources.md +++ b/docs/resources.md @@ -344,7 +344,7 @@ The shape is **locked**. Notes: - `last_meta` is exactly one tick's result, replaced wholesale each tick. `run_at` is Unix **seconds** (`0` = never run); `error` is that tick's failure string, empty on success (a failed tick still publishes — counts stay zero, `error` is set). - `added` / `modified` / `removed` are the tick's compile counts. There is **no `total`** — a consumer sums the three. - `purged_files` lists each source file that was deleted from disk that tick, with `nodes` = how many context nodes it carried. Always present (`[]` when nothing was purged). Purging is **whole-file only** — a file removed from the source tree drops all its context nodes — so there is no separate `purged_nodes` count; the per-file `nodes` fully describes the purge. Entries sort by `path`. -- Reading this resource does **not** boost, lock, or snapshot. The holder is in-process memory only; it is not persisted and resets on restart. (Durable JSONL rescan history under `.remindb/` is tracked separately, mirroring the per-session-logfile work.) +- Reading this resource does **not** boost, lock, or snapshot. The holder is in-process memory only; it is not persisted and resets on restart. 
For durable history, opt into `server.rescan_files` — every tick is then also appended to `.remindb/rescan.jsonl` (this exact per-tick shape), surviving restarts. See [configuration](./configuration.md#rescan-history-remindbrescanjsonl). Reading that file back over MCP is out of scope for now. ## Live updates — subscriptions diff --git a/internal/mcptest/mcptest.go b/internal/mcptest/mcptest.go index d6c4689..88a2b5e 100644 --- a/internal/mcptest/mcptest.go +++ b/internal/mcptest/mcptest.go @@ -188,7 +188,7 @@ func NewEnvWithRescan(t *testing.T) *Env { t.Fatalf("NewServer: %v", err) } - loop, err := remindb.NewRescanLoop(st, dir, time.Second, config.CompileConfig{}, nil, status) + loop, err := remindb.NewRescanLoop(st, dir, time.Second, config.CompileConfig{}, nil, status, nil) if err != nil { t.Fatalf("NewRescanLoop: %v", err) } diff --git a/pkg/config/config.go b/pkg/config/config.go index 14501d2..3eec2e7 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -30,11 +30,17 @@ type Config struct { } type ServerConfig struct { - Transport *string `json:"transport,omitempty"` - Listen *string `json:"listen,omitempty"` - Logging LoggingConfig `json:"logging"` - Resources ResourcesConfig `json:"resources"` - Sessions SessionsConfig `json:"sessions"` + Transport *string `json:"transport,omitempty"` + Listen *string `json:"listen,omitempty"` + Logging LoggingConfig `json:"logging"` + Resources ResourcesConfig `json:"resources"` + Sessions SessionsConfig `json:"sessions"` + RescanFiles RescanFilesConfig `json:"rescan_files"` +} + +type RescanFilesConfig struct { + Enabled *bool `json:"enabled,omitempty"` + MaxFileSize *ByteSize `json:"max_file_size,omitempty"` } type SessionsConfig struct { @@ -308,6 +314,10 @@ func (c Config) Validate() error { return errors.New("server.logging.session_files.max_file_size must be positive") } + if mfs := sc.RescanFiles.MaxFileSize; mfs != nil && *mfs <= 0 { + return errors.New("server.rescan_files.max_file_size must be 
positive") + } + return nil } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 4b54159..13246e1 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -661,3 +661,41 @@ func TestValidate_ServerSessionFilesBlock(t *testing.T) { t.Errorf("valid session_files block should pass, got %v", err) } } + +func TestLoad_ServerRescanFilesBlock(t *testing.T) { + ws := t.TempDir() + writeConfig(t, ws, `{"server": {"rescan_files": {"enabled": true, "max_file_size": "4MB"}}}`) + + cfg, err := Load(ws) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + rf := cfg.Server.RescanFiles + if rf.Enabled == nil || !*rf.Enabled { + t.Errorf("enabled = %v, want true", rf.Enabled) + } + if rf.MaxFileSize == nil || *rf.MaxFileSize != 4<<20 { + t.Errorf("max_file_size = %v, want 4MiB", rf.MaxFileSize) + } +} + +func TestValidate_ServerRescanFilesBlock(t *testing.T) { + neg := ByteSize(-1) + zero := ByteSize(0) + ok := ByteSize(1 << 20) + + for i, c := range []Config{ + {Server: ServerConfig{RescanFiles: RescanFilesConfig{MaxFileSize: &neg}}}, + {Server: ServerConfig{RescanFiles: RescanFilesConfig{MaxFileSize: &zero}}}, + } { + if err := c.Validate(); err == nil { + t.Errorf("case %d: expected validation error, got nil", i) + } + } + + good := Config{Server: ServerConfig{RescanFiles: RescanFilesConfig{MaxFileSize: &ok}}} + if err := good.Validate(); err != nil { + t.Errorf("valid rescan_files block should pass, got %v", err) + } +} diff --git a/pkg/mcp/rescan.go b/pkg/mcp/rescan.go index f6fd98c..90b1726 100644 --- a/pkg/mcp/rescan.go +++ b/pkg/mcp/rescan.go @@ -19,6 +19,7 @@ import ( "github.com/radimsem/remindb/pkg/config" "github.com/radimsem/remindb/pkg/diff" "github.com/radimsem/remindb/pkg/emitter" + "github.com/radimsem/remindb/pkg/mcp/rescanlog" "github.com/radimsem/remindb/pkg/mcp/rescanstat" "github.com/radimsem/remindb/pkg/parser" "github.com/radimsem/remindb/pkg/store" @@ -44,6 +45,7 @@ type RescanLoop struct { ignore 
*ignore.Matcher compileOpts []compiler.Option status *rescanstat.Status + rescanLog *rescanlog.Sink onChange func() } @@ -58,7 +60,7 @@ func (r *RescanLoop) notifyChange() { } } -func NewRescanLoop(st *store.Store, dir string, interval time.Duration, cc config.CompileConfig, logger *slog.Logger, status *rescanstat.Status) (*RescanLoop, error) { +func NewRescanLoop(st *store.Store, dir string, interval time.Duration, cc config.CompileConfig, logger *slog.Logger, status *rescanstat.Status, rescanLog *rescanlog.Sink) (*RescanLoop, error) { if interval <= 0 { interval = defaultRescanInterval } @@ -88,6 +90,7 @@ func NewRescanLoop(st *store.Store, dir string, interval time.Duration, cc confi ignore: matcher, compileOpts: compiler.ConfigOptions(cc), status: status, + rescanLog: rescanLog, }, nil } @@ -172,7 +175,15 @@ func (r *RescanLoop) scan(ctx context.Context) { now := r.now() snap := rescanstat.Snapshot{RunAt: now.Unix()} - defer func() { r.status.Set(int64(r.interval/time.Second), snap) }() + defer func() { + r.status.Set(int64(r.interval/time.Second), snap) + + if r.rescanLog != nil { + if err := r.rescanLog.Append(snap); err != nil { + r.logger.Warn("rescan: failed to persist tick", "err", err) + } + } + }() walkErr := r.walkFn(r.dir, func(path string, d os.DirEntry, err error) error { if err != nil { diff --git a/pkg/mcp/rescan_test.go b/pkg/mcp/rescan_test.go index 570644e..b4c7119 100644 --- a/pkg/mcp/rescan_test.go +++ b/pkg/mcp/rescan_test.go @@ -3,6 +3,7 @@ package mcp import ( "bytes" "context" + "encoding/json" "errors" "io/fs" "log/slog" @@ -17,13 +18,14 @@ import ( "github.com/radimsem/remindb/internal/testutil" "github.com/radimsem/remindb/pkg/compiler" "github.com/radimsem/remindb/pkg/config" + "github.com/radimsem/remindb/pkg/mcp/rescanlog" "github.com/radimsem/remindb/pkg/mcp/rescanstat" "github.com/radimsem/remindb/pkg/store" ) func mustRescan(t *testing.T, st *store.Store, dir string, interval time.Duration, logger *slog.Logger) *RescanLoop { 
t.Helper() - r, err := NewRescanLoop(st, dir, interval, config.CompileConfig{}, logger, nil) + r, err := NewRescanLoop(st, dir, interval, config.CompileConfig{}, logger, nil, nil) if err != nil { t.Fatalf("NewRescanLoop: %v", err) @@ -224,7 +226,7 @@ func TestRescanLoop_PublishesStatus(t *testing.T) { st := testutil.OpenTestDB(t) status := rescanstat.New() - r, err := NewRescanLoop(st, dir, 90*time.Second, config.CompileConfig{}, nil, status) + r, err := NewRescanLoop(st, dir, 90*time.Second, config.CompileConfig{}, nil, status, nil) if err != nil { t.Fatalf("NewRescanLoop: %v", err) } @@ -279,6 +281,62 @@ func TestRescanLoop_PublishesStatus(t *testing.T) { } } +func TestRescanLoop_PersistsTickAndExcludesFromWalk(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "doc.md", "# Doc\n\nBody.\n") + + st := testutil.OpenTestDB(t) + sink, err := rescanlog.New(dir, 1<<20) + if err != nil { + t.Fatalf("rescanlog.New: %v", err) + } + + r, err := NewRescanLoop(st, dir, 45*time.Second, config.CompileConfig{}, nil, nil, sink) + if err != nil { + t.Fatalf("NewRescanLoop: %v", err) + } + r.now = func() time.Time { return time.Now().Add(time.Hour) } + + ctx := context.Background() + r.scan(ctx) // tick 1: compiles doc.md and writes rescan.jsonl + r.scan(ctx) // tick 2: rescan.jsonl is now on disk while the tree is walked + + data, err := os.ReadFile(rescanlog.Path(dir)) + if err != nil { + t.Fatalf("read rescan.jsonl: %v", err) + } + + lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") + if len(lines) != 2 { + t.Fatalf("rescan.jsonl line count = %d, want 2 (one per tick)", len(lines)) + } + + for i, l := range lines { + var snap rescanstat.Snapshot + if err := json.Unmarshal([]byte(l), &snap); err != nil { + t.Fatalf("tick %d line not valid Snapshot JSON: %v", i, err) + } + + if snap.RunAt == 0 { + t.Errorf("tick %d run_at unset", i) + } + } + + nodes, err := st.GetAllNodes(ctx) + if err != nil { + t.Fatalf("GetAllNodes: %v", err) + } + + if len(nodes) == 0 
{ + t.Fatal("expected doc.md nodes after scan") + } + for _, n := range nodes { + if strings.Contains(filepath.ToSlash(n.SourceFile), config.DirName+"/") { + t.Errorf("indexed file inside %s/: %s", config.DirName, n.SourceFile) + } + } +} + func TestRescanLoop_RecordsDeletionsInSnapshot(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "keep.md", "# Keep\n") @@ -506,7 +564,7 @@ func TestNewRescanLoop_FailsOnMalformedIgnore(t *testing.T) { writeFile(t, dir, ignore.Path, "a//b\n") st := testutil.OpenTestDB(t) - _, err := NewRescanLoop(st, dir, time.Minute, config.CompileConfig{}, nil, nil) + _, err := NewRescanLoop(st, dir, time.Minute, config.CompileConfig{}, nil, nil, nil) if err == nil { t.Fatal("expected error for malformed ignore file") } diff --git a/pkg/mcp/rescanlog/rescanlog.go b/pkg/mcp/rescanlog/rescanlog.go new file mode 100644 index 0000000..ac53bb9 --- /dev/null +++ b/pkg/mcp/rescanlog/rescanlog.go @@ -0,0 +1,72 @@ +// Package rescanlog appends each serve source-rescan tick to an append-only .remindb/rescan.jsonl. +package rescanlog + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/radimsem/remindb/pkg/config" + "github.com/radimsem/remindb/pkg/mcp/rescanstat" +) + +const fileName = "rescan.jsonl" + +type Sink struct { + path string + maxFileSize int64 + + mu sync.Mutex +} + +// New ensures <workspace>/.remindb exists and returns a sink bounded by maxFileSize bytes. +func New(workspace string, maxFileSize int64) (*Sink, error) { + dir := filepath.Join(workspace, config.DirName) + if err := os.MkdirAll(dir, 0o755); err != nil { + return nil, fmt.Errorf("failed to create: rescan log dir: %w", err) + } + + return &Sink{path: filepath.Join(dir, fileName), maxFileSize: maxFileSize}, nil +} + +// Path returns the rescan-history file under workspace. 
+func Path(workspace string) string { + return filepath.Join(workspace, config.DirName, fileName) +} + +// Append writes one tick snapshot as a JSON line, rotating once when the cap is reached. +func (s *Sink) Append(snap rescanstat.Snapshot) error { + line, err := json.Marshal(snap) + if err != nil { + return fmt.Errorf("failed to marshal: rescan snapshot: %w", err) + } + line = append(line, '\n') + + s.mu.Lock() + defer s.mu.Unlock() + + // Single-generation rotation: the prior .1 is intentionally discarded. + fi, statErr := os.Stat(s.path) + overCap := statErr == nil && fi.Size() > 0 && + int64(len(line)) <= s.maxFileSize && + fi.Size()+int64(len(line)) > s.maxFileSize + + if overCap { + if err := os.Rename(s.path, s.path+".1"); err != nil { + return fmt.Errorf("failed to rotate: rescan log %s: %w", s.path, err) + } + } + + f, err := os.OpenFile(s.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("failed to open: rescan log %s: %w", s.path, err) + } + defer func() { _ = f.Close() }() + + if _, err := f.Write(line); err != nil { + return fmt.Errorf("failed to write: rescan log %s: %w", s.path, err) + } + return nil +} diff --git a/pkg/mcp/rescanlog/rescanlog_test.go b/pkg/mcp/rescanlog/rescanlog_test.go new file mode 100644 index 0000000..5b8e834 --- /dev/null +++ b/pkg/mcp/rescanlog/rescanlog_test.go @@ -0,0 +1,137 @@ +package rescanlog + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/radimsem/remindb/pkg/config" + "github.com/radimsem/remindb/pkg/mcp/rescanstat" +) + +func readLines(t *testing.T, path string) []string { + t.Helper() + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + + trimmed := strings.TrimRight(string(data), "\n") + if trimmed == "" { + return nil + } + return strings.Split(trimmed, "\n") +} + +func TestSink_AppendsOneLinePerCall(t *testing.T) { + ws := t.TempDir() + + s, err := New(ws, 1<<20) + if err != nil { + 
t.Fatalf("New: %v", err) + } + + snaps := []rescanstat.Snapshot{ + {RunAt: 1, Added: 2}, + {RunAt: 2, Error: "boom"}, + {RunAt: 3, PurgedFiles: []rescanstat.PurgedFile{{Path: "a.md", Nodes: 4}}}, + } + for _, sn := range snaps { + if err := s.Append(sn); err != nil { + t.Fatalf("Append: %v", err) + } + } + + lines := readLines(t, Path(ws)) + if len(lines) != len(snaps) { + t.Fatalf("line count = %d, want %d", len(lines), len(snaps)) + } + + for i, l := range lines { + var got rescanstat.Snapshot + + if err := json.Unmarshal([]byte(l), &got); err != nil { + t.Fatalf("line %d not valid JSON: %v", i, err) + } + if got.RunAt != snaps[i].RunAt { + t.Errorf("line %d run_at = %d, want %d", i, got.RunAt, snaps[i].RunAt) + } + } + + if got := lines[2]; !strings.Contains(got, `"path":"a.md"`) || !strings.Contains(got, `"nodes":4`) { + t.Errorf("purge line missing per-file detail: %s", got) + } +} + +func TestSink_AppendsAcrossRestart(t *testing.T) { + ws := t.TempDir() + + s1, err := New(ws, 1<<20) + if err != nil { + t.Fatalf("New: %v", err) + } + if err := s1.Append(rescanstat.Snapshot{RunAt: 1}); err != nil { + t.Fatalf("Append: %v", err) + } + + // A fresh Sink against the same workspace must append, not truncate. 
+ s2, err := New(ws, 1<<20) + if err != nil { + t.Fatalf("New (restart): %v", err) + } + if err := s2.Append(rescanstat.Snapshot{RunAt: 2}); err != nil { + t.Fatalf("Append (restart): %v", err) + } + + lines := readLines(t, Path(ws)) + if len(lines) != 2 { + t.Fatalf("line count = %d, want 2 (append survived restart)", len(lines)) + } +} + +func TestSink_RotatesOnceAtCap(t *testing.T) { + ws := t.TempDir() + + one, _ := json.Marshal(rescanstat.Snapshot{RunAt: 1}) + capBytes := int64(len(one)+1) + 1 // room for one line, second crosses the cap + + s, err := New(ws, capBytes) + if err != nil { + t.Fatalf("New: %v", err) + } + + if err := s.Append(rescanstat.Snapshot{RunAt: 1}); err != nil { + t.Fatalf("Append 1: %v", err) + } + if err := s.Append(rescanstat.Snapshot{RunAt: 2}); err != nil { + t.Fatalf("Append 2: %v", err) + } + + rotated := Path(ws) + ".1" + if _, err := os.Stat(rotated); err != nil { + t.Fatalf("expected rotated file %s: %v", rotated, err) + } + + lines := readLines(t, Path(ws)) + if len(lines) != 1 { + t.Fatalf("active file line count = %d, want 1 after rotation", len(lines)) + } + + var got rescanstat.Snapshot + if err := json.Unmarshal([]byte(lines[0]), &got); err != nil { + t.Fatalf("active line invalid: %v", err) + } + if got.RunAt != 2 { + t.Errorf("active file run_at = %d, want 2 (latest tick)", got.RunAt) + } +} + +func TestPath_UnderRemindbDir(t *testing.T) { + want := filepath.Join("/ws", config.DirName, "rescan.jsonl") + if got := Path("/ws"); got != want { + t.Errorf("Path = %q, want %q", got, want) + } +}