Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion cmd/remindb/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/radimsem/remindb/pkg/config"
"github.com/radimsem/remindb/pkg/logbuf"
remindb "github.com/radimsem/remindb/pkg/mcp"
"github.com/radimsem/remindb/pkg/mcp/rescanlog"
"github.com/radimsem/remindb/pkg/mcp/rescanstat"
"github.com/radimsem/remindb/pkg/mcp/sessionlog"
"github.com/radimsem/remindb/pkg/store"
Expand All @@ -33,6 +34,7 @@ var (
// Built-in fallbacks for sizes the workspace config may leave unset.
const (
// NOTE(review): presumably the record capacity of the in-memory log buffer
// when logging.buffer_size is absent — confirm against its use in runServe.
defaultLogBufferSize = 1000
// NOTE(review): presumably the per-session logfile cap when
// session_files.max_file_size is absent — confirm against withSessionLogs.
defaultSessionLogMaxFileSize = 10 << 20 // 10 MiB
// Cap handed to the rescan log sink by newRescanLog when
// rescan_files.max_file_size is nil.
defaultRescanLogMaxFileSize = 10 << 20 // 10 MiB
)

var serveCmd = &cobra.Command{
Expand Down Expand Up @@ -167,7 +169,12 @@ func runServe(cmd *cobra.Command, _ []string) error {
})

if sourceDir != "" {
rescan, err := remindb.NewRescanLoop(st, sourceDir, rescanInterval, workspaceCfg.Compile, logger, rescanStatus)
rescanLog, err := newRescanLog(sourceDir, workspaceCfg.Server.RescanFiles)
if err != nil {
return err
}

rescan, err := remindb.NewRescanLoop(st, sourceDir, rescanInterval, workspaceCfg.Compile, logger, rescanStatus, rescanLog)
if err != nil {
return err
}
Expand Down Expand Up @@ -274,6 +281,20 @@ func withSessionLogs(logger *slog.Logger, workspace string, sl config.SessionFil
return slog.New(sessionlog.NewHandler(logger.Handler(), sink)), nil
}

// newRescanLog constructs the rescan-tick sink for workspace. It yields a nil
// sink and a nil error unless rf.Enabled is set and true. A nil
// rf.MaxFileSize falls back to defaultRescanLogMaxFileSize.
func newRescanLog(workspace string, rf config.RescanFilesConfig) (*rescanlog.Sink, error) {
	enabled := rf.Enabled != nil && *rf.Enabled
	if !enabled {
		return nil, nil
	}

	limit := int64(defaultRescanLogMaxFileSize)
	if size := rf.MaxFileSize; size != nil {
		limit = int64(*size)
	}

	return rescanlog.New(workspace, limit)
}

func parseLogLevel(s string) slog.Level {
switch s {
case "debug":
Expand Down
13 changes: 12 additions & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ remindb keeps its workspace-level state in a `.remindb/` directory at the source
| `.remindb/temperatures.json` | Per-path initial-temperature overrides. |
| `.remindb/sessions/` | Machine-managed per-client session ledger ([below](#session-ledger-remindbsessions)). |
| `.remindb/logs/` | Opt-in per-session tool-call/error logfiles ([below](#session-logfiles-remindblogs)). |
| `.remindb/rescan.jsonl` | Opt-in durable source-rescan tick history ([below](#rescan-history-remindbrescanjsonl)). |

The three files are optional; missing → defaults. The whole directory is skipped during source walks, so its contents never end up as memory nodes.

Expand Down Expand Up @@ -74,6 +75,10 @@ A single JSON object of feature blocks. Unknown top-level or nested keys are rej
},
"sessions": {
"flush_interval": "30s"
},
"rescan_files": {
"enabled": true,
"max_file_size": "10MB"
}
}
}
Expand All @@ -91,7 +96,7 @@ Every field in every block is optional — only the keys you set override the de

**`budgets`** sets the default token budget for the four read tools that take one — `MemorySearch`, `MemoryFetch`, `MemoryFetchBatch`, `MemoryRelated`. Resolution is per-tool and local: an explicit positive `budget` on the call always wins; otherwise the configured default; otherwise the built-in. `MemoryRelated`'s built-in is 1000; the other three treat an unset budget as **unlimited** (no trimming). Write tools are unaffected.

**`server`** configures `serve` itself. `transport` (`stdio`|`http`) and `listen` mirror the flags of the same name; the nested `logging` object sets `level` (`debug`|`info`|`warn`|`error`), `format` (`text`|`json`), `output_path` (a file; absent → stderr), and `buffer_size` (the capacity of the in-memory ring buffer behind the `remindb://logs` resource; must be > 0, absent → 1000). Absent → today's behavior (stdio, info-level text to stderr, 1000-record buffer). `--verbose` is sugar for `logging.level=debug`. `buffer_size` only sizes the `remindb://logs` mirror — it never affects what reaches stderr/the file. The nested `session_files` object opts into per-session tool-call/error logfiles ([below](#session-logfiles-remindblogs)) — a third sink of the same captured records, alongside the shared stream and the `buffer_size` ring buffer: `enabled` (absent/`false` → off, zero behavior change) and `max_file_size` (a size string, absent → `"10MB"`, must be positive). It only takes effect when `serve` has a source workspace. The nested `resources` object tunes resource-update notification coalescing (see [resources](./resources.md#live-updates--subscriptions)): `debounce` is the global trailing-edge window applied to every subscribable resource (absent → `"500ms"`), and `overrides` maps a short resource name (`graph`, `snapshots`, `tree`, `files`, `temperature`, `logs`) to its own window. Absent overrides fall back to built-in floors of `"1s"` for `logs` and `"2s"` for `temperature` (so the two high-frequency streams never flood); every other resource uses the global default. A negative duration, or an `overrides` key naming a resource that isn't subscribable, fails startup with the offending field named. The nested `sessions` object has one knob, `flush_interval` (absent → `"30s"`), the cadence at which `serve` checkpoints the session ledger ([below](#session-ledger-remindbsessions)); it doubles as the crash-recovery granularity. Must be positive.
**`server`** configures `serve` itself. `transport` (`stdio`|`http`) and `listen` mirror the flags of the same name; the nested `logging` object sets `level` (`debug`|`info`|`warn`|`error`), `format` (`text`|`json`), `output_path` (a file; absent → stderr), and `buffer_size` (the capacity of the in-memory ring buffer behind the `remindb://logs` resource; must be > 0, absent → 1000). Absent → today's behavior (stdio, info-level text to stderr, 1000-record buffer). `--verbose` is sugar for `logging.level=debug`. `buffer_size` only sizes the `remindb://logs` mirror — it never affects what reaches stderr/the file. The nested `session_files` object opts into per-session tool-call/error logfiles ([below](#session-logfiles-remindblogs)) — a third sink of the same captured records, alongside the shared stream and the `buffer_size` ring buffer: `enabled` (absent/`false` → off, zero behavior change) and `max_file_size` (a size string, absent → `"10MB"`, must be positive). It only takes effect when `serve` has a source workspace. The nested `resources` object tunes resource-update notification coalescing (see [resources](./resources.md#live-updates--subscriptions)): `debounce` is the global trailing-edge window applied to every subscribable resource (absent → `"500ms"`), and `overrides` maps a short resource name (`graph`, `snapshots`, `tree`, `files`, `temperature`, `logs`) to its own window. Absent overrides fall back to built-in floors of `"1s"` for `logs` and `"2s"` for `temperature` (so the two high-frequency streams never flood); every other resource uses the global default. A negative duration, or an `overrides` key naming a resource that isn't subscribable, fails startup with the offending field named. The nested `sessions` object has one knob, `flush_interval` (absent → `"30s"`), the cadence at which `serve` checkpoints the session ledger ([below](#session-ledger-remindbsessions)); it doubles as the crash-recovery granularity. Must be positive. 
The nested `rescan_files` object opts into a durable source-rescan tick history ([below](#rescan-history-remindbrescanjsonl)), the persistence counterpart of the in-process `remindb://rescan` holder: `enabled` (absent/`false` → off, zero behavior change vs. the in-process-only holder) and `max_file_size` (a size string, absent → `"10MB"`, must be positive). It only takes effect when `serve` has a source workspace. This is independent of the top-level `rescan` block, which tunes the loop's timing, not its persistence.

**Precedence**, highest first: **explicit CLI flag → `.remindb/config.json` → environment variable → built-in default**. The committed workspace config is authoritative — an env var only fills a key the config leaves *unset*, it never overrides one the config sets. In CI/automation, override a committed value with the explicit flag, not `REMINDB_*`. (`logging` has no flag/env tier beyond `--verbose`, which forces `debug` and wins.)

Expand Down Expand Up @@ -151,3 +156,9 @@ The ledger never stores payloads, summaries, or node bodies — only connection
Off by default. Set `server.logging.session_files.enabled: true` (and run `serve` with a source workspace) and each connected MCP client session gets its own append-only `.remindb/logs/<session-id>.log`, keyed by the same id `remindb://sessions` reports — the SDK session id, or the synthesized `contentid` fallback for the lone stdio session. Each file is **JSONL**: one record per line (`{time, level, msg, fields}`), serialized from a single shared `sessionlog.Record` definition so the read-back resource deserializes the exact shape `serve` writes (no second hand-rolled parser that could drift). Each file captures that session's `Memory*` tool-call trace (tool name, elapsed, error) and its `Warn`/`Error` records, **distinct from** the shared stderr stream and the `remindb://logs` ring buffer, so an operator can audit one client's activity in isolation. The session trace is captured even when the shared stream sits at `info` (it has its own threshold).

Like the ledger, these files **never** contain payloads, summaries, or node bodies — only the same payload-free fields the shared log carries (see [logging-conventions](../.claude/rules/logging-conventions.md)). Each file is bounded by `server.logging.session_files.max_file_size` (default `"10MB"`, must be positive): when an append would cross the cap the file rotates once to `<session-id>.log.1` (replacing any prior rotation) and a fresh file starts. These logs are read back over MCP as the passive resources `remindb://sessions/logs` (index) and `remindb://sessions/logs/{id}` (one session's structured records, active file only) — see [resources](./resources.md#the-sessionslogs-envelope). Like every `.remindb/` entry the directory is excluded from `compile`, the `serve` rescan loop, `MemoryCompile`, and `bench`, so session logs never become memory nodes. Disabled or unconfigured ⇒ no files written and the logger chain is byte-identical to today.

## Rescan history: `.remindb/rescan.jsonl`

Off by default. Set `server.rescan_files.enabled: true` (and run `serve` with a source workspace) and every source-rescan tick appends exactly one JSON line to `.remindb/rescan.jsonl` — the same per-tick shape the `remindb://rescan` resource projects (run timestamp, error string, add/modify/remove counts, per-file purge list). This is the durable counterpart of the in-process `rescanstat` holder added in #111: the resource exposes only the *latest* tick and is lost on restart, whereas this file accumulates across reconnects and `serve` restarts (append, never truncate).

Like the ledger and session logfiles, it **never** contains payloads, summaries, or node bodies — only the counts, paths, and per-tick metadata the holder already carries. The file is bounded by `server.rescan_files.max_file_size` (default `"10MB"`, must be positive): when an append would cross the cap the file rotates once to `rescan.jsonl.1` (replacing any prior rotation) and a fresh file starts. Reading it back (via a resource or the CLI) is out of scope for now — open a separate issue if a consumer needs it. Like every `.remindb/` entry it's excluded from `compile`, the `serve` rescan loop, `MemoryCompile`, and `bench`, so the history never becomes a memory node. Disabled or unconfigured ⇒ no file written and behavior is identical to the in-process-only holder.
2 changes: 1 addition & 1 deletion docs/resources.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ The shape is **locked**. Notes:
- `last_meta` is exactly one tick's result, replaced wholesale each tick. `run_at` is Unix **seconds** (`0` = never run); `error` is that tick's failure string, empty on success (a failed tick still publishes — counts stay zero, `error` is set).
- `added` / `modified` / `removed` are the tick's compile counts. There is **no `total`** — a consumer sums the three.
- `purged_files` lists each source file that was deleted from disk that tick, with `nodes` = how many context nodes it carried. Always present (`[]` when nothing was purged). Purging is **whole-file only** — a file removed from the source tree drops all its context nodes — so there is no separate `purged_nodes` count; the per-file `nodes` fully describes the purge. Entries sort by `path`.
- Reading this resource does **not** boost, lock, or snapshot. The holder is in-process memory only; it is not persisted and resets on restart. (Durable JSONL rescan history under `.remindb/` is tracked separately, mirroring the per-session-logfile work.)
- Reading this resource does **not** boost, lock, or snapshot. The holder is in-process memory only; it is not persisted and resets on restart. For durable history, opt into `server.rescan_files` — every tick is then also appended to `.remindb/rescan.jsonl` (this exact per-tick shape), surviving restarts. See [configuration](./configuration.md#rescan-history-remindbrescanjsonl). Reading that file back over MCP is out of scope for now.

## Live updates — subscriptions

Expand Down
2 changes: 1 addition & 1 deletion internal/mcptest/mcptest.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ func NewEnvWithRescan(t *testing.T) *Env {
t.Fatalf("NewServer: %v", err)
}

loop, err := remindb.NewRescanLoop(st, dir, time.Second, config.CompileConfig{}, nil, status)
loop, err := remindb.NewRescanLoop(st, dir, time.Second, config.CompileConfig{}, nil, status, nil)
if err != nil {
t.Fatalf("NewRescanLoop: %v", err)
}
Expand Down
20 changes: 15 additions & 5 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,17 @@ type Config struct {
}

type ServerConfig struct {
Transport *string `json:"transport,omitempty"`
Listen *string `json:"listen,omitempty"`
Logging LoggingConfig `json:"logging"`
Resources ResourcesConfig `json:"resources"`
Sessions SessionsConfig `json:"sessions"`
Transport *string `json:"transport,omitempty"`
Listen *string `json:"listen,omitempty"`
Logging LoggingConfig `json:"logging"`
Resources ResourcesConfig `json:"resources"`
Sessions SessionsConfig `json:"sessions"`
RescanFiles RescanFilesConfig `json:"rescan_files"`
}

// RescanFilesConfig is the "rescan_files" block nested under "server".
// Both fields are pointers so an absent key (nil) stays distinguishable from
// an explicitly set value.
type RescanFilesConfig struct {
// Enabled turns the rescan log on; serve treats nil the same as false.
Enabled *bool `json:"enabled,omitempty"`
// MaxFileSize is the size cap handed to the rescan log. Validate rejects
// values <= 0; nil lets serve apply its built-in default.
MaxFileSize *ByteSize `json:"max_file_size,omitempty"`
}

type SessionsConfig struct {
Expand Down Expand Up @@ -308,6 +314,10 @@ func (c Config) Validate() error {
return errors.New("server.logging.session_files.max_file_size must be positive")
}

if mfs := sc.RescanFiles.MaxFileSize; mfs != nil && *mfs <= 0 {
return errors.New("server.rescan_files.max_file_size must be positive")
}

return nil
}

Expand Down
38 changes: 38 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,3 +661,41 @@ func TestValidate_ServerSessionFilesBlock(t *testing.T) {
t.Errorf("valid session_files block should pass, got %v", err)
}
}

// TestLoad_ServerRescanFilesBlock checks that Load decodes the
// server.rescan_files block: "enabled" into a non-nil true pointer and the
// "4MB" size string into 4<<20 bytes.
func TestLoad_ServerRescanFilesBlock(t *testing.T) {
	ws := t.TempDir()
	writeConfig(t, ws, `{"server": {"rescan_files": {"enabled": true, "max_file_size": "4MB"}}}`)

	cfg, err := Load(ws)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	rf := cfg.Server.RescanFiles

	// Report the pointed-to value rather than the pointer: %v on a non-nil
	// *bool or *ByteSize prints an address, which says nothing about what
	// was actually decoded.
	switch {
	case rf.Enabled == nil:
		t.Error("enabled = nil, want true")
	case !*rf.Enabled:
		t.Errorf("enabled = %v, want true", *rf.Enabled)
	}

	switch {
	case rf.MaxFileSize == nil:
		t.Error("max_file_size = nil, want 4MiB")
	case *rf.MaxFileSize != 4<<20:
		t.Errorf("max_file_size = %d bytes, want %d (4MiB)", int64(*rf.MaxFileSize), int64(4<<20))
	}
}

// TestValidate_ServerRescanFilesBlock checks that Validate rejects a
// negative or zero server.rescan_files.max_file_size and accepts a positive
// one.
func TestValidate_ServerRescanFilesBlock(t *testing.T) {
	// withMaxFileSize builds a Config whose only populated field is the
	// rescan_files size cap.
	withMaxFileSize := func(size ByteSize) Config {
		return Config{Server: ServerConfig{RescanFiles: RescanFilesConfig{MaxFileSize: &size}}}
	}

	rejected := []Config{
		withMaxFileSize(ByteSize(-1)),
		withMaxFileSize(ByteSize(0)),
	}
	for idx := range rejected {
		if rejected[idx].Validate() == nil {
			t.Errorf("case %d: expected validation error, got nil", idx)
		}
	}

	accepted := withMaxFileSize(ByteSize(1 << 20))
	if err := accepted.Validate(); err != nil {
		t.Errorf("valid rescan_files block should pass, got %v", err)
	}
}
15 changes: 13 additions & 2 deletions pkg/mcp/rescan.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/radimsem/remindb/pkg/config"
"github.com/radimsem/remindb/pkg/diff"
"github.com/radimsem/remindb/pkg/emitter"
"github.com/radimsem/remindb/pkg/mcp/rescanlog"
"github.com/radimsem/remindb/pkg/mcp/rescanstat"
"github.com/radimsem/remindb/pkg/parser"
"github.com/radimsem/remindb/pkg/store"
Expand All @@ -44,6 +45,7 @@ type RescanLoop struct {
ignore *ignore.Matcher
compileOpts []compiler.Option
status *rescanstat.Status
rescanLog *rescanlog.Sink
onChange func()
}

Expand All @@ -58,7 +60,7 @@ func (r *RescanLoop) notifyChange() {
}
}

func NewRescanLoop(st *store.Store, dir string, interval time.Duration, cc config.CompileConfig, logger *slog.Logger, status *rescanstat.Status) (*RescanLoop, error) {
func NewRescanLoop(st *store.Store, dir string, interval time.Duration, cc config.CompileConfig, logger *slog.Logger, status *rescanstat.Status, rescanLog *rescanlog.Sink) (*RescanLoop, error) {
if interval <= 0 {
interval = defaultRescanInterval
}
Expand Down Expand Up @@ -88,6 +90,7 @@ func NewRescanLoop(st *store.Store, dir string, interval time.Duration, cc confi
ignore: matcher,
compileOpts: compiler.ConfigOptions(cc),
status: status,
rescanLog: rescanLog,
}, nil
}

Expand Down Expand Up @@ -172,7 +175,15 @@ func (r *RescanLoop) scan(ctx context.Context) {
now := r.now()

snap := rescanstat.Snapshot{RunAt: now.Unix()}
defer func() { r.status.Set(int64(r.interval/time.Second), snap) }()
defer func() {
r.status.Set(int64(r.interval/time.Second), snap)

if r.rescanLog != nil {
if err := r.rescanLog.Append(snap); err != nil {
r.logger.Warn("rescan: failed to persist tick", "err", err)
}
}
}()

walkErr := r.walkFn(r.dir, func(path string, d os.DirEntry, err error) error {
if err != nil {
Expand Down
Loading
Loading