Skip to content

Commit 66a0116

Browse files
fix: subdirectory dated files now decay by filename date
1 parent 5abcb96 commit 66a0116

4 files changed

Lines changed: 109 additions & 7 deletions

File tree

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ The SQLite index is a **derived cache** — always rebuildable from the Markdown
152152
| `memory/MEMORY.md` | **Evergreen** — never decays, write-protected during `flush()` |
153153
| `memory/2026-03-21.md` | **Dated** — subject to temporal decay (older memories rank lower) |
154154
| `memory/researcher_agent/` | **Agent-scoped** — isolated namespace per agent |
155-
| `memory/episodes/event.md` | **Episodic** — named events, timestamped |
155+
| `memory/episodes/known-facts.md` | **Evergreen** — non-dated file in a subdirectory, always full score |
156+
| `memory/sessions/2026-04-01.md` | **Dated** — subdirectory dated file, decays by filename date |
156157

157158
Evergreen files hold foundational facts that should always surface at full score. Dated files accumulate daily learning and fade naturally — recent memories rank higher.
158159

@@ -248,9 +249,10 @@ With the default `half_life_days=30`:
248249

249250
| File | Age source | Decays? |
250251
|------|------------|---------|
251-
| `memory/MEMORY.md`, `memory/architecture.md` (any non-dated file under `memory/`) || **No** — evergreen, always full score |
252+
| `memory/MEMORY.md`, `memory/architecture.md` (any non-dated file directly under `memory/`) || **No** — evergreen, always full score |
253+
| `memory/agents/notes.md` (non-dated file in any `memory/` subdirectory) || **No** — evergreen, same rule as root non-dated files |
252254
| `memory/2026-03-21.md` (dated daily log) | Date parsed from filename | **Yes** |
253-
| `sessions/foo.md`, `memory/agents/notes.md` (undated, non-evergreen) | File `mtime` on disk | **Yes** |
255+
| `memory/sessions/2026-03-21.md` (dated file in a direct subdirectory of `memory/`, one level deep) | Date parsed from filename | **Yes** — same rule as root dated files |
254256

255257
Evergreen files hold foundational facts — stack choices, hard constraints, permanent preferences — that should always surface at full score regardless of when they were written. Daily logs capture evolving context and fade naturally as new sessions add fresher knowledge.
256258

memweave/search/temporal_decay.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@
3333
from memweave.search.strategy import RawSearchRow
3434

3535
# Regex that matches dated memory paths: memory/YYYY-MM-DD.md
36-
# Works with both ``memory/2026-03-28.md`` and ``./memory/2026-03-28.md``
37-
_DATED_PATH_RE = re.compile(r"(?:^|\/)memory\/(\d{4})-(\d{2})-(\d{2})\.md$")
36+
# Also matches one level of subdirectory: memory/sessions/YYYY-MM-DD.md
37+
# Works with both ``memory/2026-03-28.md`` and ``./memory/sessions/2026-03-28.md``
38+
_DATED_PATH_RE = re.compile(r"(?:^|\/)memory\/(?:[^/]+\/)?(\d{4})-(\d{2})-(\d{2})\.md$")
3839

3940
_DAY_SECONDS = 86_400.0
4041

@@ -162,7 +163,7 @@ def is_evergreen_path(file_path: str) -> bool:
162163
return True
163164
if not normalized.startswith("memory/"):
164165
return False
165-
# Under memory/ but not dated → evergreen
166+
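# Under memory/ but not matching _DATED_PATH_RE → evergreen (applies at any depth; a dated filename nested more than one subdirectory deep does not match the regex and is therefore also evergreen)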
166167
return _DATED_PATH_RE.search(file_path.replace("\\", "/")) is None
167168

168169

@@ -190,12 +191,18 @@ async def _extract_date(
190191
"""Extract the effective date for a file — from path first, then mtime.
191192
192193
Steps:
193-
1. Parse date from filename (``memory/YYYY-MM-DD.md``). Return if found.
194+
1. Parse date from filename (``memory/YYYY-MM-DD.md`` or
195+
``memory/<subdir>/YYYY-MM-DD.md``). Return if found.
194196
2. If the file is evergreen → return ``None`` (no decay applies).
197+
All ``memory/`` files with non-dated filenames are evergreen at any depth.
195198
3. If ``workspace_dir`` is ``None`` → return ``None`` (cannot resolve path).
196199
4. Resolve the absolute path and ``stat()`` the file.
197200
5. Return the mtime as a :class:`datetime.date`, or ``None`` on error.
198201
202+
Steps 3–5 (mtime fallback) are only reached for files outside ``memory/``
203+
(e.g. ``extra_paths`` external files with no date in their filename).
204+
All ``memory/``-managed files are fully resolved by steps 1–2.
205+
199206
Args:
200207
file_path: Relative or absolute file path.
201208
workspace_dir: Root workspace directory used to resolve relative paths.

tests/integration/test_temporal_decay.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,55 @@ async def test_temporal_decay(workspace: Path, embedding_model: str) -> None:
8686
old_long = long_scores.get(old_path, scores[old_path])
8787

8888
assert old_long > old_decay, "Longer half-life should preserve more of the old file's score"
89+
90+
91+
_SESSION_OLD_DATE = "2026-01-01" # ~100 days before 2026-04-11
92+
_SESSION_NEW_DATE = "2026-04-01" # ~10 days before 2026-04-11
93+
_SESSION_CONTENT = (
94+
"We confirmed the rollback plan: revert the migration and restart the service.\n"
95+
"The on-call engineer will monitor for 24 hours after deployment.\n"
96+
)
97+
98+
99+
@pytest.mark.asyncio
100+
async def test_session_subdir_dated_decay(workspace: Path, embedding_model: str) -> None:
101+
"""Dated files under memory/sessions/ decay by filename date — same rule as memory/ root."""
102+
sessions_dir = workspace / "memory" / "sessions"
103+
sessions_dir.mkdir()
104+
105+
(sessions_dir / f"{_SESSION_OLD_DATE}.md").write_text(_SESSION_CONTENT)
106+
(sessions_dir / f"{_SESSION_NEW_DATE}.md").write_text(_SESSION_CONTENT)
107+
108+
config = MemoryConfig(
109+
workspace_dir=workspace,
110+
embedding=EmbeddingConfig(model=embedding_model),
111+
query=QueryConfig(min_score=0.0, max_results=10),
112+
)
113+
114+
query = "rollback plan on-call deployment"
115+
old_path = f"memory/sessions/{_SESSION_OLD_DATE}.md"
116+
new_path = f"memory/sessions/{_SESSION_NEW_DATE}.md"
117+
118+
async with MemWeave(config) as mem:
119+
await mem.index()
120+
121+
# Without decay: identical content → nearly identical scores
122+
r_no_decay = await mem.search(query, min_score=0.0)
123+
scores = {r.path: r.score for r in r_no_decay}
124+
assert old_path in scores, f"{old_path} not found in results"
125+
assert new_path in scores, f"{new_path} not found in results"
126+
diff = abs(scores[old_path] - scores[new_path])
127+
assert diff < 0.05, f"Identical content without decay should score similarly, diff={diff:.4f}"
128+
129+
# With aggressive decay: ~100-day-old session file should drop sharply
130+
# half_life=7d → 2^(-100/7) ≈ 0.00005 multiplier
131+
r_decay = await mem.search(query, min_score=0.0, decay_half_life_days=7.0)
132+
decay_scores = {r.path: r.score for r in r_decay}
133+
134+
old_decay = decay_scores.get(old_path, 0.0)
135+
new_decay = decay_scores.get(new_path, scores.get(new_path, 0.0))
136+
137+
assert new_decay > old_decay * 5, (
138+
f"With half_life=7d, new session should score >5× old. "
139+
f"new={new_decay:.4f} old={old_decay:.6f}"
140+
)

tests/unit/test_search_temporal_decay.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,12 @@ def test_empty_string(self):
156156
def test_session_file(self):
157157
assert parse_date_from_path("sessions/2026-03-28.md") is None
158158

159+
def test_subdir_dated_path(self):
160+
assert parse_date_from_path("memory/sessions/2026-03-28.md") == date(2026, 3, 28)
161+
162+
def test_subdir_dated_path_researcher(self):
163+
assert parse_date_from_path("memory/researcher/2026-04-01.md") == date(2026, 4, 1)
164+
159165

160166
# ── is_evergreen_path ─────────────────────────────────────────────────────────
161167

@@ -185,6 +191,21 @@ def test_session_file_not_evergreen(self):
185191
def test_arbitrary_file_not_evergreen(self):
186192
assert is_evergreen_path("docs/readme.md") is False
187193

194+
def test_sessions_subdir_nondated_is_evergreen(self):
195+
assert is_evergreen_path("memory/sessions/foo.md") is True
196+
197+
def test_sessions_subdir_dated_not_evergreen(self):
198+
assert is_evergreen_path("memory/sessions/2026-03-29.md") is False
199+
200+
def test_researcher_subdir_nondated_is_evergreen(self):
201+
assert is_evergreen_path("memory/researcher/analysis.md") is True
202+
203+
def test_researcher_subdir_dated_not_evergreen(self):
204+
assert is_evergreen_path("memory/researcher/2026-04-01.md") is False
205+
206+
def test_custom_subdir_nondated_is_evergreen(self):
207+
assert is_evergreen_path("memory/episodes/standup.md") is True
208+
188209

189210
# ── age_in_days ───────────────────────────────────────────────────────────────
190211

@@ -312,6 +333,26 @@ async def test_session_file_fresh_score_nearly_unchanged(self, tmp_path: Path):
312333
result = await apply_temporal_decay(rows, half_life_days=30, workspace_dir=tmp_path)
313334
assert result[0].score > 0.9
314335

336+
async def test_memory_sessions_nondated_is_evergreen(self):
337+
"""Non-dated memory/sessions/ file is evergreen — score unchanged."""
338+
rows = [_row("s_nd", "memory/sessions/known-facts.md", 0.8)]
339+
result = await apply_temporal_decay(rows, half_life_days=30, now=date(2026, 3, 28))
340+
assert result[0].score == pytest.approx(0.8)
341+
342+
async def test_memory_sessions_dated_file_decays_by_date(self):
343+
"""Dated memory/sessions/YYYY-MM-DD.md decays by filename date, no mtime needed."""
344+
rows = [_row("s_d", "memory/sessions/2026-01-01.md", 1.0)]
345+
result = await apply_temporal_decay(rows, half_life_days=30, now=date(2026, 3, 28))
346+
expected = apply_decay_to_score(1.0, 86.0, 30.0)
347+
assert result[0].score == pytest.approx(expected, rel=1e-6)
348+
349+
async def test_memory_researcher_dated_file_decays_by_date(self):
350+
"""Dated memory/researcher/YYYY-MM-DD.md decays by filename date."""
351+
rows = [_row("r_d", "memory/researcher/2026-01-01.md", 1.0)]
352+
result = await apply_temporal_decay(rows, half_life_days=30, now=date(2026, 3, 28))
353+
expected = apply_decay_to_score(1.0, 86.0, 30.0)
354+
assert result[0].score == pytest.approx(expected, rel=1e-6)
355+
315356
async def test_mtime_cache_deduplicates_stat_calls(self, tmp_path: Path, monkeypatch):
316357
"""Multiple chunks from the same file path are stat-ted only once."""
317358
import aiofiles.os as aio_os

0 commit comments

Comments
 (0)