Skip to content

Commit f4fbd47

Browse files
wesm and claude
committed
fix: Prevent cache invalidation from display filters and ensure no gaps
Three critical fixes for the two-tier cache system:

1. Separate display filters from cache behavior
   - --mtd and --year now only filter the VIEW, not what's cached
   - Cache always stores full data (year=None, since=None)
   - Prevents --mtd from nuking an existing full cache

2. Fix partial refresh API calls
   - Monarch API requires BOTH startDate and endDate
   - Was passing None for one date, causing API failure
   - Added get_hot_refresh_date_range() and get_cold_refresh_date_range()

3. Prevent gaps between cache tiers
   - Hot refresh now uses cold's latest_date (from metadata)
   - Cold refresh now uses hot's earliest_date (from metadata)
   - Both use 7-day overlap (TIER_OVERLAP_DAYS)
   - Fixes gap that would grow daily as boundary moves

Added 12 regression tests covering:
- Display filters don't invalidate cache
- Partial refresh date ranges always non-None
- Tier overlap ensures no gaps

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 35e41d9 commit f4fbd47

File tree

3 files changed

+381
-31
lines changed

3 files changed

+381
-31
lines changed

moneyflow/app.py

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def __init__(
245245
self.cache_manager = None # Will be set if caching is enabled
246246
self.cache_year_filter = None # Track what filters the cache uses
247247
self.cache_since_filter = None
248+
self.display_start_date = None # Display filter (--mtd/--since) separate from cache
248249
self.config_dir = config_dir # Custom config directory (None = default ~/.moneyflow)
249250
self.encryption_key: Optional[bytes] = None # Encryption key for cache (set after login)
250251
# Controller will be initialized after data_manager is ready
@@ -367,27 +368,27 @@ def _initialize_managers(
367368
def _determine_date_range(self):
368369
"""Determine date range based on CLI arguments.
369370
371+
Separates display filtering (--mtd, --since) from cache behavior:
372+
- display_start_date: What the user wants to VIEW (filters the UI)
373+
- cache filters: What the cache actually STORES (preserved on refresh)
374+
370375
Returns:
371-
tuple: (start_date, end_date, cache_year_filter, cache_since_filter)
376+
tuple: (display_start_date, cache_year_filter, cache_since_filter)
372377
"""
378+
# Display filter - what user wants to see
373379
if self.custom_start_date:
374-
start_date = self.custom_start_date
375-
end_date = datetime.now().strftime("%Y-%m-%d")
376-
cache_year_filter = None
377-
cache_since_filter = self.custom_start_date
380+
display_start_date = self.custom_start_date
378381
elif self.start_year:
379-
start_date = f"{self.start_year}-01-01"
380-
end_date = datetime.now().strftime("%Y-%m-%d")
381-
cache_year_filter = self.start_year
382-
cache_since_filter = None
382+
display_start_date = f"{self.start_year}-01-01"
383383
else:
384-
# Fetch ALL transactions (no date filter for offline-first approach)
385-
start_date = None
386-
end_date = None
387-
cache_year_filter = None
388-
cache_since_filter = None
384+
display_start_date = None
385+
386+
# Cache filters - determined by existing cache or first fetch
387+
# These are set later based on what's actually cached
388+
cache_year_filter = None
389+
cache_since_filter = None
389390

390-
return start_date, end_date, cache_year_filter, cache_since_filter
391+
return display_start_date, cache_year_filter, cache_since_filter
391392

392393
@staticmethod
393394
def _filter_df_by_start_date(df: pl.DataFrame, start_date: str) -> pl.DataFrame:
@@ -1027,14 +1028,15 @@ async def fetch_operation():
10271028
)
10281029

10291030
# Save to cache for next time (only if --cache was passed)
1031+
# Always save as full cache (no filters) - display filters applied separately
10301032
if self.cache_manager:
10311033
loading_status.update("💾 Saving to cache...")
10321034
self.cache_manager.save_cache(
10331035
transactions_df=df,
10341036
categories=categories,
10351037
category_groups=category_groups,
1036-
year=self.cache_year_filter,
1037-
since=self.cache_since_filter,
1038+
year=None, # Full cache - no year filter
1039+
since=None, # Full cache - no since filter
10381040
)
10391041
loading_status.update(f"✅ Loaded {len(df):,} transactions and cached!")
10401042
else:
@@ -1102,14 +1104,12 @@ def update_progress(msg: str) -> None:
11021104

11031105
try:
11041106
# Fetch the expired tier from API
1107+
# Use helper methods to ensure both dates are always provided (API requirement)
11051108
if is_hot_refresh:
1106-
fetch_start, fetch_end = boundary_str, None
1107-
loading_status.update(f"📊 Fetching transactions since {boundary_str}...")
1109+
fetch_start, fetch_end = self.cache_manager.get_hot_refresh_date_range()
1110+
loading_status.update(f"📊 Fetching transactions since {fetch_start}...")
11081111
else:
1109-
fetch_start, fetch_end = (
1110-
None,
1111-
(boundary_date - timedelta(days=1)).strftime("%Y-%m-%d"),
1112-
)
1112+
fetch_start, fetch_end = self.cache_manager.get_cold_refresh_date_range()
11131113
loading_status.update(
11141114
f"📊 Fetching historical transactions before {boundary_str}..."
11151115
)
@@ -1322,8 +1322,8 @@ async def initialize_data(self) -> None:
13221322
profile_dir=determined_profile_dir, backend_type=determined_backend_type
13231323
)
13241324

1325-
# Step 4: Determine date range
1326-
start_date, end_date, self.cache_year_filter, self.cache_since_filter = (
1325+
# Step 4: Determine display filter (separate from cache)
1326+
self.display_start_date, self.cache_year_filter, self.cache_since_filter = (
13271327
self._determine_date_range()
13281328
)
13291329

@@ -1335,9 +1335,9 @@ async def initialize_data(self) -> None:
13351335
df, categories, category_groups = cached_data
13361336
# Filter cached data to match requested date range (e.g., --mtd)
13371337
# Cache may contain more data than requested (e.g., full year cache for MTD request)
1338-
if start_date:
1338+
if self.display_start_date:
13391339
original_count = len(df)
1340-
df = self._filter_df_by_start_date(df, start_date)
1340+
df = self._filter_df_by_start_date(df, self.display_start_date)
13411341
if len(df) < original_count:
13421342
loading_status.update(
13431343
f"📦 Filtered cache: {len(df):,} of {original_count:,} transactions"
@@ -1348,32 +1348,43 @@ async def initialize_data(self) -> None:
13481348
if partial_result:
13491349
df, categories, category_groups = partial_result
13501350
# Filter if needed
1351-
if start_date:
1351+
if self.display_start_date:
13521352
original_count = len(df)
1353-
df = self._filter_df_by_start_date(df, start_date)
1353+
df = self._filter_df_by_start_date(df, self.display_start_date)
13541354
if len(df) < original_count:
13551355
loading_status.update(
13561356
f"📦 Filtered: {len(df):,} of {original_count:,} transactions"
13571357
)
13581358
else:
13591359
# Partial refresh failed, fall back to full fetch
1360+
# Always fetch full data - display filter applied after
13601361
fetch_result = await self._fetch_data_with_retry(
1361-
creds, start_date, end_date, loading_status
1362+
creds, None, None, loading_status
13621363
)
13631364
if fetch_result is None:
13641365
has_error = True
13651366
return
13661367
df, categories, category_groups = fetch_result
13671368
else:
13681369
# Step 6: Full fetch from API (BOTH, ALL, or no cache)
1370+
# Always fetch full data - display filter applied after
13691371
fetch_result = await self._fetch_data_with_retry(
1370-
creds, start_date, end_date, loading_status
1372+
creds, None, None, loading_status
13711373
)
13721374
if fetch_result is None:
13731375
has_error = True
13741376
return
13751377
df, categories, category_groups = fetch_result
13761378

1379+
# Apply display filter after fetch (cache stores full data)
1380+
if self.display_start_date and strategy != RefreshStrategy.NONE:
1381+
original_count = len(df)
1382+
df = self._filter_df_by_start_date(df, self.display_start_date)
1383+
if len(df) < original_count:
1384+
loading_status.update(
1385+
f"📦 Filtered: {len(df):,} of {original_count:,} transactions"
1386+
)
1387+
13771388
# Step 7: Store data
13781389
self._store_data(df, categories, category_groups)
13791390

moneyflow/cache_manager.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,73 @@ def _get_boundary_date(self) -> date:
8888
"""Get the boundary date between hot and cold cache (90 days ago)."""
8989
return date.today() - timedelta(days=self.HOT_WINDOW_DAYS)
9090

91+
# Overlap days to fetch before cold's end date (ensures no gaps from timing/date changes)
92+
TIER_OVERLAP_DAYS = 7
93+
94+
def get_hot_refresh_date_range(self) -> tuple[str, str]:
95+
"""Get the date range for refreshing the hot cache tier.
96+
97+
CRITICAL: Must start from cold cache's latest_date to avoid gaps.
98+
The boundary moves forward each day, but cold data is fixed until
99+
cold cache expires (30 days). Without this, gaps would grow daily.
100+
101+
Subtracts TIER_OVERLAP_DAYS to handle transactions that might change
102+
dates or timing variations during refresh.
103+
104+
Returns:
105+
Tuple of (start_date, end_date) as ISO format strings.
106+
Both values are always non-None to satisfy API requirements.
107+
"""
108+
today = date.today()
109+
110+
# MUST use cold cache's latest date to avoid gaps
111+
try:
112+
metadata = self.load_metadata()
113+
cold_meta = metadata.get("cold", {})
114+
cold_latest = cold_meta.get("latest_date")
115+
if cold_latest:
116+
cold_end = date.fromisoformat(cold_latest)
117+
# Overlap: start a few days before cold ends
118+
start = (cold_end - timedelta(days=self.TIER_OVERLAP_DAYS))
119+
return start.isoformat(), today.isoformat()
120+
except Exception:
121+
pass
122+
123+
# Fallback only if no cold metadata (shouldn't happen in normal use)
124+
boundary = self._get_boundary_date()
125+
start = (boundary - timedelta(days=self.TIER_OVERLAP_DAYS)).isoformat()
126+
return start, today.isoformat()
127+
128+
def get_cold_refresh_date_range(self) -> tuple[str, str]:
129+
"""Get the date range for refreshing the cold cache tier.
130+
131+
CRITICAL: Must end at hot cache's earliest_date + overlap to ensure
132+
proper coverage. Uses stored metadata, not computed boundary.
133+
134+
Returns:
135+
Tuple of (start_date, end_date) as ISO format strings.
136+
Both values are always non-None to satisfy API requirements.
137+
"""
138+
start = "2000-01-01"
139+
140+
# Use hot cache's earliest date to ensure overlap
141+
try:
142+
metadata = self.load_metadata()
143+
hot_meta = metadata.get("hot", {})
144+
hot_earliest = hot_meta.get("earliest_date")
145+
if hot_earliest:
146+
hot_start = date.fromisoformat(hot_earliest)
147+
# Overlap: end a few days after hot starts
148+
end = (hot_start + timedelta(days=self.TIER_OVERLAP_DAYS)).isoformat()
149+
return start, end
150+
except Exception:
151+
pass
152+
153+
# Fallback only if no hot metadata
154+
boundary = self._get_boundary_date()
155+
end = (boundary + timedelta(days=self.TIER_OVERLAP_DAYS)).isoformat()
156+
return start, end
157+
91158
def cache_exists(self) -> bool:
92159
"""Check if two-tier cache files exist."""
93160
return (

0 commit comments

Comments (0)