Skip to content

Commit 4e1a85e

Browse files
wesm and claude committed
fix: Filter cached data when using --mtd or --since flags
When loading from cache with --mtd or --since, the cached data was not being filtered to match the requested date range. This caused all cached transactions to be shown instead of just the requested subset.

Added _filter_df_by_start_date() method and apply it after cache load. Includes 8 unit tests for the filtering logic.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent eac3a08 commit 4e1a85e

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed

moneyflow/app.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,22 @@ def _determine_date_range(self):
389389

390390
return start_date, end_date, cache_year_filter, cache_since_filter
391391

392+
@staticmethod
def _filter_df_by_start_date(df: pl.DataFrame, start_date: str) -> pl.DataFrame:
    """Drop every transaction dated before ``start_date``.

    Cached data can span a wider range than the one requested (for example a
    full-year cache serving an --mtd request), so it is trimmed after loading
    when --mtd or --since is given.

    Args:
        df: Transaction DataFrame containing a 'date' column.
        start_date: Inclusive lower bound as a YYYY-MM-DD string.

    Returns:
        A DataFrame holding only the rows whose date is >= start_date.
    """
    # Parse the bound inside polars so the comparison is made on date values.
    cutoff = pl.lit(start_date).str.to_date()
    on_or_after_cutoff = pl.col("date") >= cutoff
    return df.filter(on_or_after_cutoff)
407+
392408
def _store_data(self, df, categories, category_groups):
393409
"""Store data in data manager and state."""
394410
self.data_manager.df = df
@@ -1171,6 +1187,15 @@ async def initialize_data(self) -> None:
11711187

11721188
if cached_data:
11731189
df, categories, category_groups = cached_data
1190+
# Filter cached data to match requested date range (e.g., --mtd)
1191+
# Cache may contain more data than requested (e.g., full year cache for MTD request)
1192+
if start_date:
1193+
original_count = len(df)
1194+
df = self._filter_df_by_start_date(df, start_date)
1195+
if len(df) < original_count:
1196+
loading_status.update(
1197+
f"📦 Filtered cache: {len(df):,} of {original_count:,} transactions"
1198+
)
11741199
else:
11751200
# Step 6: Fetch from API with retry logic
11761201
fetch_result = await self._fetch_data_with_retry(

tests/test_cache.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,3 +988,183 @@ def test_empty_string_since_filter(
988988
# Empty string is treated as no filter (all data)
989989
assert cache_manager.is_cache_valid(since="")
990990
assert cache_manager.is_cache_valid(since=None) # Cache covers all data
991+
992+
993+
class TestCacheDataFiltering:
    """Exercise start-date filtering of cached transactions (--mtd / --since)."""

    # Every scenario filters from the first day of December 2025.
    CUTOFF = "2025-12-01"

    @staticmethod
    def _frame(ids, dates, merchants=None, amounts=None, **extra_columns):
        """Build a transaction DataFrame with id/date/merchant/amount columns.

        When not supplied, merchants default to "Store A", "Store B", ... and
        amounts to -10.0, -20.0, ... Extra keyword columns are appended after
        the four standard ones.
        """
        count = len(ids)
        if merchants is None:
            merchants = [f"Store {chr(ord('A') + i)}" for i in range(count)]
        if amounts is None:
            amounts = [-10.0 * (i + 1) for i in range(count)]
        return pl.DataFrame(
            {
                "id": ids,
                "date": dates,
                "merchant": merchants,
                "amount": amounts,
                **extra_columns,
            }
        )

    def _filter(self, df):
        """Run the filter under test against the shared December cutoff."""
        # Imported lazily, mirroring the per-test import of the app module.
        from moneyflow.app import MoneyflowApp

        return MoneyflowApp._filter_df_by_start_date(df, self.CUTOFF)

    def test_filter_by_start_date_basic(self):
        """Test basic filtering of cached data by start date."""
        # Transactions spread over several months; only December should remain
        # (simulating --mtd in December).
        month_days = [(1, 15), (2, 10), (3, 5), (12, 1), (12, 15)]
        df = self._frame(
            ["tx1", "tx2", "tx3", "tx4", "tx5"],
            [datetime(2025, month, day) for month, day in month_days],
        )

        kept = self._filter(df)

        assert len(kept) == 2
        assert kept["id"].to_list() == ["tx4", "tx5"]

    def test_filter_by_start_date_includes_boundary(self):
        """Test that filtering includes transactions on the start date."""
        df = self._frame(
            ["tx1", "tx2", "tx3"],
            [
                datetime(2025, 12, 1),
                datetime(2025, 12, 1),  # Duplicate boundary date is kept too
                datetime(2025, 12, 2),
            ],
        )

        kept = self._filter(df)

        assert len(kept) == 3
        assert kept["id"].to_list() == ["tx1", "tx2", "tx3"]

    def test_filter_by_start_date_excludes_earlier(self):
        """Test that filtering excludes transactions before start date."""
        df = self._frame(
            ["tx1", "tx2", "tx3"],
            [
                datetime(2025, 11, 30),  # One day early: dropped
                datetime(2025, 12, 1),  # On the boundary: kept
                datetime(2025, 12, 2),  # After the boundary: kept
            ],
        )

        kept = self._filter(df)

        assert len(kept) == 2
        assert "tx1" not in kept["id"].to_list()

    def test_filter_by_start_date_empty_result(self):
        """Test filtering when all transactions are before start date."""
        df = self._frame(
            ["tx1", "tx2"],
            [datetime(2025, 1, 1), datetime(2025, 6, 15)],
        )

        kept = self._filter(df)

        assert len(kept) == 0

    def test_filter_by_start_date_all_included(self):
        """Test filtering when all transactions are after start date."""
        df = self._frame(
            ["tx1", "tx2", "tx3"],
            [datetime(2025, 12, day) for day in (5, 10, 15)],
        )

        kept = self._filter(df)

        assert len(kept) == 3

    def test_filter_preserves_all_columns(self):
        """Test that filtering preserves all DataFrame columns."""
        df = self._frame(
            ["tx1", "tx2"],
            [datetime(2025, 11, 15), datetime(2025, 12, 15)],
            category=["Food", "Shopping"],
            notes=["Note 1", "Note 2"],
        )

        kept = self._filter(df)

        assert kept.columns == df.columns
        assert len(kept) == 1
        # Each expected value belongs to the single surviving row (tx2).
        expected = {"merchant": "Store B", "category": "Shopping", "notes": "Note 2"}
        for column, value in expected.items():
            assert kept[column][0] == value

    def test_filter_with_string_dates(self):
        """Test filtering works with string date column (pre-parsed)."""
        # Dates start out as strings and are parsed to dates before filtering.
        raw = self._frame(
            ["tx1", "tx2", "tx3"],
            ["2025-11-30", "2025-12-01", "2025-12-15"],
        )
        df = raw.with_columns(pl.col("date").str.to_date())

        kept = self._filter(df)

        assert len(kept) == 2

    def test_filter_mtd_scenario(self):
        """Test realistic MTD filtering scenario with full year of data."""
        # Simulate a full-year cache: two transactions (1st and 15th) per month.
        month_days = [(month, day) for month in range(1, 13) for day in (1, 15)]
        total = len(month_days)
        df = self._frame(
            [f"tx_{month}_{day}" for month, day in month_days],
            [datetime(2025, month, day) for month, day in month_days],
            merchants=[f"Store {i}" for i in range(total)],
            amounts=[-10.0] * total,
        )

        # Filter to December (MTD scenario)
        kept = self._filter(df)

        assert len(kept) == 2  # Dec 1 and Dec 15
        assert all(stamp.month == 12 for stamp in kept["date"].to_list())

0 commit comments

Comments
 (0)