Skip to content

Commit 91653ac

Browse files
Merge pull request #32 from merendamattia/copilot/add-caching-for-yahoo-finance
Add session-level cache for Yahoo Finance data retrieval
2 parents 14f5588 + af65e4a commit 91653ac

File tree

3 files changed

+443
-9
lines changed

3 files changed

+443
-9
lines changed

.env.example

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ LOG_LEVEL=INFO
2222

2323
# RAG Configuration (Asset Retriever)
2424
RAG_DATA_DIR=dataset/ETFs
25-
RAG_CACHE_DIR=dataset/ETFs/.cache
26-
RAG_EMBEDDINGS_CACHE=dataset/ETFs/.cache/embeddings.pkl
25+
RAG_CACHE_DIR=${RAG_DATA_DIR}/.cache
2726
RAG_CHUNK_SIZE=800
2827
RAG_CHUNK_OVERLAP=120
2928
RAG_EMBEDDING_MODEL=all-roberta-large-v1
29+
RAG_EMBEDDINGS_CACHE=${RAG_CACHE_DIR}/${RAG_EMBEDDING_MODEL}-embeddings.pkl
3030

3131
# Monte Carlo Simulation Configuration
3232
MONTECARLO_SIMULATION_SCENARIOS=1000

src/tools/analyze_financial_asset.py

Lines changed: 122 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
This module provides a single tool that combines price retrieval and return calculation
55
for a comprehensive financial asset analysis.
66
"""
7+
78
import logging
89
from datetime import datetime, timedelta
9-
from typing import Any, Dict
10+
from typing import Any, Dict, Optional
1011

1112
import yfinance as yf
1213
from datapizza.tools import tool
@@ -16,9 +17,94 @@
1617
# Configure logger
1718
logger = logging.getLogger(__name__)
1819

20+
# Cache for storing financial analysis results (session-level)
21+
_CACHE: Dict[str, str] = {}  # maps "{TICKER}_{years}" keys to cached JSON response strings
22+
23+
24+
def _get_cache_key(ticker: str, years: int) -> str:
25+
"""
26+
Generate a cache key for the given ticker and years.
27+
28+
Args:
29+
ticker: The ticker symbol
30+
years: Number of years to analyze
31+
32+
Returns:
33+
str: Cache key in format "{TICKER}_{years}"
34+
"""
35+
return f"{ticker.upper()}_{years}"
36+
37+
38+
def _get_cached_analysis(ticker: str, years: int) -> Optional[str]:
    """
    Look up a previously stored analysis for a ticker/years pair.

    The Streamlit session store (``st.session_state.financial_asset_cache``)
    is consulted first when Streamlit can be imported; the module-level
    ``_CACHE`` dictionary is consulted afterwards.

    Args:
        ticker: The ticker symbol
        years: Number of years to analyze

    Returns:
        Optional[str]: The cached JSON response, or None on a cache miss
    """
    key = _get_cache_key(ticker, years)

    # First choice: the store kept in Streamlit's session state.
    try:
        import streamlit as st

        if hasattr(st, "session_state"):
            state = st.session_state
            if hasattr(state, "financial_asset_cache"):
                session_store = state.financial_asset_cache
                if key in session_store:
                    logger.info("Cache HIT for %s (from session_state)", key)
                    return session_store[key]
    except (ImportError, RuntimeError):
        # Streamlit not available or not in a session context
        pass

    # Second choice: the module-level dictionary.
    try:
        stored = _CACHE[key]
    except KeyError:
        logger.info("Cache MISS for %s", key)
        return None

    logger.info("Cache HIT for %s (from module cache)", key)
    return stored
73+
74+
75+
def _set_cached_analysis(ticker: str, years: int, result: str) -> None:
    """
    Record an analysis result under the key for this ticker/years pair.

    The result is written to the Streamlit session store
    (``st.session_state.financial_asset_cache``, created on first use) when
    Streamlit can be imported, and is always written to the module-level
    ``_CACHE`` dictionary as well.

    Args:
        ticker: The ticker symbol
        years: Number of years to analyze
        result: JSON response to cache
    """
    key = _get_cache_key(ticker, years)

    # Session-state store, when Streamlit is usable.
    try:
        import streamlit as st

        if hasattr(st, "session_state"):
            state = st.session_state
            if not hasattr(state, "financial_asset_cache"):
                state.financial_asset_cache = {}
            state.financial_asset_cache[key] = result
            logger.debug("Cached result for %s in session_state", key)
    except (ImportError, RuntimeError):
        # Streamlit not available or not in a session context
        pass

    # The module-level dictionary is written unconditionally as a fallback.
    _CACHE[key] = result
    logger.debug("Cached result for %s in module cache", key)
102+
19103

20104
@tool
21-
def analyze_financial_asset(ticker: str, years: int = 10) -> str:
105+
def analyze_financial_asset(
106+
ticker: str, years: int = 10, use_cache: bool = True
107+
) -> str:
22108
"""
23109
Comprehensive financial asset analysis tool.
24110
@@ -28,10 +114,14 @@ def analyze_financial_asset(ticker: str, years: int = 10) -> str:
28114
3. Calculating returns for 1, 2, 3, 4, and 5 years (or specified years)
29115
4. Returning a structured JSON response with all key metrics
30116
117+
Results are cached per session to improve performance for repeated queries.
118+
31119
Args:
32120
ticker: The stock/ETF ticker symbol (e.g., 'SWDA', 'AAPL', 'BND')
33121
years: Number of years to analyze (default: 10). Returns will be calculated
34122
for each year from 1 to the specified number of years.
123+
use_cache: Whether to use cached results if available (default: True).
124+
Set to False to force fresh data retrieval.
35125
36126
Returns:
37127
str: A JSON string containing the analysis response with:
@@ -48,7 +138,18 @@ def analyze_financial_asset(ticker: str, years: int = 10) -> str:
48138
- error: Error message if unsuccessful
49139
"""
50140
try:
51-
logger.info("Starting analysis for %s with %d years", ticker, years)
141+
logger.info(
142+
"Starting analysis for %s with %d years (use_cache=%s)",
143+
ticker,
144+
years,
145+
use_cache,
146+
)
147+
148+
# Check cache if enabled
149+
if use_cache:
150+
cached_result = _get_cached_analysis(ticker, years)
151+
if cached_result is not None:
152+
return cached_result
52153

53154
# Step 1: Resolve symbol
54155
symbol_resolution = _search_and_resolve_symbol(ticker)
@@ -135,7 +236,19 @@ def analyze_financial_asset(ticker: str, years: int = 10) -> str:
135236
)
136237

137238
logger.info("Analysis completed successfully")
138-
return response.model_dump_json()
239+
result_json = response.model_dump_json()
240+
241+
# Cache the successful result
242+
# Note: Error responses are intentionally NOT cached because:
243+
# 1. Transient errors (network issues, API rate limits) should be retried
244+
# 2. Invalid tickers fail fast (symbol resolution is quick)
245+
# 3. Caching errors could hide resolved issues
246+
# The current cache for successful responses has no expiration (no TTL).
247+
# If error caching is needed in the future, implement with short TTL and proper invalidation.
248+
if use_cache:
249+
_set_cached_analysis(ticker, years, result_json)
250+
251+
return result_json
139252

140253
except Exception as e:
141254
logger.error("Error during analysis: %s", str(e), exc_info=True)
@@ -387,9 +500,11 @@ def get_price_at_date(target_date):
387500
logger.debug(
388501
"%d-year return: %.2f%%",
389502
year,
390-
returns_dict[f"{year}_year"]
391-
if isinstance(returns_dict[f"{year}_year"], (int, float))
392-
else None,
503+
(
504+
returns_dict[f"{year}_year"]
505+
if isinstance(returns_dict[f"{year}_year"], (int, float))
506+
else None
507+
),
393508
)
394509
else:
395510
returns_dict[f"{year}_year"] = "N/A"

0 commit comments

Comments
 (0)