Skip to content

Commit 6be58d4

Browse files
[core] feat: add support for caching fetching results
1 parent e075165 commit 6be58d4

File tree

9 files changed

+291
-28
lines changed

9 files changed

+291
-28
lines changed

libs/core/garf_core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@
2626
'ApiReportFetcher',
2727
]
2828

29-
__version__ = '0.4.3'
29+
__version__ = '0.5.0'

libs/core/garf_core/cache.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Stores and loads reports from a cache instead of calling API."""
16+
17+
from __future__ import annotations
18+
19+
import datetime
20+
import hashlib
21+
import json
22+
import logging
23+
import os
24+
import pathlib
25+
from typing import Final
26+
27+
from garf_core import exceptions, query_editor, report
28+
29+
logger = logging.getLogger(__name__)
30+
31+
32+
class GarfCacheFileNotFoundError(exceptions.GarfError):
  """Raised when no usable cached report is available for a query."""
34+
35+
36+
# Folder used for cached reports when no explicit location is given: the
# GARF_CACHE_LOCATION environment variable if set, otherwise a folder under
# the user's home directory. Resolved once, when the module is imported.
DEFAULT_CACHE_LOCATION: Final[str] = os.environ.get(
  'GARF_CACHE_LOCATION',
  str(pathlib.Path.home().joinpath('.garf/cache/')),
)
39+
40+
41+
class GarfCache:
42+
"""Stores and loads reports from a cache instead of calling API.
43+
44+
Attribute:
45+
location: Folder where cached results are stored.
46+
"""
47+
48+
def __init__(
49+
self,
50+
location: str | None = None,
51+
ttl_seconds: int = 3600,
52+
) -> None:
53+
"""Stores and loads reports from a cache instead of calling API.
54+
55+
Args:
56+
location: Folder where cached results are stored.
57+
ttl_seconds: Maximum lifespan of cached objects.
58+
"""
59+
self.location = pathlib.Path(location or DEFAULT_CACHE_LOCATION)
60+
self.ttl_seconds = ttl_seconds
61+
62+
@property
63+
def max_cache_timestamp(self) -> float:
64+
return (
65+
datetime.datetime.now() - datetime.timedelta(seconds=self.ttl_seconds)
66+
).timestamp()
67+
68+
def load(
69+
self, query: query_editor.BaseQueryElements, args=None, kwargs=None
70+
) -> report.GarfReport:
71+
"""Loads report from cache based on query definition.
72+
73+
Args:
74+
query: Query elements.
75+
args: Query parameters.
76+
kwargs: Optional keyword arguments.
77+
78+
Returns:
79+
Cached report.
80+
81+
Raises:
82+
GarfCacheFileNotFoundError: If cached report not found
83+
"""
84+
args_hash = args.hash if args else ''
85+
kwargs_hash = (
86+
hashlib.md5(json.dumps(kwargs).encode('utf-8')).hexdigest()
87+
if kwargs
88+
else ''
89+
)
90+
hash_identifier = f'{query.hash}:{args_hash}:{kwargs_hash}'
91+
cached_path = self.location / f'{hash_identifier}.json'
92+
if (
93+
cached_path.exists()
94+
and cached_path.stat().st_ctime > self.max_cache_timestamp
95+
):
96+
with open(cached_path, 'r', encoding='utf-8') as f:
97+
data = json.load(f)
98+
logger.debug('Report is loaded from cache: %s', str(cached_path))
99+
return report.GarfReport.from_json(data)
100+
raise GarfCacheFileNotFoundError
101+
102+
def save(
103+
self,
104+
fetched_report: report.GarfReport,
105+
query: query_editor.BaseQueryElements,
106+
args=None,
107+
kwargs=None,
108+
) -> None:
109+
"""Saves report to cache based on query definition.
110+
111+
Args:
112+
fetched_report: Report to save.
113+
query: Query elements.
114+
args: Query parameters.
115+
kwargs: Optional keyword arguments.
116+
"""
117+
self.location.mkdir(parents=True, exist_ok=True)
118+
args_hash = args.hash if args else ''
119+
kwargs_hash = (
120+
hashlib.md5(json.dumps(kwargs).encode('utf-8')).hexdigest()
121+
if kwargs
122+
else ''
123+
)
124+
hash_identifier = f'{query.hash}:{args_hash}:{kwargs_hash}'
125+
cached_path = self.location / f'{hash_identifier}.json'
126+
logger.debug('Report is saved to cache: %s', str(cached_path))
127+
with open(cached_path, 'w', encoding='utf-8') as f:
128+
json.dump(fetched_report.to_json(), f)

libs/core/garf_core/query_editor.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515

1616
from __future__ import annotations
1717

18-
import dataclasses
1918
import datetime
19+
import hashlib
20+
import json
2021
import logging
2122
import re
2223
from typing import Generator, Union
@@ -39,6 +40,11 @@ class GarfQueryParameters(pydantic.BaseModel):
3940
macro: QueryParameters = pydantic.Field(default_factory=dict)
4041
template: QueryParameters = pydantic.Field(default_factory=dict)
4142

43+
@property
def hash(self) -> str:
  """MD5 hex digest of the parameters, used to identify cached reports.

  Fields set to None are left out of the digest.
  """
  hash_fields = self.model_dump(exclude_none=True)
  # sort_keys keeps the digest stable when the same parameters are supplied
  # in a different order.
  return hashlib.md5(
    json.dumps(hash_fields, sort_keys=True).encode('utf-8')
  ).hexdigest()
47+
4248

4349
class GarfMacroError(query_parser.GarfQueryError):
4450
"""Specifies incorrect macro in Garf query."""
@@ -52,33 +58,32 @@ class GarfBuiltInQueryError(query_parser.GarfQueryError):
5258
"""Specifies non-existing builtin query."""
5359

5460

55-
@dataclasses.dataclass
56-
class BaseQueryElements:
61+
class BaseQueryElements(pydantic.BaseModel):
5762
"""Contains raw query and parsed elements.
5863
5964
Attributes:
60-
title: Title of the query that needs to be parsed.
61-
text: Text of the query that needs to be parsed.
62-
resource_name: Name of Google Ads API reporting resource.
63-
fields: Ads API fields that need to be fetched.
64-
column_names: Friendly names for fields which are used when saving data
65-
column_names: Friendly names for fields which are used when saving data
66-
customizers: Attributes of fields that need to be be extracted.
67-
virtual_columns: Attributes of fields that need to be be calculated.
68-
is_builtin_query: Whether query is built-in.
65+
title: Title of the query that needs to be parsed.
66+
text: Text of the query that needs to be parsed.
67+
resource_name: Name of Google Ads API reporting resource.
68+
fields: Ads API fields that need to be fetched.
69+
column_names: Friendly names for fields which are used when saving data
70+
column_names: Friendly names for fields which are used when saving data
71+
customizers: Attributes of fields that need to be extracted.
72+
virtual_columns: Attributes of fields that need to be calculated.
73+
is_builtin_query: Whether query is built-in.
6974
"""
7075

71-
title: str
76+
title: str | None
7277
text: str
7378
resource_name: str | None = None
74-
fields: list[str] = dataclasses.field(default_factory=list)
75-
filters: list[str] = dataclasses.field(default_factory=list)
76-
sorts: list[str] = dataclasses.field(default_factory=list)
77-
column_names: list[str] = dataclasses.field(default_factory=list)
78-
customizers: dict[str, dict[str, str]] = dataclasses.field(
79+
fields: list[str] = pydantic.Field(default_factory=list)
80+
filters: list[str] = pydantic.Field(default_factory=list)
81+
sorts: list[str] = pydantic.Field(default_factory=list)
82+
column_names: list[str] = pydantic.Field(default_factory=list)
83+
customizers: dict[str, query_parser.Customizer] = pydantic.Field(
7984
default_factory=dict
8085
)
81-
virtual_columns: dict[str, query_parser.VirtualColumn] = dataclasses.field(
86+
virtual_columns: dict[str, query_parser.VirtualColumn] = pydantic.Field(
8287
default_factory=dict
8388
)
8489
is_builtin_query: bool = False
@@ -87,12 +92,16 @@ def __eq__(self, other: BaseQueryElements) -> bool: # noqa: D105
8792
return (
8893
self.column_names,
8994
self.fields,
95+
self.filters,
96+
self.sorts,
9097
self.resource_name,
9198
self.customizers,
9299
self.virtual_columns,
93100
) == (
94101
other.column_names,
95102
other.fields,
103+
other.filters,
104+
other.sorts,
96105
other.resource_name,
97106
other.customizers,
98107
other.virtual_columns,
@@ -103,6 +112,11 @@ def request(self) -> str:
103112
"""API request."""
104113
return ','.join(self.fields)
105114

115+
@property
def hash(self) -> str:
  """MD5 hex digest of the parsed query, used to identify cached reports.

  `title` and `text` are excluded from the digest, and so are fields set
  to None.
  """
  hash_fields = self.model_dump(exclude_none=True, exclude={'title', 'text'})
  # sort_keys makes the digest independent of dict insertion order, so
  # elements whose mappings hold the same entries hash the same.
  return hashlib.md5(
    json.dumps(hash_fields, sort_keys=True).encode('utf-8')
  ).hexdigest()
119+
106120

107121
class CommonParametersMixin:
108122
"""Helper mixin to inject set of common parameters to all queries."""

libs/core/garf_core/report_fetcher.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@
2323

2424
import asyncio
2525
import logging
26+
import pathlib
2627
from typing import Callable
2728

2829
from opentelemetry import trace
2930

3031
from garf_core import (
3132
api_clients,
33+
cache,
3234
exceptions,
3335
parsers,
3436
query_editor,
@@ -63,6 +65,8 @@ class ApiReportFetcher:
6365
query_specification_builder: Class to perform query parsing.
6466
builtin_queries:
6567
Mapping between query name and function for generating GarfReport.
68+
enable_cache: Whether to load / save report from / to cache.
69+
cache: Cache object.
6670
"""
6771

6872
def __init__(
@@ -74,6 +78,9 @@ def __init__(
7478
),
7579
builtin_queries: dict[str, Callable[[ApiReportFetcher], report.GarfReport]]
7680
| None = None,
81+
enable_cache: bool = False,
82+
cache_path: str | pathlib.Path | None = None,
83+
cache_ttl_seconds: int = 3600,
7784
**kwargs: str,
7885
) -> None:
7986
"""Instantiates ApiReportFetcher based on provided api client.
@@ -84,11 +91,16 @@ def __init__(
8491
query_specification_builder: Class to perform query parsing.
8592
builtin_queries:
8693
Mapping between query name and function for generating GarfReport.
94+
enable_cache: Whether to load / save report from / to cache.
95+
cache_path: Optional path to cache folder.
96+
cache_ttl_seconds: Maximum lifespan of cached reports.
8797
"""
8898
self.api_client = api_client
8999
self.parser = parser
90100
self.query_specification_builder = query_specification_builder
91101
self.query_args = kwargs
102+
self.enable_cache = enable_cache
103+
self.cache = cache.GarfCache(cache_path, cache_ttl_seconds)
92104
self.builtin_queries = builtin_queries or {}
93105

94106
def add_builtin_queries(
@@ -156,13 +168,24 @@ def fetch(
156168
)
157169
return builtin_report(self, **kwargs)
158170

171+
if self.enable_cache:
172+
try:
173+
cached_report = self.cache.load(query, args, kwargs)
174+
logger.warning('Cached version of report is loaded')
175+
span.set_attribute('is_cached_report', True)
176+
return cached_report
177+
except cache.GarfCacheFileNotFoundError:
178+
logger.debug('Cached version not found, generating')
159179
response = self.api_client.call_api(query, **kwargs)
160180
if not response:
161181
return report.GarfReport(query_specification=query)
162182

163183
parsed_response = self.parser(query).parse_response(response)
164-
return report.GarfReport(
184+
fetched_report = report.GarfReport(
165185
results=parsed_response,
166186
column_names=query.column_names,
167187
query_specification=query,
168188
)
189+
if self.enable_cache:
190+
self.cache.save(fetched_report, query, args, kwargs)
191+
return fetched_report

libs/core/tests/unit/test_cache.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
from garf_core import cache, query_editor, report
17+
18+
19+
class TestGarfCache:
  """Unit tests for saving reports to and loading them from GarfCache."""

  def test_load_returns_report_from_cache(self, tmp_path):
    """A report saved for a query is returned by load for the same query."""
    test_cache = cache.GarfCache(location=str(tmp_path))
    test_report = report.GarfReport(results=[[1]], column_names=['test'])
    query = query_editor.QuerySpecification(
      text='SELECT test FROM test'
    ).generate()

    test_cache.save(test_report, query)
    loaded_report = test_cache.load(query)

    assert loaded_report == test_report

  def test_load_raises_error_on_outdated_cache(self, tmp_path):
    """With a zero TTL a saved report is already outdated when loaded."""
    test_cache = cache.GarfCache(location=str(tmp_path), ttl_seconds=0)
    test_report = report.GarfReport(results=[[1]], column_names=['test'])
    query = query_editor.QuerySpecification(
      text='SELECT test FROM test'
    ).generate()

    test_cache.save(test_report, query)
    with pytest.raises(cache.GarfCacheFileNotFoundError):
      test_cache.load(query)

0 commit comments

Comments
 (0)