Skip to content

Commit 15925f6

Browse files
committed
feat: add caching for timezone offsets, significantly speeds up import
This is different from PR #1181: that PR only makes the import faster but still incurs the cost on first usage. This one leverages an optional cache. Closes #533.
1 parent 47acb88 commit 15925f6

File tree

4 files changed

+91
-3
lines changed

4 files changed

+91
-3
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ docs/_build
5252

5353
# Other
5454
raw_data
55+
*.pkl

dateparser/timezone_parser.py

+51-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
import os
2+
import pickle
13
from datetime import datetime, timedelta, timezone, tzinfo
4+
from pathlib import Path
25

36
import regex as re
47

@@ -85,7 +88,52 @@ def get_local_tz_offset():
8588

8689

8790
_search_regex_parts = []
88-
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
89-
_search_regex = re.compile("|".join(_search_regex_parts))
90-
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
9191
local_tz_offset = get_local_tz_offset()
92+
93+
DEFAULT_CACHE_PATH = ".dateparser_tz_cache.pkl"
94+
95+
_tz_offsets = None
96+
_search_regex = None
97+
_search_regex_ignorecase = None
98+
99+
100+
def _load_offsets(cache=False):
    """Populate the module-level timezone offsets and search regexes.

    Sets the globals ``_tz_offsets``, ``_search_regex`` and
    ``_search_regex_ignorecase``.

    When *cache* is true, the data is first looked up in a pickle file whose
    location comes from the ``DATEPARSER_TZ_CACHE_PATH`` environment variable
    (falling back to ``DEFAULT_CACHE_PATH``). The cached payload is used only
    if the version stored alongside it equals the installed dateparser
    version; otherwise the data is rebuilt and the cache file is rewritten.

    The cache is treated as best-effort: a missing file is ignored silently,
    and an unreadable, corrupt or unwritable cache is logged and skipped
    instead of aborting the import of this module.

    Args:
        cache: Whether to read from / write to the on-disk cache.
    """
    from dateparser import __version__

    global _tz_offsets, _search_regex, _search_regex_ignorecase

    path = None
    if cache:
        path = Path(os.environ.get("DATEPARSER_TZ_CACHE_PATH", DEFAULT_CACHE_PATH))

        try:
            # NOTE(security): pickle.load can execute arbitrary code. The cache
            # path must point to a location only trusted users can write to.
            with open(path, mode="rb") as file:
                # Unpack into locals first so a stale or malformed payload
                # never leaks into the module globals.
                (
                    version,
                    tz_offsets,
                    search_regex,
                    search_regex_ignorecase,
                ) = pickle.load(file)
        except FileNotFoundError:
            # No cache yet: fall through and build it.
            pass
        except (
            OSError,
            ValueError,
            TypeError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            pickle.UnpicklingError,
        ) as ex:
            # Truncated, corrupt, foreign or unreadable cache files must not
            # break the import; log and rebuild instead.
            from .utils import get_logger

            get_logger().error("Error loading tz cache: %s", ex)
        else:
            if version == __version__:
                _tz_offsets = tz_offsets
                _search_regex = search_regex
                _search_regex_ignorecase = search_regex_ignorecase
                return

    # build_tz_offsets fills the shared parts list as a side effect; empty it
    # first so repeated calls do not accumulate duplicate alternatives.
    del _search_regex_parts[:]
    _tz_offsets = list(build_tz_offsets(_search_regex_parts))
    pattern = "|".join(_search_regex_parts)
    _search_regex = re.compile(pattern)
    _search_regex_ignorecase = re.compile(pattern, re.IGNORECASE)

    if cache:
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, mode="wb") as file:
                pickle.dump(
                    (__version__, _tz_offsets, _search_regex, _search_regex_ignorecase),
                    file,
                )
        except (OSError, pickle.PicklingError) as ex:
            # The in-memory data is already usable; failing to persist it
            # (read-only directory, permissions, ...) only costs speed later.
            from .utils import get_logger

            get_logger().error("Error writing tz cache: %s", ex)
137+
138+
139+
_load_offsets("DATEPARSER_TZ_CACHE" in os.environ)

docs/settings.rst

+7
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,10 @@ Dateparser in the future. For example, to ignore relative times:
225225

226226
``CACHE_SIZE_LIMIT``: limits the size of caches, that store data for already processed dates.
227227
Default to ``1000``, but you can set ``0`` for turning off the limit.
228+
229+
230+
Environment variables
231+
+++++++++++++++++++++
232+
233+
``DATEPARSER_TZ_CACHE``: Whether or not to cache tz offsets and related search regexes. This speeds up the initialization time of dateparser. Caching is enabled whenever this variable is set (to any value, even an empty one) and is disabled by default.
234+
``DATEPARSER_TZ_CACHE_PATH``: The path to use for the tz cache file. Defaults to ``.dateparser_tz_cache.pkl``, relative to the current working directory.

tests/test_timezone_parser.py

+32
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import datetime as dt
2+
import pickle
23
from datetime import datetime, timedelta
4+
from pathlib import Path
35
from unittest import SkipTest
46
from unittest.mock import Mock, patch
57

@@ -240,3 +242,33 @@ def when_date_is_localized(self, given_date):
240242
def then_localized_date_is(self, expected_date, expected_tzname):
241243
self.assertEqual(self.localized_date.date(), expected_date.date())
242244
self.assertEqual(self.localized_date.tzname(), expected_tzname)
245+
246+
247+
class TestOffsetCaching(BaseTestCase):
    """Exercise the optional on-disk cache used by ``_load_offsets``."""

    def setUp(self):
        super().setUp()

        self.cache_file = Path(dateparser.timezone_parser.DEFAULT_CACHE_PATH)
        # Start every test without a cache file ...
        self.cache_file.unlink(missing_ok=True)
        # ... and do not leave one behind in the working directory afterwards.
        self.addCleanup(self.cache_file.unlink, missing_ok=True)

    def test_no_cache(self):
        # Without the cache flag nothing may be written to disk.
        dateparser.timezone_parser._load_offsets()
        self.assertFalse(self.cache_file.exists())

    def test_cache(self):
        # First load: no cache file yet, so the data is built and persisted.
        dateparser.timezone_parser._tz_offsets = None
        dateparser.timezone_parser._load_offsets(True)
        self.assertTrue(self.cache_file.exists())
        self.assertTrue(dateparser.timezone_parser._tz_offsets)

        # Second load: the data must come from the cache file, so the
        # builder must not be invoked again.
        dateparser.timezone_parser._tz_offsets = None
        with patch.object(
            dateparser.timezone_parser, "build_tz_offsets"
        ) as build_tz_offsets:
            dateparser.timezone_parser._load_offsets(True)
        build_tz_offsets.assert_not_called()
        self.assertTrue(dateparser.timezone_parser._tz_offsets)

    def test_cache_error(self):
        # A payload of the wrong shape must be ignored and the data rebuilt.
        with open(self.cache_file, "wb") as file:
            pickle.dump(1, file)
        self.assertTrue(self.cache_file.exists())
        dateparser.timezone_parser._tz_offsets = None
        dateparser.timezone_parser._load_offsets(True)
        self.assertTrue(dateparser.timezone_parser._tz_offsets)

0 commit comments

Comments
 (0)