Skip to content

Commit 0b3a522

Browse files
committed
feat: add caching for timezone offsets, significantly speeds up import
this is different from pr #1181. that pr only makes import faster but still incurs cost on the first usage. this one leverages an optional cache. closes #533
1 parent 47acb88 commit 0b3a522

File tree

5 files changed

+86
-5
lines changed

5 files changed

+86
-5
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ docs/_build
5252

5353
# Other
5454
raw_data
55+
*.pkl

dateparser/data/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
from dateparser.data import date_translation_data
2-
3-
from .languages_info import language_locale_dict, language_order
2+
from .languages_info import language_order, language_locale_dict

dateparser/timezone_parser.py

+45-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from datetime import datetime, timedelta, timezone, tzinfo
22

3+
import os
4+
import pickle
35
import regex as re
6+
from pathlib import Path
47

58
from .timezones import timezone_info_list
69

@@ -85,7 +88,46 @@ def get_local_tz_offset():
8588

8689

8790
_search_regex_parts = []
88-
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
89-
_search_regex = re.compile("|".join(_search_regex_parts))
90-
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
9191
local_tz_offset = get_local_tz_offset()
92+
93+
DEFAULT_CACHE_PATH = ".dateparser_tz_cache.pkl"
94+
95+
_tz_offsets = None
96+
_search_regex = None
97+
_search_regex_ignorecase = None
98+
99+
100+
def _load_offsets(cache=False):
    """Populate the module-level timezone offset table and search regexes.

    When *cache* is true, the values are first looked up in a pickle file
    located at ``DATEPARSER_TZ_CACHE_PATH`` (falling back to
    ``DEFAULT_CACHE_PATH``) and reused if that file was written by the same
    dateparser version. Otherwise they are rebuilt and, when caching is
    enabled, written back to the same file.

    The cache is a best-effort optimisation: a missing, unreadable, corrupt,
    outdated or unwritable cache file is logged (where applicable) and never
    prevents the values from being rebuilt.

    :param cache: whether to read from / write to the on-disk cache.
    """
    from dateparser import __version__

    global _tz_offsets, _search_regex, _search_regex_ignorecase

    path = None
    if cache:
        path = Path(os.environ.get("DATEPARSER_TZ_CACHE_PATH", DEFAULT_CACHE_PATH))

        try:
            # SECURITY: unpickling can execute arbitrary code. The path comes
            # from the environment or, by default, is relative to the current
            # working directory, so only enable the cache where that location
            # is trusted.
            with open(path, mode="rb") as file:
                version, offsets, search_regex, search_regex_ignorecase = pickle.load(
                    file
                )
        except FileNotFoundError:
            # No cache yet: fall through and build it.
            pass
        except (
            OSError,
            ValueError,
            TypeError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            pickle.UnpicklingError,
        ) as ex:
            # A truncated/corrupt/unreadable cache must not break the import
            # of this module; report it and rebuild instead.
            from .utils import get_logger

            get_logger().error("Error loading tz cache: %s", ex)
        else:
            # Only publish cached values that match the running version, so
            # stale data never leaks into the module globals.
            if version == __version__:
                _tz_offsets = offsets
                _search_regex = search_regex
                _search_regex_ignorecase = search_regex_ignorecase
                return

    # NOTE(review): _search_regex_parts is a module-level list handed to
    # build_tz_offsets; if that helper appends to it, calling this function
    # more than once would accumulate duplicate alternatives - confirm.
    _tz_offsets = list(build_tz_offsets(_search_regex_parts))
    _search_regex = re.compile("|".join(_search_regex_parts))
    _search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)

    if cache:
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, mode="wb") as file:
                pickle.dump(
                    (__version__, _tz_offsets, _search_regex, _search_regex_ignorecase),
                    file,
                )
        except OSError as ex:
            # E.g. read-only filesystem: keep the freshly built values and
            # simply go without a cache.
            from .utils import get_logger

            get_logger().error("Error writing tz cache: %s", ex)
131+
132+
133+
# Caching is opt-in and keyed on the mere presence of DATEPARSER_TZ_CACHE in
# the environment; the variable's value is not inspected.
_load_offsets("DATEPARSER_TZ_CACHE" in os.environ)

docs/settings.rst

+7
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,10 @@ Dateparser in the future. For example, to ignore relative times:
225225

226226
``CACHE_SIZE_LIMIT``: limits the size of caches, that store data for already processed dates.
227227
Default to ``1000``, but you can set ``0`` for turning off the limit.
228+
229+
230+
Environment variables
231+
+++++++++++++++++++++
232+
233+
``DATEPARSER_TZ_CACHE``: Enables caching of tz offsets and related search regexes when set. Only the presence of the variable is checked, not its value. This speeds up the initialization time of dateparser. Unset by default, so caching is disabled.
234+
``DATEPARSER_TZ_CACHE_PATH``: The path to use for the tz cache file. Defaults to ``.dateparser_tz_cache.pkl``, which is relative to the current working directory.

tests/test_timezone_parser.py

+32
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import datetime as dt
2+
import pickle
23
from datetime import datetime, timedelta
4+
from pathlib import Path
35
from unittest import SkipTest
46
from unittest.mock import Mock, patch
57

@@ -240,3 +242,33 @@ def when_date_is_localized(self, given_date):
240242
def then_localized_date_is(self, expected_date, expected_tzname):
241243
self.assertEqual(self.localized_date.date(), expected_date.date())
242244
self.assertEqual(self.localized_date.tzname(), expected_tzname)
245+
246+
247+
class TestOffsetCaching(BaseTestCase):
    """Exercise ``_load_offsets`` with the cache disabled, enabled and corrupt."""

    def setUp(self):
        super().setUp()

        self.cache_file = Path(dateparser.timezone_parser.DEFAULT_CACHE_PATH)

        # Pin the cache location so a DATEPARSER_TZ_CACHE_PATH set in the
        # developer's/CI environment cannot redirect the file under test.
        env_patch = patch.dict(
            "os.environ", {"DATEPARSER_TZ_CACHE_PATH": str(self.cache_file)}
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Start from a clean slate and do not leave the cache file behind in
        # the working directory once the test has finished.
        self.cache_file.unlink(missing_ok=True)
        self.addCleanup(self.cache_file.unlink, missing_ok=True)

    def test_no_cache(self):
        # Without caching, no cache file may be created.
        dateparser.timezone_parser._load_offsets()
        self.assertFalse(self.cache_file.exists())

    def test_cache(self):
        # First load builds the offsets and writes the cache file.
        dateparser.timezone_parser._tz_offsets = None
        dateparser.timezone_parser._load_offsets(True)
        self.assertTrue(self.cache_file.exists())
        self.assertTrue(dateparser.timezone_parser._tz_offsets)

        # Second load must repopulate the offsets with the cache present.
        dateparser.timezone_parser._tz_offsets = None
        dateparser.timezone_parser._load_offsets(True)
        self.assertTrue(dateparser.timezone_parser._tz_offsets)

    def test_cache_error(self):
        # A cache file with an unexpected payload must be ignored and the
        # offsets rebuilt instead of raising.
        with open(self.cache_file, "wb") as file:
            pickle.dump(1, file)
        self.assertTrue(self.cache_file.exists())
        dateparser.timezone_parser._tz_offsets = None
        dateparser.timezone_parser._load_offsets(True)
        self.assertTrue(dateparser.timezone_parser._tz_offsets)

0 commit comments

Comments
 (0)