Skip to content

Commit 1f6c7c6

Browse files
committed
feat: add caching for timezone offsets, significantly speeds up import
This is different from PR #1181: it builds a cache at install time which can be distributed. Closes #533.
1 parent 47acb88 commit 1f6c7c6

File tree

4 files changed

+60
-4
lines changed

4 files changed

+60
-4
lines changed

CONTRIBUTING.rst

+13
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,16 @@ Whenever the content of
179179
the corresponding documentation table::
180180

181181
dateparser_scripts/update_supported_languages_and_locales.py
182+
183+
184+
Updating the Timezone Cache
185+
----------------------------------------------------
186+
187+
Whenever the content of
188+
``dateparser/timezones.py`` is modified, you need to rebuild the timezone cache.
189+
190+
Run this command:
191+
``BUILD_TZ_CACHE=1 python -c "import dateparser"``
192+
193+
which should update
194+
``dateparser/data/dateparser_tz_cache.pkl``

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ include CONTRIBUTING.rst
33
include HISTORY.rst
44
include LICENSE
55
include README.rst
6+
include dateparser/data/dateparser_tz_cache.pkl
67
include dateparser_data/settings.py
78
include requirements.txt
89

131 KB
Binary file not shown.

dateparser/timezone_parser.py

+46-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import os
2+
import pickle
3+
import zlib
14
from datetime import datetime, timedelta, timezone, tzinfo
5+
from pathlib import Path
26

37
import regex as re
48

@@ -84,8 +88,46 @@ def get_local_tz_offset():
8488
return offset
8589

8690

87-
_search_regex_parts = []
88-
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
89-
_search_regex = re.compile("|".join(_search_regex_parts))
90-
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
9191
# Evaluated once, when this module is imported.
local_tz_offset = get_local_tz_offset()
92+
93+
# Timezone lookup tables. They start out as None and are assigned by
# _load_offsets(), either from the pickle cache or from a fresh rebuild.
_tz_offsets = None
94+
_search_regex = None
95+
_search_regex_ignorecase = None
96+
97+
98+
def _load_offsets(cache_path, current_hash):
    """Fill the module-level timezone tables, using the pickle cache if possible.

    Assigns the globals ``_tz_offsets``, ``_search_regex`` and
    ``_search_regex_ignorecase``. The cache at ``cache_path`` is used when
    ``current_hash`` is ``None`` or equals the hash stored in the cache.
    Otherwise (or when the cache is missing, unreadable or corrupt) the
    tables are rebuilt with ``build_tz_offsets`` and the cache file is
    rewritten on a best-effort basis.

    :param cache_path: location of the pickle cache file.
    :param current_hash: expected hash of the timezone data, or ``None`` to
        accept whatever hash the cache was written with.
    """
    global _tz_offsets, _search_regex, _search_regex_ignorecase

    # NOTE(security): unpickling can execute arbitrary code. ``cache_path``
    # must only ever point at the cache shipped with, or written by, this
    # package -- never at user-supplied data.
    try:
        with open(cache_path, mode="rb") as file:
            (
                serialized_hash,
                cached_offsets,
                cached_regex,
                cached_regex_ignorecase,
            ) = pickle.load(file)
    except (
        OSError,  # missing or unreadable file (covers FileNotFoundError)
        EOFError,  # truncated cache
        pickle.UnpicklingError,  # corrupt cache
        AttributeError,  # pickled objects no longer resolvable
        ImportError,
        IndexError,
        ValueError,  # wrong number of items in the cached tuple
        TypeError,  # cached payload is not a tuple at all
    ):
        # Any unusable cache is treated like a missing one: rebuild below
        # instead of failing the import of the package.
        pass
    else:
        if current_hash is None or current_hash == serialized_hash:
            # Publish the cached tables only once they are known to be valid.
            _tz_offsets = cached_offsets
            _search_regex = cached_regex
            _search_regex_ignorecase = cached_regex_ignorecase
            return

    # build_tz_offsets appends the regex fragments to the list while the
    # offsets are being generated, so the list must be consumed first.
    search_regex_parts = []
    _tz_offsets = list(build_tz_offsets(search_regex_parts))
    pattern = "|".join(search_regex_parts)
    _search_regex = re.compile(pattern)
    _search_regex_ignorecase = re.compile(pattern, re.IGNORECASE)

    try:
        with open(cache_path, mode="wb") as file:
            pickle.dump(
                (current_hash, _tz_offsets, _search_regex, _search_regex_ignorecase),
                file,
            )
    except OSError:
        # Best effort: the package directory may be read-only (e.g. a
        # system-wide install). The in-memory tables are already usable.
        pass
124+
125+
126+
# Pickle cache stored in the "data" directory next to this module.
CACHE_PATH = Path(__file__).parent / "data" / "dateparser_tz_cache.pkl"

# The hash of the timezone data is only computed when BUILD_TZ_CACHE is set;
# otherwise None is passed, which makes _load_offsets accept the cache on
# disk regardless of the hash stored in it.
current_hash = (
    zlib.crc32(str(timezone_info_list).encode("utf-8"))
    if "BUILD_TZ_CACHE" in os.environ
    else None
)

_load_offsets(CACHE_PATH, current_hash)

0 commit comments

Comments
 (0)