Skip to content

Commit 44cad01

Browse files
committed
Marshalling: Restore CrateDB standard encoder
1 parent f53cfe6 commit 44cad01

File tree

3 files changed

+44
-16
lines changed

3 files changed

+44
-16
lines changed

CHANGES.rst

+11-5
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,21 @@ Unreleased
77

88
- Switched JSON encoder to use the `orjson`_ library, to improve JSON
99
marshalling performance. Thanks, @widmogrod.
10+
1011
orjson is fast and in some spots even more correct when compared against
1112
Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson
12-
will serialize to ``bytes`` instead of ``str``. Please also note it
13-
will not deserialize to dataclasses, UUIDs, decimals, etc., or support
14-
``object_hook``. Within ``crate-python``, it is applied with an encoder
15-
function for additional type support about Python's ``Decimal`` type and
16-
freezegun's ``FakeDatetime`` type.
13+
will serialize to ``bytes`` instead of ``str``. When sending data to CrateDB,
14+
``crate-python`` uses a custom encoder to add support for additional data
15+
types.
16+
17+
- Python's ``Decimal`` type will be serialized to ``str``.
18+
- Python's ``dt.datetime`` and ``dt.date`` types will be serialized to
19+
``int`` (``LONG``) after converting to milliseconds since epoch, to
20+
optimally accommodate CrateDB's `TIMESTAMP`_ representation.
21+
- NumPy's data types will be serialized natively by ``orjson``.
1722

1823
.. _orjson: https://github.com/ijl/orjson
24+
.. _TIMESTAMP: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp
1925

2026
2024/11/23 1.0.1
2127
================

src/crate/client/http.py

+29-7
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
# software solely pursuant to the terms of the relevant commercial agreement.
2121

2222

23+
import calendar
24+
import datetime as dt
2325
import heapq
2426
import io
2527
import logging
@@ -84,19 +86,35 @@ def super_len(o):
8486
return None
8587

8688

# Reference points for converting datetimes to "milliseconds since epoch".
# Python refuses to subtract a naive datetime from an aware one (and vice
# versa), so one epoch of each flavour is kept.
epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
epoch_naive = dt.datetime(1970, 1, 1)


def json_encoder(obj: t.Any) -> t.Union[int, str]:
    """
    Encoder function for orjson, with additional type support.

    - Python's `Decimal` type will be serialized to `str`.
    - Python's `dt.datetime` and `dt.date` types will be
      serialized to `int` after converting to milliseconds
      since epoch. Sub-millisecond precision is dropped by
      truncating toward zero.

    Aware datetimes are measured against the UTC epoch; naive
    datetimes are measured against a naive 1970-01-01 epoch,
    i.e. without any timezone conversion.

    https://github.com/ijl/orjson#default
    https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp

    :param obj: The object to convert into a JSON-compatible value.
    :return: `str` for `Decimal`, `int` (epoch milliseconds) for
        `dt.datetime` and `dt.date`.
    :raises TypeError: If `obj` is of any other type.
    """
    if isinstance(obj, Decimal):
        return str(obj)
    # `dt.datetime` is a subclass of `dt.date`, so it must be checked first.
    if isinstance(obj, dt.datetime):
        epoch = epoch_aware if obj.tzinfo is not None else epoch_naive
        delta = obj - epoch
        # Use exact integer arithmetic. Going through floats loses
        # sub-millisecond resolution for far-future timestamps and can
        # round the result up to the next millisecond.
        millis = abs(delta) // dt.timedelta(milliseconds=1)
        # Truncate toward zero, also for datetimes before the epoch.
        return -millis if delta.days < 0 else millis
    if isinstance(obj, dt.date):
        return calendar.timegm(obj.timetuple()) * 1000
    raise TypeError(f"Type is not JSON serializable: {type(obj).__name__}")
101119

102120

@@ -108,8 +126,12 @@ def json_dumps(obj: t.Any) -> bytes:
108126
"""
109127
return orjson.dumps(
110128
obj,
111-
default=cratedb_json_encoder,
112-
option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY),
129+
default=json_encoder,
130+
option=(
131+
orjson.OPT_PASSTHROUGH_DATETIME
132+
| orjson.OPT_NON_STR_KEYS
133+
| orjson.OPT_SERIALIZE_NUMPY
134+
),
113135
)
114136

115137

tests/client/test_http.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
318318
# convert string to dict
319319
# because the order of the keys isn't deterministic
320320
data = json.loads(request.call_args[1]["data"])
321-
self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
321+
self.assertEqual(data["args"], [1425108700000])
322322
client.close()
323323

324324
@patch(REQUEST, autospec=True)
@@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request):
329329
day = dt.date(2016, 4, 21)
330330
client.sql("insert into users (dt) values (?)", (day,))
331331
data = json.loads(request.call_args[1]["data"])
332-
self.assertEqual(data["args"], ["2016-04-21"])
332+
self.assertEqual(data["args"], [1461196800000])
333333
client.close()
334334

335335
def test_socket_options_contain_keepalive(self):
@@ -725,9 +725,9 @@ class TestCrateJsonEncoder(TestCase):
725725
def test_naive_datetime(self):
726726
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123")
727727
result = json_dumps(data)
728-
self.assertEqual(result, b'"2023-06-26T09:24:00.123000"')
728+
self.assertEqual(result, b"1687771440123")
729729

730730
def test_aware_datetime(self):
731731
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00")
732732
result = json_dumps(data)
733-
self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"')
733+
self.assertEqual(result, b"1687764240123")

0 commit comments

Comments
 (0)