Skip to content

Commit efdc4b4

Browse files
authored
Merge pull request #513 from itamarst/more-json-encodings
More json encodings
2 parents 3e996e7 + e9ba422 commit efdc4b4

File tree

8 files changed

+271
-22
lines changed

8 files changed

+271
-22
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515

1616
strategy:
1717
matrix:
18-
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev", "pypy3.9", "pypy3.10"]
18+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.9", "pypy3.10"]
1919

2020
steps:
2121
- uses: "actions/checkout@v3"

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Eliot supports a range of use cases and 3rd party libraries:
2525

2626
Eliot is only used to generate your logs; you might need tools like Logstash and ElasticSearch to aggregate and store logs if you are using multiple processes across multiple machines.
2727

28-
Eliot supports Python 3.8-3.12, as well as PyPy3.
28+
Eliot supports Python 3.9-3.13, as well as PyPy3.
2929
It is maintained by Itamar Turner-Trauring, and released under the Apache 2.0 License.
3030

3131
* `Read the documentation <https://eliot.readthedocs.io>`_.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""
2+
Benchmark of message serialization.
3+
4+
The goal here is to mostly focus on performance of serialization, in a vaguely
5+
realistic manner. That is, messages are logged in context of a message with a
6+
small number of fields.
7+
"""
8+
9+
import time
10+
import polars as pl
11+
from eliot import start_action, to_file
12+
13+
# Ensure JSON serialization is part of benchmark:
14+
to_file(open("/dev/null", "w"))
15+
16+
N = 100_000
17+
18+
MY_SET = {1, 2, 3, 4}
19+
SERIES = pl.Series([1, 2, 3])
20+
21+
22+
def run():
    """
    Run the serialization benchmark and print the results.

    Performs ``N`` iterations; each iteration opens two nested actions and
    logs one message carrying ``SERIES`` and ``MY_SET``. Afterwards prints
    the average wall-clock time per message and the number of messages
    logged per second.
    """
    # Each iteration has 5 messages: start/end of my_action, start/end of
    # my_action2, and my_message.
    total_messages = N * 5

    # perf_counter() is monotonic, so the measurement cannot be skewed by
    # system clock adjustments while the benchmark is running.
    start = time.perf_counter()
    for _ in range(N):
        with start_action(action_type="my_action"):
            with start_action(action_type="my_action2") as ctx:
                ctx.log(
                    message_type="my_message",
                    series=SERIES,
                    my_set=MY_SET,
                )
    elapsed = time.perf_counter() - start

    print("%.6f per message" % (elapsed / total_messages,))
    # Throughput counts messages (not iterations) so that it is the exact
    # reciprocal of the per-message figure printed above.
    print("%s messages/sec" % (int(total_messages / elapsed),))
38+
39+
40+
if __name__ == "__main__":
41+
run()

docs/source/news.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
What's New
22
==========
33

4+
1.17.0
5+
^^^^^^
6+
7+
Enhancements:
8+
9+
* Eliot's JSON output can now serialize pathlib.Path, Pandas objects, Polars objects, times, dates, Pydantic objects, sets, and complex numbers. Thanks to Anton Kulaga for the patch.
10+
11+
Deprecations and removals:
12+
13+
* Dropped support for Python 3.8.
14+
415
1.16.0
516
^^^^^^
617

eliot/json.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
from typing import Callable
44
import json
55
import sys
6+
from pathlib import Path
7+
from datetime import date, time
8+
import platform
69

710

811
class EliotJSONEncoder(json.JSONEncoder):
@@ -19,7 +22,8 @@ def default(self, o):
1922
def json_default(o: object) -> object:
2023
"""
2124
JSON object encoder for non-standard types. In particular, supports NumPy
22-
types. If you are wrappnig it, call it last, as it will raise a
25+
types, Path objects, Pydantic models, dataclasses, Pandas and Polars
26+
objects. If you are wrapping it, call it last, as it will raise a
2327
``TypeError`` on unsupported types.
2428
"""
2529
numpy = sys.modules.get("numpy", None)
@@ -39,9 +43,82 @@ def json_default(o: object) -> object:
3943
}
4044
else:
4145
return o.tolist()
46+
47+
# Add Pydantic support
48+
pydantic = sys.modules.get("pydantic", None)
49+
if pydantic is not None and isinstance(o, pydantic.BaseModel):
50+
return o.model_dump()
51+
52+
if isinstance(o, Path):
53+
return str(o)
54+
55+
if isinstance(o, date):
56+
return o.isoformat()
57+
58+
if isinstance(o, time):
59+
return o.isoformat()
60+
61+
if isinstance(o, set):
62+
return list(o)
63+
64+
if isinstance(o, complex):
65+
return {"real": o.real, "imag": o.imag}
66+
67+
# Add Pandas support
68+
pandas = sys.modules.get("pandas", None)
69+
if pandas is not None:
70+
if isinstance(o, pandas.Timestamp):
71+
return o.isoformat()
72+
if isinstance(o, pandas.Series):
73+
return o.to_list()
74+
if isinstance(o, pandas.DataFrame):
75+
return o.to_dict(orient="records")
76+
if isinstance(o, pandas.Interval):
77+
return {"left": o.left, "right": o.right, "closed": o.closed}
78+
if isinstance(o, pandas.Period):
79+
return str(o)
80+
81+
# Add Polars support
82+
polars = sys.modules.get("polars", None)
83+
if polars is not None:
84+
if isinstance(o, polars.Series):
85+
return o.to_list()
86+
if isinstance(o, polars.DataFrame):
87+
return o.to_dicts()
88+
if isinstance(o, polars.Datetime):
89+
return o.isoformat()
90+
4291
raise TypeError("Unsupported type")
4392

4493

94+
if platform.python_implementation() == "PyPy":
    # We're not using orjson, so need to serialize a few more types.

    original_json_default = json_default

    # NOTE: no docstring is written inside the wrapper because it is replaced
    # with the wrapped function's docstring right after the definition.
    def json_default(o: object, original_json_default=original_json_default) -> object:
        from dataclasses import fields, is_dataclass
        from datetime import datetime
        from enum import Enum
        from uuid import UUID

        # Add dataclass support. Only *instances* are handled: a dataclass
        # class object also carries ``__dataclass_fields__`` but has no field
        # values, so it must fall through to the "unsupported" path.
        # ``fields()`` skips ClassVar/InitVar pseudo-fields, which are not
        # real per-instance fields and may not even exist as attributes.
        if is_dataclass(o) and not isinstance(o, type):
            return {field.name: getattr(o, field.name) for field in fields(o)}

        if isinstance(o, datetime):
            return o.isoformat()

        if isinstance(o, UUID):
            return str(o)

        if isinstance(o, Enum):
            return o.value

        # Everything else is delegated to the wrapped encoder.
        return original_json_default(o)

    json_default.__doc__ = original_json_default.__doc__
    # The wrapped function stays reachable through the default argument bound
    # above; drop the temporary module-level name.
    del original_json_default
120+
121+
45122
def _encoder_to_default_function(
46123
encoder: json.JSONEncoder,
47124
) -> Callable[[object], object]:

eliot/tests/test_json.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from unittest import TestCase, skipUnless, skipIf
66
from json import loads
7+
from importlib.metadata import PackageNotFoundError, version as package_version
78

89
try:
910
import numpy as np
@@ -18,6 +19,15 @@
1819
)
1920

2021

22+
def package_installed(name: str) -> bool:
    """
    Report whether distribution metadata can be found for a package.

    @param name: Name of the package to look up.

    @return: C{True} if a version could be looked up for C{name}, C{False}
        if the lookup raised L{PackageNotFoundError}.
    """
    try:
        package_version(name)
    except PackageNotFoundError:
        return False
    else:
        return True
29+
30+
2131
class EliotJSONEncoderTests(TestCase):
2232
"""Tests for L{EliotJSONEncoder} and L{json_default}."""
2333

@@ -83,3 +93,116 @@ def test_large_numpy_array(self):
8393
loads(dumps(a1002, default=json_default)),
8494
{"array_start": a1002.flat[:10000].tolist(), "original_shape": [2, 5001]},
8595
)
96+
97+
def test_basic_types(self):
    """
    Basic Python types (paths, dates and times, UUIDs, sets, dict
    subclasses, complex numbers and enums) can be serialized.
    """
    from pathlib import Path
    from datetime import datetime, date, time
    from uuid import UUID
    from collections import defaultdict, OrderedDict, Counter
    from enum import Enum

    class TestEnum(Enum):
        A = 1
        B = "test"

    test_data = {
        "path": Path("/tmp/test"),
        "datetime": datetime(2024, 1, 1, 12, 0),
        "date": date(2024, 1, 1),
        "time": time(12, 0),
        "uuid": UUID("12345678-1234-5678-1234-567812345678"),
        "set": {1, 2, 3},
        "defaultdict": defaultdict(list, {"a": [1, 2]}),
        "ordered_dict": OrderedDict([("a", 1), ("b", 2)]),
        "counter": Counter(["a", "a", "b"]),
        "complex": 1 + 2j,
        "enum": TestEnum.A,
        "enum2": TestEnum.B,
    }

    serialized = loads(dumps(test_data, default=json_default))

    # Compare against str() of the same path so the expectation does not
    # depend on the platform's path separator.
    self.assertEqual(serialized["path"], str(test_data["path"]))
    self.assertEqual(serialized["datetime"], "2024-01-01T12:00:00")
    self.assertEqual(serialized["date"], "2024-01-01")
    self.assertEqual(serialized["time"], "12:00:00")
    self.assertEqual(serialized["uuid"], "12345678-1234-5678-1234-567812345678")
    # Sets are unordered, so only the contents of the resulting list are
    # checked, not the order in which the elements were emitted.
    self.assertEqual(sorted(serialized["set"]), [1, 2, 3])
    self.assertEqual(serialized["defaultdict"], {"a": [1, 2]})
    self.assertEqual(serialized["ordered_dict"], {"a": 1, "b": 2})
    self.assertEqual(serialized["counter"], {"a": 2, "b": 1})
    self.assertEqual(serialized["complex"], {"real": 1.0, "imag": 2.0})
    self.assertEqual(serialized["enum"], 1)
    self.assertEqual(serialized["enum2"], "test")
138+
139+
@skipUnless(package_installed("pydantic"), "Pydantic not installed.")
def test_pydantic(self):
    """A Pydantic model is serialized as a JSON object of its fields."""
    from pydantic import BaseModel

    class TestModel(BaseModel):
        name: str
        value: int

    encoded = dumps(TestModel(name="test", value=42), default=json_default)
    self.assertEqual(loads(encoded), {"name": "test", "value": 42})
151+
152+
@skipUnless(package_installed("pandas"), "Pandas not installed.")
def test_pandas(self):
    """Supported Pandas objects are serialized to plain JSON values."""
    import pandas as pd

    def roundtrip(value):
        # Encode with json_default, then decode back to Python objects.
        return loads(dumps(value, default=json_default))

    # Timestamp -> ISO 8601 string.
    self.assertEqual(
        roundtrip(pd.Timestamp("2024-01-01 12:00:00")), "2024-01-01T12:00:00"
    )

    # Series -> list of values.
    self.assertEqual(roundtrip(pd.Series([1, 2, 3])), [1, 2, 3])

    # DataFrame -> list of row dictionaries.
    frame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    self.assertEqual(roundtrip(frame), [{"a": 1, "b": 3}, {"a": 2, "b": 4}])

    # Interval -> dictionary with its bounds and closedness.
    self.assertEqual(
        roundtrip(pd.Interval(0, 1, closed="both")),
        {"left": 0, "right": 1, "closed": "both"},
    )

    # Period -> its string form.
    self.assertEqual(roundtrip(pd.Period("2024-01")), "2024-01")
181+
182+
@skipUnless(package_installed("polars"), "Polars not installed.")
def test_polars(self):
    """Polars Series and DataFrame objects are serialized to JSON."""
    import polars as pl

    def roundtrip(value):
        # Encode with json_default, then decode back to Python objects.
        return loads(dumps(value, default=json_default))

    # Series -> list of values.
    self.assertEqual(roundtrip(pl.Series("a", [1, 2, 3])), [1, 2, 3])

    # DataFrame -> list of row dictionaries.
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    self.assertEqual(roundtrip(frame), [{"a": 1, "b": 3}, {"a": 2, "b": 4}])
195+
)
196+
197+
def test_dataclass(self):
    """A dataclass instance is serialized as a JSON object of its fields."""
    from dataclasses import dataclass

    @dataclass
    class TestDataClass:
        name: str
        value: int

    encoded = dumps(TestDataClass(name="test", value=42), default=json_default)
    self.assertEqual(loads(encoded), {"name": "test", "value": 42})

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def read(path):
1818
"Operating System :: OS Independent",
1919
"Programming Language :: Python",
2020
"Programming Language :: Python :: 3",
21-
"Programming Language :: Python :: 3.8",
2221
"Programming Language :: Python :: 3.9",
2322
"Programming Language :: Python :: 3.10",
2423
"Programming Language :: Python :: 3.11",
@@ -32,7 +31,7 @@ def read(path):
3231
version=versioneer.get_version(),
3332
cmdclass=versioneer.get_cmdclass(),
3433
description="Logging library that tells you why it happened",
35-
python_requires=">=3.8.0",
34+
python_requires=">=3.9.0",
3635
install_requires=[
3736
# Internal code documentation:
3837
"zope.interface",

0 commit comments

Comments
 (0)