Skip to content

Commit 88240f7

Browse files
committed
Add data validation support using Pydantic models.
1 parent 560e0ce commit 88240f7

7 files changed

Lines changed: 301 additions & 5 deletions

File tree

README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Here the hierarchy of possible installation targets available when running `pip
6565
- `[yaml]`
6666
- `[parse]`
6767
- `[s3]`
68+
- `[validate]`
6869

6970
## Usage
7071

@@ -279,6 +280,36 @@ Here are the details of the supported formats, operations and extra options docs
279280
| `xml` | :white_check_mark: | :white_check_mark: | [xmltodict](https://github.com/martinblech/xmltodict) |
280281
| `yaml` | :white_check_mark: | :white_check_mark: | [PyYAML](https://pyyaml.org/wiki/PyYAMLDocumentation) |
281282

283+
#### Data validation
284+
285+
`benedict` supports data validation using `Pydantic` models.
286+
287+
This feature **requires** the `validate` extra to be installed:
288+
289+
```bash
290+
pip install "python-benedict[validate]"
291+
```
292+
293+
You can validate data in different ways:
294+
295+
##### Using the `validate` method directly
296+
```python
297+
d = benedict(my_data)
298+
d.validate(schema=MySchema)
299+
```
300+
301+
##### Using the `schema` parameter during initialization
302+
```python
303+
d = benedict(my_data, schema=MySchema)
304+
```
305+
306+
##### Using the `schema` parameter with any `from_{format}` method
307+
```python
308+
d = benedict.from_json(my_data, schema=MySchema)
309+
```
310+
311+
If validation fails, a `ValidationError` will be raised with details about what went wrong.
312+
282313
### API
283314

284315
- **Utility methods**
@@ -333,6 +364,7 @@ Here are the details of the supported formats, operations and extra options docs
333364
- [`to_toml`](#to_toml)
334365
- [`to_xml`](#to_xml)
335366
- [`to_yaml`](#to_yaml)
367+
- [`validate`](#validate)
336368

337369
- **Parse methods**
338370

@@ -815,6 +847,14 @@ s = d.to_xml(**kwargs)
815847
s = d.to_yaml(**kwargs)
816848
```
817849

850+
#### `validate`
851+
852+
```python
853+
# Validate the dict and update it using a Pydantic schema.
854+
# A ValidationError is raised in case of failure.
855+
d.validate(schema=MySchema)
856+
```
857+
818858
### Parse methods
819859

820860
These methods are wrappers of the `get` method; they parse the data and try to return it in the expected type.

benedict/dicts/io/io_dict.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from benedict.dicts.base import BaseDict
44
from benedict.dicts.io import io_util
55
from benedict.exceptions import ExtrasRequireModuleNotFoundError
6-
from benedict.utils import type_util
6+
from benedict.utils import pydantic_util, type_util
7+
from benedict.utils.pydantic_util import PydanticModel
78

89

910
class IODict(BaseDict):
@@ -20,18 +21,24 @@ def __init__(self, *args, **kwargs):
2021
d = IODict._decode_init(arg, **kwargs)
2122
super().__init__(d)
2223
return
24+
25+
schema = kwargs.pop("schema", None)
2326
super().__init__(*args, **kwargs)
27+
if schema:
28+
self.validate(schema=schema)
2429

2530
@staticmethod
2631
def _decode_init(s, **kwargs):
2732
autodetected_format = io_util.autodetect_format(s)
2833
default_format = autodetected_format or "json"
2934
format = kwargs.pop("format", default_format).lower()
3035
# decode data-string and initialize with dict data.
31-
return IODict._decode(s, format, **kwargs)
36+
data = IODict._decode(s, format, **kwargs)
37+
return data
3238

3339
@staticmethod
3440
def _decode(s, format, **kwargs):
41+
schema = kwargs.pop("schema", None)
3542
data = None
3643
try:
3744
data = io_util.decode(s, format, **kwargs)
@@ -45,12 +52,15 @@ def _decode(s, format, **kwargs):
4552
) from None
4653
# if possible return data as dict, otherwise raise exception
4754
if type_util.is_dict(data):
48-
return data
55+
pass
4956
elif type_util.is_list(data):
5057
# force list to dict
51-
return {"values": data}
58+
data = {"values": data}
5259
else:
5360
raise ValueError(f"Invalid data type: {type(data)}, expected dict or list.")
61+
if schema:
62+
data = pydantic_util.validate_data(data, schema=schema)
63+
return data
5464

5565
@staticmethod
5666
def _encode(d, format, **kwargs):
@@ -325,3 +335,14 @@ def to_yaml(self, **kwargs):
325335
A ValueError is raised in case of failure.
326336
"""
327337
return self._encode(self.dict(), "yaml", **kwargs)
338+
339+
def validate(self, *, schema: PydanticModel):
    """
    Validate the dict and update it using a Pydantic schema.

    The current content is handed to ``pydantic_util.validate_data``; on
    success the dict is emptied and refilled with the data it returns.
    Any exception raised during validation propagates before the dict is
    modified.

    Args:
        schema: Pydantic model class for validation
    """
    data = pydantic_util.validate_data(self, schema=schema)
    if data is self:
        # validate_data returns its input untouched when no schema is given.
        # Clearing first would then wipe the very object we are about to
        # copy from, leaving the dict empty.
        return
    self.clear()
    self.update(data)

benedict/extras.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"require_parse",
66
"require_s3",
77
"require_toml",
8+
"require_validate",
89
"require_xls",
910
"require_xml",
1011
"require_yaml",
@@ -32,6 +33,10 @@ def require_toml(*, installed):
3233
_require_optional_dependencies(target="toml", installed=installed)
3334

3435

36+
def require_validate(*, installed):
    """
    Check the optional dependencies of the ``validate`` extra.

    Delegates to ``_require_optional_dependencies`` with the ``validate``
    target; ``installed`` is forwarded as given.
    """
    _require_optional_dependencies(
        installed=installed,
        target="validate",
    )
38+
39+
3540
def require_xls(*, installed):
    """
    Check the optional dependencies of the ``xls`` extra.

    Delegates to ``_require_optional_dependencies`` with the ``xls``
    target; ``installed`` is forwarded as given.
    """
    _require_optional_dependencies(
        installed=installed,
        target="xls",
    )
3742

benedict/utils/pydantic_util.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Any
2+
3+
from benedict.extras import require_validate
4+
5+
# pydantic is an optional dependency (the `validate` extra), so the import is
# guarded and its outcome recorded in `pydantic_installed`.
try:
    # Import from the top-level package: `BaseModel` is exported there, while
    # a `pydantic.v2` namespace is not something pydantic is known to ship, so
    # importing from it would leave `pydantic_installed` False even with
    # pydantic available.
    from pydantic import BaseModel

    pydantic_installed = True
except ImportError:
    pydantic_installed = False
    BaseModel = None

# The encoder is resolved on its own so that failing to locate it can never
# make pydantic itself look uninstalled.
try:
    from pydantic.json import pydantic_encoder
except ImportError:
    pydantic_encoder = None

# Annotation alias meaning "a Pydantic model class". The string forward
# reference keeps it valid when `BaseModel` is None.
PydanticModel = type["BaseModel"]
16+
17+
18+
def is_pydantic_model(obj: Any) -> bool:
    """
    Tell whether ``obj`` is an instance of Pydantic's ``BaseModel``.

    Always ``False`` when pydantic could not be imported.
    """
    if not pydantic_installed:
        return False
    return isinstance(obj, BaseModel)
23+
24+
25+
def is_pydantic_model_class(obj: Any) -> bool:
    """
    Tell whether ``obj`` is a class derived from Pydantic's ``BaseModel``.

    Always ``False`` when pydantic could not be imported, and for anything
    that is not a class.
    """
    if not pydantic_installed or BaseModel is None:
        return False
    if not isinstance(obj, type):
        # issubclass() would raise TypeError on a non-class argument.
        return False
    return issubclass(obj, BaseModel)
35+
36+
37+
def validate_data(data: Any, *, schema: PydanticModel | None = None) -> Any:
    """
    Run ``data`` through a Pydantic schema, when one is supplied.

    With ``schema=None`` the input object is returned as-is. Otherwise the
    ``validate`` extra is required, ``schema`` must be a Pydantic model
    class (a ``ValueError`` is raised if it is not), and the result is the
    ``model_dump()`` of the instance produced by ``schema.model_validate``.
    """
    if schema is not None:
        require_validate(installed=pydantic_installed)
        if is_pydantic_model_class(schema):
            model = schema.model_validate(data)
            return model.model_dump()
        raise ValueError("Invalid schema. Schema must be a Pydantic model class.")
    return data

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ Twitter = "https://twitter.com/fabiocaccamo"
115115

116116
[project.optional-dependencies]
117117
all = [
118-
"python-benedict[io,parse,s3]",
118+
"python-benedict[io,parse,s3,validate]",
119119
]
120120
html = [
121121
"beautifulsoup4 >= 4.12.0, < 5.0.0",
@@ -136,6 +136,9 @@ s3 = [
136136
toml = [
137137
"toml >= 0.10.2, < 1.0.0",
138138
]
139+
validate = [
    # `model_validate` / `model_dump` used by the validation code are
    # Pydantic v2 APIs, so 1.x releases cannot satisfy this extra.
    "pydantic >= 2.0.0, < 3.0.0",
]
139142
xls = [
140143
"openpyxl >= 3.0.0, < 4.0.0",
141144
"xlrd >= 2.0.0, < 3.0.0",
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import json
2+
import unittest
3+
4+
from benedict import benedict
5+
6+
# pydantic is optional; when it is missing the test class below is skipped.
try:
    # Both names are exported from the top-level package. Importing them from
    # `pydantic.v2...` is expected to fail even with pydantic installed,
    # which would silently skip every test in this module.
    from pydantic import BaseModel, ValidationError

    pydantic_installed = True
except ImportError:
    pydantic_installed = False
    BaseModel = None
    ValidationError = None
15+
16+
17+
@unittest.skipIf(not pydantic_installed, "pydantic not installed")
class TestIODictSchema(unittest.TestCase):
    """
    Tests for schema validation through the constructor, ``from_json`` and
    the ``validate`` method.
    """

    def setUp(self):
        # Models are declared here rather than at module level because
        # BaseModel is None when pydantic is missing (the class is skipped
        # in that case, so setUp never runs).
        class User(BaseModel):
            name: str
            age: int
            email: str

        class UserList(BaseModel):
            users: list[User]

        class UserOptional(BaseModel):
            name: str
            age: int | None = None
            email: str | None = None

        self.User = User
        self.UserList = UserList
        self.UserOptional = UserOptional
        self.valid_data = {
            "name": "John",
            "age": 30,
            "email": "john@example.com",
        }
        self.invalid_data = {
            "name": "John",
            "age": "not_an_int",
            "email": "john@example.com",
        }
        self.minimal_data = {"name": "John"}

    def test_constructor_with_schema(self):
        d = benedict(self.valid_data, schema=self.User)
        self.assertEqual(d["name"], "John")
        self.assertEqual(d["age"], 30)
        self.assertEqual(d["email"], "john@example.com")

        with self.assertRaises(ValidationError):
            benedict(self.invalid_data, schema=self.User)

    def test_constructor_with_schema_and_optional_fields(self):
        d = benedict(self.minimal_data, schema=self.UserOptional)
        self.assertEqual(d["name"], "John")
        self.assertIsNone(d.get("age"))
        self.assertIsNone(d.get("email"))

    def test_constructor_with_invalid_schema(self):
        class InvalidSchema:
            pass

        with self.assertRaises(ValueError):
            benedict(self.valid_data, schema=InvalidSchema)

        with self.assertRaises(ValueError):
            benedict(self.valid_data, schema="not_a_schema")

    def test_from_json_with_schema_and_valid_data(self):
        json_data = json.dumps(self.valid_data)
        d = benedict.from_json(json_data, schema=self.User)
        self.assertEqual(d["name"], "John")
        self.assertEqual(d["age"], 30)
        self.assertEqual(d["email"], "john@example.com")

    def test_from_json_with_schema_and_invalid_data(self):
        json_data = json.dumps(self.invalid_data)
        with self.assertRaises(ValidationError):
            benedict.from_json(json_data, schema=self.User)

    def test_validate_method_with_valid_data(self):
        # Copy the fixture so the in-place update done by validate() cannot
        # leak into other assertions.
        d = benedict(dict(self.valid_data))
        d.validate(schema=self.User)
        self.assertEqual(d["name"], "John")
        self.assertEqual(d["age"], 30)
        self.assertEqual(d["email"], "john@example.com")

    def test_validate_method_with_invalid_data(self):
        d = benedict(dict(self.invalid_data))
        with self.assertRaises(ValidationError):
            d.validate(schema=self.User)
        # A failed validation must leave the original content in place.
        self.assertEqual(d["age"], "not_an_int")
83+
84+
85+
if __name__ == "__main__":
86+
unittest.main()

0 commit comments

Comments
 (0)