Skip to content

Commit 01a85de

Browse files
committed
fix: DoclingDocument model validator should deal with any raw input
Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent d52bd37 commit 01a85de

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

docling_core/types/doc/document.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2308,12 +2308,12 @@ class DoclingDocument(BaseModel):
23082308

23092309
@model_validator(mode="before")
23102310
@classmethod
2311-
def transform_to_content_layer(cls, data: dict) -> dict:
2311+
def transform_to_content_layer(cls, data: Any) -> Any:
23122312
"""transform_to_content_layer."""
23132313
# Since version 1.1.0, all NodeItems carry content_layer property.
23142314
# We must assign previous page_header and page_footer instances to furniture.
23152315
# Note: model_validators which check on the version must use "before".
2316-
if "version" in data and data["version"] == "1.0.0":
2316+
if isinstance(data, dict) and data.get("version", "") == "1.0.0":
23172317
for item in data.get("texts", []):
23182318
if "label" in item and item["label"] in [
23192319
DocItemLabel.PAGE_HEADER.value,

test/test_docling_doc.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
import yaml
1010
from PIL import Image as PILImage
1111
from PIL import ImageDraw
12-
from pydantic import AnyUrl, ValidationError
12+
from pydantic import AnyUrl, BaseModel, ValidationError
1313

1414
from docling_core.types.doc.base import BoundingBox, CoordOrigin, ImageRefMode, Size
1515
from docling_core.types.doc.document import ( # BoundingBox,
@@ -1156,6 +1156,14 @@ def test_upgrade_content_layer_from_1_0_0():
11561156
assert doc.version == CURRENT_VERSION
11571157
assert doc.texts[0].content_layer == ContentLayer.FURNITURE
11581158

1159+
# test that transform_to_content_layer model validator can handle any data type
1160+
class ContentOutput(BaseModel):
1161+
content: str | DoclingDocument
1162+
1163+
co = ContentOutput.model_validate_json('{"content": "Random string with version"}')
1164+
assert co
1165+
assert isinstance(co.content, str)
1166+
11591167

11601168
def test_version_doc():
11611169

0 commit comments

Comments
 (0)