Skip to content

Commit 465b282

Browse files
committed
fix: improve meta migration
`setdefault` would not update anything if the key was already mapped to `None`.
Signed-off-by: Panos Vagenas <[email protected]>
1 parent 56b3c42 commit 465b282

File tree

1 file changed

+60
-94
lines changed

1 file changed

+60
-94
lines changed

docling_core/types/doc/document.py

Lines changed: 60 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,6 @@
2727
Field,
2828
FieldSerializationInfo,
2929
StringConstraints,
30-
TypeAdapter,
31-
ValidationError,
3230
computed_field,
3331
field_serializer,
3432
field_validator,
@@ -1595,88 +1593,68 @@ class PictureItem(FloatingItem):
15951593
deprecated("Field `annotations` is deprecated; use `meta` instead."),
15961594
] = []
15971595

1598-
@model_validator(mode="before")
1599-
@classmethod
1600-
def _migrate_annotations_to_meta(cls, data: Any) -> Any:
1596+
@model_validator(mode="after")
1597+
def _migrate_annotations_to_meta(self) -> Self:
16011598
"""Migrate the `annotations` field to `meta`."""
1602-
if isinstance(data, dict) and (annotations := data.get("annotations")):
1599+
if self.annotations:
16031600
_logger.warning(
16041601
"Migrating deprecated `annotations` to `meta`; this will be removed in the future. "
16051602
"Note that only the first available instance of each annotation type will be migrated."
16061603
)
1607-
for raw_ann in annotations:
1604+
for ann in self.annotations:
16081605
# migrate annotations to meta
16091606

1610-
try:
1611-
ann: PictureDataType = TypeAdapter(PictureDataType).validate_python(
1612-
raw_ann
1613-
)
1614-
except ValidationError as e:
1615-
raise e
1616-
16171607
# ensure meta field is present
1618-
data.setdefault("meta", {})
1608+
if self.meta is None:
1609+
self.meta = PictureMeta()
16191610

16201611
if isinstance(ann, PictureClassificationData):
1621-
data["meta"].setdefault(
1622-
MetaFieldName.CLASSIFICATION.value,
1623-
PictureClassificationMetaField(
1624-
predictions=[
1625-
PictureClassificationPrediction(
1626-
class_name=pred.class_name,
1627-
confidence=pred.confidence,
1628-
created_by=ann.provenance,
1629-
)
1630-
for pred in ann.predicted_classes
1631-
],
1632-
).model_dump(mode="json"),
1612+
self.meta.classification = PictureClassificationMetaField(
1613+
predictions=[
1614+
PictureClassificationPrediction(
1615+
class_name=pred.class_name,
1616+
confidence=pred.confidence,
1617+
created_by=ann.provenance,
1618+
)
1619+
for pred in ann.predicted_classes
1620+
],
16331621
)
16341622
elif isinstance(ann, DescriptionAnnotation):
1635-
data["meta"].setdefault(
1636-
MetaFieldName.DESCRIPTION.value,
1637-
DescriptionMetaField(
1638-
text=ann.text,
1639-
created_by=ann.provenance,
1640-
).model_dump(mode="json"),
1623+
self.meta.description = DescriptionMetaField(
1624+
text=ann.text,
1625+
created_by=ann.provenance,
16411626
)
16421627
elif isinstance(ann, PictureMoleculeData):
1643-
data["meta"].setdefault(
1644-
MetaFieldName.MOLECULE.value,
1645-
MoleculeMetaField(
1646-
smi=ann.smi,
1647-
confidence=ann.confidence,
1648-
created_by=ann.provenance,
1649-
**{
1650-
MetaUtils._create_migrated_meta_field_name(
1651-
name="segmentation"
1652-
): ann.segmentation,
1653-
MetaUtils._create_migrated_meta_field_name(
1654-
name="class_name"
1655-
): ann.class_name,
1656-
},
1657-
).model_dump(mode="json"),
1628+
self.meta.molecule = MoleculeMetaField(
1629+
smi=ann.smi,
1630+
confidence=ann.confidence,
1631+
created_by=ann.provenance,
1632+
**{
1633+
MetaUtils._create_migrated_meta_field_name(
1634+
name="segmentation"
1635+
): ann.segmentation,
1636+
MetaUtils._create_migrated_meta_field_name(
1637+
name="class_name"
1638+
): ann.class_name,
1639+
},
16581640
)
16591641
elif isinstance(ann, PictureTabularChartData):
1660-
data["meta"].setdefault(
1661-
MetaFieldName.TABULAR_CHART.value,
1662-
TabularChartMetaField(
1663-
title=ann.title,
1664-
chart_data=ann.chart_data,
1665-
).model_dump(mode="json"),
1666-
)
1667-
elif isinstance(ann, MiscAnnotation):
1668-
data["meta"].setdefault(
1669-
MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1670-
ann.content,
1642+
self.meta.tabular_chart = TabularChartMetaField(
1643+
title=ann.title,
1644+
chart_data=ann.chart_data,
16711645
)
16721646
else:
1673-
# fall back to reusing original annotation type name (in namespaced format)
1674-
data["meta"].setdefault(
1675-
MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1676-
ann.model_dump(mode="json"),
1647+
self.meta.set_custom_field(
1648+
namespace=MetaUtils._META_FIELD_LEGACY_NAMESPACE,
1649+
name=ann.kind,
1650+
value=(
1651+
ann.content
1652+
if isinstance(ann, MiscAnnotation)
1653+
else ann.model_dump(mode="json")
1654+
),
16771655
)
16781656

1679-
return data
1657+
return self
16801658

16811659
# Convert the image to Base64
16821660
def _image_to_base64(self, pil_image, format="PNG"):
@@ -1829,49 +1807,37 @@ class TableItem(FloatingItem):
18291807
deprecated("Field `annotations` is deprecated; use `meta` instead."),
18301808
] = []
18311809

1832-
@model_validator(mode="before")
1833-
@classmethod
1834-
def migrate_annotations_to_meta(cls, data: Any) -> Any:
1810+
@model_validator(mode="after")
1811+
def _migrate_annotations_to_meta(self) -> Self:
18351812
"""Migrate the `annotations` field to `meta`."""
1836-
if isinstance(data, dict) and (annotations := data.get("annotations")):
1813+
if self.annotations:
18371814
_logger.warning(
18381815
"Migrating deprecated `annotations` to `meta`; this will be removed in the future. "
18391816
"Note that only the first available instance of each annotation type will be migrated."
18401817
)
1841-
for raw_ann in annotations:
1842-
# migrate annotations to meta
1843-
1844-
try:
1845-
ann: TableAnnotationType = TypeAdapter(
1846-
TableAnnotationType
1847-
).validate_python(raw_ann)
1848-
except ValidationError as e:
1849-
raise e
1818+
for ann in self.annotations:
18501819

18511820
# ensure meta field is present
1852-
data.setdefault("meta", {})
1821+
if self.meta is None:
1822+
self.meta = FloatingMeta()
18531823

18541824
if isinstance(ann, DescriptionAnnotation):
1855-
data["meta"].setdefault(
1856-
MetaFieldName.DESCRIPTION.value,
1857-
DescriptionMetaField(
1858-
text=ann.text,
1859-
created_by=ann.provenance,
1860-
).model_dump(mode="json"),
1861-
)
1862-
elif isinstance(ann, MiscAnnotation):
1863-
data["meta"].setdefault(
1864-
MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1865-
ann.content,
1825+
self.meta.description = DescriptionMetaField(
1826+
text=ann.text,
1827+
created_by=ann.provenance,
18661828
)
18671829
else:
1868-
# fall back to reusing original annotation type name (in namespaced format)
1869-
data["meta"].setdefault(
1870-
MetaUtils._create_migrated_meta_field_name(name=ann.kind),
1871-
ann.model_dump(mode="json"),
1830+
self.meta.set_custom_field(
1831+
namespace=MetaUtils._META_FIELD_LEGACY_NAMESPACE,
1832+
name=ann.kind,
1833+
value=(
1834+
ann.content
1835+
if isinstance(ann, MiscAnnotation)
1836+
else ann.model_dump(mode="json")
1837+
),
18721838
)
18731839

1874-
return data
1840+
return self
18751841

18761842
def export_to_dataframe(
18771843
self, doc: Optional["DoclingDocument"] = None

0 commit comments

Comments
 (0)