Skip to content

Commit 8223654

Browse files
committed
add classification and description in PictureData
Signed-off-by: Michele Dolfi <[email protected]>
1 parent baceeae commit 8223654

File tree

6 files changed

+54
-20
lines changed

6 files changed

+54
-20
lines changed

docling_core/types/experimental/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77

88
from .base import BoundingBox, CoordOrigin, Size
99
from .document import (
10-
BasePictureData,
1110
BaseTableData,
1211
DescriptionItem,
1312
DocItem,
@@ -19,6 +18,7 @@
1918
KeyValueItem,
2019
NodeItem,
2120
PageItem,
21+
PictureData,
2222
PictureItem,
2323
ProvenanceItem,
2424
RefItem,

docling_core/types/experimental/document.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,26 @@
4646
}
4747

4848

49-
class BasePictureData(BaseModel): # TBD
50-
"""BasePictureData."""
49+
class PictureClassificationData(BaseModel):
50+
"""PictureClassificationData."""
51+
52+
provenance: str
53+
predicted_class: str
54+
confidence: float
55+
56+
57+
class PictureDescriptionData(BaseModel):
58+
"""PictureDescriptionData."""
59+
60+
text: str
61+
provenance: str = ""
62+
63+
64+
class PictureData(BaseModel):
65+
"""PictureData."""
66+
67+
classification: Optional[PictureClassificationData] = None
68+
description: Optional[PictureDescriptionData] = None
5169

5270

5371
class TableCell(BaseModel):
@@ -384,7 +402,7 @@ class PictureItem(FloatingItem):
384402

385403
label: typing.Literal[DocItemLabel.PICTURE] = DocItemLabel.PICTURE
386404

387-
data: BasePictureData
405+
data: PictureData
388406

389407
def export_to_document_tokens(
390408
self,
@@ -863,14 +881,14 @@ def add_table(
863881

864882
def add_picture(
865883
self,
866-
data: BasePictureData,
884+
data: PictureData,
867885
caption: Optional[Union[TextItem, RefItem]] = None,
868886
prov: Optional[ProvenanceItem] = None,
869887
parent: Optional[GroupItem] = None,
870888
):
871889
"""add_picture.
872890
873-
:param data: BasePictureData:
891+
:param data: PictureData:
874892
:param caption: Optional[Union[TextItem:
875893
:param RefItem]]: (Default value = None)
876894
:param prov: Optional[ProvenanceItem]: (Default value = None)

test/data/docling_document/unit/PictureItem.yaml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
captions: []
22
children: []
3-
data: {}
3+
data:
4+
classification: null
5+
description: null
46
footnotes: []
57
image: null
68
label: picture

test/data/experimental/2206.01062.experimental.yaml

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,9 @@ pictures:
216216
- captions:
217217
- $ref: '#/texts/12'
218218
children: []
219-
data: {}
219+
data:
220+
classification: null
221+
description: null
220222
footnotes: []
221223
image: null
222224
label: picture
@@ -238,7 +240,9 @@ pictures:
238240
- captions:
239241
- $ref: '#/texts/39'
240242
children: []
241-
data: {}
243+
data:
244+
classification: null
245+
description: null
242246
footnotes: []
243247
image: null
244248
label: picture
@@ -260,7 +264,9 @@ pictures:
260264
- captions:
261265
- $ref: '#/texts/70'
262266
children: []
263-
data: {}
267+
data:
268+
classification: null
269+
description: null
264270
footnotes: []
265271
image: null
266272
label: picture
@@ -282,7 +288,9 @@ pictures:
282288
- captions:
283289
- $ref: '#/texts/76'
284290
children: []
285-
data: {}
291+
data:
292+
classification: null
293+
description: null
286294
footnotes: []
287295
image: null
288296
label: picture
@@ -304,7 +312,9 @@ pictures:
304312
- captions:
305313
- $ref: '#/texts/117'
306314
children: []
307-
data: {}
315+
data:
316+
classification: null
317+
description: null
308318
footnotes: []
309319
image: null
310320
label: picture

test/data/experimental/dummy_doc.yaml

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -133,10 +133,14 @@ pictures: # All pictures...
133133
$ref: "#/body"
134134
captions:
135135
- $ref: "#/texts/3"
136-
data: # BaseFigureData Type
137-
classification: "illustration"
138-
confidence: 0.78
139-
description: "...."
136+
data: # PictureData Type
137+
classification:
138+
provenance: model1
139+
predicted_class: "illustration"
140+
confidence: 0.78
141+
description:
142+
text: "..."
143+
provenance: "model2"
140144
# content structure?
141145
image:
142146
mimetype: image/png

test/test_docling_doc.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,14 @@
66

77
from docling_core.types.experimental.document import (
88
CURRENT_VERSION,
9-
BasePictureData,
109
BaseTableData,
1110
DescriptionItem,
1211
DocItem,
1312
DoclingDocument,
1413
FloatingItem,
1514
KeyValueItem,
1615
ListItem,
16+
PictureData,
1717
PictureItem,
1818
SectionHeaderItem,
1919
TableCell,
@@ -107,7 +107,7 @@ def verify(dc, obj):
107107
elif dc is PictureItem:
108108
obj = dc(
109109
self_ref="#",
110-
data=BasePictureData(),
110+
data=PictureData(),
111111
)
112112
verify(dc, obj)
113113

@@ -203,7 +203,7 @@ def _test_serialize_and_reload(doc):
203203
assert doc_reload is not doc # can't be identical
204204

205205

206-
def _test_export_methods(doc):
206+
def _test_export_methods(doc: DoclingDocument):
207207
### Iterate all elements
208208
doc.print_element_tree()
209209
## Export stuff
@@ -341,7 +341,7 @@ def _construct_doc() -> DoclingDocument:
341341
fig_caption = doc.add_text(
342342
label=DocItemLabel.CAPTION, text="This is the caption of figure 1."
343343
)
344-
fig_item = doc.add_picture(data=BasePictureData(), caption=fig_caption)
344+
fig_item = doc.add_picture(data=PictureData(), caption=fig_caption)
345345

346346
return doc
347347

0 commit comments

Comments
 (0)