File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -23,6 +23,8 @@ for line in page.all_text():
2323 print (line)
2424```
2525
26+ All dataclasses are serializable with `to_dict`/`from_dict` and `to_json`/`from_json` via [dataclasses-json][dcj].
27+
2628### Data model
2729
2830| Class | Import from |
@@ -81,6 +83,7 @@ Released under the [MIT License](LICENSE).
8183[workflows]: https://github.com/SCDH/pygexml/actions/workflows/checks_tests_docs.yml
8284[workflows-badge]: https://github.com/SCDH/pygexml/actions/workflows/checks_tests_docs.yml/badge.svg
8385[hypothesis]: https://hypothesis.readthedocs.io
86+ [dcj]: https://pypi.org/project/dataclasses-json/
8487[api-docs]: https://scdh.github.io/pygexml
8588[api-docs-strategies]: https://scdh.github.io/pygexml/pygexml/strategies.html
8689[api-docs-badge]: https://img.shields.io/badge/API%20docs-online-blue?logo=gitbook&logoColor=lightgrey
Original file line number Diff line number Diff line change 11from re import Pattern , compile
22from warnings import warn
33from dataclasses import dataclass
4+ from dataclasses_json import DataClassJsonMixin
45from typing import ClassVar
56from collections .abc import Iterable
67from lxml import etree
@@ -25,7 +26,7 @@ class PageXMLError(Exception):
2526
2627
2728@dataclass
28- class Coords :
29+ class Coords ( DataClassJsonMixin ) :
2930 polygon : Polygon
3031
3132 # Loose regex that allows for negative values that can be handled by
@@ -79,7 +80,7 @@ def parse(cls, points_str: str) -> "Coords":
7980
8081
8182@dataclass
82- class TextLine :
83+ class TextLine ( DataClassJsonMixin ) :
8384 id : ID
8485 coords : Coords
8586 text : str
@@ -112,7 +113,7 @@ def from_xml(cls, element: Element) -> "TextLine":
112113
113114
114115@dataclass
115- class TextRegion :
116+ class TextRegion ( DataClassJsonMixin ) :
116117 id : ID
117118 coords : Coords
118119 textlines : dict [ID , TextLine ]
@@ -149,7 +150,7 @@ def all_words(self) -> Iterable[str]:
149150
150151
151152@dataclass
152- class Page :
153+ class Page ( DataClassJsonMixin ) :
153154 image_filename : str
154155 regions : dict [ID , TextRegion ]
155156
Original file line number Diff line number Diff line change @@ -14,6 +14,7 @@ authors = [
1414]
1515dependencies = [
1616 " lxml" ,
17+ " dataclasses-json"
1718]
1819
1920[project .urls ]
You can’t perform that action at this time.
0 commit comments