Skip to content

Commit da8094d

Browse files
committed
feat: make PageLayout.elements a cached property
- default `PageLayout.get_elements_with_detection_model` now returns `LayoutElements`
- `PageLayout.elements` is a cached property computed from the `elements_array` property to save memory and CPU costs
1 parent 5d6e50b commit da8094d

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
## 0.8.8-dev0
1+
## 0.8.8-dev1
22

33
* fix: pdfminer-six dependencies
4+
* feat: `PageLayout.elements` is now a `cached_property` to reduce unnecessary memory and CPU costs
45

56
## 0.8.7
67

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.8.8-dev0" # pragma: no cover
1+
__version__ = "0.8.8-dev1" # pragma: no cover

unstructured_inference/inference/layout.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import os
44
import tempfile
5+
from functools import cached_property
56
from pathlib import PurePath
67
from typing import Any, BinaryIO, Collection, List, Optional, Union, cast
78

@@ -149,7 +150,6 @@ def __init__(
149150
self.number = number
150151
self.detection_model = detection_model
151152
self.element_extraction_model = element_extraction_model
152-
self.elements: Collection[LayoutElement] = []
153153
self.elements_array: LayoutElements | None = None
154154
self.password = password
155155
# NOTE(alan): Dropped LocationlessLayoutElement that was created for chipper - chipper has
@@ -159,10 +159,16 @@ def __init__(
159159
def __str__(self) -> str:
160160
return "\n\n".join([str(element) for element in self.elements])
161161

162+
@cached_property
163+
def elements(self) -> Collection[LayoutElement]:
164+
"""return a list of layout elements from the array data structure; intended for backward
165+
compatibility"""
166+
return self.elements_array.as_list()
167+
162168
def get_elements_using_image_extraction(
163169
self,
164170
inplace=True,
165-
) -> Optional[List[LayoutElement]]:
171+
) -> Optional[LayoutElements]:
166172
"""Uses end-to-end text element extraction model to extract the elements on the page."""
167173
if self.element_extraction_model is None:
168174
raise ValueError(
@@ -178,7 +184,6 @@ def get_elements_using_image_extraction(
178184
def get_elements_with_detection_model(
179185
self,
180186
inplace: bool = True,
181-
array_only: bool = False,
182187
) -> Optional[List[LayoutElement]]:
183188
"""Uses specified model to detect the elements on the page."""
184189
if self.detection_model is None:
@@ -198,11 +203,9 @@ def get_elements_with_detection_model(
198203

199204
if inplace:
200205
self.elements_array = inferred_layout
201-
if not array_only:
202-
self.elements = inferred_layout.as_list()
203206
return None
204207

205-
return inferred_layout.as_list()
208+
return inferred_layout
206209

207210
def _get_image_array(self) -> Union[np.ndarray[Any, Any], None]:
208211
"""Converts the raw image into a numpy array."""

0 commit comments

Comments
 (0)