Skip to content

Commit ae5c2a8

Browse files
authored
chore: chipper model name should point to latest chipper version (#250)
Update model names so that `"chipper"` points to the latest version of Chipper, while `"chipperv1"` points to the old version.
1 parent d989527 commit ae5c2a8

File tree

5 files changed

+10
-7
lines changed

5 files changed

+10
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.7.3-dev2
1+
## 0.7.3
22

33
* Integration of Chipperv2 and additional Chipper functionality, which includes automatic detection of GPU,
44
bounding box prediction and hierarchical representation.

test_unstructured_inference/models/test_chippermodel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ def test_postprocess_bbox(decoded_str, expected_classes):
237237

238238

239239
def test_run_chipper_v2():
240-
model = get_model("chipperv2")
240+
model = get_model("chipper")
241241
img = Image.open("sample-docs/easy_table.jpg")
242242
elements = model(img)
243243
tables = [el for el in elements if el.type == "Table"]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.7.3-dev2" # pragma: no cover
1+
__version__ = "0.7.3" # pragma: no cover

unstructured_inference/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class Source(Enum):
1111
DETECTRON2_ONNX = "detectron2_onnx"
1212
DETECTRON2_LP = "detectron2_lp"
1313
CHIPPER = "chipper"
14+
CHIPPERV1 = "chipperv1"
1415
CHIPPERV2 = "chipperv2"
1516
PDFMINER = "pdfminer"
1617
MERGED = "merged"

unstructured_inference/models/chipper.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818
from unstructured_inference.utils import LazyDict
1919

2020
MODEL_TYPES: Dict[Optional[str], Union[LazyDict, dict]] = {
21-
"chipper": {
21+
"chipperv1": {
2222
"pre_trained_model_repo": "unstructuredio/ved-fine-tuning",
2323
"swap_head": False,
2424
"start_token_prefix": "<s_",
2525
"prompt": "<s>",
2626
"max_length": 1200,
2727
"heatmap_h": 52,
2828
"heatmap_w": 39,
29-
"source": Source.CHIPPER,
29+
"source": Source.CHIPPERV1,
3030
},
3131
"chipperv2": {
3232
"pre_trained_model_repo": "unstructuredio/chipper-fast-fine-tuning",
@@ -37,10 +37,12 @@
3737
"max_length": 1536,
3838
"heatmap_h": 40,
3939
"heatmap_w": 30,
40-
"source": Source.CHIPPERV2,
40+
"source": Source.CHIPPER,
4141
},
4242
}
4343

44+
MODEL_TYPES["chipper"] = MODEL_TYPES["chipperv2"]
45+
4446

4547
class UnstructuredChipperModel(UnstructuredElementExtractionModel):
4648
def initialize(
@@ -309,7 +311,7 @@ def deduplicate_detected_elements(
309311
min_text_size: int = 15,
310312
) -> List[LayoutElement]:
311313
"""For chipper, remove elements from other sources."""
312-
return [el for el in elements if el.source in (Source.CHIPPER, Source.CHIPPERV2)]
314+
return [el for el in elements if el.source in (Source.CHIPPER, Source.CHIPPERV1)]
313315

314316
def adjust_bbox(self, bbox, x_offset, y_offset, ratio, target_size):
315317
"""Translate bbox by (x_offset, y_offset) and shrink by ratio."""

0 commit comments

Comments
 (0)