11import pytest
2- from unittest .mock import patch
2+ from unittest .mock import patch , mock_open
33
44import layoutparser as lp
55from layoutparser .elements import Layout , Rectangle , TextBlock
66import numpy as np
77from PIL import Image
88
9- from unstructured_inference .inference .layout import DocumentLayout , PageLayout
9+ import unstructured_inference .inference .layout as layout
10+ import unstructured_inference .models as models
11+
1012import unstructured_inference .models .detectron2 as detectron2
1113import unstructured_inference .models .tesseract as tesseract
1214
@@ -28,7 +30,7 @@ def mock_page_layout():
2830
2931
3032def test_pdf_page_converts_images_to_array (mock_image ):
31- page = PageLayout (number = 0 , image = mock_image , layout = Layout ())
33+ page = layout . PageLayout (number = 0 , image = mock_image , layout = Layout ())
3234 assert page .image_array is None
3335
3436 image_array = page ._get_image_array ()
@@ -47,7 +49,7 @@ def detect(self, *args):
4749 monkeypatch .setattr (tesseract , "is_pytesseract_available" , lambda * args : True )
4850
4951 image = np .random .randint (12 , 24 , (40 , 40 ))
50- page = PageLayout (number = 0 , image = image , layout = Layout ())
52+ page = layout . PageLayout (number = 0 , image = image , layout = Layout ())
5153 rectangle = Rectangle (1 , 2 , 3 , 4 )
5254 text_block = TextBlock (rectangle , text = None )
5355
@@ -67,7 +69,7 @@ def test_get_page_elements(monkeypatch, mock_page_layout):
6769 monkeypatch .setattr (detectron2 , "is_detectron2_available" , lambda * args : True )
6870
6971 image = np .random .randint (12 , 24 , (40 , 40 ))
70- page = PageLayout (number = 0 , image = image , layout = mock_page_layout )
72+ page = layout . PageLayout (number = 0 , image = image , layout = mock_page_layout )
7173
7274 elements = page .get_elements (inplace = False )
7375
@@ -79,17 +81,17 @@ def test_get_page_elements(monkeypatch, mock_page_layout):
7981
8082
8183def test_get_page_elements_with_ocr (monkeypatch ):
82- monkeypatch .setattr (PageLayout , "ocr" , lambda * args : "An Even Catchier Title" )
84+ monkeypatch .setattr (layout . PageLayout , "ocr" , lambda * args : "An Even Catchier Title" )
8385
8486 rectangle = Rectangle (2 , 4 , 6 , 8 )
8587 text_block = TextBlock (rectangle , text = None , type = "Title" )
86- layout = Layout ([text_block ])
88+ doc_layout = Layout ([text_block ])
8789
88- monkeypatch .setattr (detectron2 , "load_default_model" , lambda : MockLayoutModel (layout ))
90+ monkeypatch .setattr (detectron2 , "load_default_model" , lambda : MockLayoutModel (doc_layout ))
8991 monkeypatch .setattr (detectron2 , "is_detectron2_available" , lambda * args : True )
9092
9193 image = np .random .randint (12 , 24 , (40 , 40 ))
92- page = PageLayout (number = 0 , image = image , layout = layout )
94+ page = layout . PageLayout (number = 0 , image = image , layout = doc_layout )
9395 page .get_elements ()
9496
9597 assert str (page ) == "An Even Catchier Title"
@@ -105,7 +107,7 @@ def test_read_pdf(monkeypatch, mock_page_layout):
105107 monkeypatch .setattr (detectron2 , "is_detectron2_available" , lambda * args : True )
106108
107109 with patch .object (lp , "load_pdf" , return_value = (layouts , images )):
108- doc = DocumentLayout .from_file ("fake-file.pdf" )
110+ doc = layout . DocumentLayout .from_file ("fake-file.pdf" )
109111
110112 assert str (doc ).startswith ("A Catchy Title" )
111113 assert str (doc ).count ("A Catchy Title" ) == 2 # Once for each page
@@ -115,3 +117,62 @@ def test_read_pdf(monkeypatch, mock_page_layout):
115117
116118 pages = doc .pages
117119 assert str (doc ) == "\n \n " .join ([str (page ) for page in pages ])
120+
121+
@pytest.mark.parametrize("model_name", [None, "checkbox", "fake"])
def test_process_data_with_model(monkeypatch, mock_page_layout, model_name):
    """Assert that ``layout.process_data_with_model`` returns a truthy result.

    ``get_model``, ``load_model`` and ``_get_model_loading_info`` on ``models``
    and ``DocumentLayout.from_file`` are replaced with stubs, and
    ``builtins.open`` is patched with ``mock_open``.
    """

    def stub_get_model(x):
        return MockLayoutModel(mock_page_layout)

    def stub_from_file(*args, **kwargs):
        # An empty document built from no pages.
        return layout.DocumentLayout.from_pages([])

    def stub_load_model(*args, **kwargs):
        return MockLayoutModel(mock_page_layout)

    def stub_model_loading_info(*args, **kwargs):
        label_map = {0: "Unchecked", 1: "Checked"}
        return ("fake-binary-path", "fake-config-path", label_map)

    monkeypatch.setattr(models, "get_model", stub_get_model)
    monkeypatch.setattr(layout.DocumentLayout, "from_file", stub_from_file)
    monkeypatch.setattr(models, "load_model", stub_load_model)
    monkeypatch.setattr(models, "_get_model_loading_info", stub_model_loading_info)

    fake_open = mock_open(read_data=b"000000")
    with patch("builtins.open", fake_open):
        result = layout.process_data_with_model(open(""), model_name=model_name)
        assert result
144+
145+
def test_process_data_with_model_raises_on_invalid_model_name():
    """Expect ``UnknownModelException`` when the model name is ``"fake"``.

    Nothing on ``models`` is stubbed here; only ``builtins.open`` is patched.
    """
    fake_open = mock_open(read_data=b"000000")
    with patch("builtins.open", fake_open), pytest.raises(models.UnknownModelException):
        layout.process_data_with_model(open(""), model_name="fake")
150+
151+
@pytest.mark.parametrize("model_name", [None, "checkbox"])
def test_process_file_with_model(monkeypatch, mock_page_layout, model_name):
    """Assert that ``layout.process_file_with_model`` returns a truthy result.

    ``get_model``, ``load_model`` and ``_get_model_loading_info`` on ``models``
    and ``DocumentLayout.from_file`` are replaced with stubs before the call.
    """

    def stub_get_model(x):
        return MockLayoutModel(mock_page_layout)

    def stub_from_file(*args, **kwargs):
        # An empty document built from no pages.
        return layout.DocumentLayout.from_pages([])

    def stub_load_model(*args, **kwargs):
        return MockLayoutModel(mock_page_layout)

    def stub_model_loading_info(*args, **kwargs):
        label_map = {0: "Unchecked", 1: "Checked"}
        return ("fake-binary-path", "fake-config-path", label_map)

    monkeypatch.setattr(models, "get_model", stub_get_model)
    monkeypatch.setattr(layout.DocumentLayout, "from_file", stub_from_file)
    monkeypatch.setattr(models, "load_model", stub_load_model)
    monkeypatch.setattr(models, "_get_model_loading_info", stub_model_loading_info)

    filename = ""
    result = layout.process_file_with_model(filename, model_name=model_name)
    assert result
174+
175+
def test_process_file_with_model_raises_on_invalid_model_name():
    """Expect ``UnknownModelException`` when the model name is ``"fake"``."""
    expected_error = models.UnknownModelException
    with pytest.raises(expected_error):
        layout.process_file_with_model("", model_name="fake")
0 commit comments