Unstructured-IO
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 4 additions & 0 deletions b/‎Makefile‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎pytest.ini‎
Lines changed: 3 additions & 0 deletions b/‎pytest.ini‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎sample-docs/loremipsum_multipage.pdf‎
73.2 KB b/‎sample-docs/loremipsum_multipage.pdf‎
73.2 KB
diff --git a/‎test_unstructured_inference/models/test_yolox.py‎
Lines changed: 197 additions & 0 deletions b/‎test_unstructured_inference/models/test_yolox.py‎
Lines changed: 197 additions & 0 deletions
diff --git a/‎test_unstructured_inference/test_api.py‎
Lines changed: 2 additions & 84 deletions b/‎test_unstructured_inference/test_api.py‎
Lines changed: 2 additions & 84 deletions
diff --git a/‎unstructured_inference/api.py‎
Lines changed: 4 additions & 1 deletion b/‎unstructured_inference/api.py‎
Lines changed: 4 additions & 1 deletion
@@ -1,5 +1,7 @@
 ## 0.2.8-dev0
 
+* Improved testing time
+
 ## 0.2.7
 
 * Fixed duplicated load_pdf call
 
@@ -95,6 +95,10 @@ stop-app-local:
 ## test:                    runs all unittests
 .PHONY: test
 test:
+	PYTHONPATH=. pytest -m "not slow" test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
+
+.PHONY: test-slow
+test-slow:
 	PYTHONPATH=. pytest test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing
 
 ## check:                   runs linters (includes tests)
 
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    slow: marks tests as slow (deselect with '-m "not slow"')
@@ -0,0 +1,197 @@
+import os
+import shutil
+
+import jsons
+import pytest
+from fastapi.testclient import TestClient
+
+from unstructured_inference import api
+from unstructured_inference.inference.layout import DocumentLayout
+from unstructured_inference.models.yolox import yolox_local_inference, get_model_loading_info
+from unstructured_inference.models.base import UnknownModelException
+
+
+@pytest.mark.slow
+def test_layout_v02_api_parsing_image():
+    filename = os.path.join("sample-docs", "test-image.jpg")
+
+    client = TestClient(api.app)
+    response = client.post(
+        "/layout/yolox/image",
+        headers={"Accept": "multipart/mixed"},
+        files=[("file", (filename, open(filename, "rb"), "image/png"))],
+        data={"version": "yolox"},
+    )
+    doc_layout = jsons.load(response.json(), DocumentLayout)
+    assert len(doc_layout.pages) == 1
+    # NOTE(benjamin) The example sent to the test contains 13 detections
+    assert len(doc_layout.pages[0]["layout"]) == 13
+    assert response.status_code == 200
+
+
+@pytest.mark.slow
+def test_layout_v02_api_parsing_pdf():
+    filename = os.path.join("sample-docs", "loremipsum.pdf")
+
+    client = TestClient(api.app)
+    response = client.post(
+        "/layout/yolox/pdf",
+        files={"file": (filename, open(filename, "rb"))},
+        data={"version": "yolox"},
+    )
+    doc_layout = jsons.load(response.json(), DocumentLayout)
+    assert len(doc_layout.pages) == 1
+    # NOTE(benjamin) The example sent to the test contains 5 detections
+    assert len(doc_layout.pages[0]["layout"]) == 5
+    assert response.status_code == 200
+
+
+@pytest.mark.slow
+def test_layout_v02_api_parsing_pdf_ocr():
+    filename = os.path.join("sample-docs", "non-embedded.pdf")
+
+    client = TestClient(api.app)
+    response = client.post(
+        "/layout/yolox/pdf",
+        files={"file": (filename, open(filename, "rb"))},
+        data={"force_ocr": True, "version": "yolox"},
+    )
+    doc_layout = jsons.load(response.json(), DocumentLayout)
+    assert len(doc_layout.pages) == 10
+    assert len(doc_layout.pages[0]["layout"]) > 1
+    assert response.status_code == 200
+
+
+@pytest.mark.slow
+def test_layout_v02_local_parsing_image():
+    filename = os.path.join("sample-docs", "test-image.jpg")
+    OUTPUT_DIR = "yolox_output"
+    # NOTE(benjamin) keep_output = True create a file for each image in
+    # localstorage for visualization of the result
+    if os.path.exists(OUTPUT_DIR):
+        # NOTE(benjamin): should delete the default output folder on test?
+        shutil.rmtree(OUTPUT_DIR)
+    document_layout_1 = yolox_local_inference(
+        filename, type="image", output_directory=OUTPUT_DIR, version="yolox"
+    )
+    assert len(document_layout_1.pages) == 1
+    document_layout_2 = yolox_local_inference(filename, type="image", version="yolox")
+    # NOTE(benjamin) The example image should result in one page result
+    assert len(document_layout_2.pages) == 1
+    # NOTE(benjamin) The example sent to the test contains 13 detections
+    assert len(document_layout_2.pages[0].layout) == 13
+
+
+@pytest.mark.slow
+def test_layout_v02_local_parsing_pdf():
+    filename = os.path.join("sample-docs", "loremipsum.pdf")
+    document_layout = yolox_local_inference(filename, type="pdf", version="yolox")
+    content = document_layout.to_string()
+    assert "Lorem ipsum" in content
+    assert len(document_layout.pages) == 1
+    # NOTE(benjamin) The example sent to the test contains 5 detections
+    assert len(document_layout.pages[0].layout) == 5
+
+
+@pytest.mark.slow
+def test_layout_v02_local_parsing_empty_pdf():
+    filename = os.path.join("sample-docs", "empty-document.pdf")
+    document_layout = yolox_local_inference(filename, type="pdf", version="yolox")
+    assert len(document_layout.pages) == 1
+    # NOTE(benjamin) The empty example document should yield no detections
+    assert len(document_layout.pages[0].layout) == 0
+
+
+def test_invalid_model():
+    with pytest.raises(UnknownModelException):
+        get_model_loading_info("invalidmodel")
+
+
+########################
+# ONLY SHORT TESTS BELOW
+########################
+
+
+def test_layout_v02_api_parsing_image_soft():
+    filename = os.path.join("sample-docs", "test-image.jpg")
+
+    client = TestClient(api.app)
+    response = client.post(
+        "/layout/yolox/image",
+        headers={"Accept": "multipart/mixed"},
+        files=[("file", (filename, open(filename, "rb"), "image/png"))],
+        data={"version": "yolox_tiny"},
+    )
+    doc_layout = jsons.load(response.json(), DocumentLayout)
+    assert len(doc_layout.pages) == 1
+    # NOTE(benjamin) Soft version of the test, run make test-slow in order to run with full model
+    assert len(doc_layout.pages[0]["layout"]) > 0
+    assert response.status_code == 200
+
+
+def test_layout_v02_api_parsing_pdf_soft():
+    filename = os.path.join("sample-docs", "loremipsum.pdf")
+
+    client = TestClient(api.app)
+    response = client.post(
+        "/layout/yolox/pdf",
+        files={"file": (filename, open(filename, "rb"))},
+        data={"version": "yolox_tiny"},
+    )
+    doc_layout = jsons.load(response.json(), DocumentLayout)
+    assert len(doc_layout.pages) == 1
+    # NOTE(benjamin) Soft version of the test, run make test-slow in order to run with full model
+    assert len(doc_layout.pages[0]["layout"]) > 0
+    assert response.status_code == 200
+
+
+def test_layout_v02_api_parsing_pdf_ocr_soft():
+    filename = os.path.join("sample-docs", "non-embedded.pdf")
+
+    client = TestClient(api.app)
+    response = client.post(
+        "/layout/yolox/pdf",
+        files={"file": (filename, open(filename, "rb"))},
+        data={"force_ocr": True, "version": "yolox_tiny"},
+    )
+    doc_layout = jsons.load(response.json(), DocumentLayout)
+    assert len(doc_layout.pages) == 10
+    assert len(doc_layout.pages[0]["layout"]) > 1
+    assert response.status_code == 200
+
+
+def test_layout_v02_local_parsing_image_soft():
+    filename = os.path.join("sample-docs", "test-image.jpg")
+    OUTPUT_DIR = "yolox_output"
+    # NOTE(benjamin) keep_output = True create a file for each image in
+    # localstorage for visualization of the result
+    if os.path.exists(OUTPUT_DIR):
+        # NOTE(benjamin): should delete the default output folder on test?
+        shutil.rmtree(OUTPUT_DIR)
+    document_layout_1 = yolox_local_inference(
+        filename, type="image", output_directory=OUTPUT_DIR, version="yolox_tiny"
+    )
+    assert len(document_layout_1.pages) == 1
+    document_layout_2 = yolox_local_inference(filename, type="image", version="yolox_tiny")
+    # NOTE(benjamin) The example image should result in one page result
+    assert len(document_layout_2.pages) == 1
+    # NOTE(benjamin) Soft version of the test, run make test-slow in order to run with full model
+    assert len(document_layout_2.pages[0].layout) > 0
+
+
+def test_layout_v02_local_parsing_pdf_soft():
+    filename = os.path.join("sample-docs", "loremipsum.pdf")
+    document_layout = yolox_local_inference(filename, type="pdf", version="yolox_tiny")
+    content = document_layout.to_string()
+    assert "Lorem ipsum" in content
+    assert len(document_layout.pages) == 1
+    # NOTE(benjamin) Soft version of the test, run make test-slow in order to run with full model
+    assert len(document_layout.pages[0].layout) > 0
+
+
+def test_layout_v02_local_parsing_empty_pdf_soft():
+    filename = os.path.join("sample-docs", "empty-document.pdf")
+    document_layout = yolox_local_inference(filename, type="pdf", version="yolox_tiny")
+    assert len(document_layout.pages) == 1
+    # NOTE(benjamin) The empty example document should yield no detections
+    assert len(document_layout.pages[0].layout) == 0
@@ -1,14 +1,11 @@
 import os
-import shutil
 
-import jsons
 import pytest
 from fastapi.testclient import TestClient
 
 from unstructured_inference import api
 from unstructured_inference.models import base as models
 from unstructured_inference.inference.layout import DocumentLayout
-from unstructured_inference.models.yolox import yolox_local_inference  # DocumentLayout #maybe
 
 
 class MockModel:
@@ -53,89 +50,10 @@ def test_layout_parsing_api(monkeypatch, filetype, ext, data, process_func, expe
 def test_bad_route_404():
     client = TestClient(api.app)
     filename = os.path.join("sample-docs", "loremipsum.pdf")
-    response = client.post("/layout/badroute", files={"file": (filename, open(filename, "rb"))})
-    assert response.status_code == 404
-
-
-def test_layout_v02_api_parsing_image():
-    filename = os.path.join("sample-docs", "test-image.jpg")
-
-    client = TestClient(api.app)
-    response = client.post(
-        "/layout/yolox/image",
-        headers={"Accept": "multipart/mixed"},
-        files=[("file", (filename, open(filename, "rb"), "image/png"))],
-    )
-    doc_layout = jsons.load(response.json(), DocumentLayout)
-    assert len(doc_layout.pages) == 1
-    # NOTE(benjamin) The example sent to the test contains 13 detections
-    assert len(doc_layout.pages[0]["layout"]) == 13
-    assert response.status_code == 200
-
-
-def test_layout_v02_api_parsing_pdf():
-    filename = os.path.join("sample-docs", "loremipsum.pdf")
-
-    client = TestClient(api.app)
     response = client.post(
-        "/layout/yolox/pdf",
-        files={"file": (filename, open(filename, "rb"))},
+        "/layout/detectron/badroute", files={"file": (filename, open(filename, "rb"))}
     )
-    doc_layout = jsons.load(response.json(), DocumentLayout)
-    assert len(doc_layout.pages) == 1
-    # NOTE(benjamin) The example sent to the test contains 5 detections
-    assert len(doc_layout.pages[0]["layout"]) == 5
-    assert response.status_code == 200
-
-
-def test_layout_v02_api_parsing_pdf_ocr():
-    filename = os.path.join("sample-docs", "non-embedded.pdf")
-
-    client = TestClient(api.app)
-    response = client.post(
-        "/layout/yolox/pdf",
-        files={"file": (filename, open(filename, "rb"))},
-        data={"force_ocr": True},
-    )
-    doc_layout = jsons.load(response.json(), DocumentLayout)
-    assert len(doc_layout.pages) == 10
-    assert len(doc_layout.pages[0]["layout"]) > 1
-    assert response.status_code == 200
-
-
-def test_layout_v02_local_parsing_image():
-    filename = os.path.join("sample-docs", "test-image.jpg")
-    OUTPUT_DIR = "yolox_output"
-    # NOTE(benjamin) keep_output = True create a file for each image in
-    # localstorage for visualization of the result
-    if os.path.exists(OUTPUT_DIR):
-        # NOTE(benjamin): should delete the default output folder on test?
-        shutil.rmtree(OUTPUT_DIR)
-    document_layout_1 = yolox_local_inference(filename, type="image", output_directory=OUTPUT_DIR)
-    assert len(document_layout_1.pages) == 1
-    document_layout_2 = yolox_local_inference(filename, type="image")
-    # NOTE(benjamin) The example image should result in one page result
-    assert len(document_layout_2.pages) == 1
-    # NOTE(benjamin) The example sent to the test contains 13 detections
-    assert len(document_layout_2.pages[0].layout) == 13
-
-
-def test_layout_v02_local_parsing_pdf():
-    filename = os.path.join("sample-docs", "loremipsum.pdf")
-    document_layout = yolox_local_inference(filename, type="pdf")
-    content = document_layout.to_string()
-    assert "Lorem ipsum" in content
-    assert len(document_layout.pages) == 1
-    # NOTE(benjamin) The example sent to the test contains 5 detections
-    assert len(document_layout.pages[0].layout) == 5
-
-
-def test_layout_v02_local_parsing_empty_pdf():
-    filename = os.path.join("sample-docs", "empty-document.pdf")
-    document_layout = yolox_local_inference(filename, type="pdf")
-    assert len(document_layout.pages) == 1
-    # NOTE(benjamin) The example sent to the test contains 5 detections
-    assert len(document_layout.pages[0].layout) == 0
+    assert response.status_code == 404
 
 
 def test_healthcheck(monkeypatch):
 
@@ -47,10 +47,13 @@ async def layout_parsing_yolox(
     request: Request,
     file: List[UploadFile] = File(default=None),
     force_ocr=Form(default=False),
+    version=Form(default="yolox"),
 ):
     with tempfile.NamedTemporaryFile() as tmp_file:
         tmp_file.write(file[0].file.read())
-        detections = yolox_local_inference(tmp_file.name, type=filetype, use_ocr=force_ocr)
+        detections = yolox_local_inference(
+            tmp_file.name, type=filetype, use_ocr=force_ocr, version=version
+        )
 
     return detections
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[pytest]`
	`2`	`+markers =`
	`3`	`+ slow: marks tests as slow (deselect with '-m "not slow"')`