Unstructured-IO
diff --git a/Diff for: ‎CHANGELOG.md
+11 b/Diff for: ‎CHANGELOG.md
+11
diff --git a/Diff for: ‎requirements/constraints.in
+1-1 b/Diff for: ‎requirements/constraints.in
+1-1
diff --git a/Diff for: ‎requirements/extra-pdf-image.txt
+1-1 b/Diff for: ‎requirements/extra-pdf-image.txt
+1-1
diff --git a/Diff for: ‎requirements/ingest-confluence.txt
+1-1 b/Diff for: ‎requirements/ingest-confluence.txt
+1-1
diff --git a/Diff for: ‎test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json
+776-6 b/Diff for: ‎test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json
+776-6
diff --git a/Diff for: ‎test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json
+8-8 b/Diff for: ‎test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json
+8-8
diff --git a/Diff for: ‎test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json
+8-8 b/Diff for: ‎test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json
+8-8
diff --git a/Diff for: ‎test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json
+21-11 b/Diff for: ‎test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json
+21-11
@@ -1,3 +1,14 @@
+## 0.10.2
+
+### Enhancements
+* Bump unstructured-inference==0.5.13:
+  - Fix extracted image elements being included in the layout merge. This addresses the issue
+    where an entire-page image in a PDF was not passed to the layout model when using hi_res.
+
+### Features
+
+### Fixes
+
 ## 0.10.1
 
 ### Enhancements
 
@@ -26,4 +26,4 @@ Pillow<10.0.0
 # AttributeError: 'ResourcePath' object has no attribute 'collection'
 Office365-REST-Python-Client<2.4.3
 # NOTE(christine) Pinned to set the `unstructured-inference` version
-unstructured-inference==0.5.12
+unstructured-inference==0.5.13
@@ -205,7 +205,7 @@ typing-extensions==4.7.1
     #   torch
 tzdata==2023.3
     # via pandas
-unstructured-inference==0.5.12
+unstructured-inference==0.5.13
     # via
     #   -c requirements/constraints.in
     #   -r requirements/extra-pdf-image.in
 
@@ -4,7 +4,7 @@
 #
 #    pip-compile requirements/ingest-confluence.in
 #
-atlassian-python-api==3.40.1
+atlassian-python-api==3.41.0
     # via -r requirements/ingest-confluence.in
 certifi==2023.7.22
     # via
 
@@ -10,34 +10,34 @@
     "text": "Data in Brief 22 (2019) 451–457"
   },
   {
-    "type": "Image",
-    "element_id": "70d50409ea726a2789ebbd004bec31f4",
+    "type": "UncategorizedText",
+    "element_id": "869adddb184177031536477262e0dde0",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
       "page_number": 1
     },
-    "text": "Contents lists available at ScienceDirect Data in Brief journal homepage: www.elsevier.com/locate/dib"
+    "text": "Contents lists available at ScienceDirect"
   },
   {
     "type": "UncategorizedText",
-    "element_id": "869adddb184177031536477262e0dde0",
+    "element_id": "e6fa42b5b4d85001b900e47c050b645b",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
       "page_number": 1
     },
-    "text": "Contents lists available at ScienceDirect"
+    "text": "Data in Brief"
   },
   {
-    "type": "UncategorizedText",
-    "element_id": "e6fa42b5b4d85001b900e47c050b645b",
+    "type": "NarrativeText",
+    "element_id": "9234133787d0a6b3976b16569c0b5cf3",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
       "page_number": 1
     },
-    "text": "Data in Brief"
+    "text": "journal homepage: www.elsevier.com/locate/dib"
   },
   {
     "type": "UncategorizedText",
 
@@ -10,34 +10,34 @@
     "text": "Data in Brief 22 (2019) 484–487"
   },
   {
-    "type": "Image",
-    "element_id": "70d50409ea726a2789ebbd004bec31f4",
+    "type": "UncategorizedText",
+    "element_id": "869adddb184177031536477262e0dde0",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
       "page_number": 1
     },
-    "text": "Contents lists available at ScienceDirect Data in Brief journal homepage: www.elsevier.com/locate/dib"
+    "text": "Contents lists available at ScienceDirect"
   },
   {
     "type": "UncategorizedText",
-    "element_id": "869adddb184177031536477262e0dde0",
+    "element_id": "e6fa42b5b4d85001b900e47c050b645b",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
       "page_number": 1
     },
-    "text": "Contents lists available at ScienceDirect"
+    "text": "Data in Brief"
   },
   {
-    "type": "UncategorizedText",
-    "element_id": "e6fa42b5b4d85001b900e47c050b645b",
+    "type": "NarrativeText",
+    "element_id": "9234133787d0a6b3976b16569c0b5cf3",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
       "page_number": 1
     },
-    "text": "Data in Brief"
+    "text": "journal homepage: www.elsevier.com/locate/dib"
   },
   {
     "type": "UncategorizedText",
 
@@ -852,7 +852,7 @@
   },
   {
     "type": "FigureCaption",
-    "element_id": "185e67615d123b35d38ea72e0cdb6d99",
+    "element_id": "d21661161ae2c8dc39e96ee5c660704b",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -960,16 +960,6 @@
     },
     "text": "LayoutParser provides a uniﬁed interface for existing OCR tools. Though there are many OCR tools available, they are usually conﬁgured diﬀerently with distinct APIs or protocols for using them. It can be ineﬃcient to add new OCR tools into an existing pipeline, and diﬃcult to make direct comparisons among the available tools to ﬁnd the best option for a particular project. To this end, LayoutParser builds a series of wrappers among existing OCR engines, and provides nearly the same syntax for using them. It supports a plug-and-play style of using OCR engines, making it eﬀortless to switch, evaluate, and compare diﬀerent OCR modules:"
   },
-  {
-    "type": "Image",
-    "element_id": "65ac0f9ae348b12ed9484b8af7296617",
-    "metadata": {
-      "data_source": {},
-      "filetype": "application/pdf",
-      "page_number": 7
-    },
-    "text": "ocr_agent = lp.TesseractAgent ()pOi"
-  },
   {
     "type": "ListItem",
     "element_id": "bebbb4e94f1f97edeb5b96e252720a93",
@@ -1351,6 +1341,26 @@
     },
     "text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position"
   },
+  {
+    "type": "NarrativeText",
+    "element_id": "aed1b21a388cefaa841f20f48d19ca98",
+    "metadata": {
+      "data_source": {},
+      "filetype": "application/pdf",
+      "page_number": 9
+    },
+    "text": "Mode I: Showing Layout on the Original Image"
+  },
+  {
+    "type": "NarrativeText",
+    "element_id": "915bc5f1403e01b56e77300d9354fded",
+    "metadata": {
+      "data_source": {},
+      "filetype": "application/pdf",
+      "page_number": 9
+    },
+    "text": "Mode Il: Drawing OCR'd Text at the Correspoding Position"
+  },
   {
     "type": "NarrativeText",
     "element_id": "cc8ad6e0f933633a37b82200e6724f9e",
Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@`
`4`	`4`	`#`
`5`	`5`	`# pip-compile requirements/ingest-confluence.in`
`6`	`6`	`#`
`7`		`-atlassian-python-api==3.40.1`
	`7`	`+atlassian-python-api==3.41.0`
`8`	`8`	`# via -r requirements/ingest-confluence.in`
`9`	`9`	`certifi==2023.7.22`
`10`	`10`	`# via`