Unstructured-IO
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 2 additions & 1 deletion
diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
Lines changed: 7 additions & 0 deletions
diff --git a/test_unstructured/partition/test_common.py b/test_unstructured/partition/test_common.py
Lines changed: 4 additions & 4 deletions
diff --git a/test_unstructured/staging/test_base_staging.py b/test_unstructured/staging/test_base_staging.py
Lines changed: 1 addition & 2 deletions
@@ -1,8 +1,9 @@
-## 0.4.9-dev2
+## 0.4.9
 
 * Added ingest modules and s3 connector
 * Default to `url=None` for `partition_pdf` and `partition_image`
 * Add ability to skip English specific check by setting the `UNSTRUCTURED_LANGUAGE` env var to `""`.
+* Document `Element` objects now track metadata
 
 ## 0.4.8
 
 
@@ -25,6 +25,7 @@ def test_auto_partition_email_from_filename():
     elements = partition(filename=filename)
     assert len(elements) > 0
     assert elements == EXPECTED_EMAIL_OUTPUT
+    assert elements[0].metadata.filename == filename
 
 
 def test_auto_partition_email_from_file():
@@ -83,6 +84,7 @@ def test_auto_partition_docx_with_filename(mock_docx_document, expected_docx_ele
 
     elements = partition(filename=filename)
     assert elements == expected_docx_elements
+    assert elements[0].metadata.filename == filename
 
 
 def test_auto_partition_docx_with_file(mock_docx_document, expected_docx_elements, tmpdir):
@@ -98,6 +100,7 @@ def test_auto_partition_html_from_filename():
     filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "example-10k.html")
     elements = partition(filename=filename)
     assert len(elements) > 0
+    assert elements[0].metadata.filename == filename
 
 
 def test_auto_partition_html_from_file():
@@ -129,6 +132,7 @@ def test_auto_partition_text_from_filename():
     elements = partition(filename=filename)
     assert len(elements) > 0
     assert elements == EXPECTED_TEXT_OUTPUT
+    assert elements[0].metadata.filename == filename
 
 
 def test_auto_partition_text_from_file():
@@ -149,6 +153,8 @@ def test_auto_partition_pdf_from_filename():
     assert isinstance(elements[1], NarrativeText)
     assert elements[1].text.startswith("Zejiang Shen 1")
 
+    assert elements[0].metadata.filename == filename
+
 
 def test_auto_partition_pdf_from_file():
     filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper-fast.pdf")
@@ -206,6 +212,7 @@ def test_auto_partition_pptx_from_filename():
     filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.pptx")
     elements = partition(filename=filename)
     assert elements == EXPECTED_PPTX_OUTPUT
+    assert elements[0].metadata.filename == filename
 
 
 def test_auto_with_page_breaks():
 
@@ -18,7 +18,7 @@ def test_normalize_layout_element_dict():
         "text": "Some lovely text",
     }
     element = common.normalize_layout_element(layout_element)
-    assert element == Title(text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]])
+    assert element == Title(text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])
 
 
 def test_normalize_layout_element_dict_caption():
@@ -29,7 +29,7 @@ def test_normalize_layout_element_dict_caption():
     }
     element = common.normalize_layout_element(layout_element)
     assert element == FigureCaption(
-        text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]]
+        text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]]
     )
 
 
@@ -40,7 +40,7 @@ def test_normalize_layout_element_dict_misc():
         "text": "Some lovely text",
     }
     element = common.normalize_layout_element(layout_element)
-    assert element == Text(text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]])
+    assert element == Text(text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])
 
 
 def test_normalize_layout_element_layout_element():
@@ -51,7 +51,7 @@ def test_normalize_layout_element_layout_element():
     )
     element = common.normalize_layout_element(layout_element)
     assert element == NarrativeText(
-        text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]]
+        text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]]
     )
 
 
 
@@ -48,10 +48,9 @@ def test_convert_to_isd_csv(output_csv_file):
         isd_csv_string = base.convert_to_isd_csv(elements)
         csv_file.write(isd_csv_string)
 
-    fieldnames = ["type", "text"]
     with open(output_csv_file, "r") as csv_file:
         csv_rows = csv.DictReader(csv_file)
-        assert all(set(row.keys()) == set(fieldnames) for row in csv_rows)
+        assert all(set(row.keys()) == set(base.TABLE_FIELDNAMES) for row in csv_rows)
 
 
 def test_convert_to_dataframe():
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@ def test_normalize_layout_element_dict():`
`18`	`18`	`"text": "Some lovely text",`
`19`	`19`	`}`
`20`	`20`	`element = common.normalize_layout_element(layout_element)`
`21`		`- assert element == Title(text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]])`
	`21`	`+ assert element == Title(text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])`
`22`	`22`
`23`	`23`
`24`	`24`	`def test_normalize_layout_element_dict_caption():`
`@@ -29,7 +29,7 @@ def test_normalize_layout_element_dict_caption():`
`29`	`29`	`}`
`30`	`30`	`element = common.normalize_layout_element(layout_element)`
`31`	`31`	`assert element == FigureCaption(`
`32`		`- text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]]`
	`32`	`+ text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]]`
`33`	`33`	`)`
`34`	`34`
`35`	`35`
`@@ -40,7 +40,7 @@ def test_normalize_layout_element_dict_misc():`
`40`	`40`	`"text": "Some lovely text",`
`41`	`41`	`}`
`42`	`42`	`element = common.normalize_layout_element(layout_element)`
`43`		`- assert element == Text(text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]])`
	`43`	`+ assert element == Text(text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]])`
`44`	`44`
`45`	`45`
`46`	`46`	`def test_normalize_layout_element_layout_element():`
`@@ -51,7 +51,7 @@ def test_normalize_layout_element_layout_element():`
`51`	`51`	`)`
`52`	`52`	`element = common.normalize_layout_element(layout_element)`
`53`	`53`	`assert element == NarrativeText(`
`54`		`- text="Some lovely text", coordinates=[[2, 2], [3, 4], [5, 6], [7, 8]]`
	`54`	`+ text="Some lovely text", coordinates=[[1, 2], [3, 4], [5, 6], [7, 8]]`
`55`	`55`	`)`
`56`	`56`
`57`	`57`