Skip to content

Commit f5ff140

Browse files
authored
fix: ElementMetadata serializes when the filename is a Path object (#233)
1 parent 3c1b089 commit f5ff140

File tree

4 files changed

+24
-2
lines changed

4 files changed

+24
-2
lines changed

Diff for: CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.4.10
2+
3+
* Fixes `ElementMetadata` so that it's JSON serializable when the filename is a `Path` object.
4+
15
## 0.4.9
26

37
* Added ingest modules and s3 connector, sample ingest script

Diff for: test_unstructured/staging/test_base_staging.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import csv
2+
import json
23
import os
4+
import pathlib
35
import pytest
46

57
import pandas as pd
68

79
import unstructured.staging.base as base
810

9-
from unstructured.documents.elements import Title, NarrativeText, ListItem
11+
from unstructured.documents.elements import ElementMetadata, Title, NarrativeText, ListItem
1012

1113

1214
@pytest.fixture
@@ -64,3 +66,14 @@ def test_convert_to_dataframe():
6466
)
6567
assert df.type.equals(expected_df.type) is True
6668
assert df.text.equals(expected_df.text) is True
69+
70+
71+
def test_convert_to_isd_serializes_with_posix_paths():
72+
metadata = ElementMetadata(filename=pathlib.PosixPath("../../fake-file.txt"))
73+
elements = [
74+
Title(text="Title 1", metadata=metadata),
75+
NarrativeText(text="Narrative 1", metadata=metadata),
76+
]
77+
output = base.convert_to_isd(elements)
78+
# NOTE(robinson) - json.dumps should run without raising an exception
79+
json.dumps(output)

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.4.9" # pragma: no cover
1+
__version__ = "0.4.10" # pragma: no cover

Diff for: unstructured/documents/elements.py

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from dataclasses import dataclass
33
import hashlib
44
from typing import Callable, List, Optional, Union
5+
import pathlib
56

67

78
class NoID(ABC):
@@ -16,6 +17,10 @@ class ElementMetadata:
1617
page_number: Optional[int] = None
1718
url: Optional[str] = None
1819

20+
def __post_init__(self):
21+
if isinstance(self.filename, pathlib.Path):
22+
self.filename = str(self.filename)
23+
1924
def to_dict(self):
2025
return {key: value for key, value in self.__dict__.items() if value is not None}
2126

0 commit comments

Comments
 (0)