Skip to content

bug/<TypeError: unstructured.partition.common.add_element_metadata() got multiple values for keyword argument 'coordinates'>  #3665

Open
@MrForExample

Description

@MrForExample

Describe the bug

/app $ python3 docker_test/langchain_unstructured_test_pdf_to_text.py
INFO: pikepdf C++ to Python logger bridge initialized
INFO: Reading PDF for file: /app/example-docs/pdf/2409.12431v1.pdf ...
Traceback (most recent call last):
  File "/app/docker_test/langchain_unstructured_test_pdf_to_text.py", line 19, in <module>
    docs = load_and_process_pdf_structured(pdf_path)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/docker_test/langchain_unstructured_test_pdf_to_text.py", line 11, in load_and_process_pdf_structured
    for doc in loader.lazy_load():
  File "/home/notebook-user/.local/lib/python3.11/site-packages/langchain_unstructured/document_loaders.py", line 178, in lazy_load
    yield from load_file(f=self.file, f_path=self.file_path)
  File "/home/notebook-user/.local/lib/python3.11/site-packages/langchain_unstructured/document_loaders.py", line 212, in lazy_load
    else self._elements_json
         ^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/langchain_unstructured/document_loaders.py", line 231, in _elements_json
    return self._convert_elements_to_dicts(self._elements_via_local)
                                           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/langchain_unstructured/document_loaders.py", line 249, in _elements_via_local      
    return partition(
           ^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/auto.py", line 342, in partition
    elements = partition_pdf(
               ^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/documents/elements.py", line 605, in wrapper
    elements = func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/file_utils/filetype.py", line 731, in wrapper
    elements = func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/file_utils/filetype.py", line 687, in wrapper
    elements = func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/chunking/dispatch.py", line 74, in wrapper
    elements = func(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf.py", line 205, in partition_pdf
    return partition_pdf_or_image(
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf.py", line 307, in partition_pdf_or_image
    elements = _partition_pdf_or_image_local(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/utils.py", line 217, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf.py", line 720, in _partition_pdf_or_image_local
    elements = document_to_element_list(
               ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/common.py", line 657, in document_to_element_list
    add_element_metadata(
TypeError: unstructured.partition.common.add_element_metadata() got multiple values for keyword argument 'coordinates'

To Reproduce

def load_and_process_pdf_structured(file_path: str) -> list:
    # Repro: this local (partition_via_api=False) hi_res load raises the TypeError about a duplicate 'coordinates' keyword
    from langchain_unstructured import UnstructuredLoader
    loader = UnstructuredLoader(
        file_path=file_path,
        strategy="hi_res",
        partition_via_api=False,
        coordinates=False, # the error occurs whether this is True or False; presumably forwarded to partition() -- TODO confirm
    )
    docs = []
    for doc in loader.lazy_load():
        docs.append(doc)
        
    return docs

if __name__ == "__main__":
    pdf_path = "/app/example-docs/pdf/2409.12431v1.pdf"
        
    docs = load_and_process_pdf_structured(pdf_path)  # the reported TypeError propagates out of this call
    for doc in docs:
        print(f"\n{doc.page_content}")  # not reached in the reported run, because the call above raises

Expected behavior
The code should run without error and return the documents loaded from the PDF.

Environment Info
I used the official Unstructured Docker image, running on Windows 11 under WSL 2.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions