Open
Description
from unstructured.ingest.connector.confluence import ConfluenceAccessConfig, SimpleConfluenceConfig
from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
from unstructured.ingest.runner import ConfluenceRunner
if __name__ == "__main__":
    # Build each configuration object on its own, in the same order the
    # nested call would evaluate them, then hand them all to the runner.
    processor_settings = ProcessorConfig(
        verbose=True,
        output_dir="confluence-ingest-output",
        num_processes=2,
    )
    read_settings = ReadConfig()
    partition_settings = PartitionConfig(
        strategy="hi_res",
        pdf_infer_table_structure=True,
        metadata_exclude=[
            "filename",
            "file_directory",
            "metadata.data_source.date_processed",
        ],
    )

    # NOTE(review): the token, e-mail and URL below look like placeholders;
    # substitute real values before running.
    confluence_access = ConfluenceAccessConfig(api_token="api-key")
    confluence_settings = SimpleConfluenceConfig(
        access_config=confluence_access,
        user_email="my-email",
        url="url",
    )

    runner = ConfluenceRunner(
        processor_config=processor_settings,
        read_config=read_settings,
        partition_config=partition_settings,
        connector_config=confluence_settings,
    )
    runner.run()
This returns a list of JSON elements with hierarchy, but even with `hi_res` and `pdf_infer_table_structure=True` I am unable to access any image data. All I get is the textual data, which I do need, but in my use case I am also looking for the images from the same document.