Description
Describe the bug
When I call the partition function on a 100-page PDF, it raises an `AsyncLibraryNotFoundError`. The issue is intermittent: it does not reproduce on every call.
My parameters:
files=files, pdf_infer_table_structure=True, extract_image_block_types=["Image"], strategy=shared.Strategy.HI_RES, output_format=shared.OutputFormat.APPLICATION_JSON, unique_element_ids=True, encoding="utf-8", coordinates=True,
relevant logs:
`{"asctime": "2024-10-17 09:02:16,380", "levelname": "ERROR", "module": "document_worker_service", "funcName": "document_pipeline", "lineno": 27, "thread": 140252027388288, "message": "Failure in document pipeline processing, datasource_id: 019299ae-5f80-7b37-a54a-11608642bec1, sub_datasource_id: 019299ae-5f80-7b37-a54a-11608642bec1",
"exc_info":
"Traceback (most recent call last):\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py",
line 204, in retry_with_backoff_async\n return await func()\n ^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py",
line 149, in do_request\n raise PermanentError(exception) from exception\nunstructured_client.utils.retries.PermanentError: unknown async library, or not in async context\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 312, in call_api_partial\n response = await request_utils.call_api_async(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/request_utils.py", line 96, in call_api_async\n response = await retry_async(\n ^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 153, in retry_async\n return await retry_with_backoff_async(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 206, in retry_with_backoff_async\n raise exception.inner\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 121, in do_request\n res = await func()\n ^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/request_utils.py", line 93, in do_request\n return await client.send(new_request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1674, in send\n response = await self._send_handling_auth(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1702, in _send_handling_auth\n response = await self._send_handling_redirects(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1739, in _send_handling_redirects\n response = await self._send_single_request(request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1776, in _send_single_request\n response = await transport.handle_async_request(request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_transports/default.py", line 377, in handle_async_request\n resp = await self._pool.handle_async_request(req)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 215, in handle_async_request\n await self._close_connections(closing)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 303, in _close_connections\n with AsyncShieldCancellation():\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 202, in __init__\n self._backend = current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 29, in current_async_library\n environment = sniffio.current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/sniffio/_impl.py", line 93, in current_async_library\n raise AsyncLibraryNotFoundError(\nsniffio._impl.AsyncLibraryNotFoundError: unknown async library, or not in async context\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n
File "/home/cdw/src/service/document_worker_service.py", line 24, in document_pipeline\n contents = self._process_stage(metadata=metadata)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/service/document_worker_service.py", line 88, in _process_stage\n return loader.load(metadata)\n ^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 72, in load\n raise exc\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 48, in load\n file_elements = self._fetch_file_partition(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 85, in _fetch_file_partition\n response = self._client.general.partition(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/general.py", line 77, in partition\n http_res = self.do_request(\n ^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/basesdk.py", line 265, in do_request\n http_res = self.sdk_configuration.get_hooks().after_success(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/sdkhooks.py", line 59, in after_success\n out = hook.after_success(hook_ctx, response)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 423, in after_success\n elements = self._await_elements(operation_id)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 371, in _await_elements\n task_responses: list[tuple[int, httpx.Response]] = ioloop.run_until_complete(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/nest_asyncio.py", line 98, in run_until_complete\n return f.result()\n ^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/futures.py", line 203, in result\n raise self._exception.with_traceback(self._exception_tb)\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 277, in __step\n result = coro.send(None)\n ^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 63, in run_tasks\n index, response = await future\n ^^^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 615, in _wait_for_one\n return f.result() # May raise f.exception().\n ^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/futures.py", line 203, in result\n raise self._exception.with_traceback(self._exception_tb)\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 277, in __step\n result = coro.send(None)\n ^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 50, in _order_keeper\n response = await coro\n ^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 311, in call_api_partial\n async with httpx.AsyncClient(timeout=client_timeout) as client:\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 2062, in __aexit__\n await self._transport.__aexit__(exc_type, exc_value, traceback)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_transports/default.py", line 356, in __aexit__\n await self._pool.__aexit__(exc_type, exc_value, traceback)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 324, in __aexit__\n await self.aclose()\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 313, in aclose\n await self._close_connections(closing_connections)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 303, in _close_connections\n with AsyncShieldCancellation():\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 202, in __init__\n self._backend = current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 29, in current_async_library\n environment = sniffio.current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/sniffio/_impl.py", line 93, in current_async_library\n raise AsyncLibraryNotFoundError(\nsniffio._impl.AsyncLibraryNotFoundError: unknown async library, or not in async context", "component_type": "cdw", "app_name": "cdw"}`
To Reproduce
Call partition on large PDF files with the Python client twice within a short interval.
Expected behavior
Partition is successful.
Environment Info
Self-hosted unstructured, Python client version is 0.26.0b3. Python version is 3.11.
Additional context
I tried using `gather` and adding `async`/`await`; so far the issue has not reproduced, but I am not sure whether this actually fixes it.
https://github.com/jimmyxu1985/unstructured-python-client/pull/1/files