22
33import tempfile
44from pathlib import Path
5+ from typing import Literal
56
67import httpx
78import json
1516from unstructured_client import UnstructuredClient
1617from unstructured_client .models import shared , operations
1718from unstructured_client .models .errors import HTTPValidationError
19+ from unstructured_client .models .shared .partition_parameters import Strategy
1820from unstructured_client .utils .retries import BackoffStrategy , RetryConfig
1921from unstructured_client ._hooks .custom import form_utils
2022from unstructured_client ._hooks .custom import split_pdf_hook
@@ -105,19 +107,22 @@ def test_integration_split_pdf_has_same_output_as_non_split(
105107 )
106108 assert len (diff ) == 0
107109
108- @ pytest . mark . parametrize ( ( "filename" , "expected_ok" , "strategy" ), [
109- ( "_sample_docs/layout-parser-paper.pdf " , True , "hi_res " ), # 16
110- ] # pages
111- )
112- @pytest .mark .parametrize ( ("use_caching" , "cache_dir" ), [
110+
111+ @ pytest . mark . parametrize (( "filename " , "expected_ok" , "strategy " ), [
112+ ( "_sample_docs/layout-parser-paper.pdf" , True , shared . Strategy . HI_RES ), # 16 pages
113+ ] )
114+ @pytest .mark .parametrize (("use_caching" , "cache_dir" ), [
113115 (True , None ), # Use default cache dir
114116 (True , Path (tempfile .gettempdir ()) / "test_integration_unstructured_client1" ), # Use custom cache dir
115117 (False , None ), # Don't use caching
116118 (False , Path (tempfile .gettempdir ()) / "test_integration_unstructured_client2" ), # Don't use caching, use custom cache dir
117119])
118120def test_integration_split_pdf_with_caching (
119- filename : str , expected_ok : bool , strategy : str , use_caching : bool ,
120- cache_dir : Path | None
121+ filename : str ,
122+ expected_ok : bool ,
123+ strategy : Literal [Strategy .HI_RES ],
124+ use_caching : bool ,
125+ cache_dir : Path | None ,
121126):
122127 try :
123128 response = requests .get ("http://localhost:8000/general/docs" )
@@ -140,10 +145,9 @@ def test_integration_split_pdf_with_caching(
140145 parameters = shared .PartitionParameters (
141146 files = files ,
142147 strategy = strategy ,
143- languages = ["eng" ],
144148 split_pdf_page = True ,
145149 split_pdf_cache_tmp_data = use_caching ,
146- split_pdf_cache_dir = cache_dir ,
150+ split_pdf_cache_tmp_data_dir = str ( cache_dir ) ,
147151 )
148152
149153 req = operations .PartitionRequest (
@@ -185,6 +189,7 @@ def test_integration_split_pdf_with_caching(
185189 if cache_dir :
186190 assert not Path (cache_dir ).exists ()
187191
192+
188193@pytest .mark .parametrize ("filename" , ["_sample_docs/super_long_pages.pdf" ])
189194def test_long_pages_hi_res (filename ):
190195 req = operations .PartitionRequest (partition_parameters = shared .PartitionParameters (
0 commit comments