@@ -109,7 +109,7 @@ def get_pdf_splits(pdf_pages: List[PageObject], split_size: int = 1):
109
109
for page in pdf_pages [offset :end ]:
110
110
new_pdf .add_page (page )
111
111
112
- new_pdf .write (pdf_buffer ) # type: ignore
112
+ new_pdf .write (pdf_buffer ) # type: ignore
113
113
pdf_buffer .seek (0 )
114
114
115
115
yield (pdf_buffer .read (), offset )
@@ -196,7 +196,7 @@ def partition_pdf_splits(
196
196
request : Request ,
197
197
pdf_pages : List [PageObject ],
198
198
file : IO [bytes ],
199
- metadata_filename : str ,
199
+ metadata_filename : Optional [ str ] ,
200
200
content_type : str ,
201
201
coordinates : bool ,
202
202
** partition_kwargs : Dict [str , Any ],
@@ -265,7 +265,7 @@ def __enter__(self):
265
265
266
266
Is_Chipper_Processing = True
267
267
268
- def __exit__ (self , exc_type , exc_value , exc_tb ):
268
+ def __exit__ (self , exc_type , exc_value , exc_tb ): # type: ignore
269
269
global Is_Chipper_Processing
270
270
Is_Chipper_Processing = False
271
271
@@ -500,7 +500,7 @@ def pipeline_api(
500
500
# Clean up returned elements
501
501
# Note(austin): pydantic should control this sort of thing for us
502
502
for i , element in enumerate (elements ):
503
- elements [i ].metadata .filename = os .path .basename (filename )
503
+ elements [i ].metadata .filename = os .path .basename (filename ) # type: ignore
504
504
505
505
if not show_coordinates and element .metadata .coordinates :
506
506
elements [i ].metadata .coordinates = None
@@ -591,15 +591,14 @@ def _validate_chunking_strategy(m_chunking_strategy: List[str]) -> Union[str, No
591
591
return chunking_strategy
592
592
593
593
594
- def _set_pdf_infer_table_structure (m_pdf_infer_table_structure : List [str ], strategy : str ):
594
+ def _set_pdf_infer_table_structure (m_pdf_infer_table_structure : List [str ], strategy : str ) -> bool :
595
595
pdf_infer_table_structure = (
596
596
m_pdf_infer_table_structure [0 ] if len (m_pdf_infer_table_structure ) else "false"
597
597
).lower ()
598
598
if strategy == "hi_res" and pdf_infer_table_structure == "true" :
599
- pdf_infer_table_structure = True
599
+ return True
600
600
else :
601
- pdf_infer_table_structure = False
602
- return pdf_infer_table_structure
601
+ return False
603
602
604
603
605
604
def get_validated_mimetype (file : UploadFile ):
@@ -635,7 +634,12 @@ def get_validated_mimetype(file: UploadFile):
635
634
class MultipartMixedResponse (StreamingResponse ):
636
635
CRLF = b"\r \n "
637
636
638
- def __init__ (self , * args : Any , content_type : Union [str , None ] = None , ** kwargs : Dict [str , Any ]):
637
+ def __init__ (
638
+ self ,
639
+ * args : Any ,
640
+ content_type : Union [str , None ] = None ,
641
+ ** kwargs ,
642
+ ):
639
643
super ().__init__ (* args , ** kwargs )
640
644
self .content_type = content_type
641
645
@@ -821,11 +825,11 @@ def response_generator(is_multipart: bool):
821
825
def join_responses(responses: List[Any]):
    """Combine per-split responses into a single result.

    ``media_type`` is not a parameter; it is resolved from an outer scope.
    When it is anything other than "text/csv" the list is returned untouched.
    For CSV, each response's ``body`` is parsed with pandas, the frames are
    outer-merged in order, and the result is re-serialised into one
    ``PlainTextResponse``.
    """
    if media_type != "text/csv":
        return responses

    merged = pd.read_csv(io.BytesIO(responses[0].body))  # type: ignore
    # Slicing past the first element yields nothing for a single response,
    # so no explicit length check is needed before the loop.
    for extra in responses[1:]:
        frame = pd.read_csv(io.BytesIO(extra.body))  # type: ignore
        merged = merged.merge(frame, how="outer")  # type: ignore
    return PlainTextResponse(merged.to_csv())
830
834
831
835
if content_type == "multipart/mixed" :
0 commit comments