
Commit de4774b

WIP
1 parent 41909ef commit de4774b

1 file changed: 16 additions, 12 deletions

Diff for: prepline_general/api/general.py

@@ -109,7 +109,7 @@ def get_pdf_splits(pdf_pages: List[PageObject], split_size: int = 1):
         for page in pdf_pages[offset:end]:
             new_pdf.add_page(page)
 
-        new_pdf.write(pdf_buffer)
+        new_pdf.write(pdf_buffer)  # type: ignore
         pdf_buffer.seek(0)
 
         yield (pdf_buffer.read(), offset)
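For reference, a minimal sketch of the split-and-buffer pattern this hunk touches, assuming pypdf's PdfReader/PdfWriter API; the function and variable names below are illustrative, not the actual code in general.py.

import io
from typing import Iterator, Tuple

from pypdf import PdfReader, PdfWriter  # assumption: pypdf provides the page objects used here


def split_pdf(pdf_bytes: bytes, split_size: int = 1) -> Iterator[Tuple[bytes, int]]:
    # Yield (serialized_pdf_bytes, page_offset) chunks of split_size pages each.
    pages = PdfReader(io.BytesIO(pdf_bytes)).pages
    offset = 0
    while offset < len(pages):
        writer = PdfWriter()
        for page in pages[offset : offset + split_size]:
            writer.add_page(page)
        buffer = io.BytesIO()
        writer.write(buffer)  # the call the hunk above marks with "type: ignore"
        buffer.seek(0)
        yield buffer.read(), offset
        offset += split_size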
@@ -196,7 +196,7 @@ def partition_pdf_splits(
     request: Request,
     pdf_pages: List[PageObject],
     file: IO[bytes],
-    metadata_filename: str,
+    metadata_filename: Optional[str],
     content_type: str,
     coordinates: bool,
     **partition_kwargs: Dict[str, Any],
@@ -265,7 +265,7 @@ def __enter__(self):
 
         Is_Chipper_Processing = True
 
-    def __exit__(self, exc_type, exc_value, exc_tb):
+    def __exit__(self, exc_type, exc_value, exc_tb):  # type: ignore
         global Is_Chipper_Processing
         Is_Chipper_Processing = False
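For context, the __exit__ annotated above belongs to a small context manager that toggles a module-level flag while Chipper is processing. A minimal sketch of that pattern follows; the flag name mirrors the diff, while the class name and the fully typed signature are assumptions. Typing __exit__ this way would also make the "# type: ignore" unnecessary.

from types import TracebackType
from typing import Optional, Type

Is_Chipper_Processing = False


class ChipperMemoryProtection:
    # Toggle the module-level flag for the duration of a with-block.
    def __enter__(self) -> None:
        global Is_Chipper_Processing
        Is_Chipper_Processing = True

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        global Is_Chipper_Processing
        Is_Chipper_Processing = False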

@@ -500,7 +500,7 @@ def pipeline_api(
     # Clean up returned elements
     # Note(austin): pydantic should control this sort of thing for us
     for i, element in enumerate(elements):
-        elements[i].metadata.filename = os.path.basename(filename)
+        elements[i].metadata.filename = os.path.basename(filename)  # type: ignore
 
         if not show_coordinates and element.metadata.coordinates:
             elements[i].metadata.coordinates = None
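A small self-contained illustration of the cleanup loop above, assuming the unstructured library's Text/ElementMetadata API; the sample element and path are invented for illustration only.

import os

from unstructured.documents.elements import ElementMetadata, Text

filename = "/tmp/uploads/example.pdf"  # hypothetical upload path
elements = [Text(text="Hello", metadata=ElementMetadata(filename=filename))]

for i, element in enumerate(elements):
    # Keep only the basename of the original filename, as the hunk above does.
    elements[i].metadata.filename = os.path.basename(filename)

print(elements[0].metadata.filename)  # example.pdf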
@@ -591,15 +591,14 @@ def _validate_chunking_strategy(m_chunking_strategy: List[str]) -> Union[str, No
     return chunking_strategy
 
 
-def _set_pdf_infer_table_structure(m_pdf_infer_table_structure: List[str], strategy: str):
+def _set_pdf_infer_table_structure(m_pdf_infer_table_structure: List[str], strategy: str) -> bool:
     pdf_infer_table_structure = (
         m_pdf_infer_table_structure[0] if len(m_pdf_infer_table_structure) else "false"
     ).lower()
     if strategy == "hi_res" and pdf_infer_table_structure == "true":
-        pdf_infer_table_structure = True
+        return True
     else:
-        pdf_infer_table_structure = False
-    return pdf_infer_table_structure
+        return False
 
 
 def get_validated_mimetype(file: UploadFile):
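The hunk above changes _set_pdf_infer_table_structure to return a bool directly instead of reassigning a string-typed local. Restating the new version with a few asserts makes the behavior explicit; the asserts are derived only from the code shown.

from typing import List


def _set_pdf_infer_table_structure(m_pdf_infer_table_structure: List[str], strategy: str) -> bool:
    pdf_infer_table_structure = (
        m_pdf_infer_table_structure[0] if len(m_pdf_infer_table_structure) else "false"
    ).lower()
    if strategy == "hi_res" and pdf_infer_table_structure == "true":
        return True
    else:
        return False


assert _set_pdf_infer_table_structure(["true"], "hi_res") is True
assert _set_pdf_infer_table_structure(["True"], "hi_res") is True   # comparison is case-insensitive
assert _set_pdf_infer_table_structure(["true"], "fast") is False    # only the hi_res strategy enables it
assert _set_pdf_infer_table_structure([], "hi_res") is False        # empty form field defaults to "false"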
@@ -635,7 +634,12 @@ def get_validated_mimetype(file: UploadFile):
 class MultipartMixedResponse(StreamingResponse):
     CRLF = b"\r\n"
 
-    def __init__(self, *args: Any, content_type: Union[str, None] = None, **kwargs: Dict[str, Any]):
+    def __init__(
+        self,
+        *args: Any,
+        content_type: Union[str, None] = None,
+        **kwargs,  # type: ignore
+    ):
         super().__init__(*args, **kwargs)
         self.content_type = content_type
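The old one-line signature annotated **kwargs as Dict[str, Any], which a type checker reads as "every keyword value must be a dict"; the reformatted version drops that annotation and silences the checker instead. A self-contained restatement, with the conventional **kwargs: Any alternative noted in a comment:

from typing import Any, Union

from starlette.responses import StreamingResponse


class MultipartMixedResponse(StreamingResponse):
    CRLF = b"\r\n"

    def __init__(
        self,
        *args: Any,
        content_type: Union[str, None] = None,
        **kwargs: Any,  # annotating as Any (each value may be anything) would avoid the "# type: ignore"
    ):
        super().__init__(*args, **kwargs)
        self.content_type = content_type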

@@ -821,11 +825,11 @@ def response_generator(is_multipart: bool):
     def join_responses(responses: List[Any]):
         if media_type != "text/csv":
             return responses
-        data = pd.read_csv(io.BytesIO(responses[0].body))
+        data = pd.read_csv(io.BytesIO(responses[0].body))  # type: ignore
         if len(responses) > 1:
             for resp in responses[1:]:
-                resp_data = pd.read_csv(io.BytesIO(resp.body))
-                data = data.merge(resp_data, how="outer")
+                resp_data = pd.read_csv(io.BytesIO(resp.body))  # type: ignore
+                data = data.merge(resp_data, how="outer")  # type: ignore
         return PlainTextResponse(data.to_csv())
 
     if content_type == "multipart/mixed":
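The join_responses change above only adds type-ignore comments; the underlying behavior is to outer-merge the per-split CSV bodies into one DataFrame. A self-contained sketch with invented sample CSVs:

import io

import pandas as pd

# Two CSV bodies standing in for responses[i].body; the rows are invented examples.
csv_bodies = [
    b"type,text\nTitle,Page one heading\n",
    b"type,text\nNarrativeText,Page two body\n",
]

data = pd.read_csv(io.BytesIO(csv_bodies[0]))
for body in csv_bodies[1:]:
    resp_data = pd.read_csv(io.BytesIO(body))
    # how="outer" keeps rows and columns that appear in either frame.
    data = data.merge(resp_data, how="outer")

print(data.to_csv())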
