Skip to content

Commit d6d71d1

Browse files
authored
Fix file utils imports
1 parent 69ff775 commit d6d71d1

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

cdp_backend/utils/file_utils.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@
1616

1717
import fireo
1818
import fsspec
19-
import pypdf
2019
import requests
2120
from fsspec.core import url_to_fs
22-
from tika import parser
2321

2422
from ..database import models as db_models
2523

@@ -854,6 +852,8 @@ def parse_doc_file(document_raw: bytes) -> str:
854852
str:
855853
A str of all text in the .doc file.
856854
"""
855+
from tika import parser
856+
857857
parsed_content = parser.from_buffer(document_raw)["content"]
858858
return remove_duplicate_space(parsed_content)
859859

@@ -872,6 +872,8 @@ def parse_pdf_file(document_raw: bytes) -> str:
872872
str:
873873
A str of all text in the .pdf file.
874874
"""
875+
import pypdf
876+
875877
pdf_reader = pypdf.PdfReader(io.BytesIO(document_raw))
876878
text = ""
877879

@@ -898,6 +900,8 @@ def parse_pptx_file(document_raw: bytes) -> str:
898900
str:
899901
A str of all text in the .pptx file.
900902
"""
903+
from tika import parser
904+
901905
parsed_pptx = parser.from_buffer(document_raw)["content"]
902906
return remove_duplicate_space(parsed_pptx)
903907

0 commit comments

Comments
 (0)