File tree 1 file changed +6
-2
lines changed
1 file changed +6
-2
lines changed Original file line number Diff line number Diff line change 16
16
17
17
import fireo
18
18
import fsspec
19
- import pypdf
20
19
import requests
21
20
from fsspec .core import url_to_fs
22
- from tika import parser
23
21
24
22
from ..database import models as db_models
25
23
@@ -854,6 +852,8 @@ def parse_doc_file(document_raw: bytes) -> str:
854
852
str:
855
853
A str of all text in the .doc file.
856
854
"""
855
+ from tika import parser
856
+
857
857
parsed_content = parser .from_buffer (document_raw )["content" ]
858
858
return remove_duplicate_space (parsed_content )
859
859
@@ -872,6 +872,8 @@ def parse_pdf_file(document_raw: bytes) -> str:
872
872
str:
873
873
A str of all text in the .pdf file.
874
874
"""
875
+ import pypdf
876
+
875
877
pdf_reader = pypdf .PdfReader (io .BytesIO (document_raw ))
876
878
text = ""
877
879
@@ -898,6 +900,8 @@ def parse_pptx_file(document_raw: bytes) -> str:
898
900
str:
899
901
A str of all text in the .pptx file.
900
902
"""
903
+ from tika import parser
904
+
901
905
parsed_pptx = parser .from_buffer (document_raw )["content" ]
902
906
return remove_duplicate_space (parsed_pptx )
903
907
You can’t perform that action at this time.
0 commit comments