4 files changed: +12 -13 lines changed
Original file line number Diff line number Diff line change
1212from src .app .services .tutor .agents import TEMPLATES
1313from src .app .services .tutor .models import (
1414 ExtractorOutputList ,
15- SummariesOutputModel ,
1615 SyllabusFeedback ,
1716 SyllabusResponse ,
1817 SyllabusResponseAgent ,
4847@router .post ("/files/content" )
4948async def extract_files_content (
5049 files : Annotated [list [UploadFile ], File ()],
51- ) -> SummariesOutputModel | None :
50+ ) -> ExtractorOutputList | None :
5251 files_content = await get_files_content (files )
5352 files_content_str = ("__DOCUMENT_SEPARATOR__" ).join (files_content )
5453
@@ -67,7 +66,7 @@ async def extract_files_content(
6766 summaries = await chatfactory .chat_client .completion (messages = messages )
6867 assert isinstance (summaries , str )
6968 json_summaries = extract_json_from_response (summaries )
70- summaries_output = SummariesOutputModel (** json_summaries )
69+ summaries_output = ExtractorOutputList (** json_summaries )
7170
7271 return summaries_output
7372
Original file line number Diff line number Diff line change
@@ -57,6 +57,8 @@ async def completion(
5757 response_format : Optional [Union [dict , Type [BaseModel ]]] = None ,
5858 ) -> dict | str :
5959
60+ logger .info ("starting completion with model_name=%s" , self .model )
61+
6062 if self .is_azure_model :
6163 return await self .az_completion (messages )
6264
Original file line number Diff line number Diff line change
@@ -14,10 +14,6 @@ class ExtractorOutputList(BaseModel):
1414 extracts : list [ExtractorOutput ]
1515
1616
17- class SummariesOutputModel (BaseModel ):
18- summaries : list [str ]
19-
20-
2117class TutorSearchResponse (BaseModel ):
2218 extracts : list [ExtractorOutput ]
2319 nb_results : int
Original file line number Diff line number Diff line change
22
33from docx import Document as DocxReader
44from fastapi import HTTPException , UploadFile
5-
6- # from src.app.services.pdf_extractor import extract_txt_from_pdf_with_tika
75from pypdf import PdfReader
86from qdrant_client .models import ScoredPoint
97
108from src .app .api .dependencies import get_settings
9+ from src .app .services .pdf_extractor import extract_txt_from_pdf_with_tika
1110from src .app .utils .decorators import log_time_and_error_sync
1211
1312settings = get_settings ()
@@ -97,11 +96,14 @@ async def get_file_content(file: UploadFile) -> str:
9796
9897
9998async def _extract_pdf_content (file ) -> str :
100- reader = PdfReader (file .file )
101- return "\n " .join (page .extract_text () or "" for page in reader .pages )
102- # content = extract_txt_from_pdf_with_tika(file.file, settings.TIKA_URL_BASE)
99+ content = ""
100+ try :
101+ content = extract_txt_from_pdf_with_tika (file .file , settings .TIKA_URL_BASE )
102+ except Exception :
103+ reader = PdfReader (file .file )
104+ content = "\n " .join (page .extract_text () or "" for page in reader .pages )
103105
104- # return content
106+ return content
105107
106108
107109async def _extract_text_content (file ) -> str :
You can’t perform that action at this time.
0 commit comments