88from app .dependencies .redis import RedisDep
99from app .models .document_model import DocumentModel
1010from app .models .html_document_request import HTMLDocumentRequest
11+ from app .models .raw_document_request import RawDocumentRequest
1112
1213
1314class Repository :
@@ -19,7 +20,7 @@ def get_digest(self, source_id: str) -> str | None:
1920 digest = self .redis .client .get (f"{ config .DIGEST_PREFIX } :{ source_id } " )
2021 return digest .decode () if digest else None
2122
22- def save (self , doc : HTMLDocumentRequest ) -> None :
23+ def save (self , doc : HTMLDocumentRequest | RawDocumentRequest ) -> None :
2324 docs = self .preprocess_doc (doc )
2425 existing_keys = self .redis .client .keys (
2526 f"{ self .redis .key_prefix } :{ doc .source_id } :*"
@@ -42,8 +43,24 @@ def reset(self) -> None:
4243 self .redis .client .delete (* digest_keys )
4344 self .redis ._create_index_if_not_exist (config .EMBEDDING_DIM )
4445
45- def preprocess_doc (self , doc : HTMLDocumentRequest ) -> list [DocumentModel ]:
46- return self .document_transformer .transform_documents ([preprocess (doc )])
46+ @staticmethod
47+ def _preprocess (
48+ doc : HTMLDocumentRequest | RawDocumentRequest ,
49+ ) -> DocumentModel :
50+ match doc :
51+ case HTMLDocumentRequest ():
52+ return preprocess (doc )
53+ case RawDocumentRequest ():
54+ return DocumentModel (
55+ metadata = doc .metadata ,
56+ page_content = doc .page_content ,
57+ )
58+
def preprocess_doc(
    self,
    doc: HTMLDocumentRequest | RawDocumentRequest,
) -> list[DocumentModel]:
    """Return the documents produced by transforming one request.

    The request is first converted with ``self._preprocess``; the result is
    wrapped in a one-element list and handed to
    ``self.document_transformer.transform_documents``, whose return value
    is passed back unchanged.
    """
    prepared = self._preprocess(doc)
    return self.document_transformer.transform_documents([prepared])
4764
4865
# Annotated alias that pairs the Repository type with Depends(Repository).
# NOTE(review): presumably a FastAPI dependency alias so handlers can declare
# a parameter as `RepositoryDep` — confirm where `Depends` is imported from.
4966RepositoryDep = Annotated [Repository , Depends (Repository )]
0 commit comments