99
1010from config import (
1111 COLLECTION_NAME ,
12+ LOCAL_EMBEDDING_MODEL ,
1213 MILVUS_HOST ,
1314 MILVUS_PORT ,
1415 MOSEC_EMBEDDING_ENDPOINT ,
1516 MOSEC_EMBEDDING_MODEL ,
1617 TEI_EMBEDDING_ENDPOINT ,
17- TEI_EMBEDDING_MODEL ,
1818)
1919from fastapi import Body , File , Form , HTTPException , UploadFile
2020from langchain .text_splitter import RecursiveCharacterTextSplitter
@@ -73,7 +73,7 @@ def empty_embedding() -> List[float]:
7373 return [e if e is not None else empty_embedding () for e in batched_embeddings ]
7474
7575
76- def ingest_chunks_to_milvus (file_name : str , chunks : List , embedder ):
76+ def ingest_chunks_to_milvus (file_name : str , chunks : List ):
7777 if logflag :
7878 logger .info (f"[ ingest chunks ] file name: { file_name } " )
7979
@@ -94,7 +94,7 @@ def ingest_chunks_to_milvus(file_name: str, chunks: List, embedder):
9494 try :
9595 _ = Milvus .from_documents (
9696 batch_docs ,
97- embedder ,
97+ embeddings ,
9898 collection_name = COLLECTION_NAME ,
9999 connection_args = {"host" : MILVUS_HOST , "port" : MILVUS_PORT },
100100 partition_key_field = partition_field_name ,
@@ -110,7 +110,7 @@ def ingest_chunks_to_milvus(file_name: str, chunks: List, embedder):
110110 return True
111111
112112
113- def ingest_data_to_milvus (doc_path : DocPath , embedder ):
113+ def ingest_data_to_milvus (doc_path : DocPath ):
114114 """Ingest document to Milvus."""
115115 path = doc_path .path
116116 file_name = path .split ("/" )[- 1 ]
@@ -151,7 +151,7 @@ def ingest_data_to_milvus(doc_path: DocPath, embedder):
151151 if logflag :
152152 logger .info (f"[ ingest data ] Done preprocessing. Created { len (chunks )} chunks of the original file." )
153153
154- return ingest_chunks_to_milvus (file_name , chunks , embedder )
154+ return ingest_chunks_to_milvus (file_name , chunks )
155155
156156
157157def search_by_file (collection , file_name ):
@@ -210,28 +210,9 @@ async def ingest_documents(
210210 if files and link_list :
211211 raise HTTPException (status_code = 400 , detail = "Provide either a file or a string list, not both." )
212212
213- # Create vectorstore
214- if MOSEC_EMBEDDING_ENDPOINT :
215- # create embeddings using MOSEC endpoint service
216- if logflag :
217- logger .info (
218- f"[ upload ] MOSEC_EMBEDDING_ENDPOINT:{ MOSEC_EMBEDDING_ENDPOINT } , MOSEC_EMBEDDING_MODEL:{ MOSEC_EMBEDDING_MODEL } "
219- )
220- embedder = MosecEmbeddings (model = MOSEC_EMBEDDING_MODEL )
221- elif TEI_EMBEDDING_ENDPOINT :
222- # create embeddings using TEI endpoint service
223- if logflag :
224- logger .info (f"[ upload ] TEI_EMBEDDING_ENDPOINT:{ TEI_EMBEDDING_ENDPOINT } " )
225- embedder = HuggingFaceHubEmbeddings (model = TEI_EMBEDDING_ENDPOINT )
226- else :
227- # create embeddings using local embedding model
228- if logflag :
229- logger .info (f"[ upload ] Local TEI_EMBEDDING_MODEL:{ TEI_EMBEDDING_MODEL } " )
230- embedder = HuggingFaceBgeEmbeddings (model_name = TEI_EMBEDDING_MODEL )
231-
232213 # define Milvus obj
233214 my_milvus = Milvus (
234- embedding_function = embedder ,
215+ embedding_function = embeddings ,
235216 collection_name = COLLECTION_NAME ,
236217 connection_args = {"host" : MILVUS_HOST , "port" : MILVUS_PORT },
237218 index_params = index_params ,
@@ -274,7 +255,6 @@ async def ingest_documents(
274255 process_table = process_table ,
275256 table_strategy = table_strategy ,
276257 ),
277- embedder ,
278258 )
279259 uploaded_files .append (save_path )
280260 if logflag :
@@ -294,7 +274,6 @@ async def ingest_documents(
294274 # process_table=process_table,
295275 # table_strategy=table_strategy,
296276 # ),
297- # embedder
298277 # )
299278
300279 # try:
@@ -352,7 +331,6 @@ async def ingest_documents(
352331 process_table = process_table ,
353332 table_strategy = table_strategy ,
354333 ),
355- embedder ,
356334 )
357335 if logflag :
358336 logger .info (f"[ upload ] Successfully saved link list { link_list } " )
@@ -368,28 +346,9 @@ async def rag_get_file_structure():
368346 if logflag :
369347 logger .info ("[ get ] start to get file structure" )
370348
371- # Create vectorstore
372- if MOSEC_EMBEDDING_ENDPOINT :
373- # create embeddings using MOSEC endpoint service
374- if logflag :
375- logger .info (
376- f"[ get ] MOSEC_EMBEDDING_ENDPOINT:{ MOSEC_EMBEDDING_ENDPOINT } , MOSEC_EMBEDDING_MODEL:{ MOSEC_EMBEDDING_MODEL } "
377- )
378- embedder = MosecEmbeddings (model = MOSEC_EMBEDDING_MODEL )
379- elif TEI_EMBEDDING_ENDPOINT :
380- # create embeddings using TEI endpoint service
381- if logflag :
382- logger .info (f"[ get ] TEI_EMBEDDING_ENDPOINT:{ TEI_EMBEDDING_ENDPOINT } " )
383- embedder = HuggingFaceHubEmbeddings (model = TEI_EMBEDDING_ENDPOINT )
384- else :
385- # create embeddings using local embedding model
386- if logflag :
387- logger .info (f"[ get ] Local TEI_EMBEDDING_MODEL:{ TEI_EMBEDDING_MODEL } " )
388- embedder = HuggingFaceBgeEmbeddings (model_name = TEI_EMBEDDING_MODEL )
389-
390349 # define Milvus obj
391350 my_milvus = Milvus (
392- embedding_function = embedder ,
351+ embedding_function = embeddings ,
393352 collection_name = COLLECTION_NAME ,
394353 connection_args = {"host" : MILVUS_HOST , "port" : MILVUS_PORT },
395354 index_params = index_params ,
@@ -445,28 +404,9 @@ async def delete_single_file(file_path: str = Body(..., embed=True)):
445404 if logflag :
446405 logger .info (file_path )
447406
448- # Create vectorstore
449- if MOSEC_EMBEDDING_ENDPOINT :
450- # create embeddings using MOSEC endpoint service
451- if logflag :
452- logger .info (
453- f"[ delete ] MOSEC_EMBEDDING_ENDPOINT:{ MOSEC_EMBEDDING_ENDPOINT } , MOSEC_EMBEDDING_MODEL:{ MOSEC_EMBEDDING_MODEL } "
454- )
455- embedder = MosecEmbeddings (model = MOSEC_EMBEDDING_MODEL )
456- elif TEI_EMBEDDING_ENDPOINT :
457- # create embeddings using TEI endpoint service
458- if logflag :
459- logger .info (f"[ delete ] TEI_EMBEDDING_ENDPOINT:{ TEI_EMBEDDING_ENDPOINT } " )
460- embedder = HuggingFaceHubEmbeddings (model = TEI_EMBEDDING_ENDPOINT )
461- else :
462- # create embeddings using local embedding model
463- if logflag :
464- logger .info (f"[ delete ] Local TEI_EMBEDDING_MODEL:{ TEI_EMBEDDING_MODEL } " )
465- embedder = HuggingFaceBgeEmbeddings (model_name = TEI_EMBEDDING_MODEL )
466-
467407 # define Milvus obj
468408 my_milvus = Milvus (
469- embedding_function = embedder ,
409+ embedding_function = embeddings ,
470410 collection_name = COLLECTION_NAME ,
471411 connection_args = {"host" : MILVUS_HOST , "port" : MILVUS_PORT },
472412 index_params = index_params ,
@@ -533,4 +473,23 @@ async def delete_single_file(file_path: str = Body(..., embed=True)):
533473if __name__ == "__main__" :
534474 create_upload_folder (upload_folder )
535475
476+ # Create vectorstore
477+ if MOSEC_EMBEDDING_ENDPOINT :
478+ # create embeddings using MOSEC endpoint service
479+ if logflag :
480+ logger .info (
481+ f"[ prepare_doc_milvus ] MOSEC_EMBEDDING_ENDPOINT:{ MOSEC_EMBEDDING_ENDPOINT } , MOSEC_EMBEDDING_MODEL:{ MOSEC_EMBEDDING_MODEL } "
482+ )
483+ embeddings = MosecEmbeddings (model = MOSEC_EMBEDDING_MODEL )
484+ elif TEI_EMBEDDING_ENDPOINT :
485+ # create embeddings using TEI endpoint service
486+ if logflag :
487+ logger .info (f"[ prepare_doc_milvus ] TEI_EMBEDDING_ENDPOINT:{ TEI_EMBEDDING_ENDPOINT } " )
488+ embeddings = HuggingFaceHubEmbeddings (model = TEI_EMBEDDING_ENDPOINT )
489+ else :
490+ # create embeddings using local embedding model
491+ if logflag :
492+ logger .info (f"[ prepare_doc_milvus ] LOCAL_EMBEDDING_MODEL:{ LOCAL_EMBEDDING_MODEL } " )
493+ embeddings = HuggingFaceBgeEmbeddings (model_name = LOCAL_EMBEDDING_MODEL )
494+
536495 opea_microservices ["opea_service@prepare_doc_milvus" ].start ()
0 commit comments