@@ -394,13 +394,13 @@ def _delete_vectors(self, index: str, ids: List[str], namespace: Optional[str])

     def _upsert_vectors(
         self,
-        index: str,
+        index_name: str,
         data: List[Tuple],
         namespace: Optional[str],
         use_async: bool = False,
         batch_size: int = DEFAULT_BATCH_SIZE,
     ) -> None:
-        index = self.pinecone_indexes[index]
+        index = self.pinecone_indexes[index_name]
         results = [
             index.upsert(vectors=batch, namespace=namespace, async_req=use_async)
             for batch in get_batches_from_generator(data, batch_size)
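This hunk renames the string parameter so it no longer shadows the `Index` object that the first line of the body rebinds it to: `index` arrives as a `str`, then `self.pinecone_indexes[index_name]` swaps in the client object. The upserts then go out batch by batch through `get_batches_from_generator`. For reference, a chunking helper in that spirit could be sketched as below; the name `batches_from_generator` and its body are illustrative, not Haystack's actual implementation.

```python
from itertools import islice
from typing import Iterable, Iterator, List, Tuple

def batches_from_generator(items: Iterable[Tuple], batch_size: int) -> Iterator[List[Tuple]]:
    """Yield successive batch_size-sized chunks from any iterable."""
    iterator = iter(items)
    # islice drains up to batch_size items per pass; an empty list ends the loop.
    while batch := list(islice(iterator, batch_size)):
        yield batch
```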
@@ -580,13 +580,15 @@ def write_documents(
         pool_threads = self.pinecone_indexes[index].pool_threads
         if use_async and pool_threads == 1:
             logger.warning(
-                f"Documents will be upserted synchronously, because the number of threads for Pinecone index is set to {pool_threads}. "
-                f"To enable upsert in parallel, initialize PineconeDocumentStore() again setting parameter `pool_threads`."
+                "Documents will be upserted synchronously, because the number of threads for Pinecone index is set to %s. "
+                "To enable upsert in parallel, initialize PineconeDocumentStore() again setting parameter `pool_threads`.",
+                pool_threads,
             )
         elif not use_async and pool_threads != 1:
             logger.warning(
-                f"Parameter `use_async` set to `False` will be ignored and documents will be upserted asynchronously, "
-                f"because the number of threads for Pinecone index is set to {pool_threads}."
+                "Parameter `use_async` set to `False` will be ignored and documents will be upserted asynchronously, "
+                "because the number of threads for Pinecone index is set to %s.",
+                pool_threads,
             )

         field_map = self._create_document_field_map()
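The hunk moves the warnings from eager f-string interpolation to logging's lazy %-style formatting. The difference is easy to show in isolation; the logger name and message below are placeholders, not taken from the module.

```python
import logging

logger = logging.getLogger(__name__)
pool_threads = 1

# f-string: the message is rendered eagerly, even if WARNING is filtered out.
logger.warning(f"pool_threads for the Pinecone index is set to {pool_threads}.")

# %s placeholder: rendering is deferred until a handler actually emits the
# record, and log aggregators can group records that share one template.
logger.warning("pool_threads for the Pinecone index is set to %s.", pool_threads)
```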
@@ -674,13 +676,7 @@ def write_documents(

                 data_to_write_to_pinecone = list(zip(ids, embeddings, metadata))
                 # Store chunk by chunk, synchronously (regular upsert) or in parallel (async upsert), in the vector store
-                self._upsert_vectors(
-                    index=index,
-                    data=data_to_write_to_pinecone,
-                    namespace=namespace,
-                    use_async=use_async,
-                    batch_size=batch_size,
-                )
+                self._upsert_vectors(index, data_to_write_to_pinecone, namespace, use_async, batch_size)  # type: ignore
                 # Add IDs to ID list
                 self._add_local_ids(index, ids)
                 progress_bar.update(chunk_size)
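The keyword-heavy call collapses to positionals (with a `# type: ignore` for the optional `namespace`), but the mechanics underneath are unchanged: when `use_async` is true, `_upsert_vectors` fans batches out through Pinecone's client-side thread pool. A rough sketch of that underlying pattern with the v2 `pinecone` client, using placeholder credentials, index name, and data:

```python
import pinecone

# Placeholder credentials; pool_threads sizes the client-side thread pool.
pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENVIRONMENT")
index = pinecone.Index("example-index", pool_threads=4)

batches = [
    [("doc-1", [0.1, 0.2, 0.3], {"content": "first"})],
    [("doc-2", [0.4, 0.5, 0.6], {"content": "second"})],
]

# async_req=True returns ApplyResult handles immediately instead of blocking;
# calling .get() on each one waits for that request to finish.
async_results = [index.upsert(vectors=batch, async_req=True) for batch in batches]
responses = [result.get() for result in async_results]
```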
@@ -753,13 +749,15 @@ def update_embeddings(
         pool_threads = self.pinecone_indexes[index].pool_threads
         if use_async and pool_threads == 1:
             logger.warning(
-                f"Embeddings will be upserted synchronously, because the number of threads for Pinecone index is {pool_threads}. "
-                f"To enable upsert in parallel, initialize PineconeDocumentStore() again setting parameter `pool_threads`."
+                "Embeddings will be upserted synchronously, because the number of threads for Pinecone index is %s. "
+                "To enable upsert in parallel, initialize PineconeDocumentStore() again setting parameter `pool_threads`.",
+                pool_threads,
             )
         elif not use_async and pool_threads > 1:
             logger.warning(
-                f"Parameter `use_async` set to `False` will be ignored and embeddings will be upserted asynchronously, "
-                f"because the number of threads for Pinecone index is set to {pool_threads}."
+                "Parameter `use_async` set to `False` will be ignored and embeddings will be upserted asynchronously, "
+                "because the number of threads for Pinecone index is set to %s.",
+                pool_threads,
             )

         document_count = self.get_document_count(
@@ -828,7 +826,7 @@ def update_embeddings(
                     ids.append(doc.id)
                 # Update existing vectors in pinecone index
                 data = list(zip(ids, embeddings.tolist(), metadata))
-                self._upsert_vectors(index, data, namespace, use_async, batch_size)
+                self._upsert_vectors(index, data, namespace, use_async, batch_size)  # type: ignore
                 # Add these vector IDs to local store
                 self._add_local_ids(index, ids)
                 progress_bar.set_description_str("Documents Processed")
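As in `write_documents`, the vectors are packed as `(id, values, metadata)` triples before the upsert. A self-contained illustration of that packing step, with made-up documents and a toy embedding size:

```python
import numpy as np

# Made-up inputs: two documents with 4-dimensional embeddings.
ids = ["doc-1", "doc-2"]
embeddings = np.random.rand(2, 4).astype(np.float32)
metadata = [{"content": "first passage"}, {"content": "second passage"}]

# Pinecone takes each vector as an (id, values, metadata) triple;
# .tolist() turns the numpy rows into plain Python lists of floats.
data = list(zip(ids, embeddings.tolist(), metadata))
```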
@@ -1088,7 +1086,7 @@ def _move_documents_by_id_namespace(
                 embedding_matrix = [result["vectors"][_id]["values"] for _id in vector_id_matrix]
                 data_to_write_to_pinecone = list(zip(vector_id_matrix, embedding_matrix, meta_matrix))
                 # Store metadata and embeddings in new target_namespace
-                self._upsert_vectors(index, data_to_write_to_pinecone, target_namespace, use_async=False)
+                self._upsert_vectors(index, data_to_write_to_pinecone, target_namespace, use_async=False)  # type: ignore
                 # Delete vectors from source_namespace
                 self.delete_documents(index=index, ids=id_batch, namespace=source_namespace, drop_ids=False)
                 progress_bar.set_description_str("Documents Moved")
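The move is a fetch, re-upsert, delete cycle: vectors are read out of the source namespace, written into the target, and only then removed from the source. A condensed sketch of that cycle against a Pinecone `Index` object; the function name and the assumption that metadata is present on every fetched vector are illustrative.

```python
from typing import List

def move_vectors(index, id_batch: List[str], source_namespace: str, target_namespace: str) -> None:
    """Copy a batch of vectors into target_namespace, then delete the originals."""
    result = index.fetch(ids=id_batch, namespace=source_namespace)
    data = [
        (_id, result["vectors"][_id]["values"], result["vectors"][_id]["metadata"])
        for _id in id_batch
    ]
    index.upsert(vectors=data, namespace=target_namespace)
    index.delete(ids=id_batch, namespace=source_namespace)
```

Deleting only after the target upsert succeeds keeps the batch recoverable if the copy fails partway through.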
@@ -1214,7 +1212,7 @@ def update_document_meta(self, id: str, meta: Dict[str, str], index: Optional[st
         if doc.embedding is not None:
             meta = {"content": doc.content, "content_type": doc.content_type, **meta}
             data = [(id, doc.embedding.tolist(), meta)]
-            self._upsert_vectors(index, data, self.namespace, use_async=False)
+            self._upsert_vectors(index, data, self.namespace, use_async=False)  # type: ignore

     def delete_documents(
         self,
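`update_document_meta` rebuilds the full metadata dict before upserting because a Pinecone upsert replaces the stored record wholesale rather than patching individual fields, so `content` and `content_type` must be merged back in. A minimal sketch of that merge, assuming a `doc` with `content`, `content_type`, and `embedding` attributes as in the hunk (`build_meta_update` is a hypothetical helper, not part of the store):

```python
def build_meta_update(doc, doc_id: str, new_meta: dict) -> list:
    """Merge content fields into new_meta so the upsert does not drop them."""
    meta = {"content": doc.content, "content_type": doc.content_type, **new_meta}
    # One (id, values, metadata) triple, ready for Index.upsert(vectors=...).
    return [(doc_id, doc.embedding.tolist(), meta)]
```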