4
4
5
5
"""
6
6
7
+ import asyncio
7
8
import logging
8
9
from collections import Counter
9
10
from functools import partial
10
11
from typing import Any , Callable , Dict , List , Optional , cast
11
- import asyncio
12
- from llama_index .utils import iter_batch
12
+
13
13
from llama_index .bridge .pydantic import PrivateAttr
14
14
from llama_index .schema import BaseNode , MetadataMode , TextNode
15
+ from llama_index .utils import iter_batch
15
16
from llama_index .vector_stores .types import (
16
17
BasePydanticVectorStore ,
17
18
MetadataFilters ,
@@ -100,12 +101,16 @@ def _to_pinecone_filter(standard_filters: MetadataFilters) -> dict:
100
101
return filters
101
102
102
103
103
- async def async_upload (index , vectors , batch_size , semaphore ):
104
- async def send_batch (batch ):
104
+ async def async_upload (
105
+ index : Any , vectors : List [Dict ], batch_size : int , semaphore : asyncio .Semaphore
106
+ ) -> None :
107
+ async def send_batch (batch : List [Dict ]): # type: ignore
105
108
async with semaphore :
106
109
return await asyncio .to_thread (index .upsert , batch , async_req = True )
107
-
108
- await asyncio .gather (* [send_batch (chunk ) for chunk in iter_batch (vectors , size = batch_size )])
110
+
111
+ await asyncio .gather (
112
+ * [send_batch (chunk ) for chunk in iter_batch (vectors , size = batch_size )]
113
+ )
109
114
110
115
111
116
import_err_msg = (
@@ -250,12 +255,12 @@ def _prepare_entries_for_upsert(self, nodes: List[BaseNode]) -> List[Dict]:
250
255
if self .add_sparse_vector :
251
256
sparse_vector = generate_sparse_vectors (
252
257
[node .get_content (metadata_mode = MetadataMode .EMBED )],
253
- self ._tokenizer ,
258
+ self ._tokenizer , # type: ignore
254
259
)[0 ]
255
260
entry [SPARSE_VECTOR_KEY ] = sparse_vector
256
261
257
262
entries .append (entry )
258
-
263
+
259
264
return entries
260
265
261
266
def add (
@@ -268,7 +273,6 @@ def add(
268
273
nodes: List[BaseNode]: list of nodes with embeddings
269
274
270
275
"""
271
-
272
276
entries = self ._prepare_entries_for_upsert (nodes )
273
277
274
278
[
@@ -293,15 +297,13 @@ async def async_add(
293
297
Returns:
294
298
List[str]: List of IDs of the added documents.
295
299
"""
296
-
297
300
entries = self ._prepare_entries_for_upsert (nodes )
298
301
299
302
semaphore = asyncio .Semaphore (SEM_MAX_CONCURRENT )
300
303
await async_upload (self ._pinecone_index , entries , DEFAULT_BATCH_SIZE , semaphore )
301
304
302
305
return [entry [ID_KEY ] for entry in entries ]
303
306
304
-
305
307
def delete (self , ref_doc_id : str , ** delete_kwargs : Any ) -> None :
306
308
"""
307
309
Delete nodes using with ref_doc_id.
0 commit comments