Skip to content

Commit 8f5fc90

Browse files
committed
add async_add implementation and async upsert to Pinecone vector store
1 parent 2690bfb commit 8f5fc90

File tree

3 files changed

+40
-22
lines changed

3 files changed

+40
-22
lines changed

llama_index/indices/vector_store/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ async def _async_add_nodes_to_index(
145145
return
146146

147147
nodes = await self._aget_node_with_embedding(nodes, show_progress)
148-
new_ids = self._vector_store.add(nodes)
148+
new_ids = await self._vector_store.async_add(nodes)
149149

150150
# if the vector store doesn't store text, we need to add the nodes to the
151151
# index struct and document store

llama_index/vector_stores/pinecone.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
from collections import Counter
99
from functools import partial
1010
from typing import Any, Callable, Dict, List, Optional, cast
11-
11+
import asyncio
12+
from llama_index.utils import iter_batch
1213
from llama_index.bridge.pydantic import PrivateAttr
1314
from llama_index.schema import BaseNode, MetadataMode, TextNode
1415
from llama_index.vector_stores.types import (
@@ -30,7 +31,7 @@
3031
SPARSE_VECTOR_KEY = "sparse_values"
3132
METADATA_KEY = "metadata"
3233

33-
DEFAULT_BATCH_SIZE = 100
34+
DEFAULT_BATCH_SIZE = 200
3435

3536
_logger = logging.getLogger(__name__)
3637

@@ -172,7 +173,7 @@ def __init__(
172173

173174
if tokenizer is None:
174175
tokenizer = get_default_tokenizer()
175-
self._tokenizer = tokenizer
176+
self._tokenizer = tokenizer # type: ignore
176177

177178
super().__init__(
178179
index_name=index_name,
@@ -258,14 +259,31 @@ def add(
258259

259260
ids.append(node_id)
260261
entries.append(entry)
261-
self._pinecone_index.upsert(
262-
entries,
263-
namespace=self.namespace,
264-
batch_size=self.batch_size,
265-
**self.insert_kwargs,
266-
)
262+
263+
[
264+
self._pinecone_index.upsert(
265+
vectors=batch,
266+
async_req=True,
267+
)
268+
for batch in iter_batch(entries, self.batch_size)
269+
]
270+
267271
return ids
268272

273+
async def async_add(
274+
self,
275+
nodes: List[BaseNode],
276+
) -> List[str]:
277+
"""Asynchronously add a list of embedding results to the collection.
278+
279+
Args:
280+
nodes (List[BaseNode]): Embedding results to add.
281+
282+
Returns:
283+
List[str]: List of IDs of the added documents.
284+
"""
285+
return await asyncio.to_thread(self.add, nodes) # type: ignore
286+
269287
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
270288
"""
271289
Delete nodes using ref_doc_id.

tests/indices/vector_store/utils.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,19 @@
1212
class MockPineconeIndex:
1313
def __init__(self) -> None:
1414
"""Mock pinecone index."""
15-
self._tuples: List[Dict[str, Any]] = []
15+
self._vectors: List[Dict[str, Any]] = []
1616

17-
def upsert(self, tuples: List[Dict[str, Any]], **kwargs: Any) -> None:
17+
def upsert(self, vectors: List[Dict[str, Any]], **kwargs: Any) -> None:
1818
"""Mock upsert."""
19-
self._tuples.extend(tuples)
19+
self._vectors.extend(vectors)
2020

2121
def delete(self, ids: List[str]) -> None:
2222
"""Mock delete."""
23-
new_tuples = []
24-
for tup in self._tuples:
25-
if tup["id"] not in ids:
26-
new_tuples.append(tup)
27-
self._tuples = new_tuples
23+
new_vectors = []
24+
for vec in self._vectors:
25+
if vec["id"] not in ids:
26+
new_vectors.append(vec)
27+
self._vectors = new_vectors
2828

2929
def query(
3030
self,
@@ -38,7 +38,7 @@ def query(
3838
) -> Any:
3939
"""Mock query."""
4040
# index_mat is n x k
41-
index_mat = np.array([tup["values"] for tup in self._tuples])
41+
index_mat = np.array([tup["values"] for tup in self._vectors])
4242
query_vec = np.array(vector)[np.newaxis, :]
4343

4444
# compute distances
@@ -49,10 +49,10 @@ def query(
4949

5050
matches = []
5151
for index in indices:
52-
tup = self._tuples[index]
52+
vec = self._vectors[index]
5353
match = MagicMock()
54-
match.metadata = tup["metadata"]
55-
match.id = tup["id"]
54+
match.metadata = vec["metadata"]
55+
match.id = vec["id"]
5656
matches.append(match)
5757

5858
response = MagicMock()

0 commit comments

Comments
 (0)