 import os
 from typing import Any, Optional

+from llama_index.core.schema import BaseNode
 from llama_index.core.vector_stores.types import BasePydanticVectorStore
 from llama_index.vector_stores.pinecone import PineconeVectorStore
 from pinecone import NotFoundException
@@ -33,10 +34,11 @@ class Constants:

 class Pinecone(VectorDBAdapter):
     def __init__(self, settings: dict[str, Any]):
-        super().__init__("Pinecone")
-        self.config = settings
-        self.client: Optional[LLamaIndexPinecone] = None
-        self.collection_name: str = VectorDbConstants.DEFAULT_VECTOR_DB_NAME
+        self._config = settings
+        self._client: Optional[LLamaIndexPinecone] = None
+        self._collection_name: str = VectorDbConstants.DEFAULT_VECTOR_DB_NAME
+        self._vector_db_instance = self._get_vector_db_instance()
+        super().__init__("Pinecone", self._vector_db_instance)

     @staticmethod
     def get_id() -> str:
@@ -62,65 +64,101 @@ def get_json_schema() -> str:
         return schema

     def get_vector_db_instance(self) -> BasePydanticVectorStore:
-        try:
-            self.client = LLamaIndexPinecone(
-                api_key=str(self.config.get(Constants.API_KEY))
-            )
-            collection_name = VectorDBHelper.get_collection_name(
-                self.config.get(VectorDbConstants.VECTOR_DB_NAME),
-                self.config.get(VectorDbConstants.EMBEDDING_DIMENSION),
-            )
-            self.collection_name = collection_name.replace("_", "-").lower()
-            dimension = self.config.get(
-                VectorDbConstants.EMBEDDING_DIMENSION,
-                VectorDbConstants.DEFAULT_EMBEDDING_SIZE,
-            )
+        return self._vector_db_instance

-            specification = self.config.get(Constants.SPECIFICATION)
-            if specification == Constants.SPEC_POD:
-                environment = self.config.get(Constants.ENVIRONMENT)
-                spec = PodSpec(
-                    environment=environment,
-                    replicas=Constants.DEFAULT_SPEC_COUNT_VALUE,
-                    shards=Constants.DEFAULT_SPEC_COUNT_VALUE,
-                    pods=Constants.DEFAULT_SPEC_COUNT_VALUE,
-                    pod_type=Constants.DEFAULT_POD_TYPE,
-                )
-            elif specification == Constants.SPEC_SERVERLESS:
-                cloud = self.config.get(Constants.CLOUD)
-                region = self.config.get(Constants.REGION)
-                spec = ServerlessSpec(cloud=cloud, region=region)
-            logger.info(f"Setting up Pinecone spec for {spec}")
-            try:
-                self.client.describe_index(name=self.collection_name)
-            except NotFoundException:
-                logger.info(
-                    f"Index:{self.collection_name} does not exist. Creating it."
-                )
-                self.client.create_index(
-                    name=self.collection_name,
-                    dimension=dimension,
-                    metric=Constants.METRIC,
-                    spec=spec,
-                )
-            vector_db: BasePydanticVectorStore = PineconeVectorStore(
-                index_name=self.collection_name,
-                api_key=str(self.config.get(Constants.API_KEY)),
-                environment=str(self.config.get(Constants.ENVIRONMENT)),
+    def _get_vector_db_instance(self) -> BasePydanticVectorStore:
+
+        self._client = LLamaIndexPinecone(
+            api_key=str(self._config.get(Constants.API_KEY))
+        )
+        collection_name = VectorDBHelper.get_collection_name(
+            self._config.get(VectorDbConstants.VECTOR_DB_NAME),
+            self._config.get(VectorDbConstants.EMBEDDING_DIMENSION),
+        )
+        self._collection_name = collection_name.replace("_", "-").lower()
+        dimension = self._config.get(
+            VectorDbConstants.EMBEDDING_DIMENSION,
+            VectorDbConstants.DEFAULT_EMBEDDING_SIZE,
+        )
+
+        specification = self._config.get(Constants.SPECIFICATION)
+        if specification == Constants.SPEC_POD:
+            environment = self._config.get(Constants.ENVIRONMENT)
+            spec = PodSpec(
+                environment=environment,
+                replicas=Constants.DEFAULT_SPEC_COUNT_VALUE,
+                shards=Constants.DEFAULT_SPEC_COUNT_VALUE,
+                pods=Constants.DEFAULT_SPEC_COUNT_VALUE,
+                pod_type=Constants.DEFAULT_POD_TYPE,
             )
-            return vector_db
-        except Exception as e:
-            raise AdapterError(str(e))
+        elif specification == Constants.SPEC_SERVERLESS:
+            cloud = self._config.get(Constants.CLOUD)
+            region = self._config.get(Constants.REGION)
+            spec = ServerlessSpec(cloud=cloud, region=region)
+        logger.info(f"Setting up Pinecone spec for {spec}")
+        try:
+            self._client.describe_index(name=self._collection_name)
+        except NotFoundException:
+            logger.info(f"Index:{self._collection_name} does not exist. Creating it.")
+            self._client.create_index(
+                name=self._collection_name,
+                dimension=dimension,
+                metric=Constants.METRIC,
+                spec=spec,
+            )
+        self.vector_db: BasePydanticVectorStore = PineconeVectorStore(
+            index_name=self._collection_name,
+            api_key=str(self._config.get(Constants.API_KEY)),
+            environment=str(self._config.get(Constants.ENVIRONMENT)),
+        )
+        return self.vector_db

     def test_connection(self) -> bool:
-        self.config[VectorDbConstants.EMBEDDING_DIMENSION] = (
+        self._config[VectorDbConstants.EMBEDDING_DIMENSION] = (
             VectorDbConstants.TEST_CONNECTION_EMBEDDING_SIZE
         )
         vector_db = self.get_vector_db_instance()
         test_result: bool = VectorDBHelper.test_vector_db_instance(
             vector_store=vector_db
         )
         # Delete the collection that was created for testing
-        if self.client:
-            self.client.delete_index(self.collection_name)
+        if self._client:
+            self._client.delete_index(self._collection_name)
         return test_result
+
+    def close(self, **kwargs: Any) -> None:
+        # Close connection is not defined for this client
+        pass
+
+    def delete(self, ref_doc_id: str, **delete_kwargs: dict[Any, Any]) -> None:
+        specification = self._config.get(Constants.SPECIFICATION)
+        if specification == Constants.SPEC_SERVERLESS:
+            # To delete all records representing chunks of a single document,
+            # first list the record IDs based on their common ID prefix,
+            # and then delete the records by ID:
+            try:
+                index = self._client.Index(self._collection_name)  # type: ignore
+                # Get all records having the ref_doc_id prefix and delete them
+                for ids in index.list(prefix=ref_doc_id):
+                    logger.info(ids)
+                    index.delete(ids=ids)
+            except Exception as e:
+                raise AdapterError(str(e))
+        elif specification == Constants.SPEC_POD:
+            if self.vector_db.environment == "gcp-starter":  # type: ignore
+                raise AdapterError(
+                    "Re-indexing is not supported on Starter indexes. "
+                    "Use Serverless or paid plan for Pod spec"
+                )
+            else:
+                super().delete(ref_doc_id=ref_doc_id, **delete_kwargs)
+
+    def add(
+        self,
+        ref_doc_id: str,
+        nodes: list[BaseNode],
+    ) -> list[str]:
+        for i, node in enumerate(nodes):
+            node_id = ref_doc_id + "-" + node.node_id
+            nodes[i].id_ = node_id
+        return self.vector_db.add(nodes=nodes)
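
Note on the ID scheme introduced above: add() prefixes every chunk's node ID with its source document's ref_doc_id, which is what lets delete() remove all chunks of a document on serverless indexes by listing record IDs with that prefix. A minimal standalone sketch with the plain Pinecone v3 client, using a hypothetical API key, index name, and dimension (not part of this change):

    from pinecone import Pinecone

    pc = Pinecone(api_key="YOUR_API_KEY")   # hypothetical credentials
    index = pc.Index("my-collection")       # hypothetical, already-created index
    ref_doc_id = "doc-42"

    # add(): records are stored as "<ref_doc_id>-<node_id>", so every chunk of
    # one document shares the same ID prefix (1536 is just an example dimension).
    index.upsert(vectors=[
        {"id": f"{ref_doc_id}-chunk-0", "values": [0.1] * 1536},
        {"id": f"{ref_doc_id}-chunk-1", "values": [0.2] * 1536},
    ])

    # delete(): on serverless indexes, index.list() yields batches of record IDs
    # matching the prefix; each batch is then deleted by ID.
    for ids in index.list(prefix=ref_doc_id):
        index.delete(ids=ids)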