@@ -39,19 +39,14 @@ def __init__(self, api, project, data: dict):
3939 self .table_name = Identifier (parts = [self .project .name , self .name ])
4040
4141 self .storage = None
42- if data ['storage' ] is not None :
43- # if name contents '.' there could be errors
44-
45- parts = data ['storage' ].split ('.' )
46- if len (parts ) == 2 :
47- database_name , table_name = parts
48- database = Database (project , database_name )
49- table = Table (database , table_name )
50- self .storage = table
42+ if data .get ('vector_database_table' ) is not None :
43+ database = Database (project , data ['vector_database' ])
44+ table = Table (database , data ['vector_database_table' ])
45+ self .storage = table
5146
5247 self .model = None
53- if data ['model ' ] is not None :
54- self .model = Model (self .project , {'name' : data ['model ' ]})
48+ if data ['embedding_model ' ] is not None :
49+ self .model = Model (self .project , {'name' : data ['embedding_model ' ]})
5550
5651 params = data .get ('params' , {})
5752 if isinstance (params , str ):
@@ -122,7 +117,11 @@ def insert_files(self, file_paths: List[str]):
122117 """
123118 Insert data from file to knowledge base
124119 """
125- self .api .insert_files_into_knowledge_base (self .project .name , self .name , file_paths )
120+ self .api .insert_into_knowledge_base (
121+ self .project .name ,
122+ self .name ,
123+ data = {'files' : file_paths }
124+ )
126125
def insert_webpages(self, urls: List[str], crawl_depth: int = 1, filters: List[str] = None):
    """
    Insert data from crawled URLs to knowledge base

    :param urls: base URLs to crawl
    :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
    :param filters: Include only URLs that match these regex patterns
    """
    # a missing filter list is sent as an empty list
    url_filters = filters if filters is not None else []

    payload = {
        'urls': urls,
        'crawl_depth': crawl_depth,
        'filters': url_filters,
    }
    self.api.insert_into_knowledge_base(self.project.name, self.name, data=payload)
136143
def insert(self, data: Union[pd.DataFrame, Query, dict]):
    """
    Insert data to knowledge base

    >>> # using dataframe
    >>> my_kb.insert(pd.read_csv('house_sales.csv'))
    >>> # using dict
    >>> my_kb.insert({'type': 'house', 'date': '2020-02-02'})

    If a record's id already exists in the knowledge base, the record is replaced.
    The `id` column can be defined by the id_column param, see create knowledge base.

    :param data: DataFrame, Query object or dict. A Query is handed over to insert_query.
    :return: result of insert_query for a Query, otherwise result of the insert API call
    :raises ValueError: if data has any other type
    """
    # Query input is still accepted here for backward compatibility
    if isinstance(data, Query):
        return self.insert_query(data)

    if isinstance(data, dict):
        # a single record
        rows = [data]
    elif isinstance(data, pd.DataFrame):
        # one dict per dataframe row
        rows = data.to_dict('records')
    else:
        raise ValueError("Unknown data type, accepted types: DataFrame, Query, dict")

    payload = {'rows': rows}
    return self.api.insert_into_knowledge_base(self.project.name, self.name, data=payload)
160176
161- ast_query = Insert (
162- table = self .table_name ,
163- columns = data_split ['columns' ],
164- values = data_split ['data' ]
165- )
166- sql = ast_query .to_string ()
177+ def insert_query (self , data : Query ):
178+ """
179+ Insert data to knowledge base using query
167180
168- else :
169- # insert from select
181+ >>> my_kb.insert(server.databases.example_db.tables.houses_sales.filter(type='house'))
182+
183+ Data will be if id (defined by id_column param, see create knowledge base) is already exists in knowledge base
184+ it will be replaced
185+
186+ :param data: Dataframe or Query object or dict.
187+ """
188+ if is_saving ():
189+ # generate insert from select query
170190 if data .database is not None :
171191 ast_query = Insert (
172192 table = self .table_name ,
@@ -176,11 +196,20 @@ def insert(self, data: Union[pd.DataFrame, Query, dict]):
176196 else :
177197 sql = f'INSERT INTO { self .table_name .to_string ()} ({ data .sql } )'
178198
179- if is_saving ():
180199 # don't execute it right now, return query object
181200 return Query (self , sql , self .database )
182201
183- self .api .sql_query (sql , self .database )
202+ # query have to be in context of mindsdb project
203+ self .api .insert_into_knowledge_base (
204+ self .project .name ,
205+ self .name ,
206+ data = {'query' : data .sql }
207+ )
208+
def completion(self, query, **data):
    """
    Request a completion from the knowledge base

    :param query: value sent under the 'query' key of the request
    :param data: additional keyword arguments sent together with the query
    :return: result of the knowledge_base_completion API call
    """
    payload = {**data, 'query': query}
    return self.api.knowledge_base_completion(self.project.name, self.name, payload)
184213
185214
186215class KnowledgeBases (CollectionBase ):
@@ -212,24 +241,6 @@ def __init__(self, project, api):
212241 self .project = project
213242 self .api = api
214243
215- def _list (self , name : str = None ) -> List [KnowledgeBase ]:
216-
217- # TODO add filter by project. for now 'project' is empty
218- ast_query = Select (targets = [Star ()], from_table = Identifier (parts = ['information_schema' , 'knowledge_bases' ]))
219- if name is not None :
220- ast_query .where = dict_to_binary_op ({'name' : name })
221-
222- df = self .api .sql_query (ast_query .to_string (), database = self .project .name )
223-
224- # columns to lower case
225- cols_map = {i : i .lower () for i in df .columns }
226- df = df .rename (columns = cols_map )
227-
228- return [
229- KnowledgeBase (self .api , self .project , item )
230- for item in df .to_dict ('records' )
231- ]
232-
def list(self) -> List[KnowledgeBase]:
    """
    Get list of knowledge bases of the project

    :return: list of knowledge bases
    """
    records = self.api.list_knowledge_bases(self.project.name)

    # wrap every returned record into a KnowledgeBase object
    knowledge_bases = []
    for record in records:
        knowledge_bases.append(KnowledgeBase(self.api, self.project, record))
    return knowledge_bases
243258
def get(self, name: str) -> KnowledgeBase:
    """
    Get knowledge base by name

    :param name: name of the knowledge base
    :return: KnowledgeBase object
    """
    # fetch the record through the API and wrap it into a KnowledgeBase object
    return KnowledgeBase(
        self.api,
        self.project,
        self.api.get_knowledge_base(self.project.name, name),
    )
258269
259270 def create (
260271 self ,
@@ -305,27 +316,21 @@ def create(
305316 params_out .update (params )
306317
307318 if model is not None :
308- model_name = Identifier (parts = [model .project .name , model .name ])
309- else :
310- model_name = None
311-
312- if storage is not None :
313- storage_name = Identifier (parts = [storage .db .name , storage .name ])
314- else :
315- storage_name = None
319+ model = model .name
316320
317- ast_query = CreateKnowledgeBase (
318- Identifier (parts = [self .project .name , name ]),
319- model = model_name ,
320- storage = storage_name ,
321- params = params_out
322- )
323- sql = ast_query .to_string ()
321+ payload = {
322+ 'name' : name ,
323+ 'model' : model ,
324+ 'params' : params_out
325+ }
324326
325- if is_saving ():
326- return Query (self , sql )
327+ if storage is not None :
328+ payload ['storage' ] = {
329+ 'database' : storage .db .name ,
330+ 'table' : storage .name
331+ }
327332
328- self .api .sql_query ( sql )
333+ self .api .create_knowledge_base ( self . project . name , data = payload )
329334
330335 return self .get (name )
331336
@@ -336,10 +341,4 @@ def drop(self, name: str):
336341 :return:
337342 """
338343
339- ast_query = DropKnowledgeBase (Identifier (parts = [self .project .name , name ]))
340- sql = ast_query .to_string ()
341-
342- if is_saving ():
343- return Query (self , sql )
344-
345- self .api .sql_query (sql )
344+ return self .api .delete_knowledge_base (self .project .name , name )
0 commit comments