Skip to content

Commit bff0f39

Browse files
authored
Merge pull request #167 from mindsdb/feat-ml-188
Knowledge base to api endpoints
2 parents 2be5709 + edbf451 commit bff0f39

File tree

3 files changed

+212
-150
lines changed

3 files changed

+212
-150
lines changed

mindsdb_sdk/connectors/rest_api.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -405,32 +405,51 @@ def delete_skill(self, project: str, name: str):
405405

406406
# Knowledge Base operations.
407407
@_try_relogin
408-
def insert_files_into_knowledge_base(self, project: str, knowledge_base_name: str, file_names: List[str]):
408+
def insert_into_knowledge_base(self, project: str, knowledge_base_name: str, data):
409409
r = self.session.put(
410410
self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}',
411411
json={
412-
'knowledge_base': {
413-
'files': file_names
414-
}
412+
'knowledge_base': data
415413
}
416414
)
417415
_raise_for_status(r)
418416

419417
return r.json()
420418

421419
@_try_relogin
422-
def insert_webpages_into_knowledge_base(self, project: str, knowledge_base_name: str, urls: List[str], crawl_depth: int = 1, filters: List[str] = None):
423-
data = {
424-
'urls': urls,
425-
'crawl_depth': crawl_depth,
426-
'filters': [] if filters is None else filters
427-
}
428-
r = self.session.put(
429-
self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}',
420+
def list_knowledge_bases(self, project: str):
    """
    Request the knowledge bases of a project.

    Issues a GET to ``/api/projects/{project}/knowledge_bases``, hands the
    response to ``_raise_for_status`` and returns its ``.json()`` payload.

    :param project: name of the project, interpolated into the URL path
    :return: result of ``.json()`` on the response
    """
    endpoint = self.url + f'/api/projects/{project}/knowledge_bases'
    response = self.session.get(endpoint)
    # NOTE(review): presumably raises on an HTTP error status - confirm against the helper
    _raise_for_status(response)
    return response.json()
424+
425+
@_try_relogin
def get_knowledge_base(self, project: str, knowledge_base_name):
    """
    Request a single knowledge base of a project.

    Issues a GET to
    ``/api/projects/{project}/knowledge_bases/{knowledge_base_name}``, hands
    the response to ``_raise_for_status`` and returns its ``.json()`` payload.

    :param project: name of the project, interpolated into the URL path
    :param knowledge_base_name: name of the knowledge base, interpolated into the URL path
    :return: result of ``.json()`` on the response
    """
    endpoint = self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}'
    response = self.session.get(endpoint)
    # NOTE(review): presumably raises on an HTTP error status - confirm against the helper
    _raise_for_status(response)
    return response.json()
430+
431+
@_try_relogin
def delete_knowledge_base(self, project: str, knowledge_base_name):
    """
    Send a delete request for a knowledge base of a project.

    Issues a DELETE to
    ``/api/projects/{project}/knowledge_bases/{knowledge_base_name}`` and
    hands the response to ``_raise_for_status``. Nothing is returned.

    :param project: name of the project, interpolated into the URL path
    :param knowledge_base_name: name of the knowledge base, interpolated into the URL path
    """
    endpoint = self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}'
    response = self.session.delete(endpoint)
    # NOTE(review): presumably raises on an HTTP error status - confirm against the helper
    _raise_for_status(response)
435+
436+
@_try_relogin
def create_knowledge_base(self, project: str, data):
    """
    Send a create request for a knowledge base in a project.

    Issues a POST to ``/api/projects/{project}/knowledge_bases`` with the
    JSON body ``{'knowledge_base': data}``, hands the response to
    ``_raise_for_status`` and returns its ``.json()`` payload.

    :param project: name of the project, interpolated into the URL path
    :param data: value sent under the ``knowledge_base`` key of the JSON body
    :return: result of ``.json()`` on the response
    """
    endpoint = self.url + f'/api/projects/{project}/knowledge_bases'
    body = {'knowledge_base': data}
    response = self.session.post(endpoint, json=body)
    # NOTE(review): presumably raises on an HTTP error status - confirm against the helper
    _raise_for_status(response)
    return response.json()
447+
448+
@_try_relogin
def knowledge_base_completion(self, project: str, knowledge_base_name, payload):
    """
    Post a completion request to a knowledge base.

    Issues a POST to
    ``/api/projects/{project}/knowledge_bases/{knowledge_base_name}/completions``
    with ``payload`` as the JSON body, hands the response to
    ``_raise_for_status`` and returns its ``.json()`` payload.

    Decorated with ``_try_relogin`` like the other knowledge-base endpoints
    of this connector, so this call gets the same session handling as its
    siblings.
    NOTE(review): presumably the decorator re-authenticates and retries on an
    auth failure - confirm against its definition.

    :param project: name of the project, interpolated into the URL path
    :param knowledge_base_name: name of the knowledge base, interpolated into the URL path
    :param payload: object sent as the JSON body of the request
    :return: result of ``.json()`` on the response
    """
    r = self.session.post(
        self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}/completions',
        json=payload
    )
    _raise_for_status(r)
    return r.json()
455+

mindsdb_sdk/knowledge_bases.py

Lines changed: 81 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,14 @@ def __init__(self, api, project, data: dict):
3939
self.table_name = Identifier(parts=[self.project.name, self.name])
4040

4141
self.storage = None
42-
if data['storage'] is not None:
43-
# if name contents '.' there could be errors
44-
45-
parts = data['storage'].split('.')
46-
if len(parts) == 2:
47-
database_name, table_name = parts
48-
database = Database(project, database_name)
49-
table = Table(database, table_name)
50-
self.storage = table
42+
if data.get('vector_database_table') is not None:
43+
database = Database(project, data['vector_database'])
44+
table = Table(database, data['vector_database_table'])
45+
self.storage = table
5146

5247
self.model = None
53-
if data['model'] is not None:
54-
self.model = Model(self.project, {'name': data['model']})
48+
if data['embedding_model'] is not None:
49+
self.model = Model(self.project, {'name': data['embedding_model']})
5550

5651
params = data.get('params', {})
5752
if isinstance(params, str):
@@ -122,7 +117,11 @@ def insert_files(self, file_paths: List[str]):
122117
"""
123118
Insert data from file to knowledge base
124119
"""
125-
self.api.insert_files_into_knowledge_base(self.project.name, self.name, file_paths)
120+
self.api.insert_into_knowledge_base(
121+
self.project.name,
122+
self.name,
123+
data={'files': file_paths}
124+
)
126125

127126
def insert_webpages(self, urls: List[str], crawl_depth: int = 1, filters: List[str] = None):
128127
"""
@@ -132,41 +131,62 @@ def insert_webpages(self, urls: List[str], crawl_depth: int = 1, filters: List[s
132131
:param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
133132
:param filters: Include only URLs that match these regex patterns
134133
"""
135-
self.api.insert_webpages_into_knowledge_base(self.project.name, self.name, urls, crawl_depth=crawl_depth, filters=filters)
134+
self.api.insert_into_knowledge_base(
135+
self.project.name,
136+
self.name,
137+
data={
138+
'urls': urls,
139+
'crawl_depth': crawl_depth,
140+
'filters': [] if filters is None else filters
141+
}
142+
)
136143

137144
def insert(self, data: Union[pd.DataFrame, Query, dict]):
138145
"""
139146
Insert data to knowledge base
140147
141-
>>> # insert using query
142-
>>> my_kb.insert(server.databases.example_db.tables.houses_sales.filter(type='house'))
143148
>>> # using dataframe
144149
>>> my_kb.insert(pd.read_csv('house_sales.csv'))
145150
>>> # using dict
146151
>>> my_kb.insert({'type': 'house', 'date': '2020-02-02'})
147152
148-
Data will be if id (defined by id_column param, see create knowledge base) is already exists in knowledge base
149-
it will be replaced
153+
If the id already exists in the knowledge base:
154+
- it will be replaced
155+
- `id` column can be defined by id_column param, see create knowledge base
150156
151157
:param data: Dataframe or Query object or dict.
152158
"""
153159

160+
if isinstance(data, Query):
161+
# for back compatibility
162+
return self.insert_query(data)
163+
154164
if isinstance(data, dict):
155-
data = pd.DataFrame([data])
165+
data = [data]
166+
elif isinstance(data, pd.DataFrame):
167+
data = data.to_dict('records')
168+
else:
169+
raise ValueError("Unknown data type, accepted types: DataFrame, Query, dict")
156170

157-
if isinstance(data, pd.DataFrame):
158-
# insert data
159-
data_split = data.to_dict('split')
171+
return self.api.insert_into_knowledge_base(
172+
self.project.name,
173+
self.name,
174+
data={'rows': data}
175+
)
160176

161-
ast_query = Insert(
162-
table=self.table_name,
163-
columns=data_split['columns'],
164-
values=data_split['data']
165-
)
166-
sql = ast_query.to_string()
177+
def insert_query(self, data: Query):
178+
"""
179+
Insert data to knowledge base using query
167180
168-
else:
169-
# insert from select
181+
>>> my_kb.insert(server.databases.example_db.tables.houses_sales.filter(type='house'))
182+
183+
If the id (defined by id_column param, see create knowledge base) already exists in the knowledge base
184+
it will be replaced
185+
186+
:param data: Query object.
187+
"""
188+
if is_saving():
189+
# generate insert from select query
170190
if data.database is not None:
171191
ast_query = Insert(
172192
table=self.table_name,
@@ -176,11 +196,20 @@ def insert(self, data: Union[pd.DataFrame, Query, dict]):
176196
else:
177197
sql = f'INSERT INTO {self.table_name.to_string()} ({data.sql})'
178198

179-
if is_saving():
180199
# don't execute it right now, return query object
181200
return Query(self, sql, self.database)
182201

183-
self.api.sql_query(sql, self.database)
202+
# the query has to be in the context of the mindsdb project
203+
self.api.insert_into_knowledge_base(
204+
self.project.name,
205+
self.name,
206+
data={'query': data.sql}
207+
)
208+
209+
def completion(self, query, **data):
    """
    Request a completion from this knowledge base.

    Builds a payload from the extra keyword arguments plus ``query`` and
    forwards it to ``api.knowledge_base_completion`` together with the
    project name and the knowledge base name.

    :param query: value sent under the ``query`` key of the payload
    :param data: additional keyword arguments included in the payload as-is
    :return: whatever ``api.knowledge_base_completion`` returns
    """
    # `query` is a named parameter, so it can never also appear in **data
    payload = {**data, 'query': query}
    return self.api.knowledge_base_completion(
        self.project.name,
        self.name,
        payload,
    )
184213

185214

186215
class KnowledgeBases(CollectionBase):
@@ -212,24 +241,6 @@ def __init__(self, project, api):
212241
self.project = project
213242
self.api = api
214243

215-
def _list(self, name: str = None) -> List[KnowledgeBase]:
216-
217-
# TODO add filter by project. for now 'project' is empty
218-
ast_query = Select(targets=[Star()], from_table=Identifier(parts=['information_schema', 'knowledge_bases']))
219-
if name is not None:
220-
ast_query.where = dict_to_binary_op({'name': name})
221-
222-
df = self.api.sql_query(ast_query.to_string(), database=self.project.name)
223-
224-
# columns to lower case
225-
cols_map = {i: i.lower() for i in df.columns}
226-
df = df.rename(columns=cols_map)
227-
228-
return [
229-
KnowledgeBase(self.api, self.project, item)
230-
for item in df.to_dict('records')
231-
]
232-
233244
def list(self) -> List[KnowledgeBase]:
234245
"""
235246
@@ -239,7 +250,11 @@ def list(self) -> List[KnowledgeBase]:
239250
240251
:return: list of knowledge bases
241252
"""
242-
return self._list()
253+
254+
return [
255+
KnowledgeBase(self.api, self.project, item)
256+
for item in self.api.list_knowledge_bases(self.project.name)
257+
]
243258

244259
def get(self, name: str) -> KnowledgeBase:
245260
"""
@@ -248,13 +263,9 @@ def get(self, name: str) -> KnowledgeBase:
248263
:param name: name of the knowledge base
249264
:return: KnowledgeBase object
250265
"""
251-
item = self._list(name)
252-
if len(item) == 1:
253-
return item[0]
254-
elif len(item) == 0:
255-
raise AttributeError("KnowledgeBase doesn't exist")
256-
else:
257-
raise RuntimeError("Several knowledgeBases with the same name")
266+
267+
data = self.api.get_knowledge_base(self.project.name, name)
268+
return KnowledgeBase(self.api, self.project, data)
258269

259270
def create(
260271
self,
@@ -305,27 +316,21 @@ def create(
305316
params_out.update(params)
306317

307318
if model is not None:
308-
model_name = Identifier(parts=[model.project.name, model.name])
309-
else:
310-
model_name = None
311-
312-
if storage is not None:
313-
storage_name = Identifier(parts=[storage.db.name, storage.name])
314-
else:
315-
storage_name = None
319+
model = model.name
316320

317-
ast_query = CreateKnowledgeBase(
318-
Identifier(parts=[self.project.name, name]),
319-
model=model_name,
320-
storage=storage_name,
321-
params=params_out
322-
)
323-
sql = ast_query.to_string()
321+
payload = {
322+
'name': name,
323+
'model': model,
324+
'params': params_out
325+
}
324326

325-
if is_saving():
326-
return Query(self, sql)
327+
if storage is not None:
328+
payload['storage'] = {
329+
'database': storage.db.name,
330+
'table': storage.name
331+
}
327332

328-
self.api.sql_query(sql)
333+
self.api.create_knowledge_base(self.project.name, data=payload)
329334

330335
return self.get(name)
331336

@@ -336,10 +341,4 @@ def drop(self, name: str):
336341
:return:
337342
"""
338343

339-
ast_query = DropKnowledgeBase(Identifier(parts=[self.project.name, name]))
340-
sql = ast_query.to_string()
341-
342-
if is_saving():
343-
return Query(self, sql)
344-
345-
self.api.sql_query(sql)
344+
return self.api.delete_knowledge_base(self.project.name, name)

0 commit comments

Comments
 (0)