Skip to content

Commit a66ab16

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents f86d1a5 + 577c96b commit a66ab16

85 files changed

Lines changed: 3278 additions & 2275 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

admin/client/parser.py

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,10 @@
8484
| list_user_chats
8585
| create_user_chat
8686
| drop_user_chat
87-
| create_index
88-
| drop_index
89-
| create_doc_meta_index
90-
| drop_doc_meta_index
87+
| create_dataset_table
88+
| drop_dataset_table
89+
| create_metadata_table
90+
| drop_metadata_table
9191
| list_user_model_providers
9292
| list_user_default_models
9393
| parse_dataset_docs
@@ -102,6 +102,7 @@
102102
| update_chunk
103103
| set_metadata
104104
| remove_tags
105+
| remove_chunks
105106
| create_chat_session
106107
| drop_chat_session
107108
| list_chat_sessions
@@ -143,6 +144,7 @@
143144
ACTIVE: "ACTIVE"i
144145
ADMIN: "ADMIN"i
145146
PASSWORD: "PASSWORD"i
147+
DATASET_TABLE: "DATASET TABLE"i
146148
DATASET: "DATASET"i
147149
DATASETS: "DATASETS"i
148150
OF: "OF"i
@@ -187,7 +189,8 @@
187189
INTO: "INTO"i
188190
IN: "IN"i
189191
WITH: "WITH"i
190-
VECTOR_SIZE: "VECTOR_SIZE"i
192+
VECTOR: "VECTOR"i
193+
SIZE: "SIZE"i
191194
PARSER: "PARSER"i
192195
PIPELINE: "PIPELINE"i
193196
SEARCH: "SEARCH"i
@@ -210,13 +213,12 @@
210213
CHECK: "CHECK"i
211214
CONFIG: "CONFIG"i
212215
INDEX: "INDEX"i
213-
DOC_META: "DOC_META"i
216+
TABLE: "TABLE"i
214217
CHUNK: "CHUNK"i
215218
CHUNKS: "CHUNKS"i
216219
GET: "GET"i
217220
INSERT: "INSERT"i
218221
PAGE: "PAGE"i
219-
SIZE: "SIZE"i
220222
KEYWORDS: "KEYWORDS"i
221223
AVAILABLE: "AVAILABLE"i
222224
FILE: "FILE"i
@@ -345,10 +347,6 @@
345347
list_user_chats: LIST CHATS ";"
346348
create_user_chat: CREATE CHAT quoted_string ";"
347349
drop_user_chat: DROP CHAT quoted_string ";"
348-
create_index: CREATE INDEX FOR DATASET quoted_string VECTOR_SIZE NUMBER ";"
349-
drop_index: DROP INDEX FOR DATASET quoted_string ";"
350-
create_doc_meta_index: CREATE INDEX DOC_META ";"
351-
drop_doc_meta_index: DROP INDEX DOC_META ";"
352350
create_chat_session: CREATE CHAT quoted_string SESSION ";"
353351
drop_chat_session: DROP CHAT quoted_string SESSION quoted_string ";"
354352
list_chat_sessions: LIST CHAT quoted_string SESSIONS ";"
@@ -359,18 +357,23 @@
359357
search_on_datasets: SEARCH quoted_string ON DATASETS quoted_string ";"
360358
get_chunk: GET CHUNK quoted_string ";"
361359
list_chunks: LIST CHUNKS OF DOCUMENT quoted_string ("PAGE" NUMBER)? ("SIZE" NUMBER)? ("KEYWORDS" quoted_string)? ("AVAILABLE" NUMBER)? ";"
360+
set_metadata: SET METADATA OF DOCUMENT quoted_string TO quoted_string ";"
361+
remove_tags: REMOVE TAGS quoted_string (COMMA quoted_string)* FROM DATASET quoted_string ";"
362+
remove_chunks: REMOVE CHUNKS quoted_string (COMMA quoted_string)* FROM DOCUMENT quoted_string ";"
363+
| REMOVE ALL CHUNKS FROM DOCUMENT quoted_string ";"
362364
363365
parse_dataset_docs: PARSE quoted_string OF DATASET quoted_string ";"
364366
parse_dataset_sync: PARSE DATASET quoted_string SYNC ";"
365367
parse_dataset_async: PARSE DATASET quoted_string ASYNC ";"
366368
367-
update_chunk: UPDATE CHUNK quoted_string OF DATASET quoted_string SET quoted_string ";"
368-
set_metadata: SET METADATA OF DOCUMENT quoted_string TO quoted_string ";"
369-
remove_tags: REMOVE TAGS quoted_string (COMMA quoted_string)* FROM DATASET quoted_string ";"
370-
371-
// Internal CLI for GO
369+
// Internal CLI only for GO
370+
create_dataset_table: CREATE DATASET TABLE quoted_string VECTOR SIZE NUMBER ";"
371+
drop_dataset_table: DROP DATASET TABLE quoted_string ";"
372+
create_metadata_table: CREATE METADATA TABLE ";"
373+
drop_metadata_table: DROP METADATA TABLE ";"
372374
insert_dataset_from_file: INSERT DATASET FROM FILE quoted_string ";"
373375
insert_metadata_from_file: INSERT METADATA FROM FILE quoted_string ";"
376+
update_chunk: UPDATE CHUNK quoted_string OF DATASET quoted_string SET quoted_string ";"
374377
375378
identifier_list: identifier (COMMA identifier)*
376379
@@ -690,30 +693,29 @@ def drop_user_chat(self, items):
690693
chat_name = items[2].children[0].strip("'\"")
691694
return {"type": "drop_user_chat", "chat_name": chat_name}
692695

693-
def create_index(self, items):
694-
# items: CREATE, INDEX, FOR, DATASET, quoted_string, VECTOR_SIZE, NUMBER, ";"
696+
def create_dataset_table(self, items):
695697
dataset_name = None
696698
vector_size = None
697699
for i, item in enumerate(items):
698700
if hasattr(item, 'data') and item.data == 'quoted_string':
699701
dataset_name = item.children[0].strip("'\"")
700702
if hasattr(item, 'type') and item.type == 'NUMBER':
701-
if i > 0 and items[i-1].type == 'VECTOR_SIZE':
703+
if i > 0 and items[i-1].type == 'SIZE' and items[i-2].type == 'VECTOR':
702704
vector_size = int(item)
703-
return {"type": "create_index", "dataset_name": dataset_name, "vector_size": vector_size}
705+
return {"type": "create_dataset_table", "dataset_name": dataset_name, "vector_size": vector_size}
704706

705-
def drop_index(self, items):
707+
def drop_dataset_table(self, items):
706708
dataset_name = None
707709
for item in items:
708710
if hasattr(item, 'data') and item.data == 'quoted_string':
709711
dataset_name = item.children[0].strip("'\"")
710-
return {"type": "drop_index", "dataset_name": dataset_name}
712+
return {"type": "drop_dataset_table", "dataset_name": dataset_name}
711713

712-
def create_doc_meta_index(self, items):
713-
return {"type": "create_doc_meta_index"}
714+
def create_metadata_table(self, items):
715+
return {"type": "create_metadata_table"}
714716

715-
def drop_doc_meta_index(self, items):
716-
return {"type": "drop_doc_meta_index"}
717+
def drop_metadata_table(self, items):
718+
return {"type": "drop_metadata_table"}
717719

718720
def list_user_model_providers(self, items):
719721
return {"type": "list_user_model_providers"}
@@ -825,6 +827,35 @@ def remove_tags(self, items):
825827
break
826828
return {"type": "remove_tags", "dataset_name": dataset_name, "tags": tags}
827829

830+
def remove_chunks(self, items):
    """Build the command dict for the two REMOVE CHUNKS statement forms.

    Handled forms:
      1. REMOVE CHUNKS quoted_string (COMMA quoted_string)* FROM DOCUMENT quoted_string ";"
      2. REMOVE ALL CHUNKS FROM DOCUMENT quoted_string ";"

    Args:
        items: Parsed children of the rule. Keyword tokens expose a ``type``
            attribute; quoted_string nodes expose ``children`` whose first
            element is the quoted text.

    Returns:
        ``{"type": "remove_chunks", "doc_id": ..., "delete_all": True}`` for
        the ALL form, otherwise
        ``{"type": "remove_chunks", "doc_id": ..., "chunk_ids": [...]}``.
    """
    # Locate the DOCUMENT keyword once; the node right after it is the doc id.
    doc_pos = None
    for pos, item in enumerate(items):
        if getattr(item, "type", None) == "DOCUMENT":
            doc_pos = pos
            break

    doc_id = None
    if doc_pos is not None and doc_pos + 1 < len(items):
        doc_id = items[doc_pos + 1].children[0].strip("'\"")

    if any(getattr(item, "type", None) == "ALL" for item in items):
        return {"type": "remove_chunks", "doc_id": doc_id, "delete_all": True}

    # Only nodes *before* DOCUMENT are chunk ids. Scanning the whole list would
    # also pick up the document's own quoted_string and report it as a chunk.
    candidates = items if doc_pos is None else items[:doc_pos]
    chunk_ids = [
        node.children[0].strip("'\"")
        for node in candidates
        if getattr(node, "children", None)
    ]

    return {"type": "remove_chunks", "doc_id": doc_id, "chunk_ids": chunk_ids}
858+
828859
def list_chunks(self, items):
829860
doc_id = items[4].children[0].strip("'\"")
830861
result = {"type": "list_chunks", "doc_id": doc_id}

admin/client/ragflow_client.py

Lines changed: 62 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ def create_user_chat(self, command):
10291029
else:
10301030
print(f"Fail to create chat {chat_name}, code: {res_json['code']}, message: {res_json['message']}")
10311031

1032-
def create_index(self, command):
1032+
def create_dataset_table(self, command):
10331033
if self.server_type != "user":
10341034
print("This command is only allowed in USER mode")
10351035
return
@@ -1045,15 +1045,15 @@ def create_index(self, command):
10451045
# Build payload
10461046
payload = {"kb_id": dataset_id, "vector_size": vector_size}
10471047
# Call API
1048-
response = self.http_client.request("POST", "/kb/index", json_body=payload,
1048+
response = self.http_client.request("POST", "/kb/doc_engine_table", json_body=payload,
10491049
use_api_base=False, auth_kind="web")
10501050
res_json = response.json()
10511051
if response.status_code == 200 and res_json.get("code") == 0:
1052-
print(f"Success to create index for dataset: {dataset_name}")
1052+
print(f"Success to create table for dataset: {dataset_name}")
10531053
else:
1054-
print(f"Fail to create index for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
1054+
print(f"Fail to create table for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
10551055

1056-
def drop_index(self, command):
1056+
def drop_dataset_table(self, command):
10571057
if self.server_type != "user":
10581058
print("This command is only allowed in USER mode")
10591059
return
@@ -1062,41 +1062,41 @@ def drop_index(self, command):
10621062
dataset_id = self._get_dataset_id(dataset_name)
10631063
if dataset_id is None:
10641064
return
1065-
# Call API to delete index
1065+
# Call API to delete table
10661066
payload = {"kb_id": dataset_id}
1067-
response = self.http_client.request("DELETE", "/kb/index", json_body=payload,
1067+
response = self.http_client.request("DELETE", "/kb/doc_engine_table", json_body=payload,
10681068
use_api_base=False, auth_kind="web")
10691069
res_json = response.json()
10701070
if response.status_code == 200 and res_json.get("code") == 0:
1071-
print(f"Success to drop index for dataset: {dataset_name}")
1071+
print(f"Success to drop table for dataset: {dataset_name}")
10721072
else:
1073-
print(f"Fail to drop index for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
1073+
print(f"Fail to drop table for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
10741074

1075-
def create_doc_meta_index(self, command):
1075+
def create_metadata_table(self, command):
10761076
if self.server_type != "user":
10771077
print("This command is only allowed in USER mode")
10781078
return
1079-
# Call API to create doc meta index
1080-
response = self.http_client.request("POST", "/tenant/doc_meta_index",
1079+
# Call API to create metadata table
1080+
response = self.http_client.request("POST", "/tenant/doc_engine_metadata_table",
10811081
use_api_base=False, auth_kind="web")
10821082
res_json = response.json()
10831083
if response.status_code == 200 and res_json.get("code") == 0:
1084-
print("Success to create doc meta index")
1084+
print("Success to create metadata table")
10851085
else:
1086-
print(f"Fail to create doc meta index, code: {res_json.get('code')}, message: {res_json.get('message')}")
1086+
print(f"Fail to create metadata table, code: {res_json.get('code')}, message: {res_json.get('message')}")
10871087

1088-
def drop_doc_meta_index(self, command):
1088+
def drop_metadata_table(self, command):
10891089
if self.server_type != "user":
10901090
print("This command is only allowed in USER mode")
10911091
return
1092-
# Call API to delete doc meta index
1093-
response = self.http_client.request("DELETE", "/tenant/doc_meta_index",
1092+
# Call API to delete metadata table
1093+
response = self.http_client.request("DELETE", "/tenant/doc_engine_metadata_table",
10941094
use_api_base=False, auth_kind="web")
10951095
res_json = response.json()
10961096
if response.status_code == 200 and res_json.get("code") == 0:
1097-
print("Success to drop doc meta index")
1097+
print("Success to drop metadata table")
10981098
else:
1099-
print(f"Fail to drop doc meta index, code: {res_json.get('code')}, message: {res_json.get('message')}")
1099+
print(f"Fail to drop metadata table, code: {res_json.get('code')}, message: {res_json.get('message')}")
11001100

11011101
def drop_user_chat(self, command):
11021102
if self.server_type != "user":
@@ -1548,9 +1548,13 @@ def update_chunk(self, command_dict):
15481548
print(f"Invalid JSON body: {e}")
15491549
return
15501550

1551-
# Call PUT /datasets/{dataset_id}/documents/{doc_id}/chunks/{chunk_id}
1552-
path = f"/datasets/{dataset_id}/documents/{doc_id}/chunks/{chunk_id}"
1553-
response = self.http_client.request("PUT", path, json_body=payload, use_api_base=True, auth_kind="api")
1551+
# Add IDs to payload
1552+
payload["dataset_id"] = dataset_id
1553+
payload["document_id"] = doc_id
1554+
payload["chunk_id"] = chunk_id
1555+
1556+
# Call POST /v1/chunk/update
1557+
response = self.http_client.request("POST", "/chunk/update", json_body=payload, use_api_base=False, auth_kind="web")
15541558
res_json = response.json()
15551559
if response.status_code == 200:
15561560
if res_json.get("code") == 0:
@@ -1583,7 +1587,7 @@ def set_metadata(self, command_dict):
15831587
else:
15841588
print(f"Fail to set metadata, code: {res_json.get('code')}, message: {res_json.get('message')}")
15851589
else:
1586-
print(f"Fail to set metadata, HTTP {response.status_code}")
1590+
print(f"Fail to set metadata, HTTP {response.status_code}: {res_json.get('message', 'no message')}")
15871591

15881592
def remove_tags(self, command_dict):
15891593
if self.server_type != "user":
@@ -1613,6 +1617,31 @@ def remove_tags(self, command_dict):
16131617
else:
16141618
print(f"Fail to remove tags, HTTP {response.status_code}")
16151619

1620+
def remove_chunks(self, command_dict):
    """Delete chunks of a document through ``POST /chunk/rm`` and print the outcome.

    Args:
        command_dict: Parsed command. Must contain ``"doc_id"``; may contain
            ``"delete_all"`` (truthy to remove every chunk) or ``"chunk_ids"``
            (list of chunk ids). ``delete_all`` takes precedence.

    Returns:
        None. Results are reported on stdout.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    doc_id = command_dict["doc_id"]
    payload = {"doc_id": doc_id}

    if command_dict.get("delete_all"):
        payload["delete_all"] = True
    elif command_dict.get("chunk_ids"):
        payload["chunk_ids"] = command_dict["chunk_ids"]

    response = self.http_client.request("POST", "/chunk/rm", json_body=payload,
                                        use_api_base=False, auth_kind="web")

    # Check the status before touching the body: an error response is not
    # guaranteed to carry JSON, and decoding it first would raise before the
    # HTTP failure could be reported.
    # NOTE(review): assumes response.json() raises on a non-JSON body, as a
    # requests-style client would -- confirm against http_client.
    if response.status_code != 200:
        print(f"Fail to remove chunks, HTTP {response.status_code}")
        return

    res_json = response.json()
    if res_json.get("code") == 0:
        # The "data" field is printed as the number of deleted chunks.
        deleted_count = res_json.get("data", 0)
        print(f"Success to remove chunks from document {doc_id}: {deleted_count} chunks deleted")
    else:
        print(f"Fail to remove chunks, code: {res_json.get('code')}, message: {res_json.get('message')}")
1644+
16161645
def list_chunks(self, command_dict):
16171646
if self.server_type != "user":
16181647
print("This command is only allowed in USER mode")
@@ -1976,14 +2005,14 @@ def run_command(client: RAGFlowClient, command_dict: dict):
19762005
client.create_user_chat(command_dict)
19772006
case "drop_user_chat":
19782007
client.drop_user_chat(command_dict)
1979-
case "create_index":
1980-
client.create_index(command_dict)
1981-
case "drop_index":
1982-
client.drop_index(command_dict)
1983-
case "create_doc_meta_index":
1984-
client.create_doc_meta_index(command_dict)
1985-
case "drop_doc_meta_index":
1986-
client.drop_doc_meta_index(command_dict)
2008+
case "create_dataset_table":
2009+
client.create_dataset_table(command_dict)
2010+
case "drop_dataset_table":
2011+
client.drop_dataset_table(command_dict)
2012+
case "create_metadata_table":
2013+
client.create_metadata_table(command_dict)
2014+
case "drop_metadata_table":
2015+
client.drop_metadata_table(command_dict)
19872016
case "create_chat_session":
19882017
client.create_chat_session(command_dict)
19892018
case "drop_chat_session":
@@ -2016,6 +2045,8 @@ def run_command(client: RAGFlowClient, command_dict: dict):
20162045
return client.set_metadata(command_dict)
20172046
case "remove_tags":
20182047
return client.remove_tags(command_dict)
2048+
case "remove_chunks":
2049+
return client.remove_chunks(command_dict)
20192050
case "list_chunks":
20202051
return client.list_chunks(command_dict)
20212052
case "meta":
@@ -2077,10 +2108,6 @@ def show_help():
20772108
LIST METADATA SUMMARY OF DATASET <dataset> DOCUMENTS <doc_id>[, <doc_id>]*
20782109
GET CHUNK <chunk_id>
20792110
LIST CHUNKS OF DOCUMENT <doc_id> [PAGE <page>] [SIZE <size>] [KEYWORDS <keywords>] [AVAILABLE <0|1>]
2080-
CREATE INDEX FOR DATASET <dataset> VECTOR_SIZE <vector_size>
2081-
DROP INDEX FOR DATASET <dataset>
2082-
CREATE INDEX DOC_META
2083-
DROP INDEX DOC_META
20842111
20852112
Meta Commands:
20862113
\\?, \\h, \\help Show this help

agent/sandbox/providers/self_managed.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def initialize(self, config: Dict[str, Any]) -> bool:
7070
# Try to fall back to SANDBOX_HOST from settings if we are using localhost
7171
if "localhost" in self.endpoint or "127.0.0.1" in self.endpoint:
7272
try:
73-
from api import settings
73+
from common import settings
7474
if settings.SANDBOX_HOST and settings.SANDBOX_HOST not in self.endpoint:
7575
original_endpoint = self.endpoint
7676
self.endpoint = f"http://{settings.SANDBOX_HOST}:9385"

agent/templates/customer_feedback_dispatcher.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
"component_name": "Message",
108108
"params": {
109109
"content": [
110-
"Customer's original review: {begin@customer_review}\n\nFinale review category is: \n{VariableAggregator:FlatBerriesRest@ReviewCategory}"
110+
"Customer's original review: {begin@customer_review}\n\nFinal review category is: \n{VariableAggregator:FlatBerriesRest@ReviewCategory}"
111111
]
112112
}
113113
},
@@ -590,7 +590,7 @@
590590
"data": {
591591
"form": {
592592
"content": [
593-
"Customer's original review: {begin@customer_review}\n\nFinale review category is: \n{VariableAggregator:FlatBerriesRest@ReviewCategory}"
593+
"Customer's original review: {begin@customer_review}\n\nFinal review category is: \n{VariableAggregator:FlatBerriesRest@ReviewCategory}"
594594
]
595595
},
596596
"label": "Message",

0 commit comments

Comments
 (0)