Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,27 +374,6 @@ async def change_status():
return get_json_result(data=result)


@manager.route("/rm", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_id")
async def rm():
req = await get_request_json()
doc_ids = req["doc_id"]
if isinstance(doc_ids, str):
doc_ids = [doc_ids]

for doc_id in doc_ids:
if not DocumentService.accessible4deletion(doc_id, current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

errors = await thread_pool_exec(FileService.delete_docs, doc_ids, current_user.id)

if errors:
return get_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)

return get_json_result(data=True)


@manager.route("/run", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_ids", "run")
Expand Down
94 changes: 90 additions & 4 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,17 @@
from api.db import VALID_FILE_TYPES
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.common.check_team_permission import check_kb_team_permission
from api.utils.api_utils import get_data_error_result, get_error_data_result, get_result, get_json_result, \
server_error_response, add_tenant_id_to_kwargs, get_request_json
server_error_response, add_tenant_id_to_kwargs, get_request_json, get_error_argument_result, check_duplicate_ids
from api.utils.validation_utils import (
UpdateDocumentReq, format_validation_error_message,
UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq,
)
from common.constants import RetCode
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
from common.misc_utils import thread_pool_exec

@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PATCH"]) # noqa: F821
@login_required
Expand Down Expand Up @@ -260,9 +263,7 @@ async def upload_document(dataset_id, tenant_id):
description: Processing status.
"""
from api.constants import FILE_NAME_LEN_LIMIT
from api.common.check_team_permission import check_kb_team_permission
from api.db.services.file_service import FileService
from common.misc_utils import thread_pool_exec

form = await request.form
files = await request.files
Expand Down Expand Up @@ -660,3 +661,88 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
return RetCode.SUCCESS, "", [], return_empty_metadata

return RetCode.SUCCESS, "", list(doc_ids_filter) if doc_ids_filter is not None else [], return_empty_metadata


@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def delete_documents(tenant_id, dataset_id):
"""
Delete documents from a dataset.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: path
name: dataset_id
type: string
required: true
description: ID of the dataset containing the documents.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: body
name: body
description: Document deletion parameters.
required: true
schema:
type: object
properties:
ids:
type: array or null
items:
type: string
description: |
Specifies the documents to delete:
- An array of IDs, only the specified documents will be deleted.
delete_all:
type: boolean
default: false
description: Whether to delete all documents in the dataset.
responses:
200:
description: Successful operation.
schema:
type: object
"""
req, err = await validate_and_parse_json_request(request, DeleteDocumentReq)
if err is not None or req is None:
return get_error_argument_result(err)

try:
# Validate dataset exists and user has permission
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

# Get documents to delete
doc_ids = req.get("ids") or []
delete_all = req.get("delete_all", False)
if not delete_all and len(doc_ids) == 0:
return get_error_data_result(message=f"should either provide doc ids or set delete_all(true), dataset: {dataset_id}. ")

if len(doc_ids) > 0 and delete_all:
return get_error_data_result(message=f"should not provide both doc ids and delete_all(true), dataset: {dataset_id}. ")
if delete_all:
doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]

# make sure each id is unique
unique_doc_ids, duplicate_messages = check_duplicate_ids(doc_ids, "document")
if duplicate_messages:
logging.warning(f"duplicate_messages:{duplicate_messages}")
else:
doc_ids = unique_doc_ids

# Delete documents using existing FileService.delete_docs
errors = await thread_pool_exec(FileService.delete_docs, doc_ids, tenant_id)
Comment on lines +727 to +746
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Preflight document IDs before deleting.

This route checks access to dataset_id, but then passes arbitrary doc_ids to FileService.delete_docs, which deletes by document ID and resolves the document tenant internally. That allows a request scoped to one dataset to delete a document from another dataset if its ID is supplied. Also, duplicate IDs are only logged rather than removed, so a request containing duplicate IDs can delete some documents and then fail on the repeated IDs, leaving a partial deletion.

Reject duplicates and verify every requested ID belongs to dataset_id before calling FileService.delete_docs.

🛡️ Proposed fix
         if delete_all:
             doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]
 
         # make sure each id is unique
         unique_doc_ids, duplicate_messages = check_duplicate_ids(doc_ids, "document")
         if duplicate_messages:
-            logging.warning(f"duplicate_messages:{duplicate_messages}")
-        else:
-            doc_ids = unique_doc_ids
+            logging.warning(f"duplicate_messages:{duplicate_messages}")
+            return get_error_data_result(message="; ".join(duplicate_messages), code=RetCode.ARGUMENT_ERROR)
+        doc_ids = unique_doc_ids
+
+        dataset_doc_ids = set(KnowledgebaseService.list_documents_by_ids([dataset_id]))
+        missing_doc_ids = [doc_id for doc_id in doc_ids if doc_id not in dataset_doc_ids]
+        if missing_doc_ids:
+            return get_error_data_result(message=f"Document not found: {missing_doc_ids[0]}")
 
         # Delete documents using existing FileService.delete_docs
         errors = await thread_pool_exec(FileService.delete_docs, doc_ids, tenant_id)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@api/apps/restful_apis/document_api.py` around lines 721-740: reject
duplicate IDs and verify ownership before calling FileService.delete_docs: if
check_duplicate_ids(doc_ids, "document") returns any duplicates, return an error
instead of only logging; then preflight all doc_ids by fetching their records
(e.g., via DocumentService.query or a DocumentService.get_by_ids helper) and
ensure each returned document belongs to the requested dataset_id (and tenant_id
if applicable); if any requested id is missing or belongs to a different
dataset/tenant, return an error listing offending ids; only after duplicates are
absent and all ids are verified to belong to dataset_id call
FileService.delete_docs(doc_ids, tenant_id).


if errors:
return get_error_data_result(message=str(errors))

return get_result(data={"deleted": len(doc_ids)})
Comment on lines +746 to +751
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Partial-failure reporting loses successful deletions.

FileService.delete_docs iterates IDs and concatenates exception messages into a single string without stopping, so on partial failure it has already deleted some docs. Here any non-empty errors causes a generic error response and len(doc_ids) is never returned, so the caller cannot tell how many documents were actually removed. Consider (a) having delete_docs return (deleted_ids, errors) or (b) at minimum, including the attempted/failed counts in the error payload so clients can reconcile state.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@api/apps/restful_apis/document_api.py` around lines 746 - 751,
FileService.delete_docs currently swallows partial successes by concatenating
exception messages and returning a non-empty errors string so the API never
reports how many docs were actually deleted; change FileService.delete_docs to
return structured results (e.g., (deleted_ids, errors) or (deleted_count,
errors_list)) and update the callsite in document_api.py (where
thread_pool_exec(FileService.delete_docs, doc_ids, tenant_id) is invoked) to
unpack that tuple, return get_result with the deleted count when any docs were
removed, and when returning get_error_data_result include both the failed error
details and the attempted/failed counts so clients can reconcile partial
failures instead of only receiving a generic error.

except Exception as e:
logging.exception(e)
return get_error_data_result(message="Internal server error")
119 changes: 2 additions & 117 deletions api/apps/sdk/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,19 @@
from pydantic import BaseModel, Field, validator
from quart import request, send_file

from api.db.db_models import APIToken, Document, File, Task
from api.db.db_models import APIToken, Document, Task
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
from api.db.services.tenant_llm_service import TenantLLMService
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
from api.utils.image_utils import store_chunk_image
from common import settings
from common.constants import FileSource, LLMType, ParserType, RetCode, TaskStatus
from common.constants import LLMType, ParserType, RetCode, TaskStatus
from common.metadata_utils import convert_conditions, meta_filter
from common.misc_utils import thread_pool_exec
from common.string_utils import is_content_empty, remove_redundant_spaces
Expand Down Expand Up @@ -209,120 +208,6 @@ async def metadata_batch_update(dataset_id, tenant_id):
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})


@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@token_required
async def delete(tenant_id, dataset_id):
"""
Delete documents from a dataset.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: path
name: dataset_id
type: string
required: true
description: ID of the dataset.
- in: body
name: body
description: Document deletion parameters.
required: true
schema:
type: object
properties:
ids:
type: array
items:
type: string
description: |
List of document IDs to delete.
If omitted, `null`, or an empty array is provided, no documents will be deleted.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
responses:
200:
description: Documents deleted successfully.
schema:
type: object
"""
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
req = await get_request_json()
if not req:
return get_result()

doc_ids = req.get("ids")
if not doc_ids:
if req.get("delete_all") is True:
doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]
if not doc_ids:
return get_result()
else:
return get_result()

doc_list = doc_ids

unique_doc_ids, duplicate_messages = check_duplicate_ids(doc_list, "document")
doc_list = unique_doc_ids

root_folder = FileService.get_root_folder(tenant_id)
pf_id = root_folder["id"]
FileService.init_knowledgebase_docs(pf_id, tenant_id)
errors = ""
not_found = []
success_count = 0
for doc_id in doc_list:
try:
e, doc = DocumentService.get_by_id(doc_id)
if not e:
not_found.append(doc_id)
continue
tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id:
return get_error_data_result(message="Tenant not found!")

b, n = File2DocumentService.get_storage_address(doc_id=doc_id)

if not DocumentService.remove_document(doc, tenant_id):
return get_error_data_result(message="Database error (Document removal)!")

f2d = File2DocumentService.get_by_document_id(doc_id)
FileService.filter_delete(
[
File.source_type == FileSource.KNOWLEDGEBASE,
File.id == f2d[0].file_id,
]
)
File2DocumentService.delete_by_document_id(doc_id)

settings.STORAGE_IMPL.rm(b, n)
success_count += 1
except Exception as e:
errors += str(e)

if not_found:
return get_result(message=f"Documents not found: {not_found}", code=RetCode.DATA_ERROR)

if errors:
return get_result(message=errors, code=RetCode.SERVER_ERROR)

if duplicate_messages:
if success_count > 0:
return get_result(
message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors",
data={"success_count": success_count, "errors": duplicate_messages},
)
else:
return get_error_data_result(message=";".join(duplicate_messages))

return get_result()


DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"

Expand Down
3 changes: 3 additions & 0 deletions api/utils/validation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,9 @@ def validate_ids(cls, v_list: list[str] | None) -> list[str] | None:
class DeleteDatasetReq(DeleteReq): ...


class DeleteDocumentReq(DeleteReq): ...


class BaseListReq(BaseModel):
model_config = ConfigDict(extra="forbid")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ class TestAuthorization:
@pytest.mark.parametrize(
"invalid_auth, expected_code, expected_message",
[
(None, 0, "`Authorization` can't be empty"),
(None, 401, "<Unauthorized '401: Unauthorized'>"),
(
RAGFlowHttpApiAuth(INVALID_API_TOKEN),
109,
"Authentication error: API key is invalid!",
401,
"<Unauthorized '401: Unauthorized'>",
),
],
)
Expand All @@ -45,19 +45,19 @@ class TestDocumentsDeletion:
@pytest.mark.parametrize(
"payload, expected_code, expected_message, remaining",
[
(None, 0, "", 3),
({"ids": []}, 0, "", 3),
({"ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", 3),
({}, 102, "should either provide doc ids or set delete_all(true), dataset", 3),
({"ids": []}, 102, "should either provide doc ids or set delete_all(true), dataset", 3),
({"ids": ["invalid_id"]}, 101, "Field: <ids> - Message: <Invalid UUID1 format> - Value: <['invalid_id']>", 3),
(
{"ids": ["\n!?。;!?\"'"]},
102,
"""Documents not found: [\'\\n!?。;!?"\\\'\']""",
101,
"Field: <ids> - Message: <Invalid UUID1 format> - Value:",
3,
),
(
"not json",
100,
"AttributeError(\"'str' object has no attribute 'get'\")",
101,
"Invalid request payload: expected object, got str",
3,
),
(lambda r: {"ids": r[:1]}, 0, "", 2),
Expand All @@ -79,7 +79,7 @@ def test_basic_scenarios(
res = delete_documents(HttpApiAuth, dataset_id, payload)
assert res["code"] == expected_code
if res["code"] != 0:
assert res["message"] == expected_message
assert expected_message in res["message"]

res = list_documents(HttpApiAuth, dataset_id)
assert len(res["data"]["docs"]) == remaining
Expand Down Expand Up @@ -117,12 +117,12 @@ def test_delete_partial_invalid_id(self, HttpApiAuth, add_documents_func, payloa
if callable(payload):
payload = payload(document_ids)
res = delete_documents(HttpApiAuth, dataset_id, payload)
assert res["code"] == 102
assert res["message"] == "Documents not found: ['invalid_id']"
assert res["code"] == 101
assert "Field: <ids> - Message: <Invalid UUID1 format> - Value" in res["message"]

res = list_documents(HttpApiAuth, dataset_id)
assert len(res["data"]["docs"]) == 0
assert res["data"]["total"] == 0
assert len(res["data"]["docs"]) == 3
assert res["data"]["total"] == 3

@pytest.mark.p2
def test_repeated_deletion(self, HttpApiAuth, add_documents_func):
Expand All @@ -132,19 +132,18 @@ def test_repeated_deletion(self, HttpApiAuth, add_documents_func):

res = delete_documents(HttpApiAuth, dataset_id, {"ids": document_ids})
assert res["code"] == 102
assert "Documents not found" in res["message"]
assert "Document not found" in res["message"]

@pytest.mark.p2
def test_duplicate_deletion(self, HttpApiAuth, add_documents_func):
dataset_id, document_ids = add_documents_func
res = delete_documents(HttpApiAuth, dataset_id, {"ids": document_ids + document_ids})
assert res["code"] == 0
assert "Duplicate document ids" in res["data"]["errors"][0]
assert res["data"]["success_count"] == 3
assert res["code"] == 101, res
assert "Field: <ids> - Message: <Duplicate ids:" in res["message"]

res = list_documents(HttpApiAuth, dataset_id)
assert len(res["data"]["docs"]) == 0
assert res["data"]["total"] == 0
assert len(res["data"]["docs"]) == 3
assert res["data"]["total"] == 3


@pytest.mark.p3
Expand Down
Loading
Loading