Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from api.db import FileType
from api.db.db_models import Task
from api.db.services import duplicate_name
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService, doc_upload_and_parse
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
Expand Down Expand Up @@ -183,33 +182,6 @@ async def create():
return server_error_response(e)


@manager.route("/metadata/update", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_ids")
async def metadata_update():
    """Batch-update metadata on a set of documents within one knowledge base.

    Expects a JSON body with ``kb_id``, ``doc_ids`` and optional
    ``updates``/``deletes`` lists; returns the number of documents updated
    plus the matched-document count.
    """
    payload = await get_request_json()
    kb_id = payload.get("kb_id")
    document_ids = payload.get("doc_ids")
    update_items = payload.get("updates", []) or []
    delete_items = payload.get("deletes", []) or []

    # kb_id is mandatory; doc_ids presence is enforced by @validate_request.
    if not kb_id:
        return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)

    if not (isinstance(update_items, list) and isinstance(delete_items, list)):
        return get_json_result(data=False, message="updates and deletes must be lists.", code=RetCode.ARGUMENT_ERROR)

    # Each update entry must carry a non-empty key and an explicit value field.
    for item in update_items:
        if not isinstance(item, dict) or not item.get("key") or "value" not in item:
            return get_json_result(data=False, message="Each update requires key and value.", code=RetCode.ARGUMENT_ERROR)

    # Each delete entry must carry a non-empty key.
    for item in delete_items:
        if not isinstance(item, dict) or not item.get("key"):
            return get_json_result(data=False, message="Each delete requires key.", code=RetCode.ARGUMENT_ERROR)

    changed = DocMetadataService.batch_update_metadata(kb_id, document_ids, update_items, delete_items)
    return get_json_result(data={"updated": changed, "matched_docs": len(document_ids)})


@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
# @login_required
def thumbnails():
Expand Down
128 changes: 128 additions & 0 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,3 +891,131 @@ async def update_metadata_config(tenant_id, dataset_id, document_id):
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))

return get_result(data=doc.to_dict())


@manager.route("/datasets/<dataset_id>/documents/metadatas", methods=["PATCH"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def update_metadata(tenant_id, dataset_id):
    """
    Update document metadata in batch.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Metadata update request.
        required: true
        schema:
          type: object
          properties:
            selector:
              type: object
              description: Document selector.
              properties:
                document_ids:
                  type: array
                  items:
                    type: string
                  description: List of document IDs to update.
                metadata_condition:
                  type: object
                  description: Filter documents by existing metadata.
            updates:
              type: array
              items:
                type: object
                properties:
                  key:
                    type: string
                  value:
                    type: any
              description: List of metadata key-value pairs to update.
            deletes:
              type: array
              items:
                type: object
                properties:
                  key:
                    type: string
              description: List of metadata keys to delete.
    responses:
      200:
        description: Metadata updated successfully.
    """
    # Verify ownership of dataset
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")

    # Get request body
    req = await get_request_json()
    selector = req.get("selector", {}) or {}
    updates = req.get("updates", []) or []
    deletes = req.get("deletes", []) or []

    # Validate selector
    if not isinstance(selector, dict):
        return get_error_data_result(message="selector must be an object.")
    if not isinstance(updates, list) or not isinstance(deletes, list):
        return get_error_data_result(message="updates and deletes must be lists.")

    # Validate metadata_condition
    metadata_condition = selector.get("metadata_condition", {}) or {}
    if metadata_condition and not isinstance(metadata_condition, dict):
        return get_error_data_result(message="metadata_condition must be an object.")

    # Validate document_ids
    document_ids = selector.get("document_ids", []) or []
    if document_ids and not isinstance(document_ids, list):
        return get_error_data_result(message="document_ids must be a list.")

    # Validate updates: each entry needs a non-empty key and an explicit value.
    for upd in updates:
        if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
            return get_error_data_result(message="Each update requires key and value.")

    # Validate deletes: each entry needs a non-empty key.
    for d in deletes:
        if not isinstance(d, dict) or not d.get("key"):
            return get_error_data_result(message="Each delete requires key.")

    # Initialize target document IDs
    target_doc_ids = set()

    # If document_ids provided, validate they belong to the dataset
    if document_ids:
        kb_doc_ids = KnowledgebaseService.list_documents_by_ids([dataset_id])
        invalid_ids = set(document_ids) - set(kb_doc_ids)
        if invalid_ids:
            return get_error_data_result(
                message=f"These documents do not belong to dataset {dataset_id}: {', '.join(invalid_ids)}"
            )
        target_doc_ids = set(document_ids)

    # Apply metadata_condition filtering if provided
    if metadata_condition:
        metas = DocMetadataService.get_flatted_meta_by_kbs([dataset_id])
        filtered_ids = set(
            meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
        )
        # FIX: when the selector carries only a metadata_condition (no explicit
        # document_ids), target_doc_ids starts empty and a plain intersection
        # would always produce an empty match. Use the filtered set directly in
        # that case; intersect only when explicit IDs narrowed the target set.
        target_doc_ids = target_doc_ids & filtered_ids if target_doc_ids else filtered_ids
        if metadata_condition.get("conditions") and not target_doc_ids:
            return get_result(data={"updated": 0, "matched_docs": 0})

    # Convert to list and perform update
    target_doc_ids = list(target_doc_ids)
    updated = DocMetadataService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes)
    return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
51 changes: 0 additions & 51 deletions api/apps/sdk/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,57 +157,6 @@ async def download_doc(document_id):
)


@manager.route("/datasets/<dataset_id>/metadata/update", methods=["POST"])  # noqa: F821
@token_required
async def metadata_batch_update(dataset_id, tenant_id):
    """Batch-update document metadata for a dataset via the token-based SDK API.

    Body: optional ``selector`` (``document_ids`` and/or ``metadata_condition``)
    plus ``updates``/``deletes`` lists. Returns the count of updated documents
    and the number of matched documents.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

    req = await get_request_json()
    selector = req.get("selector", {}) or {}
    updates = req.get("updates", []) or []
    deletes = req.get("deletes", []) or []

    if not isinstance(selector, dict):
        return get_error_data_result(message="selector must be an object.")
    if not isinstance(updates, list) or not isinstance(deletes, list):
        return get_error_data_result(message="updates and deletes must be lists.")

    metadata_condition = selector.get("metadata_condition", {}) or {}
    if metadata_condition and not isinstance(metadata_condition, dict):
        return get_error_data_result(message="metadata_condition must be an object.")

    document_ids = selector.get("document_ids", []) or []
    if document_ids and not isinstance(document_ids, list):
        return get_error_data_result(message="document_ids must be a list.")

    # Each update entry needs a non-empty key and an explicit value field.
    for upd in updates:
        if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
            return get_error_data_result(message="Each update requires key and value.")
    # Each delete entry needs a non-empty key.
    for d in deletes:
        if not isinstance(d, dict) or not d.get("key"):
            return get_error_data_result(message="Each delete requires key.")

    # FIX: initialize unconditionally. Previously target_doc_ids was only bound
    # inside `if document_ids:`, so a selector without document_ids raised
    # NameError further down. (Also removed the dead `set(kb_doc_ids)`
    # assignment that was immediately overwritten.)
    target_doc_ids = set()

    if document_ids:
        kb_doc_ids = KnowledgebaseService.list_documents_by_ids([dataset_id])
        invalid_ids = set(document_ids) - set(kb_doc_ids)
        if invalid_ids:
            return get_error_data_result(message=f"These documents do not belong to dataset {dataset_id}: {', '.join(invalid_ids)}")
        target_doc_ids = set(document_ids)

    if metadata_condition:
        metas = DocMetadataService.get_flatted_meta_by_kbs([dataset_id])
        filtered_ids = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
        # FIX: with a metadata-condition-only selector, intersecting with the
        # (empty) target set would always yield zero matches; use the filtered
        # set as the initial target when no explicit IDs were provided.
        target_doc_ids = target_doc_ids & filtered_ids if target_doc_ids else filtered_ids
        if metadata_condition.get("conditions") and not target_doc_ids:
            return get_result(data={"updated": 0, "matched_docs": 0})

    target_doc_ids = list(target_doc_ids)
    updated = DocMetadataService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes)
    return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})


DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"

Expand Down
2 changes: 1 addition & 1 deletion docker/.env
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ GO_ADMIN_PORT=9383
API_PROXY_SCHEME=python # use pure python server deployment

# The RAGFlow Docker image to download. v0.22+ doesn't include embedding models.
RAGFLOW_IMAGE=infiniflow/ragflow:v0.25.0
RAGFLOW_IMAGE=infiniflow/ragflow:latest

# If you cannot download the RAGFlow Docker image:
# RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:v0.25.0
Expand Down
10 changes: 10 additions & 0 deletions test/testcases/test_http_api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,16 @@ def metadata_batch_update(auth, dataset_id, payload=None):
return res.json()


def update_documents_metadata(auth, dataset_id, payload=None):
    """New unified API for updating document metadata.

    Issues a PATCH to /api/v1/datasets/{dataset_id}/documents/metadatas and
    returns the decoded JSON response body.
    """
    endpoint = f"{HOST_ADDRESS}{DATASETS_API_URL}/{dataset_id}/documents/metadatas"
    response = requests.patch(url=endpoint, headers=HEADERS, auth=auth, json=payload)
    return response.json()


# CHAT COMPLETIONS AND RELATED QUESTIONS
def related_questions(auth, payload=None):
url = f"{HOST_ADDRESS}/api/{VERSION}/sessions/related_questions"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,95 +388,6 @@ def test_download_and_download_doc_errors(self, monkeypatch):
res = _run(module.download_doc("doc-1"))
assert res["filename"] == "doc.txt"

def test_metadata_batch_update(self, monkeypatch):
    """Walk every validation and success branch of the SDK metadata_batch_update endpoint.

    Each step re-patches get_request_json with a new body and asserts the
    endpoint's response; later steps build on earlier monkeypatches, so
    ordering matters.
    """
    module = _load_doc_module(monkeypatch)
    # Pass conditions through unchanged so the filter sees the raw dict.
    monkeypatch.setattr(module, "convert_conditions", lambda cond: cond)
    # 1) Dataset not accessible -> ownership error.
    monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)
    monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"selector": {}}))
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert "don't own the dataset" in res["message"]

    # 2) selector must be a dict.
    monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: True)
    monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"selector": [1]}))
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert res["message"] == "selector must be an object."

    # 3) updates/deletes must be lists.
    monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"selector": {}, "updates": {"k": "v"}, "deletes": []}))
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert res["message"] == "updates and deletes must be lists."

    # 4) metadata_condition must be a dict when present.
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue({"selector": {"metadata_condition": [1]}, "updates": [], "deletes": []}),
    )
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert res["message"] == "metadata_condition must be an object."

    # 5) document_ids must be a list when present.
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue({"selector": {"document_ids": "doc-1"}, "updates": [], "deletes": []}),
    )
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert res["message"] == "document_ids must be a list."

    # 6) An update entry with an empty key is rejected.
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue({"selector": {}, "updates": [{"key": ""}], "deletes": []}),
    )
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert "Each update requires key and value." in res["message"]

    # 7) A delete entry without a key is rejected.
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue({"selector": {}, "updates": [], "deletes": [{"x": "y"}]}),
    )
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert "Each delete requires key." in res["message"]

    # 8) document_ids outside the dataset are reported.
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue(
            {
                "selector": {"document_ids": ["bad"], "metadata_condition": {"conditions": []}},
                "updates": [{"key": "k", "value": "v"}],
                "deletes": [],
            }
        ),
    )
    monkeypatch.setattr(module.KnowledgebaseService, "list_documents_by_ids", lambda _ids: ["doc-1"])
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert "do not belong to dataset" in res["message"]

    # 9) Condition filter matches nothing -> 0 updated / 0 matched.
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue(
            {
                "selector": {"document_ids": ["doc-1"], "metadata_condition": {"conditions": [{"f": "x"}]}},
                "updates": [{"key": "k", "value": "v"}],
                "deletes": [],
            }
        ),
    )
    monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: [])
    monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [])
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert res["code"] == 0
    assert res["data"]["updated"] == 0
    assert res["data"]["matched_docs"] == 0

    # 10) Filter matches doc-1 and the batch update succeeds.
    monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: ["doc-1"])
    monkeypatch.setattr(module.DocMetadataService, "batch_update_metadata", lambda *_args, **_kwargs: 1)
    res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1"))
    assert res["code"] == 0
    assert res["data"]["updated"] == 1
    assert res["data"]["matched_docs"] == 1


def test_parse_branches(self, monkeypatch):
module = _load_doc_module(monkeypatch)
Expand Down
Loading
Loading