Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,22 +183,6 @@ async def create():
return server_error_response(e)


@manager.route("/infos", methods=["POST"])  # noqa: F821
@login_required
async def doc_infos():
    """Return the requested documents, each extended with its metadata fields.

    Reads ``doc_ids`` from the JSON request body. If any of those IDs fails
    ``DocumentService.accessible`` for the current user, an
    authentication-error result is returned and no document is loaded.
    Otherwise every matching document is returned as a dict with an extra
    ``meta_fields`` entry.
    """
    payload = await get_request_json()
    requested_ids = payload["doc_ids"]

    # A single inaccessible document rejects the whole request; all() stops
    # at the first failing ID, just like an early return inside a loop.
    if not all(DocumentService.accessible(one_id, current_user.id) for one_id in requested_ids):
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

    # Materialize the rows first, then attach the metadata to each of them.
    records = list(DocumentService.get_by_ids(requested_ids).dicts())
    for record in records:
        record["meta_fields"] = DocMetadataService.get_document_metadata(record["id"])
    return get_json_result(data=records)


@manager.route("/metadata/update", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_ids")
Expand Down
6 changes: 6 additions & 0 deletions api/apps/restful_apis/document_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,12 @@ def _get_docs_with_request(req, dataset_id:str):
if doc_name and not DocumentService.query(name=doc_name, kb_id=dataset_id):
return RetCode.DATA_ERROR, f"You don't own the document {doc_name}.", [], 0

doc_ids = q.getlist("ids")
if doc_id and len(doc_ids) > 0:
return RetCode.DATA_ERROR, f"Should not provide both 'id':{doc_id} and 'ids'{doc_ids}"
if len(doc_ids) > 0:
doc_ids_filter = doc_ids

docs, total = DocumentService.get_by_kb_id(dataset_id, page, page_size, orderby, desc, keywords, run_status_converted, types, suffix,
name=doc_name, doc_ids=doc_ids_filter, return_empty_metadata=return_empty_metadata)

Expand Down
9 changes: 9 additions & 0 deletions sdk/python/ragflow_sdk/modules/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def upload_documents(self, document_list: list[dict]):
def list_documents(
self,
id: str | None = None,
ids: list[str] | None = None,
name: str | None = None,
keywords: str | None = None,
page: int = 1,
Expand All @@ -75,6 +76,10 @@ def list_documents(
create_time_from: int = 0,
create_time_to: int = 0,
):
# Validate that id and ids are not used together
if id and ids:
raise ValueError("Cannot use both 'id' and 'ids' parameters at the same time.")

params = {
"id": id,
"name": name,
Expand All @@ -86,6 +91,10 @@ def list_documents(
"create_time_from": create_time_from,
"create_time_to": create_time_to,
}
# Handle ids parameter - convert to multiple query params
if ids:
for doc_id in ids:
params.append(("ids", doc_id))
res = self.get(f"/datasets/{self.id}/documents", params=params)
res = res.json()
documents = []
Expand Down
33 changes: 29 additions & 4 deletions sdk/python/test/test_frontend_api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,36 @@ def list_document(auth, dataset_id):
return res.json()


def get_docs_info(auth, doc_ids):
def get_docs_info(auth, dataset_id, doc_ids=None, doc_id=None):
"""
Get document information by IDs.

Args:
auth: Authorization header
dataset_id: Dataset ID
doc_ids: List of document IDs (use for multiple) - exclusive with doc_id
doc_id: Single document ID (use for one) - exclusive with doc_ids

Raises:
ValueError: If both doc_id and doc_ids are provided
"""
# Validate that id and ids are not used together
if doc_id and doc_ids:
raise ValueError("Cannot use both 'id' and 'ids' parameters at the same time.")

authorization = {"Authorization": auth}
json_req = {"doc_ids": doc_ids}
url = f"{HOST_ADDRESS}/v1/document/infos"
res = requests.post(url=url, headers=authorization, json=json_req)
params = {}
if doc_ids:
# Multiple IDs
for id in doc_ids:
params.append(("ids", id))
elif doc_id:
# Single ID
params["id"] = doc_id
Comment on lines +96 to +103
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

cd sdk/python && find . -name "common.py" -path "*/test_frontend_api/*" | head -20

Repository: infiniflow/ragflow

Length of output: 96


🏁 Script executed:

cat -n sdk/python/test/test_frontend_api/common.py | sed -n '90,110p'

Repository: infiniflow/ragflow

Length of output: 828


🏁 Script executed:

cat -n sdk/python/test/test_frontend_api/common.py | sed -n '78,110p'

Repository: infiniflow/ragflow

Length of output: 1334


🏁 Script executed:

# Check if similar pattern exists in dataset.py
find . -name "dataset.py" -path "*/ragflow_sdk/modules/*" | head -5

Repository: infiniflow/ragflow

Length of output: 105


🏁 Script executed:

cat -n sdk/python/ragflow_sdk/modules/dataset.py | grep -A 20 "params.*append\|params.*\[\]" | head -40

Repository: infiniflow/ragflow

Length of output: 1043


🏁 Script executed:

cat -n sdk/python/ragflow_sdk/modules/dataset.py | sed -n '85,110p'

Repository: infiniflow/ragflow

Length of output: 1270


🏁 Script executed:

cat -n sdk/python/ragflow_sdk/modules/dataset.py | sed -n '65,100p'

Repository: infiniflow/ragflow

Length of output: 1471


🏁 Script executed:

# Check ruff formatting/linting status of the file
cd sdk/python && python -m pip install ruff -q && ruff check test/test_frontend_api/common.py --select=E,F,W 2>&1 | head -50

Repository: infiniflow/ragflow

Length of output: 2256


Fix params initialization to support repeated query parameters.

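Line 96 initializes params = {}, but line 100 calls .append() on it. Dicts don't have .append(), so the first doc_ids request fails with AttributeError. Note that list_documents in sdk/python/ragflow_sdk/modules/dataset.py has the same problem: it builds params as a dict literal and then calls params.append(("ids", doc_id)), so it needs an equivalent fix. For this file, the fix is to initialize params = [] and convert the single-ID branch to append as well: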

Fix
-    params = {}
+    params = []
     if doc_ids:
         # Multiple IDs
         for id in doc_ids:
             params.append(("ids", id))
     elif doc_id:
         # Single ID
-        params["id"] = doc_id
+        params.append(("id", doc_id))

Additionally, ruff reports 4 whitespace linting violations (W293) at lines 81, 87, 94, 104. Run ruff check --fix to resolve them per the coding guidelines.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@sdk/python/test/test_frontend_api/common.py` around lines 96 - 103,
get_docs_info initializes params as a dict but then calls params.append() to
build repeated query parameters, which raises AttributeError. Change params =
{} to params = [] and update the single-ID branch to use params.append(("id",
doc_id)), so that both branches build a list of (key, value) tuples. The
symbols to edit are params, doc_ids and doc_id. Also run ruff check --fix to
address the W293 whitespace violations reported on the nearby lines.


# Use /api/v1 prefix for dataset API
url = f"{HOST_ADDRESS}/api/v1/datasets/{dataset_id}/documents"
res = requests.get(url=url, headers=authorization, params=params)
return res.json()


Expand Down
4 changes: 2 additions & 2 deletions sdk/python/test/test_frontend_api/test_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ def test_parse_txt_document(get_auth):
for doc in res['data']['docs']:
doc_id_list.append(doc['id'])

res = get_docs_info(get_auth, doc_id_list)
res = get_docs_info(get_auth, dataset_id, doc_ids=doc_id_list)
print(doc_id_list)
doc_count = len(doc_id_list)
res = parse_docs(get_auth, doc_id_list)

start_ts = timer()
while True:
res = get_docs_info(get_auth, doc_id_list)
res = get_docs_info(get_auth, dataset_id, doc_ids=doc_id_list)
finished_count = 0
for doc_info in res['data']:
if doc_info['progress'] == 1:
Expand Down
4 changes: 2 additions & 2 deletions test/testcases/test_web_api/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ def document_filter(auth, dataset_id, payload=None, *, headers=HEADERS, data=Non
return res.json()


def document_infos(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/infos", headers=headers, auth=auth, json=payload, data=data)
def document_infos(auth, dataset_id, params=None, payload=None, *, headers=HEADERS, data=None):
res = requests.get(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents", params=params, json=payload, headers=headers, auth=auth, data=data)
return res.json()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_filter_auth_invalid(self, invalid_auth, expected_code, expected_fragmen
@pytest.mark.p2
@pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
def test_infos_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
res = document_infos(invalid_auth, {"doc_ids": ["doc_id"]})
res = document_infos(invalid_auth, "kb_id", {"doc_ids": ["doc_id"]})
assert res["code"] == expected_code, res
Comment on lines 46 to 48
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Update the remaining document_infos tests to the new dataset-scoped contract.

Line 47 still uses doc_ids instead of ids, and Lines 178-180 still use the removed (auth, payload) call shape. The invalid-doc case will now request /datasets/{'doc_ids': ['invalid_id']}/documents, so it no longer validates the migrated endpoint behavior.

Suggested test update
-        res = document_infos(invalid_auth, "kb_id", {"doc_ids": ["doc_id"]})
+        res = document_infos(invalid_auth, "kb_id", {"ids": ["doc_id"]})
...
-    def test_infos_invalid_doc_id(self, WebApiAuth):
-        res = document_infos(WebApiAuth, {"doc_ids": ["invalid_id"]})
+    def test_infos_invalid_doc_id(self, WebApiAuth, add_document_func):
+        dataset_id, _ = add_document_func
+        res = document_infos(WebApiAuth, dataset_id, {"ids": ["invalid_id"]})

Also applies to: 177-180

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/testcases/test_web_api/test_document_app/test_document_metadata.py`
around lines 46 - 48, The test uses the old document-scoped contract: update
test_infos_auth_invalid and the other failing cases to the dataset-scoped
contract by replacing payload key "doc_ids" with "ids" and by changing calls
that currently use the removed (auth, payload) shape to the new (auth,
dataset_id, payload) shape; e.g., call document_infos(invalid_auth, "kb_id",
{"ids": ["doc_id"]}) instead of document_infos(invalid_auth, {"doc_ids":
["doc_id"]}) so the tests hit /datasets/{dataset_id}/documents with the new
payload format.

assert expected_fragment in res["message"], res

Expand Down Expand Up @@ -91,11 +91,12 @@ def test_filter(self, WebApiAuth, add_dataset_func):

@pytest.mark.p2
def test_infos(self, WebApiAuth, add_document_func):
_, doc_id = add_document_func
res = document_infos(WebApiAuth, {"doc_ids": [doc_id]})
dataset_id, doc_id = add_document_func
res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]})
assert res["code"] == 0, res
assert len(res["data"]) == 1, res
assert res["data"][0]["id"] == doc_id, res
docs = res["data"]["docs"]
assert len(docs) == 1, docs
assert docs[0]["id"] == doc_id, res

## The inputs have been changed to add 'doc_ids'
## TODO:
Expand Down Expand Up @@ -138,20 +139,22 @@ def test_infos(self, WebApiAuth, add_document_func):

@pytest.mark.p2
def test_change_status(self, WebApiAuth, add_document_func):
_, doc_id = add_document_func
dataset_id, doc_id = add_document_func
res = document_change_status(WebApiAuth, {"doc_ids": [doc_id], "status": "1"})

assert res["code"] == 0, res
assert res["data"][doc_id]["status"] == "1", res
info_res = document_infos(WebApiAuth, {"doc_ids": [doc_id]})
info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]})

assert info_res["code"] == 0, info_res
assert info_res["data"][0]["status"] == "1", info_res
assert info_res["data"]["docs"][0]["status"] == "1", info_res


class TestDocumentMetadataNegative:
@pytest.mark.p2
def test_filter_missing_kb_id(self, WebApiAuth, add_document_func):
kb_id, doc_id = add_document_func
res = document_filter(WebApiAuth, "", {"doc_ids": [doc_id]})
res = document_filter(WebApiAuth, "", {"ids": [doc_id]})
assert res["code"] == 100, res
assert "<MethodNotAllowed '405: Method Not Allowed'>" == res["message"], res

Expand Down Expand Up @@ -228,26 +231,6 @@ def _allow_kb(self, module, monkeypatch, kb_id="kb1", tenant_id="tenant1"):
monkeypatch.setattr(module.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id=tenant_id)])
monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: True if _kwargs.get("id") == kb_id else False)


def test_infos_meta_fields(self, document_app_module, monkeypatch):
    """doc_infos should expose each document's metadata under ``meta_fields``."""
    app = document_app_module

    class _FakeQuery:
        # Stands in for the object returned by get_by_ids; only dicts() is provided.
        def dicts(self):
            return [{"id": "doc1"}]

    async def _stub_request_json():
        return {"doc_ids": ["doc1"]}

    # Grant access, return one stored document and a fixed metadata payload.
    monkeypatch.setattr(app.DocumentService, "accessible", lambda *_args, **_kwargs: True)
    monkeypatch.setattr(app.DocumentService, "get_by_ids", lambda _ids: _FakeQuery())
    monkeypatch.setattr(app.DocMetadataService, "get_document_metadata", lambda _doc_id: {"author": "alice"})
    monkeypatch.setattr(app, "get_request_json", _stub_request_json)

    response = _run(app.doc_infos())
    assert response["code"] == 0
    assert response["data"][0]["meta_fields"]["author"] == "alice"

def test_metadata_update_missing_kb_id(self, document_app_module, monkeypatch):
module = document_app_module

Expand Down
5 changes: 0 additions & 5 deletions web/src/services/knowledge-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ const {
documentUpload,
webCrawl,
knowledgeGraph,
documentInfos,
listTagByKnowledgeIds,
setMeta,
getMeta,
Expand Down Expand Up @@ -101,10 +100,6 @@ const methods = {
url: webCrawl,
method: 'post',
},
documentInfos: {
url: documentInfos,
method: 'post',
},
setMeta: {
url: setMeta,
method: 'post',
Expand Down
1 change: 0 additions & 1 deletion web/src/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ export default {
documentUpload: (datasetId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents`,
webCrawl: `${webAPI}/document/web_crawl`,
documentInfos: `${webAPI}/document/infos`,
uploadAndParse: `${webAPI}/document/upload_info`,
setMeta: `${webAPI}/document/set_meta`,
getDatasetFilter: (datasetId: string) =>
Expand Down
Loading