Skip to content

Commit 086ca2b

Browse files
committed
fix: remove pagerank_fea on ES/OS when chunk feedback hits zero
Elasticsearch/OpenSearch rank_feature fields only accept strictly positive values, so pagerank_fea must not be set to 0; align chunk feedback with kb_app by sending a remove payload for elasticsearch/opensearch. Extend the single-document update path in es_conn and opensearch_conn to honor remove alongside doc payloads. Tests: stub DOC_ENGINE=infinity by default; add ES/OS remove assertions.
1 parent 7467363 commit 086ca2b

File tree

4 files changed

+70
-7
lines changed

4 files changed

+70
-7
lines changed

api/db/services/chunk_feedback_service.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,14 @@ def update_chunk_weight(
198198
# Clamp to valid range (integer; matches doc-store column types)
199199
new_weight = max(MIN_PAGERANK_WEIGHT, min(MAX_PAGERANK_WEIGHT, new_weight))
200200

201-
# Update the chunk
201+
# Elasticsearch/OpenSearch map pagerank_fea as rank_feature; zero must not be
202+
# indexed — remove the field (same as kb_app / dataset_api_service).
202203
condition = {"id": chunk_id}
203-
new_value = {PAGERANK_FLD: int(new_weight)}
204+
engine = settings.DOC_ENGINE.lower()
205+
if new_weight == 0 and engine in ("elasticsearch", "opensearch"):
206+
new_value = {"remove": PAGERANK_FLD}
207+
else:
208+
new_value = {PAGERANK_FLD: int(new_weight)}
204209

205210
success = settings.docStoreConn.update(
206211
condition, new_value, idx_name, kb_id

rag/utils/es_conn.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,9 @@ def update(self, condition: dict, new_value: dict, index_name: str, knowledgebas
303303
# update specific single document
304304
chunk_id = condition["id"]
305305
for i in range(ATTEMPT_TIME):
306-
for k in doc.keys():
306+
doc_part = copy.deepcopy(doc)
307+
remove_field = doc_part.pop("remove", None) if isinstance(doc_part.get("remove"), str) else None
308+
for k in doc_part.keys():
307309
if "feas" != k.split("_")[-1]:
308310
continue
309311
try:
@@ -312,8 +314,16 @@ def update(self, condition: dict, new_value: dict, index_name: str, knowledgebas
312314
self.logger.exception(
313315
f"ESConnection.update(index={index_name}, id={chunk_id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")
314316
try:
315-
self.es.update(index=index_name, id=chunk_id, doc=doc)
316-
return True
317+
if remove_field is not None:
318+
self.es.update(
319+
index=index_name,
320+
id=chunk_id,
321+
script=f"ctx._source.remove('{remove_field}');",
322+
)
323+
if doc_part:
324+
self.es.update(index=index_name, id=chunk_id, doc=doc_part)
325+
if remove_field is not None or doc_part:
326+
return True
317327
except Exception as e:
318328
self.logger.exception(
319329
f"ESConnection.update(index={index_name}, id={chunk_id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception: " + str(

rag/utils/opensearch_conn.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,19 @@ def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseI
329329
# update specific single document
330330
chunkId = condition["id"]
331331
for i in range(ATTEMPT_TIME):
332+
doc_part = copy.deepcopy(doc)
333+
remove_field = doc_part.pop("remove", None) if isinstance(doc_part.get("remove"), str) else None
332334
try:
333-
self.os.update(index=indexName, id=chunkId, body={"doc": doc})
334-
return True
335+
if remove_field is not None:
336+
self.os.update(
337+
index=indexName,
338+
id=chunkId,
339+
body={"script": {"source": f"ctx._source.remove('{remove_field}');"}},
340+
)
341+
if doc_part:
342+
self.os.update(index=indexName, id=chunkId, body={"doc": doc_part})
343+
if remove_field is not None or doc_part:
344+
return True
335345
except Exception as e:
336346
logger.exception(
337347
f"OSConnection.update(index={indexName}, id={id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")

test/testcases/test_web_api/test_chunk_feedback/test_chunk_feedback_service.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def _load_feedback_module(monkeypatch):
4343

4444
settings_mod = ModuleType("common.settings")
4545
settings_mod.docStoreConn = MagicMock()
46+
# Non-ES engines accept pagerank_fea=0; tests below override for elasticsearch/opensearch.
47+
settings_mod.DOC_ENGINE = "infinity"
4648
monkeypatch.setitem(sys.modules, "common.settings", settings_mod)
4749
common_pkg.settings = settings_mod
4850

@@ -251,6 +253,42 @@ def test_update_weight_clamp_min(self, feedback_env):
251253
new_value = call_args[0][1]
252254
assert new_value["pagerank_fea"] == mod.MIN_PAGERANK_WEIGHT
253255

256+
def test_update_weight_zero_elasticsearch_removes_field(self, feedback_env):
257+
"""rank_feature cannot store 0 on ES — use remove payload (see kb_app)."""
258+
mod, settings_mod = feedback_env
259+
settings_mod.DOC_ENGINE = "elasticsearch"
260+
mock_doc_store = MagicMock()
261+
mock_doc_store.get.return_value = {"pagerank_fea": 1}
262+
mock_doc_store.update.return_value = True
263+
settings_mod.docStoreConn = mock_doc_store
264+
265+
mod.ChunkFeedbackService.update_chunk_weight(
266+
tenant_id="tenant1",
267+
chunk_id="chunk1",
268+
kb_id="kb1",
269+
delta=-1,
270+
)
271+
272+
new_value = mock_doc_store.update.call_args[0][1]
273+
assert new_value == {"remove": "pagerank_fea"}
274+
275+
def test_update_weight_zero_opensearch_removes_field(self, feedback_env):
276+
mod, settings_mod = feedback_env
277+
settings_mod.DOC_ENGINE = "opensearch"
278+
mock_doc_store = MagicMock()
279+
mock_doc_store.get.return_value = {"pagerank_fea": 2}
280+
mock_doc_store.update.return_value = True
281+
settings_mod.docStoreConn = mock_doc_store
282+
283+
mod.ChunkFeedbackService.update_chunk_weight(
284+
tenant_id="tenant1",
285+
chunk_id="chunk1",
286+
kb_id="kb1",
287+
delta=-2,
288+
)
289+
290+
assert mock_doc_store.update.call_args[0][1] == {"remove": "pagerank_fea"}
291+
254292

255293
class TestApplyFeedback:
256294
"""Tests for apply_feedback method."""

0 commit comments

Comments
 (0)