Skip to content

Commit a4aa356

Browse files
committed
修复qdrant的分页方式
1 parent 274309c commit a4aa356

1 file changed

Lines changed: 55 additions & 9 deletions

File tree

bella_rag/vector_stores/qdrant.py

Lines changed: 55 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -612,15 +612,61 @@ def query_by_filter(
612612
else:
613613
query_filter = Filter(must=[id_condition])
614614

615-
# 执行滚动查询
616-
scroll_result = self.client.scroll(
617-
collection_name=self.collection_name,
618-
scroll_filter=query_filter,
619-
limit=limit or 100,
620-
offset=offset or 0,
621-
with_payload=True,
622-
with_vectors=kwargs.get("with_vectors", False)
623-
)
615+
# 安全的limit限制,避免内存问题
616+
MAX_LIMIT = 10000 # qdrant建议的最大单次查询量
617+
safe_limit = min(limit or 100, MAX_LIMIT)
618+
619+
# 兼容offset/limit分页:安全地跳过offset条记录
620+
if offset and offset > 0:
621+
# 分批跳过offset条记录,避免单次limit过大
622+
current_offset = 0
623+
next_cursor = None
624+
625+
while current_offset < offset:
626+
skip_batch = min(offset - current_offset, MAX_LIMIT)
627+
skip_result = self.client.scroll(
628+
collection_name=self.collection_name,
629+
scroll_filter=query_filter,
630+
limit=skip_batch,
631+
offset=next_cursor,
632+
with_payload=False,
633+
with_vectors=False
634+
)
635+
636+
# 如果没有更多数据,提前退出
637+
if not skip_result[0]:
638+
next_cursor = None
639+
break
640+
641+
current_offset += len(skip_result[0])
642+
next_cursor = skip_result[1]
643+
644+
# 如果没有下一页cursor,说明数据已经遍历完
645+
if not next_cursor:
646+
break
647+
648+
# 获取实际需要的数据
649+
if next_cursor is not None:
650+
scroll_result = self.client.scroll(
651+
collection_name=self.collection_name,
652+
scroll_filter=query_filter,
653+
limit=safe_limit,
654+
offset=next_cursor,
655+
with_payload=True,
656+
with_vectors=kwargs.get("with_vectors", False)
657+
)
658+
else:
659+
# 没有更多数据了
660+
scroll_result = ([], None)
661+
else:
662+
# 直接从头开始查询
663+
scroll_result = self.client.scroll(
664+
collection_name=self.collection_name,
665+
scroll_filter=query_filter,
666+
limit=safe_limit,
667+
with_payload=True,
668+
with_vectors=kwargs.get("with_vectors", False)
669+
)
624670

625671
nodes = []
626672
for point in scroll_result[0]: # scroll返回(points, next_page_offset)

0 commit comments

Comments
 (0)