Skip to content

Commit 127e8f6

Browse files
committed
fix(kb): align vector rename tenancy with delete and restore chunk diagnostics
- Extend VectorIndexStore.rename_collection_data with user_id and is_admin; LanceDB implementation builds the same filter as delete_collection_data before table.update so non-admins only rename their rows and admins rename all rows for the old collection key. - Pass caller identity from rename_collection_api. - When read_chunks_for_embedding finds zero rows under user scope, log at ERROR under DEBUG if admin-unscoped count shows hidden rows (user_id mismatch or legacy NULL ownership). - Update lancedb store unit test for the new rename signature.
1 parent f33d76a commit 127e8f6

5 files changed

Lines changed: 88 additions & 3 deletions

File tree

src/xagent/core/tools/core/RAG_tools/storage/contracts.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,9 +440,23 @@ def rename_collection_data(
440440
self,
441441
collection_name: str,
442442
new_name: str,
443+
user_id: Optional[int],
444+
is_admin: bool,
443445
) -> List[str]:
444446
"""Rename collection key across vector-side tables.
445447
448+
Applies the same multi-tenancy filter semantics as
449+
:meth:`delete_collection_data`: non-admin callers only rename rows they
450+
can see (``user_id`` match; legacy NULL ``user_id`` is admin-only).
451+
Admins omit the user predicate and rename all rows for the old collection
452+
name.
453+
454+
Args:
455+
collection_name: Current collection name stored in vector tables.
456+
new_name: Target collection name after rename.
457+
user_id: User ID for multi-tenancy filtering.
458+
is_admin: Whether the caller has admin privileges.
459+
446460
Returns:
447461
Warning messages generated during best-effort updates.
448462
"""

src/xagent/core/tools/core/RAG_tools/storage/lancedb_stores.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,11 +581,20 @@ def rename_collection_data(
581581
self,
582582
collection_name: str,
583583
new_name: str,
584+
user_id: Optional[int],
585+
is_admin: bool,
584586
) -> List[str]:
585587
from ..LanceDB.schema_manager import _safe_close_table
586588

587589
warnings: List[str] = []
588-
safe_old_name = escape_lancedb_string(collection_name)
590+
filter_expr = self.build_filter_expression(
591+
build_filter_from_dict({"collection": collection_name}),
592+
user_id=user_id,
593+
is_admin=is_admin,
594+
)
595+
if not filter_expr:
596+
return warnings
597+
589598
conn = self._get_connection()
590599
for table_name in self.list_table_names():
591600
if table_name not in {
@@ -598,7 +607,7 @@ def rename_collection_data(
598607
try:
599608
table = conn.open_table(table_name)
600609
table.update(
601-
f"collection = '{safe_old_name}'",
610+
filter_expr,
602611
{"collection": new_name},
603612
)
604613
except Exception as exc: # noqa: BLE001

src/xagent/core/tools/core/RAG_tools/vector_storage/vector_manager.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
)
4040
from ..LanceDB.model_tag_utils import to_model_tag
4141
from ..LanceDB.schema_manager import _safe_close_table, ensure_embeddings_table
42+
from ..storage.contracts import VectorIndexStore
4243
from ..storage.factory import get_vector_index_store
4344
from ..utils.metadata_utils import deserialize_metadata, serialize_metadata
4445

@@ -183,6 +184,53 @@ def _safe_int_conversion(value: Any, default: int = 0) -> int:
183184
return default
184185

185186

187+
def _maybe_log_chunks_hidden_by_user_filter(
188+
vector_store: VectorIndexStore,
189+
query_filters: Dict[str, Any],
190+
*,
191+
user_id: Optional[int],
192+
is_admin: bool,
193+
) -> None:
194+
"""If DEBUG logging is enabled, detect chunks matching scope filters but excluded by user_id filter.
195+
196+
Non-admin callers filter rows by ``user_id``; admins effectively omit that predicate.
197+
When the scoped count is zero but an admin-equivalent count is positive, rows likely
198+
exist under another ``user_id`` or legacy NULL ``user_id`` — typical symptom of a
199+
permission or ingestion ownership mismatch.
200+
201+
Args:
202+
vector_store: Vector index abstraction bound to the active backend.
203+
query_filters: Column equality filters (collection, doc_id, parse_hash, ...).
204+
user_id: Authenticated user id passed into the read path.
205+
is_admin: Whether the caller is treated as admin for tenancy filtering.
206+
"""
207+
if is_admin or user_id is None:
208+
return
209+
if not logger.isEnabledFor(logging.DEBUG):
210+
return
211+
try:
212+
count_without_user_scope = vector_store.count_rows_or_zero(
213+
table_name="chunks",
214+
filters=query_filters,
215+
user_id=user_id,
216+
is_admin=True,
217+
)
218+
except Exception as exc: # noqa: BLE001
219+
logger.debug("Skipped chunks user-filter diagnostic count: %s", exc)
220+
return
221+
222+
if count_without_user_scope > 0:
223+
logger.error(
224+
"Chunks exist for doc/collection scope but user filter excluded them "
225+
"(possible user_id mismatch, legacy NULL user_id row, or permission bug). "
226+
"caller_user_id=%s is_admin=%s unscoped_count=%s filters=%s",
227+
user_id,
228+
is_admin,
229+
count_without_user_scope,
230+
query_filters,
231+
)
232+
233+
186234
def _safe_str_value(value: Any) -> Optional[str]:
187235
"""Extract string value, returning None for NaN/None values.
188236
@@ -250,6 +298,12 @@ def read_chunks_for_embedding(
250298
is_admin=is_admin,
251299
)
252300
if total_count == 0:
301+
_maybe_log_chunks_hidden_by_user_filter(
302+
vector_store,
303+
query_filters,
304+
user_id=user_id,
305+
is_admin=is_admin,
306+
)
253307
logger.info("No chunks found for the given criteria")
254308
return EmbeddingReadResponse(chunks=[], total_count=0, pending_count=0)
255309

src/xagent/web/api/kb.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,7 @@ def _download_file() -> None:
12901290
file_config = config.model_copy(
12911291
update={"parse_method": normalized_parse_method}
12921292
)
1293+
12931294
def _run_cloud_ingestion() -> IngestionResult:
12941295
return run_document_ingestion(
12951296
collection=safe_collection,
@@ -3548,6 +3549,8 @@ async def rename_collection_api(
35483549
vector_store.rename_collection_data(
35493550
collection_name=safe_old_collection,
35503551
new_name=safe_new_collection,
3552+
user_id=int(_user.id),
3553+
is_admin=bool(_user.is_admin),
35513554
)
35523555
)
35533556

tests/core/tools/core/RAG_tools/storage/test_lancedb_stores.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,12 @@ def test_vector_store_rename_collection_data_updates_expected_tables(
337337
mock_conn.open_table.return_value = mock_table
338338

339339
store = LanceDBVectorIndexStore()
340-
warnings = store.rename_collection_data("old_name", "new_name")
340+
warnings = store.rename_collection_data(
341+
"old_name",
342+
"new_name",
343+
user_id=1,
344+
is_admin=False,
345+
)
341346

342347
assert warnings == []
343348
# 4 target tables should be updated; control-plane table excluded.

0 commit comments

Comments
 (0)