Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b5a62aa
feat(storage): route KB document listing through VectorIndexStore (PR…
sqhyz55 May 8, 2026
6ddf870
feat(storage): add sync search_fts and route sparse retrieval through…
sqhyz55 May 8, 2026
504017a
refactor(storage): isolate cascade vector-plane execution for cleanup…
sqhyz55 May 8, 2026
cf126d7
refactor(storage): move version candidate listing behind VectorIndexS…
sqhyz55 May 8, 2026
474a471
refactor(web): move LanceDB startup migration logic into service (PR-A5)
sqhyz55 May 8, 2026
ac16afe
refactor(metadata): delegate metadata table DDL to MetadataStore (PR-A6)
sqhyz55 May 8, 2026
8b3be21
fix(types): relax documents table type to Any in migration service
sqhyz55 May 8, 2026
484477c
fix(errors): preserve validation exceptions and classify backfill fai…
sqhyz55 May 8, 2026
ea1dd5d
style(cleanup): apply remaining formatting and unused-import fixes
sqhyz55 May 8, 2026
beba1ad
perf(storage): reduce redundant table opens and harden failure observ…
sqhyz55 May 8, 2026
7484620
fix migration task scheduling and stabilize regression tests
sqhyz55 May 8, 2026
79aa1ad
test(version-management): restore list_candidates filter and ordering…
sqhyz55 May 9, 2026
7a4d5c9
fix(storage): propagate user scope in async vector search path (#362)
sqhyz55 May 9, 2026
b9a22c6
test(storage): add missing sync and async coverage gaps (#363)
sqhyz55 May 9, 2026
e9a8daf
refactor(storage): address abstraction-layer code smells (#364)
sqhyz55 May 9, 2026
1e5d824
fix(web): always start migration task when auto-migrate is enabled
sqhyz55 May 9, 2026
e0017c2
feat(storage): add file_ids filter to list_document_records
sqhyz55 May 11, 2026
0732788
refactor(web): route reconcile/status functions through VectorIndexStore
sqhyz55 May 11, 2026
51cbcad
fix(storage): close probe table handles in by-model search helpers
sqhyz55 May 14, 2026
d4204a1
fix(storage): address PR #359 review on tenancy, scans, and migrations
sqhyz55 May 19, 2026
3176731
test(integration): align auto-migrate-disabled startup with check-onl…
sqhyz55 May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,10 @@
from functools import wraps
from typing import Any, Awaitable, Callable, Optional, TypeVar

import pyarrow as pa # type: ignore

from ..core.parser_registry import get_supported_parsers, validate_parser_compatibility
from ..core.schemas import CollectionInfo
from ..LanceDB.schema_manager import _safe_close_table
from ..storage.contracts import MetadataStore
from ..storage.factory import get_metadata_store, get_vector_index_store
from ..utils.lancedb_query_utils import list_table_names
from ..utils.model_resolver import resolve_embedding_adapter
from ..utils.tag_mapping import register_tag_mapping

Expand Down Expand Up @@ -177,10 +173,6 @@ def _get_metadata_store(self) -> MetadataStore:
"""
return get_metadata_store()

async def _get_connection(self) -> Any:
    """Return the raw connection exposed by the metadata store.

    Kept for legacy helper methods that still operate on the underlying
    storage connection instead of going through the ``MetadataStore`` API.
    """
    metadata_store = self._get_metadata_store()
    return metadata_store.get_raw_connection()

async def get_collection(self, collection_name: str) -> CollectionInfo:
"""Get collection metadata from storage.

Expand Down Expand Up @@ -269,74 +261,8 @@ async def _save_collection_with_retry(
await asyncio.sleep(wait_time)

async def _ensure_metadata_table(self) -> None:
"""Ensure the ``collection_metadata`` table exists.

NOTE(review): this span looks like a flattened diff hunk (header
"-269,74 +261,8"). The inline pyarrow/LanceDB logic down to
``_safe_close_table(table)`` appears to be the removed side, and the
trailing one-line docstring plus the ``MetadataStore`` delegation appear
to be the replacement. Confirm against the repository before relying on
either half.

As written, the inline part creates the table with a fixed schema when it
is absent. When the table is present it tries to add a missing ``owners``
column. Failures of the table-name check, the table creation and the
``owners`` migration are logged at debug level and not re-raised.
"""

# Inline path: work directly on the raw storage connection.
conn = await self._get_connection()

# Arrow schema passed to create_table() when the table has to be created.
schema = pa.schema(
[
("name", pa.string()),
("schema_version", pa.string()),
("embedding_model_id", pa.string()), # Nullable
("embedding_dimension", pa.int32()), # Nullable
("documents", pa.int32()),
("processed_documents", pa.int32()),
("parses", pa.int32()),
("chunks", pa.int32()),
("embeddings", pa.int32()),
("document_names", pa.string()), # JSON string
(
"owners",
pa.string(),
), # Schema-only; not maintained (derived at list time from user_id)
("collection_locked", pa.bool_()),
("allow_mixed_parse_methods", pa.bool_()),
("skip_config_validation", pa.bool_()),
("ingestion_config", pa.string()), # JSON string
("created_at", pa.timestamp("us")),
("updated_at", pa.timestamp("us")),
("last_accessed_at", pa.timestamp("us")),
("extra_metadata", pa.string()), # JSON string
]
)

# Check if table already exists (prefer list_tables(); avoids deprecation warnings)
# If the name lookup itself fails, table_exists stays False and the code
# falls through to the create attempt below.
table_exists = False
try:
existing_tables = list_table_names(conn)
table_exists = "collection_metadata" in existing_tables
except Exception as e:
logger.debug("Table names check failed: %s", e)

if not table_exists:
# Best-effort create: any exception is logged at debug level and ignored.
try:
conn.create_table("collection_metadata", schema=schema)
except Exception as e:
logger.debug("Table creation failed (may already exist): %s", e)
# Table might already exist, continue
else:
# Table exists: ensure it has the "owners" column (schema compat; column is not maintained)
# table starts as None so the finally clause can always hand it to
# _safe_close_table(), even when open_table() raises.
table = None
try:
table = conn.open_table("collection_metadata")
if hasattr(table, "schema") and table.schema is not None:
names = getattr(table.schema, "names", None) or []
if "owners" not in names:
# add_columns is looked up dynamically; when the attribute is
# missing the migration is skipped without raising.
add_fn = getattr(table, "add_columns", None)
if add_fn is not None:
# Presumably a SQL expression that fills the new column with the
# string '[]' for existing rows -- confirm against LanceDB docs.
add_fn({"owners": "cast('[]' as string)"})
logger.info(
"collection_metadata: added missing 'owners' column (schema-only)"
)
except Exception as e:
logger.debug(
"Could not migrate collection_metadata schema (add owners): %s", e
)
finally:
_safe_close_table(table)
# NOTE(review): the string below is not the first statement of the function,
# so as written it is a no-op expression rather than a docstring; presumably
# it is the docstring of the replacement implementation.
"""Ensure collection metadata table via MetadataStore ownership."""
# Delegate table creation to the store returned by _get_metadata_store().
await self._get_metadata_store().ensure_collection_metadata_table()

async def initialize_collection_embedding(
self, collection_name: str, embedding_model_id: str
Expand Down
Loading
Loading