Skip to content

Commit 4c99e99

Browse files
committed
perf: reuse embedding service singleton
1 parent e67e9d3 commit 4c99e99

6 files changed

Lines changed: 25 additions & 12 deletions

File tree

backend/api/main.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
import logging
1111

1212
# Import RAG components
13-
from backend.vector_db import ChromaClient, EmbeddingService, RAGRetrieval
13+
from backend.vector_db import ChromaClient, RAGRetrieval
14+
from backend.vector_db.embeddings import get_embedding_service
1415
from backend.learning import KnowledgeRetention, AccuracyScorer
1516
from backend.learning.continuum_memory import ContinuumMemory
1617
from backend.services.rss_fetcher import RSSFetcher
@@ -281,7 +282,7 @@ def _initialize_rag_components():
281282

282283
# CRITICAL: Initialize EmbeddingService FIRST so we can pass it to ChromaClient
283284
# This prevents ChromaDB from using default ONNX model (all-MiniLM-L6-v2)
284-
embedding_service = EmbeddingService()
285+
embedding_service = get_embedding_service()
285286
logger.info("✓ Embedding service initialized")
286287

287288
# CRITICAL FIX: Only use reset_on_error=True if explicitly requested (force_reset or dashboard_reset)

backend/api/routers/system_router.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,8 +1135,8 @@ async def re_embed_foundational_knowledge_endpoint(
11351135
if not chroma_client:
11361136
raise HTTPException(status_code=503, detail="ChromaDB client not available")
11371137

1138-
from stillme_core.rag.embeddings import EmbeddingService
1139-
embedding_service = EmbeddingService()
1138+
from backend.vector_db.embeddings import get_embedding_service
1139+
embedding_service = get_embedding_service()
11401140

11411141
# Get collection
11421142
collection = chroma_client.knowledge_collection
@@ -1352,8 +1352,8 @@ async def migrate_collection_to_cosine_endpoint(
13521352
if not chroma_client:
13531353
raise HTTPException(status_code=503, detail="ChromaDB client not available")
13541354

1355-
from stillme_core.rag.embeddings import EmbeddingService
1356-
embedding_service = EmbeddingService()
1355+
from backend.vector_db.embeddings import get_embedding_service
1356+
embedding_service = get_embedding_service()
13571357

13581358
# Check if collection exists
13591359
try:

backend/core/philosophical_detector_semantic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ def _initialize(self):
8181
try:
8282
# Lazy import to avoid circular dependencies
8383
if self.embedding_service is None:
84-
from stillme_core.rag.embeddings import EmbeddingService
85-
self.embedding_service = EmbeddingService()
84+
from backend.vector_db.embeddings import get_embedding_service
85+
self.embedding_service = get_embedding_service()
8686

8787
# Embed all philosophical examples
8888
logger.info(f"Initializing semantic philosophical detector with {len(PHILOSOPHICAL_EXAMPLES)} examples")

backend/services/codebase_indexer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,10 +491,10 @@ def get_codebase_indexer():
491491
# Fallback: Initialize directly (for scripts/testing)
492492
try:
493493
from backend.vector_db.chroma_client import ChromaClient
494-
from backend.vector_db.embeddings import EmbeddingService
494+
from backend.vector_db.embeddings import get_embedding_service
495495

496496
logger.info("📦 Initializing ChromaDB client and EmbeddingService directly...")
497-
embedding_service = EmbeddingService()
497+
embedding_service = get_embedding_service()
498498
chroma_client = ChromaClient(embedding_service=embedding_service)
499499

500500
_codebase_indexer_instance = CodebaseIndexer(

backend/services/git_history_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,8 @@ def get_git_history_retriever(
472472

473473
# If still None, initialize directly (standalone mode)
474474
if embedding_service is None:
475-
from backend.vector_db import EmbeddingService
476-
embedding_service = EmbeddingService()
475+
from backend.vector_db.embeddings import get_embedding_service
476+
embedding_service = get_embedding_service()
477477
logger.info("📦 Initializing EmbeddingService directly (standalone mode)")
478478

479479
return GitHistoryRetriever(

backend/vector_db/embeddings.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,3 +643,15 @@ def batch_encode(self, texts: List[str], batch_size: int = 32) -> List[List[floa
643643
except Exception as e:
644644
logger.error(f"Failed to batch encode texts: {e}")
645645
raise
646+
647+
648+
# Global embedding service instance (singleton)
_embedding_service: Optional["EmbeddingService"] = None
# Model name the singleton was created with; kept here so a later call that
# asks for a different model can be detected instead of silently ignored.
_embedding_service_model_name: Optional[str] = None


def get_embedding_service(model_name: str = "paraphrase-multilingual-MiniLM-L12-v2") -> "EmbeddingService":
    """Return the process-wide EmbeddingService instance, creating it on first use.

    The first call constructs ``EmbeddingService(model_name=model_name)`` and
    caches it at module level. Every later call returns that same instance.

    Args:
        model_name: Model passed to ``EmbeddingService`` when the singleton is
            first created. It is only honored on the first call; a later call
            with a different value logs a warning and still receives the
            already-created instance.

    Returns:
        The shared ``EmbeddingService`` instance.
    """
    global _embedding_service, _embedding_service_model_name
    if _embedding_service is None:
        _embedding_service = EmbeddingService(model_name=model_name)
        _embedding_service_model_name = model_name
    elif model_name != _embedding_service_model_name:
        # First call wins: surface the conflict rather than quietly handing
        # back an instance built for a different model.
        logger.warning(
            "get_embedding_service(model_name=%r) ignored: "
            "singleton already initialized with model %r",
            model_name,
            _embedding_service_model_name,
        )
    return _embedding_service

0 commit comments

Comments
 (0)