Skip to content

Commit 4c7e46f

Browse files
medevs and claude committed
Fix reranker param, rag delete error, update service exports
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 647b47f commit 4c7e46f

4 files changed

Lines changed: 102 additions & 295 deletions

File tree

backend/app/services/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from .embeddings import EmbeddingService, get_embedding_service
66
from .document_loader import DocumentLoader, get_document_loader
77
from .rag import RAGService, get_rag_service
8+
from .reranker import Reranker, get_reranker
89

910
__all__ = [
1011
"OllamaClient",
@@ -17,5 +18,7 @@
1718
"get_document_loader",
1819
"RAGService",
1920
"get_rag_service",
21+
"Reranker",
22+
"get_reranker",
2023
]
2124

backend/app/services/chroma_client.py

Lines changed: 46 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,11 @@ def add_documents(
111111
) -> List[str]:
112112
"""
113113
Add documents to the collection.
114+
Validates metadata against rag.txt requirements:
115+
- chunk_id
116+
- document_id
117+
- source
118+
- position
114119
115120
Args:
116121
texts: List of text content
@@ -123,10 +128,18 @@ def add_documents(
123128
if not texts:
124129
logger.warning("No texts provided to add")
125130
return []
131+
132+
# Validate metadata fields
133+
required_fields = {"chunk_id", "document_id", "source", "position"}
134+
for idx, meta in enumerate(metadatas):
135+
missing = required_fields - meta.keys()
136+
if missing:
137+
logger.error(f"Metadata at index {idx} missing required fields: {missing}")
138+
raise ValueError(f"Metadata missing required fields: {missing}")
126139

127140
# Generate IDs if not provided
128141
if ids is None:
129-
ids = [f"chunk_{uuid.uuid4().hex[:12]}" for _ in texts]
142+
ids = [meta.get("chunk_id", f"chunk_{uuid.uuid4().hex[:12]}") for meta in metadatas]
130143

131144
# Generate embeddings
132145
embeddings = self.embedding_service.embed_texts(texts)
@@ -150,48 +163,54 @@ def add_documents(
150163

151164
def query(
152165
self,
153-
query_text: str,
166+
query_text: Optional[str] = None,
167+
query_texts: Optional[List[str]] = None,
154168
n_results: int = 3,
155-
where: Optional[Dict[str, Any]] = None
169+
where: Optional[Dict[str, Any]] = None,
170+
include: Optional[List[str]] = None
156171
) -> Dict[str, Any]:
157172
"""
158173
Query the collection for similar documents.
159174
160175
Args:
161-
query_text: Text to search for
176+
query_text: Single text to search for (legacy support)
177+
query_texts: List of texts to search for (preferred)
162178
n_results: Number of results to return
163179
where: Optional filter conditions
180+
include: Optional list of fields to include
164181
165182
Returns:
166183
Dict with documents, metadatas, and distances
167184
"""
168-
if not query_text or not query_text.strip():
169-
return {"documents": [], "metadatas": [], "distances": []}
185+
# Support both query_text and query_texts
186+
texts = query_texts if query_texts else ([query_text] if query_text else [])
187+
188+
if not texts or all(not t or not t.strip() for t in texts):
189+
return {"documents": [], "metadatas": [], "distances": [], "ids": []}
190+
191+
# Generate query embeddings
192+
query_embeddings = self.embedding_service.embed_texts(texts)
170193

171-
# Generate query embedding
172-
query_embedding = self.embedding_service.embed_text(query_text)
194+
if not query_embeddings:
195+
logger.error("Failed to generate query embeddings")
196+
return {"documents": [], "metadatas": [], "distances": [], "ids": []}
173197

174-
if not query_embedding:
175-
logger.error("Failed to generate query embedding")
176-
return {"documents": [], "metadatas": [], "distances": []}
198+
default_include = ["documents", "metadatas", "distances"]
177199

178200
try:
179201
results = self.collection.query(
180-
query_embeddings=[query_embedding],
202+
query_embeddings=query_embeddings,
181203
n_results=n_results,
182204
where=where,
183-
include=["documents", "metadatas", "distances"]
205+
include=include if include else default_include
184206
)
185207

186-
# Flatten results (query returns nested lists)
187-
return {
188-
"documents": results.get("documents", [[]])[0],
189-
"metadatas": results.get("metadatas", [[]])[0],
190-
"distances": results.get("distances", [[]])[0],
191-
}
208+
# Return raw results structure which contains lists of lists
209+
# The caller handles flattening if needed
210+
return results
192211
except Exception as e:
193212
logger.error(f"Error querying ChromaDB: {e}")
194-
return {"documents": [], "metadatas": [], "distances": []}
213+
return {"documents": [], "metadatas": [], "distances": [], "ids": []}
195214

196215
def delete_by_document_id(self, document_id: str) -> bool:
197216
"""
@@ -240,9 +259,12 @@ def get_all_documents(self) -> List[Dict[str, Any]]:
240259
for metadata in results["metadatas"]:
241260
doc_id = metadata.get("document_id")
242261
if doc_id and doc_id not in documents:
262+
# Use 'source' field (set during ingestion) as filename
263+
source = metadata.get("source", "Unknown")
243264
documents[doc_id] = {
244265
"id": doc_id,
245-
"filename": metadata.get("filename", "Unknown"),
266+
"filename": source,
267+
"source": source,
246268
"file_type": metadata.get("file_type", "Unknown"),
247269
"file_size": metadata.get("file_size", 0),
248270
"chunk_count": 0,
@@ -300,3 +322,6 @@ def get_chroma_service() -> ChromaService:
300322
_chroma_service = ChromaService()
301323
return _chroma_service
302324

325+
def get_chroma_client() -> ChromaService:
326+
"""Legacy alias for get_chroma_service."""
327+
return get_chroma_service()

backend/app/services/rag.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -274,8 +274,13 @@ def delete_document(self, document_id: str) -> Dict[str, Any]:
274274

275275
if db_deleted or file_deleted:
276276
# Refresh hybrid search index for advanced RAG
277+
# Wrap in try-except to prevent 500 error if refresh fails
278+
# (deletion already succeeded at this point)
277279
if self.use_advanced_rag and self.advanced_rag:
278-
self.advanced_rag.refresh_index()
280+
try:
281+
self.advanced_rag.refresh_index()
282+
except Exception as e:
283+
logger.warning(f"Failed to refresh index after deletion: {e}")
279284

280285
return {
281286
"success": True,

0 commit comments

Comments (0)