Skip to content

Commit 4148798

Browse files
committed
update: retrieve relevant category
1 parent 44ef388 commit 4148798

1 file changed

Lines changed: 26 additions & 20 deletions

File tree

memu/memory/recall_agent.py

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -162,31 +162,37 @@ def retrieve_relevant_category(self, character_name: str, query: str, top_k: int
162162
if not content:
163163
continue
164164

165-
# Calculate category name similarity to query
166-
category_lower = category.lower()
167-
category_words = set(category_lower.split('_'))
168-
169-
# Exact match bonus
170-
exact_match_score = 1.0 if query_lower in category_lower else 0.0
171-
172-
# Word overlap score
173-
word_overlap = len(query_words.intersection(category_words)) / len(query_words) if query_words else 0
174-
175-
# Content relevance (simple keyword matching)
176-
content_lower = content.lower()
177-
content_relevance = sum(1 for word in query_words if word in content_lower) / len(query_words) if query_words else 0
165+
# Semantic search for content relevance
166+
content_relevance = 0.0
167+
if self.semantic_search_enabled and self.embedding_client:
168+
try:
169+
# Generate embeddings for query and content
170+
query_embedding = self.embedding_client.embed(query)
171+
content_embedding = self.embedding_client.embed(content[:1000]) # Limit content length for embedding
172+
173+
# Calculate semantic similarity
174+
semantic_similarity = self._cosine_similarity(query_embedding, content_embedding)
175+
content_relevance = semantic_similarity
176+
except Exception as e:
177+
logger.warning(f"Semantic search failed for {category}: {e}")
178+
# Fallback to simple keyword matching
179+
content_lower = content.lower()
180+
content_relevance = sum(1 for word in query_words if word in content_lower) / len(query_words) if query_words else 0
181+
else:
182+
# Fallback to simple keyword matching when semantic search is not available
183+
content_lower = content.lower()
184+
content_relevance = sum(1 for word in query_words if word in content_lower) / len(query_words) if query_words else 0
178185

179-
# Combined score
180-
combined_score = exact_match_score * 0.5 + word_overlap * 0.3 + content_relevance * 0.2
186+
# Use semantic score directly
187+
combined_score = content_relevance
181188

182189
if combined_score > 0:
183190
category_scores.append({
184191
"category": category,
185192
"content": content,
186193
"score": combined_score,
187-
"exact_match": exact_match_score > 0,
188-
"word_overlap": word_overlap,
189194
"content_relevance": content_relevance,
195+
"semantic_search_used": self.semantic_search_enabled and self.embedding_client is not None,
190196
"length": len(content),
191197
"lines": len(content.split('\n'))
192198
})
@@ -203,9 +209,8 @@ def retrieve_relevant_category(self, character_name: str, query: str, top_k: int
203209
"content": item["content"],
204210
"content_type": "relevant_category",
205211
"relevance_score": item["score"],
206-
"exact_match": item["exact_match"],
207-
"word_overlap": item["word_overlap"],
208212
"content_relevance": item["content_relevance"],
213+
"semantic_search_used": item["semantic_search_used"],
209214
"length": item["length"],
210215
"lines": item["lines"],
211216
"character": character_name
@@ -220,9 +225,10 @@ def retrieve_relevant_category(self, character_name: str, query: str, top_k: int
220225
"all_categories_found": all_categories,
221226
"excluded_categories": excluded_categories,
222227
"available_categories": relevant_categories,
228+
"semantic_search_enabled": self.semantic_search_enabled,
223229
"results": results,
224230
"total_items": len(results),
225-
"message": f"Retrieved top {len(results)} relevant categories for query '{query}' from {len(all_categories)} total categories"
231+
"message": f"Retrieved top {len(results)} relevant categories for query '{query}' using semantic search from {len(all_categories)} total categories"
226232
}
227233

228234
except Exception as e:

0 commit comments

Comments
 (0)