@@ -162,31 +162,37 @@ def retrieve_relevant_category(self, character_name: str, query: str, top_k: int
162162 if not content :
163163 continue
164164
165- # Calculate category name similarity to query
166- category_lower = category .lower ()
167- category_words = set (category_lower .split ('_' ))
168-
169- # Exact match bonus
170- exact_match_score = 1.0 if query_lower in category_lower else 0.0
171-
172- # Word overlap score
173- word_overlap = len (query_words .intersection (category_words )) / len (query_words ) if query_words else 0
174-
175- # Content relevance (simple keyword matching)
176- content_lower = content .lower ()
177- content_relevance = sum (1 for word in query_words if word in content_lower ) / len (query_words ) if query_words else 0
165+ # Semantic search for content relevance
166+ content_relevance = 0.0
167+ if self .semantic_search_enabled and self .embedding_client :
168+ try :
169+ # Generate embeddings for query and content
170+ query_embedding = self .embedding_client .embed (query )
171+ content_embedding = self .embedding_client .embed (content [:1000 ]) # Limit content length for embedding
172+
173+ # Calculate semantic similarity
174+ semantic_similarity = self ._cosine_similarity (query_embedding , content_embedding )
175+ content_relevance = semantic_similarity
176+ except Exception as e :
177+ logger .warning (f"Semantic search failed for { category } : { e } " )
178+ # Fallback to simple keyword matching
179+ content_lower = content .lower ()
180+ content_relevance = sum (1 for word in query_words if word in content_lower ) / len (query_words ) if query_words else 0
181+ else :
182+ # Fallback to simple keyword matching when semantic search is not available
183+ content_lower = content .lower ()
184+ content_relevance = sum (1 for word in query_words if word in content_lower ) / len (query_words ) if query_words else 0
178185
179- # Combined score
180- combined_score = exact_match_score * 0.5 + word_overlap * 0.3 + content_relevance * 0.2
186+ # Use semantic score directly
187+ combined_score = content_relevance
181188
182189 if combined_score > 0 :
183190 category_scores .append ({
184191 "category" : category ,
185192 "content" : content ,
186193 "score" : combined_score ,
187- "exact_match" : exact_match_score > 0 ,
188- "word_overlap" : word_overlap ,
189194 "content_relevance" : content_relevance ,
195+ "semantic_search_used" : self .semantic_search_enabled and self .embedding_client is not None ,
190196 "length" : len (content ),
191197 "lines" : len (content .split ('\n ' ))
192198 })
@@ -203,9 +209,8 @@ def retrieve_relevant_category(self, character_name: str, query: str, top_k: int
203209 "content" : item ["content" ],
204210 "content_type" : "relevant_category" ,
205211 "relevance_score" : item ["score" ],
206- "exact_match" : item ["exact_match" ],
207- "word_overlap" : item ["word_overlap" ],
208212 "content_relevance" : item ["content_relevance" ],
213+ "semantic_search_used" : item ["semantic_search_used" ],
209214 "length" : item ["length" ],
210215 "lines" : item ["lines" ],
211216 "character" : character_name
@@ -220,9 +225,10 @@ def retrieve_relevant_category(self, character_name: str, query: str, top_k: int
220225 "all_categories_found" : all_categories ,
221226 "excluded_categories" : excluded_categories ,
222227 "available_categories" : relevant_categories ,
228+ "semantic_search_enabled" : self .semantic_search_enabled ,
223229 "results" : results ,
224230 "total_items" : len (results ),
225- "message" : f"Retrieved top { len (results )} relevant categories for query '{ query } ' from { len (all_categories )} total categories"
231+ "message" : f"Retrieved top { len (results )} relevant categories for query '{ query } ' using semantic search from { len (all_categories )} total categories"
226232 }
227233
228234 except Exception as e :
0 commit comments