@@ -351,13 +351,13 @@ async def _try_reuse_cluster(self, query: str) -> Optional[KnowledgeCluster]:
351351 return None
352352
353353 try :
354- # Wait for the model (non-blocking via executor) instead of
355- # returning None immediately — this ensures reuse works even
356- # on the very first search call .
354+ # Wait briefly for the model so reuse can work when it's already loading.
355+ # Use a short timeout to avoid blocking the first request (e.g. in Docker
356+ # the model may take 30–60s to load; we skip reuse and do full search instead) .
357357 if not self .embedding_client .is_ready ():
358358 self .embedding_client .start_loading ()
359359 try :
360- await self .embedding_client ._ensure_model_async (timeout = 60 )
360+ await self .embedding_client ._ensure_model_async (timeout = 5 )
361361 except Exception :
362362 await self ._logger .debug (
363363 "Embedding model not ready yet, skipping cluster reuse"
@@ -486,34 +486,42 @@ async def _save_cluster_with_embedding(self, cluster: KnowledgeCluster) -> None:
486486 await self ._logger .warning (f"Failed to save knowledge cluster: { update_error } " )
487487 return
488488
489- # Compute and store embedding for the cluster.
490- # embed() internally awaits model readiness via _ensure_model_async(),
491- # so even if the background loading thread hasn't finished yet, we
492- # block (non-blocking async) until the model is ready rather than
493- # silently skipping the embedding — which would make the cluster
494- # invisible to future similarity searches.
489+ # Compute and store embedding for the cluster when the model is ready.
490+ # Use a short wait to avoid blocking the response if the model is still
491+ # loading (e.g. first request in Docker). If not ready, skip embedding
492+ # so the cluster is still saved and can be reused after the next load.
495493 if self .embedding_client :
496494 try :
497- from sirchmunk .utils .embedding_util import compute_text_hash
498-
499- combined_text = self .knowledge_storage .combine_cluster_fields (
500- cluster .queries
501- )
502- text_hash = compute_text_hash (combined_text )
495+ if not self .embedding_client .is_ready ():
496+ try :
497+ await self .embedding_client ._ensure_model_async (timeout = 3 )
498+ except Exception :
499+ pass
500+ if self .embedding_client .is_ready ():
501+ from sirchmunk .utils .embedding_util import compute_text_hash
502+
503+ combined_text = self .knowledge_storage .combine_cluster_fields (
504+ cluster .queries
505+ )
506+ text_hash = compute_text_hash (combined_text )
503507
504- embedding_vector = (await self .embedding_client .embed ([combined_text ]))[0 ]
508+ embedding_vector = (await self .embedding_client .embed ([combined_text ]))[0 ]
505509
506- await self .knowledge_storage .store_embedding (
507- cluster_id = cluster .id ,
508- embedding_vector = embedding_vector ,
509- embedding_model = self .embedding_client .model_id ,
510- embedding_text_hash = text_hash ,
511- )
510+ await self .knowledge_storage .store_embedding (
511+ cluster_id = cluster .id ,
512+ embedding_vector = embedding_vector ,
513+ embedding_model = self .embedding_client .model_id ,
514+ embedding_text_hash = text_hash ,
515+ )
512516
513- await self ._logger .info (
514- f"Stored embedding for cluster { cluster .id } "
515- f"(dim={ len (embedding_vector )} , model={ self .embedding_client .model_id } )"
516- )
517+ await self ._logger .info (
518+ f"Stored embedding for cluster { cluster .id } "
519+ f"(dim={ len (embedding_vector )} , model={ self .embedding_client .model_id } )"
520+ )
521+ else :
522+ await self ._logger .debug (
523+ f"Embedding model not ready — skipping embedding for cluster { cluster .id } "
524+ )
517525
518526 except Exception as e :
519527 await self ._logger .warning (f"Failed to compute embedding for cluster { cluster .id } : { e } " )
0 commit comments