Skip to content

Commit 65e8e2a

Browse files
ok Merge branch 'main' of github.com:modelscope/sirchmunk into release/0.0.6
2 parents ce2a7e0 + a63bb28 commit 65e8e2a

4 files changed

Lines changed: 52 additions & 27 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -462,6 +462,8 @@ docker pull modelscope-registry.cn-beijing.cr.aliyuncs.com/modelscope-repo/sirch
462 462
# Start the service
463 463
docker run -d \
464 464
--name sirchmunk \
465+
--cpus="4" \
466+
--memory="2g" \
465 467
-p 8584:8584 \
466 468
-e LLM_API_KEY="your-api-key-here" \
467 469
-e LLM_BASE_URL="https://api.openai.com/v1" \

README_zh.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -461,6 +461,8 @@ docker pull modelscope-registry.cn-beijing.cr.aliyuncs.com/modelscope-repo/sirch
461 461
# 启动服务
462 462
docker run -d \
463 463
--name sirchmunk \
464+
--cpus="4" \
465+
--memory="2g" \
464 466
-p 8584:8584 \
465 467
-e LLM_API_KEY="your-api-key-here" \
466 468
-e LLM_BASE_URL="https://api.openai.com/v1" \

src/sirchmunk/api/main.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,19 @@
75 75
allow_headers=["*"],
76 76
)
77 77

78+
79+
@app.on_event("startup")
80+
def _prewarm_chat_search():
81+
"""Create the chat search singleton at startup so the embedding model starts loading immediately.
82+
This reduces the chance of the first user request blocking on model load (e.g. in Docker).
83+
"""
84+
try:
85+
from .chat import get_search_instance
86+
get_search_instance()
87+
except Exception:
88+
pass
89+
90+
78 91
# Include all API routers (registered before static mount so they take priority)
79 92
app.include_router(knowledge_router)
80 93
app.include_router(settings_router)

src/sirchmunk/search.py

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -351,13 +351,13 @@ async def _try_reuse_cluster(self, query: str) -> Optional[KnowledgeCluster]:
351 351
return None
352 352

353 353
try:
354-
# Wait for the model (non-blocking via executor) instead of
355-
# returning None immediately — this ensures reuse works even
356-
# on the very first search call.
354+
# Wait briefly for the model so reuse can work when it's already loading.
355+
# Use a short timeout to avoid blocking the first request (e.g. in Docker
356+
# the model may take 30–60s to load; we skip reuse and do full search instead).
357 357
if not self.embedding_client.is_ready():
358 358
self.embedding_client.start_loading()
359 359
try:
360-
await self.embedding_client._ensure_model_async(timeout=60)
360+
await self.embedding_client._ensure_model_async(timeout=5)
361 361
except Exception:
362 362
await self._logger.debug(
363 363
"Embedding model not ready yet, skipping cluster reuse"
@@ -486,34 +486,42 @@ async def _save_cluster_with_embedding(self, cluster: KnowledgeCluster) -> None:
486 486
await self._logger.warning(f"Failed to save knowledge cluster: {update_error}")
487 487
return
488 488

489-
# Compute and store embedding for the cluster.
490-
# embed() internally awaits model readiness via _ensure_model_async(),
491-
# so even if the background loading thread hasn't finished yet, we
492-
# block (non-blocking async) until the model is ready rather than
493-
# silently skipping the embedding — which would make the cluster
494-
# invisible to future similarity searches.
489+
# Compute and store embedding for the cluster when the model is ready.
490+
# Use a short wait to avoid blocking the response if the model is still
491+
# loading (e.g. first request in Docker). If not ready, skip embedding
492+
# so the cluster is still saved and can be reused after the next load.
495 493
if self.embedding_client:
496 494
try:
497-
from sirchmunk.utils.embedding_util import compute_text_hash
498-
499-
combined_text = self.knowledge_storage.combine_cluster_fields(
500-
cluster.queries
501-
)
502-
text_hash = compute_text_hash(combined_text)
495+
if not self.embedding_client.is_ready():
496+
try:
497+
await self.embedding_client._ensure_model_async(timeout=3)
498+
except Exception:
499+
pass
500+
if self.embedding_client.is_ready():
501+
from sirchmunk.utils.embedding_util import compute_text_hash
502+
503+
combined_text = self.knowledge_storage.combine_cluster_fields(
504+
cluster.queries
505+
)
506+
text_hash = compute_text_hash(combined_text)
503 507

504-
embedding_vector = (await self.embedding_client.embed([combined_text]))[0]
508+
embedding_vector = (await self.embedding_client.embed([combined_text]))[0]
505 509

506-
await self.knowledge_storage.store_embedding(
507-
cluster_id=cluster.id,
508-
embedding_vector=embedding_vector,
509-
embedding_model=self.embedding_client.model_id,
510-
embedding_text_hash=text_hash,
511-
)
510+
await self.knowledge_storage.store_embedding(
511+
cluster_id=cluster.id,
512+
embedding_vector=embedding_vector,
513+
embedding_model=self.embedding_client.model_id,
514+
embedding_text_hash=text_hash,
515+
)
512 516

513-
await self._logger.info(
514-
f"Stored embedding for cluster {cluster.id} "
515-
f"(dim={len(embedding_vector)}, model={self.embedding_client.model_id})"
516-
)
517+
await self._logger.info(
518+
f"Stored embedding for cluster {cluster.id} "
519+
f"(dim={len(embedding_vector)}, model={self.embedding_client.model_id})"
520+
)
521+
else:
522+
await self._logger.debug(
523+
f"Embedding model not ready — skipping embedding for cluster {cluster.id}"
524+
)
517 525

518 526
except Exception as e:
519 527
await self._logger.warning(f"Failed to compute embedding for cluster {cluster.id}: {e}")

0 commit comments

Comments
 (0)