Skip to content

Commit 2afe83f

Browse files
committed
Merge remote-tracking branch 'upstream/main' into claude/card-forge-extract
# Conflicts: # app/main_server.py
2 parents ba5db13 + 03cbce3 commit 2afe83f

638 files changed

Lines changed: 106144 additions & 14900 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.dockerignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ config/user_preferences.json
4141
config/voice_storage.json
4242
N.E.K.O/
4343

44+
# Embedding model weights are intentionally NOT ignored: CI pre-fetches the
45+
# pinned revision on the native runner (cached via actions/cache) and ships it in
46+
# the build context so the in-image step runs fully offline — see step 6b in
47+
# docker/Dockerfile{,.full}. That step force-re-downloads when the bundled
48+
# (repo, revision) doesn't match the pin, so a developer's stale/partial local
49+
# copy riding in via `COPY . /app` is corrected rather than shipped.
50+
4451
# Build artifacts
4552
build/
4653
dist/

.github/workflows/build-desktop-linux.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,16 @@ jobs:
178178
# anonymous profile folder so the bundled app can run vector memory
179179
# offline. The runtime (memory/embeddings.py) auto-falls back to bundled
180180
# assets when the user's app-data profile is incomplete.
181+
# Cache the weights by revision so huggingface.co is hit at most once per
182+
# pin (per OS) instead of every build — the same per-IP HTTP 429 throttle
183+
# that broke the Docker build can bite here too. Cache hit -> the next step
184+
# is a no-op ("keep existing"); only a cold cache actually downloads.
185+
- name: Cache embedding model weights
186+
uses: actions/cache@v4
187+
with:
188+
path: data/embedding_models
189+
key: embedding-model-${{ runner.os }}-${{ env.EMBEDDING_MODEL_REVISION }}-both
190+
181191
- name: Prepare embedding model assets
182192
shell: bash
183193
run: |
@@ -278,6 +288,12 @@ jobs:
278288
NUITKA_OPTS="$NUITKA_OPTS --include-package=tiktoken"
279289
NUITKA_OPTS="$NUITKA_OPTS --include-package=tiktoken_ext"
280290
NUITKA_OPTS="$NUITKA_OPTS --include-package=onnxruntime"
291+
# onnxruntime.transformers/ is a benchmark + model-conversion toolbox
292+
# that nothing imports at runtime (rapidocr uses only the core InferenceSession).
293+
# --include-package=onnxruntime would recurse and compile the whole subtree,
294+
# including a 130k+ line gpt2 benchmark C unit that crashes the C backend.
295+
# Mirrors build-desktop.yml.
296+
NUITKA_OPTS="$NUITKA_OPTS --nofollow-import-to=onnxruntime.transformers"
281297
NUITKA_OPTS="$NUITKA_OPTS --include-package=tokenizers"
282298
283299
# Package data

.github/workflows/build-desktop.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,16 @@ jobs:
209209
# anonymous profile folder so the bundled app can run vector memory
210210
# offline. The runtime (memory/embeddings.py) auto-falls back to bundled
211211
# assets when the user's app-data profile is incomplete.
212+
# Cache the weights by revision so huggingface.co is hit at most once per
213+
# pin (per OS) instead of every build — the same per-IP HTTP 429 throttle
214+
# that broke the Docker build can bite here too. Cache hit -> the next step
215+
# is a no-op ("keep existing"); only a cold cache actually downloads.
216+
- name: Cache embedding model weights
217+
uses: actions/cache@v4
218+
with:
219+
path: data/embedding_models
220+
key: embedding-model-${{ runner.os }}-${{ env.EMBEDDING_MODEL_REVISION }}-both
221+
212222
- name: Prepare embedding model assets
213223
shell: bash
214224
run: |
@@ -337,6 +347,12 @@ jobs:
337347
NUITKA_OPTS="$NUITKA_OPTS --include-package=tiktoken"
338348
NUITKA_OPTS="$NUITKA_OPTS --include-package=tiktoken_ext"
339349
NUITKA_OPTS="$NUITKA_OPTS --include-package=onnxruntime"
350+
# onnxruntime.transformers/ is a benchmark + model-conversion toolbox
351+
# that nothing imports at runtime (rapidocr uses only the core InferenceSession).
352+
# --include-package=onnxruntime would recurse and compile the whole subtree,
353+
# including a 130k+ line gpt2 benchmark C unit that crashes the C backend.
354+
# Synced with build_nuitka.bat.
355+
NUITKA_OPTS="$NUITKA_OPTS --nofollow-import-to=onnxruntime.transformers"
340356
NUITKA_OPTS="$NUITKA_OPTS --include-package=tokenizers"
341357
# bilibili_dm/bilibili_danmaku plugins import bilibili_api; compile the
342358
# package itself (CI previously only carried its package-data).
@@ -502,6 +518,12 @@ jobs:
502518
set NUITKA_OPTS=%NUITKA_OPTS% --include-package=tiktoken
503519
set NUITKA_OPTS=%NUITKA_OPTS% --include-package=tiktoken_ext
504520
set NUITKA_OPTS=%NUITKA_OPTS% --include-package=onnxruntime
521+
rem onnxruntime.transformers/ is a benchmark + model-conversion toolbox
522+
rem that nothing imports at runtime (rapidocr uses only the core InferenceSession).
523+
rem --include-package=onnxruntime would recurse and compile the whole subtree,
524+
rem including a 130k+ line gpt2 benchmark C unit that crashes the C backend.
525+
rem Synced with build_nuitka.bat.
526+
set NUITKA_OPTS=%NUITKA_OPTS% --nofollow-import-to=onnxruntime.transformers
505527
set NUITKA_OPTS=%NUITKA_OPTS% --include-package=tokenizers
506528
set NUITKA_OPTS=%NUITKA_OPTS% --include-package-data=jinja2
507529
set NUITKA_OPTS=%NUITKA_OPTS% --include-package-data=certifi

.github/workflows/docker-cleanup.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,13 @@ jobs:
4242
# 会连带处理子镜像、不会对级联删除的 digest 二次删。
4343
- name: Delete old container versions
4444
# 第三方社区 action + GHCR 删除权限/token,固定到 commit SHA 防 tag 被改写。
45-
# 下方 SHA 对应 v1.2.0;升级时一并更新 SHA 和行尾版本注释。
46-
uses: dataaxiom/ghcr-cleanup-action@374e2028c8fb93b7219f3771cd405fab95d3dec4 # v1.2.0
45+
# 下方 SHA 对应 v1.2.1;升级时一并更新 SHA 和行尾版本注释。
46+
#
47+
# 为什么不是 v1.2.0:v1.2.0 对级联回收只吞掉「第一个」404,删父 manifest
48+
# 触发 GHCR 连带删子 digest 后,第二个已消失 digest 的 404 不被 catch,
49+
# 直接 ##[error]Package not found 把 job 挂掉(每周稳定复现)。v1.2.1 的
50+
# "tolerate every 404 on package version delete" 修掉这条路径。
51+
uses: dataaxiom/ghcr-cleanup-action@f092b48ba3b604b2a83690dc4b2bbb3392e1045f # v1.2.1
4752
with:
4853
token: ${{ secrets.GITHUB_TOKEN }}
4954
owner: project-n-e-k-o

.github/workflows/docker-multi-arch.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ concurrency:
4444
env:
4545
REGISTRY_GHCR: ghcr.io
4646
IMAGE_NAME: project-n-e-k-o/n.e.k.o
47+
# Embedding model pin (kept identical to .github/workflows/build-desktop.yml and
48+
# docker/Dockerfile{,.full} ARG defaults). The weights are pre-fetched on the
49+
# native runner and cached so the Docker build never hits huggingface.co — see
50+
# the "Prepare embedding model" steps below and Dockerfile step 6b.
51+
EMBEDDING_MODEL_REPO: jinaai/jina-embeddings-v5-text-nano-retrieval
52+
EMBEDDING_MODEL_PROFILE_ID: local-text-retrieval-v1
53+
EMBEDDING_MODEL_REVISION: ac5d898c8d382b17167c33e5c8af644a3519b47d
4754

4855
jobs:
4956
# ============================================================================
@@ -154,6 +161,27 @@ jobs:
154161
with:
155162
fetch-depth: 0
156163

164+
# Pre-fetch the embedding weights on the native runner and cache them by
165+
# revision, so the (QEMU-emulated, throttle-prone) Docker build below never
166+
# touches huggingface.co. Cache hit -> the downloader is a no-op; only a
167+
# cold cache (revision bump / 7-day eviction) actually downloads, natively,
168+
# once. The weights ride into the build via the context (Dockerfile 6b).
169+
# Standard image ships int8 only to stay lightweight.
170+
- name: Cache embedding model weights (int8)
171+
uses: actions/cache@v4
172+
with:
173+
path: data/embedding_models
174+
key: embedding-model-${{ env.EMBEDDING_MODEL_REVISION }}-int8
175+
176+
- name: Prepare embedding model assets (int8)
177+
run: |
178+
python3 scripts/prepare_embedding_model.py \
179+
--repo "$EMBEDDING_MODEL_REPO" \
180+
--revision "$EMBEDDING_MODEL_REVISION" \
181+
--profile-id "$EMBEDDING_MODEL_PROFILE_ID" \
182+
--output-root data/embedding_models \
183+
--variant int8
184+
157185
- name: Check if Dockerfile exists
158186
id: check-dockerfile
159187
run: |
@@ -247,6 +275,10 @@ jobs:
247275
file: ./docker/Dockerfile
248276
platforms: ${{ matrix.platform }}
249277
push: true
278+
build-args: |
279+
EMBEDDING_MODEL_REPO=${{ env.EMBEDDING_MODEL_REPO }}
280+
EMBEDDING_MODEL_REVISION=${{ env.EMBEDDING_MODEL_REVISION }}
281+
EMBEDDING_MODEL_PROFILE_ID=${{ env.EMBEDDING_MODEL_PROFILE_ID }}
250282
tags: ${{ steps.meta_ghcr.outputs.tags || steps.meta_both.outputs.tags }}
251283
labels: ${{ steps.meta_ghcr.outputs.labels || steps.meta_both.outputs.labels }}
252284
cache-from: type=gha
@@ -272,6 +304,27 @@ jobs:
272304
with:
273305
fetch-depth: 0
274306

307+
# Pre-fetch the embedding weights on the native runner and cache them by
308+
# revision, so the (QEMU-emulated, throttle-prone) Docker build below never
309+
# touches huggingface.co. Cache hit -> the downloader is a no-op; only a
310+
# cold cache (revision bump / 7-day eviction) actually downloads, natively,
311+
# once. The weights ride into the build via the context (Dockerfile.full
312+
# 6b). Full image bundles both int8 and fp32.
313+
- name: Cache embedding model weights (both)
314+
uses: actions/cache@v4
315+
with:
316+
path: data/embedding_models
317+
key: embedding-model-${{ env.EMBEDDING_MODEL_REVISION }}-both
318+
319+
- name: Prepare embedding model assets (both)
320+
run: |
321+
python3 scripts/prepare_embedding_model.py \
322+
--repo "$EMBEDDING_MODEL_REPO" \
323+
--revision "$EMBEDDING_MODEL_REVISION" \
324+
--profile-id "$EMBEDDING_MODEL_PROFILE_ID" \
325+
--output-root data/embedding_models \
326+
--variant both
327+
275328
- name: Set up QEMU
276329
uses: docker/setup-qemu-action@v3
277330

@@ -350,6 +403,10 @@ jobs:
350403
file: ./docker/Dockerfile.full
351404
platforms: ${{ matrix.platform }}
352405
push: true
406+
build-args: |
407+
EMBEDDING_MODEL_REPO=${{ env.EMBEDDING_MODEL_REPO }}
408+
EMBEDDING_MODEL_REVISION=${{ env.EMBEDDING_MODEL_REVISION }}
409+
EMBEDDING_MODEL_PROFILE_ID=${{ env.EMBEDDING_MODEL_PROFILE_ID }}
353410
tags: ${{ steps.meta_ghcr.outputs.tags || steps.meta_both.outputs.tags }}
354411
labels: ${{ steps.meta_ghcr.outputs.labels || steps.meta_both.outputs.labels }}
355412
cache-from: type=gha

.github/workflows/health-check.yml

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@ jobs:
1515
- name: Check lanlan.app (old)
1616
id: check_app
1717
run: |
18+
CURL_EXIT=0
1819
HTTP_CODE=$(curl -s -o /tmp/resp_app.json -w "%{http_code}" \
1920
--max-time 30 --connect-timeout 10 \
2021
-X POST "https://lanlan.app/text/v1/chat/completions" \
2122
-H "Content-Type: application/json" \
2223
-H "Authorization: Bearer free-access" \
2324
-d '{"model":"free-mini-model","messages":[{"role":"user","content":"sends some useful information"}],"max_completion_tokens":5}' \
24-
2>/dev/null || echo "000")
25+
2>/dev/null) || CURL_EXIT=$?
2526
BODY=$(cat /tmp/resp_app.json 2>/dev/null || echo "{}")
2627
echo "http_code=$HTTP_CODE" >> "$GITHUB_OUTPUT"
28+
echo "curl_exit=$CURL_EXIT" >> "$GITHUB_OUTPUT"
2729
2830
if [ "$HTTP_CODE" = "200" ]; then
2931
if echo "$BODY" | jq -e '(.choices | type) == "array" and (.choices | length) > 0 and (.choices[0].message | type) == "object" and .choices[0].message.role == "assistant" and (.choices[0].message | has("content"))' >/dev/null 2>&1; then
@@ -39,15 +41,17 @@ jobs:
3941
- name: Check www.lanlan.app (new)
4042
id: check_api_app
4143
run: |
44+
CURL_EXIT=0
4245
HTTP_CODE=$(curl -s -o /tmp/resp_api_app.json -w "%{http_code}" \
4346
--max-time 30 --connect-timeout 10 \
4447
-X POST "https://www.lanlan.app/text/v1/chat/completions" \
4548
-H "Content-Type: application/json" \
4649
-H "Authorization: Bearer free-access" \
4750
-d '{"model":"free-mini-model","messages":[{"role":"user","content":"sends some useful information"}],"max_completion_tokens":5}' \
48-
2>/dev/null || echo "000")
51+
2>/dev/null) || CURL_EXIT=$?
4952
BODY=$(cat /tmp/resp_api_app.json 2>/dev/null || echo "{}")
5053
echo "http_code=$HTTP_CODE" >> "$GITHUB_OUTPUT"
54+
echo "curl_exit=$CURL_EXIT" >> "$GITHUB_OUTPUT"
5155
5256
if [ "$HTTP_CODE" = "200" ]; then
5357
if echo "$BODY" | jq -e '(.choices | type) == "array" and (.choices | length) > 0 and (.choices[0].message | type) == "object" and .choices[0].message.role == "assistant" and (.choices[0].message | has("content"))' >/dev/null 2>&1; then
@@ -63,15 +67,17 @@ jobs:
6367
- name: Check www.lanlan.tech (new)
6468
id: check_api_tech
6569
run: |
70+
CURL_EXIT=0
6671
HTTP_CODE=$(curl -s -o /tmp/resp_api_tech.json -w "%{http_code}" \
6772
--max-time 30 --connect-timeout 10 \
6873
-X POST "https://www.lanlan.tech/text/v1/chat/completions" \
6974
-H "Content-Type: application/json" \
7075
-H "Authorization: Bearer free-access" \
7176
-d '{"model":"free-mini-model","messages":[{"role":"user","content":"sends some useful information"}],"max_completion_tokens":5}' \
72-
2>/dev/null || echo "000")
77+
2>/dev/null) || CURL_EXIT=$?
7378
BODY=$(cat /tmp/resp_api_tech.json 2>/dev/null || echo "{}")
7479
echo "http_code=$HTTP_CODE" >> "$GITHUB_OUTPUT"
80+
echo "curl_exit=$CURL_EXIT" >> "$GITHUB_OUTPUT"
7581
7682
if [ "$HTTP_CODE" = "200" ]; then
7783
if echo "$BODY" | jq -e '(.choices | type) == "array" and (.choices | length) > 0 and (.choices[0].message | type) == "object" and .choices[0].message.role == "assistant" and (.choices[0].message | has("content"))' >/dev/null 2>&1; then
@@ -99,6 +105,22 @@ jobs:
99105
API_APP_CODE="${{ steps.check_api_app.outputs.http_code }}"
100106
API_TECH_CODE="${{ steps.check_api_tech.outputs.http_code }}"
101107
108+
APP_EXIT="${{ steps.check_app.outputs.curl_exit }}"
109+
API_APP_EXIT="${{ steps.check_api_app.outputs.curl_exit }}"
110+
API_TECH_EXIT="${{ steps.check_api_tech.outputs.curl_exit }}"
111+
112+
# curl 退出码非零(拿不到响应、即 http_code=000)时附带退出码(28=超时, 7=拒连, 6=DNS, 35=TLS)以区分故障类型
113+
fmt_code() {
114+
if [ -n "$2" ] && [ "$2" != "0" ]; then
115+
echo "$1, curl exit $2"
116+
else
117+
echo "$1"
118+
fi
119+
}
120+
APP_CODE_DISP=$(fmt_code "$APP_CODE" "$APP_EXIT")
121+
API_APP_CODE_DISP=$(fmt_code "$API_APP_CODE" "$API_APP_EXIT")
122+
API_TECH_CODE_DISP=$(fmt_code "$API_TECH_CODE" "$API_TECH_EXIT")
123+
102124
ALL_HEALTHY="true"
103125
ANY_DOWN="false"
104126
for h in "$APP_HEALTHY" "$API_APP_HEALTHY" "$API_TECH_HEALTHY"; do
@@ -120,21 +142,21 @@ jobs:
120142
fi
121143
122144
if [ "$APP_HEALTHY" = "true" ]; then
123-
APP_STATUS=":white_check_mark: OK (HTTP $APP_CODE)"
145+
APP_STATUS=":white_check_mark: OK (HTTP $APP_CODE_DISP)"
124146
else
125-
APP_STATUS=":x: DOWN (HTTP $APP_CODE)"
147+
APP_STATUS=":x: DOWN (HTTP $APP_CODE_DISP)"
126148
fi
127149
128150
if [ "$API_APP_HEALTHY" = "true" ]; then
129-
API_APP_STATUS=":white_check_mark: OK (HTTP $API_APP_CODE)"
151+
API_APP_STATUS=":white_check_mark: OK (HTTP $API_APP_CODE_DISP)"
130152
else
131-
API_APP_STATUS=":x: DOWN (HTTP $API_APP_CODE)"
153+
API_APP_STATUS=":x: DOWN (HTTP $API_APP_CODE_DISP)"
132154
fi
133155
134156
if [ "$API_TECH_HEALTHY" = "true" ]; then
135-
API_TECH_STATUS=":white_check_mark: OK (HTTP $API_TECH_CODE)"
157+
API_TECH_STATUS=":white_check_mark: OK (HTTP $API_TECH_CODE_DISP)"
136158
else
137-
API_TECH_STATUS=":x: DOWN (HTTP $API_TECH_CODE)"
159+
API_TECH_STATUS=":x: DOWN (HTTP $API_TECH_CODE_DISP)"
138160
fi
139161
140162
# 只在有故障时 @everyone

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ __pycache__/
5858
.venv_monitor/
5959
.matplotlib/
6060
.pyinstaller-config/
61+
.hypothesis/
62+
.agent-logs/
63+
.kiro/
6164
# Local Windows Nuitka build script — maintainer-only, not for repo
6265
build_nuitka.bat
6366
# Playwright browsers cache (used by build_nuitka.bat, not for repo)
@@ -94,6 +97,7 @@ node_modules/
9497
dist/
9598
build/
9699
*.tsbuildinfo
100+
docs/.vitepress/cache/
97101
static/react/neko-chat/
98102

99103
# Python venvs and wheels

app/agent_server.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1342,7 +1342,9 @@ def _check_agent_api_gate() -> Dict[str, Any]:
13421342
try:
13431343
cm = get_config_manager()
13441344
ok, reasons = cm.is_agent_api_ready()
1345-
return {"ready": ok, "reasons": reasons, "is_free_version": cm.is_free_version()}
1345+
# 字段名保留 is_free_version(前端/下游 gate 消费者沿用),值取 agent 维度的
1346+
# is_agent_free():判 agent 是否走内置免费模型,而非 core/assist 的版本免费。
1347+
return {"ready": ok, "reasons": reasons, "is_free_version": cm.is_agent_free()}
13461348
except Exception as e:
13471349
return {"ready": False, "reasons": [f"Agent API check failed: {e}"], "is_free_version": False}
13481350

@@ -2245,7 +2247,15 @@ async def _on_plugin_progress(
22452247
async def _run_user_plugin_dispatch():
22462248
try:
22472249
from utils.instrument import counter as _ic
2250+
# agent_invoked 只按 agent_type 分,保持单 key 即"plugin
2251+
# 总计"——本地 admin 视图 get_top_counters 按完整 metric_key
2252+
# GROUP BY、不做 dim 聚合,若把 plugin_id 塞进这里会把该
2253+
# 总计行打散成 per-plugin 行、丢掉聚合。per-plugin 细分另发
2254+
# 独立指标 plugin_invoked,其全量之和恒等于本行,互不重复
2255+
# 计数。plugin_id 基数由已安装插件数限定,截断兜底防异常长
2256+
# id 撑爆 counter key 空间。
22482257
_ic("agent_invoked", agent_type="plugin")
2258+
_ic("plugin_invoked", plugin_id=str(plugin_id or "unknown")[:48])
22492259
except Exception:
22502260
pass # 埋点 best-effort,不阻塞 plugin 分派
22512261
# Default delivery mode; overridden after the plugin result
@@ -3401,6 +3411,24 @@ async def _http_plugin_provider(force_refresh: bool = False):
34013411
await Modules.agent_bridge.start()
34023412
except Exception as e:
34033413
logger.warning(f"[Agent] Event bridge startup failed: {e}")
3414+
# 免费版 Agent 每日配额耗尽 → 节流通知前端弹提示(最多每 10 秒一次)。
3415+
# consume_agent_daily_quota 跑在 worker 线程里调这个回调,用 run_coroutine_threadsafe
3416+
# 把异步 ZeroMQ emit 调度回 agent_server 的事件循环;不 .result(),保持非阻塞。
3417+
try:
3418+
_quota_notify_loop = asyncio.get_running_loop()
3419+
3420+
def _notify_agent_quota_exceeded(used: int, limit: int) -> None:
3421+
try:
3422+
asyncio.run_coroutine_threadsafe(
3423+
_emit_main_event("agent_quota_exceeded", None, used=used, limit=limit),
3424+
_quota_notify_loop,
3425+
)
3426+
except Exception as e:
3427+
logger.debug("[Agent] schedule agent_quota_exceeded emit failed: %s", e)
3428+
3429+
get_config_manager().register_quota_exceeded_notifier(_notify_agent_quota_exceeded)
3430+
except Exception as e:
3431+
logger.warning(f"[Agent] register quota-exceeded notifier failed: {e}")
34043432
# Push initial server status so frontend can render Agent popup without waiting.
34053433
_bump_state_revision()
34063434

0 commit comments

Comments
 (0)