-
Notifications
You must be signed in to change notification settings - Fork 159
Expand file tree
/
Copy pathsystem_router.py
More file actions
7513 lines (6709 loc) · 353 KB
/
system_router.py
File metadata and controls
7513 lines (6709 loc) · 353 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""
System Router
Handles system-related endpoints including:
- Server shutdown
- Emotion analysis
- Steam achievements
- File utilities (file-exists, find-first-image, proxy-image)
URL convention: routes declared WITHOUT trailing slash (no ``@router.get('/')``).
See ``main_routers/characters_router.py`` docstring or
``.agent/rules/neko-guide.md`` (§"API URL 末尾不带斜杠") for the rationale;
enforced by ``scripts/check_api_trailing_slash.py``.
"""
import os
import sys
import asyncio
import base64
import difflib
import hashlib
import hmac
import ipaddress
import json
import math
import random
import re
import secrets
import shutil
import subprocess
import tempfile
import time
from collections import deque
from io import BytesIO
from pathlib import Path
from typing import Any
from urllib.parse import unquote, urlsplit
from uuid import uuid4
from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse, Response
from openai import APIConnectionError, InternalServerError, RateLimitError
from utils.llm_client import SystemMessage, HumanMessage, create_chat_llm
from utils.tokenize import count_tokens
import ssl
import httpx
from PIL import Image
# Phase 2 proactive output ceiling. The model occasionally runs off; this
# fence cuts the stream and aborts TTS once the running output exceeds the
# token budget. We use sync `count_tokens` here on purpose:
# - At fence time `full_text` is < 1 KB (we abort at 300 tokens ≈ 400 CJK
# chars); tiktoken Rust encode of that size is sub-millisecond.
# - tiktoken's Rust core releases the GIL inside `encode`, so a sync call
# does NOT block other coroutines' IO callbacks for any meaningful time.
# - `asyncio.to_thread` adds ~0.1 ms scheduling overhead per call (warmed
# thread pool) — 3-4× the actual encode work. Across a 30-chunk stream
# that's a few milliseconds saved per turn, but more importantly avoids
# the cold-start case where the first thread hop can take much longer.
from cachetools import TTLCache
from .shared_state import ensure_steamworks as get_steamworks, get_config_manager, get_sync_message_queue, get_session_manager
from main_logic.omni_realtime_client import OmniRealtimeClient
from main_logic.activity.system_signals import is_remote_backend_deployment
from main_logic.activity.tracker import _EXTERNAL_SIGNAL_MIN_INTERVAL
from config import (
AUTOSTART_ALLOWED_ORIGINS,
AUTOSTART_CSRF_TOKEN,
MEMORY_SERVER_PORT,
get_extra_body,
PROACTIVE_PHASE1_FETCH_PER_SOURCE,
PROACTIVE_PHASE1_TOTAL_TOPICS,
PROACTIVE_EXTERNAL_PER_ITEM_MAX_TOKENS,
PROACTIVE_EXTERNAL_TOTAL_MAX_TOKENS,
PROACTIVE_PHASE2_OUTPUT_MAX_TOKENS as PHASE2_OUTPUT_MAX_TOKENS,
PROACTIVE_PHASE2_GENERATE_MAX_TOKENS,
PROACTIVE_PHASE1_UNIFIED_MAX_TOKENS,
PROACTIVE_CHAT_HISTORY_MAX,
ANTI_REPEAT_DROP_THRESHOLD,
ANTI_REPEAT_INJECT_TOP_K,
ANTI_REPEAT_REGEN_THRESHOLD,
MINI_GAME_INVITE_ENABLED,
MINI_GAME_INVITE_FORCE_GAME_TYPE,
MINI_GAME_INVITE_TRIGGER_PROBABILITY,
MINI_GAME_INVITE_COOLDOWN_AFTER_ACCEPT_SECONDS,
MINI_GAME_INVITE_COOLDOWN_AFTER_DECLINE_SECONDS,
MINI_GAME_INVITE_COOLDOWN_CHATS,
MINI_GAME_INVITE_NEW_USER_FORCE_AT,
MINI_GAME_INVITE_AVAILABLE_GAMES,
MINI_GAME_INVITE_LATER_SUPPRESS_SECONDS,
MINI_GAME_LAUNCH_URL_BY_GAME,
PROACTIVE_SOURCE_HARD_SKIP_SECONDS,
PROACTIVE_SOURCE_HALF_LIFE_BY_KIND,
PROACTIVE_SOURCE_HALF_LIFE_DEFAULT,
PROACTIVE_SOURCE_FORGET_P,
EMOTION_ANALYSIS_MAX_TOKENS,
)
from config.prompts.prompts_sys import _loc
from config.prompts.prompts_emotion import (
get_outward_emotion_analysis_prompt,
get_emotion_keywords_flat,
get_angry_attack_patterns_flat,
get_sad_vulnerable_patterns_flat,
get_happy_playful_patterns_flat,
get_heuristic_negation_tokens_flat,
get_heuristic_tight_negation_tokens_flat,
get_heuristic_negation_blocklist_flat,
get_heuristic_contrast_conjunctions_flat,
get_emotion_label_aliases_flat,
)
from config.prompts.prompts_memory import PROACTIVE_FOLLOWUP_HEADER
from config.prompts.prompts_directives import render_regen_avoid_instruction, render_format_fix_instruction
from config.prompts.prompts_proactive import (
get_proactive_screen_prompt, get_proactive_generate_prompt,
get_proactive_music_playing_hint,
get_proactive_music_unknown_track_name,
get_proactive_music_failsafe_hint,
get_proactive_music_strict_constraint,
get_proactive_format_sections,
get_screen_section_header, get_screen_section_footer, get_screen_img_hint,
RECENT_PROACTIVE_CHATS_HEADER, RECENT_PROACTIVE_CHATS_FOOTER,
RECENT_PROACTIVE_TIME_LABELS, RECENT_PROACTIVE_CHANNEL_LABELS,
BEGIN_GENERATE,
SCREEN_WINDOW_TITLE,
EXTERNAL_TOPIC_HEADER, EXTERNAL_TOPIC_FOOTER,
MUSIC_SECTION_HEADER, MUSIC_SECTION_FOOTER,
MEME_SECTION_HEADER, MEME_SECTION_FOOTER,
get_meme_topic_line,
PROACTIVE_SOURCE_LABELS,
PROACTIVE_MUSIC_TAG_INSTRUCTIONS,
MUSIC_SEARCH_RESULT_TEXTS,
MINI_GAME_INVITE_LINES_BY_GAME,
MINI_GAME_INVITE_OPTION_LABELS,
MINI_GAME_INVITE_KEYWORDS,
build_proactive_action_note,
)
from utils.file_utils import atomic_write_json_async, read_json
from utils.workshop_utils import get_workshop_path
from utils.screenshot_utils import (
compress_screenshot,
decode_and_compress_screenshot_b64,
COMPRESS_TARGET_HEIGHT,
COMPRESS_JPEG_QUALITY,
)
from utils.language_utils import detect_language, translate_text, normalize_language_code, get_global_language, is_supported_language_code
from utils.web_scraper import (
fetch_trending_content, format_trending_content,
fetch_window_context_content, format_window_context_content,
fetch_video_content, format_video_content,
fetch_news_content, format_news_content,
fetch_personal_dynamics, format_personal_dynamics,
)
from utils.music_crawlers import fetch_music_content
from utils.meme_fetcher import fetch_meme_content, MEME_ALLOWED_HOSTS
from utils.logger_config import get_module_logger
from utils.autostart_prompt_state import (
get_autostart_prompt_state_response,
process_autostart_prompt_heartbeat,
record_autostart_prompt_shown,
record_autostart_prompt_decision,
)
from utils.tutorial_prompt_state import (
get_tutorial_prompt_state_response,
process_tutorial_prompt_heartbeat,
record_tutorial_prompt_shown,
record_tutorial_prompt_decision,
record_tutorial_started,
record_tutorial_completed,
reset_tutorial_prompt_state,
)
from utils.storage_location_bootstrap import build_storage_location_bootstrap_payload
from utils.config_manager import get_config_manager as get_runtime_config_manager
from config import APP_NAME
# Router for the system endpoints; every route declared on it lives under /api.
router = APIRouter(prefix="/api", tags=["system"])
logger = get_module_logger(__name__, "Main")
# Request header that carries the CSRF token for local mutation requests.
_AUTOSTART_CSRF_HEADER = "X-CSRF-Token"
# Yui guide handoff tokens: schema version, default flow id, lifetime in
# seconds, and the cap on how many records are kept in memory.
_YUI_GUIDE_HANDOFF_TOKEN_VERSION = 1
_YUI_GUIDE_HANDOFF_FLOW_ID = "home_yui_guide_v1"
_YUI_GUIDE_HANDOFF_TTL_SECONDS = 5 * 60
_YUI_GUIDE_HANDOFF_MAX_RECORDS = 128
# HMAC key drawn at import time, so it differs on every process start.
_YUI_GUIDE_HANDOFF_SECRET = secrets.token_bytes(32)
# Lock taken by the create/consume handlers while they touch the token store.
_yui_guide_handoff_lock = asyncio.Lock()
# In-memory store: token string -> handoff record.
_yui_guide_handoff_tokens: dict[str, dict[str, Any]] = {}
def _set_no_store_headers(response: Response) -> None:
    """Stamp *response* with headers that forbid caching it."""
    no_cache_headers = (
        ("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"),
        ("Pragma", "no-cache"),
        ("Expires", "0"),
    )
    for header_name, header_value in no_cache_headers:
        response.headers[header_name] = header_value
def _is_loopback_request(request: Request) -> bool:
    """Return True when the peer address of *request* is a loopback address.

    A missing client, or a host that is not a parseable IP address (other
    than the literal ``localhost``), yields False.
    """
    peer = request.client
    host = peer.host if peer else ""
    if host == "localhost":
        return True
    # Drop the IPv4-mapped IPv6 prefix so "::ffff:127.0.0.1" parses as IPv4.
    candidate = str(host or "").removeprefix("::ffff:")
    try:
        address = ipaddress.ip_address(candidate)
    except ValueError:
        return False
    return address.is_loopback
# Both /screenshot and /screenshot/interactive capture the screen of the
# machine that runs the backend; on a remote deployment that is the server's
# own desktop rather than the user's. The loopback check can be bypassed by
# a reverse proxy or tunnel, so ``NEKO_ACTIVITY_TRACKER_REMOTE`` is the
# operator's explicit "the backend is not on the user's machine" hard
# switch: when it is set, local screenshots are refused outright.
#
# The real implementation lives in
# ``main_logic/activity/system_signals.is_remote_backend_deployment``
# (added for the activity tracker in PR #1015); it is reused here instead of
# inventing another deployment variable. The private alias is kept because
# ``tests/unit/test_system_screenshot_router.py`` still calls
# ``system_router_module._is_remote_backend_deployment()``.
_is_remote_backend_deployment = is_remote_backend_deployment
def _run_macos_interactive_screenshot(output_path: str) -> tuple[int, str]:
    """Run ``screencapture -i -s -x`` and write the capture to *output_path*.

    Returns:
        ``(returncode, stderr)`` of the finished process, with stderr
        stripped of surrounding whitespace.

    Raises:
        FileNotFoundError: ``screencapture`` is not on PATH.
    """
    executable = shutil.which("screencapture")
    if not executable:
        raise FileNotFoundError("screencapture not found")
    argv = [executable, "-i", "-s", "-x", output_path]
    # check=False: a non-zero exit is reported to the caller, not raised.
    result = subprocess.run(argv, capture_output=True, text=True, check=False)
    stderr_text = result.stderr or ""
    return result.returncode, stderr_text.strip()
def _image_path_to_jpeg_data_url(image_path: str) -> tuple[str, int]:
    """Load an image file and return it as a compressed JPEG data URL.

    Returns:
        ``(data_url, jpeg_byte_count)`` where ``data_url`` is a
        ``data:image/jpeg;base64,...`` string.
    """
    modes_needing_rgb = ("RGBA", "LA", "P")
    with Image.open(image_path) as source_image:
        # Convert alpha/palette images to RGB before JPEG compression.
        if source_image.mode in modes_needing_rgb:
            frame = source_image.convert("RGB")
        else:
            frame = source_image
        encoded_jpeg = compress_screenshot(
            frame,
            target_h=COMPRESS_TARGET_HEIGHT,
            quality=COMPRESS_JPEG_QUALITY,
        )
    payload = base64.b64encode(encoded_jpeg).decode("utf-8")
    return f"data:image/jpeg;base64,{payload}", len(encoded_jpeg)
def _is_interactive_screenshot_canceled(platform_name: str, returncode: int, stderr: str, file_size: int) -> bool:
    """Decide whether an interactive capture ended without producing an image.

    A non-empty output file is never a cancel. With an empty file, exit
    code 0 is a cancel; exit code 1 is a cancel on ``darwin``, and
    elsewhere only when stderr is blank. Any other exit code is not.
    """
    if file_size > 0:
        return False
    if returncode == 0:
        return True
    if returncode != 1:
        return False
    if platform_name == "darwin":
        return True
    # Off macOS, exit code 1 with an error message is a real failure.
    return not str(stderr or "").strip()
def _format_backend_screenshot_error(exc: Exception) -> str:
    """Turn a screenshot backend exception into a user-facing message.

    Returns an install hint when the error points at a missing
    ``gnome-screenshot``; otherwise the stripped exception text, or the
    exception class name when that text is empty.
    """
    install_hint = "gnome-screenshot not installed; install it with: sudo apt install gnome-screenshot"
    message = str(exc or "").strip()
    folded = message.lower()
    mentions_gnome = "gnome-screenshot" in folded
    if mentions_gnome and sys.platform.startswith("linux") and not shutil.which("gnome-screenshot"):
        return install_hint
    if "pillow" in folded:
        # NOTE(review): presumably this probes whether Pillow itself works,
        # so a Pillow-worded error can be blamed on gnome-screenshot instead.
        try:
            Image.new("RGB", (1, 1))
        except Exception:
            # Best-effort probe: fall through to the raw message.
            pass
        else:
            if mentions_gnome:
                return install_hint
    return message or type(exc).__name__
def _json_no_store_response(content: dict, status_code: int = 200) -> JSONResponse:
    """Wrap *content* in a JSONResponse that carries the no-store headers."""
    uncached = JSONResponse(content=content, status_code=status_code)
    _set_no_store_headers(uncached)
    return uncached
def _derive_system_lifecycle_state(storage_bootstrap: dict[str, Any]) -> str:
    """Map a storage bootstrap payload onto a lifecycle state string.

    Returns:
        ``"starting"`` when the payload is not a dict,
        ``"migration_required"`` when any blocking flag is truthy or a
        non-blank ``blocking_reason`` is present, otherwise ``"ready"``.
    """
    if not isinstance(storage_bootstrap, dict):
        return "starting"
    blocking_flags = ("selection_required", "migration_pending", "recovery_required")
    if any(storage_bootstrap.get(flag) for flag in blocking_flags):
        return "migration_required"
    blocking_reason = str(storage_bootstrap.get("blocking_reason") or "").strip()
    return "migration_required" if blocking_reason else "ready"
def _build_public_error_response(
    *,
    error_code: str,
    status_code: int,
    result: dict | None = None,
    defaults: dict | None = None,
):
    """Build a JSON error response that exposes only a fixed public message.

    The body starts from *defaults*, is overlaid with *result*, and then
    always gets ``ok=False``, ``error_code`` and a canned ``error`` text
    (``"Operation failed"`` for unknown codes).
    """
    public_messages = {
        "status_failed": "Failed to read autostart status",
        "enable_failed": "Failed to enable autostart",
        "disable_failed": "Failed to disable autostart",
        "unsupported_platform": "Autostart is not supported on this platform",
        "launch_command_unavailable": "Autostart launch command is unavailable",
        "csrf_validation_failed": "Request could not be verified",
    }
    body: dict = {}
    # Later layers win: defaults first, then the caller's result.
    for layer in (defaults, result):
        if layer:
            body.update(layer)
    body.update(
        ok=False,
        error_code=error_code,
        error=public_messages.get(error_code, "Operation failed"),
    )
    return JSONResponse(status_code=status_code, content=body)
def _normalize_origin_value(raw_value: str | None) -> str:
if not raw_value:
return ""
try:
parsed = urlsplit(raw_value.strip())
except ValueError:
return ""
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
return ""
return f"{parsed.scheme.lower()}://{parsed.netloc.lower()}".rstrip("/")
def _get_request_origin(request: Request) -> str:
    """Return the normalized Origin of *request*, falling back to its Referer."""
    headers = request.headers
    return (
        _normalize_origin_value(headers.get("origin"))
        or _normalize_origin_value(headers.get("referer"))
    )
def _get_system_config_manager():
    """Return the shared config manager, or a runtime one if it is not ready."""
    try:
        manager = get_config_manager()
    except RuntimeError:
        # The storage bootstrap sentinel must keep working during limited
        # startup even if main_server shared_state is not fully published yet.
        manager = get_runtime_config_manager(APP_NAME, migrate=False)
    return manager
def _get_allowed_local_origins(request: Request) -> set[str]:
    """Collect the origins allowed to send local mutation requests.

    The set holds every configured origin that normalizes to a non-empty
    value, plus the origin derived from the request's own base URL.
    """
    origins: set[str] = set()
    for configured in AUTOSTART_ALLOWED_ORIGINS:
        if not isinstance(configured, str):
            continue
        normalized = _normalize_origin_value(configured)
        if normalized:
            origins.add(normalized)
    own_origin = _normalize_origin_value(str(request.base_url))
    if own_origin:
        origins.add(own_origin)
    return origins
def _validate_local_mutation_request(
    request: Request,
    *,
    payload: dict[str, Any] | None = None,
    error_defaults: dict[str, Any] | None = None,
) -> JSONResponse | None:
    """Check the CSRF token and origin of a state-changing local request.

    The token is read from the ``X-CSRF-Token`` header, falling back to the
    ``_csrf_token`` field of *payload*. The origin comes from the Origin
    header (or Referer) and must be one of the allowed local origins.

    Returns:
        ``None`` when both checks pass, otherwise a 403 JSON error response
        built from *error_defaults*.
    """
    csrf_token = request.headers.get(_AUTOSTART_CSRF_HEADER, "")
    if not csrf_token and payload:
        body_token = payload.get("_csrf_token")
        csrf_token = body_token if isinstance(body_token, str) else ""
    # compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters, which a client can send at will. Comparing the
    # encoded bytes turns such tokens into an ordinary mismatch (403) instead
    # of an unhandled exception; "surrogatepass" also covers lone surrogates
    # that JSON escapes can produce.
    has_valid_csrf = bool(
        csrf_token
        and AUTOSTART_CSRF_TOKEN
        and secrets.compare_digest(
            csrf_token.encode("utf-8", "surrogatepass"),
            str(AUTOSTART_CSRF_TOKEN).encode("utf-8", "surrogatepass"),
        )
    )
    request_origin = _get_request_origin(request)
    allowed_origins = _get_allowed_local_origins(request)
    has_valid_origin = bool(request_origin and request_origin in allowed_origins)
    if has_valid_csrf and has_valid_origin:
        return None
    logger.warning(
        "Rejected local mutation request due to failed CSRF/origin validation: "
        "origin=%r allowed_origins=%r has_csrf=%s referer=%r",
        request_origin,
        sorted(allowed_origins),
        has_valid_csrf,
        request.headers.get("referer"),
    )
    return _build_public_error_response(
        error_code="csrf_validation_failed",
        status_code=403,
        defaults=error_defaults,
    )
async def _safe_fire_proactive_done(scope: dict) -> None:
    """Safely reset the state machine from proactive_chat's error paths.

    The exception may happen before PROACTIVE_START (``mgr`` not bound yet,
    ``_SE`` not imported yet) or after it, so the caller passes its
    ``locals()`` dict and the names are looked up here to avoid NameError.
    Firing is idempotent: when the state is already IDLE, PROACTIVE_DONE is
    a no-op.
    """
    manager = scope.get("mgr")
    events = scope.get("_SE")
    already_emitted = scope.get("_proactive_done_emitted", False)
    if manager is None or events is None or already_emitted:
        return
    try:
        await manager.state.fire(events.PROACTIVE_DONE)
    except Exception as err:
        # The state machine should not raise; swallow as a last resort.
        logger.warning("safe_fire_proactive_done 异常: %s", err)
async def _read_json_object(request: Request) -> dict[str, object]:
    """Parse the request body as JSON; anything but a JSON object becomes {}."""
    try:
        body = await request.json()
    except Exception:
        # An unreadable or malformed body is treated as an empty payload.
        return {}
    if isinstance(body, dict):
        return body
    return {}
def _normalize_yui_handoff_text(value: object, *, max_length: int = 160) -> str:
    """Return *value* stripped and cut to *max_length*; non-strings give ""."""
    if isinstance(value, str):
        trimmed = value.strip()
        return trimmed[:max_length]
    return ""
def _build_yui_handoff_signature(record: dict[str, Any]) -> str:
    """Compute the hex HMAC-SHA256 signature of a handoff record.

    The signed message is the listed fields, each rendered with ``str``
    (missing or falsy values become ""), joined by newlines in this exact
    order and keyed with the module's handoff secret.
    """
    signed_field_names = (
        "token",
        "token_version",
        "flow_id",
        "source_origin",
        "source_page",
        "source_path",
        "target_page",
        "target_path",
        "resume_scene",
        "expires_at",
    )
    canonical = "\n".join(str(record.get(name) or "") for name in signed_field_names)
    mac = hmac.new(_YUI_GUIDE_HANDOFF_SECRET, canonical.encode("utf-8"), hashlib.sha256)
    return mac.hexdigest()
def _public_yui_handoff_record(record: dict[str, Any]) -> dict[str, Any]:
    """Project a stored handoff record onto the shape returned to clients.

    ``source_origin`` is not part of the projection; ``consumed`` is derived
    from ``consumed_at`` and ``authority`` is always ``"server"``.
    """
    field = record.get
    consumed_at = field("consumed_at", 0)
    public: dict[str, Any] = {
        "token": field("token", ""),
        "token_version": field("token_version", _YUI_GUIDE_HANDOFF_TOKEN_VERSION),
        "flow_id": field("flow_id", _YUI_GUIDE_HANDOFF_FLOW_ID),
    }
    for text_key in ("source_page", "source_path", "target_page", "target_path"):
        public[text_key] = field(text_key, "")
    # An empty resume scene is reported as None rather than "".
    public["resume_scene"] = field("resume_scene") or None
    public["created_at"] = field("created_at", 0)
    public["expires_at"] = field("expires_at", 0)
    public["consumed"] = bool(consumed_at)
    public["consumed_by"] = field("consumed_by", "")
    public["consumed_at"] = consumed_at
    public["signature"] = field("signature", "")
    public["authority"] = "server"
    return public
def _prune_yui_handoff_records(now_ms: int) -> None:
    """Drop expired handoff records, then trim the store to its size cap.

    Records whose ``expires_at`` is at or before *now_ms* are removed first;
    if the store is still above the cap, the records with the smallest
    ``created_at`` are removed until it fits.
    """
    store = _yui_guide_handoff_tokens

    def _stamp(token: str, key: str) -> int:
        # Missing or falsy timestamps count as 0.
        return int(store[token].get(key, 0) or 0)

    # Collect the keys first so the dict is not mutated while iterating.
    stale = [token for token in store if _stamp(token, "expires_at") <= now_ms]
    for token in stale:
        store.pop(token, None)

    excess = len(store) - _YUI_GUIDE_HANDOFF_MAX_RECORDS
    if excess <= 0:
        return
    oldest_first = sorted(store, key=lambda token: _stamp(token, "created_at"))
    for token in oldest_first[:excess]:
        store.pop(token, None)
@router.post("/yui-guide/handoff/create")
async def create_yui_guide_handoff(request: Request):
    """Issue a signed handoff token and store it in memory.

    Requires a valid CSRF token and origin and a non-empty ``target_page``.
    Responds with the public view of the new record, or a 400/403 JSON
    error. Every response carries no-store headers.
    """
    payload = await _read_json_object(request)
    rejection = _validate_local_mutation_request(request, payload=payload)
    if rejection is not None:
        _set_no_store_headers(rejection)
        return rejection

    def _field(key: str, limit: int) -> str:
        # Read a payload value as trimmed text cut to *limit* characters.
        return _normalize_yui_handoff_text(payload.get(key), max_length=limit)

    target_page = _field("target_page", 80)
    if not target_page:
        missing_target = {
            "ok": False,
            "error_code": "invalid_target_page",
            "error": "target_page is required",
        }
        return _json_no_store_response(missing_target, status_code=400)

    created_at = int(time.time() * 1000)
    origin = _get_request_origin(request) or _normalize_origin_value(str(request.base_url))
    record: dict[str, Any] = {
        "token": secrets.token_urlsafe(24),
        "token_version": _YUI_GUIDE_HANDOFF_TOKEN_VERSION,
        "flow_id": _field("flow_id", 80) or _YUI_GUIDE_HANDOFF_FLOW_ID,
        "source_origin": origin,
        "source_page": _field("source_page", 80) or "home",
        "source_path": _field("source_path", 240),
        "target_page": target_page,
        "target_path": _field("target_path", 240),
        "resume_scene": _field("resume_scene", 120) or None,
        "created_at": created_at,
        "expires_at": created_at + (_YUI_GUIDE_HANDOFF_TTL_SECONDS * 1000),
        "consumed_at": 0,
        "consumed_by": "",
    }
    # Sign only after every signed field is in place.
    record["signature"] = _build_yui_handoff_signature(record)
    async with _yui_guide_handoff_lock:
        _prune_yui_handoff_records(created_at)
        _yui_guide_handoff_tokens[record["token"]] = record
    return _json_no_store_response({"ok": True, "token": _public_yui_handoff_record(record)})
@router.post("/yui-guide/handoff/consume")
async def consume_yui_guide_handoff(request: Request):
    """Validate a handoff token and mark it as consumed.

    Requires a valid CSRF token and origin. The token must exist (expired
    records are pruned first), carry the stored signature, come from the
    origin that created it, target ``expected_page``, and not have been
    consumed before. Failures map to 400/403/404/409 JSON errors; success
    returns the public view of the record. Every response is no-store.
    """
    payload = await _read_json_object(request)
    validation_error = _validate_local_mutation_request(request, payload=payload)
    if validation_error is not None:
        _set_no_store_headers(validation_error)
        return validation_error

    def _reject(error_code: str, message: str, status_code: int) -> JSONResponse:
        # Uniform shape for every refusal produced by this handler.
        return _json_no_store_response(
            {
                "ok": False,
                "error_code": error_code,
                "error": message,
            },
            status_code=status_code,
        )

    token = _normalize_yui_handoff_text(payload.get("token"), max_length=128)
    signature = _normalize_yui_handoff_text(payload.get("signature"), max_length=128)
    expected_page = _normalize_yui_handoff_text(payload.get("expected_page"), max_length=80)
    consumed_by = _normalize_yui_handoff_text(payload.get("consumer_id"), max_length=120)
    request_origin = _get_request_origin(request) or _normalize_origin_value(str(request.base_url))
    now_ms = int(time.time() * 1000)
    if not token or not signature:
        return _reject("invalid_handoff_token", "token and signature are required", 400)
    if not expected_page:
        return _reject("invalid_expected_page", "expected_page is required", 400)

    async with _yui_guide_handoff_lock:
        _prune_yui_handoff_records(now_ms)
        record = _yui_guide_handoff_tokens.get(token)
        if not record:
            return _reject("handoff_token_not_found", "handoff token not found", 404)

        stored_signature = str(record.get("signature") or "")
        # compare_digest() raises TypeError for str arguments with non-ASCII
        # characters, and the signature is client-supplied. Comparing the
        # encoded bytes makes such input a plain mismatch (403), not a crash.
        signatures_match = hmac.compare_digest(
            signature.encode("utf-8", "surrogatepass"),
            stored_signature.encode("utf-8", "surrogatepass"),
        )
        if not signatures_match:
            return _reject("handoff_signature_mismatch", "handoff signature mismatch", 403)

        source_origin = str(record.get("source_origin") or "")
        # The origin check only applies when both origins are known.
        if source_origin and request_origin and request_origin != source_origin:
            return _reject("handoff_origin_mismatch", "handoff origin mismatch", 403)

        target_page = str(record.get("target_page") or "")
        if expected_page != target_page:
            return _reject("handoff_target_mismatch", "handoff target mismatch", 403)

        if record.get("consumed_at"):
            return _reject("handoff_token_consumed", "handoff token already consumed", 409)

        record["consumed_at"] = now_ms
        record["consumed_by"] = consumed_by or request_origin or "unknown"
        return _json_no_store_response({"ok": True, "token": _public_yui_handoff_record(record)})
@router.get("/system/status")
async def get_system_status(response: Response):
    """Return a lightweight readiness snapshot for the web bootstrap sentinel.

    Any failure while building the snapshot is logged and reported as a
    ``"starting"`` status with an all-clear storage section.
    """
    _set_no_store_headers(response)
    flag_keys = (
        "selection_required",
        "migration_pending",
        "recovery_required",
        "legacy_cleanup_pending",
    )
    text_keys = ("blocking_reason", "last_error_summary")
    try:
        manager = _get_system_config_manager()
        bootstrap = build_storage_location_bootstrap_payload(manager)
        lifecycle_state = _derive_system_lifecycle_state(bootstrap)
        storage: dict[str, Any] = {key: bool(bootstrap.get(key)) for key in flag_keys}
        for key in text_keys:
            storage[key] = str(bootstrap.get(key) or "")
        storage["stage"] = bootstrap.get("stage") or ""
        return {
            "ok": True,
            "status": lifecycle_state,
            "ready": lifecycle_state == "ready",
            "storage": storage,
        }
    except Exception as exc:
        logger.warning("system status probe unavailable during startup: %s", exc)
        placeholder: dict[str, Any] = {key: False for key in flag_keys}
        for key in text_keys:
            placeholder[key] = ""
        placeholder["stage"] = ""
        return {
            "ok": True,
            "status": "starting",
            "ready": False,
            "storage": placeholder,
        }
# The shared meme image-source allowlist is maintained by utils.meme_fetcher;
# this file only imports it.
# The multilingual keyword/alias tables are maintained in
# config/prompts/prompts_emotion.py; only flat indexes are built here.
_EMOTION_LABEL_ALIASES = get_emotion_label_aliases_flat()
_EMOTION_CANONICAL_LABELS = ("happy", "sad", "angry", "surprised", "neutral")
# alias with whitespace/hyphen/underscore runs collapsed to one space -> label
_EMOTION_NORMALIZED_ALIAS_LOOKUP = {}
# alias with every non-word character removed -> label (first alias wins)
_EMOTION_COMPACT_ALIAS_LOOKUP = {}
for _alias, _canonical in _EMOTION_LABEL_ALIASES.items():
    _normalized_alias = re.sub(r"[\s\-_]+", " ", str(_alias).strip().lower())
    if _normalized_alias:
        _EMOTION_NORMALIZED_ALIAS_LOOKUP[_normalized_alias] = _canonical
        _compact_alias = re.sub(r"[\W_]+", "", _normalized_alias, flags=re.UNICODE)
        if _compact_alias:
            _EMOTION_COMPACT_ALIAS_LOOKUP.setdefault(_compact_alias, _canonical)
# Key snapshots handed to difflib for fuzzy matching.
_EMOTION_FUZZY_ALIAS_KEYS = tuple(_EMOTION_NORMALIZED_ALIAS_LOOKUP)
_EMOTION_FUZZY_COMPACT_KEYS = tuple(_EMOTION_COMPACT_ALIAS_LOOKUP)
# Matches aliases made only of lowercase ASCII letters/digits, optionally
# split into several whitespace-separated words.
_ASCII_EMOTION_ALIAS_RE = re.compile(r"^[a-z0-9]+(?:\s+[a-z0-9]+)*$")
# Negation words compared against whole tokens (English, Korean, Russian).
_EMOTION_NEGATION_WORDS = frozenset((
"not", "no", "never", "without",
"안", "아니", "못", "않", "아니다", "아닌", "아님",
"не", "нет", "никогда",
))
# Negation markers that may be glued to the front of an emotion word.
_EMOTION_NEGATION_PREFIXES = (
"不是", "并不", "并非", "不太", "没那么", "没有", "并没有",
"不", "没", "無", "无", "非", "别", "別",
"안", "아니", "못",
"не", "нет", "никогда",
)
# Negation markers that may follow an emotion word (Korean endings).
_EMOTION_NEGATION_SUFFIXES = (
"지 않", "지않", "지 않아", "지않아", "지 않다", "지않다", "지 않음", "지않음",
"지 못", "지못", "지 못해", "지못해", "지 못하다", "지못하다",
"않", "않아", "않다", "않음", "아냐", "아니야", "아니다", "아닌", "아님",
)
# A token is a run of word characters excluding "_".
_EMOTION_TOKEN_RE = re.compile(r"[^\W_]+", flags=re.UNICODE)
# Prefix markers and negation words with non-word characters removed,
# de-duplicated and ordered longest first.
_EMOTION_NEGATION_COMPACT_PREFIXES = tuple(sorted({
re.sub(r"[\W_]+", "", str(negation).strip().lower(), flags=re.UNICODE)
for negation in (*_EMOTION_NEGATION_PREFIXES, *_EMOTION_NEGATION_WORDS)
if str(negation).strip()
}, key=len, reverse=True))
# Same compaction for the suffix markers, longest first.
_EMOTION_NEGATION_COMPACT_SUFFIXES = tuple(sorted({
re.sub(r"[\W_]+", "", str(negation).strip().lower(), flags=re.UNICODE)
for negation in _EMOTION_NEGATION_SUFFIXES
if str(negation).strip()
}, key=len, reverse=True))
# Length of the longest compact prefix marker (6 if there were none); used
# as the look-behind width when checking for a negation before a match.
_EMOTION_NEGATION_CONTEXT_WINDOW = max(
(len(negation) for negation in _EMOTION_NEGATION_COMPACT_PREFIXES),
default=6,
)
def _looks_like_emotion_compact_candidate(candidate, cutoff):
    """Tell whether *candidate* is, or fuzzily resembles, a compact alias.

    Empty candidates never match; otherwise an exact hit in the compact
    alias lookup or a difflib close match at *cutoff* counts.
    """
    if not candidate:
        return False
    if candidate in _EMOTION_COMPACT_ALIAS_LOOKUP:
        return True
    near_matches = difflib.get_close_matches(
        candidate,
        _EMOTION_FUZZY_COMPACT_KEYS,
        n=1,
        cutoff=cutoff,
    )
    return len(near_matches) > 0
def _has_negated_emotion_phrase(normalized_text, compact_text, fuzzy_compact_cutoff):
    """Detect a label that is an emotion word combined with a negation.

    Three shapes are checked in turn: a stand-alone negation word among the
    tokens, a negation marker at the start of the compact text, and a
    negation marker after the start of the compact text. In each case what
    remains (or precedes the marker) must look like an emotion alias.
    """
    words = [word for word in _EMOTION_TOKEN_RE.findall(normalized_text) if word]
    kept_words = [word for word in words if word not in _EMOTION_NEGATION_WORDS]
    # A shorter list means at least one token was a negation word.
    if len(kept_words) != len(words):
        remainder = re.sub(r"[\W_]+", "", "".join(kept_words), flags=re.UNICODE)
        if _looks_like_emotion_compact_candidate(remainder, fuzzy_compact_cutoff):
            return True

    for marker in _EMOTION_NEGATION_COMPACT_PREFIXES:
        if compact_text.startswith(marker) and _looks_like_emotion_compact_candidate(
            compact_text[len(marker):], fuzzy_compact_cutoff
        ):
            return True

    for marker in _EMOTION_NEGATION_COMPACT_SUFFIXES:
        position = compact_text.find(marker)
        # position 0 would leave nothing in front of the marker.
        if position > 0 and _looks_like_emotion_compact_candidate(
            compact_text[:position], fuzzy_compact_cutoff
        ):
            return True
    return False
# All heuristic keywords/patterns are maintained per language in
# config/prompts/prompts_emotion.py; they are only flattened here.
_EMOTION_KEYWORDS = get_emotion_keywords_flat()
_SAD_VULNERABLE_PATTERNS = get_sad_vulnerable_patterns_flat()
_ANGRY_ATTACK_PATTERNS = get_angry_attack_patterns_flat()
_HAPPY_PLAYFUL_PATTERNS = get_happy_playful_patterns_flat()
def _normalize_emotion_label(raw_emotion, raw_confidence=None):
    """Map a free-form emotion label onto one of the canonical labels.

    Resolution order: exact alias lookup (spaced form, then compact form),
    negated phrase -> ``"neutral"``, un-negated alias occurrence inside the
    text (longest alias first), fuzzy alias match, fuzzy compact match and,
    for high-confidence input only, a loose fuzzy match against the
    canonical labels. Anything unresolved is ``"neutral"``.

    Args:
        raw_emotion: Label text; falsy values resolve to ``"neutral"``.
        raw_confidence: Optional confidence; a value of at least 0.72
            loosens the fuzzy cutoffs.
    """
    lowered = str(raw_emotion or "").strip().lower()
    if not lowered:
        return "neutral"

    spaced = re.sub(r"[\s\-_]+", " ", lowered)
    if spaced in _EMOTION_NORMALIZED_ALIAS_LOOKUP:
        return _EMOTION_NORMALIZED_ALIAS_LOOKUP[spaced]
    compact = re.sub(r"[\W_]+", "", lowered, flags=re.UNICODE)
    if compact in _EMOTION_COMPACT_ALIAS_LOOKUP:
        return _EMOTION_COMPACT_ALIAS_LOOKUP[compact]

    confident = raw_confidence is not None and _coerce_emotion_confidence(raw_confidence, 0.0) >= 0.72
    if confident:
        alias_cutoff, compact_cutoff = 0.74, 0.72
    else:
        alias_cutoff, compact_cutoff = 0.9, 0.88

    if _has_negated_emotion_phrase(spaced, compact, compact_cutoff):
        return "neutral"

    def _ascii_hit_is_negated(start):
        # A negation word among the three tokens before the hit negates it.
        preceding = _EMOTION_TOKEN_RE.findall(spaced[:start])
        return not _EMOTION_NEGATION_WORDS.isdisjoint(preceding[-3:])

    def _compact_hit_is_negated(start):
        # A negation marker ending right where the hit starts negates it.
        window = compact[max(0, start - _EMOTION_NEGATION_CONTEXT_WINDOW):start]
        return window.endswith(_EMOTION_NEGATION_COMPACT_PREFIXES)

    longest_first = sorted(
        _EMOTION_NORMALIZED_ALIAS_LOOKUP.items(),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )
    for alias, canonical in longest_first:
        if not alias:
            continue
        if _ASCII_EMOTION_ALIAS_RE.match(alias):
            # ASCII aliases must match as whole words in the spaced text.
            bounded = r"(?<![a-z0-9])" + re.escape(alias) + r"(?![a-z0-9])"
            if any(
                not _ascii_hit_is_negated(hit.start())
                for hit in re.finditer(bounded, spaced)
            ):
                return canonical
            continue
        compact_alias = re.sub(r"[\W_]+", "", alias, flags=re.UNICODE)
        if not compact_alias:
            continue
        # Other aliases are searched as substrings of the compact text.
        cursor = compact.find(compact_alias, 0)
        while cursor >= 0:
            if not _compact_hit_is_negated(cursor):
                return canonical
            cursor = compact.find(compact_alias, cursor + len(compact_alias))

    close_aliases = difflib.get_close_matches(
        spaced,
        _EMOTION_FUZZY_ALIAS_KEYS,
        n=1,
        cutoff=alias_cutoff,
    )
    if close_aliases:
        return _EMOTION_NORMALIZED_ALIAS_LOOKUP[close_aliases[0]]
    if compact:
        close_compacts = difflib.get_close_matches(
            compact,
            _EMOTION_FUZZY_COMPACT_KEYS,
            n=1,
            cutoff=compact_cutoff,
        )
        if close_compacts:
            return _EMOTION_COMPACT_ALIAS_LOOKUP[close_compacts[0]]
    if confident:
        close_labels = difflib.get_close_matches(
            spaced,
            _EMOTION_CANONICAL_LABELS,
            n=1,
            cutoff=0.55,
        )
        if close_labels:
            return close_labels[0]
    return "neutral"
def _push_emotion_update(lanlan_name, emotion, confidence):
    """Enqueue an emotion update message for the given character.

    Looks up the sync message queue mapping and, when ``lanlan_name`` is
    non-empty and has an entry in it, puts a JSON-typed message carrying the
    emotion label and its confidence. Nothing happens otherwise.

    Args:
        lanlan_name: Key used to look up the target queue.
        emotion: Emotion label to broadcast.
        confidence: Confidence value attached to the emotion.
    """
    queues = get_sync_message_queue()
    if not lanlan_name or lanlan_name not in queues:
        return

    emotion_payload = {
        "type": "emotion",
        "emotion": emotion,
        "confidence": confidence,
    }
    queues[lanlan_name].put({"type": "json", "data": emotion_payload})
def _emotion_response(emotion, confidence):
return {
"emotion": emotion,
"confidence": confidence
}
def _coerce_emotion_confidence(raw_confidence, default=0.5):
try:
confidence = float(raw_confidence)
except (TypeError, ValueError):
confidence = float(default)
if not math.isfinite(confidence):
confidence = float(default)
return max(0.0, min(1.0, confidence))
# The negation look-back tokens and contrast-conjunction tables used by the
# heuristic scoring are maintained per language in
# config/prompts/prompts_emotion.py.
_HEURISTIC_NEGATION_TOKENS = get_heuristic_negation_tokens_flat()
_HEURISTIC_TIGHT_NEGATION_TOKENS = get_heuristic_tight_negation_tokens_flat()
_HEURISTIC_NEGATION_BLOCKLIST = get_heuristic_negation_blocklist_flat()
_HEURISTIC_CONTRAST_CONJUNCTIONS = get_heuristic_contrast_conjunctions_flat()
# Number of characters before a keyword hit that are scanned for negation.
_HEURISTIC_NEGATION_LOOKBACK = 14
# zh single-character negations (`不/没/别/未`, etc.) have a high false-positive
# rate, so they only count as a real negation when directly adjacent to the
# emotion word. This keeps single characters inside non-negating phrases such
# as `不错/不思议/不具合` from triggering by mistake.
_HEURISTIC_TIGHT_NEGATION_LOOKBACK = 2
# 子句分隔符:回看窗口越过分隔符后的内容视为另一小句,不再修饰本次命中。
# 避免 "我不是难过,我是生气" 中 `生气` 的回看抓到前一小句的 `不` 而被误判否定。
_HEURISTIC_CLAUSE_DELIMITERS = (
'.', ',', ';', '!', '?', '\n',
',', '。', ';', '!', '?', '、', ':', ':',
)
def _has_heuristic_negation_before(text_value, position):
if position <= 0:
return False
start = max(0, position - _HEURISTIC_NEGATION_LOOKBACK)
window = text_value[start:position]
# 1) 窗口越过子句分隔符(标点)的部分丢掉,只看与命中关键词同小句的前文
last_delim = -1
for delim in _HEURISTIC_CLAUSE_DELIMITERS:
idx = window.rfind(delim)
if idx > last_delim:
last_delim = idx
if last_delim >= 0:
window = window[last_delim + 1:]
# 2) 句首场景补一个前导空格,统一处理带前导空格的 token(否定 ` no `、连词 ` but `)
window = ' ' + window
# 3) 让步/转折连词同样切断否定范围:处理 "not X but Y / 不是 X 而是 Y" 对比句,
# 避免前半的否定被错误带到后半的情绪关键词。
last_conj = -1
for conj in _HEURISTIC_CONTRAST_CONJUNCTIONS:
idx = window.rfind(conj)
if idx >= 0:
end_pos = idx + len(conj)
if end_pos > last_conj:
last_conj = end_pos
if last_conj >= 0:
window = window[last_conj:]
# 4) 排除非否定固定搭配(`not only / 不仅 / не только` 等肯定结构里的 not/不/не
# 并不是真否定):把这些短语从 window 里替换成等长空白后再做 token 匹配。
sanitized = window
for phrase in _HEURISTIC_NEGATION_BLOCKLIST:
if phrase and phrase in sanitized:
sanitized = sanitized.replace(phrase, ' ' * len(phrase))
# 5) 多字否定 token(宽 lookback)
if any(token in sanitized for token in _HEURISTIC_NEGATION_TOKENS):
return True
# 6) zh 单字否定 token:仅在紧邻命中关键词的尾部窗口里才算真否定,
# 避免 `不错/不思议/不具合` 等非否定词组里的单字误触发整个否定。
if _HEURISTIC_TIGHT_NEGATION_TOKENS:
tight_window = sanitized[-_HEURISTIC_TIGHT_NEGATION_LOOKBACK:]
if any(token in tight_window for token in _HEURISTIC_TIGHT_NEGATION_TOKENS):
return True
return False
# English keywords are matched with ASCII-only word boundaries so that `happy`
# does not hit `unhappy` and `surprised` does not hit `unsurprised` (an
# opposite emotion embedded in a longer word).
# Note: `\b` cannot be used here. In Python's default Unicode regex mode CJK
# characters also count as \w, so in mixed-script text (e.g.
# `好happy啊 / 超annoyed欸`) there is no word boundary between `好` and `h`, and
# the English keyword would never match. ASCII-letter lookaround assertions are
# used instead: `(?<![a-zA-Z])keyword(?![a-zA-Z])`, which lets CJK characters,
# punctuation and whitespace all act as boundaries.
# Maps a keyword to its compiled boundary pattern.
_ASCII_WORD_KEYWORD_RE_CACHE = {}
def _is_ascii_word_keyword(keyword):
if not keyword:
return False
return all(c.isascii() and (c.isalpha() or c in " '") for c in keyword)
def _count_keyword_hits(text_value, keyword):
if not keyword or not text_value:
return 0
if _is_ascii_word_keyword(keyword):
pattern = _ASCII_WORD_KEYWORD_RE_CACHE.get(keyword)
if pattern is None:
pattern = re.compile(r'(?<![a-zA-Z])' + re.escape(keyword) + r'(?![a-zA-Z])')
_ASCII_WORD_KEYWORD_RE_CACHE[keyword] = pattern
hits = 0
for match in pattern.finditer(text_value):
if not _has_heuristic_negation_before(text_value, match.start()):
hits += 1
return hits
hits = 0
search_start = 0
while True:
pos = text_value.find(keyword, search_start)
if pos < 0:
break
if not _has_heuristic_negation_before(text_value, pos):
hits += 1
search_start = pos + len(keyword)
return hits
def _infer_emotion_from_text(text):
text_value = str(text or "").lower()
if not text_value:
return None, 0
scores = {key: 0 for key in _EMOTION_KEYWORDS}
for emotion, keywords in _EMOTION_KEYWORDS.items():
for keyword in keywords:
scores[emotion] += _count_keyword_hits(text_value, keyword)