Project-N-E-K-O
diff --git a/‎docs/design/user-activity-tracker.md‎
Lines changed: 48 additions & 5 deletions b/‎docs/design/user-activity-tracker.md‎
Lines changed: 48 additions & 5 deletions
diff --git a/‎main_logic/activity/system_signals.py‎
Lines changed: 32 additions & 8 deletions b/‎main_logic/activity/system_signals.py‎
Lines changed: 32 additions & 8 deletions
diff --git a/‎main_logic/activity/tracker.py‎
Lines changed: 24 additions & 1 deletion b/‎main_logic/activity/tracker.py‎
Lines changed: 24 additions & 1 deletion
diff --git a/‎main_routers/agent_router.py‎
Lines changed: 44 additions & 0 deletions b/‎main_routers/agent_router.py‎
Lines changed: 44 additions & 0 deletions
@@ -794,7 +794,7 @@ and to weigh conversation signals more heavily.
 `UserActivityTracker.push_external_system_signal(...)` accepts OS
 signals from outside the backend — designed for a frontend (Electron
 app, browser via WebSocket, mobile shell) to read its local OS state
-and POST it on a heartbeat. When fresh (≤ 30s), pushed signals
+and POST it on a heartbeat. When fresh (≤ 15s), pushed signals
 override the local collector entirely. When stale (heartbeat stops),
 the tracker falls back to the local collector and the degraded marker
 re-appears.
@@ -815,10 +815,53 @@ All fields optional — pass whatever the frontend can read on each
 platform. The push primes `os_signals_available=True` so the AI sees
 non-degraded state.
 
-The HTTP endpoint to receive these pushes hasn't been added yet — when
-the frontend implementation lands, wire it via something like
-`POST /api/activity_signal/{lanlan_name}` in `system_router.py`.
-Until then, the API surface exists for whoever builds it.
+#### HTTP endpoint
+
+`POST /api/activity_signal` (in `main_routers/system_router.py`) is the
+public surface. Body is a JSON object with `lanlan_name` (required)
+plus any subset of the snake_case fields above. The endpoint enforces:
+
+- 400 on malformed body / out-of-range fields
+- 404 when `lanlan_name` isn't registered
+- 429 when pushes for the same `lanlan_name` arrive less than 5s apart
+  (matches the heartbeat cadence — `_EXTERNAL_SIGNAL_MIN_INTERVAL` in
+  `tracker.py`). Honour `Retry-After`.
+- 503 if the character's tracker hasn't initialised yet (boot race —
+  retry on the next heartbeat)
+- 500 if the tracker raises
+
+No auth header today — defended by the per-character `lanlan_name`
+lookup + rate limit. A stricter CSRF/Origin guard is tracked for a
+follow-up PR; the threat model write-up lives in issue #1023.
+
+#### Renderer client
+
+`static/app-activity-signal.js` does the 5s heartbeat in the desktop
+pet window. It reads OS signals through the Electron preload bridge
+(`window.nekoActivitySignal.read()` — exposed by the NEKO-PC sibling
+repo), normalises camelCase → snake_case, and POSTs to the endpoint
+above. The module is defensive: when the bridge isn't exposed
+(non-Electron dev runs, mobile browser shell, NEKO-PC older than the
+companion PR), it logs once and stays silent — the backend tracker's
+local collector handles the rest in degraded mode.
+
+#### Electron bridge contract (NEKO-PC side)
+
+The companion PR in NEKO-PC adds an IPC handler that returns:
+
+```js
+{
+    windowTitle: "VS Code — neko",  // active-win → activeWindow.title
+    processName: "Code.exe",        // active-win → activeWindow.owner.name
+    idleSeconds: 12,                // powerMonitor.getSystemIdleTime() (seconds, not ms)
+    cpuAvg30s: 27.5,                // os.cpus() rolling diff, [0, 100]
+    gpuUtilization: 65.0,           // nvidia-smi (optional — null on AMD/Intel)
+}
+```
+
+The renderer (`app-activity-signal.js`) drops any field that fails
+type/range validation and POSTs the rest. A partial snapshot is still
+better than no push.
 
 ### What works in fully-degraded remote mode
 
 
@@ -42,17 +42,35 @@
 _IS_WINDOWS = platform.system() == 'Windows'
 
 
-def _force_degraded_from_env() -> bool:
+def is_remote_backend_deployment() -> bool:
     """Honour ``NEKO_ACTIVITY_TRACKER_REMOTE`` / ``ACTIVITY_TRACKER_REMOTE``.
 
-    Set to ``1`` / ``true`` / ``yes`` when the backend is on a different
-    machine from the user — covers the Windows-remote edge case where
-    the local OS APIs would happily report the server's foreground
-    window (since pygetwindow technically works), but those signals
-    are about the server, not the user.
+    Single source of truth for the "is the backend running on a different
+    machine from the user" question. Two unrelated consumers used to keep
+    their own copies of this env-var check and drifted:
+
+      * the activity collector here — flips the OS-signal pipeline into
+        degraded mode (window/idle/CPU/GPU come from frontend push or
+        not at all).
+      * ``main_routers/system_router._is_remote_backend_deployment`` —
+        blocks local-machine operations like ``/api/screenshot`` from
+        accidentally capturing the *server's* desktop and returning it
+        to the user. ``main_routers/agent_router`` follows the same
+        rule for ``computer_use`` / agent commands.
+
+    Both now call into this function. The check itself is intentionally
+    cheap (env lookup) so it's safe to call inline on every request.
+
+    Set to ``1`` / ``true`` / ``yes`` / ``on`` when the backend is on a
+    different machine from the user — covers the Windows-remote edge
+    case where the local OS APIs would happily report the server's
+    foreground window (since pygetwindow technically works), but those
+    signals are about the server, not the user. Same applies to
+    pyautogui screenshots and computer_use commands — they target the
+    backend machine, which is wrong when that machine isn't the user's.
 
     Default off — most users run backend on their own PC where local
-    OS signals are correct.
+    OS signals / screenshots / computer_use are correct.
     """
     for key in ('NEKO_ACTIVITY_TRACKER_REMOTE', 'ACTIVITY_TRACKER_REMOTE'):
         raw = os.getenv(key, '').strip().lower()
@@ -61,6 +79,12 @@ def _force_degraded_from_env() -> bool:
     return False
 
 
+# Legacy private alias — keeps in-flight callers of the old name working
+# without a sweep. New code calls ``is_remote_backend_deployment``; tests
+# must patch that name — the collector no longer goes through the alias.
+_force_degraded_from_env = is_remote_backend_deployment
+
+
 # ── Public snapshot dataclass ───────────────────────────────────────
 
 @dataclass(frozen=True, slots=True)
@@ -185,7 +209,7 @@ def __init__(self, *, poll_interval: float = 5.0) -> None:
         # for the case where the backend is a Windows server and the user
         # is on a different machine — local OS APIs would technically
         # work but report data about the server, not the user.
-        env_force_degraded = _force_degraded_from_env()
+        env_force_degraded = is_remote_backend_deployment()
         self._os_signals_available: bool = bool(
             _IS_WINDOWS and self._gw is not None and not env_force_degraded
         )
 
@@ -72,7 +72,30 @@
 # seconds. After that the tracker falls back to the local collector
 # (which on remote deployments will be in degraded mode) — better to
 # advertise "no signal" than to keep using stale window data.
-_EXTERNAL_SIGNAL_TTL_SECONDS = 30.0
+#
+# 15s = 3× the 5s heartbeat. The push pipeline stacks two unsynchronised
+# 5s timers — the NEKO-PC bridge sampler (reads OS signals) and the
+# renderer heartbeat (reads the bridge's cached snapshot + POSTs) — so
+# worst-case data age can already approach ~10-12s before any loss. 15s
+# therefore tolerates ~2 consecutive dropped pushes before falling back.
+# Shorter (e.g. 10s) would thrash between fresh/degraded on a single
+# drop over a lossy remote link; 30s keeps trusting a stale "user
+# active" snapshot for too long after the heartbeat dies. 15s balances
+# faster stale-detection against fallback thrash.
+_EXTERNAL_SIGNAL_TTL_SECONDS = 15.0
+
+# Minimum interval between accepted external-signal pushes for a given
+# lanlan_name. Tuned together with the frontend heartbeat: the renderer
+# heartbeat pushes every ~5s, so anything more frequent is either a buggy
+# client (re-entering the heartbeat) or spam. Enforced by the
+# ``/api/activity_signal`` endpoint, not the tracker itself — the
+# tracker is happily idempotent and just overwrites the last push.
+#
+# Pairs with TTL above: TTL is the "data freshness" window, this is the
+# "request frequency" cap. TTL is 3× this interval, so the tracker
+# tolerates ~2 consecutive rate-limited/dropped pushes and still has
+# data within the freshness window.
+_EXTERNAL_SIGNAL_MIN_INTERVAL = 5.0
 
 
 # ── Break-reminder defaults ─────────────────────────────────────────
 
@@ -26,6 +26,7 @@
 from .shared_state import get_session_manager, get_config_manager, get_templates
 from config import TOOL_SERVER_PORT, USER_PLUGIN_BASE
 from main_logic.agent_event_bus import publish_session_event
+from main_logic.activity.system_signals import is_remote_backend_deployment
 
 router = APIRouter(prefix="/api/agent", tags=["agent"])
 logger = get_module_logger(__name__, "Main")
@@ -56,6 +57,37 @@
 }
 
 
+def _remote_backend_block() -> JSONResponse | None:
+    """Reject agent mutation when backend is deployed away from the user.
+
+    In remote mode (``NEKO_ACTIVITY_TRACKER_REMOTE=1``) the "computer"
+    that computer_use / browser_use / openclaw would control is the
+    *server's*, not the user's — there's no useful action to take, and
+    silently forwarding the command to a localhost tool_server on the
+    server side is actively dangerous (anyone who can reach the public
+    backend HTTP can drive the agent on the server). Returning 501
+    matches the same-env block on ``/api/screenshot`` in
+    ``main_routers/system_router.py`` so the frontend can surface a
+    uniform "agent unavailable on remote backend" state.
+
+    Threat model context: a deeper CSRF + Origin guard (defending
+    against DNS-rebinding-style attacks on a *local* backend) is
+    deferred to a follow-up — it needs the ~15 frontend agent fetch
+    sites to start sending ``X-CSRF-Token`` first, which doesn't fit
+    PR B's scope. See issue #1023 for the audit + scope decision.
+    """
+    if is_remote_backend_deployment():
+        return JSONResponse(
+            {
+                "success": False,
+                "error": "agent disabled in remote backend deployment "
+                         "(NEKO_ACTIVITY_TRACKER_REMOTE)",
+            },
+            status_code=501,
+        )
+    return None
+
+
 async def force_disable_agent_for_character_switch(current_lanlan: str, previous_lanlan: str | None = None) -> bool:
     """角色切换后强制关闭猫爪，避免工具服务的全局旧状态串到新角色。"""
     names = {
@@ -197,6 +229,9 @@ async def _close_http_client():
 @router.post('/flags')
 async def update_agent_flags(request: Request):
     """来自前端的Agent开关更新，级联到各自的session manager。"""
+    blocked = _remote_backend_block()
+    if blocked is not None:
+        return blocked
     try:
         data = await request.json()
         _config_manager = get_config_manager()
@@ -276,6 +311,9 @@ async def get_agent_state():
 @router.post('/command')
 async def post_agent_command(request: Request):
     """统一命令入口，前端只发送 command，不直接操作多路开关。"""
+    blocked = _remote_backend_block()
+    if blocked is not None:
+        return blocked
     t0 = time.perf_counter()
     try:
         data = await request.json()
@@ -521,6 +559,9 @@ async def proxy_task_detail(task_id: str):
 @router.post('/tasks/{task_id}/cancel')
 async def proxy_task_cancel(task_id: str):
     """Cancel a specific task via tool server proxy."""
+    blocked = _remote_backend_block()
+    if blocked is not None:
+        return blocked
     try:
         client = _get_http_client()
         r = await client.post(f"{TOOL_SERVER_BASE}/tasks/{task_id}/cancel", timeout=5.0)
@@ -534,6 +575,9 @@ async def proxy_task_cancel(task_id: str):
 @router.post('/admin/control')
 async def proxy_admin_control(payload: dict = Body(...)):
     """Proxy admin control commands to tool server."""
+    blocked = _remote_backend_block()
+    if blocked is not None:
+        return blocked
     try:
         client = _get_http_client()
         r = await client.post(f"{TOOL_SERVER_BASE}/admin/control", json=payload, timeout=5.0)