zhushanwen321
diff --git a/src/views/project/llm-simple-router/Overview.vue b/src/views/project/llm-simple-router/Overview.vue
Lines changed: 22 additions & 14 deletions
diff --git a/src/views/project/llm-simple-router/architecture/RequestPipeline.vue b/src/views/project/llm-simple-router/architecture/RequestPipeline.vue
Lines changed: 5 additions & 3 deletions
diff --git a/src/views/project/llm-simple-router/architecture/SystemContext.vue b/src/views/project/llm-simple-router/architecture/SystemContext.vue
Lines changed: 11 additions & 0 deletions
diff --git a/src/views/project/llm-simple-router/features/AutoRetry.vue b/src/views/project/llm-simple-router/features/AutoRetry.vue
Lines changed: 6 additions & 6 deletions
diff --git a/src/views/project/llm-simple-router/features/Concurrency.vue b/src/views/project/llm-simple-router/features/Concurrency.vue
Lines changed: 36 additions & 8 deletions
@@ -49,15 +49,19 @@ const isZh = computed(() => locale.value === 'zh')
           <tr><th>功能</th><th>说明</th></tr>
         </thead>
         <tbody>
-          <tr><td>自动重试</td><td>对 429/400/网络超时自动指数退避重试</td></tr>
-          <tr><td>多供应商支持</td><td>智谱、Moonshot、Minimax、火山引擎、阿里云、腾讯云等</td></tr>
-          <tr><td>模型分时段映射</td><td>按时间段自动切换后端模型</td></tr>
-          <tr><td>并发队列等待</td><td>按 Provider 配置并发数上限，超限请求排队等待</td></tr>
+          <tr><td>自动重试</td><td>对 429/400/1305(ZAI过载)/网络超时自动指数退避重试，支持状态码级可配置规则</td></tr>
+          <tr><td>多供应商支持</td><td>智谱、Moonshot、Minimax、火山引擎、阿里云、腾讯云等，支持自定义 upstream_path</td></tr>
+          <tr><td>模型分时段调度</td><td>按时间段自动切换后端模型，支持 transform_rule，可视化 pipeline 编辑器</td></tr>
+          <tr><td>自适应并发控制</td><td>根据上游响应时间动态调整并发度，超限排队，信号量机制</td></tr>
+          <tr><td>LLM 循环检测</td><td>N-gram 算法检测输出循环，自动中断，节省 Token</td></tr>
           <tr><td>Failover 故障转移</td><td>多 Provider 互备，失败自动切换下一个</td></tr>
-          <tr><td>实时请求监控</td><td>SSE 推送活跃请求、队列状态、流式输出实时查看</td></tr>
+          <tr><td>每模型流式超时</td><td>按模型粒度配置 stream_timeout_ms，超时后返回 408 错误</td></tr>
+          <tr><td>网络代理</td><td>Provider 级别配置 SOCKS5 / HTTPS 代理，支持认证</td></tr>
+          <tr><td>实时请求监控</td><td>Dashboard 按 Provider 分 Tab，SSE 推送活跃请求、队列状态</td></tr>
           <tr><td>多密钥管理</td><td>独立密钥 + 模型白名单，支持多用户/多项目</td></tr>
-          <tr><td>请求日志</td><td>四阶段完整链路（客户端请求/上游请求/上游响应/客户端响应）</td></tr>
-          <tr><td>性能指标</td><td>TTFT、TPS、Token 用量、缓存命中率</td></tr>
+          <tr><td>请求日志</td><td>四阶段完整链路 + 分页 + 工具错误日志</td></tr>
+          <tr><td>性能指标</td><td>TTFT、TPS、Token 用量（Input/Output 分拆）、缓存命中率</td></tr>
+          <tr><td>OpenAI 兼容</td><td>/v1/chat/completions + /v1/responses 端点，Provider Patch 自动转换</td></tr>
         </tbody>
       </table>
     </template>
@@ -102,15 +106,19 @@ const isZh = computed(() => locale.value === 'zh')
           <tr><th>Feature</th><th>Description</th></tr>
         </thead>
         <tbody>
-          <tr><td>Auto Retry</td><td>Automatic exponential backoff retry for 429/400/network timeout errors</td></tr>
-          <tr><td>Multi-Provider</td><td>Zhipu, Moonshot, Minimax, Volcengine, Alibaba Cloud, Tencent Cloud, etc.</td></tr>
-          <tr><td>Time-based Model Mapping</td><td>Auto-switch backend models by time window</td></tr>
-          <tr><td>Concurrency Queue</td><td>Configurable per-provider concurrency limits with queue waiting</td></tr>
+          <tr><td>Auto Retry</td><td>Exponential backoff for 429/400/1305(ZAI overload)/timeout, per-status-code rules</td></tr>
+          <tr><td>Multi-Provider</td><td>Zhipu, Moonshot, Minimax, Volcengine, Alibaba Cloud, Tencent Cloud, etc. + custom upstream_path</td></tr>
+          <tr><td>Time-based Model Scheduling</td><td>Auto-switch by time window + transform_rule, visual pipeline editor</td></tr>
+          <tr><td>Adaptive Concurrency</td><td>Dynamic concurrency based on upstream response time, semaphore queuing</td></tr>
+          <tr><td>LLM Loop Detection</td><td>N-gram algorithm detects output loops, auto-interrupt to save tokens</td></tr>
           <tr><td>Failover</td><td>Multi-provider backup, auto-switch on failure</td></tr>
-          <tr><td>Live Monitor</td><td>SSE-pushed active requests, queue status, streaming output</td></tr>
+          <tr><td>Per-Model Stream Timeout</td><td>Model-level stream_timeout_ms config with 408 error response</td></tr>
+          <tr><td>Network Proxy</td><td>Per-Provider SOCKS5/HTTPS proxy configuration with authentication</td></tr>
+          <tr><td>Live Monitor</td><td>Dashboard with per-Provider tabs, SSE-pushed active requests and queue status</td></tr>
           <tr><td>Multi-Key</td><td>Independent API keys with model whitelists for multi-user/multi-project</td></tr>
-          <tr><td>Request Logging</td><td>Complete four-stage pipeline logging</td></tr>
-          <tr><td>Performance Metrics</td><td>TTFT, TPS, token usage, cache hit rate</td></tr>
+          <tr><td>Request Logging</td><td>Complete four-stage pipeline + pagination + tool error logging</td></tr>
+          <tr><td>Performance Metrics</td><td>TTFT, TPS, split input/output token usage, cache hit rate</td></tr>
+          <tr><td>OpenAI Compatible</td><td>/v1/chat/completions + /v1/responses endpoints, Provider Patch auto-conversion</td></tr>
         </tbody>
       </table>
     </template>
 
@@ -7,7 +7,8 @@ const isZh = computed(() => locale.value === 'zh')
 
 const stages = [
   { nameZh: '认证', nameEn: 'Auth', descZh: 'Token 校验', descEn: 'Token Verify' },
-  { nameZh: '模型映射', nameEn: 'Model Map', descZh: 'A → B 转换', descEn: 'A → B Mapping' },
+  { nameZh: '模型映射', nameEn: 'Model Map', descZh: '调度匹配', descEn: 'Schedule Match' },
+  { nameZh: '网络代理', nameEn: 'Proxy', descZh: 'SOCKS5/HTTPS', descEn: 'SOCKS5/HTTPS' },
   { nameZh: '并发排队', nameEn: 'Concurrency', descZh: '信号量控制', descEn: 'Semaphore' },
   { nameZh: '调用上游', nameEn: 'Upstream', descZh: 'HTTP 代理', descEn: 'HTTP Proxy' },
   { nameZh: '日志 + 指标', nameEn: 'Log + Metric', descZh: '记录采集', descEn: 'Log & Collect' },
@@ -52,8 +53,9 @@ const stages = [
       <thead><tr><th>{{ isZh ? '阶段' : 'Stage' }}</th><th>{{ isZh ? '做什么' : 'What It Does' }}</th></tr></thead>
       <tbody>
         <tr><td>{{ isZh ? '认证' : 'Auth' }}</td><td>{{ isZh ? 'Bearer Token SHA256 哈希后查询 router_keys 表' : 'Bearer Token SHA256 hashed and looked up in router_keys table' }}</td></tr>
-        <tr><td>{{ isZh ? '模型映射 + 路由策略' : 'Model Mapping + Routing' }}</td><td>{{ isZh ? '客户端模型名映射到后端实际模型；支持分时段/轮询/随机/故障转移' : 'Map client model name to backend actual model; supports scheduled/round-robin/random/failover' }}</td></tr>
-        <tr><td>{{ isZh ? '并发排队' : 'Concurrency Queue' }}</td><td>{{ isZh ? 'Provider 级信号量，队列满返回 503，超时返回 504' : 'Provider-level semaphore, queue full → 503, timeout → 504' }}</td></tr>
+        <tr><td>{{ isZh ? '模型映射 + 路由策略' : 'Model Mapping + Routing' }}</td><td>{{ isZh ? '客户端模型名通过调度层匹配到后端模型；支持分时段调度、故障转移和 transform_rule 规则' : 'Client model name resolved through scheduling layer to backend model; supports scheduled, failover, and transform_rule' }}</td></tr>
+        <tr><td>{{ isZh ? '网络代理' : 'Network Proxy' }}</td><td>{{ isZh ? '通过 SOCKS5 或 HTTPS 代理连接上游 Provider（可选，按 Provider 配置）' : 'Connect to upstream Provider via SOCKS5 or HTTPS proxy (optional, per Provider config)' }}</td></tr>
+        <tr><td>{{ isZh ? '并发排队' : 'Concurrency Queue' }}</td><td>{{ isZh ? 'Provider 级信号量，支持自适应动态调整。队列满返回 503，超时返回 504' : 'Provider-level semaphore with adaptive adjustment. Queue full → 503, timeout → 504' }}</td></tr>
         <tr><td>{{ isZh ? '调用上游' : 'Upstream Call' }}</td><td>{{ isZh ? '原生 HTTP 代理，支持 SSE 流式；失败自动重试，Failover 切换 Provider' : 'Native HTTP proxy, supports SSE streaming; auto-retry on failure, failover switches Provider' }}</td></tr>
         <tr><td>{{ isZh ? '日志 + 指标' : 'Logging + Metrics' }}</td><td>{{ isZh ? '记录完整请求链路，采集 Token 用量、TTFT、TPS' : 'Record complete request pipeline, collect token usage, TTFT, TPS' }}</td></tr>
         <tr><td>{{ isZh ? '返回响应' : 'Return Response' }}</td><td>{{ isZh ? '将上游响应原样返回客户端' : 'Return upstream response as-is to client' }}</td></tr>
 
@@ -64,5 +64,16 @@ const isZh = computed(() => locale.value === 'zh')
         <tr><td>{{ isZh ? '上游 Provider' : 'Upstream Providers' }}</td><td>{{ isZh ? '接收转发的请求，返回 SSE 流或 JSON 响应' : 'Receive forwarded requests, return SSE streams or JSON responses' }}</td></tr>
       </tbody>
     </table>
+
+    <h2>{{ isZh ? '内部架构：Monorepo' : 'Internal Architecture: Monorepo' }}</h2>
+    <p>{{ isZh ? 'v0.9.0 起，项目重构为 Monorepo 结构，拆分为三个独立包：' : 'Since v0.9.0, the project has been restructured as a monorepo with three independent packages:' }}</p>
+    <table>
+      <thead><tr><th>{{ isZh ? '包名' : 'Package' }}</th><th>{{ isZh ? '说明' : 'Description' }}</th></tr></thead>
+      <tbody>
+        <tr><td><code>@llm-router/core</code></td><td>{{ isZh ? '核心模块库：并发控制、循环检测、日志、监控、错误处理等可复用组件' : 'Core module library: concurrency control, loop detection, logging, monitoring, error handling — reusable components' }}</td></tr>
+        <tr><td><code>router</code></td><td>{{ isZh ? '主路由服务：HTTP 代理、模型映射调度、Provider Patch 转换' : 'Main router service: HTTP proxy, model mapping scheduling, Provider Patch conversion' }}</td></tr>
+        <tr><td><code>pi-extension</code></td><td>{{ isZh ? 'Pi 编码代理插件：将并发、循环检测、监控等能力对接 pi agent 平台' : 'Pi coding agent extension: adapts concurrency, loop detection, and monitoring for the pi agent platform' }}</td></tr>
+      </tbody>
+    </table>
   </div>
 </template>
@@ -11,7 +11,7 @@ const isZh = computed(() => locale.value === 'zh')
     <template v-if="isZh">
       <h1>自动重试</h1>
       <p>
-        当后端 Provider 返回 429（限流）、400（特定错误）或网络超时时，Router 自动按重试规则进行指数退避重试，无需手动干预。
+        当后端 Provider 返回 429（限流）、400（特定错误）、1305（ZAI 模型过载）或网络超时时，Router 自动按重试规则进行指数退避重试，无需手动干预。
       </p>
       <h2>重试策略</h2>
       <table>
@@ -22,12 +22,12 @@ const isZh = computed(() => locale.value === 'zh')
         </tbody>
       </table>
       <h2>重试规则配置</h2>
-      <p>在管理后台的重试规则页面，可以按状态码配置是否重试、重试策略、最大重试次数、基础延迟等参数。</p>
+      <p>在管理后台的重试规则页面，可以按状态码配置是否重试、重试策略、最大重试次数、基础延迟等参数。支持的状态码包括 429、400、1305（ZAI 平台模型过载）等。</p>
       <ScreenShot src="/images/llm-simple-router/retry.png" caption="重试规则配置" />
       <h2>重试流程</h2>
       <div class="not-prose my-4 rounded-lg border border-white/10 bg-surface-50 p-4">
         <code class="text-sm font-mono text-gray-300 block leading-loose">
-          请求发送 → 收到 429/400/超时<br>
+          请求发送 → 收到 429/400/1305/超时<br>
           → 判断是否可重试（匹配重试规则）<br>
           → 等待（fixed 或 exponential 退避）<br>
           → 重新发送请求<br>
@@ -46,7 +46,7 @@ const isZh = computed(() => locale.value === 'zh')
     <template v-else>
       <h1>Auto Retry</h1>
       <p>
-        When the backend Provider returns a 429 (rate limit), 400 (specific error), or a network timeout, the Router automatically retries with exponential backoff based on configured rules — no manual intervention needed.
+        When the backend Provider returns 429 (rate limit), 400 (specific error), 1305 (ZAI model overload), or a network timeout, the Router automatically retries with exponential backoff based on configured rules — no manual intervention needed.
       </p>
       <h2>Retry Strategies</h2>
       <table>
@@ -57,12 +57,12 @@ const isZh = computed(() => locale.value === 'zh')
         </tbody>
       </table>
       <h2>Rule Configuration</h2>
-      <p>On the admin panel's Retry Rules page, configure retry by status code, strategy, max attempts, and base delay.</p>
+      <p>On the admin panel's Retry Rules page, configure retry by status code (e.g. 429, 400, 1305 for ZAI overload), strategy, max attempts, and base delay.</p>
       <ScreenShot src="/images/llm-simple-router/retry.png" caption="Retry Rules Configuration" />
       <h2>Retry Flow</h2>
       <div class="not-prose my-4 rounded-lg border border-white/10 bg-surface-50 p-4">
         <code class="text-sm font-mono text-gray-300 block leading-loose">
-          Request sent → Receive 429/400/Timeout<br>
+          Request sent → Receive 429/400/1305/Timeout<br>
           → Check if retryable (match retry rules)<br>
           → Wait (fixed or exponential backoff)<br>
           → Resend request<br>
 
@@ -9,7 +9,8 @@ const isZh = computed(() => locale.value === 'zh')
   <div class="prose prose-invert max-w-none">
     <template v-if="isZh">
       <h1>并发控制</h1>
-      <p>按 Provider 配置并发数上限，超限请求排队等待。防止同时发送过多请求导致 Provider 限流或报错。</p>
+      <p>按 Provider 配置并发数上限，超限请求排队等待。支持自适应动态调整，防止同时发送过多请求导致 Provider 限流或报错。</p>
+
       <h2>工作原理</h2>
       <div class="not-prose my-4 rounded-lg border border-white/10 bg-surface-50 p-4">
         <code class="text-sm font-mono text-gray-300 block leading-loose">
@@ -20,16 +21,30 @@ const isZh = computed(() => locale.value === 'zh')
           → 等待超时：返回 504 Gateway Timeout
         </code>
       </div>
+
+      <h2>自适应并发控制</h2>
+      <p>v0.9.21 新增自适应并发控制，不再依赖人工估算最大并发数：</p>
+      <ul>
+        <li><strong>动态调整</strong>：根据上游 Provider 的响应时间自动增减并发度</li>
+        <li><strong>冷启动平滑</strong>：初始并发度从低值起步，避免冷启动阶段冲击上游</li>
+        <li><strong>非并发错误区分</strong>：仅根据并发相关错误（429 限流等）触发退避，普通业务错误不影响并发度</li>
+        <li><strong>信号量超时可视化</strong>：信号量等待超时的请求会在监控面板中明确标记</li>
+        <li><strong>stream_error 正确处理</strong>：流式传输中的错误按类型区分，避免误过滤</li>
+      </ul>
+
       <h2>配置方式</h2>
-      <p>在管理后台 &gt; Provider 页面，为每个 Provider 设置最大并发数。建议根据 Provider 的 API 限制合理配置。</p>
+      <p>在管理后台 &gt; Provider 页面，为每个 Provider 设置最大并发数。支持手动固定值或启用自适应模式。建议根据 Provider 的 API 限制合理配置。</p>
+
       <h2>信号量机制</h2>
-      <p>Router 使用基于 Promise 的信号量机制实现并发控制。每个 Provider 维护独立的信号量，支持 AbortSignal 和超时。</p>
+      <p>Router 使用基于 Promise 的信号量机制实现并发控制。每个 Provider 维护独立的信号量，支持 AbortSignal 和超时。自适应模式下，信号量上限由 SemaphoreManager 动态调整。</p>
+
       <h2>监控</h2>
-      <p>实时监控页面可以看到每个 Provider 的当前活跃请求数、等待队列长度等状态。详见 <router-link to="/project/llm-simple-router/guide/features/monitor">实时监控</router-link>。</p>
+      <p>实时监控页面可以看到每个 Provider 的当前活跃请求数、等待队列长度、自适应并发度变化趋势等状态。详见 <router-link to="/project/llm-simple-router/guide/features/monitor">实时监控</router-link>。</p>
     </template>
     <template v-else>
       <h1>Concurrency Control</h1>
-      <p>Configure per-Provider concurrency limits. Requests exceeding the limit are queued. Prevents sending too many requests simultaneously causing provider rate limits.</p>
+      <p>Configure per-Provider concurrency limits, with optional adaptive dynamic adjustment. Requests exceeding the limit are queued, which prevents too many simultaneous requests from triggering Provider rate limits or errors.</p>
+
       <h2>How It Works</h2>
       <div class="not-prose my-4 rounded-lg border border-white/10 bg-surface-50 p-4">
         <code class="text-sm font-mono text-gray-300 block leading-loose">
@@ -40,12 +55,25 @@ const isZh = computed(() => locale.value === 'zh')
           → Wait timeout: return 504 Gateway Timeout
         </code>
       </div>
+
+      <h2>Adaptive Concurrency Control</h2>
+      <p>Since v0.9.21, the Router supports adaptive concurrency — no more manual max-concurrency estimation:</p>
+      <ul>
+        <li><strong>Dynamic Adjustment</strong>: Auto increase/decrease concurrency based on upstream response time</li>
+        <li><strong>Smooth Cold Start</strong>: Starts from a low concurrency value to avoid overwhelming the upstream during cold start</li>
+        <li><strong>Error Classification</strong>: Only concurrency-related errors (e.g. 429 rate limiting) trigger backoff; ordinary business errors do not affect the concurrency level</li>
+        <li><strong>Semaphore Timeout Visibility</strong>: Requests that time out while waiting for a semaphore slot are clearly marked in the monitor panel</li>
+        <li><strong>stream_error Handling</strong>: Streaming errors are classified by type, avoiding incorrect filtering</li>
+      </ul>
+
       <h2>Configuration</h2>
-      <p>Admin Panel &gt; Provider page, set max concurrency per provider.</p>
+      <p>On the Admin Panel &gt; Provider page, set the max concurrency for each Provider, either as a fixed manual value or by enabling adaptive mode. Choose a value based on the Provider's API limits.</p>
+
       <h2>Semaphore Mechanism</h2>
-      <p>Router uses Promise-based semaphore for concurrency control. Each Provider has independent semaphore, supports AbortSignal and timeout.</p>
+      <p>The Router implements concurrency control with a Promise-based semaphore. Each Provider maintains its own independent semaphore, which supports AbortSignal and timeouts. In adaptive mode, the SemaphoreManager dynamically adjusts the semaphore limit.</p>
+
       <h2>Monitoring</h2>
-      <p>See the <router-link to="/project/llm-simple-router/guide/features/monitor">Live Monitor</router-link> page for active requests and queue length per Provider.</p>
+      <p>See the <router-link to="/project/llm-simple-router/guide/features/monitor">Live Monitor</router-link> page for active requests, queue length, and adaptive concurrency trends per Provider.</p>
     </template>
   </div>
 </template>