docs: reposition auth split and benchmark clarity

gaoguobin · gaoguobin · commit 5873bab024cd · 2026-05-08T21:16:00.000+08:00
diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "codex-fast-proxy",
   "version": "0.1.0",
-  "description": "Codex App proxy for third-party OpenAI-compatible Responses API providers with Fast/Priority controls.",
+  "description": "Codex App auth-split proxy for signing in with ChatGPT while using third-party OpenAI-compatible APIs.",
   "author": {
     "name": "gaoguobin",
     "url": "https://github.com/gaoguobin"
@@ -19,13 +19,15 @@
     "responses-api",
     "fast-mode",
     "service-tier",
-    "openai-compatible-api"
+    "openai-compatible-api",
+    "chatgpt-login",
+    "auth-split"
   ],
   "skills": "./skills/",
   "interface": {
     "displayName": "Codex Fast Proxy",
-    "shortDescription": "Third-party API proxy for Codex App with Fast/Priority controls.",
-    "longDescription": "Manage a local, safety-bounded proxy and Agent Skill so Codex App users on third-party OpenAI-compatible API providers can preserve the App Fast toggle, optionally force global Fast/Priority, split upstream auth for ChatGPT login, verify traffic, benchmark, update, and uninstall.",
+    "shortDescription": "Use Codex App sign-in with third-party API providers.",
+    "longDescription": "Manage a local, safety-bounded proxy and Agent Skill so Codex App users can sign in with ChatGPT for UI features while model requests continue through a third-party OpenAI-compatible provider. It supports upstream auth split, App-controlled Fast/Priority behavior, benchmark verification, update, and uninstall workflows.",
     "developerName": "gaoguobin",
     "category": "Developer Tools",
     "capabilities": [
@@ -34,16 +36,17 @@
       "Benchmark"
     ],
     "websiteURL": "https://github.com/gaoguobin/codex-fast-proxy",
-    "privacyPolicyURL": "https://github.com/gaoguobin/codex-fast-proxy#safety-model",
+    "privacyPolicyURL": "https://github.com/gaoguobin/codex-fast-proxy#safety",
     "termsOfServiceURL": "https://github.com/gaoguobin/codex-fast-proxy/blob/main/LICENSE",
     "defaultPrompt": [
       "Enable Codex Fast proxy.",
+      "Prepare Codex Fast proxy for ChatGPT account login.",
       "Show Codex Fast proxy status.",
       "Run Codex Fast proxy benchmark."
     ],
     "brandColor": "#10A37F",
     "screenshots": [
-      "./docs/assets/dashboard.png"
+      "./docs/assets/hero.png"
     ]
   }
 }
diff --git a/README.md b/README.md
@@ -2,33 +2,60 @@
 
 [![CI](https://github.com/gaoguobin/codex-fast-proxy/actions/workflows/ci.yml/badge.svg)](https://github.com/gaoguobin/codex-fast-proxy/actions/workflows/ci.yml)
 
-Codex App Fast/Priority proxy for third-party OpenAI-compatible APIs.
+Codex App auth-split proxy for third-party OpenAI-compatible APIs.
 
-This project is for Codex App users who already use a third-party API provider or relay service.
-It lets Codex App route model requests through a local proxy, keep streaming intact, preserve the
-App's own Fast control when available, and optionally inject `service_tier="priority"` when Codex
-does not send a tier.
+Sign in to Codex App with ChatGPT for the full App UI while keeping model requests on your
+third-party OpenAI-compatible API provider. `codex-fast-proxy` routes provider traffic through a
+local proxy, applies an optional provider-auth override, preserves streaming, and keeps the App's
+own Fast controls intact when they are available.
 
 [Chinese Guide](docs/README.zh-CN.md) · [Quick Start](#quick-start) · [Common Workflows](#common-workflows) · [Dashboard](#dashboard) · [Safety](#safety) · [Advanced Usage](docs/advanced-usage.md) · [Sponsor](#sponsor)
 
-![Codex Fast Proxy dashboard](docs/assets/dashboard.png)
+![Codex Fast Proxy overview](docs/assets/hero.png)
+
+[Watch the 10-second overview video](docs/assets/codex-fast-proxy-promo.mp4)
 
 ## Why
 
-Codex CLI can already use Fast mode. The main use case here is Codex App + third-party API
-providers, where users may still want the richer App experience: plugin marketplace, GitHub/Apps
-connectors, manual Fast controls, status hints, voice input, and a local dashboard.
+Codex App features such as plugin marketplace, GitHub/Apps connectors, manual Fast controls, status
+hints, and voice input are tied to signing in with ChatGPT. Users of third-party API providers still
+need model requests to use the provider's endpoint and API key.
+
+This project keeps those two concerns separate: Codex App can stay signed in with ChatGPT for UI and
+connector features, while `/v1/responses` model traffic continues through your configured provider.
+Fast/Priority routing is then treated as a provider capability that should be measured, not assumed.
 
 ## What It Does
 
+- Lets Codex App stay signed in with ChatGPT while provider API requests use your third-party
+  upstream.
 - Routes Codex provider traffic from `http://127.0.0.1:8787/v1` to your saved upstream provider.
+- Optionally replaces proxied provider `Authorization` with a key from an environment variable, so
+  ChatGPT account auth is not forwarded to the third-party provider.
 - Only patches `POST /v1/responses`, and only when the configured Fast policy allows it.
 - Leaves `model`, `reasoning`, `tools`, `input`, request bodies, and SSE frames unchanged.
-- Supports an optional auth split for ChatGPT-login users: provider API requests can use a separate
-  environment variable while ChatGPT plugin/GitHub/App connector traffic remains untouched.
+- Preserves Codex App's manual Fast controls when the App sends its own `service_tier`.
 - Installs a Codex `SessionStart` hook so future Codex sessions can start a missing proxy.
 - Provides a read-only local dashboard with redacted status, recent traffic, and benchmark summary.
 
+## Fast Effect
+
+Fast/Priority is an important feature, but the proxy cannot guarantee a speedup on its own. It can
+send the priority hint, but the real latency effect depends on the upstream OpenAI-compatible
+provider. Some providers accept `service_tier="priority"` without making the measured workload
+faster, and some may not echo priority metadata in the response.
+
+Use the built-in A/B benchmark as the source of truth for your current provider and model:
+
+```text
+Run the Codex Fast proxy A/B benchmark
+```
+
+Benchmark results separate three facts: whether priority requests were accepted, whether the
+measured workload got faster, and whether provider response metadata explicitly confirmed priority.
+The benchmark also records whether the control split was valid: default samples must omit
+`service_tier`, while priority samples must send the expected value.
+
 ## Quick Start
 
 Paste this into Codex:
@@ -84,10 +111,11 @@ In API-key mode, the default `auto` policy can inject global priority when Codex
 `service_tier`. In ChatGPT-login or unclear states, the default behavior is conservative and
 preserves Codex's own Fast choice.
 
-## ChatGPT Login
+## Sign In With ChatGPT
 
-ChatGPT login is optional. Use it only if you want the full Codex App UI, such as plugin
-marketplace, GitHub/Apps/connectors, manual Fast controls, status hints, or voice input.
+Signing in with ChatGPT is optional. Use it only if you want the full Codex App UI, such as plugin
+marketplace, GitHub/Apps/connectors, manual Fast controls, status hints, or voice input. The proxy's
+auth split keeps model requests on your third-party provider after that sign-in.
 
 Before switching Codex App to ChatGPT login, ask Codex to prepare provider auth:
 
diff --git a/docs/README.zh-CN.md b/docs/README.zh-CN.md
@@ -1,6 +1,8 @@
 # codex-fast-proxy 中文指南
 
-`codex-fast-proxy` 面向使用兼容 OpenAI API 的第三方供应商的 Codex App 用户。它把 Codex App 的模型请求转到本地代理，再转发到第三方上游；默认尊重 Codex App 自己的 Fast 选择，也可以在 API-key 模式下补缺失的 `service_tier="priority"`。
+`codex-fast-proxy` 面向使用兼容 OpenAI API 的第三方供应商的 Codex App 用户。核心用途是让 Codex App 可以保持 ChatGPT 账户登录，继续使用插件市场、GitHub/Apps/connectors、Fast 手动选择、状态提示和语音输入等 UI 能力，同时把模型请求转到第三方上游。
+
+Fast/Priority 是重要能力，但实际是否加速取决于上游第三方供应商是否支持；请以 A/B benchmark 的结果为准。
 
 [返回英文 README](../README.md)
 
diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md
@@ -174,6 +174,8 @@ python -m codex_fast_proxy benchmark --api-key-env PACKY_API_KEY
 
 Interpretation:
 
+- `service_tier_control.valid=true`: default samples omitted `service_tier` and priority samples sent
+  the expected value.
 - `priority_accepted=true`: at least one priority sample succeeded.
 - `observed_priority_effective=true`: the measured workload benefited.
 - `provider_confirmed_priority=true`: provider response metadata explicitly confirmed priority when
diff --git a/docs/assets/codex-fast-proxy-promo.mp4 b/docs/assets/codex-fast-proxy-promo.mp4
diff --git a/docs/assets/dashboard.png b/docs/assets/dashboard.png
diff --git a/docs/assets/hero.png b/docs/assets/hero.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "codex-fast-proxy"
 version = "0.1.0"
-description = "Codex App proxy for third-party OpenAI-compatible Responses API providers with Fast/Priority controls."
+description = "Codex App auth-split proxy for signing in with ChatGPT while using third-party OpenAI-compatible APIs."
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = []
@@ -21,6 +21,8 @@ keywords = [
     "fast-mode",
     "service-tier",
     "openai-compatible-api",
+    "chatgpt-login",
+    "auth-split",
 ]
 classifiers = [
     "License :: OSI Approved :: MIT License",
diff --git a/skills/codex-fast-proxy/SKILL.md b/skills/codex-fast-proxy/SKILL.md
@@ -1,9 +1,9 @@
 ---
 name: codex-fast-proxy
-description: Codex App Fast proxy for third-party OpenAI-compatible APIs. Preserves UI Fast toggles, benchmarks, changes upstream, supports ChatGPT-login upstream auth split, and optionally injects priority service_tier.
+description: Codex App Fast proxy and auth-split for third-party OpenAI-compatible APIs. Supports Sign in with ChatGPT, priority service_tier, Responses API benchmark, enable/check/update/uninstall.
 ---
 
-Use this skill when the user wants Codex to manage the local Fast proxy for Codex App.
+Use this skill when the user wants Codex to manage the local auth-split and Fast proxy for Codex App.
 
 ## Trigger patterns
 
@@ -207,7 +207,8 @@ For upstream URL changes after enable, prefer `set-upstream --upstream-base <url
 - Do not print API keys, `auth.json`, request bodies, prompts, or Codex history.
 - For benchmark results, report profile, medians, observed speedup, `priority_accepted`,
   `observed_priority_effective`, provider-confirmed priority metadata when present, sample counts,
-  and errors. Prioritize full-response total latency and first-output latency over first-event/TTFB.
+  `service_tier_control.valid`, and errors. Prioritize full-response total latency and first-output
+  latency over first-event/TTFB.
   Treat `priority_accepted=true` as proof that the wire parameter is accepted, and
   `observed_priority_effective=true` as proof that this measured workload benefited. Report
   `benchmark_mode` and do not present Codex CLI/app-server benchmark results as an App-specific
diff --git a/src/codex_fast_proxy/benchmark.py b/src/codex_fast_proxy/benchmark.py
@@ -340,6 +340,9 @@ def default_connection_factory(scheme: str, host: str, port: int | None, timeout
     "upgrade",
 }
 
+ABSENT_SERVICE_TIER = "<absent>"
+UNKNOWN_SERVICE_TIER = "<unknown>"
+
 
 def forward_path(upstream_base: str, incoming_path: str) -> str:
     parsed = urlsplit(upstream_base)
@@ -656,6 +659,7 @@ def run_sample(
     return {
         "tier": label,
         "status": response.status,
+        "request_service_tier": target.service_tier if label == "priority" else None,
         "ttfb_ms": first_event_ms,
         "first_event_ms": first_event_ms,
         "first_output_ms": first_output_ms,
@@ -696,6 +700,7 @@ def summarize_samples(samples: list[dict[str, Any]], label: str) -> dict[str, An
     output_char_values = [
         float(sample["output_chars"]) for sample in ok_samples if sample.get("output_chars") is not None
     ]
+    request_tiers = sorted({request_service_tier_label(sample) for sample in matching})
     tiers = sorted({sample["response_service_tier"] for sample in ok_samples if sample.get("response_service_tier")})
     return {
         "count": len(matching),
@@ -704,10 +709,44 @@ def summarize_samples(samples: list[dict[str, Any]], label: str) -> dict[str, An
         "median_ttfb_ms": median(ttfb_values),
         "median_first_output_ms": median(first_output_values),
         "median_output_chars": median(output_char_values),
+        "request_service_tiers": request_tiers,
         "response_service_tiers": tiers,
     }
 
 
+def request_service_tier_label(sample: dict[str, Any]) -> str:
+    if "request_service_tier" not in sample:
+        return UNKNOWN_SERVICE_TIER
+    value = sample.get("request_service_tier")
+    if value is None:
+        return ABSENT_SERVICE_TIER
+    if isinstance(value, str) and value:
+        return value
+    return str(value)
+
+
+def service_tier_control(
+    target: BenchmarkTarget,
+    default_summary: dict[str, Any],
+    priority_summary: dict[str, Any],
+) -> dict[str, Any]:
+    default_tiers = default_summary.get("request_service_tiers", [])
+    priority_tiers = priority_summary.get("request_service_tiers", [])
+    valid = default_tiers == [ABSENT_SERVICE_TIER] and priority_tiers == [target.service_tier]
+    message = (
+        "Default samples omitted service_tier and priority samples sent the expected value."
+        if valid
+        else "Benchmark request tiers did not match the expected default-vs-priority split."
+    )
+    return {
+        "valid": valid,
+        "default_request_service_tiers": default_tiers,
+        "priority_request_service_tiers": priority_tiers,
+        "expected_priority_service_tier": target.service_tier,
+        "message": message,
+    }
+
+
 def speedup(default_ms: float | None, priority_ms: float | None) -> float | None:
     if not default_ms or not priority_ms:
         return None
@@ -849,6 +888,7 @@ def summarize_benchmark_result(
     profile = profile_for_name(target.profile)
     default_summary = summarize_samples(samples, "default")
     priority_summary = summarize_samples(samples, "priority")
+    tier_control = service_tier_control(target, default_summary, priority_summary)
     total_speedup = speedup(default_summary["median_total_ms"], priority_summary["median_total_ms"])
     ttfb_speedup = speedup(default_summary["median_ttfb_ms"], priority_summary["median_ttfb_ms"])
     first_output_speedup = speedup(default_summary["median_first_output_ms"], priority_summary["median_first_output_ms"])
@@ -868,6 +908,7 @@ def summarize_benchmark_result(
         "samples": samples,
         "default": default_summary,
         "priority": priority_summary,
+        "service_tier_control": tier_control,
         "observed_speedup_total": total_speedup,
         "observed_speedup_ttfb": ttfb_speedup,
         "observed_speedup_first_output": first_output_speedup,
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
@@ -184,14 +184,40 @@ def test_run_benchmark_keeps_only_redacted_metrics(self) -> None:
         self.assertEqual(result["priority"]["ok"], 1)
         self.assertEqual(result["priority"]["median_first_output_ms"], 6000.0)
         self.assertEqual(result["priority"]["median_output_chars"], 11.0)
+        self.assertEqual(result["default"]["request_service_tiers"], ["<absent>"])
+        self.assertEqual(result["priority"]["request_service_tiers"], ["priority"])
         self.assertEqual(result["priority"]["response_service_tiers"], ["priority"])
+        self.assertTrue(result["service_tier_control"]["valid"])
         self.assertTrue(result["priority_accepted"])
         self.assertFalse(result["observed_priority_effective"])
         self.assertTrue(result["provider_confirmed_priority"])
         self.assertNotIn("secret", json.dumps(result))
         self.assertNotIn("Codex App Responses API proxy", json.dumps(result))
         self.assertNotIn("review text", json.dumps(result))
 
+    def test_benchmark_detects_invalid_service_tier_control(self) -> None:
+        target = BenchmarkTarget(
+            provider="acme",
+            upstream_base="https://api.example.test/v1",
+            model="gpt-test",
+            profile="full",
+            service_tier="priority",
+            api_key_source="auth.json:OPENAI_API_KEY",
+            api_key="secret",
+        )
+        result = benchmark.summarize_benchmark_result(
+            target,
+            pairs=1,
+            samples=[
+                {"tier": "default", "status": 200, "request_service_tier": "priority"},
+                {"tier": "priority", "status": 200, "request_service_tier": "priority"},
+            ],
+            mode="direct",
+        )
+
+        self.assertFalse(result["service_tier_control"]["valid"])
+        self.assertEqual(result["service_tier_control"]["default_request_service_tiers"], ["priority"])
+
 
 if __name__ == "__main__":
     unittest.main()