@@ -55,104 +55,52 @@ def _strip_provider_prefix(model: str) -> str:
# TTL for the cached endpoint /models listing (value 300; presumably
# seconds — TODO confirm against the cache implementation).
_ENDPOINT_MODEL_CACHE_TTL = 300

# Descending tiers for context length probing when the model is unknown.
# We start at 128K (a safe default for most modern models) and step down
# on context-length errors until one works.
CONTEXT_PROBE_TIERS = [
    128_000,
    64_000,
    32_000,
    16_000,
    8_000,
]

# Default context length when no detection method succeeds.
DEFAULT_FALLBACK_CONTEXT = CONTEXT_PROBE_TIERS[0]

# Thin fallback defaults — only broad model family patterns.
# These fire only when provider is unknown AND models.dev/OpenRouter/Anthropic
# all miss. Replaced the previous 80+ entry dict.
# For provider-specific context lengths, models.dev is the primary source.
DEFAULT_CONTEXT_LENGTHS = {
    # Anthropic Claude 4.6 (1M context) — bare IDs only to avoid
    # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
    # substring of "anthropic/claude-sonnet-4.6").
    # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
    "claude-opus-4-6": 1000000,
    "claude-sonnet-4-6": 1000000,
    "claude-opus-4.6": 1000000,
    "claude-sonnet-4.6": 1000000,
    # Catch-all for older Claude models (must sort after specific entries)
    "claude": 200000,
    # OpenAI
    "gpt-4.1": 1047576,
    "gpt-5": 128000,
    "gpt-4": 128000,
    # Google
    "gemini": 1048576,
    # DeepSeek
    "deepseek": 128000,
    # Meta
    "llama": 131072,
    # Qwen
    "qwen": 131072,
    # MiniMax
    "minimax": 204800,
    # GLM
    "glm": 202752,
    # Kimi
    "kimi": 262144,
}
157105
158106_CONTEXT_LENGTH_KEYS = (
@@ -693,22 +641,100 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
693641 return None
694642
695643
644+ def _normalize_model_version (model : str ) -> str :
645+ """Normalize version separators for matching.
646+
647+ Nous uses dashes: claude-opus-4-6, claude-sonnet-4-5
648+ OpenRouter uses dots: claude-opus-4.6, claude-sonnet-4.5
649+ Normalize both to dashes for comparison.
650+ """
651+ return model .replace ("." , "-" )
652+
653+
654+ def _query_anthropic_context_length (model : str , base_url : str , api_key : str ) -> Optional [int ]:
655+ """Query Anthropic's /v1/models endpoint for context length.
656+
657+ Only works with regular ANTHROPIC_API_KEY (sk-ant-api*).
658+ OAuth tokens (sk-ant-oat*) from Claude Code return 401.
659+ """
660+ if not api_key or api_key .startswith ("sk-ant-oat" ):
661+ return None # OAuth tokens can't access /v1/models
662+ try :
663+ base = base_url .rstrip ("/" )
664+ if base .endswith ("/v1" ):
665+ base = base [:- 3 ]
666+ url = f"{ base } /v1/models?limit=1000"
667+ headers = {
668+ "x-api-key" : api_key ,
669+ "anthropic-version" : "2023-06-01" ,
670+ }
671+ resp = requests .get (url , headers = headers , timeout = 10 )
672+ if resp .status_code != 200 :
673+ return None
674+ data = resp .json ()
675+ for m in data .get ("data" , []):
676+ if m .get ("id" ) == model :
677+ ctx = m .get ("max_input_tokens" )
678+ if isinstance (ctx , int ) and ctx > 0 :
679+ return ctx
680+ except Exception as e :
681+ logger .debug ("Anthropic /v1/models query failed: %s" , e )
682+ return None
683+
684+
def _resolve_nous_context_length(model: str) -> Optional[int]:
    """Resolve a Nous Portal model's context length via OpenRouter metadata.

    Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
    prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
    with version normalization (dot<->dash).

    Returns the context length, or None when no OpenRouter entry matches.
    """
    metadata = fetch_model_metadata()  # OpenRouter cache

    # Exact match first. Only accept it when a usable context_length is
    # present — otherwise fall through to the fuzzy strategies below rather
    # than short-circuiting with None.
    if model in metadata:
        ctx = metadata[model].get("context_length")
        if ctx:
            return ctx

    normalized = _normalize_model_version(model).lower()

    # Bare-ID match: strip the provider prefix from each OpenRouter ID and
    # compare both the raw and the version-normalized forms.
    for or_id, entry in metadata.items():
        bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
        if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
            ctx = entry.get("context_length")
            if ctx:
                return ctx

    # Partial prefix match for cases like gemini-3-flash -> gemini-3-flash-preview.
    # Require the match to end at a word boundary (followed by '-', ':', '.'
    # or end of string) so e.g. 'gpt-4' cannot match 'gpt-41'.
    model_lower = model.lower()
    for or_id, entry in metadata.items():
        bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
        for candidate, query in [
            (bare.lower(), model_lower),
            (_normalize_model_version(bare).lower(), normalized),
        ]:
            if candidate.startswith(query) and (
                len(candidate) == len(query) or candidate[len(query)] in "-:."
            ):
                ctx = entry.get("context_length")
                if ctx:
                    return ctx

    return None
716+
717+
696718def get_model_context_length (
697719 model : str ,
698720 base_url : str = "" ,
699721 api_key : str = "" ,
700722 config_context_length : int | None = None ,
723+ provider : str = "" ,
701724) -> int :
702725 """Get the context length for a model.
703726
704727 Resolution order:
705- 0. Explicit config override (model.context_length in config.yaml )
728+ 0. Explicit config override (model.context_length or custom_providers per-model )
706729 1. Persistent cache (previously discovered via probing)
707730 2. Active endpoint metadata (/models for explicit custom endpoints)
708- 3. Local server query (for local endpoints when model not in /models list)
709- 4. OpenRouter API metadata
710- 5. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match for hosted routes only)
711- 6. First probe tier (2M) — will be narrowed on first context error
731+ 3. Local server query (for local endpoints)
732+ 4. Anthropic /v1/models API (API-key users only, not OAuth)
733+ 5. OpenRouter live API metadata
734+ 6. Nous suffix-match via OpenRouter cache
735+ 7. models.dev registry lookup (provider-aware)
736+ 8. Thin hardcoded defaults (broad family patterns)
737+ 9. Default fallback (128K)
712738 """
713739 # 0. Explicit config override — user knows best
714740 if config_context_length is not None and isinstance (config_context_length , int ) and config_context_length > 0 :
@@ -744,9 +770,7 @@ def get_model_context_length(
744770 if isinstance (context_length , int ):
745771 return context_length
746772 if not _is_known_provider_base_url (base_url ):
747- # Explicit third-party endpoints should not borrow fuzzy global
748- # defaults from unrelated providers with similarly named models.
749- # But first try querying the local server directly.
773+ # 3. Try querying local server directly
750774 if is_local_endpoint (base_url ):
751775 local_ctx = _query_local_context_length (model , base_url )
752776 if local_ctx and local_ctx > 0 :
@@ -756,31 +780,53 @@ def get_model_context_length(
756780 "Could not detect context length for model %r at %s — "
757781 "defaulting to %s tokens (probe-down). Set model.context_length "
758782 "in config.yaml to override." ,
759- model , base_url , f"{ CONTEXT_PROBE_TIERS [ 0 ] :,} " ,
783+ model , base_url , f"{ DEFAULT_FALLBACK_CONTEXT :,} " ,
760784 )
761- return CONTEXT_PROBE_TIERS [ 0 ]
785+ return DEFAULT_FALLBACK_CONTEXT
762786
763- # 3. OpenRouter API metadata
787+ # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
788+ if provider == "anthropic" or (
789+ base_url and "api.anthropic.com" in base_url
790+ ):
791+ ctx = _query_anthropic_context_length (model , base_url or "https://api.anthropic.com" , api_key )
792+ if ctx :
793+ return ctx
794+
795+ # 5. Provider-aware lookups (before generic OpenRouter cache)
796+ # These are provider-specific and take priority over the generic OR cache,
797+ # since the same model can have different context limits per provider
798+ # (e.g. claude-opus-4.6 is 1M on Anthropic but 128K on GitHub Copilot).
799+ if provider == "nous" :
800+ ctx = _resolve_nous_context_length (model )
801+ if ctx :
802+ return ctx
803+ elif provider :
804+ from agent .models_dev import lookup_models_dev_context
805+ ctx = lookup_models_dev_context (provider , model )
806+ if ctx :
807+ return ctx
808+
809+ # 6. OpenRouter live API metadata (provider-unaware fallback)
764810 metadata = fetch_model_metadata ()
765811 if model in metadata :
766812 return metadata [model ].get ("context_length" , 128000 )
767813
768- # 4 . Hardcoded defaults (fuzzy match — longest key first for specificity)
814+ # 8 . Hardcoded defaults (fuzzy match — longest key first for specificity)
769815 for default_model , length in sorted (
770816 DEFAULT_CONTEXT_LENGTHS .items (), key = lambda x : len (x [0 ]), reverse = True
771817 ):
772818 if default_model in model or model in default_model :
773819 return length
774820
775- # 5 . Query local server for unknown models before defaulting to 2M
821+ # 9 . Query local server as last resort
776822 if base_url and is_local_endpoint (base_url ):
777823 local_ctx = _query_local_context_length (model , base_url )
778824 if local_ctx and local_ctx > 0 :
779825 save_context_length (model , base_url , local_ctx )
780826 return local_ctx
781827
782- # 6. Unknown model — start at highest probe tier
783- return CONTEXT_PROBE_TIERS [ 0 ]
828+ # 10. Default fallback — 128K
829+ return DEFAULT_FALLBACK_CONTEXT
784830
785831
786832def estimate_tokens_rough (text : str ) -> int :
0 commit comments