|
140 | 140 | # upstream model 500s, and transient infrastructure errors. |
141 | 141 | _RETRIABLE_SERVER_ERROR_CODES: frozenset[int] = frozenset({-32603}) |
142 | 142 |
|
| 143 | +# -32601 is the JSON-RPC "Method not found" error code. An ACP server raises |
| 144 | +# it when a model-selection call uses the mechanism it does *not* implement, |
| 145 | +# e.g. `session/set_model` on a CLI that moved model selection to |
| 146 | +# `configOptions` (codex-acp 0.16+, claude-agent-acp 0.46+), or vice versa. We |
| 147 | +# use it to fall back to the other mechanism if response-detection picked wrong. |
| 148 | +_METHOD_NOT_FOUND_CODE: Final[int] = -32601 |
| 149 | + |
143 | 150 |
|
144 | 151 | # Maximum characters for ACP tool call content — matches MAX_CMD_OUTPUT_SIZE |
145 | 152 | # used by the terminal tool and the default max_message_chars in LLM config. |
@@ -437,6 +444,48 @@ def _apply_acp_model( |
437 | 444 | return conn.set_session_model(model_id=model, session_id=session_id) |
438 | 445 |
|
439 | 446 |
|
def _is_method_not_found(exc: ACPRequestError) -> bool:
    """Report whether ``exc`` carries the JSON-RPC "method not found" code.

    A true result means the server does not implement the model-selection
    call that produced ``exc``.
    """
    error_code = exc.code
    return error_code == _METHOD_NOT_FOUND_CODE
| 451 | + |
| 452 | + |
async def _apply_acp_model_with_fallback(
    conn: ClientSideConnection,
    session_id: str,
    model: str,
    *,
    via_config_option: bool,
) -> bool:
    """Apply ``model`` to the session, switching mechanism on "method not found".

    The first attempt uses the mechanism selected by ``via_config_option``.
    Response-detection (``_session_selects_model_via_config_option``) is
    correct for every CLI validated so far, but it reads an UNSTABLE
    capability and the response shape can vary by build/auth state. When the
    first attempt is rejected with ``-32601 "Method not found"``, the server
    uses the *other* mechanism, so the call is retried once with it rather
    than failing session init.

    Returns:
        The ``via_config_option`` value of the attempt that applied the model.

    Raises:
        ACPRequestError: The first attempt failed with any code other than
            method-not-found, or the retry itself was rejected.
    """
    flipped = not via_config_option
    try:
        await _apply_acp_model(
            conn, session_id, model, via_config_option=via_config_option
        )
    except ACPRequestError as exc:
        if not _is_method_not_found(exc):
            raise
        # Name the rejected call and the one about to be tried, for the log.
        if via_config_option:
            rejected, alternative = "set_config_option", "set_session_model"
        else:
            rejected, alternative = "set_session_model", "set_config_option"
        logger.info(
            "ACP model-apply via %s rejected as method-not-found; retrying via %s",
            rejected,
            alternative,
        )
        # The retry stays inside the handler so a second failure keeps the
        # first rejection attached as its exception context.
        await _apply_acp_model(conn, session_id, model, via_config_option=flipped)
        return flipped
    else:
        return via_config_option
| 487 | + |
| 488 | + |
440 | 489 | def _extract_session_models( |
441 | 490 | response: Any, |
442 | 491 | ) -> tuple[str | None, list[ACPModelInfo] | None]: |
@@ -654,7 +703,7 @@ async def _maybe_set_session_model( |
654 | 703 | return False |
655 | 704 | provider = detect_acp_provider_by_agent_name(agent_name) |
656 | 705 | if provider is not None and provider.supports_set_session_model: |
657 | | - await _apply_acp_model( |
| 706 | + await _apply_acp_model_with_fallback( |
658 | 707 | conn, session_id, acp_model, via_config_option=via_config_option |
659 | 708 | ) |
660 | 709 | return True |
@@ -727,7 +776,9 @@ async def _reapply_session_model_on_resume( |
727 | 776 | if provider is not None: |
728 | 777 | # Known provider: apply via the mechanism the resumed session uses |
729 | 778 | # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+, |
730 | | - # else set_session_model). |
| 779 | + # else set_session_model). A rejection is already tolerated below |
| 780 | + # (the session keeps the server default), so resume doesn't need the |
| 781 | + # cross-mechanism fallback the init/switch paths use. |
731 | 782 | await _apply_acp_model( |
732 | 783 | conn, session_id, acp_model, via_config_option=via_config_option |
733 | 784 | ) |
@@ -3643,25 +3694,48 @@ def set_acp_model(self, model: str) -> None: |
3643 | 3694 | timeout=self.acp_prompt_timeout, |
3644 | 3695 | ) |
3645 | 3696 | except ACPRequestError as e: |
3646 | | - # Server-internal failures (JSON-RPC -32603) are not the caller's |
3647 | | - # fault, and the prompt path already treats them as retriable. Let |
3648 | | - # them propagate (-> 5xx) instead of mislabeling them as a 400 |
3649 | | - # client error. |
3650 | | - if e.code in _RETRIABLE_SERVER_ERROR_CODES: |
3651 | | - raise |
3652 | | - # acp.exceptions.RequestError derives from Exception (not |
3653 | | - # RuntimeError); surface a true client/protocol rejection (e.g. |
3654 | | - # method-not-found, invalid model id) as a ValueError so callers — |
3655 | | - # and the agent-server route — treat it as a 400-class client error |
3656 | | - # rather than an opaque 500. |
3657 | | - method = ( |
3658 | | - "set_config_option(model)" |
3659 | | - if self._model_via_config_option |
3660 | | - else "set_session_model" |
3661 | | - ) |
3662 | | - raise ValueError( |
3663 | | - f"ACP server rejected {method}(model={model!r}): {e}" |
3664 | | - ) from e |
| 3697 | + pending_error: ACPRequestError | None = e |
| 3698 | + if _is_method_not_found(e): |
| 3699 | + # The session uses the other model-selection mechanism (or the |
| 3700 | + # init-time detection picked the wrong one): retry once with it, |
| 3701 | + # and remember the working mechanism for later switches. If the |
| 3702 | + # retry also fails, fall through to the error translation below. |
| 3703 | + flipped = not self._model_via_config_option |
| 3704 | + try: |
| 3705 | + self._executor.run_async( |
| 3706 | + _apply_acp_model( |
| 3707 | + self._conn, |
| 3708 | + self._session_id, |
| 3709 | + model, |
| 3710 | + via_config_option=flipped, |
| 3711 | + ), |
| 3712 | + timeout=self.acp_prompt_timeout, |
| 3713 | + ) |
| 3714 | + except ACPRequestError as e2: |
| 3715 | + pending_error = e2 |
| 3716 | + else: |
| 3717 | + self._model_via_config_option = flipped |
| 3718 | + pending_error = None # both selections applied |
| 3719 | + if pending_error is not None: |
| 3720 | + # Server-internal failures (JSON-RPC -32603) are not the caller's |
| 3721 | + # fault, and the prompt path already treats them as retriable. |
| 3722 | + # Let them propagate (-> 5xx) instead of mislabeling them as a |
| 3723 | + # 400 client error. |
| 3724 | + if pending_error.code in _RETRIABLE_SERVER_ERROR_CODES: |
| 3725 | + raise pending_error |
| 3726 | + # acp.exceptions.RequestError derives from Exception (not |
| 3727 | + # RuntimeError); surface a true client/protocol rejection (e.g. |
| 3728 | + # method-not-found on both mechanisms, invalid model id) as a |
| 3729 | + # ValueError so callers — and the agent-server route — treat it |
| 3730 | + # as a 400-class client error rather than an opaque 500. |
| 3731 | + method = ( |
| 3732 | + "set_config_option(model)" |
| 3733 | + if self._model_via_config_option |
| 3734 | + else "set_session_model" |
| 3735 | + ) |
| 3736 | + raise ValueError( |
| 3737 | + f"ACP server rejected {method}(model={model!r}): {pending_error}" |
| 3738 | + ) from pending_error |
3665 | 3739 | # Reflect the live model on the sentinel LLM + metrics so cost/token |
3666 | 3740 | # accounting and serialized state show the model actually in use |
3667 | 3741 | # (mirrors model_post_init). The ``acp_model`` field is frozen, so the |
|
0 commit comments