feat(sdk): allow Conversation.switch_profile to accept an inline LLM (#3018)

VascoSch92 · web-flow · commit 468d09896bb9 · 2026-04-30T16:14:12.000+02:00
diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py
@@ -319,6 +319,28 @@ async def switch_conversation_profile(
     return Success()
 
 
+@conversation_router.post(
+    "/{conversation_id}/switch_llm",
+    responses={404: {"description": "Conversation not found"}},
+)
+async def switch_conversation_llm(
+    conversation_id: UUID,
+    llm: LLM = Body(..., embed=True),  # noqa: B008
+    conversation_service: ConversationService = Depends(get_conversation_service),
+) -> Success:
+    """Swap the conversation's LLM to the object sent under the body's ``llm`` key.
+
+    For app-servers that own the LLM and don't push profiles to the agent-server
+    filesystem (see #3017). Responds 404 if ``conversation_id`` has no event service.
+    """
+    event_service = await conversation_service.get_event_service(conversation_id)
+    if event_service is None:
+        raise HTTPException(status.HTTP_404_NOT_FOUND)
+    conversation = event_service.get_conversation()
+    conversation.switch_llm(llm)
+    return Success()
+
+
 @conversation_router.patch(
     "/{conversation_id}", responses={404: {"description": "Item not found"}}
 )
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -632,11 +632,33 @@ def _pin_prompt_cache_key(self) -> None:
         if self.agent.llm._prompt_cache_key is None:
             self.agent.llm._prompt_cache_key = str(self._state.id)
 
+    def switch_llm(self, llm: LLM) -> None:
+        """Swap the agent's LLM to the given object.
+
+        The caller owns ``llm.usage_id``; it is the registry key. If an
+        entry with that key already exists, the cached LLM is reused and
+        the passed ``llm`` is dropped — matching the rest of the
+        registry's "first-write-wins" contract. Otherwise ``llm`` is registered.
+
+        Args:
+            llm: LLM to install on the agent.
+        """
+        try:
+            new_llm = self.llm_registry.get(llm.usage_id)
+        except KeyError:  # usage_id not registered yet: register the caller's LLM
+            new_llm = llm
+            self.llm_registry.add(new_llm)
+        with self._state:
+            self.agent = self.agent.model_copy(update={"llm": new_llm})
+            self._state.agent = self.agent
+            self._pin_prompt_cache_key()  # only sets the key when it is still None
+
     def switch_profile(self, profile_name: str) -> None:
-        """Switch the agent's LLM to a named profile.
+        """Switch the agent's LLM to a profile loaded from disk.
 
-        Loads the profile from the LLMProfileStore (cached in the registry
-        after the first load) and updates the agent and conversation state.
+        Loads the profile from :class:`LLMProfileStore` (cached in the
+        registry under ``profile:{profile_name}`` after first load) and
+        delegates the swap to :meth:`switch_llm`.
 
         Args:
             profile_name: Name of a profile previously saved via LLMProfileStore.
@@ -647,15 +669,11 @@ def switch_profile(self, profile_name: str) -> None:
         """
         usage_id = f"profile:{profile_name}"
         try:
-            new_llm = self.llm_registry.get(usage_id)
+            cached = self.llm_registry.get(usage_id)
         except KeyError:
-            new_llm = self._profile_store.load(profile_name)
-            new_llm = new_llm.model_copy(update={"usage_id": usage_id})
-            self.llm_registry.add(new_llm)
-        with self._state:
-            self.agent = self.agent.model_copy(update={"llm": new_llm})
-            self._state.agent = self.agent
-            self._pin_prompt_cache_key()
+            loaded = self._profile_store.load(profile_name)
+            cached = loaded.model_copy(update={"usage_id": usage_id})
+        self.switch_llm(cached)
 
     @observe(name="conversation.send_message")
     def send_message(self, message: str | Message, sender: str | None = None) -> None:
diff --git a/tests/agent_server/test_conversation_router.py b/tests/agent_server/test_conversation_router.py
@@ -1602,6 +1602,69 @@ def test_switch_conversation_profile_corrupted_profile(
         client.app.dependency_overrides.clear()
 
 
+def test_switch_conversation_llm_success(
+    client, mock_conversation_service, mock_event_service, sample_conversation_id
+):
+    """The /switch_llm endpoint returns 200 and forwards the body's ``llm`` (parsed
+    into an LLM, usage_id intact) to switch_llm, bypassing the profile store (#3017).
+    """
+    mock_conversation = MagicMock()
+    mock_conversation_service.get_event_service.return_value = mock_event_service
+    mock_event_service.get_conversation.return_value = mock_conversation
+
+    client.app.dependency_overrides[get_conversation_service] = (
+        lambda: mock_conversation_service
+    )
+
+    llm_payload = {
+        "model": "openai/gpt-4o",
+        "api_key": "sk-test",
+        "usage_id": "caller-supplied-id",
+    }
+
+    try:
+        response = client.post(
+            f"/api/conversations/{sample_conversation_id}/switch_llm",
+            json={"llm": llm_payload},  # LLM is embedded under the "llm" key
+        )
+
+        assert response.status_code == 200
+        mock_conversation.switch_llm.assert_called_once()
+        forwarded_llm = mock_conversation.switch_llm.call_args.args[0]
+        assert isinstance(forwarded_llm, LLM)
+        assert forwarded_llm.model == "openai/gpt-4o"
+        assert forwarded_llm.usage_id == "caller-supplied-id"
+    finally:
+        client.app.dependency_overrides.clear()  # drop the override set above
+
+
+def test_switch_conversation_llm_not_found(
+    client, mock_conversation_service, sample_conversation_id
+):
+    """The /switch_llm endpoint returns 404 when the conversation is missing."""
+    mock_conversation_service.get_event_service.return_value = None  # unknown ID
+
+    client.app.dependency_overrides[get_conversation_service] = (
+        lambda: mock_conversation_service
+    )
+
+    try:
+        response = client.post(
+            f"/api/conversations/{sample_conversation_id}/switch_llm",
+            json={
+                "llm": {
+                    "model": "openai/gpt-4o",
+                    "api_key": "sk-test",
+                    "usage_id": "x",
+                }
+            },
+        )
+
+        assert response.status_code == 404
+    finally:
+        client.app.dependency_overrides.clear()  # drop the override set above
+
+
 def test_fork_conversation_success(
     client, mock_conversation_service, sample_conversation_info, sample_conversation_id
 ):
diff --git a/tests/sdk/conversation/test_switch_model.py b/tests/sdk/conversation/test_switch_model.py
@@ -117,3 +117,98 @@ def test_switch_then_send_message(profile_store):
     # send_message triggers _ensure_agent_ready which re-registers agent LLMs;
     # the switched LLM must not cause a duplicate registration error.
     conv.send_message("hello")
+
+
+@pytest.fixture()
+def empty_profile_store(tmp_path, monkeypatch):
+    """Point the default profile dir at an empty temp dir and return its path.
+    Simulates the agent-server sandbox where the app-server has never uploaded
+    profile JSON — the real failure mode behind #3017.
+    """
+    profile_dir = tmp_path / "profiles"
+    profile_dir.mkdir()
+    monkeypatch.setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", profile_dir)
+    return profile_dir
+
+
+def test_switch_llm_swaps_when_store_empty(empty_profile_store):
+    """Real app-server case (#3017): with no profiles on the sandbox FS, an LLM
+    supplied directly is installed on the agent, the state and the registry.
+    """
+    conv = _make_conversation()
+    inline = _make_llm("inline-model", "caller-supplied-id")
+
+    conv.switch_llm(inline)
+
+    assert conv.agent.llm.model == "inline-model"
+    # State must agree — agent_server reads agent.llm via _state.
+    assert conv.state.agent.llm.model == "inline-model"
+    # Caller's usage_id is preserved as the registry key.
+    assert conv.agent.llm.usage_id == "caller-supplied-id"
+    assert conv.llm_registry.get("caller-supplied-id").model == "inline-model"
+    # Cache-key must be repinned (regression guard for #2918 on the new path).
+    assert conv.agent.llm._prompt_cache_key == str(conv.id)
+
+
+def test_switch_llm_then_send_message(empty_profile_store):
+    """send_message triggers _ensure_agent_ready, which re-registers agent
+    LLMs in the registry. switch_llm adds an entry under the caller's
+    usage_id; this must not collide with the agent's own LLM
+    re-registration on the next send_message().
+    """
+    conv = _make_conversation()
+    conv.switch_llm(_make_llm("inline-model", "x"))
+    conv.send_message("hello")  # no assertion: the test passes if nothing raises
+
+
+def test_switch_between_two_llms(empty_profile_store):
+    """Consecutive switch_llm calls with distinct usage_ids each take effect:
+    after every call the agent's LLM is the one most recently passed in.
+    """
+    conv = _make_conversation()
+
+    conv.switch_llm(_make_llm("model-a", "x"))
+    assert conv.agent.llm.model == "model-a"
+
+    conv.switch_llm(_make_llm("model-b", "y"))
+    assert conv.agent.llm.model == "model-b"
+
+
+def test_switch_llm_does_not_consult_store(empty_profile_store, monkeypatch):
+    """switch_llm must not hit LLMProfileStore.load — the caller is
+    authoritative. Guards against a regression where the inline path
+    silently falls through to disk IO.
+    """
+    calls: list[str] = []
+
+    def _spy_load(self, name):
+        calls.append(name)  # record every attempted profile load
+        raise FileNotFoundError(name)
+
+    monkeypatch.setattr(LLMProfileStore, "load", _spy_load)
+
+    conv = _make_conversation()
+    conv.switch_llm(_make_llm("inline-model", "x"))
+
+    assert calls == [], f"profile store was consulted: {calls}"
+
+
+def test_switch_profile_delegates_to_switch_llm(profile_store, monkeypatch):
+    """switch_profile loads from disk and delegates to switch_llm; the LLM
+    handed off carries the canonical ``profile:{name}`` usage_id.
+    """
+    conv = _make_conversation()
+    seen: list[LLM] = []
+    real_switch_llm = conv.switch_llm  # captured before the attribute is patched
+
+    def _spy(llm):
+        seen.append(llm)
+        real_switch_llm(llm)  # still perform the real swap
+
+    monkeypatch.setattr(conv, "switch_llm", _spy)
+
+    conv.switch_profile("fast")  # assumes the fixture saved a "fast" profile
+
+    assert len(seen) == 1
+    assert seen[0].usage_id == "profile:fast"
+    assert seen[0].model == "fast-model"