Skip to content

Commit ef54810

Browse files
Authored by Ben Wilson
Add context loading for ChatModel (mlflow#19250)
Signed-off-by: Ben Wilson <[email protected]>
1 parent 65aa828 commit ef54810

File tree

2 files changed

+44
-2
lines changed

2 files changed

+44
-2
lines changed

mlflow/pyfunc/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3172,7 +3172,7 @@ def predict(self, context, model_input: List[str], params=None) -> List[str]:
31723172
)
31733173
elif isinstance(python_model, ChatAgent):
31743174
input_example = _save_model_chat_agent_helper(
3175-
python_model, mlflow_model, signature, input_example
3175+
python_model, mlflow_model, signature, input_example, artifacts, model_config
31763176
)
31773177
elif IS_RESPONSES_AGENT_AVAILABLE and isinstance(python_model, ResponsesAgent):
31783178
input_example = _save_model_responses_agent_helper(
@@ -3754,7 +3754,9 @@ def _save_model_with_loader_module_and_data_path(
37543754
return mlflow_model
37553755

37563756

3757-
def _save_model_chat_agent_helper(python_model, mlflow_model, signature, input_example):
3757+
def _save_model_chat_agent_helper(
3758+
python_model, mlflow_model, signature, input_example, artifacts, model_config
3759+
):
37583760
"""Helper method for save_model for ChatAgent models
37593761
37603762
Returns: a dict input_example
@@ -3792,6 +3794,8 @@ def _save_model_chat_agent_helper(python_model, mlflow_model, signature, input_e
37923794
input_example = CHAT_AGENT_INPUT_EXAMPLE
37933795

37943796
_logger.info("Predicting on input example to validate output")
3797+
context = PythonModelContext(artifacts, model_config)
3798+
python_model.load_context(context)
37953799
request = ChatAgentRequest(**input_example)
37963800
output = python_model.predict(request.messages, request.context, request.custom_inputs)
37973801
try:

tests/pyfunc/test_chat_agent.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,3 +430,41 @@ def test_chat_agent_predict_with_params(tmp_path):
430430
responses = list(loaded_model.predict_stream(CHAT_AGENT_INPUT_EXAMPLE, params=None))
431431
for i, resp in enumerate(responses[:-1]):
432432
assert resp["delta"]["content"] == f"message {i}"
433+
434+
435+
def test_chat_agent_load_context_called_during_save(tmp_path):
436+
class ChatAgentWithArtifacts(ChatAgent):
437+
def __init__(self):
438+
self.prefix = None
439+
440+
def load_context(self, context):
441+
self.prefix = "loaded_prefix"
442+
443+
def predict(
444+
self,
445+
messages: list[ChatAgentMessage],
446+
context: ChatContext,
447+
custom_inputs: dict[str, Any],
448+
) -> ChatAgentResponse:
449+
if self.prefix is None:
450+
raise ValueError("load_context was not called - prefix is None")
451+
return ChatAgentResponse(
452+
messages=[
453+
{
454+
"role": "assistant",
455+
"content": f"{self.prefix}: {messages[0].content}",
456+
"id": str(uuid4()),
457+
}
458+
]
459+
)
460+
461+
model = ChatAgentWithArtifacts()
462+
save_path = tmp_path / "model"
463+
mlflow.pyfunc.save_model(
464+
python_model=model,
465+
path=save_path,
466+
)
467+
468+
loaded_model = mlflow.pyfunc.load_model(save_path)
469+
response = loaded_model.predict({"messages": [{"role": "user", "content": "Hello!"}]})
470+
assert response["messages"][0]["content"] == "loaded_prefix: Hello!"

0 commit comments

Comments (0)