NVIDIA-NeMo · nac7 · Jun 7, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 6, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,12 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 >
 > The changes related to the Colang language and runtime have moved to [CHANGELOG-Colang](./CHANGELOG-Colang.md) file.
 
+## [Unreleased]
+
+### 🐛 Bug Fixes
+
+- *(llmrails)* Normalize OpenAI multi-part content lists to plain strings before rail evaluation, fixing garbled self-check prompts and a `TypeError` crash in `get_colang_history` ([#1741](https://github.com/NVIDIA-NeMo/Guardrails/issues/1741))
+
 ## [0.22.0] - 2026-05-22
 
 ### 🚀 Features

diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py
@@ -100,6 +100,7 @@
 )
 from nemoguardrails.rails.llm.utils import (
     get_action_details_from_flow_id,
+    get_content_text,
     get_history_cache_key,
 )
 from nemoguardrails.streaming import END_OF_STREAM, StreamingHandler
@@ -765,10 +766,11 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
             for idx in range(p, len(messages)):
                 msg = messages[idx]
                 if msg["role"] == "user":
+                    user_text = get_content_text(msg["content"])
                     events.append(
                         {
                             "type": "UtteranceUserActionFinished",
-                            "final_transcript": msg["content"],
+                            "final_transcript": user_text,
                         }
                     )
 
@@ -777,7 +779,7 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
                         events.append(
                             {
                                 "type": "UserMessage",
-                                "text": msg["content"],
+                                "text": user_text,
                             }
                         )
 
@@ -812,7 +814,7 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
                         user_message = None
                         for prev_msg in reversed(messages[:idx]):
                             if prev_msg["role"] == "user":
-                                user_message = prev_msg["content"]
+                                user_message = get_content_text(prev_msg["content"])
                                 break
 
                         if user_message:
@@ -847,7 +849,7 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
                     events.append(
                         {
                             "type": "UtteranceUserActionFinished",
-                            "final_transcript": msg["content"],
+                            "final_transcript": get_content_text(msg["content"]),
                         }
                     )
 

diff --git a/nemoguardrails/rails/llm/utils.py b/nemoguardrails/rails/llm/utils.py
@@ -18,6 +18,28 @@
 from nemoguardrails.colang.v1_0.runtime.flows import _normalize_flow_id
 
 
+def get_content_text(content: Any) -> str:
+    """Normalize an OpenAI message ``content`` field to a plain string.
+
+    ``content`` may be a plain string **or** a list of content parts (the
+    multi-part format used for multimodal messages)::
+
+        [{"type": "text", "text": "..."}, {"type": "image_url", ...}]
+
+    The non-empty ``type: text`` parts are joined with a single space; other
+    parts, non-dict items and empty/missing texts are skipped so they cannot
+    add stray whitespace.  ``None`` becomes ``""``; any other non-list value
+    is converted via ``str()``.
+    """
+    if content is None:
+        return ""
+    if not isinstance(content, list):
+        return str(content)
+    parts = (p.get("text") for p in content if isinstance(p, dict) and p.get("type") == "text")
+    # Drop falsy texts before joining: "" or None would otherwise leave stray spaces.
+    return " ".join(str(text) for text in parts if text)
+
+
 def get_history_cache_key(messages: List[dict]) -> str:
     """Compute the cache key for a sequence of messages.
 
@@ -34,17 +56,7 @@ def get_history_cache_key(messages: List[dict]) -> str:
 
     for msg in messages:
         if msg["role"] == "user":
-            # Check if content is a string or a list (multimodal content)
-            if isinstance(msg["content"], list):
-                # For multimodal content, join all text parts
-                text_parts = []
-                for item in msg["content"]:
-                    if item.get("type") == "text":
-                        text_parts.append(item.get("text", ""))
-                key_items.append(" ".join(text_parts))
-            else:
-                # Use the content directly without json.dumps
-                key_items.append(msg["content"])
+            key_items.append(get_content_text(msg["content"]))
         elif msg["role"] == "assistant":
             key_items.append(msg["content"])
         elif msg["role"] == "context":

diff --git a/tests/test_llmrails.py b/tests/test_llmrails.py
@@ -24,6 +24,7 @@
 from nemoguardrails.logging.explain import ExplainInfo
 from nemoguardrails.rails.llm.config import Model
 from nemoguardrails.rails.llm.options import GenerationOptions
+from nemoguardrails.rails.llm.utils import get_content_text
 from tests.conftest import REASONING_TRACE_MOCK_PATH
 from tests.utils import FakeLLMModel, clean_events, event_sequence_conforms
 
@@ -1586,3 +1587,166 @@ async def test_warning_behavior(self, no_main_llm_config, caplog, options, has_l
             else:
                 await rails.generate_async(messages=messages, options=options)
         assert _count_no_llm_warnings(caplog) == expected_warnings
+
+
+# ---------------------------------------------------------------------------
+# Tests for OpenAI multi-part content normalization (Issue #1741)
+# ---------------------------------------------------------------------------
+
+
+class TestGetContentText:
+    """Covers string, None, scalar and multi-part list inputs to get_content_text()."""
+
+    def test_plain_string_passthrough(self):
+        text = "Hello"
+        assert get_content_text(text) == "Hello"
+
+    def test_none_returns_empty_string(self):
+        assert get_content_text(None) == ""
+
+    def test_non_string_non_list_converted_via_str(self):
+        assert get_content_text(42) == "42"
+
+    def test_single_text_part(self):
+        assert get_content_text([{"type": "text", "text": "Hello"}]) == "Hello"
+
+    def test_multiple_text_parts_joined(self):
+        parts = [{"type": "text", "text": word} for word in ("Hello", "World")]
+        assert get_content_text(parts) == "Hello World"
+
+    def test_non_text_parts_skipped(self):
+        image_part = {"type": "image_url", "image_url": {"url": "http://example.com/img.png"}}
+        text_part = {"type": "text", "text": "Describe this image"}
+        assert get_content_text([image_part, text_part]) == "Describe this image"
+
+    def test_empty_list_returns_empty_string(self):
+        no_parts = []
+        assert get_content_text(no_parts) == ""
+
+    def test_list_with_only_non_text_parts(self):
+        image_only = [
+            {"type": "image_url", "image_url": {"url": "http://example.com/img.png"}},
+        ]
+        assert get_content_text(image_only) == ""
+
+    def test_missing_text_key_in_part(self):
+        assert get_content_text([{"type": "text"}]) == ""
+
+
+@pytest.fixture
+def simple_rails_config():
+    """Rails config with one greeting flow and a fake main model."""
+    raw_config = {
+        "models": [{"type": "main", "engine": "fake", "model": "fake"}],
+        "user_messages": {"express greeting": ["Hello!"]},
+        "flows": [{"elements": [{"user": "express greeting"}, {"bot": "express greeting"}]}],
+        "bot_messages": {"express greeting": ["Hi there!"]},
+    }
+    return RailsConfig.parse_object(raw_config)
+
+
+@pytest.mark.asyncio
+async def test_multipart_content_single_turn(simple_rails_config):
+    """A lone user turn sent as a one-element text-part list yields the greeting reply."""
+    fake_llm = FakeLLMModel(responses=["  express greeting"])
+    rails = LLMRails(llm=fake_llm, config=simple_rails_config)
+    user_turn = {"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
+
+    reply = await rails.generate_async(messages=[user_turn])
+
+    assert reply["role"] == "assistant"
+    assert isinstance(reply["content"], str)
+    assert reply["content"] == "Hi there!"
+
+
+@pytest.mark.asyncio
+async def test_multipart_content_multi_turn_does_not_crash(simple_rails_config):
+    """Multi-part content in an earlier turn must not raise TypeError in get_colang_history."""
+    fake_llm = FakeLLMModel(responses=["  express greeting"] * 2)
+    rails = LLMRails(llm=fake_llm, config=simple_rails_config)
+
+    def user_turn(text):
+        return {"role": "user", "content": [{"type": "text", "text": text}]}
+
+    # Both user turns use the list form; the assistant turn stays a plain string.
+    history = [user_turn("Hello!"), {"role": "assistant", "content": "Hi there!"}]
+    reply = await rails.generate_async(messages=history + [user_turn("Hello again!")])
+
+    assert reply["role"] == "assistant"
+    assert isinstance(reply["content"], str)
+
+
+@pytest.mark.asyncio
+async def test_multipart_content_mixed_parts(simple_rails_config):
+    """Only text parts are extracted; image_url parts are silently dropped.
+
+    The reply text itself is asserted (as in the single-turn test), not just
+    its type, so a broken or empty bot response fails the test.
+    """
+    llm = FakeLLMModel(responses=["  express greeting"])
+    rails = LLMRails(config=simple_rails_config, llm=llm)
+    content = [
+        {"type": "image_url", "image_url": {"url": "http://example.com/img.png"}},
+        {"type": "text", "text": "Hello!"},
+    ]
+
+    result = await rails.generate_async(messages=[{"role": "user", "content": content}])
+
+    assert result["role"] == "assistant"
+    assert isinstance(result["content"], str)
+    assert result["content"] == "Hi there!"
+
+
+def test_tool_message_with_multipart_user_content(simple_rails_config):
+    """Colang 1.0 tool-message branch: the preceding user turn's multi-part
+    content reaches every UserMessage event as plain text."""
+    rails = LLMRails(config=simple_rails_config, llm=FakeLLMModel(responses=[]))
+    question = "What is the weather?"
+    tool_call = {"id": "call_abc", "function": {"name": "get_weather", "arguments": "{}"}}
+    messages = [
+        {"role": "user", "content": [{"type": "text", "text": question}]},
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [tool_call],
+        },
+        {
+            "role": "tool",
+            "content": "Sunny, 72F",
+            "tool_call_id": tool_call["id"],
+        },
+    ]
+
+    events = rails._get_events_for_messages(messages, state=None)
+
+    # Collect the text of each UserMessage event; at least one must be emitted.
+    user_texts = [event["text"] for event in events if event.get("type") == "UserMessage"]
+    assert len(user_texts) >= 1
+    # Every one must be the normalised string, not the repr of the parts list.
+    assert all(text == question for text in user_texts)
+
+
+def test_colang2_multipart_content_normalization():
+    """Colang 2.x user-message branch: a multi-part user turn produces one
+    UtteranceUserActionFinished event whose transcript is the plain text."""
+    colang_source = """
+flow greeting
+  user said "Hello!"
+  bot say "Hi there!"
+
+flow main
+  activate greeting
+"""
+    yaml_source = """
+colang_version: "2.x"
+models: []
+"""
+    config = RailsConfig.from_content(colang_content=colang_source, yaml_content=yaml_source)
+    rails = LLMRails(config=config)
+    user_turn = {"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
+
+    events = rails._get_events_for_messages([user_turn], state=None)
+
+    transcripts = [e["final_transcript"] for e in events if e.get("type") == "UtteranceUserActionFinished"]
+    assert len(transcripts) == 1
+    assert transcripts[0] == "Hello!"