Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
>
> The changes related to the Colang language and runtime have moved to [CHANGELOG-Colang](./CHANGELOG-Colang.md) file.

## [Unreleased]

### 🐛 Bug Fixes

- *(llmrails)* Normalize OpenAI multi-part content lists to plain strings before rail evaluation, fixing garbled self-check prompts and a `TypeError` crash in `get_colang_history` ([#1741](https://github.com/NVIDIA-NeMo/Guardrails/issues/1741))

## [0.22.0] - 2026-05-22

### 🚀 Features
Expand Down
10 changes: 6 additions & 4 deletions nemoguardrails/rails/llm/llmrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
)
from nemoguardrails.rails.llm.utils import (
get_action_details_from_flow_id,
get_content_text,
get_history_cache_key,
)
from nemoguardrails.streaming import END_OF_STREAM, StreamingHandler
Expand Down Expand Up @@ -765,10 +766,11 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
for idx in range(p, len(messages)):
msg = messages[idx]
if msg["role"] == "user":
user_text = get_content_text(msg["content"])
events.append(
{
"type": "UtteranceUserActionFinished",
"final_transcript": msg["content"],
"final_transcript": user_text,
}
)

Expand All @@ -777,7 +779,7 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
events.append(
{
"type": "UserMessage",
"text": msg["content"],
"text": user_text,
}
)

Expand Down Expand Up @@ -812,7 +814,7 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
user_message = None
for prev_msg in reversed(messages[:idx]):
if prev_msg["role"] == "user":
user_message = prev_msg["content"]
user_message = get_content_text(prev_msg["content"])
break

if user_message:
Expand Down Expand Up @@ -847,7 +849,7 @@ def _get_events_for_messages(self, messages: List[dict], state: Any):
events.append(
{
"type": "UtteranceUserActionFinished",
"final_transcript": msg["content"],
"final_transcript": get_content_text(msg["content"]),
}
)

Expand Down
34 changes: 23 additions & 11 deletions nemoguardrails/rails/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,28 @@
from nemoguardrails.colang.v1_0.runtime.flows import _normalize_flow_id


def get_content_text(content: Any) -> str:
    """Normalize an OpenAI message ``content`` field to a plain string.

    The OpenAI API allows ``content`` to be a plain string **or** a list of
    content parts (the multi-part format used for multimodal messages)::

        [{"type": "text", "text": "..."}, {"type": "image_url", ...}]

    Args:
        content: The raw ``content`` value of a message. May be a string, a
            list of content parts, ``None``, or any other object.

    Returns:
        A plain string:

        * for a list, the ``text`` of every ``type: text`` part joined with a
          single space; list items that are not dicts, parts of another type,
          and text parts whose ``text`` is missing or empty are skipped;
        * ``""`` for ``None``;
        * ``str(content)`` for anything else.
    """
    if content is None:
        return ""

    if isinstance(content, list):
        texts = []
        for part in content:
            # Ignore malformed items and non-text parts (e.g. image_url).
            if not isinstance(part, dict) or part.get("type") != "text":
                continue
            text = part.get("text")
            # Skip missing/empty text so the join does not produce stray
            # leading, trailing or doubled separators.
            if not text:
                continue
            texts.append(str(text))
        return " ".join(texts)

    return str(content)


def get_history_cache_key(messages: List[dict]) -> str:
"""Compute the cache key for a sequence of messages.

Expand All @@ -34,17 +56,7 @@ def get_history_cache_key(messages: List[dict]) -> str:

for msg in messages:
if msg["role"] == "user":
# Check if content is a string or a list (multimodal content)
if isinstance(msg["content"], list):
# For multimodal content, join all text parts
text_parts = []
for item in msg["content"]:
if item.get("type") == "text":
text_parts.append(item.get("text", ""))
key_items.append(" ".join(text_parts))
else:
# Use the content directly without json.dumps
key_items.append(msg["content"])
key_items.append(get_content_text(msg["content"]))
elif msg["role"] == "assistant":
key_items.append(msg["content"])
elif msg["role"] == "context":
Expand Down
164 changes: 164 additions & 0 deletions tests/test_llmrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from nemoguardrails.logging.explain import ExplainInfo
from nemoguardrails.rails.llm.config import Model
from nemoguardrails.rails.llm.options import GenerationOptions
from nemoguardrails.rails.llm.utils import get_content_text
from tests.conftest import REASONING_TRACE_MOCK_PATH
from tests.utils import FakeLLMModel, clean_events, event_sequence_conforms

Expand Down Expand Up @@ -1586,3 +1587,166 @@ async def test_warning_behavior(self, no_main_llm_config, caplog, options, has_l
else:
await rails.generate_async(messages=messages, options=options)
assert _count_no_llm_warnings(caplog) == expected_warnings


# ---------------------------------------------------------------------------
# Tests for OpenAI multi-part content normalization (Issue #1741)
# ---------------------------------------------------------------------------


class TestGetContentText:
    """Covers get_content_text() for string, None, scalar and list inputs."""

    def test_plain_string_passthrough(self):
        """A string input comes back unchanged."""
        result = get_content_text("Hello")
        assert result == "Hello"

    def test_none_returns_empty_string(self):
        """None maps to the empty string."""
        result = get_content_text(None)
        assert result == ""

    def test_non_string_non_list_converted_via_str(self):
        """Other scalar values are stringified."""
        result = get_content_text(42)
        assert result == "42"

    def test_single_text_part(self):
        """A one-element text list yields that element's text."""
        parts = [{"type": "text", "text": "Hello"}]
        assert get_content_text(parts) == "Hello"

    def test_multiple_text_parts_joined(self):
        """Several text parts are joined with a single space."""
        first = {"type": "text", "text": "Hello"}
        second = {"type": "text", "text": "World"}
        assert get_content_text([first, second]) == "Hello World"

    def test_non_text_parts_skipped(self):
        """Parts whose type is not "text" do not contribute to the result."""
        image_part = {"type": "image_url", "image_url": {"url": "http://example.com/img.png"}}
        text_part = {"type": "text", "text": "Describe this image"}
        assert get_content_text([image_part, text_part]) == "Describe this image"

    def test_empty_list_returns_empty_string(self):
        """An empty part list maps to the empty string."""
        parts = []
        assert get_content_text(parts) == ""

    def test_list_with_only_non_text_parts(self):
        """A list without any text part maps to the empty string."""
        image_part = {"type": "image_url", "image_url": {"url": "http://example.com/img.png"}}
        assert get_content_text([image_part]) == ""

    def test_missing_text_key_in_part(self):
        """A text part lacking the "text" key contributes nothing."""
        parts = [{"type": "text"}]
        assert get_content_text(parts) == ""


@pytest.fixture
def simple_rails_config():
    """Build a minimal rails config: one "fake" main model and one greeting flow."""
    greeting_flow = {"elements": [{"user": "express greeting"}, {"bot": "express greeting"}]}
    raw_config = {
        "models": [{"type": "main", "engine": "fake", "model": "fake"}],
        "user_messages": {"express greeting": ["Hello!"]},
        "flows": [greeting_flow],
        "bot_messages": {"express greeting": ["Hi there!"]},
    }
    return RailsConfig.parse_object(raw_config)


@pytest.mark.asyncio
async def test_multipart_content_single_turn(simple_rails_config):
    """A single user turn sent as a multi-part list yields the configured bot reply."""
    fake_llm = FakeLLMModel(responses=[" express greeting"])
    app = LLMRails(config=simple_rails_config, llm=fake_llm)
    user_turn = {"role": "user", "content": [{"type": "text", "text": "Hello!"}]}

    reply = await app.generate_async(messages=[user_turn])

    assert reply["role"] == "assistant"
    reply_text = reply["content"]
    assert isinstance(reply_text, str)
    assert reply_text == "Hi there!"


@pytest.mark.asyncio
async def test_multipart_content_multi_turn_does_not_crash(simple_rails_config):
    """A conversation whose earlier user turn is multi-part still produces a string reply."""
    fake_llm = FakeLLMModel(responses=[" express greeting", " express greeting"])
    app = LLMRails(config=simple_rails_config, llm=fake_llm)

    first_user_turn = {"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
    assistant_turn = {"role": "assistant", "content": "Hi there!"}
    second_user_turn = {"role": "user", "content": [{"type": "text", "text": "Hello again!"}]}
    conversation = [first_user_turn, assistant_turn, second_user_turn]

    reply = await app.generate_async(messages=conversation)

    assert reply["role"] == "assistant"
    assert isinstance(reply["content"], str)


@pytest.mark.asyncio
async def test_multipart_content_mixed_parts(simple_rails_config):
    """A user turn mixing an image_url part and a text part still produces a string reply."""
    fake_llm = FakeLLMModel(responses=[" express greeting"])
    app = LLMRails(config=simple_rails_config, llm=fake_llm)

    image_part = {"type": "image_url", "image_url": {"url": "http://example.com/img.png"}}
    text_part = {"type": "text", "text": "Hello!"}
    user_turn = {"role": "user", "content": [image_part, text_part]}

    reply = await app.generate_async(messages=[user_turn])

    assert reply["role"] == "assistant"
    assert isinstance(reply["content"], str)


def test_tool_message_with_multipart_user_content(simple_rails_config):
    """Colang 1.0 tool-message branch: the preceding multi-part user content
    must appear as a plain string in every emitted UserMessage event."""
    app = LLMRails(config=simple_rails_config, llm=FakeLLMModel(responses=[]))

    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": "What is the weather?"}],
    }
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [{"id": "call_abc", "function": {"name": "get_weather", "arguments": "{}"}}],
    }
    tool_turn = {
        "role": "tool",
        "content": "Sunny, 72F",
        "tool_call_id": "call_abc",
    }

    events = app._get_events_for_messages([user_turn, assistant_turn, tool_turn], state=None)

    user_message_events = list(filter(lambda e: e.get("type") == "UserMessage", events))
    assert len(user_message_events) >= 1
    # Each UserMessage must hold the extracted text rather than a list repr.
    assert all(e["text"] == "What is the weather?" for e in user_message_events)


def test_colang2_multipart_content_normalization():
    """Colang 2.0 user-message branch: multi-part user content must reach the
    ``UtteranceUserActionFinished`` event as a plain string.

    Builds a Colang 2.x config with no models, converts a single multi-part
    user message into events via ``_get_events_for_messages`` and checks that
    exactly one ``UtteranceUserActionFinished`` event is produced whose
    ``final_transcript`` is the extracted text.
    """
    # NOTE(review): the whitespace inside the two string literals below is
    # reproduced exactly as it appears in this view — confirm the indentation
    # of the flow bodies against the repository copy before relying on it.
    config = RailsConfig.from_content(
        colang_content="""
flow greeting
user said "Hello!"
bot say "Hi there!"

flow main
activate greeting
""",
        yaml_content="""
colang_version: "2.x"
models: []
""",
    )
    rails = LLMRails(config=config)
    # Single user turn whose content is a one-element list of text parts.
    messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
    events = rails._get_events_for_messages(messages, state=None)

    utterance_events = [e for e in events if e.get("type") == "UtteranceUserActionFinished"]
    assert len(utterance_events) == 1
    # The transcript must be the joined text, not the original list.
    assert utterance_events[0]["final_transcript"] == "Hello!"