Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ router_settings:
| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) |
| disable_add_transform_inline_image_block | boolean | For Fireworks AI models - if true, turns off the auto-add of `#transform=inline` to the url of the image_url, if the model is not a vision model. |
| use_chat_completions_url_for_anthropic_messages | boolean | If true, routes OpenAI `/v1/messages` requests through chat/completions instead of the Responses API. Can also be set via env var `LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES=true`. |
| skip_system_message_in_guardrail | boolean | If true, unified guardrails omit `role: system` from scanned input on **chat completions** and **Anthropic `/v1/messages`** only; the LLM still receives full messages. Per-guardrail override: `litellm_params.skip_system_message_in_guardrail` on each guardrail. [Guardrails quick start](./guardrails/quick_start#skip-system-messages-in-guardrail-evaluation) |
| disable_hf_tokenizer_download | boolean | If true, it defaults to using the openai tokenizer for all models (including huggingface models). |
| enable_json_schema_validation | boolean | If true, enables json schema validation for all requests. |
| enable_key_alias_format_validation | boolean | If true, validates `key_alias` format on `/key/generate` and `/key/update`. Must be 2-255 chars, start/end with alphanumeric, only allow `a-zA-Z0-9_-/.@`. Default `false`. |
Expand Down
17 changes: 17 additions & 0 deletions docs/my-website/docs/proxy/guardrails/quick_start.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,23 @@ For generic guardrail APIs you can also set **static headers** (`headers`: key/v
- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes
- A list of the above values to run multiple modes, e.g. `mode: [pre_call, post_call]`

### Skip system messages in guardrail evaluation

You can stop **unified** guardrails from scanning `role: system` content while still sending the full `messages` list to the model.

**Global** — in `litellm_settings`:

```yaml
litellm_settings:
skip_system_message_in_guardrail: true
```

**Per guardrail** — under that guardrail’s `litellm_params`: set `skip_system_message_in_guardrail: true` or `false`. If omitted, the global `litellm_settings` value is used; per-guardrail `false` forces system messages to be included even when the global flag is `true`.

**Where this applies:** Only the **unified** guardrail path (providers that implement `apply_guardrail` and run through LiteLLM’s message translation layer) on **OpenAI Chat Completions** (`/v1/chat/completions`) and **Anthropic Messages** (`/v1/messages`). Examples include Presidio, Bedrock guardrails, `litellm_content_filter`, OpenAI Moderation, Generic Guardrail API, and custom code guardrails that define `apply_guardrail`.

**Where this does *not* apply:** Guardrails that run only via direct hooks on the raw request (e.g. Lakera v2, Aporia, DynamoAI, Javelin, Lasso, Pangea, Model Armor, Azure Content Safety hooks, Guardrails AI, AIM, tool permission, MCP security). It also does not apply to other routes until those endpoints use the same translation layer (e.g. Responses API, embeddings, speech).

### Load Balancing Guardrails

Need to distribute guardrail requests across multiple accounts or regions? See [Guardrail Load Balancing](./guardrail_load_balancing.md) for details on:
Expand Down
1 change: 1 addition & 0 deletions litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@
bool
] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs
skip_system_message_in_guardrail: bool = False

Check notice

Code scanning / CodeQL

Unused global variable Note

The global variable 'skip_system_message_in_guardrail' is not used.
### end of callbacks #############

email: Optional[
Expand Down
15 changes: 15 additions & 0 deletions litellm/llms/anthropic/chat/guardrail_translation/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
LiteLLMAnthropicMessagesAdapter,
)
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.llms.base_llm.guardrail_translation.utils import (
effective_skip_system_message_for_guardrail,
openai_messages_without_system,
)
from litellm.proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
AnthropicPassthroughLoggingHandler,
)
Expand Down Expand Up @@ -75,6 +79,8 @@ async def process_input_messages(
if messages is None:
return data

skip_system = effective_skip_system_message_for_guardrail(guardrail_to_apply)

(
chat_completion_compatible_request,
_tool_name_mapping,
Expand All @@ -84,6 +90,10 @@ async def process_input_messages(
)

structured_messages = chat_completion_compatible_request.get("messages", [])
if skip_system:
structured_messages = openai_messages_without_system(
list(structured_messages)
)

texts_to_check: List[str] = []
images_to_check: List[str] = []
Expand All @@ -102,6 +112,7 @@ async def process_input_messages(
texts_to_check=texts_to_check,
images_to_check=images_to_check,
task_mappings=task_mappings,
skip_system_message=skip_system,
)

# Step 2: Apply guardrail to all texts in batch
Expand Down Expand Up @@ -165,12 +176,16 @@ def _extract_input_text_and_images(
texts_to_check: List[str],
images_to_check: List[str],
task_mappings: List[Tuple[int, Optional[int]]],
skip_system_message: bool = False,
) -> None:
"""
Extract text content and images from a message.

Override this method to customize text/image extraction logic.
"""
if skip_system_message and str(message.get("role") or "").lower() == "system":
return

content = message.get("content", None)
tools = message.get("tools", None)
if content is None and tools is None:
Expand Down
22 changes: 22 additions & 0 deletions litellm/llms/base_llm/guardrail_translation/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from __future__ import annotations

from typing import Any, Dict, List


def effective_skip_system_message_for_guardrail(guardrail_to_apply: Any) -> bool:
    """Resolve whether ``role: system`` messages should be excluded from
    guardrail evaluation for *guardrail_to_apply*.

    Resolution order:
      1. The per-guardrail attribute ``skip_system_message_in_guardrail``
         (populated from the guardrail's ``litellm_params``) wins when it is
         explicitly ``True`` or ``False``.
      2. When the attribute is missing or ``None``, fall back to the global
         ``litellm.skip_system_message_in_guardrail`` setting.

    Args:
        guardrail_to_apply: Guardrail callback object; may or may not carry
            the per-guardrail override attribute.

    Returns:
        True if system messages should be skipped, False otherwise.
    """
    per_guardrail = getattr(
        guardrail_to_apply, "skip_system_message_in_guardrail", None
    )
    if per_guardrail is not None:
        # Explicit per-guardrail setting overrides the global flag in
        # both directions (True forces skip, False forces include).
        return bool(per_guardrail)
    # Deferred import to avoid a circular import: this module is loaded
    # transitively while the `litellm` package itself is initializing.
    import litellm

    return bool(getattr(litellm, "skip_system_message_in_guardrail", False))


def openai_messages_without_system(
    messages: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Return a new list with every ``role: system`` message removed.

    The role comparison is case-insensitive and tolerates ``None`` entries
    as well as missing or ``None`` roles. The input list is not mutated.
    """
    filtered: List[Dict[str, Any]] = []
    for message in messages:
        role = (message or {}).get("role") or ""
        if str(role).lower() == "system":
            continue
        filtered.append(message)
    return filtered
19 changes: 16 additions & 3 deletions litellm/llms/openai/chat/guardrail_translation/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.llms.base_llm.guardrail_translation.utils import (
effective_skip_system_message_for_guardrail,
openai_messages_without_system,
)
from litellm.main import stream_chunk_builder
from litellm.types.llms.openai import ChatCompletionToolParam
from litellm.types.utils import (
Expand Down Expand Up @@ -57,6 +61,8 @@ async def process_input_messages(
if messages is None:
return data

skip_system = effective_skip_system_message_for_guardrail(guardrail_to_apply)

texts_to_check: List[str] = []
images_to_check: List[str] = []
tool_calls_to_check: List[ChatCompletionToolParam] = []
Expand All @@ -76,6 +82,7 @@ async def process_input_messages(
tool_calls_to_check=tool_calls_to_check,
text_task_mappings=text_task_mappings,
tool_call_task_mappings=tool_call_task_mappings,
skip_system_message=skip_system,
)

# Step 2: Apply guardrail to all texts and tool calls in batch
Expand All @@ -86,9 +93,11 @@ async def process_input_messages(
if tool_calls_to_check:
inputs["tool_calls"] = tool_calls_to_check # type: ignore
if messages:
inputs[
"structured_messages"
] = messages # pass the openai /chat/completions messages to the guardrail, as-is
inputs["structured_messages"] = (
openai_messages_without_system(messages)
if skip_system
else messages
)
# Pass tools (function definitions) to the guardrail
tools = data.get("tools")
if tools:
Expand Down Expand Up @@ -157,12 +166,16 @@ def _extract_inputs(
tool_calls_to_check: List[ChatCompletionToolParam],
text_task_mappings: List[Tuple[int, Optional[int]]],
tool_call_task_mappings: List[Tuple[int, int]],
skip_system_message: bool = False,
) -> None:
"""
Extract text content, images, and tool calls from a message.

Override this method to customize text/image/tool call extraction logic.
"""
if skip_system_message and str(message.get("role") or "").lower() == "system":
return

content = message.get("content", None)
if content is not None:
if isinstance(content, str):
Expand Down
7 changes: 7 additions & 0 deletions litellm/proxy/guardrails/guardrail_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,13 @@ def initialize_guardrail(
else:
raise ValueError(f"Unsupported guardrail: {guardrail_type}")

if custom_guardrail_callback is not None:
setattr(
custom_guardrail_callback,
"skip_system_message_in_guardrail",
getattr(litellm_params, "skip_system_message_in_guardrail", None),
)

parsed_guardrail = Guardrail(
guardrail_id=guardrail.get("guardrail_id"),
guardrail_name=guardrail["guardrail_name"],
Expand Down
10 changes: 10 additions & 0 deletions litellm/types/guardrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,16 @@ class BaseLitellmParams(
description="When True, guardrails only receive the latest message for the relevant role (e.g., newest user input pre-call, newest assistant output post-call)",
)

skip_system_message_in_guardrail: Optional[bool] = Field(
default=None,
description=(
"When True, unified guardrails skip system-role messages when building "
"evaluation inputs (texts and structured_messages). When False, system "
"messages are included even if litellm_settings sets a global skip. When "
"None, use the global litellm.skip_system_message_in_guardrail setting."
),
)

# Lakera specific params
category_thresholds: Optional[LakeraCategoryThresholds] = Field(
default=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@

import pytest

import litellm
from litellm.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.llms.base_llm.guardrail_translation.utils import (
effective_skip_system_message_for_guardrail,
openai_messages_without_system,
)
from litellm.llms.openai.chat.guardrail_translation.handler import (
OpenAIChatCompletionsHandler,
)
from litellm.llms.base_llm.ocr.transformation import OCRPage, OCRResponse
from litellm.llms.mistral.ocr.guardrail_translation.handler import OCRHandler
from litellm.proxy._experimental.mcp_server.guardrail_translation.handler import (
Expand Down Expand Up @@ -68,6 +76,109 @@ def _inject_mcp_handler_mapping():

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 No Anthropic handler test for skip_system feature

The PR description explicitly states the feature applies to "OpenAI Chat Completions and Anthropic Messages," but TestSkipSystemMessageForChatCompletions only exercises OpenAIChatCompletionsHandler. The Anthropic path (AnthropicMessagesHandler) differs meaningfully — it iterates the raw Anthropic messages array for texts_to_check and filters structured_messages from the OpenAI-converted request separately. A parallel test using AnthropicMessagesHandler (or at minimum AnthropicMessagesHandler._extract_input_text_and_images) would ensure that path stays covered as both handlers evolve.


class TestUnifiedLLMGuardrails:
class TestSkipSystemMessageForChatCompletions:
def test_openai_messages_without_system(self):
msgs = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hi"},
]
out = openai_messages_without_system(msgs)
assert len(out) == 1
assert out[0]["role"] == "user"
assert msgs[0]["content"] == "sys"

def test_effective_skip_respects_per_guardrail_over_global(self, monkeypatch):
monkeypatch.setattr(
litellm, "skip_system_message_in_guardrail", True, raising=False
)

class G:
skip_system_message_in_guardrail = False

assert effective_skip_system_message_for_guardrail(G()) is False

class G2:
skip_system_message_in_guardrail = None

assert effective_skip_system_message_for_guardrail(G2()) is True

@pytest.mark.asyncio
async def test_openai_handler_skips_system_in_guardrail_inputs(
self, monkeypatch
):
monkeypatch.setattr(
litellm, "skip_system_message_in_guardrail", True, raising=False
)

captured = {}

class MockGuardrail:
skip_system_message_in_guardrail = None

async def apply_guardrail(
self, inputs, request_data, input_type, logging_obj=None
):
captured["inputs"] = inputs
return inputs

data = {
"messages": [
{"role": "system", "content": "secret system"},
{"role": "user", "content": "hello"},
],
"model": "gpt-4o",
}

handler = OpenAIChatCompletionsHandler()
await handler.process_input_messages(
data=data,
guardrail_to_apply=MockGuardrail(),
litellm_logging_obj=None,
)

assert captured["inputs"]["texts"] == ["hello"]
sm = captured["inputs"].get("structured_messages") or []
assert all(m.get("role") != "system" for m in sm)
assert data["messages"][0]["content"] == "secret system"

@pytest.mark.asyncio
async def test_openai_handler_per_guardrail_skip_false_overrides_global(
self, monkeypatch
):
monkeypatch.setattr(
litellm, "skip_system_message_in_guardrail", True, raising=False
)

captured = {}

class MockGuardrail:
skip_system_message_in_guardrail = False

async def apply_guardrail(
self, inputs, request_data, input_type, logging_obj=None
):
captured["inputs"] = inputs
return inputs

data = {
"messages": [
{"role": "system", "content": "sys"},
{"role": "user", "content": "u"},
],
}

await OpenAIChatCompletionsHandler().process_input_messages(
data=data,
guardrail_to_apply=MockGuardrail(),
litellm_logging_obj=None,
)

assert "sys" in captured["inputs"]["texts"]
roles = {
m.get("role") for m in (captured["inputs"].get("structured_messages") or [])
}
assert "system" in roles

class TestAsyncPreCallHook:
@pytest.mark.asyncio
async def test_uses_mcp_event_type(self):
Expand Down
Loading