Skip to content

Commit 37c04c8

Browse files
committed
feat: add detailed plugin violation information to OTEL spans
- Add violation.reason, violation.code, violation.description to spans - Include optional http_status_code and mcp_error_code when present - Sanitize violation.details to prevent PII leakage using existing redaction - Mark spans as error when violations occur for better visibility - Update both OTEL spans and internal observability spans Closes #4267 Signed-off-by: Bob Shell <bob@example.com> Signed-off-by: Vishu Bhatnagar <vishu.bhatnagar@ibm.com>
1 parent d785882 commit 37c04c8

File tree

5 files changed

+265
-22
lines changed

5 files changed

+265
-22
lines changed

mcpgateway/alembic/versions/d3e4f5a6b7c8_add_binding_reference_id_to_tool_plugin_bindings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from typing import Sequence, Union
1111

1212
# Third-Party
13-
import sqlalchemy as sa
1413
from alembic import op
14+
import sqlalchemy as sa
1515

1616
# revision identifiers, used by Alembic.
1717
revision: str = "d3e4f5a6b7c8" # pragma: allowlist secret

mcpgateway/middleware/request_logging_middleware.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@
6262

6363
# First-Party
6464
from mcpgateway.auth import get_current_user
65-
from mcpgateway.config import settings
6665
from mcpgateway.common.validators import SecurityValidator
66+
from mcpgateway.config import settings
6767
from mcpgateway.middleware.path_filter import should_skip_request_logging
6868
from mcpgateway.services.logging_service import LoggingService
6969
from mcpgateway.services.structured_logger import get_structured_logger

mcpgateway/plugins/framework/manager.py

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -477,17 +477,68 @@ async def _execute_with_timeout(self, hook_ref: HookRef, payload: PluginPayload,
477477
otel_span.set_attribute("plugin.continue_processing", result.continue_processing)
478478
otel_span.set_attribute("plugin.stopped_chain", not result.continue_processing)
479479

480-
# End span with success
480+
# Add detailed violation information when present
481+
if result.violation:
482+
# First-Party
483+
from mcpgateway.observability import set_span_attribute, set_span_error
484+
485+
# Add core violation fields (set_span_attribute handles sanitization)
486+
set_span_attribute(otel_span, "plugin.violation.reason", result.violation.reason)
487+
set_span_attribute(otel_span, "plugin.violation.code", result.violation.code)
488+
set_span_attribute(otel_span, "plugin.violation.description", result.violation.description)
489+
490+
# Add HTTP status code if present (e.g., 429 for rate limiting)
491+
if result.violation.http_status_code:
492+
set_span_attribute(otel_span, "plugin.violation.http_status_code", result.violation.http_status_code)
493+
494+
# Add MCP error code if present
495+
if result.violation.mcp_error_code:
496+
set_span_attribute(otel_span, "plugin.violation.mcp_error_code", result.violation.mcp_error_code)
497+
498+
# Add violation details with sanitization to prevent PII leakage
499+
# Pass just the key name (not the full path) to sanitization for proper field matching
500+
if result.violation.details:
501+
# First-Party
502+
from mcpgateway.utils.trace_redaction import sanitize_trace_attribute_value
503+
504+
for key, value in result.violation.details.items():
505+
# Sanitize using just the key name for proper redaction field matching
506+
sanitized_value = sanitize_trace_attribute_value(key, value)
507+
# Use _set_pre_sanitized_span_attribute to avoid double sanitization
508+
if otel_span and sanitized_value is not None:
509+
otel_span.set_attribute(f"plugin.violation.details.{key}", sanitized_value)
510+
511+
# Mark span as error for better visibility in observability platforms
512+
set_span_error(otel_span, result.violation.description, record_exception=False)
513+
514+
# End span with success or error based on violation
481515
if span_id is not None:
482516
try:
517+
span_attributes = {
518+
"plugin.had_violation": result.violation is not None,
519+
"plugin.modified_payload": result.modified_payload is not None,
520+
"plugin.continue_processing": result.continue_processing,
521+
}
522+
523+
# Add violation details to internal observability span as well
524+
if result.violation:
525+
# First-Party
526+
from mcpgateway.utils.trace_redaction import sanitize_trace_attribute_value
527+
528+
span_attributes["plugin.violation.reason"] = sanitize_trace_attribute_value("plugin.violation.reason", result.violation.reason)
529+
span_attributes["plugin.violation.code"] = sanitize_trace_attribute_value("plugin.violation.code", result.violation.code)
530+
span_attributes["plugin.violation.description"] = sanitize_trace_attribute_value("plugin.violation.description", result.violation.description)
531+
532+
if result.violation.http_status_code:
533+
span_attributes["plugin.violation.http_status_code"] = result.violation.http_status_code
534+
535+
if result.violation.mcp_error_code:
536+
span_attributes["plugin.violation.mcp_error_code"] = result.violation.mcp_error_code
537+
483538
self.observability.end_span(
484539
span_id=span_id,
485-
status="ok",
486-
attributes={
487-
"plugin.had_violation": result.violation is not None,
488-
"plugin.modified_payload": result.modified_payload is not None,
489-
"plugin.continue_processing": result.continue_processing,
490-
},
540+
status="error" if result.violation else "ok",
541+
attributes=span_attributes,
491542
)
492543
except Exception as e:
493544
logger.debug("Plugin observability end_span failed: %s", e)

mcpgateway/services/tool_plugin_binding_service.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,15 +173,9 @@ def upsert_bindings(
173173
# different external references are claiming the same (team, tool, plugin)
174174
# triple. The new reference_id wins (last-caller-wins), but the old
175175
# caller's DELETE by reference will now be a no-op.
176-
if (
177-
existing.binding_reference_id
178-
and policy.binding_reference_id
179-
and existing.binding_reference_id != policy.binding_reference_id
180-
):
176+
if existing.binding_reference_id and policy.binding_reference_id and existing.binding_reference_id != policy.binding_reference_id:
181177
logger.warning(
182-
"binding_reference_id ownership transfer: "
183-
"team=%s tool=%s plugin=%s old_ref=%s new_ref=%s — "
184-
"DELETE by old_ref will now be a no-op",
178+
"binding_reference_id ownership transfer: " "team=%s tool=%s plugin=%s old_ref=%s new_ref=%s — " "DELETE by old_ref will now be a no-op",
185179
team_id,
186180
tool_name,
187181
policy.plugin_id.value,
@@ -284,8 +278,7 @@ def list_bindings(
284278
if binding_reference_id:
285279
if team_id:
286280
logger.warning(
287-
"Both team_id=%r and binding_reference_id=%r supplied to list_bindings; "
288-
"team_id will be ignored. Omit team_id when filtering by binding_reference_id.",
281+
"Both team_id=%r and binding_reference_id=%r supplied to list_bindings; " "team_id will be ignored. Omit team_id when filtering by binding_reference_id.",
289282
team_id,
290283
binding_reference_id,
291284
)

tests/unit/mcpgateway/plugins/framework/test_observability.py

Lines changed: 202 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@
2222
GlobalContext,
2323
Plugin,
2424
PluginConfig,
25+
PluginContext,
2526
PluginManager,
2627
PluginMode,
2728
PluginResult,
2829
PromptHookType,
2930
PromptPrehookPayload,
31+
PromptPrehookResult,
3032
)
3133
from mcpgateway.plugins.framework.base import HookRef
3234
from mcpgateway.plugins.framework.manager import PluginExecutor
@@ -381,9 +383,206 @@ def record_span(name: str, attributes: Optional[Dict[str, Any]] = None):
381383
plugin_span = recorded[1]
382384
assert hook_chain_span.name == "plugin.hook.invoke"
383385
assert hook_chain_span.attributes["plugin.chain.stopped"] is True
384-
assert hook_chain_span.attributes["plugin.chain.stopped_by"] == "BlockingPlugin"
385-
assert plugin_span.name == "plugin.execute"
386-
assert plugin_span.attributes["plugin.name"] == "BlockingPlugin"
386+
387+
388+
@pytest.mark.asyncio
389+
async def test_plugin_manager_captures_violation_details_in_otel_spans():
390+
"""Plugin manager should capture detailed violation information in OTEL spans with proper sanitization."""
391+
manager = PluginManager("./tests/unit/mcpgateway/plugins/fixtures/configs/valid_no_plugin.yaml", observability=None)
392+
await manager.initialize()
393+
394+
# Create a plugin that returns a violation with full details
395+
config = PluginConfig(
396+
name="ViolationPlugin",
397+
description="Plugin that returns violations",
398+
author="Test",
399+
version="1.0",
400+
tags=["test"],
401+
kind="ViolationPlugin",
402+
hooks=["prompt_pre_fetch"],
403+
config={},
404+
mode=PluginMode.ENFORCE,
405+
)
406+
407+
class ViolationPlugin(Plugin):
408+
async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginContext) -> PromptPrehookResult:
409+
from mcpgateway.plugins.framework import PluginViolation
410+
411+
violation = PluginViolation(
412+
reason="Content policy violation",
413+
description="Request contains prohibited content",
414+
code="PROHIBITED_CONTENT",
415+
details={
416+
"matched_pattern": "sensitive_keyword",
417+
"field": "user_input",
418+
"password": "secret123", # Should be sanitized
419+
"api_key": "sk-test-key", # Should be sanitized
420+
},
421+
http_status_code=403,
422+
mcp_error_code=-32001,
423+
)
424+
return PromptPrehookResult(continue_processing=False, violation=violation)
425+
426+
plugin = ViolationPlugin(config)
427+
428+
class RecordingSpan:
429+
def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None):
430+
self.name = name
431+
self.attributes = dict(attributes or {})
432+
self.status = None
433+
self.status_description = None
434+
435+
def set_attribute(self, key: str, value: Any) -> None:
436+
self.attributes[key] = value
437+
438+
def set_status(self, status: Any) -> None:
439+
self.status = status
440+
if hasattr(status, "description"):
441+
self.status_description = status.description
442+
443+
recorded: List[RecordingSpan] = []
444+
445+
@contextmanager
446+
def record_span(name: str, attributes: Optional[Dict[str, Any]] = None):
447+
span = RecordingSpan(name, attributes)
448+
recorded.append(span)
449+
yield span
450+
451+
with patch.object(manager._registry, "get_hook_refs_for_hook") as mock_get:
452+
hook_ref = HookRef(PromptHookType.PROMPT_PRE_FETCH, PluginRef(plugin))
453+
mock_get.return_value = [hook_ref]
454+
455+
payload = PromptPrehookPayload(prompt_id="test", args={"user": "test input"})
456+
global_context = GlobalContext(request_id="req-violation-test")
457+
458+
with patch("mcpgateway.plugins.framework.manager.create_span", side_effect=record_span):
459+
result, _ = await manager.invoke_hook(
460+
PromptHookType.PROMPT_PRE_FETCH,
461+
payload,
462+
global_context=global_context,
463+
)
464+
465+
# Verify violation was returned
466+
assert result.continue_processing is False
467+
assert result.violation is not None
468+
469+
# Find the plugin execution span
470+
plugin_span = next((s for s in recorded if s.name == "plugin.execute"), None)
471+
assert plugin_span is not None
472+
473+
# Verify core violation attributes are captured
474+
assert plugin_span.attributes["plugin.had_violation"] is True
475+
assert plugin_span.attributes["plugin.violation.reason"] == "Content policy violation"
476+
assert plugin_span.attributes["plugin.violation.code"] == "PROHIBITED_CONTENT"
477+
assert plugin_span.attributes["plugin.violation.description"] == "Request contains prohibited content"
478+
assert plugin_span.attributes["plugin.violation.http_status_code"] == 403
479+
assert plugin_span.attributes["plugin.violation.mcp_error_code"] == -32001
480+
481+
# Verify violation details are captured
482+
assert "plugin.violation.details.matched_pattern" in plugin_span.attributes
483+
assert plugin_span.attributes["plugin.violation.details.matched_pattern"] == "sensitive_keyword"
484+
assert "plugin.violation.details.field" in plugin_span.attributes
485+
assert plugin_span.attributes["plugin.violation.details.field"] == "user_input"
486+
487+
# Verify sensitive fields are sanitized (password and api_key should be redacted)
488+
assert "plugin.violation.details.password" in plugin_span.attributes
489+
assert plugin_span.attributes["plugin.violation.details.password"] == "***"
490+
assert "plugin.violation.details.api_key" in plugin_span.attributes
491+
assert plugin_span.attributes["plugin.violation.details.api_key"] == "***"
492+
493+
# Verify span is marked as error
494+
assert plugin_span.status is not None
495+
assert plugin_span.status_description == "Request contains prohibited content"
496+
497+
await manager.shutdown()
498+
499+
500+
@pytest.mark.asyncio
501+
async def test_plugin_manager_handles_violation_without_optional_fields():
502+
"""Plugin manager should handle violations that don't have optional fields (http_status_code, mcp_error_code, details)."""
503+
manager = PluginManager("./tests/unit/mcpgateway/plugins/fixtures/configs/valid_no_plugin.yaml", observability=None)
504+
await manager.initialize()
505+
506+
config = PluginConfig(
507+
name="MinimalViolationPlugin",
508+
description="Plugin with minimal violation",
509+
author="Test",
510+
version="1.0",
511+
tags=["test"],
512+
kind="MinimalViolationPlugin",
513+
hooks=["prompt_pre_fetch"],
514+
config={},
515+
mode=PluginMode.ENFORCE,
516+
)
517+
518+
class MinimalViolationPlugin(Plugin):
519+
async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginContext) -> PromptPrehookResult:
520+
from mcpgateway.plugins.framework import PluginViolation
521+
522+
# Violation with only required fields
523+
violation = PluginViolation(
524+
reason="Rate limit exceeded",
525+
description="Too many requests",
526+
code="RATE_LIMIT",
527+
)
528+
return PromptPrehookResult(continue_processing=False, violation=violation)
529+
530+
plugin = MinimalViolationPlugin(config)
531+
532+
class RecordingSpan:
533+
def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None):
534+
self.name = name
535+
self.attributes = dict(attributes or {})
536+
537+
def set_attribute(self, key: str, value: Any) -> None:
538+
self.attributes[key] = value
539+
540+
def set_status(self, status: Any) -> None:
541+
pass
542+
543+
recorded: List[RecordingSpan] = []
544+
545+
@contextmanager
546+
def record_span(name: str, attributes: Optional[Dict[str, Any]] = None):
547+
span = RecordingSpan(name, attributes)
548+
recorded.append(span)
549+
yield span
550+
551+
with patch.object(manager._registry, "get_hook_refs_for_hook") as mock_get:
552+
hook_ref = HookRef(PromptHookType.PROMPT_PRE_FETCH, PluginRef(plugin))
553+
mock_get.return_value = [hook_ref]
554+
555+
payload = PromptPrehookPayload(prompt_id="test", args={})
556+
global_context = GlobalContext(request_id="req-minimal-violation")
557+
558+
with patch("mcpgateway.plugins.framework.manager.create_span", side_effect=record_span):
559+
result, _ = await manager.invoke_hook(
560+
PromptHookType.PROMPT_PRE_FETCH,
561+
payload,
562+
global_context=global_context,
563+
)
564+
565+
# Verify violation was returned
566+
assert result.continue_processing is False
567+
assert result.violation is not None
568+
569+
# Find the plugin execution span
570+
plugin_span = next((s for s in recorded if s.name == "plugin.execute"), None)
571+
assert plugin_span is not None
572+
573+
# Verify core violation attributes are captured
574+
assert plugin_span.attributes["plugin.had_violation"] is True
575+
assert plugin_span.attributes["plugin.violation.reason"] == "Rate limit exceeded"
576+
assert plugin_span.attributes["plugin.violation.code"] == "RATE_LIMIT"
577+
assert plugin_span.attributes["plugin.violation.description"] == "Too many requests"
578+
579+
# Verify optional fields are not present when not set
580+
assert "plugin.violation.http_status_code" not in plugin_span.attributes
581+
assert "plugin.violation.mcp_error_code" not in plugin_span.attributes
582+
# Details dict is empty, so no detail attributes should be present
583+
assert not any(k.startswith("plugin.violation.details.") for k in plugin_span.attributes.keys())
584+
585+
await manager.shutdown()
387586

388587
await manager.shutdown()
389588

0 commit comments

Comments
 (0)