Skip to content

Commit 01919f3

Browse files
smrutisahoo10Smruti Sahoocrivetimihai
authored andcommitted
[FIX][SECURITY]: Add log sanitizer utility to prevent log injection attacks (#3000) (#3227)
* Added utility that strips control characters from values before logging Signed-off-by: Smruti Sahoo <smruti.sahoo1@ibm.com> * fix(tests): remove unused pytest import from log sanitizer tests Signed-off-by: Mihai Criveti <crivetimihai@gmail.com> * style(tests): remove trailing whitespace in log sanitizer tests Signed-off-by: Mihai Criveti <crivetimihai@gmail.com> --------- Signed-off-by: Smruti Sahoo <smruti.sahoo1@ibm.com> Signed-off-by: Mihai Criveti <crivetimihai@gmail.com> Co-authored-by: Smruti Sahoo <smruti.sahoo1@ibm.com> Co-authored-by: Mihai Criveti <crivetimihai@gmail.com> Signed-off-by: Yosief Eyob <yosiefogbazion@gmail.com>
1 parent 9cbffbf commit 01919f3

File tree

5 files changed

+505
-5
lines changed

5 files changed

+505
-5
lines changed

mcpgateway/routers/oauth_router.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from mcpgateway.services.encryption_service import protect_oauth_config_for_storage
3636
from mcpgateway.services.oauth_manager import OAuthError, OAuthManager
3737
from mcpgateway.services.token_storage_service import TokenStorageService
38+
from mcpgateway.utils.log_sanitizer import sanitize_for_log
3839

3940
logger = logging.getLogger(__name__)
4041

@@ -451,7 +452,8 @@ async def oauth_callback(
451452
if error:
452453
error_text = escape(error)
453454
description_text = escape(error_description or "OAuth provider returned an authorization error.")
454-
logger.warning(f"OAuth provider returned error callback: error={error}, description={error_description}")
455+
# Sanitize untrusted query parameters before logging to prevent log injection
456+
logger.warning(f"OAuth provider returned error callback: error={sanitize_for_log(error)}, description={sanitize_for_log(error_description)}")
455457
return HTMLResponse(
456458
content=f"""
457459
<!DOCTYPE html>

mcpgateway/routers/sso.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from mcpgateway.middleware.rbac import get_current_user_with_permissions, require_permission
2525
from mcpgateway.services.logging_service import LoggingService
2626
from mcpgateway.services.sso_service import SSOService
27+
from mcpgateway.utils.log_sanitizer import sanitize_for_log
2728

2829
# Initialize logging
2930
logging_service = LoggingService()
@@ -252,7 +253,8 @@ async def initiate_sso_login(
252253
# Validate redirect_uri to prevent open redirect attacks
253254
# Uses server-side allowlist (allowed_origins, app_domain) - does NOT trust Host header
254255
if not _validate_redirect_uri(redirect_uri, request):
255-
logger.warning(f"SSO login rejected - invalid redirect_uri: {redirect_uri}")
256+
# Sanitize untrusted redirect_uri before logging to prevent log injection
257+
logger.warning(f"SSO login rejected - invalid redirect_uri: {sanitize_for_log(redirect_uri)}")
256258
raise HTTPException(
257259
status_code=status.HTTP_400_BAD_REQUEST,
258260
detail="Invalid redirect_uri. Must be a relative path or URL matching allowed origins.",

mcpgateway/routers/well_known.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from mcpgateway.db import get_db
2727
from mcpgateway.services.logging_service import LoggingService
2828
from mcpgateway.services.server_service import ServerError, ServerNotFoundError, ServerService
29+
from mcpgateway.utils.log_sanitizer import sanitize_for_log
2930
from mcpgateway.utils.verify_credentials import require_auth
3031

3132
# Get logger instance
@@ -164,19 +165,22 @@ async def get_oauth_protected_resource_rfc9728(
164165

165166
# Validate path structure
166167
if len(path_parts) < 2 or path_parts[0] != "servers":
167-
logger.debug(f"Invalid RFC 9728 path format: {path}")
168+
# Sanitize untrusted path before logging to prevent log injection
169+
logger.debug(f"Invalid RFC 9728 path format: {sanitize_for_log(path)}")
168170
raise HTTPException(status_code=404, detail="Invalid resource path format. Expected: /.well-known/oauth-protected-resource/servers/{server_id}/mcp")
169171

170172
server_id = path_parts[1]
171173

172174
# Validate server_id is a valid UUID (prevents path traversal and injection)
173175
if not UUID_PATTERN.match(server_id):
174-
logger.warning(f"Invalid server_id format (not a UUID): {server_id}")
176+
# Sanitize untrusted server_id before logging to prevent log injection
177+
logger.warning(f"Invalid server_id format (not a UUID): {sanitize_for_log(server_id)}")
175178
raise HTTPException(status_code=404, detail="Invalid server_id format. Must be a valid UUID.")
176179

177180
# Reject paths with extra segments after /mcp (e.g., servers/uuid/mcp/extra)
178181
if len(path_parts) > 3:
179-
logger.warning(f"RFC 9728 path has unexpected segments: {path}")
182+
# Sanitize untrusted path before logging to prevent log injection
183+
logger.warning(f"RFC 9728 path has unexpected segments: {sanitize_for_log(path)}")
180184
raise HTTPException(status_code=404, detail="Invalid resource path format. Expected: /.well-known/oauth-protected-resource/servers/{server_id}/mcp")
181185

182186
# Build resource URL with /mcp suffix per MCP specification

mcpgateway/utils/log_sanitizer.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# -*- coding: utf-8 -*-
2+
"""Location: ./mcpgateway/utils/log_sanitizer.py
3+
Copyright 2025
4+
SPDX-License-Identifier: Apache-2.0
5+
6+
Log Sanitization Utility.
7+
8+
This module provides utilities to sanitize untrusted input before logging to prevent
9+
log injection attacks. Control characters like newlines (\n, \r) can be used to inject
10+
fabricated log entries when logging unauthenticated user input.
11+
12+
Security Context:
13+
Log injection occurs when an attacker includes control characters (especially newlines)
14+
in query parameters, headers, or other user-controlled input that gets logged. When
15+
URL-decoded by the ASGI framework, these characters are passed to Python's logging
16+
module which does not sanitize them, allowing injection of fake log lines.
17+
18+
Example attack:
19+
GET /oauth/callback?error=foo&error_description=bar%0ACRITICAL:root:SECURITY+BREACH
20+
21+
This produces two log lines:
22+
WARNING:oauth:OAuth error: bar
23+
CRITICAL:root:SECURITY BREACH
24+
25+
The second line is entirely fabricated by the attacker.
26+
27+
Mitigation:
28+
This utility strips or replaces control characters before logging. Structured logging
29+
(JSON format) also mitigates this by encapsulating the full message as a single field.
30+
31+
Examples:
32+
>>> from mcpgateway.utils.log_sanitizer import sanitize_for_log
33+
>>> sanitize_for_log("normal text")
34+
'normal text'
35+
>>> sanitize_for_log("text with\\nnewline")
36+
'text with newline'
37+
>>> sanitize_for_log("text with\\r\\nCRLF")
38+
'text with  CRLF'
39+
>>> sanitize_for_log("tab\\there")
40+
'tab here'
41+
>>> sanitize_for_log(None)
42+
'None'
43+
>>> sanitize_for_log(123)
44+
'123'
45+
"""
46+
47+
# Standard
48+
import re
49+
from typing import Any, Optional
50+
51+
# Characters that must never reach a log line verbatim:
#   - C0 controls U+0000-U+001F (includes \n, \r, \t, \v, \f)
#   - DEL and the C1 controls U+007F-U+009F (includes NEL, U+0085)
#   - U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR, which
#     str.splitlines() treats as line boundaries even though they are not
#     in the C0/C1 ranges
# Space (0x20) is deliberately left alone.
CONTROL_CHARS_PATTERN = re.compile(r"[\x00-\x1f\x7f-\x9f\u2028\u2029]")


def sanitize_for_log(value: Any, replacement: str = " ") -> str:
    """
    Sanitize a value for safe logging by replacing control characters.

    The value is converted with ``str()`` and every character matched by
    ``CONTROL_CHARS_PATTERN`` is substituted with ``replacement``. Each matched
    character is replaced individually, so a CRLF pair yields two replacements.

    Args:
        value: The value to sanitize. Can be any type; will be converted to string.
        replacement: The string to replace control characters with. Defaults to a space.
            Use empty string '' to remove control characters entirely.

    Returns:
        A single-line string with control characters and Unicode line/paragraph
        separators replaced.

    Security Notes:
        - Use this function when logging unauthenticated user input
        - Does not protect against other injection types (SQL, XSS, etc.)

    Examples:
        >>> sanitize_for_log("error: bad scope\\nCRITICAL:root:FAKE LOG")
        'error: bad scope CRITICAL:root:FAKE LOG'
        >>> sanitize_for_log("path/to/file\\x00null")
        'path/to/file null'
        >>> sanitize_for_log("first\\u2028second")
        'first second'
        >>> sanitize_for_log("a\\r\\nb", "")
        'ab'
        >>> sanitize_for_log("normal text")
        'normal text'
        >>> sanitize_for_log(None)
        'None'
        >>> sanitize_for_log({"key": "value"})
        "{'key': 'value'}"
    """
    # str() first so None, numbers and arbitrary objects are all accepted.
    return CONTROL_CHARS_PATTERN.sub(replacement, str(value))
98+
99+
100+
def sanitize_dict_for_log(data: dict[str, Any], replacement: str = " ") -> dict[str, str]:
    """
    Build a log-safe copy of a dictionary.

    Every value is passed through ``sanitize_for_log`` on its own; the input
    mapping is not modified. Keys are carried over unchanged (they are not
    sanitized), so callers logging attacker-controlled key names must handle
    those separately.

    Args:
        data: Dictionary with string keys and any values
        replacement: The string to replace control characters with

    Returns:
        A new dictionary with the same keys and all values sanitized as strings

    Examples:
        >>> sanitize_dict_for_log({"error": "foo", "desc": "bar\\nFAKE"})
        {'error': 'foo', 'desc': 'bar FAKE'}
        >>> sanitize_dict_for_log({"count": 42, "name": "test\\ttab"})
        {'count': '42', 'name': 'test tab'}
    """
    cleaned: dict[str, str] = {}
    for name, raw in data.items():
        cleaned[name] = sanitize_for_log(raw, replacement)
    return cleaned
121+
122+
123+
def sanitize_optional(value: Optional[Any], replacement: str = " ") -> Optional[str]:
    """
    Sanitize a value for logging while letting ``None`` pass through.

    Unlike ``sanitize_for_log``, which would turn ``None`` into the string
    "None", this returns ``None`` untouched and delegates everything else.

    Args:
        value: The value to sanitize, or None
        replacement: The string to replace control characters with

    Returns:
        Sanitized string if value is not None, otherwise None

    Examples:
        >>> sanitize_optional("text\\nwith newline")
        'text with newline'
        >>> sanitize_optional(None)
        >>> sanitize_optional(None) is None
        True
    """
    return None if value is None else sanitize_for_log(value, replacement)

0 commit comments

Comments
 (0)