Skip to content

Commit ea4c378

Browse files
authored
Enforce size limits on browser SDK snippet injection to guard against decompression bombs and oversized HTML response bodies (#47233)
* Enforce size limits on browser SDK snippet injection to guard against decompression bombs and oversized HTML response bodies
* Add CHANGELOG
* Address feedback
* Fix format and lint
1 parent 4823427 commit ea4c378

4 files changed

Lines changed: 167 additions & 7 deletions

File tree

sdk/monitor/azure-monitor-opentelemetry/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
### Breaking Changes
88

99
### Bugs Fixed
10+
- Enforce size limits on browser SDK snippet injection to guard against decompression bombs and oversized HTML response bodies (1 MiB compressed / 5 MiB decompressed caps).
11+
([#47233](https://github.com/Azure/azure-sdk-for-python/pull/47233))
1012

1113
### Other Changes
1214

sdk/monitor/azure-monitor-opentelemetry/azure/monitor/opentelemetry/_browser_sdk_loader/snippet_injector.py

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@
66

77
import gzip
88
import importlib
9+
import io
910
import re
1011
from logging import getLogger
1112
from typing import Any, Dict, Optional, Tuple
1213

1314
from ._config import BrowserSDKConfig
15+
from .._constants import (
16+
_BROWSER_SDK_MAX_COMPRESSED_BYTES as _MAX_COMPRESSED_BYTES,
17+
_BROWSER_SDK_MAX_DECOMPRESSED_BYTES as _MAX_DECOMPRESSED_BYTES,
18+
)
1419

1520
# Optional compression libraries
1621
_BROTLI_MODULE: Optional[Any]
@@ -34,6 +39,58 @@
3439
_logger = getLogger(__name__)
3540

3641

42+
def _bounded_decompress(data: bytes, encoding: str) -> bytes:
    """Decompress ``data`` with the given encoding, rejecting oversized or truncated streams.

    Supports ``gzip``, ``deflate`` (zlib), and ``br`` (brotli). The output is
    capped at ``_MAX_DECOMPRESSED_BYTES`` as a defense against decompression
    bombs. For ``deflate`` and ``br`` the stream must also be complete: a
    truncated body raises instead of being returned as if it were the whole
    document, matching what the one-shot ``decompress`` helpers do.

    :param data: Compressed input bytes.
    :type data: bytes
    :param encoding: One of ``gzip``, ``deflate``, ``br`` (compared exactly; callers
        are expected to lower-case the header value first).
    :type encoding: str
    :return: Decompressed bytes (at most ``_MAX_DECOMPRESSED_BYTES``).
    :rtype: bytes
    :raises ValueError: If the output would exceed the cap, the ``deflate``/``br``
        stream is incomplete, or ``encoding`` is not supported.
    :raises RuntimeError: If the optional module needed for ``encoding`` is unavailable.
    """
    cap = _MAX_DECOMPRESSED_BYTES

    if encoding == "gzip":
        # GzipFile.read(N) returns at most N bytes; ask for cap + 1 so that an
        # over-cap body is detectable without inflating all of it.
        with gzip.GzipFile(fileobj=io.BytesIO(data)) as gz:
            out = gz.read(cap + 1)
        if len(out) > cap:
            raise ValueError(f"{encoding}-decompressed body exceeds cap")
        return out

    if encoding == "deflate":
        if _ZLIB_MODULE is None:
            raise RuntimeError("zlib module not available")
        # decompressobj.decompress() takes a max_length; input that could not be
        # processed because of that limit is left in ``unconsumed_tail``.
        decompressor = _ZLIB_MODULE.decompressobj()
        out = decompressor.decompress(data, cap + 1)
        if decompressor.unconsumed_tail or len(out) > cap:
            raise ValueError("deflate-decompressed body exceeds cap")
        out += decompressor.flush()
        if len(out) > cap:
            raise ValueError("deflate-decompressed body exceeds cap")
        # Streaming decompression does not raise on a truncated stream the way
        # zlib.decompress() does, so check the end-of-stream flag explicitly.
        if not decompressor.eof:
            raise ValueError("deflate stream is incomplete or truncated")
        return bytes(out)

    if encoding == "br":
        if _BROTLI_MODULE is None:
            raise RuntimeError("brotli module not available")
        return _bounded_brotli_decompress(data, cap)

    raise ValueError(f"unsupported encoding: {encoding}")


def _bounded_brotli_decompress(data: bytes, cap: int) -> bytes:
    """Decompress a brotli stream, raising ``ValueError`` once output exceeds ``cap``.

    :param data: Brotli-compressed input bytes.
    :type data: bytes
    :param cap: Maximum number of decompressed bytes to accept.
    :type cap: int
    :return: Decompressed bytes (at most ``cap``).
    :rtype: bytes
    :raises ValueError: If the output exceeds ``cap`` or the stream is incomplete.
    """
    decompressor = _BROTLI_MODULE.Decompressor()
    chunk_size = 64 * 1024
    pieces = []
    total = 0
    # Recent brotli bindings accept ``output_buffer_limit`` on process(), which
    # stops a single call from inflating far past the cap. Older bindings
    # reject the unknown keyword with TypeError; for those we can only tally
    # the output after each call (best effort).
    limit_supported = True
    for start in range(0, len(data), chunk_size):
        segment = data[start : start + chunk_size]
        piece = None
        if limit_supported:
            try:
                piece = decompressor.process(segment, output_buffer_limit=cap + 1 - total)
            except TypeError:
                limit_supported = False
        if piece is None:
            piece = decompressor.process(segment)
        total += len(piece)
        if total > cap:
            raise ValueError("brotli-decompressed body exceeds cap")
        pieces.append(piece)
    # The one-shot brotli decompress() raises on truncated input; the streaming
    # API does not, so ask the decompressor when it exposes is_finished().
    is_finished = getattr(decompressor, "is_finished", None)
    if callable(is_finished) and not is_finished():
        raise ValueError("brotli stream is incomplete or truncated")
    return b"".join(pieces)
92+
93+
3794
def _mark_browser_loader_feature(is_enabled: bool) -> None:
3895
"""Record browser SDK loader usage in statsbeat when available.
3996
@@ -107,7 +164,7 @@ def __init__(self, config: BrowserSDKConfig) -> None:
107164
]
108165
_mark_browser_loader_feature(self.config.enabled)
109166

110-
def should_inject(
167+
def should_inject( # pylint: disable=too-many-return-statements
111168
self, request_method: str, content_type: Optional[str], content: bytes, content_encoding: Optional[str] = None
112169
) -> bool:
113170
"""Determine whether the web snippet should be injected into the response.
@@ -131,8 +188,21 @@ def should_inject(
131188
# Check content type for HTML
132189
if not content_type or "html" not in content_type.lower():
133190
return False
134-
# Get decompressed content once and cache it for reuse
135-
decompressed_content = self._get_decompressed_content(content, content_encoding)
191+
# Bail out on oversized bodies; never decompress/scan/recompress them.
192+
if len(content) > _MAX_COMPRESSED_BYTES:
193+
_logger.debug(
194+
"Response body %d bytes exceeds injection cap; skipping snippet injection",
195+
len(content),
196+
)
197+
return False
198+
# Get decompressed content once and cache it for reuse. If decompression
199+
# fails (e.g., size cap exceeded or malformed body), skip injection rather
200+
# than risk modifying a body we cannot safely decode.
201+
try:
202+
decompressed_content = self._get_decompressed_content(content, content_encoding)
203+
except Exception as ex: # pylint: disable=broad-exception-caught
204+
_logger.debug("Skipping snippet injection; decompression failed: %s", ex)
205+
return False
136206
# Check if Web SDK is already present using cached decompressed content
137207
if self._has_existing_web_sdk_from_decompressed(decompressed_content):
138208
_logger.debug("Web SDK already detected in HTML, skipping injection")
@@ -372,19 +442,23 @@ def _decompress_content(self, content: bytes, encoding: Optional[str]) -> bytes:
372442
try:
373443
normalized = encoding.lower()
374444
if normalized == "gzip":
375-
result = gzip.decompress(content)
445+
result = _bounded_decompress(content, "gzip")
376446
elif normalized == "br":
377447
if not HAS_BROTLI or _BROTLI_MODULE is None:
378448
_logger.warning("brotli library not available for decompression")
379449
else:
380-
result = _BROTLI_MODULE.decompress(content)
450+
result = _bounded_decompress(content, "br")
381451
elif normalized == "deflate":
382452
if not HAS_ZLIB or _ZLIB_MODULE is None:
383453
_logger.warning("zlib library not available for decompression")
384454
else:
385-
result = _ZLIB_MODULE.decompress(content)
455+
result = _bounded_decompress(content, "deflate")
386456
except Exception as ex: # pylint: disable=broad-exception-caught
457+
# Re-raise so callers (e.g., inject_with_compression / should_inject) can
458+
# fall back to returning the original response untouched instead of
459+
# treating still-compressed bytes as HTML and double-compressing them.
387460
_logger.warning("Failed to decompress content with encoding %s: %s", encoding, ex)
461+
raise
388462
return result
389463

390464
def _compress_content(self, content: bytes, encoding: str) -> bytes:

sdk/monitor/azure-monitor-opentelemetry/azure/monitor/opentelemetry/_constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,8 @@
8989

9090
_AZURE_APP_SERVICE_RESOURCE_DETECTOR_NAME = "azure_app_service"
9191
_AZURE_VM_RESOURCE_DETECTOR_NAME = "azure_vm"
92+
93+
# --------------------Browser SDK snippet injection------------------------------
94+
95+
_BROWSER_SDK_MAX_COMPRESSED_BYTES = 1 * 1024 * 1024 # 1 MiB on the wire
96+
_BROWSER_SDK_MAX_DECOMPRESSED_BYTES = 5 * 1024 * 1024 # 5 MiB after decompression

sdk/monitor/azure-monitor-opentelemetry/tests/browserSdkLoader/test_snippet_injector.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,17 @@
66

77
import gzip
88
import unittest
9+
import zlib
910
from unittest.mock import patch, MagicMock
1011

1112
from azure.monitor.opentelemetry._browser_sdk_loader._config import BrowserSDKConfig
12-
from azure.monitor.opentelemetry._browser_sdk_loader.snippet_injector import WebSnippetInjector
13+
from azure.monitor.opentelemetry._browser_sdk_loader import snippet_injector as snippet_injector_module
14+
from azure.monitor.opentelemetry._browser_sdk_loader.snippet_injector import (
15+
WebSnippetInjector,
16+
_bounded_decompress,
17+
_MAX_COMPRESSED_BYTES,
18+
_MAX_DECOMPRESSED_BYTES,
19+
)
1320

1421

1522
class TestWebSnippetInjector(unittest.TestCase):
@@ -371,5 +378,77 @@ def test_should_inject_script_url_detection(self):
371378
self.assertFalse(result, f"Should detect existing script URL in: {content}")
372379

373380

381+
class TestSizeCaps(unittest.TestCase):
    """Exercise the compressed-size and decompressed-size guards on snippet injection."""

    def setUp(self):
        connection_string = (
            "InstrumentationKey=12345678-1234-1234-1234-123456789012;"
            "IngestionEndpoint=https://test.in.applicationinsights.azure.com/"
        )
        self.config = BrowserSDKConfig(enabled=True, connection_string=connection_string)
        self.injector = WebSnippetInjector(self.config)

    @staticmethod
    def _over_cap_plaintext():
        """Return plaintext slightly larger than the decompressed-size cap."""
        return b"a" * (_MAX_DECOMPRESSED_BYTES + 1024)

    def _brotli_or_skip(self):
        """Return the brotli module used by the injector, skipping the test if absent."""
        if not snippet_injector_module.HAS_BROTLI:
            self.skipTest("brotli not installed")
        return snippet_injector_module._BROTLI_MODULE

    def test_should_inject_rejects_oversized_body(self):
        """An over-cap body is rejected without decompression ever being attempted."""
        filler = b"a" * (_MAX_COMPRESSED_BYTES + 1)
        too_big = b"".join((b"<html><body>", filler, b"</body></html>"))
        # Replace the decompression step so any call to it is observable.
        with patch.object(self.injector, "_get_decompressed_content") as decompress_spy:
            decision = self.injector.should_inject("GET", "text/html", too_big)
        self.assertFalse(decision)
        decompress_spy.assert_not_called()

    def test_should_inject_accepts_body_at_cap(self):
        """A body whose length equals the cap is still eligible for injection."""
        head = b"<html><head></head><body>"
        tail = b"</body></html>"
        filler = b"a" * (_MAX_COMPRESSED_BYTES - len(head) - len(tail))
        document = b"".join((head, filler, tail))
        self.assertEqual(len(document), _MAX_COMPRESSED_BYTES)
        self.assertTrue(self.injector.should_inject("GET", "text/html", document))

    def test_bounded_decompress_gzip_under_cap(self):
        html = b"<html>hello</html>"
        compressed = gzip.compress(html)
        self.assertEqual(_bounded_decompress(compressed, "gzip"), html)

    def test_bounded_decompress_gzip_bomb_raises(self):
        compressed = gzip.compress(self._over_cap_plaintext())
        with self.assertRaises(ValueError):
            _bounded_decompress(compressed, "gzip")

    def test_bounded_decompress_deflate_under_cap(self):
        html = b"<html>hello</html>"
        compressed = zlib.compress(html)
        self.assertEqual(_bounded_decompress(compressed, "deflate"), html)

    def test_bounded_decompress_deflate_bomb_raises(self):
        compressed = zlib.compress(self._over_cap_plaintext())
        with self.assertRaises(ValueError):
            _bounded_decompress(compressed, "deflate")

    def test_bounded_decompress_brotli_under_cap(self):
        brotli_module = self._brotli_or_skip()
        html = b"<html>hello</html>"
        compressed = brotli_module.compress(html)
        self.assertEqual(_bounded_decompress(compressed, "br"), html)

    def test_bounded_decompress_brotli_bomb_raises(self):
        brotli_module = self._brotli_or_skip()
        compressed = brotli_module.compress(self._over_cap_plaintext())
        with self.assertRaises(ValueError):
            _bounded_decompress(compressed, "br")

    def test_inject_with_compression_returns_original_on_bomb(self):
        """A gzip bomb must not be expanded into memory by inject_with_compression."""
        compressed = gzip.compress(self._over_cap_plaintext())
        body, reported_encoding = self.injector.inject_with_compression(compressed, "gzip")
        # When the cap trips (or decompression fails) the injector must hand
        # back the original bytes untouched rather than re-compressing them.
        self.assertEqual(body, compressed)
        self.assertEqual(reported_encoding, "gzip")
451+
452+
374453
if __name__ == "__main__":
375454
unittest.main()

0 commit comments

Comments
 (0)