66
77import gzip
88import importlib
9+ import io
910import re
1011from logging import getLogger
1112from typing import Any , Dict , Optional , Tuple
1213
1314from ._config import BrowserSDKConfig
15+ from .._constants import (
16+ _BROWSER_SDK_MAX_COMPRESSED_BYTES as _MAX_COMPRESSED_BYTES ,
17+ _BROWSER_SDK_MAX_DECOMPRESSED_BYTES as _MAX_DECOMPRESSED_BYTES ,
18+ )
1419
1520# Optional compression libraries
1621_BROTLI_MODULE : Optional [Any ]
3439_logger = getLogger (__name__ )
3540
3641
def _bounded_decompress(data: bytes, encoding: str) -> bytes:
    """Decompress ``data`` with the given encoding, enforcing an output size cap.

    Supports ``gzip``, ``deflate`` (zlib), and ``br`` (brotli). The output is
    limited to ``_MAX_DECOMPRESSED_BYTES`` as a defense against decompression
    bombs. Streams that end before the compressor's end-of-stream marker are
    rejected instead of being returned as partial output, so callers never
    modify and re-compress a body that was only partially decoded.

    :param data: Compressed input bytes.
    :type data: bytes
    :param encoding: One of ``gzip``, ``deflate``, ``br``.
    :type encoding: str
    :return: Decompressed bytes (at most ``_MAX_DECOMPRESSED_BYTES``).
    :rtype: bytes
    :raises ValueError: If the encoding is unsupported, the output exceeds the
        cap, or a deflate/brotli stream is incomplete.
    :raises RuntimeError: If the optional zlib/brotli module is unavailable.
    """
    cap = _MAX_DECOMPRESSED_BYTES
    if encoding == "gzip":
        # GzipFile.read(N) reads at most N bytes; ask for cap+1 to detect overflow.
        with gzip.GzipFile(fileobj=io.BytesIO(data)) as gz:
            out = gz.read(cap + 1)
    elif encoding == "deflate":
        if _ZLIB_MODULE is None:
            raise RuntimeError("zlib module not available")
        # zlib's decompressobj supports a native max_length parameter.
        decompressor = _ZLIB_MODULE.decompressobj()
        out = decompressor.decompress(data, cap + 1)
        # A non-empty unconsumed_tail means the max_length limit stopped
        # decompression with input still pending, i.e. the output is over cap.
        if decompressor.unconsumed_tail or len(out) > cap:
            raise ValueError("deflate-decompressed body exceeds cap")
        out += decompressor.flush()
        if len(out) > cap:
            raise ValueError("deflate-decompressed body exceeds cap")
        # Unlike zlib.decompress(), a decompressobj does not raise on a
        # truncated stream; check the end-of-stream flag explicitly.
        if not decompressor.eof:
            raise ValueError("deflate stream is incomplete or truncated")
    elif encoding == "br":
        if _BROTLI_MODULE is None:
            raise RuntimeError("brotli module not available")
        # Feed the input in chunks and tally the output bytes.
        # NOTE(review): process() returns everything a chunk expands to before
        # the tally is checked, so a single chunk can still allocate more than
        # the cap. Consider the binding's output-limit option where available.
        decompressor = _BROTLI_MODULE.Decompressor()
        chunk = 64 * 1024
        pieces = []
        total = 0
        for start in range(0, len(data), chunk):
            piece = decompressor.process(data[start : start + chunk])
            total += len(piece)
            if total > cap:
                raise ValueError("brotli-decompressed body exceeds cap")
            pieces.append(piece)
        # The streaming API does not raise on truncated (or empty) input;
        # verify completion when the binding exposes is_finished().
        is_finished = getattr(decompressor, "is_finished", None)
        if callable(is_finished) and not is_finished():
            raise ValueError("brotli stream is incomplete or truncated")
        out = b"".join(pieces)
    else:
        raise ValueError(f"unsupported encoding: {encoding}")
    # Single exit: every branch passes the same final cap check.
    if len(out) > cap:
        raise ValueError(f"{encoding}-decompressed body exceeds cap")
    return out
92+
93+
3794def _mark_browser_loader_feature (is_enabled : bool ) -> None :
3895 """Record browser SDK loader usage in statsbeat when available.
3996
@@ -107,7 +164,7 @@ def __init__(self, config: BrowserSDKConfig) -> None:
107164 ]
108165 _mark_browser_loader_feature (self .config .enabled )
109166
110- def should_inject (
167+ def should_inject ( # pylint: disable=too-many-return-statements
111168 self , request_method : str , content_type : Optional [str ], content : bytes , content_encoding : Optional [str ] = None
112169 ) -> bool :
113170 """Determine whether the web snippet should be injected into the response.
@@ -131,8 +188,21 @@ def should_inject(
131188 # Check content type for HTML
132189 if not content_type or "html" not in content_type .lower ():
133190 return False
134- # Get decompressed content once and cache it for reuse
135- decompressed_content = self ._get_decompressed_content (content , content_encoding )
191+ # Bail out on oversized bodies; never decompress/scan/recompress them.
192+ if len (content ) > _MAX_COMPRESSED_BYTES :
193+ _logger .debug (
194+ "Response body %d bytes exceeds injection cap; skipping snippet injection" ,
195+ len (content ),
196+ )
197+ return False
198+ # Get decompressed content once and cache it for reuse. If decompression
199+ # fails (e.g., size cap exceeded or malformed body), skip injection rather
200+ # than risk modifying a body we cannot safely decode.
201+ try :
202+ decompressed_content = self ._get_decompressed_content (content , content_encoding )
203+ except Exception as ex : # pylint: disable=broad-exception-caught
204+ _logger .debug ("Skipping snippet injection; decompression failed: %s" , ex )
205+ return False
136206 # Check if Web SDK is already present using cached decompressed content
137207 if self ._has_existing_web_sdk_from_decompressed (decompressed_content ):
138208 _logger .debug ("Web SDK already detected in HTML, skipping injection" )
@@ -372,19 +442,23 @@ def _decompress_content(self, content: bytes, encoding: Optional[str]) -> bytes:
372442 try :
373443 normalized = encoding .lower ()
374444 if normalized == "gzip" :
375- result = gzip . decompress (content )
445+ result = _bounded_decompress (content , "gzip" )
376446 elif normalized == "br" :
377447 if not HAS_BROTLI or _BROTLI_MODULE is None :
378448 _logger .warning ("brotli library not available for decompression" )
379449 else :
380- result = _BROTLI_MODULE . decompress (content )
450+ result = _bounded_decompress (content , "br" )
381451 elif normalized == "deflate" :
382452 if not HAS_ZLIB or _ZLIB_MODULE is None :
383453 _logger .warning ("zlib library not available for decompression" )
384454 else :
385- result = _ZLIB_MODULE . decompress (content )
455+ result = _bounded_decompress (content , "deflate" )
386456 except Exception as ex : # pylint: disable=broad-exception-caught
457+ # Re-raise so callers (e.g., inject_with_compression / should_inject) can
458+ # fall back to returning the original response untouched instead of
459+ # treating still-compressed bytes as HTML and double-compressing them.
387460 _logger .warning ("Failed to decompress content with encoding %s: %s" , encoding , ex )
461+ raise
388462 return result
389463
390464 def _compress_content (self , content : bytes , encoding : str ) -> bytes :
0 commit comments