diff --git a/headroom/proxy/server.py b/headroom/proxy/server.py
index 5bc73951..0291fb04 100644
--- a/headroom/proxy/server.py
+++ b/headroom/proxy/server.py
@@ -1018,9 +1018,14 @@ async def handle_anthropic_messages(
                     cached=True,
                 )
 
+                # Remove compression headers from cached response
+                response_headers = dict(cached.response_headers)
+                response_headers.pop("content-encoding", None)
+                response_headers.pop("content-length", None)
+
                 return Response(
                     content=cached.response_body,
-                    headers=cached.response_headers,
+                    headers=response_headers,
                     media_type="application/json",
                 )
 
@@ -1297,10 +1302,15 @@ async def api_call_fn(
                         f"(saved {tokens_saved:,} tokens)"
                     )
 
+            # Remove compression headers since httpx already decompressed the response
+            response_headers = dict(response.headers)
+            response_headers.pop("content-encoding", None)
+            response_headers.pop("content-length", None)  # Length changed after decompression
+
             return Response(
                 content=response.content,
                 status_code=response.status_code,
-                headers=dict(response.headers),
+                headers=response_headers,
             )
 
         except Exception as e:
@@ -1411,7 +1421,13 @@ async def handle_openai_chat(
                     latency_ms=(time.time() - start_time) * 1000,
                     cached=True,
                 )
-                return Response(content=cached.response_body, headers=cached.response_headers)
+
+                # Remove compression headers from cached response
+                response_headers = dict(cached.response_headers)
+                response_headers.pop("content-encoding", None)
+                response_headers.pop("content-length", None)
+
+                return Response(content=cached.response_body, headers=response_headers)
 
         # Token counting
         tokenizer = get_tokenizer(model)
@@ -1535,10 +1551,15 @@ async def handle_openai_chat(
                         f"(saved {tokens_saved:,} tokens)"
                     )
 
+            # Remove compression headers since httpx already decompressed the response
+            response_headers = dict(response.headers)
+            response_headers.pop("content-encoding", None)
+            response_headers.pop("content-length", None)  # Length changed after decompression
+
             return Response(
                 content=response.content,
                 status_code=response.status_code,
-                headers=dict(response.headers),
+                headers=response_headers,
             )
         except Exception as e:
             self.metrics.record_failed()
@@ -1561,10 +1582,15 @@ async def handle_passthrough(self, request: Request, base_url: str) -> Response:
             content=body,
         )
 
+        # Remove compression headers since httpx already decompressed the response
+        response_headers = dict(response.headers)
+        response_headers.pop("content-encoding", None)
+        response_headers.pop("content-length", None)  # Length changed after decompression
+
         return Response(
             content=response.content,
             status_code=response.status_code,
-            headers=dict(response.headers),
+            headers=response_headers,
         )
 
 
diff --git a/tests/test_proxy_compression_headers.py b/tests/test_proxy_compression_headers.py
new file mode 100644
index 00000000..2039a65f
--- /dev/null
+++ b/tests/test_proxy_compression_headers.py
@@ -0,0 +1,152 @@
+"""Tests for compression header handling in the proxy server.
+
+These tests verify that the proxy correctly removes Content-Encoding headers
+from responses after httpx automatically decompresses them, preventing
+double-decompression errors (ZlibError) in clients.
+"""
+
+import gzip
+import json
+
+import pytest
+
+from headroom.proxy.server import ProxyConfig
+
+
+@pytest.fixture
+def mock_anthropic_response_with_compression_headers():
+    """Create a mock response that simulates httpx behavior.
+
+    httpx automatically decompresses responses but leaves compression headers.
+    This is what causes the ZlibError bug we're testing for.
+    """
+
+    class MockResponse:
+        """Mock httpx response with compression headers."""
+
+        def __init__(self):
+            self.response_data = {
+                "id": "msg_test123",
+                "type": "message",
+                "role": "assistant",
+                "content": [{"type": "text", "text": "Hello!"}],
+                "model": "claude-3-5-sonnet-20241022",
+                "stop_reason": "end_turn",
+                "usage": {"input_tokens": 10, "output_tokens": 5},
+            }
+            # Body is already decompressed (httpx does this automatically)
+            self.content = json.dumps(self.response_data).encode("utf-8")
+            self.status_code = 200
+
+            # Headers still contain compression info (this is the bug!)
+            self.headers = {
+                "content-type": "application/json",
+                "content-encoding": "gzip",  # Should be removed!
+                "content-length": str(len(gzip.compress(self.content))),  # Wrong!
+                "x-request-id": "test-request-id",
+            }
+
+    return MockResponse()
+
+
+class TestCompressionHeaderRemoval:
+    """Tests for Content-Encoding header removal logic."""
+
+    def test_compression_headers_are_removed_from_dict(
+        self, mock_anthropic_response_with_compression_headers
+    ):
+        """Test that our fix removes compression headers from response headers."""
+        mock_response = mock_anthropic_response_with_compression_headers
+
+        # Simulate what the fixed code does
+        response_headers = dict(mock_response.headers)
+        response_headers.pop("content-encoding", None)
+        response_headers.pop("content-length", None)
+
+        # Verify compression headers are removed
+        assert "content-encoding" not in response_headers
+        assert "content-length" not in response_headers
+
+        # Verify other headers are preserved
+        assert response_headers["content-type"] == "application/json"
+        assert response_headers["x-request-id"] == "test-request-id"
+
+    def test_response_body_is_decompressed_not_compressed(
+        self, mock_anthropic_response_with_compression_headers
+    ):
+        """Verify the response content is already decompressed (httpx behavior)."""
+        mock_response = mock_anthropic_response_with_compression_headers
+
+        # The content should be valid JSON (decompressed)
+        response_data = json.loads(mock_response.content)
+        assert response_data["id"] == "msg_test123"
+
+        # Decompressing again must fail with BadGzipFile (proving it's not compressed)
+        with pytest.raises(gzip.BadGzipFile):
+            gzip.decompress(mock_response.content)
+
+    def test_headers_with_wrong_content_length_cause_issues(
+        self, mock_anthropic_response_with_compression_headers
+    ):
+        """Demonstrate that keeping compression headers causes length mismatch."""
+        mock_response = mock_anthropic_response_with_compression_headers
+
+        # The content-length header says the body is compressed size
+        claimed_length = int(mock_response.headers["content-length"])
+
+        # But the actual content is decompressed size
+        actual_length = len(mock_response.content)
+
+        # They don't match! This can cause client issues
+        assert claimed_length != actual_length
+        assert claimed_length < actual_length  # Compressed is smaller
+
+    def test_removing_headers_fixes_length_mismatch(
+        self, mock_anthropic_response_with_compression_headers
+    ):
+        """Show that removing compression headers allows proper content-length."""
+        mock_response = mock_anthropic_response_with_compression_headers
+
+        # Apply the fix
+        response_headers = dict(mock_response.headers)
+        response_headers.pop("content-encoding", None)
+        response_headers.pop("content-length", None)
+
+        # Now we can set correct content-length
+        response_headers["content-length"] = str(len(mock_response.content))
+
+        # Verify it matches actual content
+        assert int(response_headers["content-length"]) == len(mock_response.content)
+
+
+class TestNoRegressionForUncompressedResponses:
+    """Ensure the fix doesn't break responses that were never compressed."""
+
+    def test_pop_on_missing_keys_is_safe(self):
+        """Verify that .pop() on non-existent keys doesn't cause errors."""
+        headers = {
+            "content-type": "application/json",
+            # No compression headers
+        }
+
+        # This should not raise KeyError
+        headers.pop("content-encoding", None)
+        headers.pop("content-length", None)
+
+        # Headers should be unchanged
+        assert headers == {"content-type": "application/json"}
+
+    def test_dict_conversion_preserves_headers(self):
+        """Verify dict() conversion doesn't lose headers."""
+        original_headers = {
+            "content-type": "application/json",
+            "x-custom-header": "value",
+            "authorization": "Bearer token",
+        }
+
+        # Convert to dict (as the fix does)
+        converted = dict(original_headers)
+
+        # All headers preserved
+        assert converted == original_headers
+        assert converted is not original_headers  # New object