Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions headroom/proxy/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,9 +1018,14 @@ async def handle_anthropic_messages(
cached=True,
)

# Remove compression headers from cached response
response_headers = dict(cached.response_headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None)

return Response(
content=cached.response_body,
headers=cached.response_headers,
headers=response_headers,
media_type="application/json",
)

Expand Down Expand Up @@ -1297,10 +1302,15 @@ async def api_call_fn(
f"(saved {tokens_saved:,} tokens)"
)

# Remove compression headers since httpx already decompressed the response
response_headers = dict(response.headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None) # Length changed after decompression

return Response(
content=response.content,
status_code=response.status_code,
headers=dict(response.headers),
headers=response_headers,
)

except Exception as e:
Expand Down Expand Up @@ -1411,7 +1421,13 @@ async def handle_openai_chat(
latency_ms=(time.time() - start_time) * 1000,
cached=True,
)
return Response(content=cached.response_body, headers=cached.response_headers)

# Remove compression headers from cached response
response_headers = dict(cached.response_headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None)

return Response(content=cached.response_body, headers=response_headers)

# Token counting
tokenizer = get_tokenizer(model)
Expand Down Expand Up @@ -1535,10 +1551,15 @@ async def handle_openai_chat(
f"(saved {tokens_saved:,} tokens)"
)

# Remove compression headers since httpx already decompressed the response
response_headers = dict(response.headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None) # Length changed after decompression

return Response(
content=response.content,
status_code=response.status_code,
headers=dict(response.headers),
headers=response_headers,
)
except Exception as e:
self.metrics.record_failed()
Expand All @@ -1561,10 +1582,15 @@ async def handle_passthrough(self, request: Request, base_url: str) -> Response:
content=body,
)

# Remove compression headers since httpx already decompressed the response
response_headers = dict(response.headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None) # Length changed after decompression

return Response(
content=response.content,
status_code=response.status_code,
headers=dict(response.headers),
headers=response_headers,
)


Expand Down
152 changes: 152 additions & 0 deletions tests/test_proxy_compression_headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Tests for compression header handling in the proxy server.

These tests verify that the proxy correctly removes Content-Encoding headers
from responses after httpx automatically decompresses them, preventing
double-decompression errors (ZlibError) in clients.
"""

import gzip
import json

import pytest

from headroom.proxy.server import ProxyConfig


@pytest.fixture
def mock_anthropic_response_with_compression_headers():
    """Build a fake httpx-style response whose body is already decompressed
    but whose headers still advertise gzip compression.

    httpx transparently decompresses response bodies while leaving the
    original Content-Encoding / Content-Length headers intact; forwarding
    those stale headers to a client is the ZlibError bug under test.
    """

    class MockResponse:
        """Stand-in for an httpx response carrying stale compression headers."""

        def __init__(self):
            payload = {
                "id": "msg_test123",
                "type": "message",
                "role": "assistant",
                "content": [{"type": "text", "text": "Hello!"}],
                "model": "claude-3-5-sonnet-20241022",
                "stop_reason": "end_turn",
                "usage": {"input_tokens": 10, "output_tokens": 5},
            }
            self.response_data = payload
            # httpx hands back the plain JSON bytes, already decompressed.
            self.content = json.dumps(payload).encode("utf-8")
            self.status_code = 200

            # Stale headers: the encoding marker and the *compressed* length
            # survive httpx's decompression -- exactly the inconsistency
            # the proxy fix must scrub before forwarding.
            compressed_size = len(gzip.compress(self.content))
            self.headers = {
                "content-type": "application/json",
                "content-encoding": "gzip",  # Should be removed!
                "content-length": str(compressed_size),  # Wrong!
                "x-request-id": "test-request-id",
            }

    return MockResponse()


class TestCompressionHeaderRemoval:
"""Tests for Content-Encoding header removal logic."""

def test_compression_headers_are_removed_from_dict(
self, mock_anthropic_response_with_compression_headers
):
"""Test that our fix removes compression headers from response headers."""
mock_response = mock_anthropic_response_with_compression_headers

# Simulate what the fixed code does
response_headers = dict(mock_response.headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None)

# Verify compression headers are removed
assert "content-encoding" not in response_headers
assert "content-length" not in response_headers

# Verify other headers are preserved
assert response_headers["content-type"] == "application/json"
assert response_headers["x-request-id"] == "test-request-id"

def test_response_body_is_decompressed_not_compressed(
self, mock_anthropic_response_with_compression_headers
):
"""Verify the response content is already decompressed (httpx behavior)."""
mock_response = mock_anthropic_response_with_compression_headers

# The content should be valid JSON (decompressed)
response_data = json.loads(mock_response.content)
assert response_data["id"] == "msg_test123"

# Trying to decompress it again should fail (proving it's not compressed)
with pytest.raises((gzip.BadGzipFile, OSError, Exception)):
gzip.decompress(mock_response.content)

def test_headers_with_wrong_content_length_cause_issues(
self, mock_anthropic_response_with_compression_headers
):
"""Demonstrate that keeping compression headers causes length mismatch."""
mock_response = mock_anthropic_response_with_compression_headers

# The content-length header says the body is compressed size
claimed_length = int(mock_response.headers["content-length"])

# But the actual content is decompressed size
actual_length = len(mock_response.content)

# They don't match! This can cause client issues
assert claimed_length != actual_length
assert claimed_length < actual_length # Compressed is smaller

def test_removing_headers_fixes_length_mismatch(
self, mock_anthropic_response_with_compression_headers
):
"""Show that removing compression headers allows proper content-length."""
mock_response = mock_anthropic_response_with_compression_headers

# Apply the fix
response_headers = dict(mock_response.headers)
response_headers.pop("content-encoding", None)
response_headers.pop("content-length", None)

# Now we can set correct content-length
response_headers["content-length"] = str(len(mock_response.content))

# Verify it matches actual content
assert int(response_headers["content-length"]) == len(mock_response.content)


class TestNoRegressionForUncompressedResponses:
    """Ensure the fix doesn't break responses that were never compressed."""

    def test_pop_on_missing_keys_is_safe(self):
        """Verify that .pop() on non-existent keys doesn't cause errors."""
        headers = {
            "content-type": "application/json",
            # No compression headers
        }

        # .pop(key, None) must be a silent no-op for absent keys -- the
        # default argument suppresses the KeyError.
        for absent_key in ("content-encoding", "content-length"):
            headers.pop(absent_key, None)

        # Nothing else was disturbed.
        assert headers == {"content-type": "application/json"}

    def test_dict_conversion_preserves_headers(self):
        """Verify dict() conversion doesn't lose headers."""
        original_headers = {
            "content-type": "application/json",
            "x-custom-header": "value",
            "authorization": "Bearer token",
        }

        # Same shallow-copy conversion the fix performs.
        converted = dict(original_headers)

        # Every entry carried over, into a distinct object.
        assert converted == original_headers
        assert converted is not original_headers
Loading