Skip to content

Commit 4204b1e

Browse files
Dumbrisclaude
andauthored
fix: preserve ImageContent and AudioContent blocks through proxy (#368) (#369)
The proxy was JSON-serializing upstream CallToolResult and wrapping the entire payload in a single TextContent block. This destroyed the native content type of any ImageContent, AudioContent, or other non-text block, forcing vision-capable LLMs to receive base64 as text — 50-250x more token-consumptive than the native vision path. Fix: introduce forwardContentResult() which walks CallToolResult.Content and preserves non-text blocks unchanged while applying truncation only to TextContent.Text. Applied in three sites: - handleCallToolVariant (call_tool_read/write/destructive) - handleCallTool (legacy handler) - makeDirectModeHandler (direct routing mode) Tests: - internal/server/content_forward_test.go — unit tests covering image preservation, selective truncation, text-only case, and the non-CallToolResult fallback path - internal/server/e2e_content_forward_test.go — end-to-end test that spins up a mock upstream returning text+image+audio and verifies the downstream client receives all three native content types (fails before this fix with "expected 3 content blocks, got 1") - updated TestE2E_ToolCalling: it was inadvertently asserting the buggy double-wrapping behavior; updated to reflect native forwarding Co-authored-by: Claude Code <noreply@anthropic.com>
1 parent d98458b commit 4204b1e

6 files changed

Lines changed: 531 additions & 143 deletions

File tree

internal/server/content_forward.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package server
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
7+
"github.com/mark3labs/mcp-go/mcp"
8+
9+
"github.com/smart-mcp-proxy/mcpproxy-go/internal/truncate"
10+
)
11+
12+
// forwardContentResult preserves non-text content blocks (ImageContent, AudioContent,
13+
// EmbeddedResource) from an upstream CallToolResult while applying truncation only to
14+
// TextContent blocks. This fixes issue #368 where all content types were being
15+
// serialized to JSON and wrapped in a single TextContent, destroying the ability of
16+
// vision-capable LLMs to process images efficiently.
17+
//
18+
// Parameters:
19+
// - result: the upstream CallToolResult (passed as interface{} from Manager.CallTool)
20+
// - truncator: applies size limits to text blocks only
21+
// - toolName, args: passed to truncator for caching
22+
//
23+
// Returns:
24+
// - forwarded: the CallToolResult to return downstream (with original content types)
25+
// - textRepresentation: a text rendering of the full result for logging/metrics
26+
// - wasTruncated: whether any TextContent block was truncated
27+
// - cacheStored: whether the full response was stored in the cache (for audit)
28+
//
29+
// If result is not a *mcp.CallToolResult, it falls back to JSON-serializing the whole
30+
// thing into a TextContent block (legacy behavior).
31+
func forwardContentResult(result interface{}, truncator *truncate.Truncator, toolName string, args map[string]interface{}) (forwarded *mcp.CallToolResult, textRepresentation string, wasTruncated bool) {
32+
ctr, ok := result.(*mcp.CallToolResult)
33+
if !ok || ctr == nil {
34+
// Fallback: not a CallToolResult (should not happen with current upstream chain,
35+
// but guard against future changes). Use legacy JSON-wrap behavior.
36+
jsonBytes, err := json.Marshal(result)
37+
if err != nil {
38+
return mcp.NewToolResultError(fmt.Sprintf("Failed to serialize result: %v", err)), "", false
39+
}
40+
text := string(jsonBytes)
41+
if truncator != nil && truncator.ShouldTruncate(text) {
42+
tr := truncator.Truncate(text, toolName, args)
43+
text = tr.TruncatedContent
44+
wasTruncated = true
45+
}
46+
return mcp.NewToolResultText(text), text, wasTruncated
47+
}
48+
49+
// Walk the Content slice, truncating only TextContent blocks.
50+
// Build a parallel text representation for logging/metrics.
51+
newContent := make([]mcp.Content, 0, len(ctr.Content))
52+
var textBuilder []string
53+
for _, c := range ctr.Content {
54+
switch tc := c.(type) {
55+
case mcp.TextContent:
56+
txt := tc.Text
57+
if truncator != nil && truncator.ShouldTruncate(txt) {
58+
tr := truncator.Truncate(txt, toolName, args)
59+
txt = tr.TruncatedContent
60+
wasTruncated = true
61+
}
62+
tc.Text = txt
63+
newContent = append(newContent, tc)
64+
textBuilder = append(textBuilder, txt)
65+
case mcp.ImageContent:
66+
// Preserve image blocks unchanged. For logging, emit a placeholder.
67+
newContent = append(newContent, tc)
68+
textBuilder = append(textBuilder, fmt.Sprintf("[image:%s len=%d]", tc.MIMEType, len(tc.Data)))
69+
case mcp.AudioContent:
70+
// Preserve audio blocks unchanged. For logging, emit a placeholder.
71+
newContent = append(newContent, tc)
72+
textBuilder = append(textBuilder, fmt.Sprintf("[audio:%s len=%d]", tc.MIMEType, len(tc.Data)))
73+
default:
74+
// Unknown content type (e.g., EmbeddedResource). Forward as-is and
75+
// best-effort JSON encode for logging.
76+
newContent = append(newContent, c)
77+
if b, err := json.Marshal(c); err == nil {
78+
textBuilder = append(textBuilder, string(b))
79+
}
80+
}
81+
}
82+
83+
forwarded = &mcp.CallToolResult{
84+
Result: ctr.Result,
85+
Content: newContent,
86+
StructuredContent: ctr.StructuredContent,
87+
IsError: ctr.IsError,
88+
}
89+
textRepresentation = joinTextParts(textBuilder)
90+
return forwarded, textRepresentation, wasTruncated
91+
}
92+
93+
// joinTextParts concatenates text parts with a newline separator.
94+
// Equivalent to strings.Join but avoids importing strings just for this.
95+
func joinTextParts(parts []string) string {
96+
switch len(parts) {
97+
case 0:
98+
return ""
99+
case 1:
100+
return parts[0]
101+
}
102+
total := len(parts) - 1 // separators
103+
for _, p := range parts {
104+
total += len(p)
105+
}
106+
out := make([]byte, 0, total)
107+
for i, p := range parts {
108+
if i > 0 {
109+
out = append(out, '\n')
110+
}
111+
out = append(out, p...)
112+
}
113+
return string(out)
114+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package server
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/mark3labs/mcp-go/mcp"
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
11+
"github.com/smart-mcp-proxy/mcpproxy-go/internal/truncate"
12+
)
13+
14+
// TestForwardContentResult_PreservesImageContent verifies that an ImageContent
15+
// block from upstream is forwarded unchanged to the downstream client.
16+
// Regression test for issue #368.
17+
func TestForwardContentResult_PreservesImageContent(t *testing.T) {
18+
upstream := &mcp.CallToolResult{
19+
Content: []mcp.Content{
20+
mcp.NewTextContent("Here is your image:"),
21+
mcp.NewImageContent("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwC", "image/png"),
22+
},
23+
}
24+
truncator := truncate.NewTruncator(0) // disabled
25+
26+
forwarded, text, truncated := forwardContentResult(upstream, truncator, "test:tool", nil)
27+
28+
require.NotNil(t, forwarded)
29+
require.Equal(t, 2, len(forwarded.Content), "both content blocks must be forwarded")
30+
assert.False(t, truncated)
31+
32+
// First block: text preserved
33+
tc, ok := forwarded.Content[0].(mcp.TextContent)
34+
require.True(t, ok, "block 0 should remain TextContent")
35+
assert.Equal(t, "Here is your image:", tc.Text)
36+
37+
// Second block: image preserved as native type
38+
ic, ok := forwarded.Content[1].(mcp.ImageContent)
39+
require.True(t, ok, "block 1 should remain ImageContent (not serialized to text)")
40+
assert.Equal(t, "image/png", ic.MIMEType)
41+
assert.Equal(t, "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwC", ic.Data)
42+
43+
// Text representation used for logging should reference both blocks
44+
assert.Contains(t, text, "Here is your image:")
45+
assert.Contains(t, text, "[image:image/png")
46+
}
47+
48+
// TestForwardContentResult_TruncatesOnlyText verifies that truncation applies
49+
// to TextContent but leaves ImageContent and AudioContent untouched regardless
50+
// of their size.
51+
func TestForwardContentResult_TruncatesOnlyText(t *testing.T) {
52+
// Build a very large base64 payload to show it survives truncation
53+
bigData := strings.Repeat("A", 10000)
54+
bigText := strings.Repeat("x", 2000)
55+
56+
upstream := &mcp.CallToolResult{
57+
Content: []mcp.Content{
58+
mcp.NewTextContent(bigText),
59+
mcp.NewImageContent(bigData, "image/png"),
60+
mcp.NewAudioContent(bigData, "audio/wav"),
61+
},
62+
}
63+
// Truncator with a 500-char limit
64+
truncator := truncate.NewTruncator(500)
65+
66+
forwarded, _, truncated := forwardContentResult(upstream, truncator, "test:tool", nil)
67+
require.NotNil(t, forwarded)
68+
require.Equal(t, 3, len(forwarded.Content))
69+
assert.True(t, truncated, "text block should be marked as truncated")
70+
71+
// Text was truncated
72+
tc, ok := forwarded.Content[0].(mcp.TextContent)
73+
require.True(t, ok)
74+
assert.Less(t, len(tc.Text), len(bigText), "text block should be shorter after truncation")
75+
76+
// Image unchanged
77+
ic, ok := forwarded.Content[1].(mcp.ImageContent)
78+
require.True(t, ok)
79+
assert.Equal(t, bigData, ic.Data, "image data must be forwarded byte-for-byte")
80+
81+
// Audio unchanged
82+
ac, ok := forwarded.Content[2].(mcp.AudioContent)
83+
require.True(t, ok)
84+
assert.Equal(t, bigData, ac.Data, "audio data must be forwarded byte-for-byte")
85+
}
86+
87+
// TestForwardContentResult_TextOnlyNoTruncation exercises the common case of a
88+
// small text-only response. Verifies the result is forwarded unchanged.
89+
func TestForwardContentResult_TextOnlyNoTruncation(t *testing.T) {
90+
upstream := &mcp.CallToolResult{
91+
Content: []mcp.Content{
92+
mcp.NewTextContent("small result"),
93+
},
94+
}
95+
truncator := truncate.NewTruncator(0)
96+
97+
forwarded, text, truncated := forwardContentResult(upstream, truncator, "test:tool", nil)
98+
require.NotNil(t, forwarded)
99+
require.Equal(t, 1, len(forwarded.Content))
100+
assert.False(t, truncated)
101+
assert.Equal(t, "small result", text)
102+
103+
tc, ok := forwarded.Content[0].(mcp.TextContent)
104+
require.True(t, ok)
105+
assert.Equal(t, "small result", tc.Text)
106+
}
107+
108+
// TestForwardContentResult_Fallback verifies that if result is not a
109+
// *mcp.CallToolResult (e.g., nil or some other interface value), the function
110+
// falls back to legacy JSON-wrapping behavior without panicking.
111+
func TestForwardContentResult_Fallback(t *testing.T) {
112+
// Case 1: nil — should not panic, returns a JSON "null" text wrapper
113+
forwarded, _, _ := forwardContentResult(nil, truncate.NewTruncator(0), "t", nil)
114+
require.NotNil(t, forwarded)
115+
require.Equal(t, 1, len(forwarded.Content))
116+
117+
// Case 2: a plain map — legacy JSON marshal path
118+
forwarded, text, _ := forwardContentResult(map[string]string{"key": "value"}, truncate.NewTruncator(0), "t", nil)
119+
require.NotNil(t, forwarded)
120+
require.Equal(t, 1, len(forwarded.Content))
121+
assert.Contains(t, text, "key")
122+
assert.Contains(t, text, "value")
123+
}

0 commit comments

Comments
 (0)