Skip to content

Commit 36fc248

Browse files
yuzisunmathetake
andauthored
backport: do not sign content-length for AWS (#694) (#697)
**Commit Message** Previously, AWS signing has included the "content-length" header. However, Envoy's extproc filter strips it from the request as we are using CONTINUE_AND_REPLACE option to reduce the memory overhead. While we have still no clue as to why AWS doesn't complain when the request body is small, excluding content-length from the signing target headers will make the tests with both small and large bodies pass. **Related Issues/PRs (if applicable)** CONTINUE_AND_REPLACE was introduced in #636 to avoid sending a request body twice between Envoy and the ExtProc. Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com> Co-authored-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
1 parent 4b5ebbd commit 36fc248

File tree

2 files changed

+38
-21
lines changed

2 files changed

+38
-21
lines changed

internal/extproc/backendauth/aws.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ func (a *awsHandler) Do(ctx context.Context, requestHeaders map[string]string, h
106106
if err != nil {
107107
return fmt.Errorf("cannot create request: %w", err)
108108
}
109+
// By setting the content length to -1, we can avoid the inclusion of the `Content-Length` header in the signature.
110+
// https://github.com/aws/aws-sdk-go-v2/blob/755839b2eebb246c7eec79b65404aee105196d5b/aws/signer/v4/v4.go#L427-L431
111+
//
112+
// The reason why we want to avoid this is that the ExtProc filter will remove the content-length header
113+
// from the request currently. Envoy will instead do "transfer-encoding: chunked" for the request body,
114+
// which should be acceptable for AWS Bedrock or any modern HTTP service.
115+
// https://github.com/envoyproxy/envoy/blob/60b2b5187cf99db79ecfc54675354997af4765ea/source/extensions/filters/http/ext_proc/processor_state.cc#L180-L183
116+
req.ContentLength = -1
109117

110118
err = a.signer.SignHTTP(ctx, a.credentials, req,
111119
hex.EncodeToString(payloadHash[:]), "bedrock", a.region, time.Now())

tests/extproc/real_providers_test.go

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -85,34 +85,14 @@ func TestWithRealProviders(t *testing.T) {
8585
requireExtProc(t, os.Stdout, extProcExecutablePath(), configPath)
8686

8787
t.Run("health-checking", func(t *testing.T) {
88-
client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/"))
8988
for _, tc := range []realProvidersTestCase{
9089
{name: "openai", modelName: "gpt-4o-mini", required: internaltesting.RequiredCredentialOpenAI},
9190
{name: "aws-bedrock", modelName: "us.meta.llama3-2-1b-instruct-v1:0", required: internaltesting.RequiredCredentialAWS},
9291
{name: "azure-openai", modelName: "o1", required: internaltesting.RequiredCredentialAzure},
9392
} {
9493
t.Run(tc.modelName, func(t *testing.T) {
9594
cc.MaybeSkip(t, tc.required)
96-
require.Eventually(t, func() bool {
97-
chatCompletion, err := client.Chat.Completions.New(t.Context(), openai.ChatCompletionNewParams{
98-
Messages: []openai.ChatCompletionMessageParamUnion{
99-
openai.UserMessage("Say this is a test"),
100-
},
101-
Model: tc.modelName,
102-
})
103-
if err != nil {
104-
t.Logf("error: %v", err)
105-
return false
106-
}
107-
nonEmptyCompletion := false
108-
for _, choice := range chatCompletion.Choices {
109-
t.Logf("choice: %s", choice.Message.Content)
110-
if choice.Message.Content != "" {
111-
nonEmptyCompletion = true
112-
}
113-
}
114-
return nonEmptyCompletion
115-
}, eventuallyTimeout, eventuallyInterval)
95+
requireEventuallyNonStreamingRequestOK(t, tc.modelName, "Say this is a test")
11696
})
11797
}
11898
})
@@ -322,6 +302,11 @@ func TestWithRealProviders(t *testing.T) {
322302
"o1",
323303
}, models)
324304
})
305+
t.Run("aws-bedrock-large-body", func(t *testing.T) {
306+
cc.MaybeSkip(t, internaltesting.RequiredCredentialAWS)
307+
requireEventuallyNonStreamingRequestOK(t,
308+
"us.meta.llama3-2-1b-instruct-v1:0", strings.Repeat("Say this is a test", 10000))
309+
})
325310
}
326311

327312
// realProvidersTestCase is a base test case for the real providers, which is mainly for the centralization of the
@@ -331,3 +316,27 @@ type realProvidersTestCase struct {
331316
modelName string
332317
required internaltesting.RequiredCredential
333318
}
319+
320+
func requireEventuallyNonStreamingRequestOK(t *testing.T, modelName, msg string) {
321+
client := openai.NewClient(option.WithBaseURL(listenerAddress + "/v1/"))
322+
require.Eventually(t, func() bool {
323+
chatCompletion, err := client.Chat.Completions.New(t.Context(), openai.ChatCompletionNewParams{
324+
Messages: []openai.ChatCompletionMessageParamUnion{
325+
openai.UserMessage(msg),
326+
},
327+
Model: modelName,
328+
})
329+
if err != nil {
330+
t.Logf("error: %v", err)
331+
return false
332+
}
333+
nonEmptyCompletion := false
334+
for _, choice := range chatCompletion.Choices {
335+
t.Logf("choice: %s", choice.Message.Content)
336+
if choice.Message.Content != "" {
337+
nonEmptyCompletion = true
338+
}
339+
}
340+
return nonEmptyCompletion
341+
}, eventuallyTimeout, eventuallyInterval)
342+
}

0 commit comments

Comments
 (0)