Skip to content

Commit 72f2b54

Browse files
author
Daniel Duong
committed
perf(autorag): use s3 transfermanager for concurrent multipart dl
1 parent 6c5773c commit 72f2b54

5 files changed

Lines changed: 37 additions & 5 deletions

File tree

packages/autorag/bff/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go 1.24.3
55
require (
66
github.com/aws/aws-sdk-go-v2 v1.41.2
77
github.com/aws/aws-sdk-go-v2/credentials v1.19.10
8+
github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager v0.1.6
89
github.com/aws/aws-sdk-go-v2/service/s3 v1.96.2
910
github.com/google/uuid v1.6.0
1011
github.com/julienschmidt/httprouter v1.3.0

packages/autorag/bff/go.sum

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,20 @@ github.com/aws/aws-sdk-go-v2 v1.41.2 h1:LuT2rzqNQsauaGkPK/7813XxcZ3o3yePY0Iy891T
22
github.com/aws/aws-sdk-go-v2 v1.41.2/go.mod h1:IvvlAZQXvTXznUPfRVfryiG1fbzE2NGK6m9u39YQ+S4=
33
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.5 h1:zWFmPmgw4sveAYi1mRqG+E/g0461cJ5M4bJ8/nc6d3Q=
44
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.5/go.mod h1:nVUlMLVV8ycXSb7mSkcNu9e3v/1TJq2RTlrPwhYWr5c=
5+
github.com/aws/aws-sdk-go-v2/config v1.32.10 h1:9DMthfO6XWZYLfzZglAgW5Fyou2nRI5CuV44sTedKBI=
6+
github.com/aws/aws-sdk-go-v2/config v1.32.10/go.mod h1:2rUIOnA2JaiqYmSKYmRJlcMWy6qTj1vuRFscppSBMcw=
57
github.com/aws/aws-sdk-go-v2/credentials v1.19.10 h1:EEhmEUFCE1Yhl7vDhNOI5OCL/iKMdkkYFTRpZXNw7m8=
68
github.com/aws/aws-sdk-go-v2/credentials v1.19.10/go.mod h1:RnnlFCAlxQCkN2Q379B67USkBMu1PipEEiibzYN5UTE=
9+
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.18 h1:Ii4s+Sq3yDfaMLpjrJsqD6SmG/Wq/P5L/hw2qa78UAY=
10+
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.18/go.mod h1:6x81qnY++ovptLE6nWQeWrpXxbnlIex+4H4eYYGcqfc=
11+
github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager v0.1.6 h1:AaBtONvA5gHSZHo2EgjXMQCvYdQWclo3MznTkQnQ8w0=
12+
github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager v0.1.6/go.mod h1:+gC6IbVU+BWeCB0W9MWNgOpKuTmggJ/ES54eyKA8zQM=
713
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.18 h1:F43zk1vemYIqPAwhjTjYIz0irU2EY7sOb/F5eJ3HuyM=
814
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.18/go.mod h1:w1jdlZXrGKaJcNoL+Nnrj+k5wlpGXqnNrKoP22HvAug=
915
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.18 h1:xCeWVjj0ki0l3nruoyP2slHsGArMxeiiaoPN5QZH6YQ=
1016
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.18/go.mod h1:r/eLGuGCBw6l36ZRWiw6PaZwPXb6YOj+i/7MizNl5/k=
17+
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
18+
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
1119
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.18 h1:eZioDaZGJ0tMM4gzmkNIO2aAoQd+je7Ug7TkvAzlmkU=
1220
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.18/go.mod h1:CCXwUKAJdoWr6/NcxZ+zsiPr6oH/Q5aTooRGYieAyj4=
1321
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.5 h1:CeY9LUdur+Dxoeldqoun6y4WtJ3RQtzk0JMP2gfUay0=
@@ -20,6 +28,14 @@ github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.18 h1:/A/xDuZAVD2Bp
2028
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.18/go.mod h1:hWe9b4f+djUQGmyiGEeOnZv69dtMSgpDRIvNMvuvzvY=
2129
github.com/aws/aws-sdk-go-v2/service/s3 v1.96.2 h1:M1A9AjcFwlxTLuf0Faj88L8Iqw0n/AJHjpZTQzMMsSc=
2230
github.com/aws/aws-sdk-go-v2/service/s3 v1.96.2/go.mod h1:KsdTV6Q9WKUZm2mNJnUFmIoXfZux91M3sr/a4REX8e0=
31+
github.com/aws/aws-sdk-go-v2/service/signin v1.0.6 h1:MzORe+J94I+hYu2a6XmV5yC9huoTv8NRcCrUNedDypQ=
32+
github.com/aws/aws-sdk-go-v2/service/signin v1.0.6/go.mod h1:hXzcHLARD7GeWnifd8j9RWqtfIgxj4/cAtIVIK7hg8g=
33+
github.com/aws/aws-sdk-go-v2/service/sso v1.30.11 h1:7oGD8KPfBOJGXiCoRKrrrQkbvCp8N++u36hrLMPey6o=
34+
github.com/aws/aws-sdk-go-v2/service/sso v1.30.11/go.mod h1:0DO9B5EUJQlIDif+XJRWCljZRKsAFKh3gpFz7UnDtOo=
35+
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.15 h1:edCcNp9eGIUDUCrzoCu1jWAXLGFIizeqkdkKgRlJwWc=
36+
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.15/go.mod h1:lyRQKED9xWfgkYC/wmmYfv7iVIM68Z5OQ88ZdcV1QbU=
37+
github.com/aws/aws-sdk-go-v2/service/sts v1.41.7 h1:NITQpgo9A5NrDZ57uOWj+abvXSb83BbyggcUBVksN7c=
38+
github.com/aws/aws-sdk-go-v2/service/sts v1.41.7/go.mod h1:sks5UWBhEuWYDPdwlnRFn1w7xWdH29Jcpe+/PJQefEs=
2339
github.com/aws/smithy-go v1.24.1 h1:VbyeNfmYkWoxMVpGUAbQumkODcYmfMRfZ8yQiH30SK0=
2440
github.com/aws/smithy-go v1.24.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
2541
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=

packages/autorag/bff/go.work.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4
4040
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
4141
github.com/aws/aws-sdk-go v1.55.6 h1:cSg4pvZ3m8dgYcgqB97MrcdjUmZ1BeMYKUxMMB89IPk=
4242
github.com/aws/aws-sdk-go v1.55.6/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
43+
github.com/aws/aws-sdk-go-v2/config v1.32.10/go.mod h1:2rUIOnA2JaiqYmSKYmRJlcMWy6qTj1vuRFscppSBMcw=
4344
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.18/go.mod h1:6x81qnY++ovptLE6nWQeWrpXxbnlIex+4H4eYYGcqfc=
45+
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
4446
github.com/aws/aws-sdk-go-v2/service/signin v1.0.6/go.mod h1:hXzcHLARD7GeWnifd8j9RWqtfIgxj4/cAtIVIK7hg8g=
4547
github.com/aws/aws-sdk-go-v2/service/sso v1.30.11/go.mod h1:0DO9B5EUJQlIDif+XJRWCljZRKsAFKh3gpFz7UnDtOo=
4648
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.15/go.mod h1:lyRQKED9xWfgkYC/wmmYfv7iVIM68Z5OQ88ZdcV1QbU=

packages/autorag/bff/internal/api/s3_handler_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ func TestGetS3FileHandler_MissingKey(t *testing.T) {
8484
assert.Equal(t, http.StatusBadRequest, res.StatusCode)
8585
}
8686

87-
8887
func TestGetS3FileHandler_SecretNotFound(t *testing.T) {
8988
// Mock client returns empty secrets list
9089
mockClient := &mockKubernetesClientForSecrets{secrets: []corev1.Secret{}}

packages/autorag/bff/internal/repositories/s3.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/aws/aws-sdk-go-v2/aws"
99
"github.com/aws/aws-sdk-go-v2/credentials"
10+
"github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager"
1011
"github.com/aws/aws-sdk-go-v2/service/s3"
1112
k8s "github.com/opendatahub-io/autorag-library/bff/internal/integrations/kubernetes"
1213
corev1 "k8s.io/api/core/v1"
@@ -93,7 +94,8 @@ func (r *S3Repository) GetS3Credentials(
9394
return creds, nil
9495
}
9596

96-
// GetS3Object retrieves an object from S3 and returns a reader for the content
97+
// GetS3Object retrieves an object from S3 using transfer manager for optimized downloading
98+
// and returns a reader for the content. Uses concurrent multipart downloads for large files.
9799
func (r *S3Repository) GetS3Object(
98100
ctx context.Context,
99101
creds *S3Credentials,
@@ -113,10 +115,21 @@ func (r *S3Repository) GetS3Object(
113115
o.UsePathStyle = true
114116
})
115117

116-
// Get the object
117-
result, err := s3Client.GetObject(ctx, &s3.GetObjectInput{
118+
// Create transfer manager for optimized downloads
119+
transferClient := transfermanager.New(s3Client)
120+
121+
// Get the object using transfer manager
122+
// This automatically handles multipart downloads for large files with concurrency
123+
result, err := transferClient.GetObject(ctx, &transfermanager.GetObjectInput{
118124
Bucket: aws.String(bucket),
119125
Key: aws.String(key),
126+
}, func(o *transfermanager.Options) {
127+
// Configure for optimal streaming performance
128+
o.Concurrency = 10 // 10 concurrent part downloads
129+
o.PartSizeBytes = 64 * 1024 * 1024 // 64MB parts for large files
130+
o.GetObjectBufferSize = 1024 * 1024 // 1MB buffer for streaming
131+
o.PartBodyMaxRetries = 3 // Retry failed parts up to 3 times
132+
o.DisableChecksumValidation = false // Enable checksum validation for data integrity
120133
})
121134
if err != nil {
122135
return nil, "", fmt.Errorf("error retrieving object from S3: %w", err)
@@ -128,7 +141,8 @@ func (r *S3Repository) GetS3Object(
128141
contentType = *result.ContentType
129142
}
130143

131-
return result.Body, contentType, nil
144+
// Transfer manager's GetObject returns io.Reader, wrap it with NopCloser for io.ReadCloser
145+
return io.NopCloser(result.Body), contentType, nil
132146
}
133147

134148
// Helper functions for case conversion

0 commit comments

Comments
 (0)