-
Notifications
You must be signed in to change notification settings - Fork 402
lakectl enhance downloader reduce call to stat object for faster download #9555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fae390d
64f2353
f961c6b
f198db9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -9,9 +9,12 @@ import ( | |||||
"path/filepath" | ||||||
|
||||||
"github.com/go-openapi/swag" | ||||||
"github.com/hashicorp/go-retryablehttp" | ||||||
"github.com/jedib0t/go-pretty/v6/progress" | ||||||
"github.com/treeverse/lakefs/pkg/api/apigen" | ||||||
"github.com/treeverse/lakefs/pkg/api/apiutil" | ||||||
"github.com/treeverse/lakefs/pkg/logging" | ||||||
"github.com/treeverse/lakefs/pkg/stats" | ||||||
"github.com/treeverse/lakefs/pkg/uri" | ||||||
"golang.org/x/sync/errgroup" | ||||||
) | ||||||
|
@@ -40,10 +43,14 @@ type downloadPart struct { | |||||
func NewDownloader(client *apigen.ClientWithResponses, preSign bool) *Downloader { | ||||||
// setup http client | ||||||
transport := http.DefaultTransport.(*http.Transport).Clone() | ||||||
transport.MaxIdleConnsPerHost = 10 | ||||||
httpClient := &http.Client{ | ||||||
Transport: transport, | ||||||
} | ||||||
transport.MaxIdleConnsPerHost = 50 | ||||||
|
||||||
retryClient := retryablehttp.NewClient() | ||||||
retryClient.Logger = &stats.LoggerAdapter{Logger: logging.ContextUnavailable().WithField("component", "downloader")} | ||||||
retryClient.RetryMax = 3 | ||||||
retryClient.Backoff = retryablehttp.DefaultBackoff | ||||||
retryClient.HTTPClient.Transport = transport | ||||||
httpClient := retryClient.StandardClient() | ||||||
|
||||||
return &Downloader{ | ||||||
Client: client, | ||||||
|
@@ -55,8 +62,13 @@ func NewDownloader(client *apigen.ClientWithResponses, preSign bool) *Downloader | |||||
} | ||||||
} | ||||||
|
||||||
// Download downloads an object from lakeFS to a local file, create the destination directory if needed. | ||||||
func (d *Downloader) Download(ctx context.Context, src uri.URI, dst string, tracker *progress.Tracker) error { | ||||||
// downloadObjectCore handles the common download logic for both Download and DownloadWithObjectInfo | ||||||
func (d *Downloader) downloadObjectCore(ctx context.Context, src uri.URI, dst string, tracker *progress.Tracker, objectStat *apigen.ObjectStats) error { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this better?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. didn't want to mix with DownloadWithObjectInfo |
||||||
// Validate that object stats are provided when required | ||||||
if (d.SymlinkSupport || d.PreSign) && objectStat == nil { | ||||||
return fmt.Errorf("object stats are required for symlink support or presign downloads but were not provided: %w", ErrValidation) | ||||||
} | ||||||
|
||||||
// delete destination file if it exists | ||||||
if err := os.Remove(dst); err != nil && !os.IsNotExist(err) { | ||||||
return fmt.Errorf("failed to remove existing destination file '%s': %w", dst, err) | ||||||
|
@@ -66,25 +78,8 @@ func (d *Downloader) Download(ctx context.Context, src uri.URI, dst string, trac | |||||
dir := filepath.Dir(dst) | ||||||
_ = os.MkdirAll(dir, os.ModePerm) | ||||||
|
||||||
// Check if we need to call StatObjectWithResponse (for symlinks or presign multipart) | ||||||
var objectStat *apigen.ObjectStats | ||||||
if d.SymlinkSupport || d.PreSign { | ||||||
statResp, err := d.Client.StatObjectWithResponse(ctx, src.Repository, src.Ref, &apigen.StatObjectParams{ | ||||||
Path: apiutil.Value(src.Path), | ||||||
UserMetadata: swag.Bool(d.SymlinkSupport), // Only request metadata if symlink support is enabled | ||||||
Presign: swag.Bool(d.PreSign), // Only presign if needed | ||||||
}) | ||||||
if err != nil { | ||||||
return fmt.Errorf("download failed: %w", err) | ||||||
} | ||||||
if statResp.JSON200 == nil { | ||||||
return fmt.Errorf("download failed: %w: %s", ErrRequestFailed, statResp.Status()) | ||||||
} | ||||||
objectStat = statResp.JSON200 | ||||||
} | ||||||
|
||||||
// If symlink support is enabled, check if the object is a symlink and create it if so | ||||||
if d.SymlinkSupport { | ||||||
if d.SymlinkSupport && objectStat != nil { | ||||||
symlinkTarget, found := objectStat.Metadata.Get(apiutil.SymlinkMetadataKey) | ||||||
if found && symlinkTarget != "" { | ||||||
if tracker != nil { | ||||||
|
@@ -103,7 +98,7 @@ func (d *Downloader) Download(ctx context.Context, src uri.URI, dst string, trac | |||||
|
||||||
// download object | ||||||
var err error | ||||||
if d.PreSign && objectStat.SizeBytes != nil { | ||||||
if d.PreSign && objectStat != nil && swag.Int64Value(objectStat.SizeBytes) >= d.PartSize { | ||||||
// download using presigned multipart download, it will fall back to presign single object download if needed | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When will it fallback to presign single object download? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cloud return an error in case the object There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||||||
err = d.downloadPresignMultipart(ctx, src, dst, tracker, objectStat) | ||||||
} else { | ||||||
|
@@ -115,17 +110,46 @@ func (d *Downloader) Download(ctx context.Context, src uri.URI, dst string, trac | |||||
return nil | ||||||
} | ||||||
|
||||||
func (d *Downloader) downloadPresignMultipart(ctx context.Context, src uri.URI, dst string, tracker *progress.Tracker, objectStat *apigen.ObjectStats) (err error) { | ||||||
// Use provided stat response object metadata for size and physical address (presigned) | ||||||
// DownloadWithObjectInfo downloads an object from lakeFS using pre-fetched object information, | ||||||
// avoiding the need for a separate stat call. | ||||||
func (d *Downloader) DownloadWithObjectInfo(ctx context.Context, src uri.URI, dst string, tracker *progress.Tracker, objectStat *apigen.ObjectStats) error { | ||||||
return d.downloadObjectCore(ctx, src, dst, tracker, objectStat) | ||||||
} | ||||||
|
||||||
// Download downloads an object from lakeFS to a local file, create the destination directory if needed. | ||||||
func (d *Downloader) Download(ctx context.Context, src uri.URI, dst string, tracker *progress.Tracker) error { | ||||||
// Check if we need to call StatObjectWithResponse (for symlinks or presign multipart) | ||||||
var objectStat *apigen.ObjectStats | ||||||
if d.SymlinkSupport || d.PreSign { | ||||||
statResp, err := d.Client.StatObjectWithResponse(ctx, src.Repository, src.Ref, &apigen.StatObjectParams{ | ||||||
Path: apiutil.Value(src.Path), | ||||||
UserMetadata: swag.Bool(d.SymlinkSupport), // Only request metadata if symlink support is enabled | ||||||
Presign: swag.Bool(d.PreSign), // Only presign if needed | ||||||
}) | ||||||
if err != nil { | ||||||
return fmt.Errorf("download failed: %w", err) | ||||||
} | ||||||
if statResp.JSON200 == nil { | ||||||
return fmt.Errorf("download failed: %w: %s", ErrRequestFailed, statResp.Status()) | ||||||
} | ||||||
objectStat = statResp.JSON200 | ||||||
} | ||||||
|
||||||
return d.downloadObjectCore(ctx, src, dst, tracker, objectStat) | ||||||
} | ||||||
|
||||||
// downloadPresignMultipart downloads a large object, must be larger or equal to PartSize using a presigned URL. | ||||||
// It uses multiple concurrent range requests to download the object in parts. | ||||||
// If the object is smaller than PartSize, it falls back to a single `downloadObject` call. | ||||||
func (d *Downloader) downloadPresignMultipart(ctx context.Context, src uri.URI, dst string, tracker *progress.Tracker, objectStat *apigen.ObjectStats) (err error) { | ||||||
// check if the object is small enough to download in one request | ||||||
size := swag.Int64Value(objectStat.SizeBytes) | ||||||
if size < d.PartSize { | ||||||
return fmt.Errorf("object is smaller than PartSize (%d): %w", d.PartSize, ErrValidation) | ||||||
} | ||||||
if tracker != nil { | ||||||
tracker.UpdateTotal(size) | ||||||
} | ||||||
if size < d.PartSize { | ||||||
return d.downloadObject(ctx, src, dst, tracker) | ||||||
} | ||||||
|
||||||
f, err := os.Create(dst) | ||||||
if err != nil { | ||||||
|
@@ -140,13 +164,24 @@ func (d *Downloader) downloadPresignMultipart(ctx context.Context, src uri.URI, | |||||
return fmt.Errorf("failed to truncate '%s' to size %d: %w", f.Name(), size, err) | ||||||
} | ||||||
|
||||||
// download the file using ranges and concurrency | ||||||
physicalAddress := objectStat.PhysicalAddress | ||||||
// download the file using ranges and concurrency with a fresh presigned URL | ||||||
statResp, err := d.Client.StatObjectWithResponse(ctx, src.Repository, src.Ref, &apigen.StatObjectParams{ | ||||||
Path: apiutil.Value(src.Path), | ||||||
UserMetadata: swag.Bool(d.SymlinkSupport), // Only request metadata if symlink support is enabled | ||||||
Presign: swag.Bool(d.PreSign), // Only presign if needed | ||||||
}) | ||||||
if err != nil { | ||||||
return fmt.Errorf("download failed: %w", err) | ||||||
} | ||||||
if statResp.JSON200 == nil { | ||||||
return fmt.Errorf("download failed: %w: %s", ErrRequestFailed, statResp.Status()) | ||||||
} | ||||||
physicalAddress := statResp.JSON200.PhysicalAddress | ||||||
|
||||||
ch := make(chan downloadPart, DefaultDownloadConcurrency) | ||||||
// start download workers | ||||||
ch := make(chan downloadPart, DefaultDownloadConcurrency) | ||||||
g, grpCtx := errgroup.WithContext(context.Background()) | ||||||
for i := 0; i < DefaultDownloadConcurrency; i++ { | ||||||
for range DefaultDownloadConcurrency { | ||||||
g.Go(func() error { | ||||||
buf := make([]byte, d.PartSize) | ||||||
for part := range ch { | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.