Skip to content

Avoid some List allocations and resizes #857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: v6
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/Algolia.Search/Clients/SearchIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,54 @@ internal async Task<BatchIndexingResponse> SplitIntoBatchesAsync<T>(IEnumerable<
RequestOptions requestOptions = null, CancellationToken ct = default) where T : class
{
BatchIndexingResponse ret = new BatchIndexingResponse { Responses = new List<BatchResponse>() };

// The input is highly likely to be some form of list or array whose size we can read up front.
// Knowing the count lets us avoid resizing the records list through 4->8->16->32->64->128->256->512->1024 growth steps for the default max batch size.
if (data is IReadOnlyCollection<T> dataCollection)
{
if (dataCollection.Count <= Config.BatchSize)
{
// if the size is less than max batch size, just use it directly instead of allocating and copying to a new list
var request = new BatchRequest<T>(actionType, dataCollection);
BatchResponse batch = await BatchAsync(request, requestOptions, ct).ConfigureAwait(false);
ret.Responses.Add(batch);
return ret;
}

// Otherwise we can calculate each batch size precisely and use an array, which iterates faster than a list.
var maxBatch = new T[Config.BatchSize];
var batchIndex = 0;
int remaining = dataCollection.Count;
foreach (T item in dataCollection)
{
maxBatch[batchIndex] = item;
batchIndex++;
if (batchIndex < maxBatch.Length)
{
continue;
}
// batch size reached, send batch
var request = new BatchRequest<T>(actionType, maxBatch);
BatchResponse batch = await BatchAsync(request, requestOptions, ct).ConfigureAwait(false);
ret.Responses.Add(batch);
remaining -= batchIndex;
if (remaining == 0)
{
// avoid doing calculations & allocations for the last batch
break;
}
int nextArraySize = Math.Min(remaining, Config.BatchSize);
if (nextArraySize != maxBatch.Length)
{
// Only allocate a new array if required, otherwise reuse the old one
// System.Memory<T> could have been used to just expose a subset of the existing array, but it is not in netstandard 1.3-2.0
maxBatch = new T[nextArraySize];
}
batchIndex = 0;
}
return ret;
}

List<T> records = new List<T>();

foreach (var item in data)
Expand Down
10 changes: 9 additions & 1 deletion src/Algolia.Search/Models/Batch/BatchRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,15 @@ public BatchRequest(string actionType, IEnumerable<T> data)
throw new ArgumentNullException(nameof(data));
}

Operations = new List<BatchOperation<T>>();
if (data is IReadOnlyCollection<T> dataCollection)
{
// If the data is a collection, presizing avoids resizing the Operations list through 4->8->16->32->64->128->256->512->1024 growth steps for the default max batch size.
Operations = new List<BatchOperation<T>>(dataCollection.Count);
}
else
{
Operations = new List<BatchOperation<T>>();
}

foreach (var item in data)
{
Expand Down