Skip to content

Enable sort optimization on int, short and byte fields #127968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/127968.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 127968
summary: "Enable sort optimization on int, short and byte fields"
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.upgrades;

import com.carrotsearch.randomizedtesting.annotations.Name;

import org.elasticsearch.client.Request;
import org.elasticsearch.common.settings.Settings;

/**
* Tests that index sorting works correctly after a rolling upgrade.
*/
public class IndexSortUpgradeIT extends AbstractRollingUpgradeTestCase {

public IndexSortUpgradeIT(@Name("upgradedNodes") int upgradedNodes) {
super(upgradedNodes);
}

public void testIndexSortForNumericTypes() throws Exception {
record IndexConfig(String indexName, String fieldName, String fieldType) {}
var configs = new IndexConfig[] {
new IndexConfig("index_byte", "byte_field", "byte"),
new IndexConfig("index_short", "short_field", "short"),
new IndexConfig("index_int", "int_field", "integer") };

if (isOldCluster()) {
int numShards = randomIntBetween(1, 3);
for (var config : configs) {
createIndex(
config.indexName(),
Settings.builder()
.put("index.number_of_shards", numShards)
.put("index.number_of_replicas", 0)
.put("index.sort.field", config.fieldName())
.put("index.sort.order", "desc")
.build(),
"""
{
"properties": {
"%s": {
"type": "%s"
}
}
}
""".formatted(config.fieldName(), config.fieldType())
);
}
}

final int numDocs = randomIntBetween(10, 25);
for (var config : configs) {
var bulkRequest = new Request("POST", "/" + config.indexName() + "/_bulk");
StringBuilder bulkBody = new StringBuilder();
for (int i = 0; i < numDocs; i++) {
bulkBody.append("{\"index\": {}}\n");
bulkBody.append("{\"" + config.fieldName() + "\": ").append(i).append("}\n");
}
bulkRequest.setJsonEntity(bulkBody.toString());
bulkRequest.addParameter("refresh", "true");
var bulkResponse = client().performRequest(bulkRequest);
assertOK(bulkResponse);

var searchRequest = new Request("GET", "/" + config.indexName() + "/_search");
searchRequest.setJsonEntity("""
{
"query": {
"match_all": {}
},
"sort": {
"%s": {
"order": "desc"
}
}
}
""".formatted(config.fieldName()));
var searchResponse = client().performRequest(searchRequest);
assertOK(searchResponse);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@
package org.elasticsearch.search;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentType;

Expand Down Expand Up @@ -115,4 +118,90 @@ public void testCollapseWithStoredFields() {
}
);
}

public void testCollapseOnMixedIntAndLongSortTypes() {
assertAcked(
prepareCreate("shop_short").setMapping("brand_id", "type=short", "price", "type=integer"),
prepareCreate("shop_long").setMapping("brand_id", "type=long", "price", "type=integer"),
prepareCreate("shop_int").setMapping("brand_id", "type=integer", "price", "type=integer")
);

BulkRequestBuilder bulkRequest = client().prepareBulk();
bulkRequest.add(client().prepareIndex("shop_short").setId("short01").setSource("brand_id", 1, "price", 100));
bulkRequest.add(client().prepareIndex("shop_short").setId("short02").setSource("brand_id", 1, "price", 101));
bulkRequest.add(client().prepareIndex("shop_short").setId("short03").setSource("brand_id", 1, "price", 102));
bulkRequest.add(client().prepareIndex("shop_short").setId("short04").setSource("brand_id", 3, "price", 301));
bulkRequest.get();

BulkRequestBuilder bulkRequest1 = client().prepareBulk();
bulkRequest1.add(client().prepareIndex("shop_long").setId("long01").setSource("brand_id", 1, "price", 100));
bulkRequest1.add(client().prepareIndex("shop_long").setId("long02").setSource("brand_id", 1, "price", 103));
bulkRequest1.add(client().prepareIndex("shop_long").setId("long03").setSource("brand_id", 1, "price", 105));
bulkRequest1.add(client().prepareIndex("shop_long").setId("long04").setSource("brand_id", 2, "price", 200));
bulkRequest1.add(client().prepareIndex("shop_long").setId("long05").setSource("brand_id", 2, "price", 201));
bulkRequest1.get();

BulkRequestBuilder bulkRequest2 = client().prepareBulk();
bulkRequest2.add(client().prepareIndex("shop_int").setId("int01").setSource("brand_id", 1, "price", 101));
bulkRequest2.add(client().prepareIndex("shop_int").setId("int02").setSource("brand_id", 1, "price", 102));
bulkRequest2.add(client().prepareIndex("shop_int").setId("int03").setSource("brand_id", 1, "price", 104));
bulkRequest2.add(client().prepareIndex("shop_int").setId("int04").setSource("brand_id", 2, "price", 201));
bulkRequest2.add(client().prepareIndex("shop_int").setId("int05").setSource("brand_id", 2, "price", 202));
bulkRequest2.add(client().prepareIndex("shop_int").setId("int06").setSource("brand_id", 3, "price", 300));
bulkRequest2.get();
refresh();

assertNoFailuresAndResponse(
prepareSearch("shop_long", "shop_int", "shop_short").setQuery(new MatchAllQueryBuilder())
.setCollapse(
new CollapseBuilder("brand_id").setInnerHits(
new InnerHitBuilder("ih").setSize(3).addSort(SortBuilders.fieldSort("price").order(SortOrder.DESC))
)
)
.addSort("brand_id", SortOrder.ASC)
.addSort("price", SortOrder.DESC),
response -> {
SearchHits hits = response.getHits();
assertEquals(3, hits.getHits().length);

// First hit should be brand_id=1 with highest price
Map<String, Object> firstHitSource = hits.getAt(0).getSourceAsMap();
assertEquals(1, firstHitSource.get("brand_id"));
assertEquals(105, firstHitSource.get("price"));
assertEquals("long03", hits.getAt(0).getId());

// Check inner hits for brand_id=1
SearchHits innerHits1 = hits.getAt(0).getInnerHits().get("ih");
assertEquals(3, innerHits1.getHits().length);
assertEquals("long03", innerHits1.getAt(0).getId());
assertEquals("int03", innerHits1.getAt(1).getId());
assertEquals("long02", innerHits1.getAt(2).getId());

// Second hit should be brand_id=2 with highest price
Map<String, Object> secondHitSource = hits.getAt(1).getSourceAsMap();
assertEquals(2, secondHitSource.get("brand_id"));
assertEquals(202, secondHitSource.get("price"));
assertEquals("int05", hits.getAt(1).getId());

// Check inner hits for brand_id=2
SearchHits innerHits2 = hits.getAt(1).getInnerHits().get("ih");
assertEquals(3, innerHits2.getHits().length);
assertEquals("int05", innerHits2.getAt(0).getId());
assertEquals("int04", innerHits2.getAt(1).getId());
assertEquals("long05", innerHits2.getAt(2).getId());

// third hit should be brand_id=3 with highest price
Map<String, Object> thirdHitSource = hits.getAt(2).getSourceAsMap();
assertEquals(3, thirdHitSource.get("brand_id"));
assertEquals(301, thirdHitSource.get("price"));
assertEquals("short04", hits.getAt(2).getId());

// Check inner hits for brand_id=3
SearchHits innerHits3 = hits.getAt(2).getInnerHits().get("ih");
assertEquals(2, innerHits3.getHits().length);
assertEquals("short04", innerHits3.getAt(0).getId());
assertEquals("int06", innerHits3.getAt(1).getId());
}
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.metadata.IndexMetadata;
Expand Down Expand Up @@ -64,6 +65,7 @@
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFirstHit;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitSize;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
Expand Down Expand Up @@ -2180,11 +2182,50 @@ public void testSortMixedFieldTypes() {
}
}

public void testMixedIntAndLongSortTypes() {
assertAcked(
prepareCreate("index_long").setMapping("field1", "type=long", "field2", "type=long"),
prepareCreate("index_integer").setMapping("field1", "type=integer", "field2", "type=integer"),
prepareCreate("index_short").setMapping("field1", "type=short", "field2", "type=short"),
prepareCreate("index_byte").setMapping("field1", "type=byte", "field2", "type=byte")
);

for (int i = 0; i < 5; i++) {
prepareIndex("index_long").setId(String.valueOf(i)).setSource("field1", i).get(); // missing field2 sorts last
prepareIndex("index_integer").setId(String.valueOf(i)).setSource("field1", i).get(); // missing field2 sorts last
prepareIndex("index_short").setId(String.valueOf(i)).setSource("field1", i, "field2", i * 10).get();
prepareIndex("index_byte").setId(String.valueOf(i)).setSource("field1", i, "field2", i).get();
}
refresh();

Object[] searchAfter = null;
int[] expectedHitSizes = { 8, 8, 4 };
Object[][] expectedLastDocValues = {
new Object[] { 1L, 9223372036854775807L },
new Object[] { 3L, 9223372036854775807L },
new Object[] { 4L, 9223372036854775807L } };

for (int i = 0; i < 3; i++) {
SearchRequestBuilder request = prepareSearch("index_long", "index_integer", "index_short", "index_byte").setSize(8)
.addSort(new FieldSortBuilder("field1"))
.addSort(new FieldSortBuilder("field2"));
if (searchAfter != null) {
request.searchAfter(searchAfter);
}
SearchResponse response = request.get();
assertHitSize(response, expectedHitSizes[i]);
Object[] lastDocSortValues = response.getHits().getAt(response.getHits().getHits().length - 1).getSortValues();
assertThat(lastDocSortValues, equalTo(expectedLastDocValues[i]));
searchAfter = lastDocSortValues;
response.decRef();
}
}

public void testSortMixedFieldTypesWithNoDocsForOneType() {
assertAcked(
prepareCreate("index_long").setMapping("foo", "type=long"),
prepareCreate("index_other").setMapping("bar", "type=keyword"),
prepareCreate("index_double").setMapping("foo", "type=double")
prepareCreate("index_int").setMapping("foo", "type=integer")
);

prepareIndex("index_long").setId("1").setSource("foo", "123").get();
Expand All @@ -2193,8 +2234,7 @@ public void testSortMixedFieldTypesWithNoDocsForOneType() {
refresh();

assertNoFailures(
prepareSearch("index_long", "index_double", "index_other").addSort(new FieldSortBuilder("foo").unmappedType("boolean"))
.setSize(10)
prepareSearch("index_long", "index_int", "index_other").addSort(new FieldSortBuilder("foo").unmappedType("boolean")).setSize(10)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.function.BiFunction;
import java.util.function.Consumer;
Expand Down Expand Up @@ -152,12 +154,12 @@ static TopDocs mergeTopDocs(List<TopDocs> results, int topN, int from) {
}
final TopDocs mergedTopDocs;
if (topDocs instanceof TopFieldGroups firstTopDocs) {
final Sort sort = new Sort(firstTopDocs.fields);
final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]);
mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false);
} else if (topDocs instanceof TopFieldDocs firstTopDocs) {
TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]);
final Sort sort = checkSameSortTypes(topDocsList, firstTopDocs.fields);
final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
} else {
final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]);
Expand All @@ -166,12 +168,13 @@ static TopDocs mergeTopDocs(List<TopDocs> results, int topN, int from) {
return mergedTopDocs;
}

private static Sort checkSameSortTypes(Collection<TopDocs> results, SortField[] firstSortFields) {
private static Sort validateSameSortTypesAndMaybeRewrite(Collection<TopDocs> results, SortField[] firstSortFields) {
Sort sort = new Sort(firstSortFields);
if (results.size() < 2) return sort;

SortField.Type[] firstTypes = null;
boolean isFirstResult = true;
Set<Integer> fieldIdsWithMixedIntAndLongSorts = new HashSet<>();
for (TopDocs topDocs : results) {
// We don't actually merge in empty score docs, so ignore potentially mismatched types if there are no docs
if (topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
Expand All @@ -197,22 +200,55 @@ private static Sort checkSameSortTypes(Collection<TopDocs> results, SortField[]
// for custom types that we can't resolve, we can't do the check
return sort;
}
throw new IllegalArgumentException(
"Can't sort on field ["
+ curSortFields[i].getField()
+ "]; the field has incompatible sort types: ["
+ firstTypes[i]
+ "] and ["
+ curType
+ "] across shards!"
);
// Check if we are mixing INT and LONG sort types, which is allowed
if ((firstTypes[i] == SortField.Type.INT && curType == SortField.Type.LONG)
|| (firstTypes[i] == SortField.Type.LONG && curType == SortField.Type.INT)) fieldIdsWithMixedIntAndLongSorts
.add(i);
else {
throw new IllegalArgumentException(
"Can't sort on field ["
+ curSortFields[i].getField()
+ "]; the field has incompatible sort types: ["
+ firstTypes[i]
+ "] and ["
+ curType
+ "] across shards!"
);
}
}
}
}
}
if (fieldIdsWithMixedIntAndLongSorts.size() > 0) {
sort = rewriteSortAndResultsToLong(sort, results, fieldIdsWithMixedIntAndLongSorts);
}
return sort;
}

/**
* Rewrite Sort objects and shards results for long sort for mixed fields:
* convert Sort to Long sort and convert fields' values to Long values.
* This is necessary to enable comparison of fields' values across shards for merging.
*/
private static Sort rewriteSortAndResultsToLong(Sort sort, Collection<TopDocs> results, Set<Integer> fieldIdsWithMixedIntAndLongSorts) {
SortField[] newSortFields = sort.getSort();
for (int fieldIdx : fieldIdsWithMixedIntAndLongSorts) {
for (TopDocs topDocs : results) {
if (topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) continue;
SortField[] sortFields = ((TopFieldDocs) topDocs).fields;
if (getType(sortFields[fieldIdx]) == SortField.Type.INT) {
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
fieldDoc.fields[fieldIdx] = ((Number) fieldDoc.fields[fieldIdx]).longValue();
}
} else { // SortField.Type.LONG
newSortFields[fieldIdx] = sortFields[fieldIdx];
}
}
}
return new Sort(newSortFields);
}

private static SortField.Type getType(SortField sortField) {
if (sortField instanceof SortedNumericSortField sf) {
return sf.getNumericType();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public Sort buildIndexSort(
if (fieldData == null) {
throw new IllegalArgumentException("docvalues not found for index sort field:[" + sortSpec.field + "]");
}
sortFields[i] = fieldData.sortField(sortSpec.missingValue, mode, null, reverse);
sortFields[i] = fieldData.sortField(this.indexCreatedVersion, sortSpec.missingValue, mode, null, reverse);
validateIndexSortField(sortFields[i]);
}
return new Sort(sortFields);
Expand Down
Loading
Loading