[ES|QL] Add suggested_cast #127139

Merged: 9 commits, Apr 28, 2025

5 changes: 5 additions & 0 deletions docs/changelog/127139.yaml
@@ -0,0 +1,5 @@
pr: 127139
summary: Add `suggested_cast`
area: ES|QL
type: enhancement
issues: []
@@ -708,6 +708,29 @@ public boolean isDate() {
};
}

public static DataType suggestedCast(Set<DataType> originalTypes) {
if (originalTypes.isEmpty() || originalTypes.contains(UNSUPPORTED)) {
return null;
}
if (originalTypes.contains(DATE_NANOS) && originalTypes.contains(DATETIME) && originalTypes.size() == 2) {
return DATE_NANOS;
}
if (originalTypes.contains(AGGREGATE_METRIC_DOUBLE)) {
boolean allNumeric = true;
for (DataType type : originalTypes) {
if (type.isNumeric() == false && type != AGGREGATE_METRIC_DOUBLE) {
allNumeric = false;
break;
}
}
if (allNumeric) {
return AGGREGATE_METRIC_DOUBLE;
Contributor

Is it an option to use one of the underlying metrics (e.g. max) in a mixed query? Or would we have to convert e.g. the double to aggregate_metric_double first, and then use the max from all of them (original and converted)? That'd be one expensive operation, reading all values - it may be fine if there's no better way to mix.

Member

Not really. Casting is only something we can do with one value at a time but these underlying metrics are things you'd get from an agg. I think. Right?

Contributor Author

Yeah, the underlying metrics would be accessed in specific aggregations.
Maybe when we implement implicit casting of aggregate_metric_double in aggregations, instead of going double -> aggregate_metric_double -> submetric (double/int), we could go aggregate_metric_double -> submetric (double/int) and skip the conversion for doubles/ints (or, if the other mapping is a long or something, convert that to the submetric type)?
(The count submetric is an int, but the others are doubles.)

}
}

return KEYWORD;
}

/**
* Named parameters with default values. It's just easier to do this with
* a builder in java....
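To make the resolution order in the new DataType.suggestedCast method concrete, here is a minimal sketch of what it returns for a few representative type sets (illustrative only; it assumes the DataType constants referenced in the diff and is not part of the change):

import java.util.Set;

import org.elasticsearch.xpack.esql.core.type.DataType;

class SuggestedCastSketch {
    static void demo() {
        // date_nanos + datetime (and nothing else) suggests date_nanos
        DataType dates = DataType.suggestedCast(Set.of(DataType.DATE_NANOS, DataType.DATETIME));   // DATE_NANOS
        // aggregate_metric_double mixed only with numeric types suggests aggregate_metric_double
        DataType metric = DataType.suggestedCast(
            Set.of(DataType.AGGREGATE_METRIC_DOUBLE, DataType.DOUBLE, DataType.LONG)
        );                                                                                          // AGGREGATE_METRIC_DOUBLE
        // any other representable combination falls back to keyword
        DataType mixed = DataType.suggestedCast(Set.of(DataType.IP, DataType.BOOLEAN));             // KEYWORD
        // an empty set, or any set containing an unsupported type, yields no suggestion
        DataType none = DataType.suggestedCast(Set.of(DataType.UNSUPPORTED, DataType.LONG));        // null
    }
}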
@@ -27,6 +27,7 @@
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
import org.elasticsearch.xpack.esql.tools.ProfileParser;
import org.hamcrest.Matchers;
@@ -40,11 +41,14 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -648,6 +652,120 @@ public void testForceSleepsProfile() throws IOException {
}
}

public void testSuggestedCast() throws IOException {
// TODO: Figure out how best to make sure we don't leave out new types
Map<DataType, String> typesAndValues = Map.ofEntries(
Map.entry(DataType.BOOLEAN, "\"true\""),
Map.entry(DataType.LONG, "-1234567890234567"),
Map.entry(DataType.INTEGER, "123"),
Map.entry(DataType.UNSIGNED_LONG, "1234567890234567"),
Map.entry(DataType.DOUBLE, "12.4"),
Map.entry(DataType.KEYWORD, "\"keyword\""),
Map.entry(DataType.TEXT, "\"some text\""),
Map.entry(DataType.DATE_NANOS, "\"2015-01-01T12:10:30.123456789Z\""),
Map.entry(DataType.DATETIME, "\"2015-01-01T12:10:30Z\""),
Map.entry(DataType.IP, "\"192.168.30.1\""),
Map.entry(DataType.VERSION, "\"8.19.0\""),
Map.entry(DataType.GEO_POINT, "[-71.34, 41.12]"),
Map.entry(DataType.GEO_SHAPE, """
{
"type": "Point",
"coordinates": [-77.03653, 38.897676]
}
"""),
Map.entry(DataType.AGGREGATE_METRIC_DOUBLE, """
{
"max": 14983.1
}
""")
);
Set<DataType> shouldBeSupported = Stream.of(DataType.values()).filter(DataType::isRepresentable).collect(Collectors.toSet());
shouldBeSupported.remove(DataType.CARTESIAN_POINT);
shouldBeSupported.remove(DataType.CARTESIAN_SHAPE);
shouldBeSupported.remove(DataType.NULL);
shouldBeSupported.remove(DataType.DOC_DATA_TYPE);
shouldBeSupported.remove(DataType.TSID_DATA_TYPE);
for (DataType type : shouldBeSupported) {
assertTrue(typesAndValues.containsKey(type));
}
assertThat(typesAndValues.size(), equalTo(shouldBeSupported.size()));

for (DataType type : typesAndValues.keySet()) {
String additionalProperties = "";
if (type == DataType.AGGREGATE_METRIC_DOUBLE) {
additionalProperties += """
,
"metrics": ["max"],
"default_metric": "max"
""";
}
createIndex("index-" + type.esType(), null, """
"properties": {
"my_field": {
"type": "%s" %s
}
}
""".formatted(type.esType(), additionalProperties));
Request doc = new Request("PUT", "index-" + type.esType() + "/_doc/1");
doc.setJsonEntity("{\"my_field\": " + typesAndValues.get(type) + "}");
client().performRequest(doc);
}

List<DataType> listOfTypes = new ArrayList<>(typesAndValues.keySet());
listOfTypes.sort(Comparator.comparing(DataType::typeName));

for (int i = 0; i < listOfTypes.size(); i++) {
for (int j = i + 1; j < listOfTypes.size(); j++) {
String query = """
{
"query": "FROM index-%s,index-%s | LIMIT 100 | KEEP my_field"
}
""".formatted(listOfTypes.get(i).esType(), listOfTypes.get(j).esType());
Request request = new Request("POST", "/_query");
request.setJsonEntity(query);
Response resp = client().performRequest(request);
Map<String, Object> results = entityAsMap(resp);
List<?> columns = (List<?>) results.get("columns");
DataType suggestedCast = DataType.suggestedCast(Set.of(listOfTypes.get(i), listOfTypes.get(j)));
assertThat(
columns,
equalTo(
List.of(
Map.ofEntries(
Map.entry("name", "my_field"),
Map.entry("type", "unsupported"),
Map.entry("original_types", List.of(listOfTypes.get(i).typeName(), listOfTypes.get(j).typeName())),
Map.entry("suggested_cast", suggestedCast.typeName())
)
)
)
);

String castedQuery = """
{
"query": "FROM index-%s,index-%s | LIMIT 100 | EVAL my_field = my_field::%s"
}
""".formatted(
listOfTypes.get(i).esType(),
listOfTypes.get(j).esType(),
suggestedCast == DataType.KEYWORD ? "STRING" : suggestedCast.nameUpper()
);
Request castedRequest = new Request("POST", "/_query");
castedRequest.setJsonEntity(castedQuery);
Response castedResponse = client().performRequest(castedRequest);
Map<String, Object> castedResults = entityAsMap(castedResponse);
List<?> castedColumns = (List<?>) castedResults.get("columns");
assertThat(
castedColumns,
equalTo(List.of(Map.ofEntries(Map.entry("name", "my_field"), Map.entry("type", suggestedCast.typeName()))))
);
}
}
for (DataType type : typesAndValues.keySet()) {
deleteIndex("index-" + type.esType());
}
}

static MapMatcher commonProfile() {
return matchesMap() //
.entry("description", any(String.class))
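The integration test above also illustrates the intended client workflow: when a column comes back as unsupported but carries a suggested_cast, the query can simply be retried with an explicit cast. A minimal sketch of that retry using the low-level REST client follows (index and field names are made up for illustration; note the test's special case of casting to STRING when the suggestion is keyword):

import java.io.IOException;
import java.util.Locale;
import java.util.Map;

import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

class SuggestedCastRetrySketch {
    // "column" is one entry of the "columns" array from a previous _query response, e.g.
    // {"name":"my_field","type":"unsupported","original_types":[...],"suggested_cast":"date_nanos"}
    static Response retryWithSuggestedCast(RestClient client, Map<String, Object> column) throws IOException {
        String cast = (String) column.get("suggested_cast");
        // keyword is cast via ::STRING, mirroring the special case in the test above
        String castName = "keyword".equals(cast) ? "STRING" : cast.toUpperCase(Locale.ROOT);
        String body = """
            {
              "query": "FROM index-1,index-2 | KEEP my_field | EVAL my_field = my_field::%s | LIMIT 100"
            }
            """.formatted(castName);
        Request request = new Request("POST", "/_query");
        request.setJsonEntity(body);
        return client.performRequest(request);
    }
}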
@@ -32,6 +32,7 @@
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
import org.hamcrest.Matcher;
import org.junit.Before;
@@ -45,8 +46,10 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;

import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -690,7 +693,7 @@ public void testByteFieldWithIntSubfieldTooBig() throws IOException {
* </pre>.
*/
public void testIncompatibleTypes() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
keywordTest().createIndex("test1", "f");
index("test1", """
{"f": "f1"}""");
@@ -764,7 +767,7 @@ public void testDistinctInEachIndex() throws IOException {
* </pre>.
*/
public void testMergeKeywordAndObject() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
keywordTest().createIndex("test1", "file");
index("test1", """
{"file": "f1"}""");
@@ -959,7 +962,7 @@ public void testIntegerDocValuesConflict() throws IOException {
* In an ideal world we'd promote the {@code integer} to an {@code long} and just go.
*/
public void testLongIntegerConflict() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
longTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
index("test1", """
{"emp_no": 1}""");
@@ -1002,7 +1005,7 @@ public void testLongIntegerConflict() throws IOException {
* In an ideal world we'd promote the {@code short} to an {@code integer} and just go.
*/
public void testIntegerShortConflict() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
intTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
index("test1", """
{"emp_no": 1}""");
@@ -1051,7 +1054,7 @@ public void testIntegerShortConflict() throws IOException {
* </pre>.
*/
public void testTypeConflictInObject() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
createIndex("test1", empNoInObject("integer"));
index("test1", """
{"foo": {"emp_no": 1}}""");
@@ -1379,6 +1382,12 @@ private void assumeOriginalTypesReported() throws IOException {
assumeTrue("This test makes sense for versions that report original types", requiredClusterCapability);
}

private void assumeSuggestedCastReported() throws IOException {
var capsName = EsqlCapabilities.Cap.SUGGESTED_CAST.name().toLowerCase(Locale.ROOT);
boolean requiredClusterCapability = clusterHasCapability("POST", "/_query", List.of(), List.of(capsName)).orElse(false);
assumeTrue("This test makes sense for versions that report suggested casts", requiredClusterCapability);
}

private CheckedConsumer<XContentBuilder, IOException> empNoInObject(String empNoType) {
return index -> {
index.startObject("properties");
Expand Down Expand Up @@ -1715,7 +1724,19 @@ private static Map<String, Object> columnInfo(String name, String type) {
}

private static Map<String, Object> unsupportedColumnInfo(String name, String... originalTypes) {
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
DataType suggested = DataType.suggestedCast(
List.of(originalTypes).stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
);
if (suggested == null) {
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
} else {
return Map.ofEntries(
Map.entry("name", name),
Map.entry("type", "unsupported"),
Map.entry("original_types", List.of(originalTypes)),
Map.entry("suggested_cast", suggested.typeName())
);
}
}

private static void index(String name, String... docs) throws IOException {
@@ -481,3 +481,23 @@ x:integer | agg_metric:aggregate_metric_double
[5032, 11111, 40814] | {"min":5032.0,"max":40814.0,"sum":56957.0,"value_count":3}
//end::toAggregateMetricDoubleMv-result[]
;

convertToAggregateMetricDoubleCastingOperatorFromDouble
required_capability: suggested_cast
ROW x = 29384.1256
| EVAL agg_metric = x::aggregate_metric_double
;

x:double | agg_metric:aggregate_metric_double
29384.1256 | {"min":29384.1256,"max":29384.1256,"sum":29384.1256,"value_count":1}
;

convertToAggregateMetricDoubleCastingOperatorFromInt
required_capability: suggested_cast
ROW x = 55555
| EVAL agg_metric = x::aggregate_metric_double
;

x:integer | agg_metric:aggregate_metric_double
55555 | {"min":55555,"max":55555,"sum":55555,"value_count":1}
;
@@ -23,6 +23,7 @@
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
@@ -72,6 +73,9 @@ public static ColumnInfo fromXContent(XContentParser parser) {
@Nullable
private final List<String> originalTypes;

@Nullable
private final DataType suggestedCast;

@ParserConstructor
public ColumnInfoImpl(String name, String type, @Nullable List<String> originalTypes) {
this(name, DataType.fromEs(type), originalTypes);
@@ -81,15 +85,27 @@ public ColumnInfoImpl(String name, DataType type, @Nullable List<String> origina
this.name = name;
this.type = type;
this.originalTypes = originalTypes;
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
}

private static DataType calculateSuggestedCast(List<String> originalTypes) {
if (originalTypes == null) {
return null;
}
return DataType.suggestedCast(
originalTypes.stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
);
}

public ColumnInfoImpl(StreamInput in) throws IOException {
this.name = in.readString();
this.type = DataType.fromEs(in.readString());
if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_REPORT_ORIGINAL_TYPES)) {
this.originalTypes = in.readOptionalStringCollectionAsList();
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
} else {
this.originalTypes = null;
this.suggestedCast = null;
}
}

Expand All @@ -110,6 +126,9 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
if (originalTypes != null) {
builder.field("original_types", originalTypes);
}
if (suggestedCast != null) {
builder.field("suggested_cast", suggestedCast.typeName());
}
builder.endObject();
return builder;
}
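With the toXContent change above, a conflicting column in the _query JSON response carries the new field roughly as follows (an illustrative shape for a datetime/date_nanos conflict, assumed rather than captured from a real response):

// Illustrative column entry; values are an assumption based on the suggestedCast rules, not captured output.
String exampleColumn = """
    {
      "name": "my_field",
      "type": "unsupported",
      "original_types": ["date_nanos", "datetime"],
      "suggested_cast": "date_nanos"
    }
    """;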
@@ -1038,7 +1038,12 @@ public enum Cap {
/**
* Support for the SAMPLE command
*/
SAMPLE(Build.current().isSnapshot());
SAMPLE(Build.current().isSnapshot()),

/**
* The {@code _query} API now returns a cast recommendation ({@code suggested_cast}) when a column has conflicting types across the queried indices and a suitable cast exists.
*/
SUGGESTED_CAST;

private final boolean enabled;

@@ -36,6 +36,7 @@
import org.elasticsearch.xpack.esql.core.util.NumericUtils;
import org.elasticsearch.xpack.esql.core.util.StringUtils;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianShape;
@@ -70,6 +71,7 @@
import java.util.function.Function;

import static java.util.Map.entry;
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE;
@@ -112,6 +114,7 @@ public class EsqlDataTypeConverter {
public static final DateFormatter HOUR_MINUTE_SECOND = DateFormatter.forPattern("strict_hour_minute_second_fraction");

private static final Map<DataType, BiFunction<Source, Expression, AbstractConvertFunction>> TYPE_TO_CONVERTER_FUNCTION = Map.ofEntries(
entry(AGGREGATE_METRIC_DOUBLE, ToAggregateMetricDouble::new),
entry(BOOLEAN, ToBoolean::new),
entry(CARTESIAN_POINT, ToCartesianPoint::new),
entry(CARTESIAN_SHAPE, ToCartesianShape::new),