Skip to content

Commit cee8c04

Browse files
committed
recommended cast
1 parent 6354c0f commit cee8c04

File tree

6 files changed

+191
-7
lines changed

6 files changed

+191
-7
lines changed

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java

+23
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,29 @@ public boolean isDate() {
708708
};
709709
}
710710

711+
public static DataType suggestedCast(Set<DataType> originalTypes) {
712+
if (originalTypes.isEmpty() || originalTypes.contains(UNSUPPORTED)) {
713+
return null;
714+
}
715+
if (originalTypes.contains(DATE_NANOS) && originalTypes.contains(DATETIME) && originalTypes.size() == 2) {
716+
return DATETIME;
717+
}
718+
if (originalTypes.contains(AGGREGATE_METRIC_DOUBLE)) {
719+
boolean allNumeric = true;
720+
for (DataType type : originalTypes) {
721+
if (type.isNumeric() == false && type != AGGREGATE_METRIC_DOUBLE) {
722+
allNumeric = false;
723+
break;
724+
}
725+
}
726+
if (allNumeric) {
727+
return AGGREGATE_METRIC_DOUBLE;
728+
}
729+
}
730+
731+
return KEYWORD;
732+
}
733+
711734
/**
712735
* Named parameters with default values. It's just easier to do this with
713736
* a builder in java....

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java

+31-6
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.xcontent.XContentType;
3333
import org.elasticsearch.xcontent.json.JsonXContent;
3434
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
35+
import org.elasticsearch.xpack.esql.core.type.DataType;
3536
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
3637
import org.hamcrest.Matcher;
3738
import org.junit.Before;
@@ -45,8 +46,10 @@
4546
import java.util.List;
4647
import java.util.Locale;
4748
import java.util.Map;
49+
import java.util.Objects;
4850
import java.util.TreeMap;
4951
import java.util.function.Function;
52+
import java.util.stream.Collectors;
5053

5154
import static org.elasticsearch.test.ListMatcher.matchesList;
5255
import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -690,7 +693,7 @@ public void testByteFieldWithIntSubfieldTooBig() throws IOException {
690693
* </pre>.
691694
*/
692695
public void testIncompatibleTypes() throws IOException {
693-
assumeOriginalTypesReported();
696+
assumeSuggestedCastReported();
694697
keywordTest().createIndex("test1", "f");
695698
index("test1", """
696699
{"f": "f1"}""");
@@ -764,7 +767,7 @@ public void testDistinctInEachIndex() throws IOException {
764767
* </pre>.
765768
*/
766769
public void testMergeKeywordAndObject() throws IOException {
767-
assumeOriginalTypesReported();
770+
assumeSuggestedCastReported();
768771
keywordTest().createIndex("test1", "file");
769772
index("test1", """
770773
{"file": "f1"}""");
@@ -959,7 +962,7 @@ public void testIntegerDocValuesConflict() throws IOException {
959962
* In an ideal world we'd promote the {@code integer} to an {@code long} and just go.
960963
*/
961964
public void testLongIntegerConflict() throws IOException {
962-
assumeOriginalTypesReported();
965+
assumeSuggestedCastReported();
963966
longTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
964967
index("test1", """
965968
{"emp_no": 1}""");
@@ -1002,7 +1005,7 @@ public void testLongIntegerConflict() throws IOException {
10021005
* In an ideal world we'd promote the {@code short} to an {@code integer} and just go.
10031006
*/
10041007
public void testIntegerShortConflict() throws IOException {
1005-
assumeOriginalTypesReported();
1008+
assumeSuggestedCastReported();
10061009
intTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
10071010
index("test1", """
10081011
{"emp_no": 1}""");
@@ -1051,7 +1054,7 @@ public void testIntegerShortConflict() throws IOException {
10511054
* </pre>.
10521055
*/
10531056
public void testTypeConflictInObject() throws IOException {
1054-
assumeOriginalTypesReported();
1057+
assumeSuggestedCastReported();
10551058
createIndex("test1", empNoInObject("integer"));
10561059
index("test1", """
10571060
{"foo": {"emp_no": 1}}""");
@@ -1379,6 +1382,12 @@ private void assumeOriginalTypesReported() throws IOException {
13791382
assumeTrue("This test makes sense for versions that report original types", requiredClusterCapability);
13801383
}
13811384

1385+
private void assumeSuggestedCastReported() throws IOException {
1386+
var capsName = EsqlCapabilities.Cap.SUGGESTED_CAST.name().toLowerCase(Locale.ROOT);
1387+
boolean requiredClusterCapability = clusterHasCapability("POST", "/_query", List.of(), List.of(capsName)).orElse(false);
1388+
assumeTrue("This test makes sense for versions that report suggested casts", requiredClusterCapability);
1389+
}
1390+
13821391
private CheckedConsumer<XContentBuilder, IOException> empNoInObject(String empNoType) {
13831392
return index -> {
13841393
index.startObject("properties");
@@ -1715,7 +1724,23 @@ private static Map<String, Object> columnInfo(String name, String type) {
17151724
}
17161725

17171726
private static Map<String, Object> unsupportedColumnInfo(String name, String... originalTypes) {
1718-
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
1727+
DataType suggested = DataType.suggestedCast(
1728+
List.of(originalTypes).stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
1729+
);
1730+
if (suggested == null) {
1731+
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
1732+
} else {
1733+
return Map.of(
1734+
"name",
1735+
name,
1736+
"type",
1737+
"unsupported",
1738+
"original_types",
1739+
List.of(originalTypes),
1740+
"suggested_cast",
1741+
suggested.typeName()
1742+
);
1743+
}
17191744
}
17201745

17211746
private static void index(String name, String... docs) throws IOException {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ColumnInfoImpl.java

+19
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.io.IOException;
2424
import java.util.List;
2525
import java.util.Objects;
26+
import java.util.stream.Collectors;
2627

2728
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
2829
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
@@ -72,6 +73,9 @@ public static ColumnInfo fromXContent(XContentParser parser) {
7273
@Nullable
7374
private final List<String> originalTypes;
7475

76+
@Nullable
77+
private final DataType suggestedCast;
78+
7579
@ParserConstructor
7680
public ColumnInfoImpl(String name, String type, @Nullable List<String> originalTypes) {
7781
this(name, DataType.fromEs(type), originalTypes);
@@ -81,15 +85,27 @@ public ColumnInfoImpl(String name, DataType type, @Nullable List<String> origina
8185
this.name = name;
8286
this.type = type;
8387
this.originalTypes = originalTypes;
88+
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
89+
}
90+
91+
private static DataType calculateSuggestedCast(List<String> originalTypes) {
92+
if (originalTypes == null) {
93+
return null;
94+
}
95+
return DataType.suggestedCast(
96+
originalTypes.stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
97+
);
8498
}
8599

86100
public ColumnInfoImpl(StreamInput in) throws IOException {
87101
this.name = in.readString();
88102
this.type = DataType.fromEs(in.readString());
89103
if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_REPORT_ORIGINAL_TYPES)) {
90104
this.originalTypes = in.readOptionalStringCollectionAsList();
105+
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
91106
} else {
92107
this.originalTypes = null;
108+
this.suggestedCast = null;
93109
}
94110
}
95111

@@ -110,6 +126,9 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
110126
if (originalTypes != null) {
111127
builder.field("original_types", originalTypes);
112128
}
129+
if (suggestedCast != null) {
130+
builder.field("suggested_cast", suggestedCast.typeName());
131+
}
113132
builder.endObject();
114133
return builder;
115134
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -1038,7 +1038,12 @@ public enum Cap {
10381038
/**
10391039
* Support for the SAMPLE command
10401040
*/
1041-
SAMPLE(Build.current().isSnapshot());
1041+
SAMPLE(Build.current().isSnapshot()),
1042+
1043+
/**
1044+
* The {@code _query} API now gives a cast recommendation if multiple types are found in certain instances.
1045+
*/
1046+
SUGGESTED_CAST;
10421047

10431048
private final boolean enabled;
10441049

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java

+35
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,14 @@
1212
import org.elasticsearch.xpack.esql.core.type.DataType;
1313

1414
import java.time.Instant;
15+
import java.util.ArrayList;
1516
import java.util.Arrays;
17+
import java.util.Collections;
18+
import java.util.HashSet;
1619
import java.util.List;
20+
import java.util.Set;
1721

22+
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
1823
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
1924
import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE;
2025
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
@@ -47,6 +52,7 @@
4752
import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime;
4853
import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrNanosOrTemporal;
4954
import static org.elasticsearch.xpack.esql.core.type.DataType.isString;
55+
import static org.elasticsearch.xpack.esql.core.type.DataType.suggestedCast;
5056
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType;
5157

5258
public class EsqlDataTypeConverterTests extends ESTestCase {
@@ -186,4 +192,33 @@ private static void assertNullCommonType(DataType dataType1, DataType dataType2)
186192
assertNull("Expected null for " + dataType1 + " and " + dataType2, commonType(dataType1, dataType2));
187193
assertNull("Expected null for " + dataType1 + " and " + dataType2, commonType(dataType2, dataType1));
188194
}
195+
196+
public void testSuggestedCast() {
197+
// date
198+
{
199+
assertEquals(DATETIME, DataType.suggestedCast(Set.of(DATETIME, DATE_NANOS)));
200+
DataType randomType = DataType.values()[random().nextInt(DataType.values().length)];
201+
DataType suggested = DataType.suggestedCast(Set.of(DATETIME, DATE_NANOS, randomType));
202+
if (randomType != DATETIME && randomType != DATE_NANOS) {
203+
assertEquals(KEYWORD, suggested);
204+
} else {
205+
assertEquals(DATETIME, suggested);
206+
}
207+
}
208+
209+
// aggregate metric double
210+
{
211+
List<DataType> NUMERICS = new ArrayList<>(Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList());
212+
Collections.shuffle(NUMERICS, random());
213+
Set<DataType> subset = new HashSet<>(NUMERICS.subList(0, random().nextInt(NUMERICS.size())));
214+
subset.add(AGGREGATE_METRIC_DOUBLE);
215+
assertEquals(AGGREGATE_METRIC_DOUBLE, suggestedCast(subset));
216+
}
217+
218+
// unsupported tests
219+
{
220+
assertNull(DataType.suggestedCast(Set.of()));
221+
assertNull(DataType.suggestedCast(Set.of(UNSUPPORTED, DataType.values()[random().nextInt(DataType.values().length)])));
222+
}
223+
}
189224
}

x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml

+77
Original file line numberDiff line numberDiff line change
@@ -905,3 +905,80 @@ CASE:
905905
- match: { values.2.0: 3 }
906906
- match: { values.2.1: null }
907907
- match: { values.2.2: "a" }
908+
909+
---
910+
suggested_type:
  # Gate on the capability name derived from EsqlCapabilities.Cap.SUGGESTED_CAST;
  # requiring a capability the cluster never advertises would skip this test everywhere.
  - requires:
      capabilities:
        - method: POST
          path: /_query
          parameters: []
          capabilities: [suggested_cast]
      reason: "uses suggested_cast"

  - do:
      indices.create:
        index: metrics_1
        body:
          mappings:
            properties:
              my_metric:
                type: aggregate_metric_double
                metrics: [ min, max, sum, value_count ]
                default_metric: max
              my_date:
                type: date
              my_double:
                type: double
              some_other_field:
                type: geo_point
  - do:
      indices.create:
        index: metrics_2
        body:
          mappings:
            properties:
              my_metric:
                type: long
              my_date:
                type: date_nanos
              my_double:
                type: double
              some_other_field:
                type: ip

  - do:
      bulk:
        refresh: true
        body:
          - { "index" : { "_index": "metrics_1" } }
          - { "my_metric": { "min": 1.0, "max": 3.0, "sum": 10.1, "value_count": 5 }, "my_date": "2021-04-28T18:50:04.467Z", "my_double": 105.2, "some_other_field": "52.374081,4.912350" }
          - { "index" : { "_index": "metrics_2" } }
          - { "my_metric": 5, "my_date": "2021-04-28T19:34:00.000Z", "my_double": 843205.9, "some_other_field": "192.168.30.1" }

  - do:
      allowed_warnings_regex:
        - "No limit defined, adding default limit of \\[.*\\]"
      esql.query:
        body:
          query: 'FROM metrics_* | KEEP my_metric, my_date, my_double, some_other_field'

  # aggregate_metric_double mixed only with numeric types keeps aggregate_metric_double
  - match: { columns.0.name: "my_metric" }
  - match: { columns.0.type: "unsupported" }
  - match: { columns.0.original_types: ["aggregate_metric_double", "long"] }
  - match: { columns.0.suggested_cast: "aggregate_metric_double" }
  # datetime + date_nanos suggests datetime
  - match: { columns.1.name: "my_date" }
  - match: { columns.1.type: "unsupported" }
  - match: { columns.1.original_types: ["date_nanos", "datetime"] }
  - match: { columns.1.suggested_cast: "datetime" }
  # no conflict: neither original_types nor suggested_cast is reported
  - match: { columns.2.name: "my_double" }
  - match: { columns.2.type: "double" }
  - is_false: columns.2.original_types
  - is_false: columns.2.suggested_cast
  # any other conflict falls back to keyword
  - match: { columns.3.name: "some_other_field" }
  - match: { columns.3.type: "unsupported" }
  - match: { columns.3.original_types: ["geo_point", "ip"] }
  - match: { columns.3.suggested_cast: "keyword" }
  - length: { values: 2 }
984+

0 commit comments

Comments
 (0)