Skip to content

ESQL: Keep DROP attributes when resolving field names #127009

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127009.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127009
summary: "ESQL: Keep `DROP` attributes when resolving field names"
area: ES|QL
type: bug
issues:
- 126418
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
"Unknown column", // https://github.com/elastic/elasticsearch/issues/127467
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,19 @@ Milky Way
Milky Way
Milky Way
;

dropAgainWithWildcardAfterEval
required_capability: drop_again_with_wildcard_after_eval
from languages
| eval language_code = 12, x = 13
| drop language_code
| drop language*
| keep x
| limit 3
;

x:integer
13
13
13
;
Original file line number Diff line number Diff line change
Expand Up @@ -1650,3 +1650,26 @@ event_duration:long
2764889
3450233
;

dropAgainWithWildcardAfterEval2
required_capability: join_lookup_v12
required_capability: drop_again_with_wildcard_after_eval
from addresses,cartesian_multipolygons,hosts
| rename city.name as message
| lookup join message_types_lookup on message
| eval card = -6013949614291505456, hOntTwnVC = null, PQAF = null, DXkxCFXyw = null, number = -7336429038807752405
| eval dewAwHC = -1186293612, message = null
| sort number ASC, street ASC, ip0 DESC, name ASC NULLS FIRST, host ASC
| drop number, host_group, *umber, `city.country.continent.name`, dewAwHC, `zip_code`, `message`, city.country.continent.planet.name, `name`, `ip1`, message, zip_code
| drop description, *e, id
| keep `hOntTwnVC`, city.country.continent.planet.galaxy, street
| limit 5
;

hOntTwnVC:null | city.country.continent.planet.galaxy:keyword | street:keyword
null | Milky Way | Kearny St
null | Milky Way | Keizersgracht
null | Milky Way | Marunouchi
null | null | null
null | null | null
;
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,12 @@ public enum Cap {
*/
FIX_JOIN_MASKING_EVAL,

/**
* Support for keeping `DROP` attributes when resolving field names.
* see <a href="https://github.com/elastic/elasticsearch/issues/126418"> ES|QL: no matches for pattern #126418 </a>
*/
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,

/**
* Support last_over_time aggregation that gets evaluated per time-series
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -590,9 +590,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
}

var referencesBuilder = AttributeSet.builder();
// "keep" attributes are special whenever a wildcard is used in their name
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
// to remove if their name matches one of these wildcards.
//
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
var keepCommandRefsBuilder = AttributeSet.builder();
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
//
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
var shadowingRefsBuilder = AttributeSet.builder();
var keepJoinRefsBuilder = AttributeSet.builder();
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

Expand All @@ -617,12 +623,12 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
keepJoinRefsBuilder.addAll(usingJoinType.columns());
}
if (keepCommandRefsBuilder.isEmpty()) {
if (shadowingRefsBuilder.isEmpty()) {
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
} else {
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
keepJoinRefsBuilder.addAll(keepCommandRefsBuilder);
keepJoinRefsBuilder.addAll(shadowingRefsBuilder);
}
} else {
referencesBuilder.addAll(p.references());
Expand All @@ -634,12 +640,10 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
p.forEachExpression(UnresolvedNamePattern.class, up -> {
var ua = new UnresolvedAttribute(up.source(), up.name());
referencesBuilder.add(ua);
if (p instanceof Keep) {
keepCommandRefsBuilder.add(ua);
}
shadowingRefsBuilder.add(ua);
});
if (p instanceof Keep) {
keepCommandRefsBuilder.addAll(p.references());
shadowingRefsBuilder.addAll(p.references());
}
}

Expand Down Expand Up @@ -669,7 +673,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
if (fieldNames.contains(alias.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), shadowingRefsBuilder.contains(attr)));
});
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,45 @@ public void testEnrichAndJoinMaskingEvalWh() {
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
}

public void testDropAgainWithWildcardAfterEval() {
assertFieldNames("""
from employees
| eval full_name = 12
| drop full_name
| drop *name
| keep emp_no
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
}

public void testDropWildcardedFields_AfterRename() {
assertFieldNames(
"""
from employees
| rename first_name AS first_names, last_name AS last_names
| eval first_names = 1
| drop first_names
| drop *_names
| keep gender""",
Set.of("first_name", "first_name.*", "last_name", "last_name.*", "*_names", "*_names.*", "gender", "gender.*")
);
}

public void testDropWildcardFields_WithLookupJoin() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| LOOKUP JOIN message_types_lookup ON message
| KEEP @timestamp, message, *e*
| SORT @timestamp
| DROP *e""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of()
);
}

private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();
Expand Down
Loading