Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CALCITE-6836] Add Rule to convert INTERSECT to EXISTS #4209

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Calc;
Expand Down Expand Up @@ -348,6 +349,11 @@ private CoreRules() {}
public static final IntersectToDistinctRule INTERSECT_TO_DISTINCT =
IntersectToDistinctRule.Config.DEFAULT.toRule();

/** Rule that translates a distinct {@link Intersect}
* into an {@link Exists} subquery; an {@code INTERSECT ALL}
* is left unchanged by the rule. */
public static final IntersectToExistsRule INTERSECT_TO_EXISTS =
IntersectToExistsRule.Config.DEFAULT.toRule();

/** Rule that translates a distinct
* {@link Minus} into a group of operators
* composed of {@link Union}, {@link Aggregate}, etc. */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Intersect;
import org.apache.calcite.rel.logical.LogicalIntersect;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;

import com.google.common.collect.ImmutableSet;

import org.immutables.value.Value;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

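/**
* Planner rule that translates an {@link Intersect}
* (<code>all</code> = <code>false</code>)
* into an {@link Exists} sub-query.
*
* <p>An {@link Intersect} with <code>all</code> = <code>true</code>
* is left unchanged.
*
* @see CoreRules#INTERSECT_TO_EXISTS
*/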
@Value.Enclosing
public class IntersectToExistsRule
extends RelRule<IntersectToExistsRule.Config>
implements TransformationRule {

/**
* Creates an IntersectToExistsRule.
*
* @param config rule configuration, passed unchanged to the super constructor
*/
protected IntersectToExistsRule(Config config) {
super(config);
}

/**
* Creates an IntersectToExistsRule from {@link Config#DEFAULT}, overriding
* its RelBuilderFactory and the Intersect class matched by its operand.
*
* @param intersectClass class of {@link Intersect} that the operand matches
* @param relBuilderFactory builder factory set on the default configuration
* @deprecated to be removed before 2.0; build the rule from a {@link Config}
* via {@link Config#toRule()} instead
*/
@Deprecated // to be removed before 2.0
public IntersectToExistsRule(Class<? extends Intersect> intersectClass,
RelBuilderFactory relBuilderFactory) {
this(Config.DEFAULT.withRelBuilderFactory(relBuilderFactory)
.as(Config.class)
.withOperandFor(intersectClass));
}

//~ Methods ----------------------------------------------------------------

/**
* Rewrites a non-ALL {@link Intersect} as a chain of filters over its first
* input: for each remaining input, the current relation is filtered by an
* EXISTS sub-query that is correlated on every column. The result is then
* projected (with casts where a column type differs from the Intersect's
* row type) and made distinct before being registered with the call.
*
* <p>Returns without transforming anything when {@code intersect.all} is set.
*
* @param call rule call whose first operand is the matched {@link Intersect}
*/
@Override public void onMatch(RelOptRuleCall call) {
final Intersect intersect = call.rel(0);
if (intersect.all) {
return; // nothing we can do
}

final RelBuilder builder = call.builder();
final RexBuilder rexBuilder = builder.getRexBuilder();

RelDataType rowType = intersect.getRowType();
List<RelNode> inputs = intersect.getInputs();
// the first input is the starting point; each later input adds one filter
RelNode current = inputs.get(0);

// get all column indices of intersect
ImmutableBitSet fieldIndices = ImmutableBitSet.of(rowType.getFieldList()
.stream().map(RelDataTypeField::getIndex)
.collect(Collectors.toList()));

// iterate over the inputs and apply exists subquery
for (int i = 1; i < inputs.size(); i++) {
// only the inputs that end up inside a sub-query are unwrapped;
// inputs.get(0) is used as-is
RelNode nextInput = removeHepRelVertex(inputs.get(i));

// create correlation
// a fresh correlation id per input, typed with the Intersect's row type
CorrelationId correlationId = intersect.getCluster().createCorrel();
RexNode correl =
rexBuilder.makeCorrel(rowType, correlationId);

// create condition in exists filter, and use correlation
// NOTE(review): the local names look inverted. `outerField` is an input
// reference and the condition is applied as a filter over nextInput (the
// sub-query side), while `innerField` reads the correlation variable that
// is bound by the filter over `current` below.
// IS NOT DISTINCT FROM is used rather than '='; presumably so that NULLs
// match each other, as set operations require - confirm.
List<RexNode> conditions = new ArrayList<>();
for (int fieldIndex : fieldIndices) {
RexNode outerField = rexBuilder.makeInputRef(rowType, fieldIndex);
RexNode innerField = rexBuilder.makeFieldAccess(correl, fieldIndex);
conditions.add(
rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM,
outerField, innerField));
}
// AND together one comparison per column
RexNode condition = RexUtil.composeConjunction(rexBuilder, conditions);

// build exists subquery
// nextInput, filtered by the correlated condition, projecting all columns
RelNode existsSubQuery = builder.push(nextInput)
.filter(condition)
.project(builder.fields(fieldIndices))
.build();

// apply exists subquery to the current relation
// the filter declares correlationId as a variable it sets
current = builder.push(current)
.filter(ImmutableSet.of(correlationId),
RexSubQuery.exists(existsSubQuery))
.build();
}

// final projection: one expression per column of the filtered relation
builder.push(current);
List<RexNode> projects = new ArrayList<>();
for (int fieldIndex : fieldIndices) {
RexNode rexNode = builder.fields().get(fieldIndex);
// projects.size() is the position of the column being emitted; because
// fieldIndices holds every field index this presumably equals fieldIndex
// (assumes the bit set iterates in ascending order - confirm)
RelDataType originalType =
rowType.getFieldList().get(projects.size()).getType();
RexNode expr;
// cast back to the Intersect's column type when the rewritten plan's
// column type is not equal to it; the meaning of the two boolean flags is
// defined by RexBuilder.makeCast and is not visible here
if (!originalType.equals(rexNode.getType())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What situation is the type adjustment for here?

Copy link
Contributor Author

@xiedeyantu xiedeyantu Mar 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What situation is the type adjustment for here?

Here we can get java type and sql type, when we using setop.

expr = rexBuilder.makeCast(originalType, rexNode, true, false);
} else {
expr = rexNode;
}
projects.add(expr);
}

// distinct() removes duplicate rows, matching the non-ALL Intersect
RelNode result = builder.project(projects)
.distinct()
.build();

call.transformTo(result);
}

/**
* Returns a copy of the given tree in which every {@link HepRelVertex} is
* replaced by the expression it currently wraps.
*
* <p>Per the original note: HepRelVertex nodes in subqueries are not
* eliminated during the optimization process, so they are eliminated here.
*
* <p>Each node that is not a vertex is re-created with
* {@code copy(traitSet, newInputs)}, even when none of its inputs changed.
*
* @param input root of the tree to unwrap
* @return the unwrapped tree
*/
private RelNode removeHepRelVertex(RelNode input) {
Copy link
Contributor

@suibianwanwank suibianwanwank Mar 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may need to stipped().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may need to stipped().

Yes, thanks.

// a vertex is replaced by its current expression, which is unwrapped in turn
if (input instanceof HepRelVertex) {
HepRelVertex hepRelVertex = (HepRelVertex) input;
RelNode currentRel = hepRelVertex.getCurrentRel();
return removeHepRelVertex(currentRel);
}
// any other node: unwrap each input, then copy the node over the new inputs
List<RelNode> newInputs = new ArrayList<>();
for (RelNode child : input.getInputs()) {
newInputs.add(removeHepRelVertex(child));
}
return input.copy(input.getTraitSet(), newInputs);
}

/** Configuration for {@link IntersectToExistsRule}. */
@Value.Immutable
public interface Config extends RelRule.Config {
  /** Default configuration, whose operand matches {@link LogicalIntersect}. */
  Config DEFAULT = ImmutableIntersectToExistsRule.Config.of()
      .withOperandFor(LogicalIntersect.class);

  /** Creates an {@link IntersectToExistsRule} from this configuration. */
  @Override default IntersectToExistsRule toRule() {
    return new IntersectToExistsRule(this);
  }

  /**
   * Returns a configuration whose operand matches the given
   * {@link Intersect} class with any inputs.
   *
   * @param intersectClass class of {@link Intersect} to match
   */
  default Config withOperandFor(Class<? extends Intersect> intersectClass) {
    return withOperandSupplier(root ->
            root.operand(intersectClass).anyInputs())
        .as(Config.class);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.calcite.rel.rules.AggregateProjectMergeRule;
import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.rel.rules.FilterJoinRule;
import org.apache.calcite.rel.rules.IntersectToExistsRule;
import org.apache.calcite.rel.rules.ProjectOverSumToSum0Rule;
import org.apache.calcite.rel.rules.ProjectToWindowRule;
import org.apache.calcite.rel.rules.PruneEmptyRules;
Expand Down Expand Up @@ -244,6 +245,33 @@ private static String toSql(RelNode root, SqlDialect dialect,
sql(query).withMysql().ok(expected);
}

/**
* Test case of
* <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
* Add Rule to convert INTERSECT to EXISTS</a>.
*
* <p>Optimizes an INTERSECT of two identical single-column queries with
* {@link CoreRules#INTERSECT_TO_EXISTS} through a {@link HepPlanner}, then
* checks the SQL generated via {@code withMysql()}: a filter with a
* correlated EXISTS sub-query using IS NOT DISTINCT FROM, followed by a
* GROUP BY on the column. */
@Test void testIntersectToExistsRule() {
String query = "SELECT \"product_name\"\n"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, I don't think it's necessary to test in RelToSqlConverter because the semantics of relational algebra is already expressed in RelNode. What is tested here is the correctness of RelNode -> sqlnode -> sqlText.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, I don't think it's necessary to test in RelToSqlConverter because the semantics of relational algebra is already expressed in RelNode. What is tested here is the correctness of RelNode -> sqlnode -> sqlText.

@suibianwanwank Thanks, I agree with you, but currently Calcite may not have the ability to execute this plan, and I have no other good options.

+ "FROM \"foodmart\".\"product\"\n"
+ "INTERSECT\n"
+ "SELECT \"product_name\"\n"
+ "FROM \"foodmart\".\"product\"";
String expected = "SELECT `product_name`\n"
+ "FROM (SELECT `product_name`\n"
+ "FROM `foodmart`.`product`) AS `t`\n"
+ "WHERE EXISTS (SELECT *\n"
+ "FROM (SELECT `product_name`\n"
+ "FROM `foodmart`.`product`) AS `t0`\n"
+ "WHERE `product_name` IS NOT DISTINCT FROM `t`.`product_name`)\n"
+ "GROUP BY `product_name`";
HepProgramBuilder builder = new HepProgramBuilder();
builder.addRuleClass(IntersectToExistsRule.class);
HepPlanner hepPlanner = new HepPlanner(builder.build());
RuleSet rules =
RuleSets.ofList(CoreRules.INTERSECT_TO_EXISTS);

sql(query).withMysql().optimize(rules, hepPlanner).ok(expected);
}

@Test void testGroupByBooleanLiteral() {
String query = "select avg(\"salary\") from \"employee\" group by true";
String expectedRedshift = "SELECT AVG(\"employee\".\"salary\")\n"
Expand Down
46 changes: 46 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9716,4 +9716,50 @@ private void checkJoinAssociateRuleWithTopAlwaysTrueCondition(boolean allowAlway
fixture().withRelBuilderConfig(a -> a.withBloat(-1))
.relFn(relFn).withPlanner(planner).check();
}

/**
* Test case of
* <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
* Add Rule to convert INTERSECT to EXISTS</a>.
*
* <p>Applies {@link CoreRules#INTERSECT_TO_EXISTS} to an INTERSECT of two
* single-column queries over different tables ({@code emp.ename} and
* {@code dept.name}). */
@Test void testIntersectToExistsRuleOneField() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since plans are hard to read, I would encourage you to also add some quidem tests, these are much simpler to read and write

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since plans are hard to read, I would encourage you to also add some quidem tests, these are much simpler to read and write

@mihaibudiu Could you give me a description of quidem tests? I don't quite understand how to write them now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at the files with iq suffix

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at the files with iq suffix

@mihaibudiu Thanks, I have seen the file with this suffix. Do I need to write a SQL test to output the final result through the optimization rules? Is there a similar example for reference?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just as @mihaibudiu said, currently this quidem file cannot specify specific optimizations. Before Quidem's test, we wrote such test cases in JdbcTest. Please feel free to check it.

String sql = "SELECT a.ename FROM emp AS a\n"
+ "INTERSECT\n"
+ "SELECT b.name FROM dept AS b";
sql(sql).withRule(CoreRules.INTERSECT_TO_EXISTS)
.check();
}

/**
 * Test case of
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
 * Add Rule to convert INTERSECT to EXISTS</a>, where both branches select
 * the {@code empno} column of {@code emp}. */
@Test void testIntersectToExistsRulePrimaryKey() {
  final String query = "SELECT a.empno FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.empno FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For all remaining tests, also add the jira case

/**
 * Test case of
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
 * Add Rule to convert INTERSECT to EXISTS</a>, where each branch selects
 * two columns ({@code ename} and {@code job}). */
@Test void testIntersectToExistsRuleMultiFields() {
  final String query = "SELECT a.ename, a.job FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.ename, b.job FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/**
 * Test case of
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
 * Add Rule to convert INTERSECT to EXISTS</a>, where three queries are
 * combined with two INTERSECT operators. */
@Test void testIntersectToExistsRuleMultiIntersect() {
  final String query = "SELECT a.ename FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.name FROM dept AS b\n"
      + "INTERSECT\n"
      + "SELECT c.ename FROM emp AS c";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/**
 * Test case of
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6836">[CALCITE-6836]
 * Add Rule to convert INTERSECT to EXISTS</a>, checking that the rule
 * leaves an {@code INTERSECT ALL} plan unchanged. */
@Test void testIntersectToExistsRuleWithAll() {
  final String query = "SELECT a.ename FROM emp AS a\n"
      + "INTERSECT ALL\n"
      + "SELECT b.name FROM dept AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .checkUnchanged();
}
}
Loading