Skip to content

Commit

Permalink
[CALCITE-6836] Add Rule to convert INTERSECT to EXISTS
Browse files Browse the repository at this point in the history
  • Loading branch information
xiedeyantu committed Feb 24, 2025
1 parent 0a4f00d commit ed98960
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Calc;
Expand Down Expand Up @@ -348,6 +349,11 @@ private CoreRules() {}
public static final IntersectToDistinctRule INTERSECT_TO_DISTINCT =
IntersectToDistinctRule.Config.DEFAULT.toRule();

/** Rule that translates a distinct {@link Intersect}
* (<code>all</code> = <code>false</code>) into a filter on an
* {@link Exists} sub-query; built from
* {@link IntersectToExistsRule.Config#DEFAULT}. */
public static final IntersectToExistsRule INTERSECT_TO_EXISTS =
IntersectToExistsRule.Config.DEFAULT.toRule();

/** Rule that translates a distinct
* {@link Minus} into a group of operators
* composed of {@link Union}, {@link Aggregate}, etc. */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Intersect;
import org.apache.calcite.rel.logical.LogicalIntersect;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;

import com.google.common.collect.ImmutableSet;

import org.immutables.value.Value;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Planner rule that translates a {@link Intersect}
 * (<code>all</code> = <code>false</code>)
 * into a chain of {@link Exists} sub-queries.
 *
 * <p>The first input is kept as the outer relation. For every further input
 * a correlated filter {@code EXISTS (SELECT ... FROM input WHERE ...)} is
 * stacked on top of it, and the final result is made distinct.
 *
 * <p>Columns are matched with {@code IS NOT DISTINCT FROM} rather than
 * {@code =}, because {@code INTERSECT} treats two NULL values as the same
 * value; with plain equality every row containing a NULL would be lost.
 *
 * <p>{@code INTERSECT ALL} is left untouched.
 *
 * @see CoreRules#INTERSECT_TO_EXISTS
 */
@Value.Enclosing
public class IntersectToExistsRule
    extends RelRule<IntersectToExistsRule.Config>
    implements TransformationRule {

  /** Creates an IntersectToExistsRule. */
  protected IntersectToExistsRule(Config config) {
    super(config);
  }

  /**
   * Creates an IntersectToExistsRule for a given {@link Intersect} class and
   * builder factory; delegates to the {@link Config}-based constructor.
   *
   * @param intersectClass Class of Intersect that the rule should match
   * @param relBuilderFactory Factory for the builder used in {@link #onMatch}
   */
  @Deprecated // to be removed before 2.0
  public IntersectToExistsRule(Class<? extends Intersect> intersectClass,
      RelBuilderFactory relBuilderFactory) {
    this(Config.DEFAULT.withRelBuilderFactory(relBuilderFactory)
        .as(Config.class)
        .withOperandFor(intersectClass));
  }

  //~ Methods ----------------------------------------------------------------

  /**
   * Rewrites the matched {@link Intersect} as nested EXISTS filters over its
   * first input, followed by a distinct. Does nothing if the Intersect is
   * {@code ALL}.
   *
   * @param call Rule call whose operand 0 is the Intersect
   */
  @Override public void onMatch(RelOptRuleCall call) {
    final Intersect intersect = call.rel(0);
    if (intersect.all) {
      return; // nothing we can do
    }

    final RelBuilder builder = call.builder();
    final RexBuilder rexBuilder = builder.getRexBuilder();

    // get all column indices of intersect
    final List<Integer> fieldIndices = intersect.getRowType().getFieldList()
        .stream().map(RelDataTypeField::getIndex)
        .collect(Collectors.toList());

    final List<RelNode> inputs = intersect.getInputs();
    RelNode current = inputs.get(0);

    // iterate over the remaining inputs and apply one exists subquery each
    for (int i = 1; i < inputs.size(); i++) {
      final RelNode nextInput = inputs.get(i);

      // The correlation variable is bound by the filter placed on "current"
      // below, so it stands for a row of "current" and must carry that
      // relation's row type (not the row type of the sub-query input).
      final CorrelationId correlationId =
          intersect.getCluster().createCorrel();
      final RexNode correl =
          rexBuilder.makeCorrel(current.getRowType(), correlationId);

      // Condition evaluated inside the sub-query: plain input references
      // address "nextInput", field accesses on the correlation variable
      // address the outer row.
      final List<RexNode> conditions = new ArrayList<>();
      for (int fieldIndex : fieldIndices) {
        final RexNode innerField =
            rexBuilder.makeInputRef(nextInput, fieldIndex);
        final RexNode outerField =
            rexBuilder.makeFieldAccess(correl, fieldIndex);
        // Null-safe comparison: INTERSECT considers NULLs not distinct.
        conditions.add(
            rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM,
                innerField, outerField));
      }
      final RexNode condition =
          RexUtil.composeConjunction(rexBuilder, conditions);

      // build exists subquery
      final RelNode existsSubQuery = builder.push(nextInput)
          .filter(condition)
          .project(builder.fields(fieldIndices))
          .build();

      // apply exists subquery to the current relation
      current = builder.push(current)
          .filter(ImmutableSet.of(correlationId),
              RexSubQuery.exists(existsSubQuery))
          .build();
    }

    // NOTE(review): the result carries the row type of the first input; if
    // that can differ from the Intersect's own row type (for example in
    // nullability) a conversion may be needed here - confirm.
    final RelNode result = builder.push(current)
        .project(builder.fields(fieldIndices))
        .distinct()
        .build();

    call.transformTo(result);
  }

  /** Rule configuration. */
  @Value.Immutable
  public interface Config extends RelRule.Config {
    /** Default configuration; matches {@link LogicalIntersect}. */
    Config DEFAULT = ImmutableIntersectToExistsRule.Config.of()
        .withOperandFor(LogicalIntersect.class);

    @Override default IntersectToExistsRule toRule() {
      return new IntersectToExistsRule(this);
    }

    /** Defines an operand tree for the given classes. */
    default Config withOperandFor(Class<? extends Intersect> intersectClass) {
      return withOperandSupplier(b -> b.operand(intersectClass).anyInputs())
          .as(Config.class);
    }
  }
}
34 changes: 34 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9585,4 +9585,38 @@ private void checkJoinAssociateRuleWithTopAlwaysTrueCondition(boolean allowAlway
.withRule(CoreRules.MULTI_JOIN_OPTIMIZE)
.check();
}

/** Runs {@link CoreRules#INTERSECT_TO_EXISTS} on a single-column
 * INTERSECT of DEPT and EMP and checks the recorded plans. */
@Test void testIntersectToExistsRuleOneField() {
  final String query = "SELECT a.deptno FROM dept AS a\n"
      + "INTERSECT\n"
      + "SELECT b.empno FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Runs {@link CoreRules#INTERSECT_TO_EXISTS} on a two-column
 * INTERSECT of EMP with itself and checks the recorded plans. */
@Test void testIntersectToExistsRuleMultiFields() {
  final String query = "SELECT a.empno, a.ename FROM emp AS a\n"
      + "INTERSECT\n"
      + "SELECT b.empno, b.ename FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Runs {@link CoreRules#INTERSECT_TO_EXISTS} on a query that chains
 * two INTERSECT operators and checks the recorded plans. */
@Test void testIntersectToExistsRuleMultiIntersect() {
  final String query = "SELECT a.deptno FROM dept AS a\n"
      + "INTERSECT\n"
      + "SELECT b.empno FROM emp AS b\n"
      + "INTERSECT\n"
      + "SELECT a.deptno FROM dept AS a";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .check();
}

/** Runs {@link CoreRules#INTERSECT_TO_EXISTS} on an INTERSECT ALL
 * query and checks that the plan is left unchanged. */
@Test void testIntersectToExistsRuleWithAll() {
  final String query = "SELECT a.deptno FROM dept AS a\n"
      + "INTERSECT ALL\n"
      + "SELECT b.empno FROM emp AS b";
  sql(query)
      .withRule(CoreRules.INTERSECT_TO_EXISTS)
      .checkUnchanged();
}
}
107 changes: 107 additions & 0 deletions core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5440,6 +5440,113 @@ LogicalIntersect(all=[true])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalFilter(condition=[=($7, 30)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleMultiFields">
<!-- Distinct INTERSECT over two columns. planAfter records a single EXISTS
filter whose inner condition ANDs one equality per column against $cor0. -->
<Resource name="sql">
<![CDATA[SELECT a.empno, a.ename FROM emp AS a
INTERSECT
SELECT b.empno, b.ename FROM emp AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalProject(EMPNO=[$0], ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(EMPNO=[$0], ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[AND(=($0, $cor0.EMPNO), =($1, $cor0.ENAME))])
LogicalProject(EMPNO=[$0], ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})], variablesSet=[[$cor0]])
LogicalProject(EMPNO=[$0], ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleMultiIntersect">
<!-- Two chained INTERSECTs (nested in planBefore). planAfter records two
stacked EXISTS filters, each with its own correlation variable
($cor0 on the outer filter, $cor1 on the inner one). -->
<Resource name="sql">
<![CDATA[SELECT a.deptno FROM dept AS a
INTERSECT
SELECT b.empno FROM emp AS b
INTERSECT
SELECT a.deptno FROM dept AS a]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalIntersect(all=[false])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
})], variablesSet=[[$cor0]])
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[=($0, $cor1.EMPNO)])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})], variablesSet=[[$cor1]])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleOneField">
<!-- Single-column distinct INTERSECT of DEPT and EMP. planAfter records one
EXISTS filter over the DEPT projection with condition =($0, $cor0.EMPNO). -->
<Resource name="sql">
<![CDATA[SELECT a.deptno FROM dept AS a
INTERSECT
SELECT b.empno FROM emp AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[false])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalFilter(condition=[EXISTS({
LogicalFilter(condition=[=($0, $cor0.EMPNO)])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})], variablesSet=[[$cor0]])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
<TestCase name="testIntersectToExistsRuleWithAll">
<!-- INTERSECT ALL case. Only planBefore is recorded here; there is no
planAfter resource for this test. -->
<Resource name="sql">
<![CDATA[SELECT a.deptno FROM dept AS a
INTERSECT ALL
SELECT b.empno FROM emp AS b]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalIntersect(all=[true])
LogicalProject(DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
Expand Down

0 comments on commit ed98960

Please sign in to comment.