[fix](analyzer) InferFilterRule bug: equations in on clause of outer/anti join are not inferable. (#11515)

This commit is contained in:
minghong
2022-08-11 09:36:43 +08:00
committed by GitHub
parent d60afe82a4
commit 02a3f21b65
6 changed files with 131 additions and 20 deletions

View File

@ -30,6 +30,7 @@ import org.apache.doris.common.UserException;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.rewrite.ExprRewriter;
import org.apache.doris.rewrite.ExprRewriter.ClauseType;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
@ -567,7 +568,7 @@ public class TableRef implements ParseNode, Writable {
Preconditions.checkState(isAnalyzed);
if (onClause != null) {
Expr expr = onClause.clone();
onClause = rewriter.rewrite(onClause, analyzer, ExprRewriter.ClauseType.ON_CLAUSE);
onClause = rewriter.rewrite(onClause, analyzer, ClauseType.fromJoinType(joinOp));
if (joinOp.isOuterJoin() || joinOp.isSemiAntiJoin()) {
if (onClause instanceof BoolLiteral && !((BoolLiteral) onClause).getValue()) {
onClause = expr;

View File

@ -22,6 +22,7 @@ package org.apache.doris.rewrite;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.JoinOperator;
import org.apache.doris.common.AnalysisException;
import com.google.common.collect.Lists;
@ -53,9 +54,47 @@ public class ExprRewriter {
// The type of clause that executes the rule.
// This type is only used in InferFiltersRule, RewriteDateLiteralRule, other rules are not used
public enum ClauseType {
ON_CLAUSE,
INNER_JOIN_CLAUSE,
LEFT_OUTER_JOIN_CLAUSE,
RIGHT_OUTER_JOIN_CLAUSE,
FULL_OUTER_JOIN_CLAUSE,
LEFT_SEMI_JOIN_CLAUSE,
RIGHT_SEMI_JOIN_CLAUSE,
LEFT_ANTI_JOIN_CLAUSE,
RIGHT_ANTI_JOIN_CLAUSE,
CROSS_JOIN_CLAUSE,
WHERE_CLAUSE,
OTHER_CLAUSE, // All other clauses that are not on and not where
OTHER_CLAUSE; // All other clauses that are not on and not where
public static ClauseType fromJoinType(JoinOperator joinOp) {
switch (joinOp) {
case INNER_JOIN: return INNER_JOIN_CLAUSE;
case LEFT_OUTER_JOIN: return LEFT_OUTER_JOIN_CLAUSE;
case RIGHT_OUTER_JOIN: return RIGHT_OUTER_JOIN_CLAUSE;
case FULL_OUTER_JOIN: return FULL_OUTER_JOIN_CLAUSE;
case LEFT_SEMI_JOIN: return LEFT_SEMI_JOIN_CLAUSE;
case RIGHT_SEMI_JOIN: return RIGHT_SEMI_JOIN_CLAUSE;
case LEFT_ANTI_JOIN: return LEFT_ANTI_JOIN_CLAUSE;
case RIGHT_ANTI_JOIN: return RIGHT_ANTI_JOIN_CLAUSE;
case CROSS_JOIN: return CROSS_JOIN_CLAUSE;
default: return OTHER_CLAUSE;
}
}
public boolean isInferable() {
if (this == INNER_JOIN_CLAUSE
|| this == LEFT_SEMI_JOIN_CLAUSE
|| this == RIGHT_SEMI_JOIN_CLAUSE
|| this == WHERE_CLAUSE
|| this == OTHER_CLAUSE) {
return true;
}
return false;
}
public boolean isOnClause() {
return this.compareTo(WHERE_CLAUSE) < 0;
}
}
// Once-only Rules

View File

@ -71,18 +71,22 @@ public class InferFiltersRule implements ExprRewriteRule {
return expr;
}
if (!clauseType.isInferable()) {
return expr;
}
// slotEqSlotExpr: Record existing and infer equivalent connections
List<Expr> slotEqSlotExpr = analyzer.getOnSlotEqSlotExpr();
// slotEqSlotDeDuplication: De-Duplication for slotEqSlotExpr
Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = (clauseType == ExprRewriter.ClauseType.ON_CLAUSE)
Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = (clauseType.isOnClause())
? analyzer.getOnSlotEqSlotDeDuplication() : Sets.newHashSet();
// slotToLiteralExpr: Record existing and infer expr which slot and literal are equal
List<Expr> slotToLiteralExpr = analyzer.getOnSlotToLiteralExpr();
// slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr
Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = (clauseType == ExprRewriter.ClauseType.ON_CLAUSE)
Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = (clauseType.isOnClause())
? analyzer.getOnSlotToLiteralDeDuplication() : Sets.newHashSet();
// newExprWithState: just record infer expr which slot and literal are equal and which is not null predicate
@ -94,7 +98,7 @@ public class InferFiltersRule implements ExprRewriteRule {
List<Expr> isNullExpr = analyzer.getOnIsNullExpr();
// isNullDeDuplication: De-Duplication for isNullExpr
Set<Expr> isNullDeDuplication = (clauseType == ExprRewriter.ClauseType.ON_CLAUSE)
Set<Expr> isNullDeDuplication = (clauseType.isOnClause())
? analyzer.getOnIsNullDeDuplication() : Sets.newHashSet();
// inExpr: Record existing and infer in predicate
@ -102,7 +106,7 @@ public class InferFiltersRule implements ExprRewriteRule {
// inDeDuplication: De-Duplication for inExpr
Set<Expr> inDeDuplication =
(clauseType == ExprRewriter.ClauseType.ON_CLAUSE) ? analyzer.getInDeDuplication() : Sets.newHashSet();
(clauseType.isOnClause()) ? analyzer.getInDeDuplication() : Sets.newHashSet();
// exprToWarshallArraySubscript/warshallArraySubscriptToExpr:
// function is easy to build warshall and newExprWithState
@ -180,7 +184,7 @@ public class InferFiltersRule implements ExprRewriteRule {
if (!slotToLiteralDeDuplication.contains(pair)) {
slotToLiteralDeDuplication.add(pair);
slotToLiteralExpr.add(conjunct);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerOnSlotToLiteralDeDuplication(pair);
analyzer.registerOnSlotToLiteralExpr(conjunct);
}
@ -197,7 +201,7 @@ public class InferFiltersRule implements ExprRewriteRule {
&& !slotEqSlotDeDuplication.contains(eqPair)) {
slotEqSlotDeDuplication.add(pair);
slotEqSlotExpr.add(conjunct);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerOnSlotEqSlotDeDuplication(pair);
analyzer.registerOnSlotEqSlotExpr(conjunct);
}
@ -210,7 +214,7 @@ public class InferFiltersRule implements ExprRewriteRule {
&& ((IsNullPredicate) conjunct).isNotNull()) {
isNullDeDuplication.add(conjunct.getChild(0).unwrapSlotRef());
isNullExpr.add(conjunct);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerOnIsNullDeDuplication(conjunct.getChild(0).unwrapSlotRef());
analyzer.registerOnIsNullExpr(conjunct);
}
@ -221,7 +225,7 @@ public class InferFiltersRule implements ExprRewriteRule {
if (!inDeDuplication.contains(conjunct.getChild(0).unwrapSlotRef())) {
inDeDuplication.add(conjunct.getChild(0).unwrapSlotRef());
inExpr.add(conjunct);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerInExpr(conjunct);
analyzer.registerInDeDuplication(conjunct.getChild(0).unwrapSlotRef());
}
@ -359,7 +363,7 @@ public class InferFiltersRule implements ExprRewriteRule {
slotEqSlotExpr.add(new BinaryPredicate(BinaryPredicate.Operator.EQ,
warshallArraySubscriptToExpr.get(slotPair.first),
warshallArraySubscriptToExpr.get(slotPair.second)));
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerOnSlotEqSlotDeDuplication(pair);
analyzer.registerOnSlotEqSlotExpr(new BinaryPredicate(BinaryPredicate.Operator.EQ,
warshallArraySubscriptToExpr.get(slotPair.first),
@ -448,7 +452,7 @@ public class InferFiltersRule implements ExprRewriteRule {
*/
private boolean checkNeedInfer(JoinOperator joinOperator, boolean needChange, ExprRewriter.ClauseType clauseType) {
boolean ret = false;
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
if (joinOperator.isInnerJoin()
|| (joinOperator == JoinOperator.LEFT_SEMI_JOIN)
|| (!needChange && joinOperator == JoinOperator.RIGHT_OUTER_JOIN)
@ -496,7 +500,7 @@ public class InferFiltersRule implements ExprRewriteRule {
slotToLiteralDeDuplication.add(pair);
Pair<Expr, Boolean> newBPWithBool = new Pair<>(newBP, needAddnewExprWithState);
newExprWithState.add(newBPWithBool);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerOnSlotToLiteralDeDuplication(pair);
analyzer.registerOnSlotToLiteralExpr(newBP);
}
@ -575,7 +579,7 @@ public class InferFiltersRule implements ExprRewriteRule {
isNullDeDuplication.add(newExpr.getChild(0));
Pair<Expr, Boolean> newExprWithBoolean = new Pair<>(newExpr, true);
newExprWithState.add(newExprWithBoolean);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerOnIsNullExpr(newExpr);
analyzer.registerOnIsNullDeDuplication(newExpr);
}
@ -660,7 +664,7 @@ public class InferFiltersRule implements ExprRewriteRule {
inDeDuplication.add(newIP);
Pair<Expr, Boolean> newBPWithBool = new Pair<>(newIP, needAddnewExprWithState);
newExprWithState.add(newBPWithBool);
if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) {
if (clauseType.isOnClause()) {
analyzer.registerInDeDuplication(newIP);
analyzer.registerInExpr(newIP);
}

View File

@ -140,15 +140,24 @@ public class InferFiltersRuleTest {
}
@Test
public void testOn2TablesLeftAntiJoinEqLiteralAt1st() throws Exception {
public void testOn2TablesLeftJoinNotInferable() throws Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();
sessionVariable.setEnableInferPredicate(true);
Assert.assertTrue(sessionVariable.isEnableInferPredicate());
String query = "select * from tb1 left anti join tb2 on tb1.k1 = tb2.k1 and tb1.k1 = 1";
String query = "select * from tb1 left join tb2 on tb1.k1 = tb2.k1 and tb2.k1 = 1";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("`tb2`.`k1` = 1"));
Assert.assertFalse(planString.contains("`tb1`.`k1` = 1"));
}
/*
the following 3 test case is valid. But we cannot tell them from other incorrect inferences.
In origin design we made a mistake: we assume inference is symmetrical.
For example, t1.x=t2.x and t1.x=1 => t2.x=1
this is not always true.
if this is left join, t1 is left, t2.x=1 is not valid.
However, in inferFilterRule, we do not know whether t1.x is from left or right table.
And hence, we have to skip inference for outer/anti join for quick fix.
@Test
public void testOn3Tables1stInner2ndRightJoinEqLiteralAt2nd() throws Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();
@ -172,7 +181,16 @@ public class InferFiltersRuleTest {
Assert.assertTrue(planString.contains("`tb2`.`k1` = 1"));
Assert.assertTrue(planString.contains("`tb1`.`k1` = 1"));
}
@Test
public void testOn2TablesLeftAntiJoinEqLiteralAt1st() throws Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();
sessionVariable.setEnableInferPredicate(true);
Assert.assertTrue(sessionVariable.isEnableInferPredicate());
String query = "select * from tb1 left anti join tb2 on tb1.k1 = tb2.k1 and tb1.k1 = 1";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("`tb2`.`k1` = 1"));
}
*/
@Test
public void testOnIsNotNullPredicate() throws Exception {
SessionVariable sessionVariable = dorisAssert.getSessionVariable();