planner: support join elimination rule (#8021)
This commit is contained in:
@ -393,6 +393,19 @@ Projection_5 8000.00 root test.ta.a
|
||||
└─TableReader_9 10000.00 root data:TableScan_8
|
||||
└─TableScan_8 10000.00 cop table:ta, range:[-inf,+inf], keep order:false, stats:pseudo
|
||||
rollback;
|
||||
drop table if exists t1, t2;
|
||||
create table t1(a int, b int, c int, primary key(a, b));
|
||||
create table t2(a int, b int, c int, primary key(a));
|
||||
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
|
||||
id count task operator info
|
||||
TableReader_7 10000.00 root data:TableScan_6
|
||||
└─TableScan_6 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
|
||||
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
|
||||
id count task operator info
|
||||
StreamAgg_19 8000.00 root group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1)
|
||||
└─IndexReader_20 8000.00 root index:StreamAgg_10
|
||||
└─StreamAgg_10 8000.00 cop group by:test.t1.a, test.t1.b, funcs:firstrow(test.t1.a), firstrow(test.t1.b)
|
||||
└─IndexScan_18 10000.00 cop table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo
|
||||
drop table if exists t;
|
||||
create table t(a int, nb int not null, nc int not null);
|
||||
explain select ifnull(a, 0) from t;
|
||||
|
||||
@ -82,6 +82,13 @@ insert tb values ('1');
|
||||
explain select * from ta where a = 1;
|
||||
rollback;
|
||||
|
||||
# outer join elimination
|
||||
drop table if exists t1, t2;
|
||||
create table t1(a int, b int, c int, primary key(a, b));
|
||||
create table t2(a int, b int, c int, primary key(a));
|
||||
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
|
||||
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
|
||||
|
||||
# https://github.com/pingcap/tidb/issues/7918
|
||||
drop table if exists t;
|
||||
create table t(a int, nb int not null, nc int not null);
|
||||
|
||||
@ -309,9 +309,13 @@ func (b *PlanBuilder) buildJoin(joinNode *ast.Join) (LogicalPlan, error) {
|
||||
// Set join type.
|
||||
switch joinNode.Tp {
|
||||
case ast.LeftJoin:
|
||||
// left outer join need to be checked elimination
|
||||
b.optFlag = b.optFlag | flagEliminateOuterJoin
|
||||
joinPlan.JoinType = LeftOuterJoin
|
||||
resetNotNullFlag(joinPlan.schema, leftPlan.Schema().Len(), joinPlan.schema.Len())
|
||||
case ast.RightJoin:
|
||||
// right outer join need to be checked elimination
|
||||
b.optFlag = b.optFlag | flagEliminateOuterJoin
|
||||
joinPlan.JoinType = RightOuterJoin
|
||||
resetNotNullFlag(joinPlan.schema, 0, leftPlan.Schema().Len())
|
||||
default:
|
||||
|
||||
@ -2026,3 +2026,67 @@ func (s *testPlanSuite) TestNameResolver(c *C) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *testPlanSuite) TestOuterJoinEliminator(c *C) {
|
||||
defer testleak.AfterTest(c)()
|
||||
tests := []struct {
|
||||
sql string
|
||||
best string
|
||||
}{
|
||||
// Test left outer join + distinct
|
||||
{
|
||||
sql: "select distinct t1.a, t1.b from t t1 left outer join t t2 on t1.b = t2.b",
|
||||
best: "DataScan(t1)->Aggr(firstrow(t1.a),firstrow(t1.b))",
|
||||
},
|
||||
// Test right outer join + distinct
|
||||
{
|
||||
sql: "select distinct t2.a, t2.b from t t1 right outer join t t2 on t1.b = t2.b",
|
||||
best: "DataScan(t2)->Aggr(firstrow(t2.a),firstrow(t2.b))",
|
||||
},
|
||||
// Test duplicate agnostic agg functions on join
|
||||
{
|
||||
sql: "select max(t1.a), min(t1.b) from t t1 left join t t2 on t1.b = t2.b",
|
||||
best: "DataScan(t1)->Aggr(max(t1.a),min(t1.b))->Projection",
|
||||
},
|
||||
{
|
||||
sql: "select sum(distinct t1.a) from t t1 left join t t2 on t1.a = t2.a and t1.b = t2.b",
|
||||
best: "DataScan(t1)->Aggr(sum(t1.a))->Projection",
|
||||
},
|
||||
{
|
||||
sql: "select count(distinct t1.a, t1.b) from t t1 left join t t2 on t1.b = t2.b",
|
||||
best: "DataScan(t1)->Aggr(count(t1.a, t1.b))->Projection",
|
||||
},
|
||||
// Test left outer join
|
||||
{
|
||||
sql: "select t1.b from t t1 left outer join t t2 on t1.a = t2.a",
|
||||
best: "DataScan(t1)->Projection",
|
||||
},
|
||||
// Test right outer join
|
||||
{
|
||||
sql: "select t2.b from t t1 right outer join t t2 on t1.a = t2.a",
|
||||
best: "DataScan(t2)->Projection",
|
||||
},
|
||||
// For complex join query
|
||||
{
|
||||
sql: "select max(t3.b) from (t t1 left join t t2 on t1.a = t2.a) right join t t3 on t1.b = t3.b",
|
||||
best: "DataScan(t3)->TopN([t3.b true],0,1)->Aggr(max(t3.b))->Projection",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
comment := Commentf("case:%v sql:%s", i, tt.sql)
|
||||
stmt, err := s.ParseOneStmt(tt.sql, "", "")
|
||||
c.Assert(err, IsNil, comment)
|
||||
Preprocess(s.ctx, stmt, s.is, false)
|
||||
builder := &PlanBuilder{
|
||||
ctx: mockContext(),
|
||||
is: s.is,
|
||||
colMapper: make(map[*ast.ColumnNameExpr]int),
|
||||
}
|
||||
p, err := builder.Build(stmt)
|
||||
c.Assert(err, IsNil)
|
||||
p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
|
||||
c.Assert(err, IsNil)
|
||||
c.Assert(ToString(p), Equals, tt.best, comment)
|
||||
}
|
||||
}
|
||||
|
||||
@ -39,6 +39,7 @@ const (
|
||||
flagEliminateProjection
|
||||
flagMaxMinEliminate
|
||||
flagPredicatePushDown
|
||||
flagEliminateOuterJoin
|
||||
flagPartitionProcessor
|
||||
flagPushDownAgg
|
||||
flagPushDownTopN
|
||||
@ -52,6 +53,7 @@ var optRuleList = []logicalOptRule{
|
||||
&projectionEliminater{},
|
||||
&maxMinEliminator{},
|
||||
&ppdSolver{},
|
||||
&outerJoinEliminator{},
|
||||
&partitionProcessor{},
|
||||
&aggregationPushDownSolver{},
|
||||
&pushDownTopNOptimizer{},
|
||||
|
||||
192
planner/core/rule_join_elimination.go
Normal file
192
planner/core/rule_join_elimination.go
Normal file
@ -0,0 +1,192 @@
|
||||
// Copyright 2018 PingCAP, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package core
|
||||
|
||||
import (
|
||||
"github.com/pingcap/parser/ast"
|
||||
"github.com/pingcap/tidb/expression"
|
||||
)
|
||||
|
||||
type outerJoinEliminator struct {
|
||||
}
|
||||
|
||||
// tryToEliminateOuterJoin will eliminate outer join plan base on the following rules
|
||||
// 1. outer join elimination: For example left outer join, if the parent only use the
|
||||
// columns from left table and the join key of right table(the inner table) is a unique
|
||||
// key of the right table. the left outer join can be eliminated.
|
||||
// 2. outer join elimination with duplicate agnostic aggregate functions: For example left outer join.
|
||||
// If the parent only use the columns from left table with 'distinct' label. The left outer join can
|
||||
// be eliminated.
|
||||
func (o *outerJoinEliminator) tryToEliminateOuterJoin(p *LogicalJoin, aggCols []*expression.Column, parentSchema *expression.Schema) LogicalPlan {
|
||||
var innerChildIdx int
|
||||
switch p.JoinType {
|
||||
case LeftOuterJoin:
|
||||
innerChildIdx = 1
|
||||
case RightOuterJoin:
|
||||
innerChildIdx = 0
|
||||
default:
|
||||
return p
|
||||
}
|
||||
|
||||
outerPlan := p.children[1^innerChildIdx]
|
||||
innerPlan := p.children[innerChildIdx]
|
||||
// outer join elimination with duplicate agnostic aggregate functions
|
||||
if o.isAggColsAllFromOuterTable(outerPlan, aggCols) {
|
||||
return outerPlan
|
||||
}
|
||||
// outer join elimination without duplicate agnostic aggregate functions
|
||||
if !o.isParentColsAllFromOuterTable(outerPlan, parentSchema) {
|
||||
return p
|
||||
}
|
||||
innerJoinKeys := o.extractInnerJoinKeys(p, innerChildIdx)
|
||||
if o.isInnerJoinKeysContainUniqueKey(innerPlan, innerJoinKeys) {
|
||||
return outerPlan
|
||||
}
|
||||
if o.isInnerJoinKeysContainIndex(innerPlan, innerJoinKeys) {
|
||||
return outerPlan
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// extract join keys as a schema for inner child of a outer join
|
||||
func (o *outerJoinEliminator) extractInnerJoinKeys(join *LogicalJoin, innerChildIdx int) *expression.Schema {
|
||||
var joinKeys []*expression.Column
|
||||
for _, eqCond := range join.EqualConditions {
|
||||
joinKeys = append(joinKeys, eqCond.GetArgs()[innerChildIdx].(*expression.Column))
|
||||
}
|
||||
return expression.NewSchema(joinKeys...)
|
||||
}
|
||||
|
||||
func (o *outerJoinEliminator) isAggColsAllFromOuterTable(outerPlan LogicalPlan, aggCols []*expression.Column) bool {
|
||||
if len(aggCols) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, col := range aggCols {
|
||||
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
|
||||
if c, _ := outerPlan.Schema().FindColumn(columnName); c == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// check whether schema cols of join's parent plan are all from outer join table
|
||||
func (o *outerJoinEliminator) isParentColsAllFromOuterTable(outerPlan LogicalPlan, parentSchema *expression.Schema) bool {
|
||||
if parentSchema == nil {
|
||||
return false
|
||||
}
|
||||
for _, col := range parentSchema.Columns {
|
||||
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
|
||||
if c, _ := outerPlan.Schema().FindColumn(columnName); c == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// check whether one of unique keys sets is contained by inner join keys
|
||||
func (o *outerJoinEliminator) isInnerJoinKeysContainUniqueKey(innerPlan LogicalPlan, joinKeys *expression.Schema) bool {
|
||||
for _, keyInfo := range innerPlan.Schema().Keys {
|
||||
joinKeysContainKeyInfo := true
|
||||
for _, col := range keyInfo {
|
||||
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
|
||||
if c, _ := joinKeys.FindColumn(columnName); c == nil {
|
||||
joinKeysContainKeyInfo = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if joinKeysContainKeyInfo {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// check whether one of index sets is contained by inner join index
|
||||
func (o *outerJoinEliminator) isInnerJoinKeysContainIndex(innerPlan LogicalPlan, joinKeys *expression.Schema) bool {
|
||||
ds, ok := innerPlan.(*DataSource)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
for _, path := range ds.possibleAccessPaths {
|
||||
if path.isTablePath {
|
||||
continue
|
||||
}
|
||||
idx := path.index
|
||||
if !idx.Unique {
|
||||
continue
|
||||
}
|
||||
joinKeysContainIndex := true
|
||||
for _, idxCol := range idx.Columns {
|
||||
columnName := &ast.ColumnName{Schema: ds.DBName, Table: ds.tableInfo.Name, Name: idxCol.Name}
|
||||
if c, _ := joinKeys.FindColumn(columnName); c == nil {
|
||||
joinKeysContainIndex = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if joinKeysContainIndex {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Check whether a LogicalPlan is a LogicalAggregation and its all aggregate functions is duplicate agnostic.
|
||||
// Also, check all the args are expression.Column.
|
||||
func (o *outerJoinEliminator) isDuplicateAgnosticAgg(p LogicalPlan) (_ bool, cols []*expression.Column) {
|
||||
agg, ok := p.(*LogicalAggregation)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
cols = agg.groupByCols
|
||||
for _, aggDesc := range agg.AggFuncs {
|
||||
if !aggDesc.HasDistinct &&
|
||||
aggDesc.Name != ast.AggFuncFirstRow &&
|
||||
aggDesc.Name != ast.AggFuncMax &&
|
||||
aggDesc.Name != ast.AggFuncMin {
|
||||
return false, nil
|
||||
}
|
||||
for _, expr := range aggDesc.Args {
|
||||
if col, ok := expr.(*expression.Column); ok {
|
||||
cols = append(cols, col)
|
||||
} else {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return true, cols
|
||||
}
|
||||
|
||||
func (o *outerJoinEliminator) doOptimize(p LogicalPlan, aggCols []*expression.Column, parentSchema *expression.Schema) LogicalPlan {
|
||||
// check the duplicate agnostic aggregate functions
|
||||
if ok, newCols := o.isDuplicateAgnosticAgg(p); ok {
|
||||
aggCols = newCols
|
||||
}
|
||||
|
||||
newChildren := make([]LogicalPlan, 0, len(p.Children()))
|
||||
for _, child := range p.Children() {
|
||||
newChild := o.doOptimize(child, aggCols, p.Schema())
|
||||
newChildren = append(newChildren, newChild)
|
||||
}
|
||||
p.SetChildren(newChildren...)
|
||||
join, isJoin := p.(*LogicalJoin)
|
||||
if !isJoin {
|
||||
return p
|
||||
}
|
||||
return o.tryToEliminateOuterJoin(join, aggCols, parentSchema)
|
||||
}
|
||||
|
||||
func (o *outerJoinEliminator) optimize(p LogicalPlan) (LogicalPlan, error) {
|
||||
return o.doOptimize(p, nil, nil), nil
|
||||
}
|
||||
Reference in New Issue
Block a user