1674 lines
64 KiB
Go
1674 lines
64 KiB
Go
// Copyright 2024 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package core
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math"
|
|
|
|
"github.com/pingcap/failpoint"
|
|
"github.com/pingcap/tidb/pkg/expression"
|
|
"github.com/pingcap/tidb/pkg/kv"
|
|
"github.com/pingcap/tidb/pkg/parser/ast"
|
|
"github.com/pingcap/tidb/pkg/parser/mysql"
|
|
"github.com/pingcap/tidb/pkg/planner/cardinality"
|
|
"github.com/pingcap/tidb/pkg/planner/core/base"
|
|
"github.com/pingcap/tidb/pkg/planner/core/cost"
|
|
"github.com/pingcap/tidb/pkg/planner/core/operator/logicalop"
|
|
"github.com/pingcap/tidb/pkg/planner/funcdep"
|
|
"github.com/pingcap/tidb/pkg/planner/property"
|
|
"github.com/pingcap/tidb/pkg/planner/util"
|
|
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
|
|
"github.com/pingcap/tidb/pkg/planner/util/utilfuncp"
|
|
"github.com/pingcap/tidb/pkg/types"
|
|
utilhint "github.com/pingcap/tidb/pkg/util/hint"
|
|
"github.com/pingcap/tidb/pkg/util/intset"
|
|
"github.com/pingcap/tidb/pkg/util/plancodec"
|
|
)
|
|
|
|
// JoinType identifies the kind of join a LogicalJoin performs: inner join,
// left/right outer join, semi join, anti semi join, and the two
// left-outer-semi variants.
type JoinType int

const (
	// InnerJoin means inner join.
	InnerJoin JoinType = iota
	// LeftOuterJoin means left join.
	LeftOuterJoin
	// RightOuterJoin means right join.
	RightOuterJoin
	// SemiJoin means if row a in table A matches some rows in B, just output a.
	SemiJoin
	// AntiSemiJoin means if row a in table A does not match any row in B, then output a.
	AntiSemiJoin
	// LeftOuterSemiJoin means if row a in table A matches some rows in B, output (a, true), otherwise, output (a, false).
	LeftOuterSemiJoin
	// AntiLeftOuterSemiJoin means if row a in table A matches some rows in B, output (a, false), otherwise, output (a, true).
	AntiLeftOuterSemiJoin
)

// IsOuterJoin reports whether tp is one of the outer join kinds
// (left/right outer join or either left-outer-semi variant).
func (tp JoinType) IsOuterJoin() bool {
	switch tp {
	case LeftOuterJoin, RightOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
		return true
	}
	return false
}

// IsSemiJoin reports whether tp is one of the semi/anti-semi join kinds,
// including the two left-outer-semi variants.
func (tp JoinType) IsSemiJoin() bool {
	switch tp {
	case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
		return true
	}
	return false
}

// String returns the lower-case, human readable name of the join type, or
// "unsupported join type" for a value outside the declared constants.
func (tp JoinType) String() string {
	names := [...]string{
		InnerJoin:             "inner join",
		LeftOuterJoin:         "left outer join",
		RightOuterJoin:        "right outer join",
		SemiJoin:              "semi join",
		AntiSemiJoin:          "anti semi join",
		LeftOuterSemiJoin:     "left outer semi join",
		AntiLeftOuterSemiJoin: "anti left outer semi join",
	}
	if tp < 0 || int(tp) >= len(names) {
		return "unsupported join type"
	}
	return names[tp]
}
|
|
|
|
// LogicalJoin is the logical join plan.
|
|
type LogicalJoin struct {
|
|
logicalop.LogicalSchemaProducer
|
|
|
|
JoinType JoinType
|
|
Reordered bool
|
|
CartesianJoin bool
|
|
StraightJoin bool
|
|
|
|
// HintInfo stores the join algorithm hint information specified by client.
|
|
HintInfo *utilhint.PlanHints
|
|
PreferJoinType uint
|
|
PreferJoinOrder bool
|
|
LeftPreferJoinType uint
|
|
RightPreferJoinType uint
|
|
|
|
EqualConditions []*expression.ScalarFunction
|
|
// NAEQConditions means null aware equal conditions, which is used for null aware semi joins.
|
|
NAEQConditions []*expression.ScalarFunction
|
|
LeftConditions expression.CNFExprs
|
|
RightConditions expression.CNFExprs
|
|
OtherConditions expression.CNFExprs
|
|
|
|
LeftProperties [][]*expression.Column
|
|
RightProperties [][]*expression.Column
|
|
|
|
// DefaultValues is only used for left/right outer join, which is values the inner row's should be when the outer table
|
|
// doesn't match any inner table's row.
|
|
// That it's nil just means the default values is a slice of NULL.
|
|
// Currently, only `aggregation push down` phase will set this.
|
|
DefaultValues []types.Datum
|
|
|
|
// FullSchema contains all the columns that the Join can output. It's ordered as [outer schema..., inner schema...].
|
|
// This is useful for natural joins and "using" joins. In these cases, the join key columns from the
|
|
// inner side (or the right side when it's an inner join) will not be in the schema of Join.
|
|
// But upper operators should be able to find those "redundant" columns, and the user also can specifically select
|
|
// those columns, so we put the "redundant" columns here to make them be able to be found.
|
|
//
|
|
// For example:
|
|
// create table t1(a int, b int); create table t2(a int, b int);
|
|
// select * from t1 join t2 using (b);
|
|
// schema of the Join will be [t1.b, t1.a, t2.a]; FullSchema will be [t1.a, t1.b, t2.a, t2.b].
|
|
//
|
|
// We record all columns and keep them ordered is for correctly handling SQLs like
|
|
// select t1.*, t2.* from t1 join t2 using (b);
|
|
// (*PlanBuilder).unfoldWildStar() handles the schema for such case.
|
|
FullSchema *expression.Schema
|
|
FullNames types.NameSlice
|
|
|
|
// EqualCondOutCnt indicates the estimated count of joined rows after evaluating `EqualConditions`.
|
|
EqualCondOutCnt float64
|
|
}
|
|
|
|
// Init initializes LogicalJoin.
|
|
func (p LogicalJoin) Init(ctx base.PlanContext, offset int) *LogicalJoin {
|
|
p.BaseLogicalPlan = logicalop.NewBaseLogicalPlan(ctx, plancodec.TypeJoin, &p, offset)
|
|
return &p
|
|
}
|
|
|
|
// *************************** start implementation of Plan interface ***************************
|
|
|
|
// ExplainInfo implements Plan interface.
|
|
func (p *LogicalJoin) ExplainInfo() string {
|
|
evalCtx := p.SCtx().GetExprCtx().GetEvalCtx()
|
|
buffer := bytes.NewBufferString(p.JoinType.String())
|
|
if len(p.EqualConditions) > 0 {
|
|
fmt.Fprintf(buffer, ", equal:%v", p.EqualConditions)
|
|
}
|
|
if len(p.LeftConditions) > 0 {
|
|
fmt.Fprintf(buffer, ", left cond:%s",
|
|
expression.SortedExplainExpressionList(evalCtx, p.LeftConditions))
|
|
}
|
|
if len(p.RightConditions) > 0 {
|
|
fmt.Fprintf(buffer, ", right cond:%s",
|
|
expression.SortedExplainExpressionList(evalCtx, p.RightConditions))
|
|
}
|
|
if len(p.OtherConditions) > 0 {
|
|
fmt.Fprintf(buffer, ", other cond:%s",
|
|
expression.SortedExplainExpressionList(evalCtx, p.OtherConditions))
|
|
}
|
|
return buffer.String()
|
|
}
|
|
|
|
// ReplaceExprColumns implements base.LogicalPlan interface.
|
|
func (p *LogicalJoin) ReplaceExprColumns(replace map[string]*expression.Column) {
|
|
for _, equalExpr := range p.EqualConditions {
|
|
ResolveExprAndReplace(equalExpr, replace)
|
|
}
|
|
for _, leftExpr := range p.LeftConditions {
|
|
ResolveExprAndReplace(leftExpr, replace)
|
|
}
|
|
for _, rightExpr := range p.RightConditions {
|
|
ResolveExprAndReplace(rightExpr, replace)
|
|
}
|
|
for _, otherExpr := range p.OtherConditions {
|
|
ResolveExprAndReplace(otherExpr, replace)
|
|
}
|
|
}
|
|
|
|
// *************************** end implementation of Plan interface ***************************
|
|
|
|
// *************************** start implementation of logicalPlan interface ***************************
|
|
|
|
// HashCode inherits the BaseLogicalPlan.LogicalPlan.<0th> implementation.
|
|
|
|
// PredicatePushDown implements the base.LogicalPlan.<1st> interface.
|
|
func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression, opt *optimizetrace.LogicalOptimizeOp) (ret []expression.Expression, retPlan base.LogicalPlan) {
|
|
var equalCond []*expression.ScalarFunction
|
|
var leftPushCond, rightPushCond, otherCond, leftCond, rightCond []expression.Expression
|
|
switch p.JoinType {
|
|
case LeftOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
|
|
predicates = p.outerJoinPropConst(predicates)
|
|
dual := Conds2TableDual(p, predicates)
|
|
if dual != nil {
|
|
appendTableDualTraceStep(p, dual, predicates, opt)
|
|
return ret, dual
|
|
}
|
|
// Handle where conditions
|
|
predicates = expression.ExtractFiltersFromDNFs(p.SCtx().GetExprCtx(), predicates)
|
|
// Only derive left where condition, because right where condition cannot be pushed down
|
|
equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(predicates, true, false)
|
|
leftCond = leftPushCond
|
|
// Handle join conditions, only derive right join condition, because left join condition cannot be pushed down
|
|
_, derivedRightJoinCond := DeriveOtherConditions(
|
|
p, p.Children()[0].Schema(), p.Children()[1].Schema(), false, true)
|
|
rightCond = append(p.RightConditions, derivedRightJoinCond...)
|
|
p.RightConditions = nil
|
|
ret = append(expression.ScalarFuncs2Exprs(equalCond), otherCond...)
|
|
ret = append(ret, rightPushCond...)
|
|
case RightOuterJoin:
|
|
predicates = p.outerJoinPropConst(predicates)
|
|
dual := Conds2TableDual(p, predicates)
|
|
if dual != nil {
|
|
appendTableDualTraceStep(p, dual, predicates, opt)
|
|
return ret, dual
|
|
}
|
|
// Handle where conditions
|
|
predicates = expression.ExtractFiltersFromDNFs(p.SCtx().GetExprCtx(), predicates)
|
|
// Only derive right where condition, because left where condition cannot be pushed down
|
|
equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(predicates, false, true)
|
|
rightCond = rightPushCond
|
|
// Handle join conditions, only derive left join condition, because right join condition cannot be pushed down
|
|
derivedLeftJoinCond, _ := DeriveOtherConditions(
|
|
p, p.Children()[0].Schema(), p.Children()[1].Schema(), true, false)
|
|
leftCond = append(p.LeftConditions, derivedLeftJoinCond...)
|
|
p.LeftConditions = nil
|
|
ret = append(expression.ScalarFuncs2Exprs(equalCond), otherCond...)
|
|
ret = append(ret, leftPushCond...)
|
|
case SemiJoin, InnerJoin:
|
|
tempCond := make([]expression.Expression, 0, len(p.LeftConditions)+len(p.RightConditions)+len(p.EqualConditions)+len(p.OtherConditions)+len(predicates))
|
|
tempCond = append(tempCond, p.LeftConditions...)
|
|
tempCond = append(tempCond, p.RightConditions...)
|
|
tempCond = append(tempCond, expression.ScalarFuncs2Exprs(p.EqualConditions)...)
|
|
tempCond = append(tempCond, p.OtherConditions...)
|
|
tempCond = append(tempCond, predicates...)
|
|
tempCond = expression.ExtractFiltersFromDNFs(p.SCtx().GetExprCtx(), tempCond)
|
|
tempCond = expression.PropagateConstant(p.SCtx().GetExprCtx(), tempCond)
|
|
// Return table dual when filter is constant false or null.
|
|
dual := Conds2TableDual(p, tempCond)
|
|
if dual != nil {
|
|
appendTableDualTraceStep(p, dual, tempCond, opt)
|
|
return ret, dual
|
|
}
|
|
equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(tempCond, true, true)
|
|
p.LeftConditions = nil
|
|
p.RightConditions = nil
|
|
p.EqualConditions = equalCond
|
|
p.OtherConditions = otherCond
|
|
leftCond = leftPushCond
|
|
rightCond = rightPushCond
|
|
case AntiSemiJoin:
|
|
predicates = expression.PropagateConstant(p.SCtx().GetExprCtx(), predicates)
|
|
// Return table dual when filter is constant false or null.
|
|
dual := Conds2TableDual(p, predicates)
|
|
if dual != nil {
|
|
appendTableDualTraceStep(p, dual, predicates, opt)
|
|
return ret, dual
|
|
}
|
|
// `predicates` should only contain left conditions or constant filters.
|
|
_, leftPushCond, rightPushCond, _ = p.extractOnCondition(predicates, true, true)
|
|
// Do not derive `is not null` for anti join, since it may cause wrong results.
|
|
// For example:
|
|
// `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`,
|
|
// `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b` does not imply `t1.b is not null`,
|
|
// `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`,
|
|
leftCond = leftPushCond
|
|
rightCond = append(p.RightConditions, rightPushCond...)
|
|
p.RightConditions = nil
|
|
}
|
|
leftCond = expression.RemoveDupExprs(leftCond)
|
|
rightCond = expression.RemoveDupExprs(rightCond)
|
|
leftRet, lCh := p.Children()[0].PredicatePushDown(leftCond, opt)
|
|
rightRet, rCh := p.Children()[1].PredicatePushDown(rightCond, opt)
|
|
utilfuncp.AddSelection(p, lCh, leftRet, 0, opt)
|
|
utilfuncp.AddSelection(p, rCh, rightRet, 1, opt)
|
|
p.updateEQCond()
|
|
buildKeyInfo(p)
|
|
return ret, p.Self()
|
|
}
|
|
|
|
// PruneColumns implements the base.LogicalPlan.<2nd> interface.
|
|
func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *optimizetrace.LogicalOptimizeOp) (base.LogicalPlan, error) {
|
|
leftCols, rightCols := p.extractUsedCols(parentUsedCols)
|
|
|
|
var err error
|
|
p.Children()[0], err = p.Children()[0].PruneColumns(leftCols, opt)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
addConstOneForEmptyProjection(p.Children()[0])
|
|
|
|
p.Children()[1], err = p.Children()[1].PruneColumns(rightCols, opt)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
addConstOneForEmptyProjection(p.Children()[1])
|
|
|
|
p.mergeSchema()
|
|
if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
|
|
joinCol := p.Schema().Columns[len(p.Schema().Columns)-1]
|
|
parentUsedCols = append(parentUsedCols, joinCol)
|
|
}
|
|
p.InlineProjection(parentUsedCols, opt)
|
|
return p, nil
|
|
}
|
|
|
|
// FindBestTask inherits the BaseLogicalPlan.LogicalPlan.<3rd> implementation.
|
|
|
|
// BuildKeyInfo implements the base.LogicalPlan.<4th> interface.
|
|
func (p *LogicalJoin) BuildKeyInfo(selfSchema *expression.Schema, childSchema []*expression.Schema) {
|
|
p.LogicalSchemaProducer.BuildKeyInfo(selfSchema, childSchema)
|
|
switch p.JoinType {
|
|
case SemiJoin, LeftOuterSemiJoin, AntiSemiJoin, AntiLeftOuterSemiJoin:
|
|
selfSchema.Keys = childSchema[0].Clone().Keys
|
|
case InnerJoin, LeftOuterJoin, RightOuterJoin:
|
|
// If there is no equal conditions, then cartesian product can't be prevented and unique key information will destroy.
|
|
if len(p.EqualConditions) == 0 {
|
|
return
|
|
}
|
|
lOk := false
|
|
rOk := false
|
|
// Such as 'select * from t1 join t2 where t1.a = t2.a and t1.b = t2.b'.
|
|
// If one sides (a, b) is a unique key, then the unique key information is remained.
|
|
// But we don't consider this situation currently.
|
|
// Only key made by one column is considered now.
|
|
evalCtx := p.SCtx().GetExprCtx().GetEvalCtx()
|
|
for _, expr := range p.EqualConditions {
|
|
ln := expr.GetArgs()[0].(*expression.Column)
|
|
rn := expr.GetArgs()[1].(*expression.Column)
|
|
for _, key := range childSchema[0].Keys {
|
|
if len(key) == 1 && key[0].Equal(evalCtx, ln) {
|
|
lOk = true
|
|
break
|
|
}
|
|
}
|
|
for _, key := range childSchema[1].Keys {
|
|
if len(key) == 1 && key[0].Equal(evalCtx, rn) {
|
|
rOk = true
|
|
break
|
|
}
|
|
}
|
|
}
|
|
// For inner join, if one side of one equal condition is unique key,
|
|
// another side's unique key information will all be reserved.
|
|
// If it's an outer join, NULL value will fill some position, which will destroy the unique key information.
|
|
if lOk && p.JoinType != LeftOuterJoin {
|
|
selfSchema.Keys = append(selfSchema.Keys, childSchema[1].Keys...)
|
|
}
|
|
if rOk && p.JoinType != RightOuterJoin {
|
|
selfSchema.Keys = append(selfSchema.Keys, childSchema[0].Keys...)
|
|
}
|
|
}
|
|
}
|
|
|
|
// PushDownTopN implements the base.LogicalPlan.<5th> interface.
|
|
func (p *LogicalJoin) PushDownTopN(topNLogicalPlan base.LogicalPlan, opt *optimizetrace.LogicalOptimizeOp) base.LogicalPlan {
|
|
var topN *LogicalTopN
|
|
if topNLogicalPlan != nil {
|
|
topN = topNLogicalPlan.(*LogicalTopN)
|
|
}
|
|
switch p.JoinType {
|
|
case LeftOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
|
|
p.Children()[0] = p.pushDownTopNToChild(topN, 0, opt)
|
|
p.Children()[1] = p.Children()[1].PushDownTopN(nil, opt)
|
|
case RightOuterJoin:
|
|
p.Children()[1] = p.pushDownTopNToChild(topN, 1, opt)
|
|
p.Children()[0] = p.Children()[0].PushDownTopN(nil, opt)
|
|
default:
|
|
return p.BaseLogicalPlan.PushDownTopN(topN, opt)
|
|
}
|
|
|
|
// The LogicalJoin may be also a LogicalApply. So we must use self to set parents.
|
|
if topN != nil {
|
|
return topN.AttachChild(p.Self(), opt)
|
|
}
|
|
return p.Self()
|
|
}
|
|
|
|
// DeriveTopN inherits the BaseLogicalPlan.LogicalPlan.<6th> implementation.
|
|
|
|
// PredicateSimplification inherits the BaseLogicalPlan.LogicalPlan.<7th> implementation.
|
|
|
|
// ConstantPropagation implements the base.LogicalPlan.<8th> interface.
|
|
// about the logic of constant propagation in From List.
|
|
// Query: select * from t, (select a, b from s where s.a>1) tmp where tmp.a=t.a
|
|
// Origin logical plan:
|
|
/*
|
|
+----------------+
|
|
| LogicalJoin |
|
|
+-------^--------+
|
|
|
|
|
+-------------+--------------+
|
|
| |
|
|
+-----+------+ +------+------+
|
|
| Projection | | TableScan |
|
|
+-----^------+ +-------------+
|
|
|
|
|
|
|
|
+-----+------+
|
|
| Selection |
|
|
| s.a>1 |
|
|
+------------+
|
|
*/
|
|
// 1. 'PullUpConstantPredicates': Call this function until find selection and pull up the constant predicate layer by layer
|
|
// LogicalSelection: find the s.a>1
|
|
// LogicalProjection: get the s.a>1 and pull up it, changed to tmp.a>1
|
|
// 2. 'addCandidateSelection': Add selection above of LogicalJoin,
|
|
// put all predicates pulled up from the lower layer into the current new selection.
|
|
// LogicalSelection: tmp.a >1
|
|
//
|
|
// Optimized plan:
|
|
/*
|
|
+----------------+
|
|
| Selection |
|
|
| tmp.a>1 |
|
|
+-------^--------+
|
|
|
|
|
+-------+--------+
|
|
| LogicalJoin |
|
|
+-------^--------+
|
|
|
|
|
+-------------+--------------+
|
|
| |
|
|
+-----+------+ +------+------+
|
|
| Projection | | TableScan |
|
|
+-----^------+ +-------------+
|
|
|
|
|
|
|
|
+-----+------+
|
|
| Selection |
|
|
| s.a>1 |
|
|
+------------+
|
|
*/
|
|
// Return nil if the root of plan has not been changed
|
|
// Return new root if the root of plan is changed to selection
|
|
func (p *LogicalJoin) ConstantPropagation(parentPlan base.LogicalPlan, currentChildIdx int, opt *optimizetrace.LogicalOptimizeOp) (newRoot base.LogicalPlan) {
|
|
// step1: get constant predicate from left or right according to the JoinType
|
|
var getConstantPredicateFromLeft bool
|
|
var getConstantPredicateFromRight bool
|
|
switch p.JoinType {
|
|
case LeftOuterJoin:
|
|
getConstantPredicateFromLeft = true
|
|
case RightOuterJoin:
|
|
getConstantPredicateFromRight = true
|
|
case InnerJoin:
|
|
getConstantPredicateFromLeft = true
|
|
getConstantPredicateFromRight = true
|
|
default:
|
|
return
|
|
}
|
|
var candidateConstantPredicates []expression.Expression
|
|
if getConstantPredicateFromLeft {
|
|
candidateConstantPredicates = p.Children()[0].PullUpConstantPredicates()
|
|
}
|
|
if getConstantPredicateFromRight {
|
|
candidateConstantPredicates = append(candidateConstantPredicates, p.Children()[1].PullUpConstantPredicates()...)
|
|
}
|
|
if len(candidateConstantPredicates) == 0 {
|
|
return
|
|
}
|
|
|
|
// step2: add selection above of LogicalJoin
|
|
return addCandidateSelection(p, currentChildIdx, parentPlan, candidateConstantPredicates, opt)
|
|
}
|
|
|
|
// PullUpConstantPredicates inherits the BaseLogicalPlan.LogicalPlan.<9th> implementation.
|
|
|
|
// RecursiveDeriveStats inherits the BaseLogicalPlan.LogicalPlan.<10th> implementation.
|
|
|
|
// DeriveStats implements the base.LogicalPlan.<11th> interface.
|
|
// If the type of join is SemiJoin, the selectivity of it will be same as selection's.
|
|
// If the type of join is LeftOuterSemiJoin, it will not add or remove any row. The last column is a boolean value, whose NDV should be two.
|
|
// If the type of join is inner/outer join, the output of join(s, t) should be N(s) * N(t) / (V(s.key) * V(t.key)) * Min(s.key, t.key).
|
|
// N(s) stands for the number of rows in relation s. V(s.key) means the NDV of join key in s.
|
|
// This is a quite simple strategy: We assume every bucket of relation which will participate join has the same number of rows, and apply cross join for
|
|
// every matched bucket.
|
|
func (p *LogicalJoin) DeriveStats(childStats []*property.StatsInfo, selfSchema *expression.Schema, childSchema []*expression.Schema, colGroups [][]*expression.Column) (*property.StatsInfo, error) {
|
|
if p.StatsInfo() != nil {
|
|
// Reload GroupNDVs since colGroups may have changed.
|
|
p.StatsInfo().GroupNDVs = p.getGroupNDVs(colGroups, childStats)
|
|
return p.StatsInfo(), nil
|
|
}
|
|
leftProfile, rightProfile := childStats[0], childStats[1]
|
|
leftJoinKeys, rightJoinKeys, _, _ := p.GetJoinKeys()
|
|
p.EqualCondOutCnt = cardinality.EstimateFullJoinRowCount(p.SCtx(),
|
|
0 == len(p.EqualConditions),
|
|
leftProfile, rightProfile,
|
|
leftJoinKeys, rightJoinKeys,
|
|
childSchema[0], childSchema[1],
|
|
nil, nil)
|
|
if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin {
|
|
p.SetStats(&property.StatsInfo{
|
|
RowCount: leftProfile.RowCount * cost.SelectionFactor,
|
|
ColNDVs: make(map[int64]float64, len(leftProfile.ColNDVs)),
|
|
})
|
|
for id, c := range leftProfile.ColNDVs {
|
|
p.StatsInfo().ColNDVs[id] = c * cost.SelectionFactor
|
|
}
|
|
return p.StatsInfo(), nil
|
|
}
|
|
if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
|
|
p.SetStats(&property.StatsInfo{
|
|
RowCount: leftProfile.RowCount,
|
|
ColNDVs: make(map[int64]float64, selfSchema.Len()),
|
|
})
|
|
for id, c := range leftProfile.ColNDVs {
|
|
p.StatsInfo().ColNDVs[id] = c
|
|
}
|
|
p.StatsInfo().ColNDVs[selfSchema.Columns[selfSchema.Len()-1].UniqueID] = 2.0
|
|
p.StatsInfo().GroupNDVs = p.getGroupNDVs(colGroups, childStats)
|
|
return p.StatsInfo(), nil
|
|
}
|
|
count := p.EqualCondOutCnt
|
|
if p.JoinType == LeftOuterJoin {
|
|
count = math.Max(count, leftProfile.RowCount)
|
|
} else if p.JoinType == RightOuterJoin {
|
|
count = math.Max(count, rightProfile.RowCount)
|
|
}
|
|
colNDVs := make(map[int64]float64, selfSchema.Len())
|
|
for id, c := range leftProfile.ColNDVs {
|
|
colNDVs[id] = math.Min(c, count)
|
|
}
|
|
for id, c := range rightProfile.ColNDVs {
|
|
colNDVs[id] = math.Min(c, count)
|
|
}
|
|
p.SetStats(&property.StatsInfo{
|
|
RowCount: count,
|
|
ColNDVs: colNDVs,
|
|
})
|
|
p.StatsInfo().GroupNDVs = p.getGroupNDVs(colGroups, childStats)
|
|
return p.StatsInfo(), nil
|
|
}
|
|
|
|
// ExtractColGroups implements the base.LogicalPlan.<12th> interface.
|
|
func (p *LogicalJoin) ExtractColGroups(colGroups [][]*expression.Column) [][]*expression.Column {
|
|
leftJoinKeys, rightJoinKeys, _, _ := p.GetJoinKeys()
|
|
extracted := make([][]*expression.Column, 0, 2+len(colGroups))
|
|
if len(leftJoinKeys) > 1 && (p.JoinType == InnerJoin || p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin) {
|
|
extracted = append(extracted, expression.SortColumns(leftJoinKeys), expression.SortColumns(rightJoinKeys))
|
|
}
|
|
var outerSchema *expression.Schema
|
|
if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
|
|
outerSchema = p.Children()[0].Schema()
|
|
} else if p.JoinType == RightOuterJoin {
|
|
outerSchema = p.Children()[1].Schema()
|
|
}
|
|
if len(colGroups) == 0 || outerSchema == nil {
|
|
return extracted
|
|
}
|
|
_, offsets := outerSchema.ExtractColGroups(colGroups)
|
|
if len(offsets) == 0 {
|
|
return extracted
|
|
}
|
|
for _, offset := range offsets {
|
|
extracted = append(extracted, colGroups[offset])
|
|
}
|
|
return extracted
|
|
}
|
|
|
|
// PreparePossibleProperties implements base.LogicalPlan.<13th> interface.
|
|
func (p *LogicalJoin) PreparePossibleProperties(_ *expression.Schema, childrenProperties ...[][]*expression.Column) [][]*expression.Column {
|
|
leftProperties := childrenProperties[0]
|
|
rightProperties := childrenProperties[1]
|
|
// TODO: We should consider properties propagation.
|
|
p.LeftProperties = leftProperties
|
|
p.RightProperties = rightProperties
|
|
if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin {
|
|
rightProperties = nil
|
|
} else if p.JoinType == RightOuterJoin {
|
|
leftProperties = nil
|
|
}
|
|
resultProperties := make([][]*expression.Column, len(leftProperties)+len(rightProperties))
|
|
for i, cols := range leftProperties {
|
|
resultProperties[i] = make([]*expression.Column, len(cols))
|
|
copy(resultProperties[i], cols)
|
|
}
|
|
leftLen := len(leftProperties)
|
|
for i, cols := range rightProperties {
|
|
resultProperties[leftLen+i] = make([]*expression.Column, len(cols))
|
|
copy(resultProperties[leftLen+i], cols)
|
|
}
|
|
return resultProperties
|
|
}
|
|
|
|
// ExhaustPhysicalPlans implements the base.LogicalPlan.<14th> interface.
|
|
// it can generates hash join, index join and sort merge join.
|
|
// Firstly we check the hint, if hint is figured by user, we force to choose the corresponding physical plan.
|
|
// If the hint is not matched, it will get other candidates.
|
|
// If the hint is not figured, we will pick all candidates.
|
|
func (p *LogicalJoin) ExhaustPhysicalPlans(prop *property.PhysicalProperty) ([]base.PhysicalPlan, bool, error) {
|
|
failpoint.Inject("MockOnlyEnableIndexHashJoin", func(val failpoint.Value) {
|
|
if val.(bool) && !p.SCtx().GetSessionVars().InRestrictedSQL {
|
|
indexJoins, _ := tryToGetIndexJoin(p, prop)
|
|
failpoint.Return(indexJoins, true, nil)
|
|
}
|
|
})
|
|
|
|
if !isJoinHintSupportedInMPPMode(p.PreferJoinType) {
|
|
if hasMPPJoinHints(p.PreferJoinType) {
|
|
// If there are MPP hints but has some conflicts join method hints, all the join hints are invalid.
|
|
p.SCtx().GetSessionVars().StmtCtx.SetHintWarning("The MPP join hints are in conflict, and you can only specify join method hints that are currently supported by MPP mode now")
|
|
p.PreferJoinType = 0
|
|
} else {
|
|
// If there are no MPP hints but has some conflicts join method hints, the MPP mode will be blocked.
|
|
p.SCtx().GetSessionVars().RaiseWarningWhenMPPEnforced("MPP mode may be blocked because you have used hint to specify a join algorithm which is not supported by mpp now.")
|
|
if prop.IsFlashProp() {
|
|
return nil, false, nil
|
|
}
|
|
}
|
|
}
|
|
if prop.MPPPartitionTp == property.BroadcastType {
|
|
return nil, false, nil
|
|
}
|
|
joins := make([]base.PhysicalPlan, 0, 8)
|
|
canPushToTiFlash := p.CanPushToCop(kv.TiFlash)
|
|
if p.SCtx().GetSessionVars().IsMPPAllowed() && canPushToTiFlash {
|
|
if (p.PreferJoinType & utilhint.PreferShuffleJoin) > 0 {
|
|
if shuffleJoins := tryToGetMppHashJoin(p, prop, false); len(shuffleJoins) > 0 {
|
|
return shuffleJoins, true, nil
|
|
}
|
|
}
|
|
if (p.PreferJoinType & utilhint.PreferBCJoin) > 0 {
|
|
if bcastJoins := tryToGetMppHashJoin(p, prop, true); len(bcastJoins) > 0 {
|
|
return bcastJoins, true, nil
|
|
}
|
|
}
|
|
if preferMppBCJ(p) {
|
|
mppJoins := tryToGetMppHashJoin(p, prop, true)
|
|
joins = append(joins, mppJoins...)
|
|
} else {
|
|
mppJoins := tryToGetMppHashJoin(p, prop, false)
|
|
joins = append(joins, mppJoins...)
|
|
}
|
|
} else {
|
|
hasMppHints := false
|
|
var errMsg string
|
|
if (p.PreferJoinType & utilhint.PreferShuffleJoin) > 0 {
|
|
errMsg = "The join can not push down to the MPP side, the shuffle_join() hint is invalid"
|
|
hasMppHints = true
|
|
}
|
|
if (p.PreferJoinType & utilhint.PreferBCJoin) > 0 {
|
|
errMsg = "The join can not push down to the MPP side, the broadcast_join() hint is invalid"
|
|
hasMppHints = true
|
|
}
|
|
if hasMppHints {
|
|
p.SCtx().GetSessionVars().StmtCtx.SetHintWarning(errMsg)
|
|
}
|
|
}
|
|
if prop.IsFlashProp() {
|
|
return joins, true, nil
|
|
}
|
|
|
|
if !p.IsNAAJ() {
|
|
// naaj refuse merge join and index join.
|
|
mergeJoins := GetMergeJoin(p, prop, p.Schema(), p.StatsInfo(), p.Children()[0].StatsInfo(), p.Children()[1].StatsInfo())
|
|
if (p.PreferJoinType&utilhint.PreferMergeJoin) > 0 && len(mergeJoins) > 0 {
|
|
return mergeJoins, true, nil
|
|
}
|
|
joins = append(joins, mergeJoins...)
|
|
|
|
indexJoins, forced := tryToGetIndexJoin(p, prop)
|
|
if forced {
|
|
return indexJoins, true, nil
|
|
}
|
|
joins = append(joins, indexJoins...)
|
|
}
|
|
|
|
hashJoins, forced := getHashJoins(p, prop)
|
|
if forced && len(hashJoins) > 0 {
|
|
return hashJoins, true, nil
|
|
}
|
|
joins = append(joins, hashJoins...)
|
|
|
|
if p.PreferJoinType > 0 {
|
|
// If we reach here, it means we have a hint that doesn't work.
|
|
// It might be affected by the required property, so we enforce
|
|
// this property and try the hint again.
|
|
return joins, false, nil
|
|
}
|
|
return joins, true, nil
|
|
}
|
|
|
|
// ExtractCorrelatedCols implements the base.LogicalPlan.<15th> interface.
|
|
func (p *LogicalJoin) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
|
|
corCols := make([]*expression.CorrelatedColumn, 0, len(p.EqualConditions)+len(p.LeftConditions)+len(p.RightConditions)+len(p.OtherConditions))
|
|
for _, fun := range p.EqualConditions {
|
|
corCols = append(corCols, expression.ExtractCorColumns(fun)...)
|
|
}
|
|
for _, fun := range p.LeftConditions {
|
|
corCols = append(corCols, expression.ExtractCorColumns(fun)...)
|
|
}
|
|
for _, fun := range p.RightConditions {
|
|
corCols = append(corCols, expression.ExtractCorColumns(fun)...)
|
|
}
|
|
for _, fun := range p.OtherConditions {
|
|
corCols = append(corCols, expression.ExtractCorColumns(fun)...)
|
|
}
|
|
return corCols
|
|
}
|
|
|
|
// MaxOneRow inherits the BaseLogicalPlan.LogicalPlan.<16th> implementation.
|
|
|
|
// Children inherits the BaseLogicalPlan.LogicalPlan.<17th> implementation.
|
|
|
|
// SetChildren inherits the BaseLogicalPlan.LogicalPlan.<18th> implementation.
|
|
|
|
// SetChild inherits the BaseLogicalPlan.LogicalPlan.<19th> implementation.
|
|
|
|
// RollBackTaskMap inherits the BaseLogicalPlan.LogicalPlan.<20th> implementation.
|
|
|
|
// CanPushToCop inherits the BaseLogicalPlan.LogicalPlan.<21st> implementation.
|
|
|
|
// ExtractFD implements the base.LogicalPlan.<22th> interface.
|
|
func (p *LogicalJoin) ExtractFD() *funcdep.FDSet {
|
|
switch p.JoinType {
|
|
case InnerJoin:
|
|
return p.extractFDForInnerJoin(nil)
|
|
case LeftOuterJoin, RightOuterJoin:
|
|
return p.extractFDForOuterJoin(nil)
|
|
case SemiJoin:
|
|
return p.extractFDForSemiJoin(nil)
|
|
default:
|
|
return &funcdep.FDSet{HashCodeToUniqueID: make(map[string]int)}
|
|
}
|
|
}
|
|
|
|
// GetBaseLogicalPlan inherits the BaseLogicalPlan.LogicalPlan.<23rd> implementation.
|
|
|
|
// ConvertOuterToInnerJoin implements base.LogicalPlan.<24th> interface.
|
|
func (p *LogicalJoin) ConvertOuterToInnerJoin(predicates []expression.Expression) base.LogicalPlan {
|
|
innerTable := p.Children()[0]
|
|
outerTable := p.Children()[1]
|
|
switchChild := false
|
|
|
|
if p.JoinType == LeftOuterJoin {
|
|
innerTable, outerTable = outerTable, innerTable
|
|
switchChild = true
|
|
}
|
|
|
|
// First, simplify this join
|
|
if p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin {
|
|
canBeSimplified := false
|
|
for _, expr := range predicates {
|
|
isOk := util.IsNullRejected(p.SCtx(), innerTable.Schema(), expr)
|
|
if isOk {
|
|
canBeSimplified = true
|
|
break
|
|
}
|
|
}
|
|
if canBeSimplified {
|
|
p.JoinType = InnerJoin
|
|
}
|
|
}
|
|
|
|
// Next simplify join children
|
|
|
|
combinedCond := mergeOnClausePredicates(p, predicates)
|
|
if p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin {
|
|
innerTable = innerTable.ConvertOuterToInnerJoin(combinedCond)
|
|
outerTable = outerTable.ConvertOuterToInnerJoin(predicates)
|
|
} else if p.JoinType == InnerJoin || p.JoinType == SemiJoin {
|
|
innerTable = innerTable.ConvertOuterToInnerJoin(combinedCond)
|
|
outerTable = outerTable.ConvertOuterToInnerJoin(combinedCond)
|
|
} else if p.JoinType == AntiSemiJoin {
|
|
innerTable = innerTable.ConvertOuterToInnerJoin(predicates)
|
|
outerTable = outerTable.ConvertOuterToInnerJoin(combinedCond)
|
|
} else {
|
|
innerTable = innerTable.ConvertOuterToInnerJoin(predicates)
|
|
outerTable = outerTable.ConvertOuterToInnerJoin(predicates)
|
|
}
|
|
|
|
if switchChild {
|
|
p.SetChild(0, outerTable)
|
|
p.SetChild(1, innerTable)
|
|
} else {
|
|
p.SetChild(0, innerTable)
|
|
p.SetChild(1, outerTable)
|
|
}
|
|
|
|
return p
|
|
}
|
|
|
|
// *************************** end implementation of logicalPlan interface ***************************
|
|
|
|
// IsNAAJ checks if the join is a non-adjacent-join.
|
|
func (p *LogicalJoin) IsNAAJ() bool {
|
|
return len(p.NAEQConditions) > 0
|
|
}
|
|
|
|
// Shallow copies a LogicalJoin struct.
|
|
func (p *LogicalJoin) Shallow() *LogicalJoin {
|
|
join := *p
|
|
return join.Init(p.SCtx(), p.QueryBlockOffset())
|
|
}
|
|
|
|
func (p *LogicalJoin) extractFDForSemiJoin(filtersFromApply []expression.Expression) *funcdep.FDSet {
|
|
// 1: since semi join will keep the part or all rows of the outer table, it's outer FD can be saved.
|
|
// 2: the un-projected column will be left for the upper layer projection or already be pruned from bottom up.
|
|
outerFD, _ := p.Children()[0].ExtractFD(), p.Children()[1].ExtractFD()
|
|
fds := outerFD
|
|
|
|
eqCondSlice := expression.ScalarFuncs2Exprs(p.EqualConditions)
|
|
allConds := append(eqCondSlice, p.OtherConditions...)
|
|
allConds = append(allConds, filtersFromApply...)
|
|
notNullColsFromFilters := ExtractNotNullFromConds(allConds, p)
|
|
|
|
constUniqueIDs := ExtractConstantCols(p.LeftConditions, p.SCtx(), fds)
|
|
|
|
fds.MakeNotNull(notNullColsFromFilters)
|
|
fds.AddConstants(constUniqueIDs)
|
|
p.SetFDs(fds)
|
|
return fds
|
|
}
|
|
|
|
func (p *LogicalJoin) extractFDForInnerJoin(filtersFromApply []expression.Expression) *funcdep.FDSet {
|
|
leftFD, rightFD := p.Children()[0].ExtractFD(), p.Children()[1].ExtractFD()
|
|
fds := leftFD
|
|
fds.MakeCartesianProduct(rightFD)
|
|
|
|
eqCondSlice := expression.ScalarFuncs2Exprs(p.EqualConditions)
|
|
// some join eq conditions are stored in the OtherConditions.
|
|
allConds := append(eqCondSlice, p.OtherConditions...)
|
|
allConds = append(allConds, filtersFromApply...)
|
|
notNullColsFromFilters := ExtractNotNullFromConds(allConds, p)
|
|
|
|
constUniqueIDs := ExtractConstantCols(allConds, p.SCtx(), fds)
|
|
|
|
equivUniqueIDs := ExtractEquivalenceCols(allConds, p.SCtx(), fds)
|
|
|
|
fds.MakeNotNull(notNullColsFromFilters)
|
|
fds.AddConstants(constUniqueIDs)
|
|
for _, equiv := range equivUniqueIDs {
|
|
fds.AddEquivalence(equiv[0], equiv[1])
|
|
}
|
|
// merge the not-null-cols/registered-map from both side together.
|
|
fds.NotNullCols.UnionWith(rightFD.NotNullCols)
|
|
if fds.HashCodeToUniqueID == nil {
|
|
fds.HashCodeToUniqueID = rightFD.HashCodeToUniqueID
|
|
} else {
|
|
for k, v := range rightFD.HashCodeToUniqueID {
|
|
// If there's same constant in the different subquery, we might go into this IF branch.
|
|
if _, ok := fds.HashCodeToUniqueID[k]; ok {
|
|
continue
|
|
}
|
|
fds.HashCodeToUniqueID[k] = v
|
|
}
|
|
}
|
|
for i, ok := rightFD.GroupByCols.Next(0); ok; i, ok = rightFD.GroupByCols.Next(i + 1) {
|
|
fds.GroupByCols.Insert(i)
|
|
}
|
|
fds.HasAggBuilt = fds.HasAggBuilt || rightFD.HasAggBuilt
|
|
p.SetFDs(fds)
|
|
return fds
|
|
}
|
|
|
|
func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expression) *funcdep.FDSet {
|
|
outerFD, innerFD := p.Children()[0].ExtractFD(), p.Children()[1].ExtractFD()
|
|
innerCondition := p.RightConditions
|
|
outerCondition := p.LeftConditions
|
|
outerCols, innerCols := intset.NewFastIntSet(), intset.NewFastIntSet()
|
|
for _, col := range p.Children()[0].Schema().Columns {
|
|
outerCols.Insert(int(col.UniqueID))
|
|
}
|
|
for _, col := range p.Children()[1].Schema().Columns {
|
|
innerCols.Insert(int(col.UniqueID))
|
|
}
|
|
if p.JoinType == RightOuterJoin {
|
|
innerFD, outerFD = outerFD, innerFD
|
|
innerCondition = p.LeftConditions
|
|
outerCondition = p.RightConditions
|
|
innerCols, outerCols = outerCols, innerCols
|
|
}
|
|
|
|
eqCondSlice := expression.ScalarFuncs2Exprs(p.EqualConditions)
|
|
allConds := append(eqCondSlice, p.OtherConditions...)
|
|
allConds = append(allConds, innerCondition...)
|
|
allConds = append(allConds, outerCondition...)
|
|
allConds = append(allConds, filtersFromApply...)
|
|
notNullColsFromFilters := ExtractNotNullFromConds(allConds, p)
|
|
|
|
filterFD := &funcdep.FDSet{HashCodeToUniqueID: make(map[string]int)}
|
|
|
|
constUniqueIDs := ExtractConstantCols(allConds, p.SCtx(), filterFD)
|
|
|
|
equivUniqueIDs := ExtractEquivalenceCols(allConds, p.SCtx(), filterFD)
|
|
|
|
filterFD.AddConstants(constUniqueIDs)
|
|
equivOuterUniqueIDs := intset.NewFastIntSet()
|
|
equivAcrossNum := 0
|
|
for _, equiv := range equivUniqueIDs {
|
|
filterFD.AddEquivalence(equiv[0], equiv[1])
|
|
if equiv[0].SubsetOf(outerCols) && equiv[1].SubsetOf(innerCols) {
|
|
equivOuterUniqueIDs.UnionWith(equiv[0])
|
|
equivAcrossNum++
|
|
continue
|
|
}
|
|
if equiv[0].SubsetOf(innerCols) && equiv[1].SubsetOf(outerCols) {
|
|
equivOuterUniqueIDs.UnionWith(equiv[1])
|
|
equivAcrossNum++
|
|
}
|
|
}
|
|
filterFD.MakeNotNull(notNullColsFromFilters)
|
|
|
|
// pre-perceive the filters for the convenience judgement of 3.3.1.
|
|
var opt funcdep.ArgOpts
|
|
if equivAcrossNum > 0 {
|
|
// find the equivalence FD across left and right cols.
|
|
var outConditionCols []*expression.Column
|
|
if len(outerCondition) != 0 {
|
|
outConditionCols = append(outConditionCols, expression.ExtractColumnsFromExpressions(nil, outerCondition, nil)...)
|
|
}
|
|
if len(p.OtherConditions) != 0 {
|
|
// other condition may contain right side cols, it doesn't affect the judgement of intersection of non-left-equiv cols.
|
|
outConditionCols = append(outConditionCols, expression.ExtractColumnsFromExpressions(nil, p.OtherConditions, nil)...)
|
|
}
|
|
outerConditionUniqueIDs := intset.NewFastIntSet()
|
|
for _, col := range outConditionCols {
|
|
outerConditionUniqueIDs.Insert(int(col.UniqueID))
|
|
}
|
|
// judge whether left filters is on non-left-equiv cols.
|
|
if outerConditionUniqueIDs.Intersects(outerCols.Difference(equivOuterUniqueIDs)) {
|
|
opt.SkipFDRule331 = true
|
|
}
|
|
} else {
|
|
// if there is none across equivalence condition, skip rule 3.3.1.
|
|
opt.SkipFDRule331 = true
|
|
}
|
|
|
|
opt.OnlyInnerFilter = len(eqCondSlice) == 0 && len(outerCondition) == 0 && len(p.OtherConditions) == 0
|
|
if opt.OnlyInnerFilter {
|
|
// if one of the inner condition is constant false, the inner side are all null, left make constant all of that.
|
|
for _, one := range innerCondition {
|
|
if c, ok := one.(*expression.Constant); ok && c.DeferredExpr == nil && c.ParamMarker == nil {
|
|
if isTrue, err := c.Value.ToBool(p.SCtx().GetSessionVars().StmtCtx.TypeCtx()); err == nil {
|
|
if isTrue == 0 {
|
|
// c is false
|
|
opt.InnerIsFalse = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fds := outerFD
|
|
fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt)
|
|
p.SetFDs(fds)
|
|
return fds
|
|
}
|
|
|
|
// GetJoinKeys extracts join keys(columns) from EqualConditions. It returns left join keys, right
|
|
// join keys and an `isNullEQ` array which means the `joinKey[i]` is a `NullEQ` function. The `hasNullEQ`
|
|
// means whether there is a `NullEQ` of a join key.
|
|
func (p *LogicalJoin) GetJoinKeys() (leftKeys, rightKeys []*expression.Column, isNullEQ []bool, hasNullEQ bool) {
|
|
for _, expr := range p.EqualConditions {
|
|
leftKeys = append(leftKeys, expr.GetArgs()[0].(*expression.Column))
|
|
rightKeys = append(rightKeys, expr.GetArgs()[1].(*expression.Column))
|
|
isNullEQ = append(isNullEQ, expr.FuncName.L == ast.NullEQ)
|
|
hasNullEQ = hasNullEQ || expr.FuncName.L == ast.NullEQ
|
|
}
|
|
return
|
|
}
|
|
|
|
// GetNAJoinKeys extracts join keys(columns) from NAEqualCondition.
|
|
func (p *LogicalJoin) GetNAJoinKeys() (leftKeys, rightKeys []*expression.Column) {
|
|
for _, expr := range p.NAEQConditions {
|
|
leftKeys = append(leftKeys, expr.GetArgs()[0].(*expression.Column))
|
|
rightKeys = append(rightKeys, expr.GetArgs()[1].(*expression.Column))
|
|
}
|
|
return
|
|
}
|
|
|
|
// GetPotentialPartitionKeys return potential partition keys for join, the potential partition keys are
|
|
// the join keys of EqualConditions
|
|
func (p *LogicalJoin) GetPotentialPartitionKeys() (leftKeys, rightKeys []*property.MPPPartitionColumn) {
|
|
for _, expr := range p.EqualConditions {
|
|
_, coll := expr.CharsetAndCollation()
|
|
collateID := property.GetCollateIDByNameForPartition(coll)
|
|
leftKeys = append(leftKeys, &property.MPPPartitionColumn{Col: expr.GetArgs()[0].(*expression.Column), CollateID: collateID})
|
|
rightKeys = append(rightKeys, &property.MPPPartitionColumn{Col: expr.GetArgs()[1].(*expression.Column), CollateID: collateID})
|
|
}
|
|
return
|
|
}
|
|
|
|
// Decorrelate eliminate the correlated column with if the col is in schema.
|
|
func (p *LogicalJoin) Decorrelate(schema *expression.Schema) {
|
|
for i, cond := range p.LeftConditions {
|
|
p.LeftConditions[i] = cond.Decorrelate(schema)
|
|
}
|
|
for i, cond := range p.RightConditions {
|
|
p.RightConditions[i] = cond.Decorrelate(schema)
|
|
}
|
|
for i, cond := range p.OtherConditions {
|
|
p.OtherConditions[i] = cond.Decorrelate(schema)
|
|
}
|
|
for i, cond := range p.EqualConditions {
|
|
p.EqualConditions[i] = cond.Decorrelate(schema).(*expression.ScalarFunction)
|
|
}
|
|
}
|
|
|
|
// ColumnSubstituteAll is used in projection elimination in apply de-correlation.
|
|
// Substitutions for all conditions should be successful, otherwise, we should keep all conditions unchanged.
|
|
func (p *LogicalJoin) ColumnSubstituteAll(schema *expression.Schema, exprs []expression.Expression) (hasFail bool) {
|
|
// make a copy of exprs for convenience of substitution (may change/partially change the expr tree)
|
|
cpLeftConditions := make(expression.CNFExprs, len(p.LeftConditions))
|
|
cpRightConditions := make(expression.CNFExprs, len(p.RightConditions))
|
|
cpOtherConditions := make(expression.CNFExprs, len(p.OtherConditions))
|
|
cpEqualConditions := make([]*expression.ScalarFunction, len(p.EqualConditions))
|
|
copy(cpLeftConditions, p.LeftConditions)
|
|
copy(cpRightConditions, p.RightConditions)
|
|
copy(cpOtherConditions, p.OtherConditions)
|
|
copy(cpEqualConditions, p.EqualConditions)
|
|
|
|
exprCtx := p.SCtx().GetExprCtx()
|
|
// try to substitute columns in these condition.
|
|
for i, cond := range cpLeftConditions {
|
|
if hasFail, cpLeftConditions[i] = expression.ColumnSubstituteAll(exprCtx, cond, schema, exprs); hasFail {
|
|
return
|
|
}
|
|
}
|
|
|
|
for i, cond := range cpRightConditions {
|
|
if hasFail, cpRightConditions[i] = expression.ColumnSubstituteAll(exprCtx, cond, schema, exprs); hasFail {
|
|
return
|
|
}
|
|
}
|
|
|
|
for i, cond := range cpOtherConditions {
|
|
if hasFail, cpOtherConditions[i] = expression.ColumnSubstituteAll(exprCtx, cond, schema, exprs); hasFail {
|
|
return
|
|
}
|
|
}
|
|
|
|
for i, cond := range cpEqualConditions {
|
|
var tmp expression.Expression
|
|
if hasFail, tmp = expression.ColumnSubstituteAll(exprCtx, cond, schema, exprs); hasFail {
|
|
return
|
|
}
|
|
cpEqualConditions[i] = tmp.(*expression.ScalarFunction)
|
|
}
|
|
|
|
// if all substituted, change them atomically here.
|
|
p.LeftConditions = cpLeftConditions
|
|
p.RightConditions = cpRightConditions
|
|
p.OtherConditions = cpOtherConditions
|
|
p.EqualConditions = cpEqualConditions
|
|
|
|
for i := len(p.EqualConditions) - 1; i >= 0; i-- {
|
|
newCond := p.EqualConditions[i]
|
|
|
|
// If the columns used in the new filter all come from the left child,
|
|
// we can push this filter to it.
|
|
if expression.ExprFromSchema(newCond, p.Children()[0].Schema()) {
|
|
p.LeftConditions = append(p.LeftConditions, newCond)
|
|
p.EqualConditions = append(p.EqualConditions[:i], p.EqualConditions[i+1:]...)
|
|
continue
|
|
}
|
|
|
|
// If the columns used in the new filter all come from the right
|
|
// child, we can push this filter to it.
|
|
if expression.ExprFromSchema(newCond, p.Children()[1].Schema()) {
|
|
p.RightConditions = append(p.RightConditions, newCond)
|
|
p.EqualConditions = append(p.EqualConditions[:i], p.EqualConditions[i+1:]...)
|
|
continue
|
|
}
|
|
|
|
_, lhsIsCol := newCond.GetArgs()[0].(*expression.Column)
|
|
_, rhsIsCol := newCond.GetArgs()[1].(*expression.Column)
|
|
|
|
// If the columns used in the new filter are not all expression.Column,
|
|
// we can not use it as join's equal condition.
|
|
if !(lhsIsCol && rhsIsCol) {
|
|
p.OtherConditions = append(p.OtherConditions, newCond)
|
|
p.EqualConditions = append(p.EqualConditions[:i], p.EqualConditions[i+1:]...)
|
|
continue
|
|
}
|
|
|
|
p.EqualConditions[i] = newCond
|
|
}
|
|
return false
|
|
}
|
|
|
|
// AttachOnConds extracts on conditions for join and set the `EqualConditions`, `LeftConditions`, `RightConditions` and
|
|
// `OtherConditions` by the result of extract.
|
|
func (p *LogicalJoin) AttachOnConds(onConds []expression.Expression) {
|
|
eq, left, right, other := p.extractOnCondition(onConds, false, false)
|
|
p.AppendJoinConds(eq, left, right, other)
|
|
}
|
|
|
|
// AppendJoinConds appends new join conditions.
|
|
func (p *LogicalJoin) AppendJoinConds(eq []*expression.ScalarFunction, left, right, other []expression.Expression) {
|
|
p.EqualConditions = append(eq, p.EqualConditions...)
|
|
p.LeftConditions = append(left, p.LeftConditions...)
|
|
p.RightConditions = append(right, p.RightConditions...)
|
|
p.OtherConditions = append(other, p.OtherConditions...)
|
|
}
|
|
|
|
// ExtractJoinKeys extract join keys as a schema for child with childIdx.
|
|
func (p *LogicalJoin) ExtractJoinKeys(childIdx int) *expression.Schema {
|
|
joinKeys := make([]*expression.Column, 0, len(p.EqualConditions))
|
|
for _, eqCond := range p.EqualConditions {
|
|
joinKeys = append(joinKeys, eqCond.GetArgs()[childIdx].(*expression.Column))
|
|
}
|
|
return expression.NewSchema(joinKeys...)
|
|
}
|
|
|
|
// extractUsedCols extracts all the needed columns.
|
|
func (p *LogicalJoin) extractUsedCols(parentUsedCols []*expression.Column) (leftCols []*expression.Column, rightCols []*expression.Column) {
|
|
for _, eqCond := range p.EqualConditions {
|
|
parentUsedCols = append(parentUsedCols, expression.ExtractColumns(eqCond)...)
|
|
}
|
|
for _, leftCond := range p.LeftConditions {
|
|
parentUsedCols = append(parentUsedCols, expression.ExtractColumns(leftCond)...)
|
|
}
|
|
for _, rightCond := range p.RightConditions {
|
|
parentUsedCols = append(parentUsedCols, expression.ExtractColumns(rightCond)...)
|
|
}
|
|
for _, otherCond := range p.OtherConditions {
|
|
parentUsedCols = append(parentUsedCols, expression.ExtractColumns(otherCond)...)
|
|
}
|
|
for _, naeqCond := range p.NAEQConditions {
|
|
parentUsedCols = append(parentUsedCols, expression.ExtractColumns(naeqCond)...)
|
|
}
|
|
lChild := p.Children()[0]
|
|
rChild := p.Children()[1]
|
|
for _, col := range parentUsedCols {
|
|
if lChild.Schema().Contains(col) {
|
|
leftCols = append(leftCols, col)
|
|
} else if rChild.Schema().Contains(col) {
|
|
rightCols = append(rightCols, col)
|
|
}
|
|
}
|
|
return leftCols, rightCols
|
|
}
|
|
|
|
// MergeSchema merge the schema of left and right child of join.
|
|
func (p *LogicalJoin) mergeSchema() {
|
|
p.SetSchema(buildLogicalJoinSchema(p.JoinType, p))
|
|
}
|
|
|
|
// pushDownTopNToChild will push a topN to one child of join. The idx stands for join child index. 0 is for left child.
|
|
func (p *LogicalJoin) pushDownTopNToChild(topN *LogicalTopN, idx int, opt *optimizetrace.LogicalOptimizeOp) base.LogicalPlan {
|
|
if topN == nil {
|
|
return p.Children()[idx].PushDownTopN(nil, opt)
|
|
}
|
|
|
|
for _, by := range topN.ByItems {
|
|
cols := expression.ExtractColumns(by.Expr)
|
|
for _, col := range cols {
|
|
if !p.Children()[idx].Schema().Contains(col) {
|
|
return p.Children()[idx].PushDownTopN(nil, opt)
|
|
}
|
|
}
|
|
}
|
|
|
|
newTopN := LogicalTopN{
|
|
Count: topN.Count + topN.Offset,
|
|
ByItems: make([]*util.ByItems, len(topN.ByItems)),
|
|
PreferLimitToCop: topN.PreferLimitToCop,
|
|
}.Init(topN.SCtx(), topN.QueryBlockOffset())
|
|
for i := range topN.ByItems {
|
|
newTopN.ByItems[i] = topN.ByItems[i].Clone()
|
|
}
|
|
appendTopNPushDownJoinTraceStep(p, newTopN, idx, opt)
|
|
return p.Children()[idx].PushDownTopN(newTopN, opt)
|
|
}
|
|
|
|
// Add a new selection between parent plan and current plan with candidate predicates
|
|
/*
|
|
+-------------+ +-------------+
|
|
| parentPlan | | parentPlan |
|
|
+-----^-------+ +-----^-------+
|
|
| --addCandidateSelection---> |
|
|
+-----+-------+ +-----------+--------------+
|
|
| currentPlan | | selection |
|
|
+-------------+ | candidate predicate |
|
|
+-----------^--------------+
|
|
|
|
|
|
|
|
+----+--------+
|
|
| currentPlan |
|
|
+-------------+
|
|
*/
|
|
// If the currentPlan at the top of query plan, return new root plan (selection)
|
|
// Else return nil
|
|
func addCandidateSelection(currentPlan base.LogicalPlan, currentChildIdx int, parentPlan base.LogicalPlan,
|
|
candidatePredicates []expression.Expression, opt *optimizetrace.LogicalOptimizeOp) (newRoot base.LogicalPlan) {
|
|
// generate a new selection for candidatePredicates
|
|
selection := LogicalSelection{Conditions: candidatePredicates}.Init(currentPlan.SCtx(), currentPlan.QueryBlockOffset())
|
|
// add selection above of p
|
|
if parentPlan == nil {
|
|
newRoot = selection
|
|
} else {
|
|
parentPlan.SetChild(currentChildIdx, selection)
|
|
}
|
|
selection.SetChildren(currentPlan)
|
|
appendAddSelectionTraceStep(parentPlan, currentPlan, selection, opt)
|
|
if parentPlan == nil {
|
|
return newRoot
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *LogicalJoin) getGroupNDVs(colGroups [][]*expression.Column, childStats []*property.StatsInfo) []property.GroupNDV {
|
|
outerIdx := int(-1)
|
|
if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
|
|
outerIdx = 0
|
|
} else if p.JoinType == RightOuterJoin {
|
|
outerIdx = 1
|
|
}
|
|
if outerIdx >= 0 && len(colGroups) > 0 {
|
|
return childStats[outerIdx].GroupNDVs
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// PreferAny checks whether the join type is in the joinFlags.
|
|
func (p *LogicalJoin) PreferAny(joinFlags ...uint) bool {
|
|
for _, flag := range joinFlags {
|
|
if p.PreferJoinType&flag > 0 {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ExtractOnCondition divide conditions in CNF of join node into 4 groups.
|
|
// These conditions can be where conditions, join conditions, or collection of both.
|
|
// If deriveLeft/deriveRight is set, we would try to derive more conditions for left/right plan.
|
|
func (p *LogicalJoin) ExtractOnCondition(
|
|
conditions []expression.Expression,
|
|
leftSchema *expression.Schema,
|
|
rightSchema *expression.Schema,
|
|
deriveLeft bool,
|
|
deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression,
|
|
rightCond []expression.Expression, otherCond []expression.Expression) {
|
|
ctx := p.SCtx()
|
|
for _, expr := range conditions {
|
|
// For queries like `select a in (select a from s where s.b = t.b) from t`,
|
|
// if subquery is empty caused by `s.b = t.b`, the result should always be
|
|
// false even if t.a is null or s.a is null. To make this join "empty aware",
|
|
// we should differentiate `t.a = s.a` from other column equal conditions, so
|
|
// we put it into OtherConditions instead of EqualConditions of join.
|
|
if expression.IsEQCondFromIn(expr) {
|
|
otherCond = append(otherCond, expr)
|
|
continue
|
|
}
|
|
binop, ok := expr.(*expression.ScalarFunction)
|
|
if ok && len(binop.GetArgs()) == 2 {
|
|
arg0, lOK := binop.GetArgs()[0].(*expression.Column)
|
|
arg1, rOK := binop.GetArgs()[1].(*expression.Column)
|
|
if lOK && rOK {
|
|
leftCol := leftSchema.RetrieveColumn(arg0)
|
|
rightCol := rightSchema.RetrieveColumn(arg1)
|
|
if leftCol == nil || rightCol == nil {
|
|
leftCol = leftSchema.RetrieveColumn(arg1)
|
|
rightCol = rightSchema.RetrieveColumn(arg0)
|
|
arg0, arg1 = arg1, arg0
|
|
}
|
|
if leftCol != nil && rightCol != nil {
|
|
if deriveLeft {
|
|
if util.IsNullRejected(ctx, leftSchema, expr) && !mysql.HasNotNullFlag(leftCol.RetType.GetFlag()) {
|
|
notNullExpr := expression.BuildNotNullExpr(ctx.GetExprCtx(), leftCol)
|
|
leftCond = append(leftCond, notNullExpr)
|
|
}
|
|
}
|
|
if deriveRight {
|
|
if util.IsNullRejected(ctx, rightSchema, expr) && !mysql.HasNotNullFlag(rightCol.RetType.GetFlag()) {
|
|
notNullExpr := expression.BuildNotNullExpr(ctx.GetExprCtx(), rightCol)
|
|
rightCond = append(rightCond, notNullExpr)
|
|
}
|
|
}
|
|
if binop.FuncName.L == ast.EQ {
|
|
cond := expression.NewFunctionInternal(ctx.GetExprCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), arg0, arg1)
|
|
eqCond = append(eqCond, cond.(*expression.ScalarFunction))
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
columns := expression.ExtractColumns(expr)
|
|
// `columns` may be empty, if the condition is like `correlated_column op constant`, or `constant`,
|
|
// push this kind of constant condition down according to join type.
|
|
if len(columns) == 0 {
|
|
leftCond, rightCond = p.pushDownConstExpr(expr, leftCond, rightCond, deriveLeft || deriveRight)
|
|
continue
|
|
}
|
|
allFromLeft, allFromRight := true, true
|
|
for _, col := range columns {
|
|
if !leftSchema.Contains(col) {
|
|
allFromLeft = false
|
|
}
|
|
if !rightSchema.Contains(col) {
|
|
allFromRight = false
|
|
}
|
|
}
|
|
if allFromRight {
|
|
rightCond = append(rightCond, expr)
|
|
} else if allFromLeft {
|
|
leftCond = append(leftCond, expr)
|
|
} else {
|
|
// Relax expr to two supersets: leftRelaxedCond and rightRelaxedCond, the expression now is
|
|
// `expr AND leftRelaxedCond AND rightRelaxedCond`. Motivation is to push filters down to
|
|
// children as much as possible.
|
|
if deriveLeft {
|
|
leftRelaxedCond := expression.DeriveRelaxedFiltersFromDNF(ctx.GetExprCtx(), expr, leftSchema)
|
|
if leftRelaxedCond != nil {
|
|
leftCond = append(leftCond, leftRelaxedCond)
|
|
}
|
|
}
|
|
if deriveRight {
|
|
rightRelaxedCond := expression.DeriveRelaxedFiltersFromDNF(ctx.GetExprCtx(), expr, rightSchema)
|
|
if rightRelaxedCond != nil {
|
|
rightCond = append(rightCond, rightRelaxedCond)
|
|
}
|
|
}
|
|
otherCond = append(otherCond, expr)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// pushDownConstExpr checks if the condition is from filter condition, if true, push it down to both
|
|
// children of join, whatever the join type is; if false, push it down to inner child of outer join,
|
|
// and both children of non-outer-join.
|
|
func (p *LogicalJoin) pushDownConstExpr(expr expression.Expression, leftCond []expression.Expression,
|
|
rightCond []expression.Expression, filterCond bool) ([]expression.Expression, []expression.Expression) {
|
|
switch p.JoinType {
|
|
case LeftOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
|
|
if filterCond {
|
|
leftCond = append(leftCond, expr)
|
|
// Append the expr to right join condition instead of `rightCond`, to make it able to be
|
|
// pushed down to children of join.
|
|
p.RightConditions = append(p.RightConditions, expr)
|
|
} else {
|
|
rightCond = append(rightCond, expr)
|
|
}
|
|
case RightOuterJoin:
|
|
if filterCond {
|
|
rightCond = append(rightCond, expr)
|
|
p.LeftConditions = append(p.LeftConditions, expr)
|
|
} else {
|
|
leftCond = append(leftCond, expr)
|
|
}
|
|
case SemiJoin, InnerJoin:
|
|
leftCond = append(leftCond, expr)
|
|
rightCond = append(rightCond, expr)
|
|
case AntiSemiJoin:
|
|
if filterCond {
|
|
leftCond = append(leftCond, expr)
|
|
}
|
|
rightCond = append(rightCond, expr)
|
|
}
|
|
return leftCond, rightCond
|
|
}
|
|
|
|
func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, deriveLeft bool,
|
|
deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression,
|
|
rightCond []expression.Expression, otherCond []expression.Expression) {
|
|
return p.ExtractOnCondition(conditions, p.Children()[0].Schema(), p.Children()[1].Schema(), deriveLeft, deriveRight)
|
|
}
|
|
|
|
// SetPreferredJoinTypeAndOrder sets the preferred join type and order for the LogicalJoin.
|
|
func (p *LogicalJoin) SetPreferredJoinTypeAndOrder(hintInfo *utilhint.PlanHints) {
|
|
if hintInfo == nil {
|
|
return
|
|
}
|
|
|
|
lhsAlias := extractTableAlias(p.Children()[0], p.QueryBlockOffset())
|
|
rhsAlias := extractTableAlias(p.Children()[1], p.QueryBlockOffset())
|
|
if hintInfo.IfPreferMergeJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferMergeJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferMergeJoin
|
|
}
|
|
if hintInfo.IfPreferMergeJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferMergeJoin
|
|
p.RightPreferJoinType |= utilhint.PreferMergeJoin
|
|
}
|
|
if hintInfo.IfPreferNoMergeJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoMergeJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferNoMergeJoin
|
|
}
|
|
if hintInfo.IfPreferNoMergeJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoMergeJoin
|
|
p.RightPreferJoinType |= utilhint.PreferNoMergeJoin
|
|
}
|
|
if hintInfo.IfPreferBroadcastJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferBCJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferBCJoin
|
|
}
|
|
if hintInfo.IfPreferBroadcastJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferBCJoin
|
|
p.RightPreferJoinType |= utilhint.PreferBCJoin
|
|
}
|
|
if hintInfo.IfPreferShuffleJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferShuffleJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferShuffleJoin
|
|
}
|
|
if hintInfo.IfPreferShuffleJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferShuffleJoin
|
|
p.RightPreferJoinType |= utilhint.PreferShuffleJoin
|
|
}
|
|
if hintInfo.IfPreferHashJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferHashJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferHashJoin
|
|
}
|
|
if hintInfo.IfPreferHashJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferHashJoin
|
|
p.RightPreferJoinType |= utilhint.PreferHashJoin
|
|
}
|
|
if hintInfo.IfPreferNoHashJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoHashJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferNoHashJoin
|
|
}
|
|
if hintInfo.IfPreferNoHashJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoHashJoin
|
|
p.RightPreferJoinType |= utilhint.PreferNoHashJoin
|
|
}
|
|
if hintInfo.IfPreferINLJ(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferLeftAsINLJInner
|
|
p.LeftPreferJoinType |= utilhint.PreferINLJ
|
|
}
|
|
if hintInfo.IfPreferINLJ(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferRightAsINLJInner
|
|
p.RightPreferJoinType |= utilhint.PreferINLJ
|
|
}
|
|
if hintInfo.IfPreferINLHJ(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferLeftAsINLHJInner
|
|
p.LeftPreferJoinType |= utilhint.PreferINLHJ
|
|
}
|
|
if hintInfo.IfPreferINLHJ(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferRightAsINLHJInner
|
|
p.RightPreferJoinType |= utilhint.PreferINLHJ
|
|
}
|
|
if hintInfo.IfPreferINLMJ(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferLeftAsINLMJInner
|
|
p.LeftPreferJoinType |= utilhint.PreferINLMJ
|
|
}
|
|
if hintInfo.IfPreferINLMJ(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferRightAsINLMJInner
|
|
p.RightPreferJoinType |= utilhint.PreferINLMJ
|
|
}
|
|
if hintInfo.IfPreferNoIndexJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoIndexJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferNoIndexJoin
|
|
}
|
|
if hintInfo.IfPreferNoIndexJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoIndexJoin
|
|
p.RightPreferJoinType |= utilhint.PreferNoIndexJoin
|
|
}
|
|
if hintInfo.IfPreferNoIndexHashJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoIndexHashJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferNoIndexHashJoin
|
|
}
|
|
if hintInfo.IfPreferNoIndexHashJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoIndexHashJoin
|
|
p.RightPreferJoinType |= utilhint.PreferNoIndexHashJoin
|
|
}
|
|
if hintInfo.IfPreferNoIndexMergeJoin(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoIndexMergeJoin
|
|
p.LeftPreferJoinType |= utilhint.PreferNoIndexMergeJoin
|
|
}
|
|
if hintInfo.IfPreferNoIndexMergeJoin(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferNoIndexMergeJoin
|
|
p.RightPreferJoinType |= utilhint.PreferNoIndexMergeJoin
|
|
}
|
|
if hintInfo.IfPreferHJBuild(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferLeftAsHJBuild
|
|
p.LeftPreferJoinType |= utilhint.PreferHJBuild
|
|
}
|
|
if hintInfo.IfPreferHJBuild(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferRightAsHJBuild
|
|
p.RightPreferJoinType |= utilhint.PreferHJBuild
|
|
}
|
|
if hintInfo.IfPreferHJProbe(lhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferLeftAsHJProbe
|
|
p.LeftPreferJoinType |= utilhint.PreferHJProbe
|
|
}
|
|
if hintInfo.IfPreferHJProbe(rhsAlias) {
|
|
p.PreferJoinType |= utilhint.PreferRightAsHJProbe
|
|
p.RightPreferJoinType |= utilhint.PreferHJProbe
|
|
}
|
|
hasConflict := false
|
|
if !p.SCtx().GetSessionVars().EnableAdvancedJoinHint || p.SCtx().GetSessionVars().StmtCtx.StraightJoinOrder {
|
|
if containDifferentJoinTypes(p.PreferJoinType) {
|
|
hasConflict = true
|
|
}
|
|
} else if p.SCtx().GetSessionVars().EnableAdvancedJoinHint {
|
|
if containDifferentJoinTypes(p.LeftPreferJoinType) || containDifferentJoinTypes(p.RightPreferJoinType) {
|
|
hasConflict = true
|
|
}
|
|
}
|
|
if hasConflict {
|
|
p.SCtx().GetSessionVars().StmtCtx.SetHintWarning(
|
|
"Join hints are conflict, you can only specify one type of join")
|
|
p.PreferJoinType = 0
|
|
}
|
|
// set the join order
|
|
if hintInfo.LeadingJoinOrder != nil {
|
|
p.PreferJoinOrder = hintInfo.MatchTableName([]*utilhint.HintedTable{lhsAlias, rhsAlias}, hintInfo.LeadingJoinOrder)
|
|
}
|
|
// set hintInfo for further usage if this hint info can be used.
|
|
if p.PreferJoinType != 0 || p.PreferJoinOrder {
|
|
p.HintInfo = hintInfo
|
|
}
|
|
}
|
|
|
|
// SetPreferredJoinType generates hint information for the logicalJoin based on the hint information of its left and right children.
|
|
func (p *LogicalJoin) SetPreferredJoinType() {
|
|
if p.LeftPreferJoinType == 0 && p.RightPreferJoinType == 0 {
|
|
return
|
|
}
|
|
p.PreferJoinType = setPreferredJoinTypeFromOneSide(p.LeftPreferJoinType, true) | setPreferredJoinTypeFromOneSide(p.RightPreferJoinType, false)
|
|
if containDifferentJoinTypes(p.PreferJoinType) {
|
|
p.SCtx().GetSessionVars().StmtCtx.SetHintWarning(
|
|
"Join hints conflict after join reorder phase, you can only specify one type of join")
|
|
p.PreferJoinType = 0
|
|
}
|
|
}
|
|
|
|
// updateEQCond will extract the arguments of a equal condition that connect two expressions.
|
|
func (p *LogicalJoin) updateEQCond() {
|
|
lChild, rChild := p.Children()[0], p.Children()[1]
|
|
var lKeys, rKeys []expression.Expression
|
|
var lNAKeys, rNAKeys []expression.Expression
|
|
// We need two steps here:
|
|
// step1: try best to extract normal EQ condition from OtherCondition to join EqualConditions.
|
|
for i := len(p.OtherConditions) - 1; i >= 0; i-- {
|
|
need2Remove := false
|
|
if eqCond, ok := p.OtherConditions[i].(*expression.ScalarFunction); ok && eqCond.FuncName.L == ast.EQ {
|
|
// If it is a column equal condition converted from `[not] in (subq)`, do not move it
|
|
// to EqualConditions, and keep it in OtherConditions. Reference comments in `extractOnCondition`
|
|
// for detailed reasons.
|
|
if expression.IsEQCondFromIn(eqCond) {
|
|
continue
|
|
}
|
|
lExpr, rExpr := eqCond.GetArgs()[0], eqCond.GetArgs()[1]
|
|
if expression.ExprFromSchema(lExpr, lChild.Schema()) && expression.ExprFromSchema(rExpr, rChild.Schema()) {
|
|
lKeys = append(lKeys, lExpr)
|
|
rKeys = append(rKeys, rExpr)
|
|
need2Remove = true
|
|
} else if expression.ExprFromSchema(lExpr, rChild.Schema()) && expression.ExprFromSchema(rExpr, lChild.Schema()) {
|
|
lKeys = append(lKeys, rExpr)
|
|
rKeys = append(rKeys, lExpr)
|
|
need2Remove = true
|
|
}
|
|
}
|
|
if need2Remove {
|
|
p.OtherConditions = append(p.OtherConditions[:i], p.OtherConditions[i+1:]...)
|
|
}
|
|
}
|
|
// eg: explain select * from t1, t3 where t1.a+1 = t3.a;
|
|
// tidb only accept the join key in EqualCondition as a normal column (join OP take granted for that)
|
|
// so once we found the left and right children's schema can supply the all columns in complicated EQ condition that used by left/right key.
|
|
// we will add a layer of projection here to convert the complicated expression of EQ's left or right side to be a normal column.
|
|
adjustKeyForm := func(leftKeys, rightKeys []expression.Expression, isNA bool) {
|
|
if len(leftKeys) > 0 {
|
|
needLProj, needRProj := false, false
|
|
for i := range leftKeys {
|
|
_, lOk := leftKeys[i].(*expression.Column)
|
|
_, rOk := rightKeys[i].(*expression.Column)
|
|
needLProj = needLProj || !lOk
|
|
needRProj = needRProj || !rOk
|
|
}
|
|
|
|
var lProj, rProj *LogicalProjection
|
|
if needLProj {
|
|
lProj = p.getProj(0)
|
|
}
|
|
if needRProj {
|
|
rProj = p.getProj(1)
|
|
}
|
|
for i := range leftKeys {
|
|
lKey, rKey := leftKeys[i], rightKeys[i]
|
|
if lProj != nil {
|
|
lKey = lProj.appendExpr(lKey)
|
|
}
|
|
if rProj != nil {
|
|
rKey = rProj.appendExpr(rKey)
|
|
}
|
|
eqCond := expression.NewFunctionInternal(p.SCtx().GetExprCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), lKey, rKey)
|
|
if isNA {
|
|
p.NAEQConditions = append(p.NAEQConditions, eqCond.(*expression.ScalarFunction))
|
|
} else {
|
|
p.EqualConditions = append(p.EqualConditions, eqCond.(*expression.ScalarFunction))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
adjustKeyForm(lKeys, rKeys, false)
|
|
|
|
// Step2: when step1 is finished, then we can determine whether we need to extract NA-EQ from OtherCondition to NAEQConditions.
|
|
// when there are still no EqualConditions, let's try to be a NAAJ.
|
|
// todo: by now, when there is already a normal EQ condition, just keep NA-EQ as other-condition filters above it.
|
|
// eg: select * from stu where stu.name not in (select name from exam where exam.stu_id = stu.id);
|
|
// combination of <stu.name NAEQ exam.name> and <exam.stu_id EQ stu.id> for join key is little complicated for now.
|
|
canBeNAAJ := (p.JoinType == AntiSemiJoin || p.JoinType == AntiLeftOuterSemiJoin) && len(p.EqualConditions) == 0
|
|
if canBeNAAJ && p.SCtx().GetSessionVars().OptimizerEnableNAAJ {
|
|
var otherCond expression.CNFExprs
|
|
for i := 0; i < len(p.OtherConditions); i++ {
|
|
eqCond, ok := p.OtherConditions[i].(*expression.ScalarFunction)
|
|
if ok && eqCond.FuncName.L == ast.EQ && expression.IsEQCondFromIn(eqCond) {
|
|
// here must be a EQCondFromIn.
|
|
lExpr, rExpr := eqCond.GetArgs()[0], eqCond.GetArgs()[1]
|
|
if expression.ExprFromSchema(lExpr, lChild.Schema()) && expression.ExprFromSchema(rExpr, rChild.Schema()) {
|
|
lNAKeys = append(lNAKeys, lExpr)
|
|
rNAKeys = append(rNAKeys, rExpr)
|
|
} else if expression.ExprFromSchema(lExpr, rChild.Schema()) && expression.ExprFromSchema(rExpr, lChild.Schema()) {
|
|
lNAKeys = append(lNAKeys, rExpr)
|
|
rNAKeys = append(rNAKeys, lExpr)
|
|
}
|
|
continue
|
|
}
|
|
otherCond = append(otherCond, p.OtherConditions[i])
|
|
}
|
|
p.OtherConditions = otherCond
|
|
// here is for cases like: select (a+1, b*3) not in (select a,b from t2) from t1.
|
|
adjustKeyForm(lNAKeys, rNAKeys, true)
|
|
}
|
|
}
|
|
|
|
func (p *LogicalJoin) getProj(idx int) *LogicalProjection {
|
|
child := p.Children()[idx]
|
|
proj, ok := child.(*LogicalProjection)
|
|
if ok {
|
|
return proj
|
|
}
|
|
proj = LogicalProjection{Exprs: make([]expression.Expression, 0, child.Schema().Len())}.Init(p.SCtx(), child.QueryBlockOffset())
|
|
for _, col := range child.Schema().Columns {
|
|
proj.Exprs = append(proj.Exprs, col)
|
|
}
|
|
proj.SetSchema(child.Schema().Clone())
|
|
proj.SetChildren(child)
|
|
p.Children()[idx] = proj
|
|
return proj
|
|
}
|
|
|
|
// outerJoinPropConst propagates constant equal and column equal conditions over outer join.
|
|
func (p *LogicalJoin) outerJoinPropConst(predicates []expression.Expression) []expression.Expression {
|
|
outerTable := p.Children()[0]
|
|
innerTable := p.Children()[1]
|
|
if p.JoinType == RightOuterJoin {
|
|
innerTable, outerTable = outerTable, innerTable
|
|
}
|
|
lenJoinConds := len(p.EqualConditions) + len(p.LeftConditions) + len(p.RightConditions) + len(p.OtherConditions)
|
|
joinConds := make([]expression.Expression, 0, lenJoinConds)
|
|
for _, equalCond := range p.EqualConditions {
|
|
joinConds = append(joinConds, equalCond)
|
|
}
|
|
joinConds = append(joinConds, p.LeftConditions...)
|
|
joinConds = append(joinConds, p.RightConditions...)
|
|
joinConds = append(joinConds, p.OtherConditions...)
|
|
p.EqualConditions = nil
|
|
p.LeftConditions = nil
|
|
p.RightConditions = nil
|
|
p.OtherConditions = nil
|
|
nullSensitive := p.JoinType == AntiLeftOuterSemiJoin || p.JoinType == LeftOuterSemiJoin
|
|
joinConds, predicates = expression.PropConstOverOuterJoin(p.SCtx().GetExprCtx(), joinConds, predicates, outerTable.Schema(), innerTable.Schema(), nullSensitive)
|
|
p.AttachOnConds(joinConds)
|
|
return predicates
|
|
}
|