Files
tidb/pkg/planner/core/logical_join.go

790 lines
30 KiB
Go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package core
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/planner/core/operator/logicalop"
"github.com/pingcap/tidb/pkg/planner/funcdep"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/planner/util"
"github.com/pingcap/tidb/pkg/types"
utilhint "github.com/pingcap/tidb/pkg/util/hint"
"github.com/pingcap/tidb/pkg/util/intset"
)
// JoinType enumerates the kinds of join a LogicalJoin can represent: InnerJoin, LeftOuterJoin,
// RightOuterJoin, SemiJoin, AntiSemiJoin, LeftOuterSemiJoin and AntiLeftOuterSemiJoin.
type JoinType int

const (
	// InnerJoin means inner join.
	InnerJoin JoinType = iota
	// LeftOuterJoin means left join.
	LeftOuterJoin
	// RightOuterJoin means right join.
	RightOuterJoin
	// SemiJoin means if row a in table A matches some rows in B, just output a.
	SemiJoin
	// AntiSemiJoin means if row a in table A does not match any row in B, then output a.
	AntiSemiJoin
	// LeftOuterSemiJoin means if row a in table A matches some rows in B, output (a, true), otherwise, output (a, false).
	LeftOuterSemiJoin
	// AntiLeftOuterSemiJoin means if row a in table A matches some rows in B, output (a, false), otherwise, output (a, true).
	AntiLeftOuterSemiJoin
)

// IsOuterJoin reports whether tp is one of the outer join kinds: LeftOuterJoin, RightOuterJoin,
// LeftOuterSemiJoin or AntiLeftOuterSemiJoin.
func (tp JoinType) IsOuterJoin() bool {
	switch tp {
	case LeftOuterJoin, RightOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
		return true
	default:
		return false
	}
}

// IsSemiJoin reports whether tp is one of the semi/anti-semi join kinds: SemiJoin, AntiSemiJoin,
// LeftOuterSemiJoin or AntiLeftOuterSemiJoin.
func (tp JoinType) IsSemiJoin() bool {
	switch tp {
	case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
		return true
	default:
		return false
	}
}

// String returns the lower-case display name of tp, or "unsupported join type" for any value
// outside the declared constants.
func (tp JoinType) String() string {
	// Indexed by the JoinType constants so each name stays next to the value it describes.
	names := [...]string{
		InnerJoin:             "inner join",
		LeftOuterJoin:         "left outer join",
		RightOuterJoin:        "right outer join",
		SemiJoin:              "semi join",
		AntiSemiJoin:          "anti semi join",
		LeftOuterSemiJoin:     "left outer semi join",
		AntiLeftOuterSemiJoin: "anti left outer semi join",
	}
	if tp < 0 || int(tp) >= len(names) {
		return "unsupported join type"
	}
	return names[tp]
}
// LogicalJoin is the logical join plan.
type LogicalJoin struct {
logicalop.LogicalSchemaProducer
// JoinType is the kind of join this node performs (see the JoinType constants).
JoinType JoinType
// NOTE(review): Reordered, CartesianJoin and StraightJoin are plain markers that are not read or
// written by the methods visible in this part of the file; confirm their meaning at the setters.
Reordered bool
CartesianJoin bool
StraightJoin bool
// HintInfo stores the join algorithm hint information specified by client.
// SetPreferredJoinTypeAndOrder only assigns it when a join type or join order preference was recorded.
HintInfo *utilhint.PlanHints
// PreferJoinType is a bit set of utilhint.Prefer* flags for the join as a whole; it is reset to 0
// when the recorded hints conflict.
PreferJoinType uint
// PreferJoinOrder is the result of matching the two children's aliases against the hint's LeadingJoinOrder.
PreferJoinOrder bool
// LeftPreferJoinType and RightPreferJoinType are bit sets of utilhint.Prefer* flags recorded for
// hints that matched the left and the right child respectively.
LeftPreferJoinType uint
RightPreferJoinType uint
// EqualConditions are the equal conditions of the join. GetJoinKeys and ExtractJoinKeys assert that
// both arguments of every entry are *expression.Column.
EqualConditions []*expression.ScalarFunction
// NAEQConditions means null aware equal conditions, which is used for null aware semi joins.
NAEQConditions []*expression.ScalarFunction
// LeftConditions and RightConditions hold conditions classified as belonging to the left and the right
// child; OtherConditions holds the conditions that fit none of the other groups (see ExtractOnCondition).
LeftConditions expression.CNFExprs
RightConditions expression.CNFExprs
OtherConditions expression.CNFExprs
// NOTE(review): LeftProperties and RightProperties are not used by the methods visible here;
// presumably they describe column orderings offered by each child — confirm where they are populated.
LeftProperties [][]*expression.Column
RightProperties [][]*expression.Column
// DefaultValues is only used for left/right outer join. It holds the values the inner row should take
// when the outer row doesn't match any row of the inner table.
// A nil DefaultValues just means the default values are a slice of NULLs.
// Currently, only the `aggregation push down` phase sets this.
DefaultValues []types.Datum
// FullSchema contains all the columns that the Join can output. It's ordered as [outer schema..., inner schema...].
// This is useful for natural joins and "using" joins. In these cases, the join key columns from the
// inner side (or the right side when it's an inner join) will not be in the schema of Join.
// But upper operators should be able to find those "redundant" columns, and the user can also explicitly select
// those columns, so we put the "redundant" columns here so that they can be found.
//
// For example:
// create table t1(a int, b int); create table t2(a int, b int);
// select * from t1 join t2 using (b);
// schema of the Join will be [t1.b, t1.a, t2.a]; FullSchema will be [t1.a, t1.b, t2.a, t2.b].
//
// We record all columns and keep them ordered in order to correctly handle SQLs like
// select t1.*, t2.* from t1 join t2 using (b);
// (*PlanBuilder).unfoldWildStar() handles the schema for such case.
FullSchema *expression.Schema
// FullNames is declared alongside FullSchema.
// NOTE(review): presumably it carries the names matching FullSchema's columns — confirm at the builder.
FullNames types.NameSlice
// EqualCondOutCnt indicates the estimated count of joined rows after evaluating `EqualConditions`.
EqualCondOutCnt float64
}
// isNAAJ reports whether the join carries at least one null-aware equal condition,
// i.e. whether NAEQConditions is non-empty.
func (p *LogicalJoin) isNAAJ() bool {
	return len(p.NAEQConditions) != 0
}
// Shallow returns a shallow copy of p: the struct is copied by value (so slices and pointers are
// shared with the original) and the copy is then passed through Init with p's session context and
// query block offset.
func (p *LogicalJoin) Shallow() *LogicalJoin {
	cloned := new(LogicalJoin)
	*cloned = *p
	return cloned.Init(p.SCtx(), p.QueryBlockOffset())
}
// ExtractFD implements the interface LogicalPlan.
// Inner, left/right outer and semi joins are delegated to their dedicated extractors (with no extra
// filters); every other join type yields a fresh FDSet that only has an empty HashCodeToUniqueID map.
func (p *LogicalJoin) ExtractFD() *funcdep.FDSet {
	if p.JoinType == InnerJoin {
		return p.extractFDForInnerJoin(nil)
	}
	if p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin {
		return p.extractFDForOuterJoin(nil)
	}
	if p.JoinType == SemiJoin {
		return p.extractFDForSemiJoin(nil)
	}
	return &funcdep.FDSet{HashCodeToUniqueID: make(map[string]int)}
}
// extractFDForSemiJoin derives the FD set of a semi join from its outer (first) child and records it
// on p via SetFDs. filtersFromApply are extra conditions analysed together with the join's equal and
// other conditions when collecting not-null columns.
func (p *LogicalJoin) extractFDForSemiJoin(filtersFromApply []expression.Expression) *funcdep.FDSet {
	// 1: a semi join keeps part or all of the outer table's rows, so the outer FDs remain valid.
	// 2: un-projected columns are left for the upper projection or were already pruned bottom-up.
	fds := p.Children()[0].ExtractFD()
	// ExtractFD is still invoked on the inner child, but its result is not used.
	_ = p.Children()[1].ExtractFD()

	conds := expression.ScalarFuncs2Exprs(p.EqualConditions)
	conds = append(conds, p.OtherConditions...)
	conds = append(conds, filtersFromApply...)

	notNullCols := ExtractNotNullFromConds(conds, p)
	// Constants are only taken from the conditions on the left (outer) side.
	constCols := ExtractConstantCols(p.LeftConditions, p.SCtx(), fds)

	fds.MakeNotNull(notNullCols)
	fds.AddConstants(constCols)
	p.SetFDs(fds)
	return fds
}
// extractFDForInnerJoin derives the FD set of an inner join and records it on p via SetFDs.
// It starts from the left child's FD set, combines it with the right child's as a cartesian product,
// then applies what the join conditions (plus filtersFromApply) imply: not-null columns, constants
// and equivalences. Finally the right side's not-null columns, hash-code registrations, group-by
// columns and HasAggBuilt marker are merged in.
func (p *LogicalJoin) extractFDForInnerJoin(filtersFromApply []expression.Expression) *funcdep.FDSet {
	fds := p.Children()[0].ExtractFD()
	rightFD := p.Children()[1].ExtractFD()
	fds.MakeCartesianProduct(rightFD)

	// Some join eq conditions are stored in OtherConditions, so analyse them all together.
	conds := expression.ScalarFuncs2Exprs(p.EqualConditions)
	conds = append(conds, p.OtherConditions...)
	conds = append(conds, filtersFromApply...)

	notNullCols := ExtractNotNullFromConds(conds, p)
	constCols := ExtractConstantCols(conds, p.SCtx(), fds)
	equivPairs := ExtractEquivalenceCols(conds, p.SCtx(), fds)

	fds.MakeNotNull(notNullCols)
	fds.AddConstants(constCols)
	for i := range equivPairs {
		fds.AddEquivalence(equivPairs[i][0], equivPairs[i][1])
	}

	// Merge the not-null columns and the registered hash-code map of both sides.
	fds.NotNullCols.UnionWith(rightFD.NotNullCols)
	if fds.HashCodeToUniqueID == nil {
		fds.HashCodeToUniqueID = rightFD.HashCodeToUniqueID
	} else {
		for code, id := range rightFD.HashCodeToUniqueID {
			// The same constant may appear in different subqueries; keep the entry already registered.
			if _, registered := fds.HashCodeToUniqueID[code]; !registered {
				fds.HashCodeToUniqueID[code] = id
			}
		}
	}

	col, found := rightFD.GroupByCols.Next(0)
	for found {
		fds.GroupByCols.Insert(col)
		col, found = rightFD.GroupByCols.Next(col + 1)
	}
	if rightFD.HasAggBuilt {
		fds.HasAggBuilt = true
	}

	p.SetFDs(fds)
	return fds
}
// extractFDForOuterJoin builds the functional-dependency set for a left or right outer join and
// records it on p via SetFDs. Child 0 is treated as the outer side unless JoinType is RightOuterJoin,
// in which case the FD sets, the side conditions and the column sets of the two sides are swapped.
// filtersFromApply are extra conditions that are analysed together with the join's own conditions.
// The returned set is the outer child's FD set after MakeOuterJoin has been applied to it.
func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expression) *funcdep.FDSet {
outerFD, innerFD := p.Children()[0].ExtractFD(), p.Children()[1].ExtractFD()
innerCondition := p.RightConditions
outerCondition := p.LeftConditions
// Collect the unique IDs of the columns produced by each child.
outerCols, innerCols := intset.NewFastIntSet(), intset.NewFastIntSet()
for _, col := range p.Children()[0].Schema().Columns {
outerCols.Insert(int(col.UniqueID))
}
for _, col := range p.Children()[1].Schema().Columns {
innerCols.Insert(int(col.UniqueID))
}
// For a right outer join the right child is the outer side, so swap every per-side value.
if p.JoinType == RightOuterJoin {
innerFD, outerFD = outerFD, innerFD
innerCondition = p.LeftConditions
outerCondition = p.RightConditions
innerCols, outerCols = outerCols, innerCols
}
// Gather every condition of the join (equal, other, inner-side, outer-side, apply filters) and
// derive a standalone FD set (filterFD) describing what those conditions imply.
eqCondSlice := expression.ScalarFuncs2Exprs(p.EqualConditions)
allConds := append(eqCondSlice, p.OtherConditions...)
allConds = append(allConds, innerCondition...)
allConds = append(allConds, outerCondition...)
allConds = append(allConds, filtersFromApply...)
notNullColsFromFilters := ExtractNotNullFromConds(allConds, p)
filterFD := &funcdep.FDSet{HashCodeToUniqueID: make(map[string]int)}
constUniqueIDs := ExtractConstantCols(allConds, p.SCtx(), filterFD)
equivUniqueIDs := ExtractEquivalenceCols(allConds, p.SCtx(), filterFD)
filterFD.AddConstants(constUniqueIDs)
// equivOuterUniqueIDs accumulates the outer-side columns that take part in an equivalence with
// inner-side columns; equivAcrossNum counts such cross-side equivalences.
equivOuterUniqueIDs := intset.NewFastIntSet()
equivAcrossNum := 0
for _, equiv := range equivUniqueIDs {
filterFD.AddEquivalence(equiv[0], equiv[1])
if equiv[0].SubsetOf(outerCols) && equiv[1].SubsetOf(innerCols) {
equivOuterUniqueIDs.UnionWith(equiv[0])
equivAcrossNum++
continue
}
if equiv[0].SubsetOf(innerCols) && equiv[1].SubsetOf(outerCols) {
equivOuterUniqueIDs.UnionWith(equiv[1])
equivAcrossNum++
}
}
filterFD.MakeNotNull(notNullColsFromFilters)
// pre-perceive the filters to make the judgement of rule 3.3.1 easier.
var opt funcdep.ArgOpts
if equivAcrossNum > 0 {
// find the equivalence FD across left and right cols.
var outConditionCols []*expression.Column
if len(outerCondition) != 0 {
outConditionCols = append(outConditionCols, expression.ExtractColumnsFromExpressions(nil, outerCondition, nil)...)
}
if len(p.OtherConditions) != 0 {
// other conditions may contain right side cols; that doesn't affect the judgement of the intersection with non-left-equiv cols.
outConditionCols = append(outConditionCols, expression.ExtractColumnsFromExpressions(nil, p.OtherConditions, nil)...)
}
outerConditionUniqueIDs := intset.NewFastIntSet()
for _, col := range outConditionCols {
outerConditionUniqueIDs.Insert(int(col.UniqueID))
}
// judge whether the left filters are on non-left-equiv cols.
if outerConditionUniqueIDs.Intersects(outerCols.Difference(equivOuterUniqueIDs)) {
opt.SkipFDRule331 = true
}
} else {
// if there is no equivalence condition across the two sides, skip rule 3.3.1.
opt.SkipFDRule331 = true
}
// OnlyInnerFilter is set when the join has no equal, outer-side or other conditions at all.
opt.OnlyInnerFilter = len(eqCondSlice) == 0 && len(outerCondition) == 0 && len(p.OtherConditions) == 0
if opt.OnlyInnerFilter {
// if one of the inner conditions is a constant false, the inner side is all NULL; record that in
// opt.InnerIsFalse so that all of the inner columns can be treated as constant.
for _, one := range innerCondition {
// Only plain constants are inspected: deferred expressions and parameter markers are skipped.
if c, ok := one.(*expression.Constant); ok && c.DeferredExpr == nil && c.ParamMarker == nil {
// A conversion error is ignored; the constant is then simply not treated as false.
if isTrue, err := c.Value.ToBool(p.SCtx().GetSessionVars().StmtCtx.TypeCtx()); err == nil {
if isTrue == 0 {
// c is false
opt.InnerIsFalse = true
}
}
}
}
}
// fds refers to the outer side's FD set; MakeOuterJoin is applied to that same set.
fds := outerFD
fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt)
p.SetFDs(fds)
return fds
}
// GetJoinKeys extracts the join key columns from EqualConditions.
// leftKeys[i] and rightKeys[i] are the first and second argument of the i-th equal condition,
// isNullEQ[i] tells whether that condition is a `NullEQ` function, and hasNullEQ is true when at
// least one of them is. All results are nil/false when there are no equal conditions.
func (p *LogicalJoin) GetJoinKeys() (leftKeys, rightKeys []*expression.Column, isNullEQ []bool, hasNullEQ bool) {
	for _, eqCond := range p.EqualConditions {
		args := eqCond.GetArgs()
		nullSafe := eqCond.FuncName.L == ast.NullEQ
		leftKeys = append(leftKeys, args[0].(*expression.Column))
		rightKeys = append(rightKeys, args[1].(*expression.Column))
		isNullEQ = append(isNullEQ, nullSafe)
		if nullSafe {
			hasNullEQ = true
		}
	}
	return leftKeys, rightKeys, isNullEQ, hasNullEQ
}
// GetNAJoinKeys extracts the join key columns from NAEQConditions: leftKeys[i] and rightKeys[i] are
// the first and second argument of the i-th null-aware equal condition.
func (p *LogicalJoin) GetNAJoinKeys() (leftKeys, rightKeys []*expression.Column) {
	for _, naCond := range p.NAEQConditions {
		args := naCond.GetArgs()
		leftKeys = append(leftKeys, args[0].(*expression.Column))
		rightKeys = append(rightKeys, args[1].(*expression.Column))
	}
	return leftKeys, rightKeys
}
// GetPotentialPartitionKeys returns the potential partition keys for the join, which are the join
// keys of EqualConditions. Each key pairs a column with the collate ID derived from the collation of
// the equal condition it came from.
func (p *LogicalJoin) GetPotentialPartitionKeys() (leftKeys, rightKeys []*property.MPPPartitionColumn) {
	for _, eqCond := range p.EqualConditions {
		_, collation := eqCond.CharsetAndCollation()
		id := property.GetCollateIDByNameForPartition(collation)
		args := eqCond.GetArgs()
		leftKeys = append(leftKeys, &property.MPPPartitionColumn{
			Col:       args[0].(*expression.Column),
			CollateID: id,
		})
		rightKeys = append(rightKeys, &property.MPPPartitionColumn{
			Col:       args[1].(*expression.Column),
			CollateID: id,
		})
	}
	return leftKeys, rightKeys
}
// decorrelate rewrites, in place, every left, right, other and equal condition of the join with the
// result of its Decorrelate(schema) call, which eliminates correlated columns found in schema.
func (p *LogicalJoin) decorrelate(schema *expression.Schema) {
	rewriteInPlace := func(conds expression.CNFExprs) {
		for i := range conds {
			conds[i] = conds[i].Decorrelate(schema)
		}
	}
	rewriteInPlace(p.LeftConditions)
	rewriteInPlace(p.RightConditions)
	rewriteInPlace(p.OtherConditions)

	// Equal conditions are stored as scalar functions, so the rewritten expression is asserted back.
	for i := range p.EqualConditions {
		p.EqualConditions[i] = p.EqualConditions[i].Decorrelate(schema).(*expression.ScalarFunction)
	}
}
// columnSubstituteAll is used in projection elimination in apply de-correlation.
// It substitutes columns in the left, right, other and equal conditions (in that order). The
// substitution is all-or-nothing: if it fails for any condition, hasFail is true and the join's
// conditions are left unchanged. On success the substituted conditions are installed and each equal
// condition is re-classified, because after substitution it may belong to a single child or may no
// longer be a column-to-column comparison.
func (p *LogicalJoin) columnSubstituteAll(schema *expression.Schema, exprs []expression.Expression) (hasFail bool) {
	exprCtx := p.SCtx().GetExprCtx()

	// substituteList returns a substituted copy of conds; ok is false as soon as one substitution fails.
	substituteList := func(conds expression.CNFExprs) (result expression.CNFExprs, ok bool) {
		result = make(expression.CNFExprs, len(conds))
		for i, cond := range conds {
			failed, replaced := expression.ColumnSubstituteAll(exprCtx, cond, schema, exprs)
			if failed {
				return nil, false
			}
			result[i] = replaced
		}
		return result, true
	}

	newLeft, ok := substituteList(p.LeftConditions)
	if !ok {
		return true
	}
	newRight, ok := substituteList(p.RightConditions)
	if !ok {
		return true
	}
	newOther, ok := substituteList(p.OtherConditions)
	if !ok {
		return true
	}
	newEqual := make([]*expression.ScalarFunction, len(p.EqualConditions))
	for i, cond := range p.EqualConditions {
		failed, replaced := expression.ColumnSubstituteAll(exprCtx, cond, schema, exprs)
		if failed {
			return true
		}
		newEqual[i] = replaced.(*expression.ScalarFunction)
	}

	// Everything was substituted successfully, so install the new conditions together.
	p.LeftConditions = newLeft
	p.RightConditions = newRight
	p.OtherConditions = newOther
	p.EqualConditions = newEqual

	// isColumnPair reports whether both arguments of fn are plain columns.
	isColumnPair := func(fn *expression.ScalarFunction) bool {
		_, lhsIsCol := fn.GetArgs()[0].(*expression.Column)
		_, rhsIsCol := fn.GetArgs()[1].(*expression.Column)
		return lhsIsCol && rhsIsCol
	}

	// Walk backwards so that removing entry i does not disturb the entries still to be visited.
	for i := len(p.EqualConditions) - 1; i >= 0; i-- {
		cond := p.EqualConditions[i]
		switch {
		case expression.ExprFromSchema(cond, p.Children()[0].Schema()):
			// All columns come from the left child: the filter can be pushed to it.
			p.LeftConditions = append(p.LeftConditions, cond)
		case expression.ExprFromSchema(cond, p.Children()[1].Schema()):
			// All columns come from the right child: the filter can be pushed to it.
			p.RightConditions = append(p.RightConditions, cond)
		case !isColumnPair(cond):
			// Not a column-to-column comparison any more, so it cannot stay an equal condition.
			p.OtherConditions = append(p.OtherConditions, cond)
		default:
			// Still a valid equal condition; keep it where it is.
			continue
		}
		p.EqualConditions = append(p.EqualConditions[:i], p.EqualConditions[i+1:]...)
	}
	return false
}
// AttachOnConds splits onConds into equal, left, right and other conditions (without deriving any
// extra conditions for the children) and adds each group to the join through AppendJoinConds.
func (p *LogicalJoin) AttachOnConds(onConds []expression.Expression) {
	// extractOnCondition's four results line up with AppendJoinConds' four parameters.
	p.AppendJoinConds(p.extractOnCondition(onConds, false, false))
}
// AppendJoinConds adds new join conditions to the join. Within each group the new conditions are
// placed in front of the conditions the join already has.
//
// The argument slices are never written to: each one is capacity-clamped with a full slice
// expression before appending, so that `append` has to allocate a new backing array instead of
// storing the join's existing conditions into spare capacity of a slice the caller may still use.
func (p *LogicalJoin) AppendJoinConds(eq []*expression.ScalarFunction, left, right, other []expression.Expression) {
	p.EqualConditions = append(eq[:len(eq):len(eq)], p.EqualConditions...)
	p.LeftConditions = append(left[:len(left):len(left)], p.LeftConditions...)
	p.RightConditions = append(right[:len(right):len(right)], p.RightConditions...)
	p.OtherConditions = append(other[:len(other):len(other)], p.OtherConditions...)
}
// ExtractCorrelatedCols implements LogicalPlan interface.
// It gathers the correlated columns referenced by the equal, left, right and other conditions, in
// that order.
func (p *LogicalJoin) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
	condCount := len(p.EqualConditions) + len(p.LeftConditions) + len(p.RightConditions) + len(p.OtherConditions)
	corCols := make([]*expression.CorrelatedColumn, 0, condCount)
	collect := func(cond expression.Expression) {
		corCols = append(corCols, expression.ExtractCorColumns(cond)...)
	}
	for _, cond := range p.EqualConditions {
		collect(cond)
	}
	for _, cond := range p.LeftConditions {
		collect(cond)
	}
	for _, cond := range p.RightConditions {
		collect(cond)
	}
	for _, cond := range p.OtherConditions {
		collect(cond)
	}
	return corCols
}
// ExtractJoinKeys builds a schema from the join keys of the child with index childIdx: for every
// equal condition it takes the argument at position childIdx, which must be a column.
func (p *LogicalJoin) ExtractJoinKeys(childIdx int) *expression.Schema {
	keys := make([]*expression.Column, len(p.EqualConditions))
	for i, eqCond := range p.EqualConditions {
		keys[i] = eqCond.GetArgs()[childIdx].(*expression.Column)
	}
	return expression.NewSchema(keys...)
}
// PreferAny reports whether PreferJoinType has at least one bit in common with any of joinFlags.
// It returns false when no flags are given.
func (p *LogicalJoin) PreferAny(joinFlags ...uint) bool {
	// Sharing a bit with any single flag is the same as sharing a bit with the union of all flags.
	var wanted uint
	for _, flag := range joinFlags {
		wanted |= flag
	}
	return p.PreferJoinType&wanted != 0
}
// ExtractOnCondition divides the CNF conditions of a join node into 4 groups: equal conditions
// between a left column and a right column, conditions for the left child, conditions for the right
// child, and all other conditions.
// These conditions can be where conditions, join conditions, or a collection of both.
// If deriveLeft/deriveRight is set, we would try to derive more conditions for left/right plan.
func (p *LogicalJoin) ExtractOnCondition(
conditions []expression.Expression,
leftSchema *expression.Schema,
rightSchema *expression.Schema,
deriveLeft bool,
deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression,
rightCond []expression.Expression, otherCond []expression.Expression) {
ctx := p.SCtx()
for _, expr := range conditions {
// For queries like `select a in (select a from s where s.b = t.b) from t`,
// if subquery is empty caused by `s.b = t.b`, the result should always be
// false even if t.a is null or s.a is null. To make this join "empty aware",
// we should differentiate `t.a = s.a` from other column equal conditions, so
// we put it into OtherConditions instead of EqualConditions of join.
if expression.IsEQCondFromIn(expr) {
otherCond = append(otherCond, expr)
continue
}
// Look for a two-argument function whose arguments are both plain columns, one from each side.
binop, ok := expr.(*expression.ScalarFunction)
if ok && len(binop.GetArgs()) == 2 {
arg0, lOK := binop.GetArgs()[0].(*expression.Column)
arg1, rOK := binop.GetArgs()[1].(*expression.Column)
if lOK && rOK {
leftCol := leftSchema.RetrieveColumn(arg0)
rightCol := rightSchema.RetrieveColumn(arg1)
// The columns may be written in the opposite order (right column first); retry swapped so
// that arg0 is the left-side column and arg1 the right-side one.
if leftCol == nil || rightCol == nil {
leftCol = leftSchema.RetrieveColumn(arg1)
rightCol = rightSchema.RetrieveColumn(arg0)
arg0, arg1 = arg1, arg0
}
if leftCol != nil && rightCol != nil {
// When the expression is null-rejected on a side and that side's column is not declared NOT NULL,
// derive a not-null filter for that side. This happens for any two-column function, not only `=`.
if deriveLeft {
if util.IsNullRejected(ctx, leftSchema, expr) && !mysql.HasNotNullFlag(leftCol.RetType.GetFlag()) {
notNullExpr := expression.BuildNotNullExpr(ctx.GetExprCtx(), leftCol)
leftCond = append(leftCond, notNullExpr)
}
}
if deriveRight {
if util.IsNullRejected(ctx, rightSchema, expr) && !mysql.HasNotNullFlag(rightCol.RetType.GetFlag()) {
notNullExpr := expression.BuildNotNullExpr(ctx.GetExprCtx(), rightCol)
rightCond = append(rightCond, notNullExpr)
}
}
// Only `=` becomes an equal condition; it is rebuilt with the left column as first argument.
// Any other operator falls through to the generic classification below.
if binop.FuncName.L == ast.EQ {
cond := expression.NewFunctionInternal(ctx.GetExprCtx(), ast.EQ, types.NewFieldType(mysql.TypeTiny), arg0, arg1)
eqCond = append(eqCond, cond.(*expression.ScalarFunction))
continue
}
}
}
}
columns := expression.ExtractColumns(expr)
// `columns` may be empty, if the condition is like `correlated_column op constant`, or `constant`,
// push this kind of constant condition down according to join type.
if len(columns) == 0 {
leftCond, rightCond = p.pushDownConstExpr(expr, leftCond, rightCond, deriveLeft || deriveRight)
continue
}
// Classify by where the referenced columns come from.
allFromLeft, allFromRight := true, true
for _, col := range columns {
if !leftSchema.Contains(col) {
allFromLeft = false
}
if !rightSchema.Contains(col) {
allFromRight = false
}
}
if allFromRight {
rightCond = append(rightCond, expr)
} else if allFromLeft {
leftCond = append(leftCond, expr)
} else {
// Relax expr to two supersets: leftRelaxedCond and rightRelaxedCond, the expression now is
// `expr AND leftRelaxedCond AND rightRelaxedCond`. Motivation is to push filters down to
// children as much as possible.
if deriveLeft {
leftRelaxedCond := expression.DeriveRelaxedFiltersFromDNF(ctx.GetExprCtx(), expr, leftSchema)
if leftRelaxedCond != nil {
leftCond = append(leftCond, leftRelaxedCond)
}
}
if deriveRight {
rightRelaxedCond := expression.DeriveRelaxedFiltersFromDNF(ctx.GetExprCtx(), expr, rightSchema)
if rightRelaxedCond != nil {
rightCond = append(rightCond, rightRelaxedCond)
}
}
// The original expression itself always stays in the "other" group.
otherCond = append(otherCond, expr)
}
}
return
}
// pushDownConstExpr decides where a condition that references no columns goes, based on the join
// type and on filterCond (whether the condition comes from a filter condition). It returns the
// updated leftCond and rightCond.
//
//   - Left outer / left outer semi / anti left outer semi join: a filter condition goes to leftCond
//     and is also appended to the join's own RightConditions; otherwise it goes to rightCond.
//   - Right outer join: the mirror image, using rightCond and the join's LeftConditions.
//   - Semi and inner join: the condition goes to both leftCond and rightCond.
//   - Anti semi join: the condition always goes to rightCond, and to leftCond only for a filter condition.
//
// For any other join type both slices are returned unchanged.
func (p *LogicalJoin) pushDownConstExpr(expr expression.Expression, leftCond []expression.Expression,
	rightCond []expression.Expression, filterCond bool) ([]expression.Expression, []expression.Expression) {
	switch p.JoinType {
	case LeftOuterJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin:
		if !filterCond {
			rightCond = append(rightCond, expr)
			break
		}
		leftCond = append(leftCond, expr)
		// Append the expr to the join's right conditions instead of `rightCond`, so that it can
		// still be pushed down to the children of the join.
		p.RightConditions = append(p.RightConditions, expr)
	case RightOuterJoin:
		if !filterCond {
			leftCond = append(leftCond, expr)
			break
		}
		rightCond = append(rightCond, expr)
		p.LeftConditions = append(p.LeftConditions, expr)
	case SemiJoin, InnerJoin:
		leftCond = append(leftCond, expr)
		rightCond = append(rightCond, expr)
	case AntiSemiJoin:
		if filterCond {
			leftCond = append(leftCond, expr)
		}
		rightCond = append(rightCond, expr)
	}
	return leftCond, rightCond
}
// extractOnCondition is ExtractOnCondition applied to the schemas of the join's own two children:
// child 0 supplies the left schema and child 1 the right schema.
func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, deriveLeft bool,
	deriveRight bool) (eqCond []*expression.ScalarFunction, leftCond []expression.Expression,
	rightCond []expression.Expression, otherCond []expression.Expression) {
	leftSchema := p.Children()[0].Schema()
	rightSchema := p.Children()[1].Schema()
	return p.ExtractOnCondition(conditions, leftSchema, rightSchema, deriveLeft, deriveRight)
}
// SetPreferredJoinTypeAndOrder sets the preferred join type and order for the LogicalJoin.
//
// Every join hint kind is checked against the alias of the left child and of the right child; a
// match ORs the corresponding utilhint flags into PreferJoinType and into LeftPreferJoinType or
// RightPreferJoinType. If the recorded flags conflict, a hint warning is raised and PreferJoinType
// is reset to 0. PreferJoinOrder is taken from the hint's LeadingJoinOrder when present, and HintInfo
// is only stored when a join type or join order preference remains. A nil hintInfo is a no-op.
func (p *LogicalJoin) SetPreferredJoinTypeAndOrder(hintInfo *utilhint.PlanHints) {
	if hintInfo == nil {
		return
	}

	lhsAlias := extractTableAlias(p.Children()[0], p.QueryBlockOffset())
	rhsAlias := extractTableAlias(p.Children()[1], p.QueryBlockOffset())

	// record applies one hint kind. leftHit/rightHit tell which child the hint matched; the join-level
	// flag differs per side for hints that name a role (leftJoinFlag/rightJoinFlag), while sideFlag is
	// what gets stored in the per-child preference.
	record := func(leftHit, rightHit bool, leftJoinFlag, rightJoinFlag, sideFlag uint) {
		if leftHit {
			p.PreferJoinType |= leftJoinFlag
			p.LeftPreferJoinType |= sideFlag
		}
		if rightHit {
			p.PreferJoinType |= rightJoinFlag
			p.RightPreferJoinType |= sideFlag
		}
	}
	// recordSame is record for hints that use one and the same flag everywhere.
	recordSame := func(leftHit, rightHit bool, flag uint) {
		record(leftHit, rightHit, flag, flag, flag)
	}

	recordSame(hintInfo.IfPreferMergeJoin(lhsAlias), hintInfo.IfPreferMergeJoin(rhsAlias), utilhint.PreferMergeJoin)
	recordSame(hintInfo.IfPreferNoMergeJoin(lhsAlias), hintInfo.IfPreferNoMergeJoin(rhsAlias), utilhint.PreferNoMergeJoin)
	recordSame(hintInfo.IfPreferBroadcastJoin(lhsAlias), hintInfo.IfPreferBroadcastJoin(rhsAlias), utilhint.PreferBCJoin)
	recordSame(hintInfo.IfPreferShuffleJoin(lhsAlias), hintInfo.IfPreferShuffleJoin(rhsAlias), utilhint.PreferShuffleJoin)
	recordSame(hintInfo.IfPreferHashJoin(lhsAlias), hintInfo.IfPreferHashJoin(rhsAlias), utilhint.PreferHashJoin)
	recordSame(hintInfo.IfPreferNoHashJoin(lhsAlias), hintInfo.IfPreferNoHashJoin(rhsAlias), utilhint.PreferNoHashJoin)
	record(hintInfo.IfPreferINLJ(lhsAlias), hintInfo.IfPreferINLJ(rhsAlias),
		utilhint.PreferLeftAsINLJInner, utilhint.PreferRightAsINLJInner, utilhint.PreferINLJ)
	record(hintInfo.IfPreferINLHJ(lhsAlias), hintInfo.IfPreferINLHJ(rhsAlias),
		utilhint.PreferLeftAsINLHJInner, utilhint.PreferRightAsINLHJInner, utilhint.PreferINLHJ)
	record(hintInfo.IfPreferINLMJ(lhsAlias), hintInfo.IfPreferINLMJ(rhsAlias),
		utilhint.PreferLeftAsINLMJInner, utilhint.PreferRightAsINLMJInner, utilhint.PreferINLMJ)
	recordSame(hintInfo.IfPreferNoIndexJoin(lhsAlias), hintInfo.IfPreferNoIndexJoin(rhsAlias), utilhint.PreferNoIndexJoin)
	recordSame(hintInfo.IfPreferNoIndexHashJoin(lhsAlias), hintInfo.IfPreferNoIndexHashJoin(rhsAlias), utilhint.PreferNoIndexHashJoin)
	recordSame(hintInfo.IfPreferNoIndexMergeJoin(lhsAlias), hintInfo.IfPreferNoIndexMergeJoin(rhsAlias), utilhint.PreferNoIndexMergeJoin)
	record(hintInfo.IfPreferHJBuild(lhsAlias), hintInfo.IfPreferHJBuild(rhsAlias),
		utilhint.PreferLeftAsHJBuild, utilhint.PreferRightAsHJBuild, utilhint.PreferHJBuild)
	record(hintInfo.IfPreferHJProbe(lhsAlias), hintInfo.IfPreferHJProbe(rhsAlias),
		utilhint.PreferLeftAsHJProbe, utilhint.PreferRightAsHJProbe, utilhint.PreferHJProbe)

	// Without advanced join hints, or under a straight join order, the combined preference is
	// checked; otherwise each side is checked on its own.
	sessVars := p.SCtx().GetSessionVars()
	var hasConflict bool
	if !sessVars.EnableAdvancedJoinHint || sessVars.StmtCtx.StraightJoinOrder {
		hasConflict = containDifferentJoinTypes(p.PreferJoinType)
	} else {
		hasConflict = containDifferentJoinTypes(p.LeftPreferJoinType) ||
			containDifferentJoinTypes(p.RightPreferJoinType)
	}
	if hasConflict {
		sessVars.StmtCtx.SetHintWarning(
			"Join hints are conflict, you can only specify one type of join")
		p.PreferJoinType = 0
	}

	// set the join order
	if hintInfo.LeadingJoinOrder != nil {
		p.PreferJoinOrder = hintInfo.MatchTableName([]*utilhint.HintedTable{lhsAlias, rhsAlias}, hintInfo.LeadingJoinOrder)
	}
	// Keep hintInfo for further usage only if this hint info can actually be used.
	if p.PreferJoinType != 0 || p.PreferJoinOrder {
		p.HintInfo = hintInfo
	}
}
// SetPreferredJoinType recomputes PreferJoinType from the per-child preferences
// LeftPreferJoinType and RightPreferJoinType. It does nothing when both are zero. If the combined
// preference contains different join types, a hint warning is raised and PreferJoinType is reset to 0.
func (p *LogicalJoin) SetPreferredJoinType() {
	if p.LeftPreferJoinType == 0 && p.RightPreferJoinType == 0 {
		return
	}

	fromLeft := setPreferredJoinTypeFromOneSide(p.LeftPreferJoinType, true)
	fromRight := setPreferredJoinTypeFromOneSide(p.RightPreferJoinType, false)
	p.PreferJoinType = fromLeft | fromRight
	if !containDifferentJoinTypes(p.PreferJoinType) {
		return
	}

	p.SCtx().GetSessionVars().StmtCtx.SetHintWarning(
		"Join hints conflict after join reorder phase, you can only specify one type of join")
	p.PreferJoinType = 0
}