Files
tidb/pkg/planner/core/index_join_path.go

828 lines
34 KiB
Go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package core
import (
"bytes"
"fmt"
"math"
"strings"
"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/planner/cardinality"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/core/operator/logicalop"
"github.com/pingcap/tidb/pkg/planner/core/operator/physicalop"
"github.com/pingcap/tidb/pkg/planner/planctx"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/planner/util"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/collate"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/ranger"
"go.uber.org/zap"
)
// mutableIndexJoinRange is designed for IndexJoin plan in the Plan Cache.
// Before reusing a cached IndexJoin plan from the Plan Cache, we have to
// first update the ranges of the IndexJoin plan according to the current parameters.
// mutableIndexJoinRanges stores all necessary information to rebuild the ranges of an IndexJoin plan.
type mutableIndexJoinRange struct {
ranges ranger.Ranges
rangeInfo string
indexJoinInfo *indexJoinPathInfo // read-only
path *util.AccessPath // read-only
}
func (mr *mutableIndexJoinRange) CloneForPlanCache() ranger.MutableRanges {
cloned := new(mutableIndexJoinRange)
if mr.ranges != nil {
cloned.ranges = mr.ranges.CloneForPlanCache().(ranger.Ranges)
}
cloned.rangeInfo = mr.rangeInfo
cloned.indexJoinInfo = mr.indexJoinInfo
cloned.path = mr.path
return cloned
}
func (mr *mutableIndexJoinRange) Range() ranger.Ranges {
return mr.ranges
}
func (mr *mutableIndexJoinRange) Rebuild(sctx planctx.PlanContext) error {
result, empty, err := indexJoinPathBuild(sctx, mr.path, mr.indexJoinInfo, true)
if err != nil {
return err
}
if empty { // empty ranges are dangerous for plan-cache, it's better to optimize the whole plan again in this case
return errors.New("failed to rebuild range: empty range")
}
newRanges := result.chosenRanges.Range()
if len(mr.ranges) != len(newRanges) || (len(mr.ranges) > 0 && mr.ranges[0].Width() != newRanges[0].Width()) {
// some access conditions cannot be used to calculate the range after parameters change, return an error in this case for safety.
return errors.New("failed to rebuild range: range width changed")
}
mr.rangeInfo = indexJoinPathRangeInfo(sctx, mr.indexJoinInfo.outerJoinKeys, result)
mr.ranges = result.chosenRanges.Range()
return nil
}
// indexJoinPathResult records necessary information if we build IndexJoin on this chosen access path (or index).
type indexJoinPathResult struct {
chosenPath *util.AccessPath // the chosen access path (or index)
candidate *candidatePath // the candidate representation of the chosen path, used for SkylinePruning
chosenAccess []expression.Expression // expressions used to access this index
chosenRemained []expression.Expression // remaining expressions after accessing this index
chosenRanges ranger.MutableRanges // the ranges used to access this index
usedColsLen int // the number of columns used on this index, `t1.a=t2.a and t1.b=t2.b` can use 2 columns of index t1(a, b, c)
usedColsNDV float64 // the estimated NDV of the used columns on this index, the NDV of `t1(a, b)`
idxOff2KeyOff []int
lastColManager *physicalop.ColWithCmpFuncManager
}
// indexJoinPathInfo records necessary information to build IndexJoin.
type indexJoinPathInfo struct {
joinOtherConditions []expression.Expression
outerJoinKeys []*expression.Column
innerJoinKeys []*expression.Column
innerSchema *expression.Schema
innerPushedConditions []expression.Expression
innerTableStats *property.StatsInfo // the original table's stats
}
// indexJoinPathTmp records temporary information when building IndexJoin.
type indexJoinPathTmp struct {
curPossibleUsedKeys []*expression.Column
curNotUsedIndexCols []*expression.Column
curNotUsedColLens []int
// Store the corresponding innerKeys offset of each column in the current path, reset by "resetContextForIndex()"
curIdxOff2KeyOff []int
}
type indexJoinTmpRange struct {
ranges ranger.Ranges
emptyRange bool
keyCntInRange int
eqAndInCntInRange int
nextColInRange bool
extraColInRange bool
err error
}
// indexJoinPathNewMutableRange creates a mutableIndexJoinRange.
// See more info in the comments of mutableIndexJoinRange.
func indexJoinPathNewMutableRange(
sctx planctx.PlanContext,
indexJoinInfo *indexJoinPathInfo,
relatedExprs []expression.Expression,
ranges []*ranger.Range,
path *util.AccessPath) ranger.MutableRanges {
// if the plan-cache is enabled and these ranges depend on some parameters, we have to rebuild these ranges after changing parameters
if expression.MaybeOverOptimized4PlanCache(sctx.GetExprCtx(), relatedExprs...) {
// assume that path, innerKeys and outerKeys will not be modified in the follow-up process
return &mutableIndexJoinRange{
ranges: ranges,
indexJoinInfo: indexJoinInfo,
path: path,
}
}
return ranger.Ranges(ranges)
}
func indexJoinPathUpdateTmpRange(
sctx planctx.PlanContext,
buildTmp *indexJoinPathTmp,
tempRangeRes *indexJoinTmpRange,
accesses, remained []expression.Expression) (lastColPos int, newAccesses, newRemained []expression.Expression) {
lastColPos = tempRangeRes.keyCntInRange + tempRangeRes.eqAndInCntInRange
buildTmp.curPossibleUsedKeys = buildTmp.curPossibleUsedKeys[:tempRangeRes.keyCntInRange]
for i := lastColPos; i < len(buildTmp.curIdxOff2KeyOff); i++ {
buildTmp.curIdxOff2KeyOff[i] = -1
}
newAccesses = accesses[:tempRangeRes.eqAndInCntInRange]
newRemained = ranger.AppendConditionsIfNotExist(sctx.GetExprCtx().GetEvalCtx(),
remained, accesses[tempRangeRes.eqAndInCntInRange:])
return
}
// indexJoinPathBuild tries to build an index join on this specified access path.
// The result is recorded in the *indexJoinPathResult, see more info in that structure.
func indexJoinPathBuild(sctx planctx.PlanContext,
path *util.AccessPath,
indexJoinInfo *indexJoinPathInfo,
rebuildMode bool) (result *indexJoinPathResult, emptyRange bool, err error) {
if len(path.IdxCols) == 0 {
return nil, false, nil
}
accesses := make([]expression.Expression, 0, len(path.IdxCols))
buildTmp := indexJoinPathTmpInit(sctx, indexJoinInfo, path.IdxCols, path.IdxColLens)
notKeyEqAndIn, remained, rangeFilterCandidates, emptyRange := indexJoinPathFindUsefulEQIn(sctx, indexJoinInfo, buildTmp)
if emptyRange {
return nil, true, nil
}
var remainedEqAndIn []expression.Expression
notKeyEqAndIn, remainedEqAndIn = indexJoinPathRemoveUselessEQIn(buildTmp, path.IdxCols, notKeyEqAndIn)
matchedKeyCnt := len(buildTmp.curPossibleUsedKeys)
// If no join key is matched while join keys actually are not empty. We don't choose index join for now.
if matchedKeyCnt <= 0 && len(indexJoinInfo.innerJoinKeys) > 0 {
return nil, false, nil
}
accesses = append(accesses, notKeyEqAndIn...)
remained = ranger.AppendConditionsIfNotExist(sctx.GetExprCtx().GetEvalCtx(), remained, remainedEqAndIn)
lastColPos := matchedKeyCnt + len(notKeyEqAndIn)
// There should be some equal conditions. But we don't need that there must be some join key in accesses here.
// A more strict check is applied later.
if lastColPos <= 0 {
return nil, false, nil
}
rangeMaxSize := sctx.GetSessionVars().RangeMaxSize
if rebuildMode {
// When rebuilding ranges for plan cache, we don't restrict range mem limit.
rangeMaxSize = 0
}
// If all the index columns are covered by eq/in conditions, we don't need to consider other conditions anymore.
if lastColPos == len(path.IdxCols) {
// If there's no join key matching index column, then choosing hash join is always a better idea.
// e.g. select * from t1, t2 where t2.a=1 and t2.b=1. And t2 has index(a, b).
// If we don't have the following check, TiDB will build index join for this case.
if matchedKeyCnt <= 0 {
return nil, false, nil
}
remained = append(remained, rangeFilterCandidates...)
tempRangeRes := indexJoinPathBuildTmpRange(sctx, buildTmp, matchedKeyCnt, notKeyEqAndIn, nil, false, rangeMaxSize)
if tempRangeRes.err != nil || tempRangeRes.emptyRange || tempRangeRes.keyCntInRange <= 0 {
return nil, tempRangeRes.emptyRange, tempRangeRes.err
}
lastColPos, accesses, remained = indexJoinPathUpdateTmpRange(sctx, buildTmp, tempRangeRes, accesses, remained)
mutableRange := indexJoinPathNewMutableRange(sctx, indexJoinInfo, accesses, tempRangeRes.ranges, path)
ret := indexJoinPathConstructResult(sctx, indexJoinInfo, buildTmp, mutableRange, path, accesses, remained, nil, lastColPos)
return ret, false, nil
}
lastPossibleCol := path.IdxCols[lastColPos]
lastColManager := &physicalop.ColWithCmpFuncManager{
TargetCol: lastPossibleCol,
ColLength: path.IdxColLens[lastColPos],
AffectedColSchema: expression.NewSchema(),
}
lastColAccess := indexJoinPathBuildColManager(indexJoinInfo, lastPossibleCol, lastColManager)
// If the column manager holds no expression, then we fallback to find whether there're useful normal filters
if len(lastColAccess) == 0 {
// If there's no join key matching index column, then choosing hash join is always a better idea.
// e.g. select * from t1, t2 where t2.a=1 and t2.b=1 and t2.c > 10 and t2.c < 20. And t2 has index(a, b, c).
// If we don't have the following check, TiDB will build index join for this case.
if matchedKeyCnt <= 0 {
return nil, false, nil
}
colAccesses, colRemained := ranger.DetachCondsForColumn(sctx.GetRangerCtx(), rangeFilterCandidates, lastPossibleCol)
var nextColRange []*ranger.Range
var err error
if len(colAccesses) > 0 {
var colRemained2 []expression.Expression
nextColRange, colAccesses, colRemained2, err = ranger.BuildColumnRange(colAccesses, sctx.GetRangerCtx(), lastPossibleCol.RetType, path.IdxColLens[lastColPos], rangeMaxSize)
if err != nil {
return nil, false, err
}
if len(colRemained2) > 0 {
colRemained = append(colRemained, colRemained2...)
nextColRange = nil
}
}
tempRangeRes := indexJoinPathBuildTmpRange(sctx, buildTmp, matchedKeyCnt, notKeyEqAndIn, nextColRange, false, rangeMaxSize)
if tempRangeRes.err != nil || tempRangeRes.emptyRange || tempRangeRes.keyCntInRange <= 0 {
return nil, tempRangeRes.emptyRange, tempRangeRes.err
}
lastColPos, accesses, remained = indexJoinPathUpdateTmpRange(sctx, buildTmp, tempRangeRes, accesses, remained)
// update accesses and remained by colAccesses and colRemained.
remained = append(remained, colRemained...)
if tempRangeRes.nextColInRange {
if path.IdxColLens[lastColPos] != types.UnspecifiedLength {
remained = append(remained, colAccesses...)
}
accesses = append(accesses, colAccesses...)
lastColPos = lastColPos + 1
} else {
remained = append(remained, colAccesses...)
}
mutableRange := indexJoinPathNewMutableRange(sctx, indexJoinInfo, accesses, tempRangeRes.ranges, path)
ret := indexJoinPathConstructResult(sctx, indexJoinInfo, buildTmp, mutableRange, path, accesses, remained, nil, lastColPos)
return ret, false, nil
}
tempRangeRes := indexJoinPathBuildTmpRange(sctx, buildTmp, matchedKeyCnt, notKeyEqAndIn, nil, true, rangeMaxSize)
if tempRangeRes.err != nil || tempRangeRes.emptyRange {
return nil, tempRangeRes.emptyRange, tempRangeRes.err
}
lastColPos, accesses, remained = indexJoinPathUpdateTmpRange(sctx, buildTmp, tempRangeRes, accesses, remained)
remained = append(remained, rangeFilterCandidates...)
if tempRangeRes.extraColInRange {
accesses = append(accesses, lastColAccess...)
lastColPos = lastColPos + 1
} else {
if tempRangeRes.keyCntInRange <= 0 {
return nil, false, nil
}
lastColManager = nil
}
mutableRange := indexJoinPathNewMutableRange(sctx, indexJoinInfo, accesses, tempRangeRes.ranges, path)
ret := indexJoinPathConstructResult(sctx, indexJoinInfo, buildTmp, mutableRange, path, accesses, remained, lastColManager, lastColPos)
return ret, false, nil
}
// indexJoinPathCompare compares these 2 index join paths and returns whether the current is better than the base.
func indexJoinPathCompare(ds *logicalop.DataSource, best, current *indexJoinPathResult) (curIsBetter bool) {
// Notice that there may be the cases like `t1.a = t2.a and b > 2 and b < 1`, so ranges can be nil though the conditions are valid.
// Obviously when the range is nil, we don't need index join.
if current == nil || len(current.chosenRanges.Range()) == 0 {
return false
}
if best == nil {
return true
}
// reuse Skyline pruning to compare the index join paths.
prop := &property.PhysicalProperty{ExpectedCnt: math.MaxFloat64} // default property without any requirement
preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan()
cmpResult, _ := compareCandidates(ds.SCtx(), ds.StatisticTable, prop, current.candidate, best.candidate, preferRange)
if cmpResult == 1 {
return true
} else if cmpResult == -1 {
return false
}
// cmpResult == 0, go on to use NDV to compare.
// We choose the index by the NDV of the used columns, the larger the better.
// If NDVs are same, we choose index which uses more columns.
// Note that these 2 heuristic rules are too simple to cover all cases,
// since the NDV of outer join keys are not considered, and the detached access conditions
// may contain expressions like `t1.a > t2.a`. It's pretty hard to evaluate the join selectivity
// of these non-column-equal conditions, so I prefer to keep these heuristic rules simple at least for now.
if current.usedColsNDV < best.usedColsNDV || (current.usedColsNDV == best.usedColsNDV && current.usedColsLen <= best.usedColsLen) {
return false
}
return true
}
// indexJoinPathConstructResult constructs the index join path result.
func indexJoinPathConstructResult(
sctx planctx.PlanContext,
indexJoinInfo *indexJoinPathInfo,
buildTmp *indexJoinPathTmp,
ranges ranger.MutableRanges,
path *util.AccessPath, accesses,
remained []expression.Expression,
lastColManager *physicalop.ColWithCmpFuncManager,
usedColsLen int) *indexJoinPathResult {
var innerNDV float64
if stats := indexJoinInfo.innerTableStats; stats != nil && stats.StatsVersion != statistics.PseudoVersion {
// NOTE: use the original table's stats (DataSource.TableStats) to estimate NDV instead of stats
// AFTER APPLYING ALL FILTERS (DataSource.stats). Because here we'll use the NDV to measure how many
// rows we need to scan in double-read, and these filters will be applied after the scanning, so we need to
// ignore these filters to avoid underestimation. See #63869.
innerNDV, _ = cardinality.EstimateColsNDVWithMatchedLen(
sctx, path.IdxCols[:usedColsLen], indexJoinInfo.innerSchema, stats)
}
idxOff2KeyOff := make([]int, len(buildTmp.curIdxOff2KeyOff))
copy(idxOff2KeyOff, buildTmp.curIdxOff2KeyOff)
return &indexJoinPathResult{
chosenPath: path,
candidate: getIndexCandidateForIndexJoin(sctx, path, usedColsLen),
usedColsLen: len(ranges.Range()[0].LowVal),
usedColsNDV: innerNDV,
chosenRanges: ranges,
chosenAccess: accesses,
chosenRemained: remained,
idxOff2KeyOff: idxOff2KeyOff,
lastColManager: lastColManager,
}
}
func indexJoinPathBuildTmpRange(
sctx planctx.PlanContext,
buildTmp *indexJoinPathTmp,
matchedKeyCnt int,
eqAndInFuncs []expression.Expression,
nextColRange []*ranger.Range,
haveExtraCol bool,
rangeMaxSize int64) (res *indexJoinTmpRange) {
res = &indexJoinTmpRange{}
sc := sctx.GetSessionVars().StmtCtx
defer func() {
if sc.MemTracker != nil && res != nil && len(res.ranges) > 0 {
sc.MemTracker.Consume(2 * types.EstimatedMemUsage(res.ranges[0].LowVal, len(res.ranges)))
}
}()
pointLength := matchedKeyCnt + len(eqAndInFuncs)
ranges := ranger.Ranges{&ranger.Range{}}
for i, j := 0, 0; i+j < pointLength; {
if buildTmp.curIdxOff2KeyOff[i+j] != -1 {
// This position is occupied by join key.
var fallback bool
ranges, fallback = appendTailTemplateRange(ranges, rangeMaxSize)
if fallback {
sctx.GetSessionVars().StmtCtx.RecordRangeFallback(rangeMaxSize)
res.ranges = ranges
res.keyCntInRange = i
res.eqAndInCntInRange = j
return
}
i++
} else {
exprs := []expression.Expression{eqAndInFuncs[j]}
oneColumnRan, _, remained, err := ranger.BuildColumnRange(exprs, sctx.GetRangerCtx(), buildTmp.curNotUsedIndexCols[j].RetType, buildTmp.curNotUsedColLens[j], rangeMaxSize)
if err != nil {
return &indexJoinTmpRange{err: err}
}
if len(oneColumnRan) == 0 {
return &indexJoinTmpRange{emptyRange: true}
}
if sc.MemTracker != nil {
sc.MemTracker.Consume(2 * types.EstimatedMemUsage(oneColumnRan[0].LowVal, len(oneColumnRan)))
}
if len(remained) > 0 {
res.ranges = ranges
res.keyCntInRange = i
res.eqAndInCntInRange = j
return
}
var fallback bool
ranges, fallback = ranger.AppendRanges2PointRanges(ranges, oneColumnRan, rangeMaxSize)
if fallback {
sctx.GetSessionVars().StmtCtx.RecordRangeFallback(rangeMaxSize)
res.ranges = ranges
res.keyCntInRange = i
res.eqAndInCntInRange = j
return
}
j++
}
}
if len(nextColRange) > 0 {
var fallback bool
ranges, fallback = ranger.AppendRanges2PointRanges(ranges, nextColRange, rangeMaxSize)
if fallback {
sctx.GetSessionVars().StmtCtx.RecordRangeFallback(rangeMaxSize)
}
res.ranges = ranges
res.keyCntInRange = matchedKeyCnt
res.eqAndInCntInRange = len(eqAndInFuncs)
res.nextColInRange = !fallback
return
}
if haveExtraCol {
var fallback bool
ranges, fallback = appendTailTemplateRange(ranges, rangeMaxSize)
if fallback {
sctx.GetSessionVars().StmtCtx.RecordRangeFallback(rangeMaxSize)
}
res.ranges = ranges
res.keyCntInRange = matchedKeyCnt
res.eqAndInCntInRange = len(eqAndInFuncs)
res.extraColInRange = !fallback
return
}
res.ranges = ranges
res.keyCntInRange = matchedKeyCnt
res.eqAndInCntInRange = len(eqAndInFuncs)
return
}
func indexJoinIntPKRangeInfo(ectx expression.EvalContext, outerJoinKeys []*expression.Column) string {
var buffer strings.Builder
buffer.WriteString("[")
for i, key := range outerJoinKeys {
if i != 0 {
buffer.WriteString(" ")
}
buffer.WriteString(key.StringWithCtx(ectx, errors.RedactLogDisable))
}
buffer.WriteString("]")
return buffer.String()
}
// indexJoinPathRangeInfo generates the range information for the index join path.
func indexJoinPathRangeInfo(sctx planctx.PlanContext,
outerJoinKeys []*expression.Column,
indexJoinResult *indexJoinPathResult) string {
buffer := bytes.NewBufferString("[")
isFirst := true
for idxOff, keyOff := range indexJoinResult.idxOff2KeyOff {
if keyOff == -1 {
continue
}
if !isFirst {
buffer.WriteString(" ")
} else {
isFirst = false
}
fmt.Fprintf(buffer, "eq(%v, %v)", indexJoinResult.chosenPath.IdxCols[idxOff], outerJoinKeys[keyOff])
}
ectx := sctx.GetExprCtx().GetEvalCtx()
// It is to build the range info which is used in explain. It is necessary to redact the range info.
redact := ectx.GetTiDBRedactLog()
for _, access := range indexJoinResult.chosenAccess {
if !isFirst {
buffer.WriteString(" ")
} else {
isFirst = false
}
fmt.Fprintf(buffer, "%v", access.StringWithCtx(ectx, redact))
}
buffer.WriteString("]")
return buffer.String()
}
// indexJoinPathGetRangeInfoAndMaxOneRow computes the range information string and determines
// whether the index join can guarantee at most one row will be returned per probe.
// This happens when:
// 1. The chosen index is unique AND
// 2. All index columns are used for the join (usedColsLen == len(FullIdxCols)) AND
// 3. Either there are no access conditions, or the last access condition is an equality condition
//
// Parameters:
// - sctx: the plan context
// - outerJoinKeys: the outer join keys used to generate range info string
// - indexJoinResult: the index join path result containing the chosen path and access conditions
//
// Returns:
// - rangeInfo: a string representation of the range information for explain output
// - maxOneRow: true if the index join guarantees at most one row per probe
func indexJoinPathGetRangeInfoAndMaxOneRow(
sctx planctx.PlanContext,
outerJoinKeys []*expression.Column,
indexJoinResult *indexJoinPathResult) (rangeInfo string, maxOneRow bool) {
rangeInfo = indexJoinPathRangeInfo(sctx, outerJoinKeys, indexJoinResult)
maxOneRow = false
if indexJoinResult.chosenPath.Index.Unique && indexJoinResult.usedColsLen == len(indexJoinResult.chosenPath.FullIdxCols) {
l := len(indexJoinResult.chosenAccess)
if l == 0 {
maxOneRow = true
} else {
sf, ok := indexJoinResult.chosenAccess[l-1].(*expression.ScalarFunction)
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
return rangeInfo, maxOneRow
}
// Reset the 'curIdxOff2KeyOff', 'curNotUsedIndexCols' and 'curNotUsedColLens' by innerKeys and idxCols
/*
For each idxCols,
If column can be found in innerKeys
save offset of innerKeys in 'curIdxOff2KeyOff'
Else,
save -1 in 'curIdxOff2KeyOff'
*/
// For example, innerKeys[t1.a, t1.sum_b, t1.c], idxCols [a, b, c]
// 'curIdxOff2KeyOff' = [0, -1, 2]
func indexJoinPathTmpInit(
sctx planctx.PlanContext,
indexJoinInfo *indexJoinPathInfo,
idxCols []*expression.Column,
colLens []int) *indexJoinPathTmp {
tmpSchema := expression.NewSchema(indexJoinInfo.innerJoinKeys...)
buildTmp := new(indexJoinPathTmp)
buildTmp.curIdxOff2KeyOff = make([]int, len(idxCols))
buildTmp.curNotUsedIndexCols = make([]*expression.Column, 0, len(idxCols))
buildTmp.curNotUsedColLens = make([]int, 0, len(idxCols))
for i, idxCol := range idxCols {
buildTmp.curIdxOff2KeyOff[i] = tmpSchema.ColumnIndex(idxCol)
if buildTmp.curIdxOff2KeyOff[i] >= 0 {
// Don't use the join columns if their collations are unmatched and the new collation is enabled.
if collate.NewCollationEnabled() && types.IsString(idxCol.RetType.GetType()) && types.IsString(indexJoinInfo.outerJoinKeys[buildTmp.curIdxOff2KeyOff[i]].RetType.GetType()) {
et, err := expression.CheckAndDeriveCollationFromExprs(sctx.GetExprCtx(), "equal", types.ETInt, idxCol, indexJoinInfo.outerJoinKeys[buildTmp.curIdxOff2KeyOff[i]])
if err != nil {
logutil.BgLogger().Error("Unexpected error happened during constructing index join", zap.Stack("stack"))
}
if !collate.CompatibleCollate(idxCol.GetStaticType().GetCollate(), et.Collation) {
buildTmp.curIdxOff2KeyOff[i] = -1
}
}
continue
}
buildTmp.curNotUsedIndexCols = append(buildTmp.curNotUsedIndexCols, idxCol)
buildTmp.curNotUsedColLens = append(buildTmp.curNotUsedColLens, colLens[i])
}
return buildTmp
}
// indexJoinPathFindUsefulEQIn analyzes the PushedDownConds held by inner child and split them to three parts.
// usefulEqOrInFilters is the continuous eq/in conditions on current unused index columns.
// remainedEqOrIn is part of usefulEqOrInFilters, which needs to be evaluated again in selection.
// remainingRangeCandidates is the other conditions for future use.
func indexJoinPathFindUsefulEQIn(sctx planctx.PlanContext, indexJoinInfo *indexJoinPathInfo,
buildTmp *indexJoinPathTmp) (usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates []expression.Expression, emptyRange bool) {
// Extract the eq/in functions of possible join key.
// you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value.
usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _, emptyRange = ranger.ExtractEqAndInCondition(
sctx.GetRangerCtx(),
indexJoinInfo.innerPushedConditions,
buildTmp.curNotUsedIndexCols,
buildTmp.curNotUsedColLens,
)
return usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, emptyRange
}
// indexJoinPathBuildColManager analyze the `OtherConditions` of join to see whether there're some filters can be used in manager.
// The returned value is just for outputting explain information
func indexJoinPathBuildColManager(indexJoinInfo *indexJoinPathInfo,
nextCol *expression.Column, cwc *physicalop.ColWithCmpFuncManager) []expression.Expression {
var lastColAccesses []expression.Expression
loopOtherConds:
for _, filter := range indexJoinInfo.joinOtherConditions {
sf, ok := filter.(*expression.ScalarFunction)
if !ok || !(sf.FuncName.L == ast.LE || sf.FuncName.L == ast.LT || sf.FuncName.L == ast.GE || sf.FuncName.L == ast.GT) {
continue
}
var funcName string
var anotherArg expression.Expression
if lCol, ok := sf.GetArgs()[0].(*expression.Column); ok && lCol.EqualColumn(nextCol) {
anotherArg = sf.GetArgs()[1]
funcName = sf.FuncName.L
} else if rCol, ok := sf.GetArgs()[1].(*expression.Column); ok && rCol.EqualColumn(nextCol) {
anotherArg = sf.GetArgs()[0]
// The column manager always build expression in the form of col op arg1.
// So we need use the symmetric one of the current function.
funcName = symmetricOp[sf.FuncName.L]
} else {
continue
}
affectedCols := expression.ExtractColumns(anotherArg)
if len(affectedCols) == 0 {
continue
}
for _, col := range affectedCols {
if indexJoinInfo.innerSchema.Contains(col) {
continue loopOtherConds
}
}
lastColAccesses = append(lastColAccesses, sf)
cwc.AppendNewExpr(funcName, anotherArg, affectedCols)
}
return lastColAccesses
}
// indexJoinPathRemoveUselessEQIn removes the useless eq/in conditions. It's designed for the following case:
//
// t1 join t2 on t1.a=t2.a and t1.c=t2.c where t1.b > t2.b-10 and t1.b < t2.b+10 there's index(a, b, c) on t1.
// In this case the curIdxOff2KeyOff is [0 -1 1] and the notKeyEqAndIn is [].
// It's clearly that the column c cannot be used to access data. So we need to remove it and reset the IdxOff2KeyOff to
// [0 -1 -1].
// So that we can use t1.a=t2.a and t1.b > t2.b-10 and t1.b < t2.b+10 to build ranges then access data.
func indexJoinPathRemoveUselessEQIn(buildTmp *indexJoinPathTmp, idxCols []*expression.Column,
notKeyEqAndIn []expression.Expression) (usefulEqAndIn, uselessOnes []expression.Expression) {
buildTmp.curPossibleUsedKeys = make([]*expression.Column, 0, len(idxCols))
for idxColPos, notKeyColPos := 0, 0; idxColPos < len(idxCols); idxColPos++ {
if buildTmp.curIdxOff2KeyOff[idxColPos] != -1 {
buildTmp.curPossibleUsedKeys = append(buildTmp.curPossibleUsedKeys, idxCols[idxColPos])
continue
}
if notKeyColPos < len(notKeyEqAndIn) && buildTmp.curNotUsedIndexCols[notKeyColPos].EqualColumn(idxCols[idxColPos]) {
notKeyColPos++
continue
}
for i := idxColPos + 1; i < len(idxCols); i++ {
buildTmp.curIdxOff2KeyOff[i] = -1
}
remained := make([]expression.Expression, 0, len(notKeyEqAndIn)-notKeyColPos)
remained = append(remained, notKeyEqAndIn[notKeyColPos:]...)
notKeyEqAndIn = notKeyEqAndIn[:notKeyColPos]
return notKeyEqAndIn, remained
}
return notKeyEqAndIn, nil
}
func getIndexJoinIntPKPathInfo(ds *logicalop.DataSource, innerJoinKeys, outerJoinKeys []*expression.Column,
checkPathValid func(path *util.AccessPath) bool) (
keyOff2IdxOff []int, newOuterJoinKeys []*expression.Column, ranges ranger.Ranges, chosenPath *util.AccessPath, ok bool) {
pkMatched := false
pkCol := ds.GetPKIsHandleCol()
if pkCol == nil {
return nil, nil, nil, nil, false
}
keyOff2IdxOff = make([]int, len(innerJoinKeys))
newOuterJoinKeys = make([]*expression.Column, 0)
for i, key := range innerJoinKeys {
if !key.EqualColumn(pkCol) {
keyOff2IdxOff[i] = -1
continue
}
pkMatched = true
keyOff2IdxOff[i] = 0
// Add to newOuterJoinKeys only if conditions contain inner primary key. For issue #14822.
newOuterJoinKeys = append(newOuterJoinKeys, outerJoinKeys[i])
}
if !pkMatched {
return nil, nil, nil, nil, false
}
ranges = ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.GetFlag()))
// compute the matchProp which require the int table path first.
for _, path := range ds.PossibleAccessPaths {
if checkPathValid(path) {
chosenPath = path
break
}
}
return keyOff2IdxOff, newOuterJoinKeys, ranges, chosenPath, true
}
// getBestIndexJoinInnerTaskByProp tries to build the best inner child task from ds for index join by the given property.
func getBestIndexJoinInnerTaskByProp(ds *logicalop.DataSource, prop *property.PhysicalProperty) (base.Task, error) {
// the below code is quite similar from the original logic
// reason1: we need to leverage original indexPathInfo down related logic to build constant range for index plan.
// reason2: the ranges from TS and IS couldn't be directly used to derive the stats' estimation, it's not real.
// reason3: skyline pruning should not prune the possible index path which could feel the runtime EQ access conditions.
//
// here we build TableScan(TS) and IndexScan(IS) separately according to different index join prop is for we couldn't decide
// which one as the copTask here is better, some more possible upper attached operator cost should be
// considered, besides the row count, double reader cost for index lookup should also be considered as
// a whole, so we leave the cost compare for index join itself just like what it was before.
var innerCopTask base.Task
if prop.IndexJoinProp.TableRangeScan {
innerCopTask = buildDataSource2TableScanByIndexJoinProp(ds, prop)
} else {
innerCopTask = buildDataSource2IndexScanByIndexJoinProp(ds, prop)
}
if innerCopTask.Invalid() {
return base.InvalidTask, nil
}
if prop.TaskTp == property.RootTaskType {
return innerCopTask.ConvertToRootTask(ds.SCtx()), nil
}
return innerCopTask, nil
}
// getBestIndexJoinPathResultByProp tries to iterate all possible access paths of the inner child and builds
// index join path for each access path based on push-down indexIndexProp. It returns the best index join path result and the mapping.
func getBestIndexJoinPathResultByProp(
innerDS *logicalop.DataSource,
indexJoinProp *property.IndexJoinRuntimeProp,
checkPathValid func(path *util.AccessPath) bool) (*indexJoinPathResult, []int) {
indexJoinInfo := &indexJoinPathInfo{
joinOtherConditions: indexJoinProp.OtherConditions, // other conditions is for complete last col non-eq range
outerJoinKeys: indexJoinProp.OuterJoinKeys,
innerJoinKeys: indexJoinProp.InnerJoinKeys,
innerPushedConditions: innerDS.PushedDownConds,
innerSchema: innerDS.Schema(),
innerTableStats: innerDS.TableStats,
}
var bestResult *indexJoinPathResult
for _, path := range innerDS.PossibleAccessPaths {
if checkPathValid(path) {
// here we still wrap indexJoinPathInfo to call index indexJoinPathBuild to get the chosen path result.
result, emptyRange, err := indexJoinPathBuild(innerDS.SCtx(), path, indexJoinInfo, false)
if emptyRange {
return nil, nil
}
if err != nil {
logutil.BgLogger().Warn("build index join failed", zap.Error(err))
continue
}
if indexJoinPathCompare(innerDS, bestResult, result) {
bestResult = result
}
}
}
if bestResult == nil || bestResult.chosenPath == nil {
return nil, nil
}
keyOff2IdxOff := make([]int, len(indexJoinProp.InnerJoinKeys))
for i := range keyOff2IdxOff {
keyOff2IdxOff[i] = -1
}
// reverse idxOff2KeyOff as keyOff2IdxOff, from the perspective of inner join key, we could easily get the offset of index col.
for idxOff, keyOff := range bestResult.idxOff2KeyOff {
if keyOff != -1 {
keyOff2IdxOff[keyOff] = idxOff
}
}
return bestResult, keyOff2IdxOff
}
// getBestIndexJoinPathResult tries to iterate all possible access paths of the inner child and builds
// index join path for each access path. It returns the best index join path result and the mapping.
func getBestIndexJoinPathResult(
join *logicalop.LogicalJoin,
innerChild *logicalop.DataSource,
innerJoinKeys, outerJoinKeys []*expression.Column,
checkPathValid func(path *util.AccessPath) bool) (*indexJoinPathResult, []int) {
indexJoinInfo := &indexJoinPathInfo{
joinOtherConditions: join.OtherConditions,
outerJoinKeys: outerJoinKeys,
innerJoinKeys: innerJoinKeys,
innerPushedConditions: innerChild.PushedDownConds,
innerSchema: innerChild.Schema(),
innerTableStats: innerChild.TableStats,
}
var bestResult *indexJoinPathResult
for _, path := range innerChild.PossibleAccessPaths {
if checkPathValid(path) {
result, emptyRange, err := indexJoinPathBuild(join.SCtx(), path, indexJoinInfo, false)
if emptyRange {
return nil, nil
}
if err != nil {
logutil.BgLogger().Warn("build index join failed", zap.Error(err))
continue
}
if indexJoinPathCompare(innerChild, bestResult, result) {
bestResult = result
}
}
}
if bestResult == nil || bestResult.chosenPath == nil {
return nil, nil
}
keyOff2IdxOff := make([]int, len(innerJoinKeys))
for i := range keyOff2IdxOff {
keyOff2IdxOff[i] = -1
}
// reverse idxOff2KeyOff as keyOff2IdxOff, from the perspective of inner join key, we could easily get the offset of index col.
for idxOff, keyOff := range bestResult.idxOff2KeyOff {
if keyOff != -1 {
keyOff2IdxOff[keyOff] = idxOff
}
}
return bestResult, keyOff2IdxOff
}
// appendTailTemplateRange appends empty datum for each range in originRanges.
// rangeMaxSize is the max memory limit for ranges. O indicates no memory limit.
// If the second return value is true, it means that the estimated memory after appending datums to originRanges exceeds
// rangeMaxSize and the function rejects appending datums to originRanges.
func appendTailTemplateRange(originRanges ranger.Ranges, rangeMaxSize int64) (ranger.Ranges, bool) {
if rangeMaxSize > 0 && originRanges.MemUsage()+(types.EmptyDatumSize*2+16)*int64(len(originRanges)) > rangeMaxSize {
return originRanges, true
}
for _, ran := range originRanges {
ran.LowVal = append(ran.LowVal, types.Datum{})
ran.HighVal = append(ran.HighVal, types.Datum{})
ran.Collators = append(ran.Collators, nil)
}
return originRanges, false
}
var symmetricOp = map[string]string{
ast.LT: ast.GT,
ast.GE: ast.LE,
ast.GT: ast.LT,
ast.LE: ast.GE,
}