// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package core
|
|
|
|
import (
|
|
"math"
|
|
|
|
"github.com/pingcap/tidb/pkg/expression"
|
|
"github.com/pingcap/tidb/pkg/infoschema"
|
|
"github.com/pingcap/tidb/pkg/parser/ast"
|
|
"github.com/pingcap/tidb/pkg/parser/model"
|
|
"github.com/pingcap/tidb/pkg/parser/mysql"
|
|
"github.com/pingcap/tidb/pkg/planner/cardinality"
|
|
"github.com/pingcap/tidb/pkg/planner/core/base"
|
|
"github.com/pingcap/tidb/pkg/planner/core/cost"
|
|
"github.com/pingcap/tidb/pkg/planner/core/operator/logicalop"
|
|
"github.com/pingcap/tidb/pkg/planner/property"
|
|
"github.com/pingcap/tidb/pkg/planner/util"
|
|
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
|
|
"github.com/pingcap/tidb/pkg/planner/util/tablesampler"
|
|
"github.com/pingcap/tidb/pkg/statistics"
|
|
"github.com/pingcap/tidb/pkg/table"
|
|
"github.com/pingcap/tidb/pkg/types"
|
|
h "github.com/pingcap/tidb/pkg/util/hint"
|
|
"github.com/pingcap/tidb/pkg/util/logutil"
|
|
"github.com/pingcap/tidb/pkg/util/ranger"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// DataSource represents a tableScan without condition push down.
|
|
type DataSource struct {
|
|
logicalop.LogicalSchemaProducer
|
|
|
|
AstIndexHints []*ast.IndexHint
|
|
IndexHints []h.HintedIndex
|
|
table table.Table
|
|
TableInfo *model.TableInfo
|
|
Columns []*model.ColumnInfo
|
|
DBName model.CIStr
|
|
|
|
TableAsName *model.CIStr
|
|
// IndexMergeHints are the hint for indexmerge.
|
|
IndexMergeHints []h.HintedIndex
|
|
// PushedDownConds are the conditions that will be pushed down to coprocessor.
|
|
PushedDownConds []expression.Expression
|
|
// AllConds contains all the filters on this table. For now it's maintained
|
|
// in predicate push down and used in partition pruning/index merge.
|
|
AllConds []expression.Expression
|
|
|
|
StatisticTable *statistics.Table
|
|
TableStats *property.StatsInfo
|
|
|
|
// PossibleAccessPaths stores all the possible access path for physical plan, including table scan.
|
|
PossibleAccessPaths []*util.AccessPath
|
|
|
|
// The data source may be a partition, rather than a real table.
|
|
PartitionDefIdx *int
|
|
PhysicalTableID int64
|
|
PartitionNames []model.CIStr
|
|
|
|
// handleCol represents the handle column for the datasource, either the
|
|
// int primary key column or extra handle column.
|
|
// handleCol *expression.Column
|
|
HandleCols util.HandleCols
|
|
UnMutableHandleCols util.HandleCols
|
|
// TblCols contains the original columns of table before being pruned, and it
|
|
// is used for estimating table scan cost.
|
|
TblCols []*expression.Column
|
|
// CommonHandleCols and CommonHandleLens save the info of primary key which is the clustered index.
|
|
CommonHandleCols []*expression.Column
|
|
CommonHandleLens []int
|
|
// TblColHists contains the Histogram of all original table columns,
|
|
// it is converted from StatisticTable, and used for IO/network cost estimating.
|
|
TblColHists *statistics.HistColl
|
|
// PreferStoreType means the DataSource is enforced to which storage.
|
|
PreferStoreType int
|
|
// PreferPartitions store the map, the key represents store type, the value represents the partition name list.
|
|
PreferPartitions map[int][]model.CIStr
|
|
SampleInfo *tablesampler.TableSampleInfo
|
|
IS infoschema.InfoSchema
|
|
// IsForUpdateRead should be true in either of the following situations
|
|
// 1. use `inside insert`, `update`, `delete` or `select for update` statement
|
|
// 2. isolation level is RC
|
|
IsForUpdateRead bool
|
|
|
|
// contain unique index and the first field is tidb_shard(),
|
|
// such as (tidb_shard(a), a ...), the fields are more than 2
|
|
ContainExprPrefixUk bool
|
|
|
|
// ColsRequiringFullLen is the columns that must be fetched with full length.
|
|
// It is used to decide whether single scan is enough when reading from an index.
|
|
ColsRequiringFullLen []*expression.Column
|
|
|
|
// AccessPathMinSelectivity is the minimal selectivity among the access paths.
|
|
// It's calculated after we generated the access paths and estimated row count for them, and before entering findBestTask.
|
|
// It considers CountAfterIndex for index paths and CountAfterAccess for table paths and index merge paths.
|
|
AccessPathMinSelectivity float64
|
|
}
|
|
|
|
// ExtractCorrelatedCols implements LogicalPlan interface.
|
|
func (ds *DataSource) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
|
|
corCols := make([]*expression.CorrelatedColumn, 0, len(ds.PushedDownConds))
|
|
for _, expr := range ds.PushedDownConds {
|
|
corCols = append(corCols, expression.ExtractCorColumns(expr)...)
|
|
}
|
|
return corCols
|
|
}
|
|
|
|
// getTablePath finds the TablePath from a group of accessPaths.
|
|
func getTablePath(paths []*util.AccessPath) *util.AccessPath {
|
|
for _, path := range paths {
|
|
if path.IsTablePath() {
|
|
return path
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (ds *DataSource) buildTableGather() base.LogicalPlan {
|
|
ts := LogicalTableScan{Source: ds, HandleCols: ds.HandleCols}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
ts.SetSchema(ds.Schema())
|
|
sg := TiKVSingleGather{Source: ds, IsIndexGather: false}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
sg.SetSchema(ds.Schema())
|
|
sg.SetChildren(ts)
|
|
return sg
|
|
}
|
|
|
|
func (ds *DataSource) buildIndexGather(path *util.AccessPath) base.LogicalPlan {
|
|
is := LogicalIndexScan{
|
|
Source: ds,
|
|
IsDoubleRead: false,
|
|
Index: path.Index,
|
|
FullIdxCols: path.FullIdxCols,
|
|
FullIdxColLens: path.FullIdxColLens,
|
|
IdxCols: path.IdxCols,
|
|
IdxColLens: path.IdxColLens,
|
|
}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
|
|
is.Columns = make([]*model.ColumnInfo, len(ds.Columns))
|
|
copy(is.Columns, ds.Columns)
|
|
is.SetSchema(ds.Schema())
|
|
is.IdxCols, is.IdxColLens = expression.IndexInfo2PrefixCols(is.Columns, is.Schema().Columns, is.Index)
|
|
|
|
sg := TiKVSingleGather{
|
|
Source: ds,
|
|
IsIndexGather: true,
|
|
Index: path.Index,
|
|
}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
sg.SetSchema(ds.Schema())
|
|
sg.SetChildren(is)
|
|
return sg
|
|
}
|
|
|
|
// Convert2Gathers builds logical TiKVSingleGathers from DataSource.
|
|
func (ds *DataSource) Convert2Gathers() (gathers []base.LogicalPlan) {
|
|
tg := ds.buildTableGather()
|
|
gathers = append(gathers, tg)
|
|
for _, path := range ds.PossibleAccessPaths {
|
|
if !path.IsIntHandlePath {
|
|
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
// If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan.
|
|
if ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens) {
|
|
gathers = append(gathers, ds.buildIndexGather(path))
|
|
}
|
|
// TODO: If index columns can not cover the schema, use IndexLookUpGather.
|
|
}
|
|
}
|
|
return gathers
|
|
}
|
|
|
|
func detachCondAndBuildRangeForPath(
|
|
sctx base.PlanContext,
|
|
path *util.AccessPath,
|
|
conds []expression.Expression,
|
|
histColl *statistics.HistColl,
|
|
) error {
|
|
if len(path.IdxCols) == 0 {
|
|
path.TableFilters = conds
|
|
return nil
|
|
}
|
|
res, err := ranger.DetachCondAndBuildRangeForIndex(sctx.GetRangerCtx(), conds, path.IdxCols, path.IdxColLens, sctx.GetSessionVars().RangeMaxSize)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
path.Ranges = res.Ranges
|
|
path.AccessConds = res.AccessConds
|
|
path.TableFilters = res.RemainedConds
|
|
path.EqCondCount = res.EqCondCount
|
|
path.EqOrInCondCount = res.EqOrInCount
|
|
path.IsDNFCond = res.IsDNFCond
|
|
path.ConstCols = make([]bool, len(path.IdxCols))
|
|
if res.ColumnValues != nil {
|
|
for i := range path.ConstCols {
|
|
path.ConstCols[i] = res.ColumnValues[i] != nil
|
|
}
|
|
}
|
|
path.CountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges)
|
|
return err
|
|
}
|
|
|
|
func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error {
|
|
path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
|
|
path.Ranges = ranger.FullNotNullRange()
|
|
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
if len(conds) == 0 {
|
|
return nil
|
|
}
|
|
if err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl); err != nil {
|
|
return err
|
|
}
|
|
if path.EqOrInCondCount == len(path.AccessConds) {
|
|
accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
|
|
path.AccessConds = append(path.AccessConds, accesses...)
|
|
path.TableFilters = remained
|
|
if len(accesses) > 0 && ds.StatisticTable.Pseudo {
|
|
path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
|
|
} else {
|
|
selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
|
|
for i := range accesses {
|
|
col := path.IdxCols[path.EqOrInCondCount+i]
|
|
ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID)
|
|
ndv *= selectivity
|
|
if ndv < 1 {
|
|
ndv = 1.0
|
|
}
|
|
path.CountAfterAccess = path.CountAfterAccess / ndv
|
|
}
|
|
}
|
|
}
|
|
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
|
|
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
|
|
if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
|
|
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// deriveTablePathStats will fulfill the information that the AccessPath need.
|
|
// isIm indicates whether this function is called to generate the partial path for IndexMerge.
|
|
func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error {
|
|
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
|
|
debugtrace.EnterContextCommon(ds.SCtx())
|
|
defer debugtrace.LeaveContextCommon(ds.SCtx())
|
|
}
|
|
if path.IsCommonHandlePath {
|
|
return ds.deriveCommonHandleTablePathStats(path, conds, isIm)
|
|
}
|
|
var err error
|
|
path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
|
|
path.TableFilters = conds
|
|
var pkCol *expression.Column
|
|
isUnsigned := false
|
|
if ds.TableInfo.PKIsHandle {
|
|
if pkColInfo := ds.TableInfo.GetPkColInfo(); pkColInfo != nil {
|
|
isUnsigned = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
|
|
pkCol = expression.ColInfo2Col(ds.Schema().Columns, pkColInfo)
|
|
}
|
|
} else {
|
|
pkCol = ds.Schema().GetExtraHandleColumn()
|
|
}
|
|
if pkCol == nil {
|
|
path.Ranges = ranger.FullIntRange(isUnsigned)
|
|
return nil
|
|
}
|
|
|
|
path.Ranges = ranger.FullIntRange(isUnsigned)
|
|
if len(conds) == 0 {
|
|
return nil
|
|
}
|
|
// for cnf condition combination, c=1 and c=2 and (1 member of (a)),
|
|
// c=1 and c=2 will derive invalid range represented by an access condition as constant of 0 (false).
|
|
// later this constant of 0 will be built as empty range.
|
|
path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.SCtx().GetRangerCtx(), conds, pkCol)
|
|
// If there's no access cond, we try to find that whether there's expression containing correlated column that
|
|
// can be used to access data.
|
|
corColInAccessConds := false
|
|
if len(path.AccessConds) == 0 {
|
|
for i, filter := range path.TableFilters {
|
|
eqFunc, ok := filter.(*expression.ScalarFunction)
|
|
if !ok || eqFunc.FuncName.L != ast.EQ {
|
|
continue
|
|
}
|
|
lCol, lOk := eqFunc.GetArgs()[0].(*expression.Column)
|
|
if lOk && lCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) {
|
|
_, rOk := eqFunc.GetArgs()[1].(*expression.CorrelatedColumn)
|
|
if rOk {
|
|
path.AccessConds = append(path.AccessConds, filter)
|
|
path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...)
|
|
corColInAccessConds = true
|
|
break
|
|
}
|
|
}
|
|
rCol, rOk := eqFunc.GetArgs()[1].(*expression.Column)
|
|
if rOk && rCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) {
|
|
_, lOk := eqFunc.GetArgs()[0].(*expression.CorrelatedColumn)
|
|
if lOk {
|
|
path.AccessConds = append(path.AccessConds, filter)
|
|
path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...)
|
|
corColInAccessConds = true
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if corColInAccessConds {
|
|
path.CountAfterAccess = 1
|
|
return nil
|
|
}
|
|
var remainedConds []expression.Expression
|
|
path.Ranges, path.AccessConds, remainedConds, err = ranger.BuildTableRange(path.AccessConds, ds.SCtx().GetRangerCtx(), pkCol.RetType, ds.SCtx().GetSessionVars().RangeMaxSize)
|
|
path.TableFilters = append(path.TableFilters, remainedConds...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
path.CountAfterAccess, err = cardinality.GetRowCountByIntColumnRanges(ds.SCtx(), &ds.StatisticTable.HistColl, pkCol.ID, path.Ranges)
|
|
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
|
|
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
|
|
if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
|
|
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Expression) error {
|
|
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
|
|
debugtrace.EnterContextCommon(ds.SCtx())
|
|
defer debugtrace.LeaveContextCommon(ds.SCtx())
|
|
}
|
|
path.Ranges = ranger.FullRange()
|
|
path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
|
|
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) {
|
|
handleCol := ds.getPKIsHandleCol()
|
|
if handleCol != nil && !mysql.HasUnsignedFlag(handleCol.RetType.GetFlag()) {
|
|
alreadyHandle := false
|
|
for _, col := range path.IdxCols {
|
|
if col.ID == model.ExtraHandleID || col.EqualColumn(handleCol) {
|
|
alreadyHandle = true
|
|
}
|
|
}
|
|
// Don't add one column twice to the index. May cause unexpected errors.
|
|
if !alreadyHandle {
|
|
path.IdxCols = append(path.IdxCols, handleCol)
|
|
path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
|
|
// Also updates the map that maps the index id to its prefix column ids.
|
|
if len(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
|
|
ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl)
|
|
return err
|
|
}
|
|
|
|
// deriveIndexPathStats will fulfill the information that the AccessPath need.
|
|
// conds is the conditions used to generate the DetachRangeResult for path.
|
|
// isIm indicates whether this function is called to generate the partial path for IndexMerge.
|
|
func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, _ []expression.Expression, isIm bool) {
|
|
if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
|
|
debugtrace.EnterContextCommon(ds.SCtx())
|
|
defer debugtrace.LeaveContextCommon(ds.SCtx())
|
|
}
|
|
if path.EqOrInCondCount == len(path.AccessConds) {
|
|
accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
|
|
path.AccessConds = append(path.AccessConds, accesses...)
|
|
path.TableFilters = remained
|
|
if len(accesses) > 0 && ds.StatisticTable.Pseudo {
|
|
path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
|
|
} else {
|
|
selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
|
|
for i := range accesses {
|
|
col := path.IdxCols[path.EqOrInCondCount+i]
|
|
ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID)
|
|
ndv *= selectivity
|
|
if ndv < 1 {
|
|
ndv = 1.0
|
|
}
|
|
path.CountAfterAccess = path.CountAfterAccess / ndv
|
|
}
|
|
}
|
|
}
|
|
var indexFilters []expression.Expression
|
|
indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens)
|
|
path.IndexFilters = append(path.IndexFilters, indexFilters...)
|
|
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
|
|
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
|
|
if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
|
|
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
|
|
}
|
|
if path.IndexFilters != nil {
|
|
selectivity, _, err := cardinality.Selectivity(ds.SCtx(), ds.TableStats.HistColl, path.IndexFilters, nil)
|
|
if err != nil {
|
|
logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
|
|
selectivity = cost.SelectionFactor
|
|
}
|
|
if isIm {
|
|
path.CountAfterIndex = path.CountAfterAccess * selectivity
|
|
} else {
|
|
path.CountAfterIndex = math.Max(path.CountAfterAccess*selectivity, ds.StatsInfo().RowCount)
|
|
}
|
|
} else {
|
|
path.CountAfterIndex = path.CountAfterAccess
|
|
}
|
|
}
|
|
|
|
func getPKIsHandleColFromSchema(cols []*model.ColumnInfo, schema *expression.Schema, pkIsHandle bool) *expression.Column {
|
|
if !pkIsHandle {
|
|
// If the PKIsHandle is false, return the ExtraHandleColumn.
|
|
for i, col := range cols {
|
|
if col.ID == model.ExtraHandleID {
|
|
return schema.Columns[i]
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
for i, col := range cols {
|
|
if mysql.HasPriKeyFlag(col.GetFlag()) {
|
|
return schema.Columns[i]
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (ds *DataSource) getPKIsHandleCol() *expression.Column {
|
|
return getPKIsHandleColFromSchema(ds.Columns, ds.Schema(), ds.TableInfo.PKIsHandle)
|
|
}
|