// Copyright 2024 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package core
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math"
|
|
|
|
"github.com/pingcap/tidb/pkg/expression"
|
|
"github.com/pingcap/tidb/pkg/infoschema"
|
|
"github.com/pingcap/tidb/pkg/kv"
|
|
"github.com/pingcap/tidb/pkg/parser/ast"
|
|
"github.com/pingcap/tidb/pkg/parser/model"
|
|
"github.com/pingcap/tidb/pkg/parser/mysql"
|
|
"github.com/pingcap/tidb/pkg/planner/cardinality"
|
|
"github.com/pingcap/tidb/pkg/planner/core/base"
|
|
"github.com/pingcap/tidb/pkg/planner/core/cost"
|
|
"github.com/pingcap/tidb/pkg/planner/core/operator/logicalop"
|
|
fd "github.com/pingcap/tidb/pkg/planner/funcdep"
|
|
"github.com/pingcap/tidb/pkg/planner/property"
|
|
"github.com/pingcap/tidb/pkg/planner/util"
|
|
"github.com/pingcap/tidb/pkg/planner/util/debugtrace"
|
|
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
|
|
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace/logicaltrace"
|
|
"github.com/pingcap/tidb/pkg/planner/util/tablesampler"
|
|
"github.com/pingcap/tidb/pkg/statistics"
|
|
"github.com/pingcap/tidb/pkg/table"
|
|
"github.com/pingcap/tidb/pkg/types"
|
|
h "github.com/pingcap/tidb/pkg/util/hint"
|
|
"github.com/pingcap/tidb/pkg/util/intset"
|
|
"github.com/pingcap/tidb/pkg/util/logutil"
|
|
"github.com/pingcap/tidb/pkg/util/plancodec"
|
|
"github.com/pingcap/tidb/pkg/util/ranger"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// DataSource represents a tableScan without condition push down.
type DataSource struct {
	logicalop.LogicalSchemaProducer

	// AstIndexHints are the index hints as written in the query's AST.
	AstIndexHints []*ast.IndexHint
	// IndexHints are the resolved index hints for this table.
	IndexHints []h.HintedIndex
	// table is the runtime table object; TableInfo is its metadata.
	table     table.Table
	TableInfo *model.TableInfo
	// Columns are the column infos output by this operator (pruned in place
	// by PruneColumns).
	Columns []*model.ColumnInfo
	// DBName is the database the table belongs to.
	DBName model.CIStr

	// TableAsName is the table alias, if any.
	TableAsName *model.CIStr
	// IndexMergeHints are the hint for indexmerge.
	IndexMergeHints []h.HintedIndex
	// PushedDownConds are the conditions that will be pushed down to coprocessor.
	PushedDownConds []expression.Expression
	// AllConds contains all the filters on this table. For now it's maintained
	// in predicate push down and used in partition pruning/index merge.
	AllConds []expression.Expression

	// StatisticTable is the raw table statistics; TableStats is the derived
	// stats info used by the planner.
	StatisticTable *statistics.Table
	TableStats     *property.StatsInfo

	// PossibleAccessPaths stores all the possible access path for physical plan, including table scan.
	PossibleAccessPaths []*util.AccessPath

	// The data source may be a partition, rather than a real table.
	PartitionDefIdx *int
	PhysicalTableID int64
	PartitionNames  []model.CIStr

	// handleCol represents the handle column for the datasource, either the
	// int primary key column or extra handle column.
	// handleCol *expression.Column
	HandleCols          util.HandleCols
	UnMutableHandleCols util.HandleCols
	// TblCols contains the original columns of table before being pruned, and it
	// is used for estimating table scan cost.
	TblCols []*expression.Column
	// CommonHandleCols and CommonHandleLens save the info of primary key which is the clustered index.
	CommonHandleCols []*expression.Column
	CommonHandleLens []int
	// TblColHists contains the Histogram of all original table columns,
	// it is converted from StatisticTable, and used for IO/network cost estimating.
	TblColHists *statistics.HistColl
	// PreferStoreType means the DataSource is enforced to which storage.
	PreferStoreType int
	// PreferPartitions store the map, the key represents store type, the value represents the partition name list.
	PreferPartitions map[int][]model.CIStr
	// SampleInfo carries TABLESAMPLE information; presumably nil when the
	// query does not sample — TODO confirm against the builder.
	SampleInfo *tablesampler.TableSampleInfo
	IS         infoschema.InfoSchema
	// IsForUpdateRead should be true in either of the following situations
	// 1. use `inside insert`, `update`, `delete` or `select for update` statement
	// 2. isolation level is RC
	IsForUpdateRead bool

	// contain unique index and the first field is tidb_shard(),
	// such as (tidb_shard(a), a ...), the fields are more than 2
	ContainExprPrefixUk bool

	// ColsRequiringFullLen is the columns that must be fetched with full length.
	// It is used to decide whether single scan is enough when reading from an index.
	ColsRequiringFullLen []*expression.Column

	// AccessPathMinSelectivity is the minimal selectivity among the access paths.
	// It's calculated after we generated the access paths and estimated row count for them, and before entering findBestTask.
	// It considers CountAfterIndex for index paths and CountAfterAccess for table paths and index merge paths.
	AccessPathMinSelectivity float64
}
|
|
|
|
// Init initializes DataSource. Note the value receiver: the method works on
// a copy of the struct literal and returns a pointer to that copy, which is
// also registered as the plan's self reference.
func (ds DataSource) Init(ctx base.PlanContext, offset int) *DataSource {
	ds.BaseLogicalPlan = logicalop.NewBaseLogicalPlan(ctx, plancodec.TypeDataSource, &ds, offset)
	return &ds
}
|
|
|
|
// *************************** start implementation of Plan interface ***************************
|
|
|
|
// ExplainInfo implements Plan interface.
|
|
func (ds *DataSource) ExplainInfo() string {
|
|
buffer := bytes.NewBufferString("")
|
|
tblName := ds.TableInfo.Name.O
|
|
if ds.TableAsName != nil && ds.TableAsName.O != "" {
|
|
tblName = ds.TableAsName.O
|
|
}
|
|
fmt.Fprintf(buffer, "table:%s", tblName)
|
|
if ds.PartitionDefIdx != nil {
|
|
if pi := ds.TableInfo.GetPartitionInfo(); pi != nil {
|
|
fmt.Fprintf(buffer, ", partition:%s", pi.Definitions[*ds.PartitionDefIdx].Name.O)
|
|
}
|
|
}
|
|
return buffer.String()
|
|
}
|
|
|
|
// *************************** end implementation of Plan interface ****************************
|
|
|
|
// *************************** start implementation of logicalPlan interface ***************************
|
|
|
|
// HashCode inherits BaseLogicalPlan.<0th> interface.
|
|
|
|
// PredicatePushDown implements base.LogicalPlan.<1st> interface.
// It normalizes the incoming predicates, records the full filter set in
// ds.AllConds, and splits off the part the coprocessor can evaluate into
// ds.PushedDownConds; the remainder is returned for the parent to handle.
func (ds *DataSource) PredicatePushDown(predicates []expression.Expression, opt *optimizetrace.LogicalOptimizeOp) ([]expression.Expression, base.LogicalPlan) {
	predicates = expression.PropagateConstant(ds.SCtx().GetExprCtx(), predicates)
	predicates = DeleteTrueExprs(ds, predicates)
	// Add tidb_shard() prefix to the condition for shard index in some scenarios
	// TODO: remove it to the place building logical plan
	predicates = ds.AddPrefix4ShardIndexes(ds.SCtx(), predicates)
	// Keep every filter for later partition pruning / index merge before the split.
	ds.AllConds = predicates
	ds.PushedDownConds, predicates = expression.PushDownExprs(GetPushDownCtx(ds.SCtx()), predicates, kv.UnSpecified)
	appendDataSourcePredicatePushDownTraceStep(ds, opt)
	return predicates, ds
}
|
|
|
|
// PruneColumns implements base.LogicalPlan.<2nd> interface.
// It removes schema columns that neither the parent nor this table's own
// filters reference, keeps shard-index generated columns alive, guarantees
// at least one column remains in the schema, and may top the DataSource with
// a projection in the MPP case.
func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *optimizetrace.LogicalOptimizeOp) (base.LogicalPlan, error) {
	// Columns the parent operator actually reads.
	used := expression.GetUsedList(ds.SCtx().GetExprCtx().GetEvalCtx(), parentUsedCols, ds.Schema())

	// Columns referenced by this table's filters must also survive pruning,
	// even if the parent does not need them.
	exprCols := expression.ExtractColumnsFromExpressions(nil, ds.AllConds, nil)
	exprUsed := expression.GetUsedList(ds.SCtx().GetExprCtx().GetEvalCtx(), exprCols, ds.Schema())
	prunedColumns := make([]*expression.Column, 0)

	// Snapshot the pre-prune schema; needed below if the schema empties out.
	originSchemaColumns := ds.Schema().Columns
	originColumns := ds.Columns

	// Record parent-needed columns (plus tidb_shard generated columns) before
	// mutating the schema; consulted later for single-scan decisions.
	ds.ColsRequiringFullLen = make([]*expression.Column, 0, len(used))
	for i, col := range ds.Schema().Columns {
		if used[i] || (ds.ContainExprPrefixUk && expression.GcColumnExprIsTidbShard(col.VirtualExpr)) {
			ds.ColsRequiringFullLen = append(ds.ColsRequiringFullLen, col)
		}
	}

	// Iterate backwards so index-based removal stays valid.
	for i := len(used) - 1; i >= 0; i-- {
		if !used[i] && !exprUsed[i] {
			// If ds has a shard index, and the column is generated column by `tidb_shard()`
			// it can't prune the generated column of shard index
			if ds.ContainExprPrefixUk &&
				expression.GcColumnExprIsTidbShard(ds.Schema().Columns[i].VirtualExpr) {
				continue
			}
			prunedColumns = append(prunedColumns, ds.Schema().Columns[i])
			ds.Schema().Columns = append(ds.Schema().Columns[:i], ds.Schema().Columns[i+1:]...)
			ds.Columns = append(ds.Columns[:i], ds.Columns[i+1:]...)
		}
	}
	logicaltrace.AppendColumnPruneTraceStep(ds, prunedColumns, opt)
	addOneHandle := false
	// For SQL like `select 1 from t`, tikv's response will be empty if no column is in schema.
	// So we'll force to push one if schema doesn't have any column.
	if ds.Schema().Len() == 0 {
		var handleCol *expression.Column
		var handleColInfo *model.ColumnInfo
		handleCol, handleColInfo = preferKeyColumnFromTable(ds, originSchemaColumns, originColumns)
		ds.Columns = append(ds.Columns, handleColInfo)
		ds.Schema().Append(handleCol)
		addOneHandle = true
	}
	// ref: https://github.com/pingcap/tidb/issues/44579
	// when first entering columnPruner, we kept a column-a in datasource since upper agg function count(a) is used.
	// then we mark the HandleCols as nil here.
	// when second entering columnPruner, the count(a) is eliminated since it always not null. we should fill another
	// extra col, in this way, handle col is useful again, otherwise, _tidb_rowid will be filled.
	if ds.HandleCols != nil && ds.HandleCols.IsInt() && ds.Schema().ColumnIndex(ds.HandleCols.GetCol(0)) == -1 {
		ds.HandleCols = nil
	}
	// Current DataSource operator contains all the filters on this table, and the columns used by these filters are always included
	// in the output schema. Even if they are not needed by DataSource's parent operator. Thus add a projection here to prune useless columns
	// Limit to MPP tasks, because TiKV can't benefit from this now(projection can't be pushed down to TiKV now).
	if !addOneHandle && ds.Schema().Len() > len(parentUsedCols) && ds.SCtx().GetSessionVars().IsMPPEnforced() && ds.TableInfo.TiFlashReplica != nil {
		proj := LogicalProjection{
			Exprs: expression.Column2Exprs(parentUsedCols),
		}.Init(ds.SCtx(), ds.QueryBlockOffset())
		proj.SetStats(ds.StatsInfo())
		proj.SetSchema(expression.NewSchema(parentUsedCols...))
		proj.SetChildren(ds)
		return proj, nil
	}
	return ds, nil
}
|
|
|
|
// FindBestTask implements the base.LogicalPlan.<3rd> interface.
// It will enumerate all the available indices and choose a plan with least cost.
// The actual work is delegated to the shared helper findBestTask4DS.
func (ds *DataSource) FindBestTask(prop *property.PhysicalProperty, planCounter *base.PlanCounterTp, opt *optimizetrace.PhysicalOptimizeOp) (t base.Task, cntPlan int64, err error) {
	return findBestTask4DS(ds, prop, planCounter, opt)
}
|
|
|
|
// BuildKeyInfo implements base.LogicalPlan.<4th> interface.
// It collects the unique keys and lax unique keys of this table into
// selfSchema, skipping indexes that are not public (or, for for-update
// reads, not public in the latest schema).
func (ds *DataSource) BuildKeyInfo(selfSchema *expression.Schema, _ []*expression.Schema) {
	selfSchema.Keys = nil
	var latestIndexes map[int64]*model.IndexInfo
	var changed bool
	var err error
	check := ds.SCtx().GetSessionVars().IsIsolation(ast.ReadCommitted) || ds.IsForUpdateRead
	check = check && ds.SCtx().GetSessionVars().ConnectionID > 0
	// we should check index valid while forUpdateRead, see detail in https://github.com/pingcap/tidb/pull/22152
	if check {
		latestIndexes, changed, err = getLatestIndexInfo(ds.SCtx(), ds.table.Meta().ID, 0)
		if err != nil {
			// Best effort: leave the key info empty if the latest index info
			// cannot be fetched.
			return
		}
	}
	for _, index := range ds.table.Meta().Indices {
		// Skip indexes invisible to this read.
		if ds.IsForUpdateRead && changed {
			latestIndex, ok := latestIndexes[index.ID]
			if !ok || latestIndex.State != model.StatePublic {
				continue
			}
		} else if index.State != model.StatePublic {
			continue
		}
		if uniqueKey, newKey := checkIndexCanBeKey(index, ds.Columns, selfSchema); newKey != nil {
			selfSchema.Keys = append(selfSchema.Keys, newKey)
		} else if uniqueKey != nil {
			selfSchema.UniqueKeys = append(selfSchema.UniqueKeys, uniqueKey)
		}
	}
	// An integer primary key used as the row handle is also a key; it is not
	// stored in Indices, so handle it separately.
	if ds.TableInfo.PKIsHandle {
		for i, col := range ds.Columns {
			if mysql.HasPriKeyFlag(col.GetFlag()) {
				selfSchema.Keys = append(selfSchema.Keys, []*expression.Column{selfSchema.Columns[i]})
				break
			}
		}
	}
}
|
|
|
|
// PushDownTopN inherits BaseLogicalPlan.LogicalPlan.<5th> interface.
|
|
|
|
// DeriveTopN inherits BaseLogicalPlan.LogicalPlan.<6th> implementation.
|
|
|
|
// PredicateSimplification implements the base.LogicalPlan.<7th> interface.
|
|
func (ds *DataSource) PredicateSimplification(*optimizetrace.LogicalOptimizeOp) base.LogicalPlan {
|
|
p := ds.Self().(*DataSource)
|
|
p.PushedDownConds = applyPredicateSimplification(p.SCtx(), p.PushedDownConds)
|
|
p.AllConds = applyPredicateSimplification(p.SCtx(), p.AllConds)
|
|
return p
|
|
}
|
|
|
|
// ConstantPropagation inherits BaseLogicalPlan.LogicalPlan.<8th> implementation.
|
|
|
|
// PullUpConstantPredicates inherits BaseLogicalPlan.LogicalPlan.<9th> implementation.
|
|
|
|
// RecursiveDeriveStats inherits BaseLogicalPlan.LogicalPlan.<10th> implementation.
|
|
|
|
// DeriveStats implements base.LogicalPlan.<11th> interface.
// It normalizes the pushed-down conditions, fills range information for the
// index access paths, derives the operator's stats from the filters, tries
// heuristic path pruning and index-merge generation, and finally records the
// minimal selectivity over all access paths.
func (ds *DataSource) DeriveStats(_ []*property.StatsInfo, _ *expression.Schema, _ []*expression.Schema, colGroups [][]*expression.Column) (*property.StatsInfo, error) {
	// Stats already derived and no column groups requested: reuse them as is.
	if ds.StatsInfo() != nil && len(colGroups) == 0 {
		return ds.StatsInfo(), nil
	}
	ds.initStats(colGroups)
	if ds.StatsInfo() != nil {
		// Just reload the GroupNDVs.
		selectivity := ds.StatsInfo().RowCount / ds.TableStats.RowCount
		ds.SetStats(ds.TableStats.Scale(selectivity))
		return ds.StatsInfo(), nil
	}
	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(ds.SCtx())
		defer debugtrace.LeaveContextCommon(ds.SCtx())
	}
	// two preprocess here.
	// 1: PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
	// 2: EliminateNoPrecisionCast here can convert query 'cast(c<int> as bigint) = 1' to 'c = 1' to leverage access range.
	exprCtx := ds.SCtx().GetExprCtx()
	for i, expr := range ds.PushedDownConds {
		ds.PushedDownConds[i] = expression.PushDownNot(exprCtx, expr)
		ds.PushedDownConds[i] = expression.EliminateNoPrecisionLossCast(exprCtx, ds.PushedDownConds[i])
	}
	// Fill range/filter info for every index path; table paths are handled
	// later in derivePathStatsAndTryHeuristics.
	for _, path := range ds.PossibleAccessPaths {
		if path.IsTablePath() {
			continue
		}
		err := ds.fillIndexPath(path, ds.PushedDownConds)
		if err != nil {
			return nil, err
		}
	}
	// TODO: Can we move ds.deriveStatsByFilter after pruning by heuristics? In this way some computation can be avoided
	// when ds.PossibleAccessPaths are pruned.
	ds.SetStats(ds.deriveStatsByFilter(ds.PushedDownConds, ds.PossibleAccessPaths))
	err := ds.derivePathStatsAndTryHeuristics()
	if err != nil {
		return nil, err
	}

	if err := ds.generateIndexMergePath(); err != nil {
		return nil, err
	}

	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugTraceAccessPaths(ds.SCtx(), ds.PossibleAccessPaths)
	}
	ds.AccessPathMinSelectivity = getMinSelectivityFromPaths(ds.PossibleAccessPaths, float64(ds.TblColHists.RealtimeCount))

	return ds.StatsInfo(), nil
}
|
|
|
|
// ExtractColGroups inherits BaseLogicalPlan.LogicalPlan.<12th> implementation.
|
|
|
|
// PreparePossibleProperties implements base.LogicalPlan.<13th> interface.
|
|
func (ds *DataSource) PreparePossibleProperties(_ *expression.Schema, _ ...[][]*expression.Column) [][]*expression.Column {
|
|
result := make([][]*expression.Column, 0, len(ds.PossibleAccessPaths))
|
|
|
|
for _, path := range ds.PossibleAccessPaths {
|
|
if path.IsIntHandlePath {
|
|
col := ds.getPKIsHandleCol()
|
|
if col != nil {
|
|
result = append(result, []*expression.Column{col})
|
|
}
|
|
continue
|
|
}
|
|
|
|
if len(path.IdxCols) == 0 {
|
|
continue
|
|
}
|
|
result = append(result, make([]*expression.Column, len(path.IdxCols)))
|
|
copy(result[len(result)-1], path.IdxCols)
|
|
for i := 0; i < path.EqCondCount && i+1 < len(path.IdxCols); i++ {
|
|
result = append(result, make([]*expression.Column, len(path.IdxCols)-i-1))
|
|
copy(result[len(result)-1], path.IdxCols[i+1:])
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// ExhaustPhysicalPlans inherits BaseLogicalPlan.LogicalPlan.<14th> implementation.
|
|
|
|
// ExtractCorrelatedCols implements base.LogicalPlan.<15th> interface.
|
|
func (ds *DataSource) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
|
|
corCols := make([]*expression.CorrelatedColumn, 0, len(ds.PushedDownConds))
|
|
for _, expr := range ds.PushedDownConds {
|
|
corCols = append(corCols, expression.ExtractCorColumns(expr)...)
|
|
}
|
|
return corCols
|
|
}
|
|
|
|
// MaxOneRow inherits BaseLogicalPlan.LogicalPlan.<16th> implementation.
|
|
|
|
// Children inherits BaseLogicalPlan.LogicalPlan.<17th> implementation.
|
|
|
|
// SetChildren inherits BaseLogicalPlan.LogicalPlan.<18th> implementation.
|
|
|
|
// SetChild inherits BaseLogicalPlan.LogicalPlan.<19th> implementation.
|
|
|
|
// RollBackTaskMap inherits BaseLogicalPlan.LogicalPlan.<20th> implementation.
|
|
|
|
// CanPushToCop inherits BaseLogicalPlan.LogicalPlan.<21st> implementation.
|
|
|
|
// ExtractFD implements the base.LogicalPlan.<22nd> interface.
// It builds the functional-dependency set of this table from its primary
// key, unique indexes, pushed-down filters, and generated columns.
func (ds *DataSource) ExtractFD() *fd.FDSet {
	// FD in datasource (leaf node) can be cached and reused.
	// Once the all conditions are not equal to nil, built it again.
	if ds.FDs() == nil || ds.AllConds != nil {
		fds := &fd.FDSet{HashCodeToUniqueID: make(map[string]int)}
		allCols := intset.NewFastIntSet()
		// should use the column's unique ID avoiding fdSet conflict.
		for _, col := range ds.TblCols {
			// todo: change it to int64
			allCols.Insert(int(col.UniqueID))
		}
		// int pk doesn't store its index column in indexInfo.
		if ds.TableInfo.PKIsHandle {
			keyCols := intset.NewFastIntSet()
			for _, col := range ds.TblCols {
				if mysql.HasPriKeyFlag(col.RetType.GetFlag()) {
					keyCols.Insert(int(col.UniqueID))
				}
			}
			// The int primary key strictly determines all columns and is not null.
			fds.AddStrictFunctionalDependency(keyCols, allCols)
			fds.MakeNotNull(keyCols)
		}
		// we should check index valid while forUpdateRead, see detail in https://github.com/pingcap/tidb/pull/22152
		var (
			latestIndexes map[int64]*model.IndexInfo
			changed       bool
			err           error
		)
		check := ds.SCtx().GetSessionVars().IsIsolation(ast.ReadCommitted) || ds.IsForUpdateRead
		check = check && ds.SCtx().GetSessionVars().ConnectionID > 0
		if check {
			latestIndexes, changed, err = getLatestIndexInfo(ds.SCtx(), ds.table.Meta().ID, 0)
			if err != nil {
				// Cannot validate against the latest schema: cache what we have.
				ds.SetFDs(fds)
				return fds
			}
		}
		// other indices including common handle.
		for _, idx := range ds.TableInfo.Indices {
			keyCols := intset.NewFastIntSet()
			allColIsNotNull := true
			if ds.IsForUpdateRead && changed {
				latestIndex, ok := latestIndexes[idx.ID]
				if !ok || latestIndex.State != model.StatePublic {
					continue
				}
			}
			if idx.State != model.StatePublic {
				continue
			}
			for _, idxCol := range idx.Columns {
				// Note: even the prefix column can also be the FD. For example:
				// unique(char_column(10)), will also guarantee the prefix to be
				// the unique which means the whole column is unique too.
				refCol := ds.TableInfo.Columns[idxCol.Offset]
				if !mysql.HasNotNullFlag(refCol.GetFlag()) {
					allColIsNotNull = false
				}
				keyCols.Insert(int(ds.TblCols[idxCol.Offset].UniqueID))
			}
			if idx.Primary {
				fds.AddStrictFunctionalDependency(keyCols, allCols)
				fds.MakeNotNull(keyCols)
			} else if idx.Unique {
				if allColIsNotNull {
					fds.AddStrictFunctionalDependency(keyCols, allCols)
					fds.MakeNotNull(keyCols)
				} else {
					// unique index:
					// 1: normal value should be unique
					// 2: null value can be multiple
					// for this kind of lax to be strict, we need to make the determinant not-null.
					fds.AddLaxFunctionalDependency(keyCols, allCols)
				}
			}
		}
		// handle the datasource conditions (maybe pushed down from upper layer OP)
		if len(ds.AllConds) != 0 {
			// extract the not null attributes from selection conditions.
			notnullColsUniqueIDs := ExtractNotNullFromConds(ds.AllConds, ds)

			// extract the constant cols from selection conditions.
			constUniqueIDs := ExtractConstantCols(ds.AllConds, ds.SCtx(), fds)

			// extract equivalence cols.
			equivUniqueIDs := ExtractEquivalenceCols(ds.AllConds, ds.SCtx(), fds)

			// apply conditions to FD.
			fds.MakeNotNull(notnullColsUniqueIDs)
			fds.AddConstants(constUniqueIDs)
			for _, equiv := range equivUniqueIDs {
				fds.AddEquivalence(equiv[0], equiv[1])
			}
		}
		// build the dependency for generated columns.
		// the generated column is sequentially dependent on the forward column.
		// a int, b int as (a+1), c int as (b+1), here we can build the strict FD down:
		// {a} -> {b}, {b} -> {c}, put the maintenance of the dependencies between generated columns to the FD graph.
		notNullCols := intset.NewFastIntSet()
		for _, col := range ds.TblCols {
			if col.VirtualExpr != nil {
				dependencies := intset.NewFastIntSet()
				dependencies.Insert(int(col.UniqueID))
				// dig out just for 1 level.
				directBaseCol := expression.ExtractColumns(col.VirtualExpr)
				determinant := intset.NewFastIntSet()
				for _, col := range directBaseCol {
					determinant.Insert(int(col.UniqueID))
				}
				fds.AddStrictFunctionalDependency(determinant, dependencies)
			}
			if mysql.HasNotNullFlag(col.RetType.GetFlag()) {
				notNullCols.Insert(int(col.UniqueID))
			}
		}
		fds.MakeNotNull(notNullCols)
		ds.SetFDs(fds)
	}
	return ds.FDs()
}
|
|
|
|
// GetBaseLogicalPlan inherits BaseLogicalPlan.LogicalPlan.<23rd> implementation.
|
|
|
|
// ConvertOuterToInnerJoin inherits BaseLogicalPlan.LogicalPlan.<24th> implementation.
|
|
|
|
// *************************** end implementation of logicalPlan interface ***************************
|
|
|
|
// getTablePath finds the TablePath from a group of accessPaths.
|
|
func getTablePath(paths []*util.AccessPath) *util.AccessPath {
|
|
for _, path := range paths {
|
|
if path.IsTablePath() {
|
|
return path
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (ds *DataSource) buildTableGather() base.LogicalPlan {
|
|
ts := LogicalTableScan{Source: ds, HandleCols: ds.HandleCols}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
ts.SetSchema(ds.Schema())
|
|
sg := TiKVSingleGather{Source: ds, IsIndexGather: false}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
sg.SetSchema(ds.Schema())
|
|
sg.SetChildren(ts)
|
|
return sg
|
|
}
|
|
|
|
func (ds *DataSource) buildIndexGather(path *util.AccessPath) base.LogicalPlan {
|
|
is := LogicalIndexScan{
|
|
Source: ds,
|
|
IsDoubleRead: false,
|
|
Index: path.Index,
|
|
FullIdxCols: path.FullIdxCols,
|
|
FullIdxColLens: path.FullIdxColLens,
|
|
IdxCols: path.IdxCols,
|
|
IdxColLens: path.IdxColLens,
|
|
}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
|
|
is.Columns = make([]*model.ColumnInfo, len(ds.Columns))
|
|
copy(is.Columns, ds.Columns)
|
|
is.SetSchema(ds.Schema())
|
|
is.IdxCols, is.IdxColLens = expression.IndexInfo2PrefixCols(is.Columns, is.Schema().Columns, is.Index)
|
|
|
|
sg := TiKVSingleGather{
|
|
Source: ds,
|
|
IsIndexGather: true,
|
|
Index: path.Index,
|
|
}.Init(ds.SCtx(), ds.QueryBlockOffset())
|
|
sg.SetSchema(ds.Schema())
|
|
sg.SetChildren(is)
|
|
return sg
|
|
}
|
|
|
|
// Convert2Gathers builds logical TiKVSingleGathers from DataSource.
|
|
func (ds *DataSource) Convert2Gathers() (gathers []base.LogicalPlan) {
|
|
tg := ds.buildTableGather()
|
|
gathers = append(gathers, tg)
|
|
for _, path := range ds.PossibleAccessPaths {
|
|
if !path.IsIntHandlePath {
|
|
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
|
|
// If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan.
|
|
if ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens) {
|
|
gathers = append(gathers, ds.buildIndexGather(path))
|
|
}
|
|
// TODO: If index columns can not cover the schema, use IndexLookUpGather.
|
|
}
|
|
}
|
|
return gathers
|
|
}
|
|
|
|
func detachCondAndBuildRangeForPath(
|
|
sctx base.PlanContext,
|
|
path *util.AccessPath,
|
|
conds []expression.Expression,
|
|
histColl *statistics.HistColl,
|
|
) error {
|
|
if len(path.IdxCols) == 0 {
|
|
path.TableFilters = conds
|
|
return nil
|
|
}
|
|
res, err := ranger.DetachCondAndBuildRangeForIndex(sctx.GetRangerCtx(), conds, path.IdxCols, path.IdxColLens, sctx.GetSessionVars().RangeMaxSize)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
path.Ranges = res.Ranges
|
|
path.AccessConds = res.AccessConds
|
|
path.TableFilters = res.RemainedConds
|
|
path.EqCondCount = res.EqCondCount
|
|
path.EqOrInCondCount = res.EqOrInCount
|
|
path.IsDNFCond = res.IsDNFCond
|
|
path.ConstCols = make([]bool, len(path.IdxCols))
|
|
if res.ColumnValues != nil {
|
|
for i := range path.ConstCols {
|
|
path.ConstCols[i] = res.ColumnValues[i] != nil
|
|
}
|
|
}
|
|
path.CountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges)
|
|
return err
|
|
}
|
|
|
|
// deriveCommonHandleTablePathStats fills range and row-count info for a
// clustered-index (common handle) table path.
// isIm indicates whether this is for an IndexMerge partial path.
func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error {
	// Start from the whole table over the full not-null range.
	path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
	path.Ranges = ranger.FullNotNullRange()
	path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
	path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
	if len(conds) == 0 {
		return nil
	}
	if err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl); err != nil {
		return err
	}
	// When every access condition is eq/in, correlated-column equality filters
	// can be promoted to access conditions; the row count is then divided by
	// the (selectivity-scaled) NDV of each promoted column.
	if path.EqOrInCondCount == len(path.AccessConds) {
		accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
		path.AccessConds = append(path.AccessConds, accesses...)
		path.TableFilters = remained
		if len(accesses) > 0 && ds.StatisticTable.Pseudo {
			path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
		} else {
			selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
			for i := range accesses {
				col := path.IdxCols[path.EqOrInCondCount+i]
				ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID)
				ndv *= selectivity
				if ndv < 1 {
					ndv = 1.0
				}
				path.CountAfterAccess = path.CountAfterAccess / ndv
			}
		}
	}
	// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
	// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
	if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
		path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
	}
	return nil
}
|
|
|
|
// deriveTablePathStats will fulfill the information that the AccessPath need.
// isIm indicates whether this function is called to generate the partial path for IndexMerge.
func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error {
	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(ds.SCtx())
		defer debugtrace.LeaveContextCommon(ds.SCtx())
	}
	// Clustered-index tables are handled separately.
	if path.IsCommonHandlePath {
		return ds.deriveCommonHandleTablePathStats(path, conds, isIm)
	}
	var err error
	path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
	path.TableFilters = conds
	// Locate the handle column: either the int primary key or the extra
	// _tidb_rowid column.
	var pkCol *expression.Column
	isUnsigned := false
	if ds.TableInfo.PKIsHandle {
		if pkColInfo := ds.TableInfo.GetPkColInfo(); pkColInfo != nil {
			isUnsigned = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
			pkCol = expression.ColInfo2Col(ds.Schema().Columns, pkColInfo)
		}
	} else {
		pkCol = ds.Schema().GetExtraHandleColumn()
	}
	if pkCol == nil {
		// No handle column in the schema: full scan, all conds stay filters.
		path.Ranges = ranger.FullIntRange(isUnsigned)
		return nil
	}

	path.Ranges = ranger.FullIntRange(isUnsigned)
	if len(conds) == 0 {
		return nil
	}
	// for cnf condition combination, c=1 and c=2 and (1 member of (a)),
	// c=1 and c=2 will derive invalid range represented by an access condition as constant of 0 (false).
	// later this constant of 0 will be built as empty range.
	path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.SCtx().GetRangerCtx(), conds, pkCol)
	// If there's no access cond, we try to find that whether there's expression containing correlated column that
	// can be used to access data.
	corColInAccessConds := false
	if len(path.AccessConds) == 0 {
		// Look for a `pk = correlated column` (or the mirrored form) equality;
		// the first match is moved from the filters into the access conds and
		// the loop stops, so the in-loop slice splice is safe.
		for i, filter := range path.TableFilters {
			eqFunc, ok := filter.(*expression.ScalarFunction)
			if !ok || eqFunc.FuncName.L != ast.EQ {
				continue
			}
			lCol, lOk := eqFunc.GetArgs()[0].(*expression.Column)
			if lOk && lCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) {
				_, rOk := eqFunc.GetArgs()[1].(*expression.CorrelatedColumn)
				if rOk {
					path.AccessConds = append(path.AccessConds, filter)
					path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...)
					corColInAccessConds = true
					break
				}
			}
			rCol, rOk := eqFunc.GetArgs()[1].(*expression.Column)
			if rOk && rCol.Equal(ds.SCtx().GetExprCtx().GetEvalCtx(), pkCol) {
				_, lOk := eqFunc.GetArgs()[0].(*expression.CorrelatedColumn)
				if lOk {
					path.AccessConds = append(path.AccessConds, filter)
					path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...)
					corColInAccessConds = true
					break
				}
			}
		}
	}
	if corColInAccessConds {
		// Point access on the handle: exactly one row.
		path.CountAfterAccess = 1
		return nil
	}
	var remainedConds []expression.Expression
	path.Ranges, path.AccessConds, remainedConds, err = ranger.BuildTableRange(path.AccessConds, ds.SCtx().GetRangerCtx(), pkCol.RetType, ds.SCtx().GetSessionVars().RangeMaxSize)
	path.TableFilters = append(path.TableFilters, remainedConds...)
	if err != nil {
		return err
	}
	path.CountAfterAccess, err = cardinality.GetRowCountByIntColumnRanges(ds.SCtx(), &ds.StatisticTable.HistColl, pkCol.ID, path.Ranges)
	// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
	// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
	if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
		path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
	}
	return err
}
|
|
|
|
// fillIndexPath prepares an index access path: resolves its index columns,
// optionally appends the signed int handle as a trailing column for
// non-unique indexes, and detaches conds into ranges/filters.
func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Expression) error {
	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(ds.SCtx())
		defer debugtrace.LeaveContextCommon(ds.SCtx())
	}
	path.Ranges = ranger.FullRange()
	path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
	path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
	path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
	// For a non-unique index whose columns are all usable, the int handle can
	// be treated as an extra trailing index column (rows of a non-unique index
	// are further ordered by the handle).
	if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) {
		handleCol := ds.getPKIsHandleCol()
		if handleCol != nil && !mysql.HasUnsignedFlag(handleCol.RetType.GetFlag()) {
			alreadyHandle := false
			for _, col := range path.IdxCols {
				if col.ID == model.ExtraHandleID || col.EqualColumn(handleCol) {
					alreadyHandle = true
				}
			}
			// Don't add one column twice to the index. May cause unexpected errors.
			if !alreadyHandle {
				path.IdxCols = append(path.IdxCols, handleCol)
				path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength)
				// Also updates the map that maps the index id to its prefix column ids.
				if len(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID]) == len(path.Index.Columns) {
					ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID] = append(ds.TableStats.HistColl.Idx2ColUniqueIDs[path.Index.ID], handleCol.UniqueID)
				}
			}
		}
	}
	err := detachCondAndBuildRangeForPath(ds.SCtx(), path, conds, ds.TableStats.HistColl)
	return err
}
|
|
|
|
// deriveIndexPathStats will fulfill the information that the AccessPath need.
// conds is the conditions used to generate the DetachRangeResult for path.
// isIm indicates whether this function is called to generate the partial path for IndexMerge.
//
// It refines path.CountAfterAccess using correlated-column access conditions,
// splits the remaining filters into index filters and table filters, and
// finally computes path.CountAfterIndex.
func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, _ []expression.Expression, isIm bool) {
	if ds.SCtx().GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(ds.SCtx())
		defer debugtrace.LeaveContextCommon(ds.SCtx())
	}
	// If every access condition so far is an eq/in condition, try to pull
	// correlated-column conditions out of the filters and promote them to
	// additional access conditions.
	if path.EqOrInCondCount == len(path.AccessConds) {
		accesses, remained := path.SplitCorColAccessCondFromFilters(ds.SCtx(), path.EqOrInCondCount)
		path.AccessConds = append(path.AccessConds, accesses...)
		path.TableFilters = remained
		if len(accesses) > 0 && ds.StatisticTable.Pseudo {
			// With pseudo stats, fall back to the average count per distinct value.
			path.CountAfterAccess = cardinality.PseudoAvgCountPerValue(ds.StatisticTable)
		} else {
			// Divide CountAfterAccess by the (selectivity-scaled) NDV of each
			// promoted access column, since each eq condition picks one value.
			selectivity := path.CountAfterAccess / float64(ds.StatisticTable.RealtimeCount)
			for i := range accesses {
				col := path.IdxCols[path.EqOrInCondCount+i]
				ndv := cardinality.EstimateColumnNDV(ds.StatisticTable, col.ID)
				ndv *= selectivity
				if ndv < 1 {
					// Guard against dividing by a fractional NDV, which would
					// inflate the estimate.
					ndv = 1.0
				}
				path.CountAfterAccess = path.CountAfterAccess / ndv
			}
		}
	}
	// Split the remaining table filters: conditions fully covered by the index
	// columns can be evaluated on the index side.
	var indexFilters []expression.Expression
	indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens)
	path.IndexFilters = append(path.IndexFilters, indexFilters...)
	// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
	// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
	if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
		path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
	}
	if path.IndexFilters != nil {
		selectivity, _, err := cardinality.Selectivity(ds.SCtx(), ds.TableStats.HistColl, path.IndexFilters, nil)
		if err != nil {
			logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
			selectivity = cost.SelectionFactor
		}
		if isIm {
			path.CountAfterIndex = path.CountAfterAccess * selectivity
		} else {
			// Clamp from below by the overall stats row count so the estimate
			// stays consistent with the plan's final stats.
			path.CountAfterIndex = math.Max(path.CountAfterAccess*selectivity, ds.StatsInfo().RowCount)
		}
	} else {
		// No index filters: nothing further is filtered on the index side.
		path.CountAfterIndex = path.CountAfterAccess
	}
}
|
|
|
|
func getPKIsHandleColFromSchema(cols []*model.ColumnInfo, schema *expression.Schema, pkIsHandle bool) *expression.Column {
|
|
if !pkIsHandle {
|
|
// If the PKIsHandle is false, return the ExtraHandleColumn.
|
|
for i, col := range cols {
|
|
if col.ID == model.ExtraHandleID {
|
|
return schema.Columns[i]
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
for i, col := range cols {
|
|
if mysql.HasPriKeyFlag(col.GetFlag()) {
|
|
return schema.Columns[i]
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// getPKIsHandleCol returns this DataSource's handle column: the column
// with the primary-key flag when TableInfo.PKIsHandle is set, otherwise
// the extra handle column; nil if neither is present in the schema.
func (ds *DataSource) getPKIsHandleCol() *expression.Column {
	return getPKIsHandleColFromSchema(ds.Columns, ds.Schema(), ds.TableInfo.PKIsHandle)
}
|
|
|
|
func getMinSelectivityFromPaths(paths []*util.AccessPath, totalRowCount float64) float64 {
|
|
minSelectivity := 1.0
|
|
if totalRowCount <= 0 {
|
|
return minSelectivity
|
|
}
|
|
for _, path := range paths {
|
|
// For table path and index merge path, AccessPath.CountAfterIndex is not set and meaningless,
|
|
// but we still consider their AccessPath.CountAfterAccess.
|
|
if path.IsTablePath() || path.PartialIndexPaths != nil {
|
|
minSelectivity = min(minSelectivity, path.CountAfterAccess/totalRowCount)
|
|
continue
|
|
}
|
|
minSelectivity = min(minSelectivity, path.CountAfterIndex/totalRowCount)
|
|
}
|
|
return minSelectivity
|
|
}
|
|
|
|
// AddPrefix4ShardIndexes add expression prefix for shard index. e.g. an index is test.uk(tidb_shard(a), a).
|
|
// DataSource.PredicatePushDown ---> DataSource.AddPrefix4ShardIndexes
|
|
// It transforms the sql "SELECT * FROM test WHERE a = 10" to
|
|
// "SELECT * FROM test WHERE tidb_shard(a) = val AND a = 10", val is the value of tidb_shard(10).
|
|
// It also transforms the sql "SELECT * FROM test WHERE a IN (10, 20, 30)" to
|
|
// "SELECT * FROM test WHERE tidb_shard(a) = val1 AND a = 10 OR tidb_shard(a) = val2 AND a = 20"
|
|
// @param[in] conds the original condtion of this datasource
|
|
// @retval - the new condition after adding expression prefix
|
|
func (ds *DataSource) AddPrefix4ShardIndexes(sc base.PlanContext, conds []expression.Expression) []expression.Expression {
|
|
if !ds.ContainExprPrefixUk {
|
|
return conds
|
|
}
|
|
|
|
var err error
|
|
newConds := conds
|
|
|
|
for _, path := range ds.PossibleAccessPaths {
|
|
if !path.IsUkShardIndexPath {
|
|
continue
|
|
}
|
|
newConds, err = ds.addExprPrefixCond(sc, path, newConds)
|
|
if err != nil {
|
|
logutil.BgLogger().Error("Add tidb_shard expression failed",
|
|
zap.Error(err),
|
|
zap.Uint64("connection id", sc.GetSessionVars().ConnectionID),
|
|
zap.String("database name", ds.DBName.L),
|
|
zap.String("table name", ds.TableInfo.Name.L),
|
|
zap.String("index name", path.Index.Name.L))
|
|
return conds
|
|
}
|
|
}
|
|
|
|
return newConds
|
|
}
|