// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package plan

import (
	"fmt"
	"math"

	"github.com/pingcap/tidb/expression"
	"github.com/pingcap/tidb/expression/aggregation"
	"github.com/pingcap/tidb/model"
	"github.com/pingcap/tidb/mysql"
	"github.com/pingcap/tidb/sessionctx"
	"github.com/pingcap/tidb/types"
	"github.com/pingcap/tidb/util/charset"
)

// task is a new version of `PhysicalPlanInfo`. It stores cost information for a task.
// A task may be CopTask, RootTask, MPPTask or a ParallelTask.
type task interface {
	count() float64
	addCost(cost float64)
	cost() float64
	copy() task
	plan() PhysicalPlan
	invalid() bool
}

// copTask is a task that runs in a distributed kv store.
// TODO: In the future, we should split copTask into indexTask and tableTask.
type copTask struct {
	indexPlan PhysicalPlan
	tablePlan PhysicalPlan
	cst       float64
	// indexPlanFinished means we have finished the index plan.
	indexPlanFinished bool
	// keepOrder indicates if the plan scans data in order.
	keepOrder bool
}

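// invalid reports whether the task carries no physical plan: a copTask is invalid when it has neither an
// index plan nor a table plan, and a rootTask is invalid when its root plan is nil.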
func (t *copTask) invalid() bool {
	return t.tablePlan == nil && t.indexPlan == nil
}

func (t *rootTask) invalid() bool {
	return t.p == nil
}

func (t *copTask) count() float64 {
	if t.indexPlanFinished {
		return t.tablePlan.StatsInfo().count
	}
	return t.indexPlan.StatsInfo().count
}

func (t *copTask) addCost(cst float64) {
	t.cst += cst
}

func (t *copTask) cost() float64 {
	return t.cst
}

func (t *copTask) copy() task {
	nt := *t
	return &nt
}

func (t *copTask) plan() PhysicalPlan {
	if t.indexPlanFinished {
		return t.tablePlan
	}
	return t.indexPlan
}

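// attachPlan2Task makes p the new root of the task's plan tree. For a copTask it is stacked on top of
// whichever side is still being built (the table plan once the index plan is finished, otherwise the
// index plan); for a rootTask it simply becomes the new root plan.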
func attachPlan2Task(p PhysicalPlan, t task) task {
	switch v := t.(type) {
	case *copTask:
		if v.indexPlanFinished {
			p.SetChildren(v.tablePlan)
			v.tablePlan = p
		} else {
			p.SetChildren(v.indexPlan)
			v.indexPlan = p
		}
	case *rootTask:
		p.SetChildren(v.p)
		v.p = p
	}
	return t
}

// finishIndexPlan means we no longer add plans to the index plan, and compute the network cost for it.
func (t *copTask) finishIndexPlan() {
	if !t.indexPlanFinished {
		t.cst += t.count() * netWorkFactor
		t.indexPlanFinished = true
		if t.tablePlan != nil {
			t.tablePlan.(*PhysicalTableScan).stats = t.indexPlan.StatsInfo()
			t.cst += t.count() * scanFactor
		}
	}
}

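// attach2Task of the base physical plan finishes any pending cop task and attaches the plan itself on
// top of the resulting root task.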
func (p *basePhysicalPlan) attach2Task(tasks ...task) task {
	if tasks[0].invalid() {
		return invalidTask
	}
	t := finishCopTask(p.ctx, tasks[0].copy())
	return attachPlan2Task(p.self, t)
}

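// attach2Task for PhysicalApply finishes both child tasks and always produces a root task. The cost model
// here charges lTask.cost() plus lTask.count()*rTask.cost(), reflecting that the inner side is
// re-evaluated once per outer row.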
func (p *PhysicalApply) attach2Task(tasks ...task) task {
	if tasks[0].invalid() || tasks[1].invalid() {
		return invalidTask
	}
	lTask := finishCopTask(p.ctx, tasks[0].copy())
	rTask := finishCopTask(p.ctx, tasks[1].copy())
	p.SetChildren(lTask.plan(), rTask.plan())
	p.PhysicalJoin.SetChildren(lTask.plan(), rTask.plan())
	p.schema = buildPhysicalJoinSchema(p.PhysicalJoin.JoinType, p)
	return &rootTask{
		p:   p,
		cst: lTask.cost() + lTask.count()*rTask.cost(),
	}
}

func (p *PhysicalIndexJoin) attach2Task(tasks ...task) task {
	if tasks[p.OuterIndex].invalid() {
		return invalidTask
	}
	outerTask := finishCopTask(p.ctx, tasks[p.OuterIndex].copy())
	if p.OuterIndex == 0 {
		p.SetChildren(outerTask.plan(), p.innerPlan)
	} else {
		p.SetChildren(p.innerPlan, outerTask.plan())
	}
	p.schema = buildPhysicalJoinSchema(p.JoinType, p)
	return &rootTask{
		p:   p,
		cst: outerTask.cost() + p.getCost(outerTask.count()),
	}
}

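// getCost is a rough cost model for the index join, driven by the outer row count lCnt: a network cost
// per outer row, a term lCnt*log2(min(batchSize, lCnt))*2 for the batched inner index probes, and a
// per-batch start-up cost of lCnt/batchSize*netWorkStartFactor.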
func (p *PhysicalIndexJoin) getCost(lCnt float64) float64 {
	if lCnt < 1 {
		lCnt = 1
	}
	cst := lCnt * netWorkFactor
	batchSize := p.ctx.GetSessionVars().IndexJoinBatchSize
	cst += lCnt * math.Log2(math.Min(float64(batchSize), lCnt)) * 2
	cst += lCnt / float64(batchSize) * netWorkStartFactor
	return cst
}

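// getCost is a rough cost model for the hash join: both inputs are scanned once (lCnt+rCnt), scaled by a
// factor that grows with log2 of the build (smaller) side and shrinks with the join concurrency.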
func (p *PhysicalHashJoin) getCost(lCnt, rCnt float64) float64 {
	smallTableCnt := lCnt
	if p.InnerChildIdx == 1 {
		smallTableCnt = rCnt
	}
	if smallTableCnt <= 1 {
		smallTableCnt = 1
	}
	return (lCnt + rCnt) * (1 + math.Log2(smallTableCnt)/float64(p.Concurrency))
}

func (p *PhysicalHashJoin) attach2Task(tasks ...task) task {
	if tasks[0].invalid() || tasks[1].invalid() {
		return invalidTask
	}
	lTask := finishCopTask(p.ctx, tasks[0].copy())
	rTask := finishCopTask(p.ctx, tasks[1].copy())
	p.SetChildren(lTask.plan(), rTask.plan())
	p.schema = buildPhysicalJoinSchema(p.JoinType, p)
	return &rootTask{
		p:   p,
		cst: lTask.cost() + rTask.cost() + p.getCost(lTask.count(), rTask.count()),
	}
}

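// getCost for the merge join simply charges one pass over each (already sorted) input.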
func (p *PhysicalMergeJoin) getCost(lCnt, rCnt float64) float64 {
	return lCnt + rCnt
}

func (p *PhysicalMergeJoin) attach2Task(tasks ...task) task {
	if tasks[0].invalid() || tasks[1].invalid() {
		return invalidTask
	}
	lTask := finishCopTask(p.ctx, tasks[0].copy())
	rTask := finishCopTask(p.ctx, tasks[1].copy())
	p.SetChildren(lTask.plan(), rTask.plan())
	p.schema = buildPhysicalJoinSchema(p.JoinType, p)
	return &rootTask{
		p:   p,
		cst: lTask.cost() + rTask.cost() + p.getCost(lTask.count(), rTask.count()),
	}
}

// finishCopTask means we close the coprocessor task and create a root task.
func finishCopTask(ctx sessionctx.Context, task task) task {
	t, ok := task.(*copTask)
	if !ok {
		return task
	}
	// FIXME: When it is a double read, the cost should be more expensive. The right cost should add
	// `NetWorkStartCost` * (totalCount / perCountIndexRead).
	t.finishIndexPlan()
	if t.tablePlan != nil {
		t.cst += t.count() * netWorkFactor
	}
	newTask := &rootTask{
		cst: t.cst,
	}
	if t.indexPlan != nil && t.tablePlan != nil {
		p := PhysicalIndexLookUpReader{tablePlan: t.tablePlan, indexPlan: t.indexPlan}.init(ctx)
		p.stats = t.tablePlan.StatsInfo()
		newTask.p = p
	} else if t.indexPlan != nil {
		p := PhysicalIndexReader{indexPlan: t.indexPlan}.init(ctx)
		p.stats = t.indexPlan.StatsInfo()
		newTask.p = p
	} else {
		p := PhysicalTableReader{tablePlan: t.tablePlan}.init(ctx)
		p.stats = t.tablePlan.StatsInfo()
		newTask.p = p
	}
	return newTask
}

// rootTask is the final sink node of a plan graph. It should run in a single goroutine on the TiDB side.
type rootTask struct {
	p   PhysicalPlan
	cst float64
}

func (t *rootTask) copy() task {
	return &rootTask{
		p:   t.p,
		cst: t.cst,
	}
}

func (t *rootTask) count() float64 {
	return t.p.StatsInfo().count
}

func (t *rootTask) addCost(cst float64) {
	t.cst += cst
}

func (t *rootTask) cost() float64 {
	return t.cst
}

func (t *rootTask) plan() PhysicalPlan {
	return t.p
}

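// attach2Task for PhysicalLimit tries to push a copy of the limit down to the coprocessor side first
// (with the offset folded into the pushed-down count), then attaches the original limit on the root task.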
func (p *PhysicalLimit) attach2Task(tasks ...task) task {
	// If the task is invalid, return an invalid task directly.
	if tasks[0].invalid() {
		return invalidTask
	}
	t := tasks[0].copy()
	if cop, ok := t.(*copTask); ok {
		// If the table/index scans data in order and applies a double read, the limit cannot be pushed
		// down to the table side.
		if !cop.keepOrder || !cop.indexPlanFinished || cop.indexPlan == nil {
			// When the limit is pushed down, its offset is removed and folded into the count.
			pushedDownLimit := PhysicalLimit{Count: p.Offset + p.Count}.init(p.ctx, p.stats)
			cop = attachPlan2Task(pushedDownLimit, cop).(*copTask)
		}
		t = finishCopTask(p.ctx, cop)
	}
	t = attachPlan2Task(p, t)
	return t
}

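// getCost is a rough cost for sorting count rows: CPU and memory are each charged in proportion to the
// input size, with very small inputs clamped to a floor of two rows.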
func (p *PhysicalSort) getCost(count float64) float64 {
	if count < 2.0 {
		count = 2.0
	}
	return count*cpuFactor + count*memoryFactor
}

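// getCost charges CPU in proportion to the number of input rows and memory in proportion to the p.Count
// rows that the top-N keeps.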
func (p *PhysicalTopN) getCost(count float64) float64 {
	return count*cpuFactor + float64(p.Count)*memoryFactor
}

// canPushDown checks if this topN can be pushed down. If every expression can be converted to pb, it can
// be pushed down.
func (p *PhysicalTopN) canPushDown() bool {
	exprs := make([]expression.Expression, 0, len(p.ByItems))
	for _, item := range p.ByItems {
		exprs = append(exprs, item.Expr)
	}
	_, _, remained := expression.ExpressionsToPB(p.ctx.GetSessionVars().StmtCtx, exprs, p.ctx.GetClient())
	return len(remained) == 0
}

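// allColsFromSchema reports whether every column referenced by the ByItems can be resolved from the given
// schema; ColumnsIndices returns a non-nil result only when all of the columns are found.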
func (p *PhysicalTopN) allColsFromSchema(schema *expression.Schema) bool {
	cols := make([]*expression.Column, 0, len(p.ByItems))
	for _, item := range p.ByItems {
		cols = append(cols, expression.ExtractColumns(item.Expr)...)
	}
	return len(schema.ColumnsIndices(cols)) > 0
}

func (p *PhysicalSort) attach2Task(tasks ...task) task {
	if tasks[0].invalid() {
		return invalidTask
	}
	t := tasks[0].copy()
	t = attachPlan2Task(p, t)
	t.addCost(p.getCost(t.count()))
	return t
}

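// attach2Task for NominalSort is a no-op: the required ordering is already provided by its child, so the
// task is returned unchanged.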
func (p *NominalSort) attach2Task(tasks ...task) task {
	return tasks[0]
}

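// getPushedDownTopN builds the top-N that is pushed to the coprocessor: the ByItems are cloned and the
// offset is folded into the count, since the offset can only be applied after the final merge on the
// TiDB side.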
func (p *PhysicalTopN) getPushedDownTopN() *PhysicalTopN {
	newByItems := make([]*ByItems, 0, len(p.ByItems))
	for _, expr := range p.ByItems {
		newByItems = append(newByItems, expr.Clone())
	}
	topN := PhysicalTopN{
		ByItems: newByItems,
		Count:   p.Offset + p.Count,
	}.init(p.ctx, p.stats)
	return topN
}

func (p *PhysicalTopN) attach2Task(tasks ...task) task {
	// If the task is invalid, return an invalid task directly.
	if tasks[0].invalid() {
		return invalidTask
	}
	t := tasks[0].copy()
	// This is a topN plan.
	if copTask, ok := t.(*copTask); ok && p.canPushDown() {
		pushedDownTopN := p.getPushedDownTopN()
		// If all columns in the topN are from the index plan, we push it to the index plan. Otherwise we
		// finish the index plan and push it to the table plan.
		if !copTask.indexPlanFinished && p.allColsFromSchema(copTask.indexPlan.Schema()) {
			pushedDownTopN.SetChildren(copTask.indexPlan)
			copTask.indexPlan = pushedDownTopN
		} else {
			// FIXME: When we push down a top-N plan to the table plan branch of a double read, the cost
			// should be more expensive than in the single read case, because we may execute the table
			// scan multiple times.
			copTask.finishIndexPlan()
			pushedDownTopN.SetChildren(copTask.tablePlan)
			copTask.tablePlan = pushedDownTopN
		}
		copTask.addCost(pushedDownTopN.getCost(t.count()))
	}
	t = finishCopTask(p.ctx, t)
	t = attachPlan2Task(p, t)
	t.addCost(p.getCost(t.count()))
	return t
}

func (p *PhysicalProjection) attach2Task(tasks ...task) task {
	if tasks[0].invalid() {
		return invalidTask
	}
	t := tasks[0].copy()
	switch tp := t.(type) {
	case *copTask:
		// TODO: Support projection push down.
		t = finishCopTask(p.ctx, t)
		t = attachPlan2Task(p, t)
		return t
	case *rootTask:
		return attachPlan2Task(p, tp)
	}
	return nil
}

func (p *PhysicalUnionAll) attach2Task(tasks ...task) task {
	newTask := &rootTask{p: p}
	newChildren := make([]PhysicalPlan, 0, len(p.children))
	for _, task := range tasks {
		if task.invalid() {
			return invalidTask
		}
		task = finishCopTask(p.ctx, task)
		newTask.cst += task.cost()
		newChildren = append(newChildren, task.plan())
	}
	p.SetChildren(newChildren...)
	return newTask
}

func (sel *PhysicalSelection) attach2Task(tasks ...task) task {
	if tasks[0].invalid() {
		return invalidTask
	}
	t := finishCopTask(sel.ctx, tasks[0].copy())
	t.addCost(t.count() * cpuFactor)
	t = attachPlan2Task(sel, t)
	return t
}

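// newPartialAggregate tries to split the aggregation into a partial aggregation that can be pushed to the
// coprocessor and a final aggregation executed on TiDB. If any aggregate function or group-by item cannot
// be converted to a coprocessor expression, it returns a nil partial plan and the original plan as the
// final one.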
func (p *basePhysicalAgg) newPartialAggregate() (partial, final PhysicalPlan) {
	// Check if this aggregation can be pushed down.
	sc := p.ctx.GetSessionVars().StmtCtx
	client := p.ctx.GetClient()
	for _, aggFunc := range p.AggFuncs {
		pb := aggregation.AggFuncToPBExpr(sc, client, aggFunc)
		if pb == nil {
			return nil, p.self
		}
	}
	_, _, remained := expression.ExpressionsToPB(sc, p.GroupByItems, client)
	if len(remained) > 0 {
		return nil, p.self
	}

	finalSchema := p.schema
	partialSchema := expression.NewSchema()
	p.schema = partialSchema
	partialAgg := p.self

	// TODO: Refactor the way of constructing aggregation functions.
	partialCursor := 0
	finalAggFuncs := make([]*aggregation.AggFuncDesc, len(p.AggFuncs))
	for i, aggFun := range p.AggFuncs {
		finalAggFunc := &aggregation.AggFuncDesc{Name: aggFun.Name, HasDistinct: false}
		args := make([]expression.Expression, 0, len(aggFun.Args))
		if needCount(finalAggFunc) {
			ft := types.NewFieldType(mysql.TypeLonglong)
			ft.Flen, ft.Charset, ft.Collate = 21, charset.CharsetBin, charset.CollationBin
			partialSchema.Append(&expression.Column{
				FromID:   p.ID(),
				Position: partialCursor,
				ColName:  model.NewCIStr(fmt.Sprintf("col_%d", partialCursor)),
				RetType:  ft,
			})
			args = append(args, partialSchema.Columns[partialCursor].Clone())
			partialCursor++
		}
		if needValue(finalAggFunc) {
			partialSchema.Append(&expression.Column{
				FromID:   p.ID(),
				Position: partialCursor,
				ColName:  model.NewCIStr(fmt.Sprintf("col_%d", partialCursor)),
				RetType:  finalSchema.Columns[i].GetType(),
			})
			args = append(args, partialSchema.Columns[partialCursor].Clone())
			partialCursor++
		}
		finalAggFunc.Args = args
		finalAggFunc.Mode = aggregation.FinalMode
		finalAggFunc.RetTp = aggFun.RetTp
		finalAggFuncs[i] = finalAggFunc
	}

	// Add the group-by columns to the partial output schema.
	groupByItems := make([]expression.Expression, 0, len(p.GroupByItems))
	for i, gbyExpr := range p.GroupByItems {
		gbyCol := &expression.Column{
			FromID:   p.ID(),
			Position: partialCursor + i,
			ColName:  model.NewCIStr(fmt.Sprintf("col_%d", partialCursor+i)),
			RetType:  gbyExpr.GetType(),
		}
		partialSchema.Append(gbyCol)
		groupByItems = append(groupByItems, gbyCol.Clone())
	}

	// Create the physical "final" aggregation.
	if p.tp == TypeStreamAgg {
		finalAgg := basePhysicalAgg{
			AggFuncs:     finalAggFuncs,
			GroupByItems: groupByItems,
		}.initForStream(p.ctx, p.stats)
		finalAgg.schema = finalSchema
		return partialAgg, finalAgg
	}

	finalAgg := basePhysicalAgg{
		AggFuncs:     finalAggFuncs,
		GroupByItems: groupByItems,
	}.initForHash(p.ctx, p.stats)
	finalAgg.schema = finalSchema
	return partialAgg, finalAgg
}

func (p *PhysicalStreamAgg) attach2Task(tasks ...task) task {
	// If the task is invalid, return an invalid task directly.
	if tasks[0].invalid() {
		return invalidTask
	}
	t := tasks[0].copy()
	if cop, ok := t.(*copTask); ok {
		partialAgg, finalAgg := p.newPartialAggregate()
		if partialAgg != nil {
			if cop.tablePlan != nil {
				partialAgg.SetChildren(cop.tablePlan)
				cop.tablePlan = partialAgg
			} else {
				partialAgg.SetChildren(cop.indexPlan)
				cop.indexPlan = partialAgg
			}
		}
		t = finishCopTask(p.ctx, cop)
		attachPlan2Task(finalAgg, t)
	} else {
		attachPlan2Task(p, t)
	}
	t.addCost(t.count() * cpuFactor)
	if p.hasDistinctFunc() {
		t.addCost(t.count() * cpuFactor * distinctAggFactor)
	}
	return t
}

func (p *PhysicalHashAgg) attach2Task(tasks ...task) task {
	// If the task is invalid, return an invalid task directly.
	if tasks[0].invalid() {
		return invalidTask
	}
	cardinality := p.StatsInfo().count
	t := tasks[0].copy()
	if cop, ok := t.(*copTask); ok {
		partialAgg, finalAgg := p.newPartialAggregate()
		if partialAgg != nil {
			if cop.tablePlan != nil {
				cop.finishIndexPlan()
				partialAgg.SetChildren(cop.tablePlan)
				cop.tablePlan = partialAgg
			} else {
				partialAgg.SetChildren(cop.indexPlan)
				cop.indexPlan = partialAgg
			}
		}
		t = finishCopTask(p.ctx, cop)
		attachPlan2Task(finalAgg, t)
	} else {
		attachPlan2Task(p, t)
	}
	t.addCost(t.count()*cpuFactor*hashAggFactor + cardinality*createAggCtxFactor)
	if p.hasDistinctFunc() {
		t.addCost(t.count() * cpuFactor * distinctAggFactor)
	}
	return t
}