planner: separate cost model ver1/ver2 into different files (part 2) (#38273)

ref pingcap/tidb#36243
This commit is contained in:
Yuanjia Zhang
2022-09-30 17:03:46 +08:00
committed by GitHub
parent d70b022b54
commit ee8ebcb749
3 changed files with 469 additions and 25 deletions

View File

@ -569,6 +569,9 @@ func (p *PhysicalIndexJoin) GetPlanCost(taskType property.TaskType, option *Plan
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
outerCost, err := outerChild.GetPlanCost(taskType, option)
if err != nil {
@ -658,6 +661,9 @@ func (p *PhysicalIndexHashJoin) GetPlanCost(taskType property.TaskType, option *
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
outerCost, err := outerChild.GetPlanCost(taskType, option)
if err != nil {
@ -749,6 +755,9 @@ func (p *PhysicalIndexMergeJoin) GetPlanCost(taskType property.TaskType, option
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
outerCost, err := outerChild.GetPlanCost(taskType, option)
if err != nil {
@ -802,6 +811,9 @@ func (p *PhysicalApply) GetPlanCost(taskType property.TaskType, option *PlanCost
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
outerChild, innerChild := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
outerCost, err := outerChild.GetPlanCost(taskType, option)
if err != nil {
@ -876,6 +888,9 @@ func (p *PhysicalMergeJoin) GetPlanCost(taskType property.TaskType, option *Plan
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
p.planCost = 0
for _, child := range p.children {
childCost, err := child.GetPlanCost(taskType, option)
@ -994,6 +1009,9 @@ func (p *PhysicalHashJoin) GetPlanCost(taskType property.TaskType, option *PlanC
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
p.planCost = 0
for _, child := range p.children {
childCost, err := child.GetPlanCost(taskType, option)
@ -1036,6 +1054,9 @@ func (p *PhysicalStreamAgg) GetPlanCost(taskType property.TaskType, option *Plan
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
childCost, err := p.children[0].GetPlanCost(taskType, option)
if err != nil {
return 0, err
@ -1084,6 +1105,9 @@ func (p *PhysicalHashAgg) GetPlanCost(taskType property.TaskType, option *PlanCo
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
childCost, err := p.children[0].GetPlanCost(taskType, option)
if err != nil {
return 0, err
@ -1216,11 +1240,14 @@ func (p *BatchPointGetPlan) GetCost(opt *physicalOptimizeOp) float64 {
}
// GetPlanCost calculates the cost of the plan if it has not been calculated yet and returns the cost.
func (p *BatchPointGetPlan) GetPlanCost(_ property.TaskType, option *PlanCostOption) (float64, error) {
func (p *BatchPointGetPlan) GetPlanCost(taskType property.TaskType, option *PlanCostOption) (float64, error) {
costFlag := option.CostFlag
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx != nil && p.ctx.GetSessionVars() != nil && p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
p.planCost = p.GetCost(option.tracer)
p.planCostInit = true
return p.planCost, nil
@ -1264,11 +1291,14 @@ func (p *PointGetPlan) GetCost(opt *physicalOptimizeOp) float64 {
}
// GetPlanCost calculates the cost of the plan if it has not been calculated yet and returns the cost.
func (p *PointGetPlan) GetPlanCost(_ property.TaskType, option *PlanCostOption) (float64, error) {
func (p *PointGetPlan) GetPlanCost(taskType property.TaskType, option *PlanCostOption) (float64, error) {
costFlag := option.CostFlag
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx != nil && p.ctx.GetSessionVars() != nil && p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
p.planCost = p.GetCost(option.tracer)
p.planCostInit = true
return p.planCost, nil
@ -1311,18 +1341,16 @@ func (p *PhysicalExchangeReceiver) GetPlanCost(taskType property.TaskType, optio
if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {
return p.planCost, nil
}
if p.ctx.GetSessionVars().CostModelVersion == modelVer2 {
return p.getPlanCostVer2(taskType, option)
}
childCost, err := p.children[0].GetPlanCost(taskType, option)
if err != nil {
return 0, err
}
p.planCost = childCost
// accumulate net cost
if p.ctx.GetSessionVars().CostModelVersion == modelVer1 {
p.planCost += getCardinality(p.children[0], costFlag) * p.ctx.GetSessionVars().GetNetworkFactor(nil)
} else { // to avoid regression, only consider row-size on model ver2
rowSize := getTblStats(p.children[0]).GetAvgRowSize(p.ctx, p.children[0].Schema().Columns, false, false)
p.planCost += getCardinality(p.children[0], costFlag) * rowSize * p.ctx.GetSessionVars().GetNetworkFactor(nil)
}
p.planCost += getCardinality(p.children[0], costFlag) * p.ctx.GetSessionVars().GetNetworkFactor(nil)
p.planCostInit = true
return p.planCost, nil
}

View File

@ -17,6 +17,8 @@ package core
import (
"math"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/planner/property"
@ -24,19 +26,19 @@ import (
)
// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
// plan-cost = child-cost + filter-cost
//
// filter-cost is obtained from filterCostVer2 using the child's cardinality,
// the selection conditions and the CPU factor of the given task type.
// If the child fails to produce a cost, 0 and that error are returned and the
// cached plan cost is left unchanged.
func (p *PhysicalSelection) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	inputRows := getCardinality(p.children[0], option.CostFlag)
	cpuFactor := getTaskCPUFactor(p, taskType)

	filterCost := filterCostVer2(inputRows, p.Conditions, cpuFactor)

	childCost, err := p.children[0].GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	// Cache the result so later GetPlanCost calls can return it directly.
	p.planCost = filterCost + childCost
	p.planCostInit = true
	return p.planCost, nil
}
@ -66,11 +68,10 @@ func (p *PhysicalProjection) getPlanCostVer2(taskType property.TaskType, option
// log2(row-size) is from experiments.
func (p *PhysicalIndexScan) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	rows := getCardinality(p, option.CostFlag)
	// Clamp the row size to at least 2 so the log2(row-size) term applied by
	// scanCostVer2 is never smaller than 1.
	rowSize := math.Max(p.getScanRowSize(), 2.0)
	scanFactor := getTaskScanFactor(p, taskType)

	// Cache the result so later GetPlanCost calls can return it directly.
	p.planCost = scanCostVer2(rows, rowSize, scanFactor)
	p.planCostInit = true
	return p.planCost, nil
}
@ -80,15 +81,14 @@ func (p *PhysicalIndexScan) getPlanCostVer2(taskType property.TaskType, option *
// log2(row-size) is from experiments.
func (p *PhysicalTableScan) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
rows := getCardinality(p, option.CostFlag)
scanFactor := getTaskScanFactor(p, taskType)
rowSize := math.Max(p.getScanRowSize(), 2.0)
logRowSize := math.Log2(rowSize)
scanFactor := getTaskScanFactor(p, taskType)
p.planCost = rows * logRowSize * scanFactor
p.planCost = scanCostVer2(rows, rowSize, scanFactor)
// give TiFlash a start-up cost to let the optimizer prefers to use TiKV to process small table scans.
if p.StoreType == kv.TiFlash {
p.planCost += 10000 * logRowSize * scanFactor
p.planCost += scanCostVer2(10000, rowSize, scanFactor)
}
p.planCostInit = true
@ -105,7 +105,7 @@ func (p *PhysicalIndexReader) getPlanCostVer2(taskType property.TaskType, option
netFactor := getTaskNetFactor(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())
netCost := rows * rowSize * netFactor
netCost := netCostVer2(rows, rowSize, netFactor)
seekCost := estimateNetSeekCost(p.indexPlan)
childCost, err := p.indexPlan.GetPlanCost(property.CopSingleReadTaskType, option)
@ -128,7 +128,7 @@ func (p *PhysicalTableReader) getPlanCostVer2(taskType property.TaskType, option
netFactor := getTaskNetFactor(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())
netCost := rows * rowSize * netFactor
netCost := netCostVer2(rows, rowSize, netFactor)
seekCost := estimateNetSeekCost(p.tablePlan)
childCost, err := p.tablePlan.GetPlanCost(property.CopSingleReadTaskType, option)
@ -168,7 +168,7 @@ func (p *PhysicalIndexLookUpReader) getPlanCostVer2(taskType property.TaskType,
doubleReadConcurrency := float64(p.ctx.GetSessionVars().IndexLookupConcurrency())
// index-side
indexNetCost := indexRows * indexRowSize * netFactor
indexNetCost := netCostVer2(indexRows, indexRowSize, netFactor)
indexSeekCost := estimateNetSeekCost(p.indexPlan)
indexChildCost, err := p.indexPlan.GetPlanCost(property.CopDoubleReadTaskType, option)
if err != nil {
@ -177,7 +177,7 @@ func (p *PhysicalIndexLookUpReader) getPlanCostVer2(taskType property.TaskType,
indexSideCost := (indexNetCost + indexSeekCost + indexChildCost) / distConcurrency
// table-side
tableNetCost := tableRows * tableRowSize * netFactor
tableNetCost := netCostVer2(tableRows, tableRowSize, netFactor)
tableSeekCost := estimateNetSeekCost(p.tablePlan)
tableChildCost, err := p.tablePlan.GetPlanCost(property.CopDoubleReadTaskType, option)
if err != nil {
@ -211,7 +211,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
rows := getCardinality(tablePath, option.CostFlag)
rowSize := getAvgRowSize(tablePath.Stats(), tablePath.Schema())
tableNetCost := rows * rowSize * netFactor
tableNetCost := netCostVer2(rows, rowSize, netFactor)
tableSeekCost := estimateNetSeekCost(tablePath)
tableChildCost, err := tablePath.GetPlanCost(taskType, option)
if err != nil {
@ -225,7 +225,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
rows := getCardinality(indexPath, option.CostFlag)
rowSize := getAvgRowSize(indexPath.Stats(), indexPath.Schema())
indexNetCost := rows * rowSize * netFactor
indexNetCost := netCostVer2(rows, rowSize, netFactor)
indexSeekCost := estimateNetSeekCost(indexPath)
indexChildCost, err := indexPath.GetPlanCost(taskType, option)
if err != nil {
@ -306,6 +306,286 @@ func (p *PhysicalTopN) getPlanCostVer2(taskType property.TaskType, option *PlanC
return p.planCost, nil
}
// getPlanCostVer2 computes and caches the ver2 plan-cost of a stream aggregation:
// plan-cost = child-cost + agg-cost + group-cost
//
// agg-cost and group-cost are both derived from the child's cardinality and the
// CPU factor of the task type. A child error is returned as (0, err) without
// touching the cached cost.
func (p *PhysicalStreamAgg) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	child := p.children[0]
	inputRows := getCardinality(child, option.CostFlag)
	cpu := getTaskCPUFactor(p, taskType)

	aggregateCost := aggCostVer2(inputRows, p.AggFuncs, cpu)
	groupingCost := groupCostVer2(inputRows, p.GroupByItems, cpu)

	childPlanCost, err := child.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	total := childPlanCost + aggregateCost
	total += groupingCost

	p.planCost, p.planCostInit = total, true
	return p.planCost, nil
}
// getPlanCostVer2 computes and caches the ver2 plan-cost of a hash aggregation:
// plan-cost = child-cost + (agg-cost + group-cost + hash-build-cost + hash-probe-cost) / concurrency
//
// agg-cost, group-cost and hash-probe-cost use the input (child) cardinality;
// hash-build-cost uses this operator's output cardinality and average row size.
// A child error is returned as (0, err) without touching the cached cost.
func (p *PhysicalHashAgg) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	child := p.children[0]
	inRows := getCardinality(child, option.CostFlag)
	outRows := getCardinality(p, option.CostFlag)
	outRowSize := getAvgRowSize(p.Stats(), p.Schema())
	cpu := getTaskCPUFactor(p, taskType)
	mem := getTaskMemFactor(p, taskType)
	parallelism := p.ctx.GetSessionVars().GetConcurrencyFactor()

	// Accumulate the operator-local components in a fixed order:
	// agg, group, hash-build, hash-probe.
	localCost := aggCostVer2(inRows, p.AggFuncs, cpu)
	localCost += groupCostVer2(inRows, p.GroupByItems, cpu)
	localCost += hashBuildCostVer2(outRows, outRowSize, p.GroupByItems, cpu, mem)
	localCost += hashProbeCostVer2(inRows, p.GroupByItems, cpu)

	childPlanCost, err := child.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	p.planCost, p.planCostInit = childPlanCost+localCost/parallelism, true
	return p.planCost, nil
}
// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
// plan-cost = left-child-cost + right-child-cost + filter-cost + group-cost
//
// filter-cost is filterCostVer2 applied to each side's rows with that side's
// own conditions; group-cost is groupCostVer2 applied to each side's rows with
// that side's own join keys. If either child fails to produce a cost, 0 and
// that error are returned and the cached plan cost is left unchanged.
func (p *PhysicalMergeJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	leftRows := getCardinality(p.children[0], option.CostFlag)
	rightRows := getCardinality(p.children[1], option.CostFlag)
	cpuFactor := getTaskCPUFactor(p, taskType)

	filterCost := filterCostVer2(leftRows, p.LeftConditions, cpuFactor) +
		filterCostVer2(rightRows, p.RightConditions, cpuFactor)
	// The right side must be costed with its own join keys, not the left side's.
	groupCost := groupCostVer2(leftRows, cols2Exprs(p.LeftJoinKeys), cpuFactor) +
		groupCostVer2(rightRows, cols2Exprs(p.RightJoinKeys), cpuFactor)

	leftChildCost, err := p.children[0].GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}
	rightChildCost, err := p.children[1].GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	// Cache the result so later GetPlanCost calls can return it directly.
	p.planCost = leftChildCost + rightChildCost + filterCost + groupCost
	p.planCostInit = true
	return p.planCost, nil
}
// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
// plan-cost = build-child-cost + probe-child-cost +
// build-hash-cost + build-filter-cost +
// (probe-filter-cost + probe-hash-cost) / concurrency
//
// By default the left child (children[0]) is the build side. The sides are
// swapped when InnerChildIdx and UseOuterToBuild select the right child as the
// build side; filters and join keys follow their child in that swap.
// If either child fails to produce a cost, 0 and that error are returned and
// the cached plan cost is left unchanged.
func (p *PhysicalHashJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	build, probe := p.children[0], p.children[1]
	buildFilters, probeFilters := p.LeftConditions, p.RightConditions
	buildKeys, probeKeys := p.LeftJoinKeys, p.RightJoinKeys
	if (p.InnerChildIdx == 1 && !p.UseOuterToBuild) || (p.InnerChildIdx == 0 && p.UseOuterToBuild) {
		build, probe = probe, build
		buildFilters, probeFilters = probeFilters, buildFilters
		// Keep the join keys attached to the same child as the filters.
		buildKeys, probeKeys = probeKeys, buildKeys
	}
	buildRows := getCardinality(build, option.CostFlag)
	probeRows := getCardinality(probe, option.CostFlag)
	buildRowSize := getAvgRowSize(build.Stats(), build.Schema())
	concurrency := float64(p.Concurrency)
	cpuFactor := getTaskCPUFactor(p, taskType)
	memFactor := getTaskMemFactor(p, taskType)

	buildFilterCost := filterCostVer2(buildRows, buildFilters, cpuFactor)
	buildHashCost := hashBuildCostVer2(buildRows, buildRowSize, cols2Exprs(buildKeys), cpuFactor, memFactor)

	probeFilterCost := filterCostVer2(probeRows, probeFilters, cpuFactor)
	probeHashCost := hashProbeCostVer2(probeRows, cols2Exprs(probeKeys), cpuFactor)

	buildChildCost, err := build.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}
	probeChildCost, err := probe.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	// Only the probe-side work is divided by the join's concurrency.
	p.planCost = buildChildCost + probeChildCost + buildHashCost + buildFilterCost +
		(probeFilterCost+probeHashCost)/concurrency
	p.planCostInit = true
	return p.planCost, nil
}
// getPlanCostVer2 returns the plan-cost of this sub-plan, which is:
// plan-cost = build-child-cost + build-filter-cost +
// (probe-cost + probe-filter-cost) / concurrency
// probe-cost = probe-child-cost * build-rows / batchRatio
//
// The build side is the outer child (children[1-InnerChildIdx]) and the probe
// side is the inner child (children[InnerChildIdx]). If either child fails to
// produce a cost, 0 and that error are returned and the cached plan cost is
// left unchanged.
func (p *PhysicalIndexJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	build, probe := p.children[1-p.InnerChildIdx], p.children[p.InnerChildIdx]
	buildRows := getCardinality(build, option.CostFlag)
	probeRowsOne := getCardinality(probe, option.CostFlag)
	probeRowsTot := probeRowsOne * buildRows
	buildFilters, probeFilters := p.LeftConditions, p.RightConditions
	if p.InnerChildIdx == 0 {
		// The build (outer) side is the right child here, so its filters are
		// RightConditions and the probe side's are LeftConditions.
		buildFilters, probeFilters = probeFilters, buildFilters
	}
	probeConcurrency := float64(p.ctx.GetSessionVars().IndexLookupJoinConcurrency())
	cpuFactor := getTaskCPUFactor(p, taskType)

	buildFilterCost := filterCostVer2(buildRows, buildFilters, cpuFactor)
	buildChildCost, err := build.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	probeFilterCost := filterCostVer2(probeRowsTot, probeFilters, cpuFactor)
	probeChildCost, err := probe.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}
	// IndexJoin executes a batch of rows at a time, so the actual cost of this part should be
	// `innerCostPerBatch * numberOfBatches` instead of `innerCostPerRow * numberOfOuterRow`.
	// Use an empirical value batchRatio to handle this now.
	// TODO: remove this empirical value.
	batchRatio := 30.0
	probeCost := probeChildCost * buildRows / batchRatio

	// Only the probe-side work is divided by the index-lookup-join concurrency.
	p.planCost = buildChildCost + buildFilterCost + (probeCost+probeFilterCost)/probeConcurrency
	p.planCostInit = true
	return p.planCost, nil
}
// getPlanCostVer2 returns the ver2 plan-cost of this sub-plan by delegating to
// the embedded PhysicalIndexJoin's getPlanCostVer2; no IndexHashJoin-specific
// cost is added.
func (p *PhysicalIndexHashJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
// TODO: distinguish IndexHashJoin with IndexJoin
return p.PhysicalIndexJoin.getPlanCostVer2(taskType, option)
}
// getPlanCostVer2 returns the ver2 plan-cost of this sub-plan by delegating to
// the embedded PhysicalIndexJoin's getPlanCostVer2; no IndexMergeJoin-specific
// cost is added.
func (p *PhysicalIndexMergeJoin) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
// TODO: distinguish IndexMergeJoin with IndexJoin
return p.PhysicalIndexJoin.getPlanCostVer2(taskType, option)
}
// getPlanCostVer2 computes and caches the ver2 plan-cost of an apply operator:
// plan-cost = build-child-cost + build-filter-cost + probe-cost + probe-filter-cost
// probe-cost = probe-child-cost * build-rows
//
// children[0] is treated as the build side and children[1] as the probe side.
// A child error is returned as (0, err) without touching the cached cost.
func (p *PhysicalApply) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	buildSide, probeSide := p.children[0], p.children[1]

	buildRows := getCardinality(buildSide, option.CostFlag)
	probeRowsPerBuildRow := getCardinality(probeSide, option.CostFlag)
	totalProbeRows := buildRows * probeRowsPerBuildRow
	cpu := getTaskCPUFactor(p, taskType)

	buildFilterCost := filterCostVer2(buildRows, p.LeftConditions, cpu)
	buildPlanCost, err := buildSide.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	probeFilterCost := filterCostVer2(totalProbeRows, p.RightConditions, cpu)
	probePlanCost, err := probeSide.GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	// The probe child's cost is scaled by the number of build rows.
	total := buildPlanCost + buildFilterCost
	total += probePlanCost * buildRows
	total += probeFilterCost

	p.planCost, p.planCostInit = total, true
	return p.planCost, nil
}
// getPlanCostVer2 computes and caches the ver2 plan-cost of an exchange receiver:
// plan-cost = child-cost + net-cost
//
// net-cost is netCostVer2 over this operator's cardinality and average row size.
// A child error is returned as (0, err) without touching the cached cost.
func (p *PhysicalExchangeReceiver) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	outputRows := getCardinality(p, option.CostFlag)
	avgRowSize := getAvgRowSize(p.stats, p.Schema())
	// NOTE(review): the other ver2 cost functions in this file obtain the factor
	// via getTaskNetFactor(p, taskType); confirm getTableNetFactor is intended here.
	netFactor := getTableNetFactor(p)

	transferCost := netCostVer2(outputRows, avgRowSize, netFactor)

	childPlanCost, err := p.children[0].GetPlanCost(taskType, option)
	if err != nil {
		return 0, err
	}

	p.planCost, p.planCostInit = childPlanCost+transferCost, true
	return p.planCost, nil
}
// getPlanCostVer2 computes and caches the ver2 plan-cost of a point get:
// plan-cost = seek-cost + net-cost
//
// When accessCols is nil the cost is fixed at 0. Otherwise net-cost is
// netCostVer2 for a single row and seek-cost is the seek factor divided by 20.
// This function never returns a non-nil error.
func (p *PointGetPlan) getPlanCostVer2(taskType property.TaskType, _ *PlanCostOption) (float64, error) {
	p.planCostInit = true
	if p.accessCols == nil { // from fast plan code path
		p.planCost = 0
		return 0, nil
	}

	avgRowSize := getAvgRowSize(p.stats, p.schema)
	netFactor := getTaskNetFactor(p, taskType)
	seekFactor := getTaskSeekFactor(p, taskType)

	transferCost := netCostVer2(1, avgRowSize, netFactor)
	lookupCost := seekFactor / 20 // 20 times faster than general request

	p.planCost = transferCost + lookupCost
	return p.planCost, nil
}
// getPlanCostVer2 computes and caches the ver2 plan-cost of a batch point get:
// plan-cost = seek-cost + net-cost
//
// When accessCols is nil the cost is fixed at 0. Otherwise net-cost is
// netCostVer2 over this plan's cardinality and average row size, and seek-cost
// is a single seek factor divided by 20, independent of the row count.
// This function never returns a non-nil error.
func (p *BatchPointGetPlan) getPlanCostVer2(taskType property.TaskType, option *PlanCostOption) (float64, error) {
	p.planCostInit = true
	if p.accessCols == nil { // from fast plan code path
		p.planCost = 0
		return 0, nil
	}

	outputRows := getCardinality(p, option.CostFlag)
	avgRowSize := getAvgRowSize(p.stats, p.schema)
	netFactor := getTaskNetFactor(p, taskType)
	seekFactor := getTaskSeekFactor(p, taskType)

	transferCost := netCostVer2(outputRows, avgRowSize, netFactor)
	lookupCost := seekFactor / 20 // in one batch

	p.planCost = transferCost + lookupCost
	return p.planCost, nil
}
// scanCostVer2 returns rows * log2(max(1, rowSize)) * scanFactor.
// Row sizes below 1 are clamped to 1, which makes the log term (and the cost) 0.
func scanCostVer2(rows, rowSize, scanFactor float64) float64 {
	// log2 from experiments
	clampedSize := math.Max(1, rowSize)
	perRowWeight := math.Log2(clampedSize)
	cost := rows * perRowWeight
	return cost * scanFactor
}
// netCostVer2 returns rows * rowSize * netFactor.
func netCostVer2(rows, rowSize, netFactor float64) float64 {
	totalSize := rows * rowSize
	return totalSize * netFactor
}
// filterCostVer2 returns rows * len(filters) * cpuFactor; every filter is
// weighted equally regardless of its kind.
func filterCostVer2(rows float64, filters []expression.Expression, cpuFactor float64) float64 {
	// TODO: consider types of filters
	numFilters := float64(len(filters))
	evaluations := rows * numFilters
	return evaluations * cpuFactor
}
// aggCostVer2 returns rows * len(aggFuncs) * cpuFactor; every aggregate
// function is weighted equally regardless of its kind.
func aggCostVer2(rows float64, aggFuncs []*aggregation.AggFuncDesc, cpuFactor float64) float64 {
	// TODO: consider types of agg-funcs
	numFuncs := float64(len(aggFuncs))
	evaluations := rows * numFuncs
	return evaluations * cpuFactor
}
// groupCostVer2 returns rows * len(groupItems) * cpuFactor.
func groupCostVer2(rows float64, groupItems []expression.Expression, cpuFactor float64) float64 {
	numItems := float64(len(groupItems))
	evaluations := rows * numItems
	return evaluations * cpuFactor
}
// hashBuildCostVer2 returns the sum of three terms:
//   - hashing the keys:  buildRows * len(keys) * cpuFactor
//   - holding the rows:  buildRows * buildRowSize * memFactor
//   - building the table: buildRows * len(keys) * cpuFactor
//
// The first and third terms use the same formula.
func hashBuildCostVer2(buildRows, buildRowSize float64, keys []expression.Expression, cpuFactor, memFactor float64) float64 {
	// TODO: 1) consider types of keys, 2) dedicated factor for build-probe hash table
	perKeyCPUCost := buildRows * float64(len(keys)) * cpuFactor
	memCost := buildRows * buildRowSize * memFactor

	total := perKeyCPUCost + memCost // hash-key cost + memory cost
	total += perKeyCPUCost           // hash-build cost
	return total
}
// hashProbeCostVer2 returns the sum of two terms that share one formula,
// probeRows * len(keys) * cpuFactor: one for hashing the keys and one for
// probing the table.
func hashProbeCostVer2(probeRows float64, keys []expression.Expression, cpuFactor float64) float64 {
	// TODO: 1) consider types of keys, 2) dedicated factor for build-probe hash table
	perKeyCPUCost := probeRows * float64(len(keys)) * cpuFactor
	// hash-key cost + hash-probe cost
	return perKeyCPUCost + perKeyCPUCost
}
func getTaskCPUFactor(p PhysicalPlan, taskType property.TaskType) float64 {
switch taskType {
case property.RootTaskType: // TiDB
@ -372,3 +652,11 @@ func getTableInfo(p PhysicalPlan) *model.TableInfo {
return getTableInfo(x.Children()[0])
}
}
// cols2Exprs converts a slice of columns into a freshly allocated slice of
// expressions holding the same elements in the same order. The result is
// non-nil even when cols is empty.
func cols2Exprs(cols []*expression.Column) []expression.Expression {
	converted := make([]expression.Expression, len(cols))
	for i := range cols {
		converted[i] = cols[i]
	}
	return converted
}

View File

@ -0,0 +1,128 @@
// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package core_test
import (
"fmt"
"strconv"
"strings"
"testing"
"github.com/pingcap/tidb/testkit"
"github.com/stretchr/testify/require"
)
// testCostQueries asserts that the plan costs of queries are strictly
// increasing in the order given.
//
// For each query it runs `explain format='verbose'`, parses the third column
// of the first result row as the plan cost, and requires it to be larger than
// the cost of the previous query (the first query only needs a cost above 0).
// On failure the message contains both queries and both rendered plans.
func testCostQueries(t *testing.T, tk *testkit.TestKit, queries []string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	var (
		lastCost  float64
		lastPlan  []string
		lastQuery string
	)
	for _, q := range queries {
		rs := tk.MustQuery("explain format='verbose' " + q).Rows()
		// Fail with a readable message instead of panicking on a malformed result.
		require.NotEmpty(t, rs, "explain of %v returned no rows", q)
		require.Greater(t, len(rs[0]), 2, "explain of %v has no cost column", q)
		costStr, ok := rs[0][2].(string)
		require.True(t, ok, "cost column of %v has type %T, want string", q, rs[0][2])
		cost, err := strconv.ParseFloat(costStr, 64)
		require.NoError(t, err)

		plan := make([]string, 0, len(rs))
		for _, r := range rs {
			plan = append(plan, fmt.Sprintf("%v", r))
		}
		require.True(t, cost > lastCost, fmt.Sprintf("cost of %v should be larger than\n%v\n%v\n%v\n",
			q, lastQuery, strings.Join(plan, "\n"), strings.Join(lastPlan, "\n")))

		lastCost = cost
		lastPlan = plan
		lastQuery = q
	}
}
// TestCostModelVer2 loads a 100-row table, switches the session to cost model
// version 2 with the listed concurrency settings set to 1, and then checks,
// series by series, that each query is costed strictly higher than the one
// before it.
func TestCostModelVer2(t *testing.T) {
	mockStore := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, mockStore)
	tk.MustExec("use test")
	tk.MustExec(`create table t (a int primary key, b int, c int, key(b))`)

	// Populate t with rows (i, i, i) for i in [0, 100).
	const rowCount = 100
	tuples := make([]string, rowCount)
	for i := range tuples {
		tuples[i] = fmt.Sprintf("(%v, %v, %v)", i, i, i)
	}
	tk.MustExec(fmt.Sprintf("insert into t values %v", strings.Join(tuples, ", ")))
	tk.MustExec("analyze table t")

	sessionSettings := []string{
		"set @@tidb_distsql_scan_concurrency=1",
		"set @@tidb_executor_concurrency=1",
		"set @@tidb_opt_tiflash_concurrency_factor=1",
		"set @@tidb_index_lookup_concurrency=1",
		"set @@tidb_cost_model_version=2",
	}
	for i := range sessionSettings {
		tk.MustExec(sessionSettings[i])
	}

	// Within each series the queries are ordered by expected cost, lowest first.
	costSeries := [][]string{
		// table scan more rows
		{
			"select /*+ use_index(t, primary) */ a from t where a<1",
			"select /*+ use_index(t, primary) */ a from t where a<10",
			"select /*+ use_index(t, primary) */ a from t where a<100",
		},
		// index scan more rows
		{
			"select /*+ use_index(t, b) */ b from t where b<1",
			"select /*+ use_index(t, b) */ b from t where b<10",
			"select /*+ use_index(t, b) */ b from t where b<100",
		},
		// table scan more cols
		{
			"select /*+ use_index(t, primary) */ a from t",
			"select /*+ use_index(t, primary) */ a, b from t",
			"select /*+ use_index(t, primary) */ a, b, c from t",
		},
		// index lookup more rows
		{
			"select /*+ use_index(t, b) */ * from t where b<1",
			"select /*+ use_index(t, b) */ * from t where b<10",
			"select /*+ use_index(t, b) */ * from t where b<100",
		},
		// selection more filters
		{
			"select /*+ use_index(t, primary) */ a from t where mod(a, 20)<10",
			"select /*+ use_index(t, primary) */ a from t where mod(a, 20)<10 and mod(a, 20)<11",
			"select /*+ use_index(t, primary) */ a from t where mod(a, 20)<10 and mod(a, 20)<11 and mod(a, 20)<12",
		},
		// projection more exprs
		{
			"select /*+ use_index(t, primary) */ a+1 from t",
			"select /*+ use_index(t, primary) */ a+1, a+2 from t",
			"select /*+ use_index(t, primary) */ a+1, a+2, a+3 from t",
		},
		// hash agg more agg-funcs
		{
			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b",
			"select /*+ use_index(t, primary), hash_agg() */ sum(a), sum(a+2) from t group by b",
			"select /*+ use_index(t, primary), hash_agg() */ sum(a), sum(a+2), sum(a+4) from t group by b",
		},
		// hash agg more group-items
		{
			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b",
			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b, b+1",
			"select /*+ use_index(t, primary), hash_agg() */ sum(a) from t group by b, b+1, b+2",
		},
		// stream agg more agg-funcs
		{
			"select /*+ use_index(t, primary), stream_agg() */ sum(a) from t group by b",
			"select /*+ use_index(t, primary), stream_agg() */ sum(a), sum(a+2) from t group by b",
			"select /*+ use_index(t, primary), stream_agg() */ sum(a), sum(a+2), sum(a+4) from t group by b",
		},
		// hash join uses the small table to build hash table
		{
			"select /*+ hash_join_build(t1) */ * from t t1, t t2 where t1.b=t2.b and t1.a<10",
			"select /*+ hash_join_build(t2) */ * from t t1, t t2 where t1.b=t2.b and t1.a<10",
		},
		// hash join more join keys
		{
			"select /*+ hash_join_build(t1) */ * from t t1, t t2 where t1.b=t2.b",
			"select /*+ hash_join_build(t1) */ * from t t1, t t2 where t1.a=t2.a and t1.b=t2.b",
		},
	}
	for i := range costSeries {
		testCostQueries(t, tk, costSeries[i])
	}
}