planner: prefer to use TiKV to process small table scans and consider IndexJoin batch size in cost model ver2 (#36690)

close pingcap/tidb#36635, close pingcap/tidb#36668
This commit is contained in:
Yuanjia Zhang
2022-07-29 20:07:12 +08:00
committed by GitHub
parent 324b5ee0c7
commit 7bcb73e579
2 changed files with 49 additions and 0 deletions

View File

@ -443,6 +443,11 @@ func (p *PhysicalTableScan) GetPlanCost(taskType property.TaskType, costFlag uin
rowSize := math.Max(p.getScanRowSize(), 2.0) // to guarantee logRowSize >= 1
logRowSize := math.Log2(rowSize)
selfCost = getCardinality(p, costFlag) * logRowSize * scanFactor
// Give TiFlash a start-up cost so that the optimizer prefers to use TiKV to process small table scans.
if p.StoreType == kv.TiFlash {
selfCost += 2000 * logRowSize * scanFactor
}
}
p.planCost = selfCost
@ -528,6 +533,14 @@ func (p *PhysicalIndexJoin) GetCost(outerCnt, innerCnt, outerCost, innerCost flo
memoryCost := innerConcurrency * (batchSize * distinctFactor) * innerCnt * sessVars.GetMemoryFactor()
// Cost of inner child plan, i.e, mainly I/O and network cost.
innerPlanCost := outerCnt * innerCost
if p.ctx.GetSessionVars().CostModelVersion == 2 {
// IndexJoin executes a batch of rows at a time, so the actual cost of this part should be
// `innerCostPerBatch * numberOfBatches` instead of `innerCostPerRow * numberOfOuterRow`.
// Use an empirical value batchRatio to handle this now.
// TODO: remove this empirical value.
batchRatio := 30.0
innerPlanCost /= batchRatio
}
return outerCost + innerPlanCost + cpuCost + memoryCost + p.estDoubleReadCost(outerCnt)
}

View File

@ -1004,3 +1004,39 @@ func TestIssue36243(t *testing.T) {
selCost, readerCost = getCost()
require.True(t, selCost > readerCost)
}
// TestScanOnSmallTable verifies that, with tidb_cost_model_version set to 2,
// the plan for a full scan of a five-row table contains a scan operator whose
// task column mentions "tikv", even though the table is marked as having an
// available TiFlash replica.
func TestScanOnSmallTable(t *testing.T) {
	store, clean := testkit.CreateMockStore(t)
	defer clean()

	tk := testkit.NewTestKit(t, store)
	// Build a tiny analyzed table and switch the session to cost model ver2.
	setupStmts := []string{
		"use test",
		`create table t (a int)`,
		"insert into t values (1), (2), (3), (4), (5)",
		"analyze table t",
		`set @@tidb_cost_model_version=2`,
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	// Fake a TiFlash replica by patching the table info held in the info schema,
	// so that the explain below sees the table as having an available replica.
	schema, found := domain.GetDomain(tk.Session()).InfoSchema().SchemaByName(model.NewCIStr("test"))
	require.True(t, found)
	for _, info := range schema.Tables {
		if info.Name.L != "t" {
			continue
		}
		info.TiFlashReplica = &model.TiFlashReplicaInfo{
			Count:     1,
			Available: true,
		}
	}

	// Walk every row of the explain output; column 0 is read as the operator
	// and column 2 as the task type.
	sawTiKVScan := false
	for _, row := range tk.MustQuery("explain select * from t").Rows() {
		operator, taskType := row[0].(string), row[2].(string)
		if strings.Contains(operator, "Scan") && strings.Contains(taskType, "tikv") {
			sawTiKVScan = true
		}
	}
	require.True(t, sawTiKVScan)
}