planner: fix row count estimation for unique composite IndexScan of IndexJoin (#14167)
This commit is contained in:
committed by
pingcap-github-bot
parent
980f72dca5
commit
f18abc980d
@ -118,11 +118,11 @@ id count task operator info
|
||||
Projection_13 1.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
|
||||
└─Limit_16 1.00 root offset:0, count:2500
|
||||
└─HashAgg_19 1.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
|
||||
└─IndexMergeJoin_30 0.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, test.st.ip), gt(test.dd.t, test.st.t)
|
||||
├─IndexLookUp_28 0.00 root
|
||||
│ ├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
|
||||
│ └─Selection_27 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
|
||||
│ └─TableScan_26 1.00 cop[tikv] table:dd, keep order:false, stats:pseudo
|
||||
└─HashRightJoin_34 0.00 root inner join, inner:IndexLookUp_52, equal:[eq(test.dd.aid, test.st.aid) eq(test.dd.ip, test.st.ip)], other cond:gt(test.dd.t, test.st.t)
|
||||
├─IndexLookUp_52 0.00 root
|
||||
│ ├─IndexScan_49 3333.33 cop[tikv] table:dd, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
|
||||
│ └─Selection_51 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), not(isnull(test.dd.ip))
|
||||
│ └─TableScan_50 3333.33 cop[tikv] table:dd, keep order:false, stats:pseudo
|
||||
└─IndexLookUp_41 3.33 root
|
||||
├─IndexScan_38 3333.33 cop[tikv] table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
|
||||
└─Selection_40 3.33 cop[tikv] eq(test.st.pt, "android"), not(isnull(test.st.ip))
|
||||
@ -137,9 +137,9 @@ Projection_10 0.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd
|
||||
│ └─Selection_34 0.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), not(isnull(test.st.dic))
|
||||
│ └─TableScan_33 3333.33 cop[tikv] table:gad, keep order:false, stats:pseudo
|
||||
└─IndexLookUp_22 0.00 root
|
||||
├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
|
||||
├─IndexScan_19 10000.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
|
||||
└─Selection_21 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
|
||||
└─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false, stats:pseudo
|
||||
└─TableScan_20 10000.00 cop[tikv] table:sdk, keep order:false, stats:pseudo
|
||||
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
|
||||
id count task operator info
|
||||
Projection_5 1.00 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21
|
||||
|
||||
@ -128,14 +128,13 @@ id count task operator info
|
||||
Projection_13 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
|
||||
└─Limit_16 424.00 root offset:0, count:2500
|
||||
└─HashAgg_19 424.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
|
||||
└─IndexMergeJoin_30 424.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.st.ip, test.dd.ip), gt(test.dd.t, test.st.t)
|
||||
└─HashRightJoin_34 424.00 root inner join, inner:TableReader_37, equal:[eq(test.st.aid, test.dd.aid) eq(test.st.ip, test.dd.ip)], other cond:gt(test.dd.t, test.st.t)
|
||||
├─TableReader_37 424.00 root data:Selection_36
|
||||
│ └─Selection_36 424.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.pt, "android"), gt(test.st.t, 1478143908), not(isnull(test.st.ip))
|
||||
│ └─TableScan_35 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false
|
||||
└─IndexLookUp_28 1.00 root
|
||||
├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
|
||||
└─Selection_27 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
|
||||
└─TableScan_26 1.00 cop[tikv] table:dd, keep order:false
|
||||
└─TableReader_44 455.80 root data:Selection_43
|
||||
└─Selection_43 455.80 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
|
||||
└─TableScan_42 2000.00 cop[tikv] table:dd, range:[0,+inf], keep order:false
|
||||
explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
|
||||
id count task operator info
|
||||
Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext
|
||||
@ -145,9 +144,9 @@ Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.
|
||||
│ └─Selection_30 170.34 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), gt(test.st.t, 1477971479), not(isnull(test.st.dic))
|
||||
│ └─TableScan_29 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false
|
||||
└─IndexLookUp_22 1.00 root
|
||||
├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
|
||||
├─IndexScan_19 3.93 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
|
||||
└─Selection_21 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
|
||||
└─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false
|
||||
└─TableScan_20 3.93 cop[tikv] table:sdk, keep order:false
|
||||
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
|
||||
id count task operator info
|
||||
Projection_5 39.28 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21
|
||||
|
||||
@ -594,7 +594,17 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
|
||||
}
|
||||
joins = make([]PhysicalPlan, 0, 3)
|
||||
rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys)
|
||||
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt)
|
||||
maxOneRow := false
|
||||
if helper.chosenPath.Index.Unique && helper.maxUsedCols == len(helper.chosenPath.FullIdxCols) {
|
||||
l := len(helper.chosenAccess)
|
||||
if l == 0 {
|
||||
maxOneRow = true
|
||||
} else {
|
||||
sf, ok := helper.chosenAccess[l-1].(*expression.ScalarFunction)
|
||||
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
|
||||
}
|
||||
}
|
||||
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
|
||||
|
||||
joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
|
||||
// The index merge join's inner plan is different from index join, so we
|
||||
@ -602,7 +612,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
|
||||
// Because we can't keep order for union scan, if there is a union scan in inner task,
|
||||
// we can't construct index merge join.
|
||||
if us == nil {
|
||||
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt)
|
||||
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt, maxOneRow)
|
||||
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
|
||||
}
|
||||
// We can reuse the `innerTask` here since index nested loop hash join
|
||||
@ -742,6 +752,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
|
||||
keepOrder bool,
|
||||
desc bool,
|
||||
rowCount float64,
|
||||
maxOneRow bool,
|
||||
) task {
|
||||
is := PhysicalIndexScan{
|
||||
Table: ds.tableInfo,
|
||||
@ -793,7 +804,6 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
|
||||
if rowCount <= 0 {
|
||||
rowCount = ds.tableStats.RowCount
|
||||
}
|
||||
maxOneRow := path.Index.Unique && len(outerJoinKeys) == len(path.FullIdxCols)
|
||||
if maxOneRow {
|
||||
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
|
||||
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g., pseudo statistics cases, which does not reflect
|
||||
|
||||
@ -303,5 +303,30 @@ func (s *testIntegrationSuite) TestINLJHintSmallTable(c *C) {
|
||||
tk.MustExec("insert into t2 values(1,1),(2,2),(3,3),(4,4),(5,5)")
|
||||
tk.MustExec("analyze table t1, t2")
|
||||
tk.MustExec("explain select /*+ TIDB_INLJ(t1) */ * from t1 join t2 on t1.a = t2.a")
|
||||
tk.MustQuery("show warnings").Check(testkit.Rows())
|
||||
}
|
||||
|
||||
func (s *testIntegrationSuite) TestIndexJoinUniqueCompositeIndex(c *C) {
|
||||
tk := testkit.NewTestKit(c, s.store)
|
||||
|
||||
tk.MustExec("use test")
|
||||
tk.MustExec("drop table if exists t1, t2")
|
||||
tk.MustExec("create table t1(a int not null, c int not null)")
|
||||
tk.MustExec("create table t2(a int not null, b int not null, c int not null, primary key(a,b))")
|
||||
tk.MustExec("insert into t1 values(1,1)")
|
||||
tk.MustExec("insert into t2 values(1,1,1),(1,2,1)")
|
||||
tk.MustExec("analyze table t1,t2")
|
||||
|
||||
var input []string
|
||||
var output []struct {
|
||||
SQL string
|
||||
Plan []string
|
||||
}
|
||||
s.testData.GetTestCases(c, &input, &output)
|
||||
for i, tt := range input {
|
||||
s.testData.OnRecord(func() {
|
||||
output[i].SQL = tt
|
||||
output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery(tt).Rows())
|
||||
})
|
||||
tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...))
|
||||
}
|
||||
}
|
||||
|
||||
11
planner/core/testdata/integration_suite_in.json
vendored
11
planner/core/testdata/integration_suite_in.json
vendored
@ -26,6 +26,17 @@
|
||||
"explain select * from t t1 left join t t2 on t1.a = t2.a where cast(t1.b as date) >= '2019-01-01'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "TestIndexJoinUniqueCompositeIndex",
|
||||
"cases": [
|
||||
// Row count of IndexScan should be 2.
|
||||
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
|
||||
// Row count of IndexScan should be 2.
|
||||
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
|
||||
// Row count of IndexScan should be 1.
|
||||
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "TestPartitionTableStats",
|
||||
"cases": [
|
||||
|
||||
38
planner/core/testdata/integration_suite_out.json
vendored
38
planner/core/testdata/integration_suite_out.json
vendored
@ -78,6 +78,44 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Name": "TestIndexJoinUniqueCompositeIndex",
|
||||
"Cases": [
|
||||
{
|
||||
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
|
||||
"Plan": [
|
||||
"IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:eq(test.t1.c, test.t2.c)",
|
||||
"├─TableReader_19 1.00 root data:TableScan_18",
|
||||
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
|
||||
"└─IndexLookUp_8 2.00 root ",
|
||||
" ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false",
|
||||
" └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
|
||||
]
|
||||
},
|
||||
{
|
||||
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
|
||||
"Plan": [
|
||||
"IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:le(test.t1.c, test.t2.b)",
|
||||
"├─TableReader_19 1.00 root data:TableScan_18",
|
||||
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
|
||||
"└─IndexLookUp_8 2.00 root ",
|
||||
" ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) le(test.t1.c, test.t2.b)], keep order:false",
|
||||
" └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
|
||||
]
|
||||
},
|
||||
{
|
||||
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1",
|
||||
"Plan": [
|
||||
"IndexJoin_9 1.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a",
|
||||
"├─TableReader_19 1.00 root data:TableScan_18",
|
||||
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
|
||||
"└─IndexLookUp_8 1.00 root ",
|
||||
" ├─IndexScan_6 1.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) eq(test.t2.b, 1)], keep order:false",
|
||||
" └─TableScan_7 1.00 cop[tikv] table:t2, keep order:false"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"Name": "TestPartitionTableStats",
|
||||
"Cases": [
|
||||
|
||||
Reference in New Issue
Block a user