planner: leverage stats collection rule to get operator num (#58635)

ref pingcap/tidb#51664
This commit is contained in:
Arenatlx
2025-01-02 15:10:51 +08:00
committed by GitHub
parent c44e9913a7
commit dc4cb9b2c2
7 changed files with 111 additions and 53 deletions

View File

@ -48,14 +48,18 @@ type Memo struct {
}
// NewMemo creates a new memo.
func NewMemo() *Memo {
func NewMemo(caps ...uint64) *Memo {
// default capacity is 4.
capacity := uint64(4)
// caps is an optional, variadic capacity hint; only the first value is used.
// The check must be len(caps) > 0: callers pass exactly one value, so the
// previous `> 1` test never fired and the hint was silently dropped.
// A zero hint carries no sizing information, so the default is kept then.
if len(caps) > 0 && caps[0] > 0 {
	capacity = caps[0]
}
return &Memo{
groupIDGen: &GroupIDGenerator{id: 0},
groups: list.New(),
groupID2Group: make(map[GroupID]*list.Element),
hash2GlobalGroupExpr: hashmap.New[*GroupExpression, *GroupExpression](
// todo: feel the operator count at the prev normalization rule.
4,
capacity,
func(a, b *GroupExpression) bool {
return a.Equals(b)
},

View File

@ -50,8 +50,9 @@ func TestDeriveStats(t *testing.T) {
p := parser.New()
var input []string
var output []struct {
SQL string
Str []string
SQL string
Str []string
OpNum uint64
}
statsSuiteData := GetCascadesSuiteData()
statsSuiteData.LoadTestCases(t, &input, &output)
@ -72,7 +73,7 @@ func TestDeriveStats(t *testing.T) {
lp := p.(base.LogicalPlan)
// after stats derive is done, which means the up-down propagation of group ndv is done, in bottom-up building phase
// of memo, we don't have to expect the upper operator's group cols passing down anymore.
mm := memo.NewMemo()
mm := memo.NewMemo(lp.SCtx().GetSessionVars().StmtCtx.OperatorNum)
_, err = mm.Init(lp)
require.Nil(t, err)
// check the stats state in memo group.
@ -117,6 +118,7 @@ func TestDeriveStats(t *testing.T) {
testdata.OnRecord(func() {
output[i].SQL = tt
output[i].Str = strs
output[i].OpNum = lp.SCtx().GetSessionVars().StmtCtx.OperatorNum
})
require.Equal(t, output[i].Str, strs, "case i:"+strconv.Itoa(i)+" "+tt)
}
@ -142,8 +144,9 @@ func TestGroupNDVCols(t *testing.T) {
p := parser.New()
var input []string
var output []struct {
SQL string
Str []string
SQL string
Str []string
OpNum uint64
}
statsSuiteData := GetCascadesSuiteData()
statsSuiteData.LoadTestCases(t, &input, &output)
@ -163,7 +166,7 @@ func TestGroupNDVCols(t *testing.T) {
lp := p.(base.LogicalPlan)
// after stats derive is done, which means the up-down propagation of group ndv is done, in bottom-up building phase
// of memo, we don't have to expect the upper operator's group cols passing down anymore.
mm := memo.NewMemo()
mm := memo.NewMemo(lp.SCtx().GetSessionVars().StmtCtx.OperatorNum)
mm.Init(lp)
// check the stats state in memo group.
b := &bytes.Buffer{}
@ -207,6 +210,7 @@ func TestGroupNDVCols(t *testing.T) {
testdata.OnRecord(func() {
output[i].SQL = tt
output[i].Str = strs
output[i].OpNum = lp.SCtx().GetSessionVars().StmtCtx.OperatorNum
})
require.Equal(t, output[i].Str, strs, "case i:"+strconv.Itoa(i)+" "+tt)
}

View File

@ -8,7 +8,8 @@
"GID:1, GE:DataSource_1{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_2{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_3{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select * from t1, t2 where t1.a = t2.a and t1.b = t2.b",
@ -17,7 +18,8 @@
"GID:2, GE:DataSource_5{}, logic prop:{stats:{count 10, ColNDVs map[4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Join_9{GID:1, GID:2}, logic prop:{stats:{count 5.555555555555555, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Projection_8{GID:3}, logic prop:{stats:{count 5.555555555555555, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
},
{
"SQL": "select count(1) from t1 where a > 0 group by a, b",
@ -25,7 +27,8 @@
"GID:1, GE:DataSource_10{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_12{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_13{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select count(1) from t1 where b > 0 group by a, b",
@ -33,7 +36,8 @@
"GID:1, GE:DataSource_14{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_16{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_17{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select count(1) from t1 where cos(a) > 0 group by a, b",
@ -41,7 +45,8 @@
"GID:1, GE:DataSource_18{}, logic prop:{stats:{count 4, ColNDVs map[1:1.6 2:1.6], GroupNDVs [{[1 2] 3.2}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_20{GID:1}, logic prop:{stats:{count 3.2, ColNDVs map[4:3.2], GroupNDVs [{[1 2] 3.2}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_21{GID:2}, logic prop:{stats:{count 3.2, ColNDVs map[4:3.2], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select count(c3) from (select a as c1, b as c2, a+1 as c3 from t1) as tmp group by c2, c1",
@ -49,7 +54,8 @@
"GID:1, GE:DataSource_22{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_24{GID:1}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_25{GID:2}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs []}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
},
{
"SQL": "select count(c3) from (select a+b as c1, b as c2, a+1 as c3 from t1) as tmp group by c2, c1",
@ -57,7 +63,8 @@
"GID:1, GE:DataSource_26{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_28{GID:1}, logic prop:{stats:{count 2, ColNDVs map[6:2], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_29{GID:2}, logic prop:{stats:{count 2, ColNDVs map[6:2], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b > (select t2.b from t2 where t2.a = t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -68,7 +75,8 @@
"GID:4, GE:Apply_37{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 7:5 8:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_38{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_39{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 7
},
{
"SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b > (select t2.b from t2 where t2.a = t1.a)) tmp group by tmp.a, tmp.b",
@ -80,7 +88,8 @@
"GID:5, GE:Apply_46{GID:1, GID:4}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 4:5 5:5], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Aggregation_48{GID:5}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:7, GE:Projection_49{GID:6}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 7
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -91,7 +100,8 @@
"GID:4, GE:Apply_58{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_59{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_60{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 6
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -102,7 +112,8 @@
"GID:4, GE:Apply_70{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_71{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_72{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 6
},
{
"SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b in (select t2.b from t2 where t2.a = t1.a limit 3)) tmp group by tmp.a, tmp.b",
@ -113,7 +124,8 @@
"GID:4, GE:Apply_81{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_83{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_84{GID:5}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 6
},
{
"SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b not in (select t2.b from t2 where t2.a = t1.a limit 3)) tmp group by tmp.a, tmp.b",
@ -124,7 +136,8 @@
"GID:4, GE:Apply_93{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_95{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_96{GID:5}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 6
},
{
"SQL": "select count(1) from t1, t2 where t1.a = t2.a group by t1.a, t1.b",
@ -134,7 +147,8 @@
"GID:3, GE:Join_105{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 2:2 4:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_103{GID:3}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_104{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from t1 left join t2 on t1.a = t2.a group by t1.a, t1.b",
@ -144,7 +158,8 @@
"GID:3, GE:Join_111{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 2:2 4:3], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_109{GID:3}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_110{GID:4}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from t1 left join t2 on t1.a = t2.a group by t2.a, t2.b",
@ -154,7 +169,8 @@
"GID:3, GE:Join_117{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_115{GID:3}, logic prop:{stats:{count 3, ColNDVs map[7:3], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_116{GID:4}, logic prop:{stats:{count 3, ColNDVs map[7:3], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from t1 right join t2 on t1.a = t2.a group by t1.a, t1.b",
@ -164,7 +180,8 @@
"GID:3, GE:Join_123{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 2:2 4:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_121{GID:3}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_122{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from t1 right join t2 on t1.a = t2.a group by t2.a, t2.b",
@ -174,7 +191,8 @@
"GID:3, GE:Join_129{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t1.a,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_127{GID:3}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_128{GID:4}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -184,7 +202,8 @@
"GID:3, GE:Join_136{GID:1, GID:2}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_137{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_138{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -194,7 +213,8 @@
"GID:3, GE:Join_145{GID:1, GID:2}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_146{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_147{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b in (select t2.b from t2 where t2.a > t1.a)) tmp group by tmp.a, tmp.b",
@ -204,7 +224,8 @@
"GID:3, GE:Join_153{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:1.6 2:1.6], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_155{GID:3}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_156{GID:4}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b not in (select t2.b from t2 where t2.a > t1.a)) tmp group by tmp.a, tmp.b",
@ -214,7 +235,8 @@
"GID:3, GE:Join_162{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:1.6 2:1.6], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_164{GID:3}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_165{GID:4}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select * from t1 left join (select t2.a as a, t2.b as b, count(1) as cnt from t2 group by t2.a, t2.b) as tmp on t1.a = tmp.a and t1.b = tmp.b",
@ -224,7 +246,8 @@
"GID:3, GE:Aggregation_168{GID:2}, logic prop:{stats:{count 9, ColNDVs map[4:9 5:9 7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7,test.t2.a,test.t2.b] PKOrUK: [[test.t2.a,test.t2.b]] NullableUK: []}}",
"GID:4, GE:Join_172{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 4:5 5:5 7:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#7,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_171{GID:4}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 4:5 5:5 7:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b,Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 limit 3) tmp group by tmp.a, tmp.b",
@ -233,7 +256,8 @@
"GID:2, GE:Limit_179{GID:1}, logic prop:{stats:{count 3, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Aggregation_176{GID:2}, logic prop:{stats:{count 2, ColNDVs map[4:2], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Projection_177{GID:3}, logic prop:{stats:{count 2, ColNDVs map[4:2], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
},
{
"SQL": "select count(tmp.a_sum) from (select t1.a as a, t1.b as b, sum(a) over() as a_sum from t1) tmp group by tmp.a, tmp.b",
@ -242,7 +266,8 @@
"GID:2, GE:Window_183{GID:1}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 5:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#5] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Aggregation_185{GID:2}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Projection_186{GID:3}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
}
]
},
@ -255,7 +280,8 @@
"GID:1, GE:DataSource_2{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_3{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_4{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select * from t1, t2 where t1.a = t2.a and t1.b = t2.b",
@ -264,7 +290,8 @@
"GID:2, GE:DataSource_6{}, logic prop:{stats:{count 9, ColNDVs map[4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Join_10{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Projection_9{GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
},
{
"SQL": "select count(1) from t1 where a > 0 group by a, b",
@ -272,7 +299,8 @@
"GID:1, GE:DataSource_11{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_13{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_14{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select count(1) from t1 where b > 0 group by a, b",
@ -280,7 +308,8 @@
"GID:1, GE:DataSource_15{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_17{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_18{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 3
},
{
"SQL": "select count(c3) from (select a as c1, b as c2, a+1 as c3 from t1) as tmp group by c2, c1",
@ -288,7 +317,8 @@
"GID:1, GE:DataSource_19{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}",
"GID:2, GE:Aggregation_21{GID:1}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Projection_22{GID:2}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs []}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b > (select t2.b from t2 where t2.a = t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -299,7 +329,8 @@
"GID:4, GE:Apply_30{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 7:4 8:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_31{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_32{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 7
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -310,7 +341,8 @@
"GID:4, GE:Apply_40{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_41{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_42{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 6
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -321,7 +353,8 @@
"GID:4, GE:Apply_52{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Aggregation_53{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:6, GE:Projection_54{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 6
},
{
"SQL": "select count(1) from t1 left join t2 on t1.a = t2.a group by t1.a, t1.b",
@ -331,7 +364,8 @@
"GID:3, GE:Join_62{GID:1, GID:2}, logic prop:{stats:{count 12, ColNDVs map[1:2 2:2 4:3], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_60{GID:3}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_61{GID:4}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(1) from t1 right join t2 on t1.a = t2.a group by t2.a, t2.b",
@ -341,7 +375,8 @@
"GID:3, GE:Join_68{GID:1, GID:2}, logic prop:{stats:{count 12, ColNDVs map[1:2 4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t1.a,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_66{GID:3}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_67{GID:4}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -351,7 +386,8 @@
"GID:3, GE:Join_75{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_76{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_77{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b",
@ -361,7 +397,8 @@
"GID:3, GE:Join_84{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Aggregation_85{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_86{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select * from t1 left join (select t2.a as a, t2.b as b, count(1) as cnt from t2 group by t2.a, t2.b) as tmp on t1.a = tmp.a and t1.b = tmp.b",
@ -371,7 +408,8 @@
"GID:3, GE:Aggregation_89{GID:2}, logic prop:{stats:{count 9, ColNDVs map[4:9 5:9 7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7,test.t2.a,test.t2.b] PKOrUK: [[test.t2.a,test.t2.b]] NullableUK: []}}",
"GID:4, GE:Join_93{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:4 5:4 7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#7,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}",
"GID:5, GE:Projection_92{GID:4}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:4 5:4 7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b,Column#7] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 5
},
{
"SQL": "select count(tmp.a_sum) from (select t1.a as a, t1.b as b, sum(a) over() as a_sum from t1) tmp group by tmp.a, tmp.b",
@ -380,7 +418,8 @@
"GID:2, GE:Window_97{GID:1}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 5:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#5] PKOrUK: [] NullableUK: []}}",
"GID:3, GE:Aggregation_99{GID:2}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}",
"GID:4, GE:Projection_100{GID:3}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}"
]
],
"OpNum": 4
}
]
}

View File

@ -58,6 +58,9 @@ type columnStatsUsageCollector struct {
// tblID2PartitionIDs is used for tables with static pruning mode.
// Note that we've no longer suggested to use static pruning mode.
tblID2PartitionIDs map[int64][]int64
// operatorNum is the number of operators in the logical plan.
operatorNum uint64
}
func newColumnStatsUsageCollector(histNeeded bool, enabledPlanCapture bool) *columnStatsUsageCollector {
@ -304,6 +307,7 @@ func (c *columnStatsUsageCollector) collectFromPlan(askedColGroups [][]*expressi
c.updateColMap(col, []*expression.Column{x.SeedSchema.Columns[i]})
}
}
c.operatorNum++
}
// CollectColumnStatsUsage collects column stats usage from logical plan.
@ -312,17 +316,18 @@ func (c *columnStatsUsageCollector) collectFromPlan(askedColGroups [][]*expressi
// First return value: predicate columns
// Second return value: the visited table IDs(For partition table, we only record its global meta ID. The meta ID of each partition will be recorded in tblID2PartitionIDs)
// Third return value: the visited partition IDs. Used for static partition pruning.
// Fourth return value: the recorded asked column group for each datasource table, which will require collecting composite index for its group ndv info.
// Fourth return value: the number of operators in the logical plan.
// TODO: remove the third return value when the static partition pruning is totally deprecated.
func CollectColumnStatsUsage(lp base.LogicalPlan, histNeeded bool) (
map[model.TableItemID]bool,
*intset.FastIntSet,
map[int64][]int64,
uint64,
) {
collector := newColumnStatsUsageCollector(histNeeded, lp.SCtx().GetSessionVars().IsPlanReplayerCaptureEnabled())
collector.collectFromPlan(nil, lp)
if collector.collectVisitedTable {
recordTableRuntimeStats(lp.SCtx(), collector.visitedtbls)
}
return collector.predicateCols, collector.visitedPhysTblIDs, collector.tblID2PartitionIDs
return collector.predicateCols, collector.visitedPhysTblIDs, collector.tblID2PartitionIDs, collector.operatorNum
}

View File

@ -80,7 +80,7 @@ func getStatsLoadItem(t *testing.T, is infoschema.InfoSchema, item model.StatsLo
}
func checkColumnStatsUsageForPredicates(t *testing.T, is infoschema.InfoSchema, lp base.LogicalPlan, expected []string, comment string) {
tblColIDs, _, _ := CollectColumnStatsUsage(lp, false)
tblColIDs, _, _, _ := CollectColumnStatsUsage(lp, false)
cols := make([]string, 0, len(tblColIDs))
for tblColID := range tblColIDs {
col := getColumnName(t, is, tblColID, comment)
@ -91,7 +91,7 @@ func checkColumnStatsUsageForPredicates(t *testing.T, is infoschema.InfoSchema,
}
func checkColumnStatsUsageForStatsLoad(t *testing.T, is infoschema.InfoSchema, lp base.LogicalPlan, expectedCols []string, expectedParts map[string][]string, comment string) {
predicateCols, _, expandedPartitions := CollectColumnStatsUsage(lp, true)
predicateCols, _, expandedPartitions, _ := CollectColumnStatsUsage(lp, true)
loadItems := make([]model.StatsLoadItem, 0, len(predicateCols))
for tblColID, fullLoad := range predicateCols {
loadItems = append(loadItems, model.StatsLoadItem{TableItemID: tblColID, FullLoad: fullLoad})

View File

@ -45,7 +45,10 @@ func (c *CollectPredicateColumnsPoint) Optimize(_ context.Context, plan base.Log
}
syncWait := plan.SCtx().GetSessionVars().StatsLoadSyncWait.Load()
histNeeded := syncWait > 0
predicateColumns, visitedPhysTblIDs, tid2pids := CollectColumnStatsUsage(plan, histNeeded)
predicateColumns, visitedPhysTblIDs, tid2pids, opNum := CollectColumnStatsUsage(plan, histNeeded)
// opNum is collected by the common stats-load rule. Later rules may eliminate some operators (e.g. projections),
// so opNum is only approximate, but that is accurate enough for the memo hashmap's initial capacity.
plan.SCtx().GetSessionVars().StmtCtx.OperatorNum = opNum
if len(predicateColumns) > 0 {
plan.SCtx().UpdateColStatsUsage(maps.Keys(predicateColumns))
}

View File

@ -436,6 +436,9 @@ type StatementContext struct {
// and the `for share` execution is enabled by `tidb_enable_noop_functions`, no locks should be
// acquired in this case.
ForShareLockEnabledByNoop bool
// OperatorNum is used to record the number of operators in the current logical plan.
OperatorNum uint64
}
// DefaultStmtErrLevels is the default error levels for statement