From c671ebc88b761abebd99ca3a32abcbc70e86e8af Mon Sep 17 00:00:00 2001 From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com> Date: Mon, 28 Mar 2022 10:52:27 +0800 Subject: [PATCH] statistics: fix wrong point range in crossValidationSelectivity (#33357) ref pingcap/tidb#28030 --- statistics/integration_test.go | 19 +++++++++++++++++++ statistics/table.go | 18 ++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/statistics/integration_test.go b/statistics/integration_test.go index b45f03ada0..e43a30e52b 100644 --- a/statistics/integration_test.go +++ b/statistics/integration_test.go @@ -625,3 +625,22 @@ func TestNotLoadedStatsOnAllNULLCol(t *testing.T) { " └─TableReader(Probe) 4.00 root data:TableFullScan", " └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false")) } + +func TestCrossValidationSelectivity(t *testing.T) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 1") + tk.MustExec("create table t (a int, b int, c int, primary key (a, b) clustered)") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + tk.MustExec("insert into t values (1,2,3), (1,4,5)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + tk.MustExec("analyze table t") + tk.MustQuery("explain format = 'brief' select * from t where a = 1 and b > 0 and b < 1000 and c > 1000").Check(testkit.Rows( + "TableReader 0.00 root data:Selection", + "└─Selection 0.00 cop[tikv] gt(test.t.c, 1000)", + " └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false")) +} diff --git a/statistics/table.go b/statistics/table.go index b8bfa8d9aa..bee9dab89d 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -591,23 +591,13 @@ func (coll *HistColl) crossValidationSelectivity(sctx sessionctx.Context, idx *I if col.IsInvalid(sctx, 
coll.Pseudo) { continue } - lowExclude := idxPointRange.LowExclude - highExclude := idxPointRange.HighExclude - // Consider this case: - // create table t(a int, b int, c int, primary key(a,b,c)); - // insert into t values(1,1,1),(2,2,3); - // explain select * from t where (a,b) in ((1,1),(2,2)) and c > 2; - // For column a, we will get range: (1, 1], (2, 2], but GetColumnRowCount() with rang = (2, 2] will return 0. - // And the result of the explain statement will output estRow 0.0. So we change it to [2, 2]. - if lowExclude != highExclude && i < usedColsLen { - lowExclude = false - highExclude = false - } + // Since the column range is a point range (LowVal is equal to HighVal), we need to set both LowExclude and HighExclude to false. + // Otherwise we would get 0.0 estRow from GetColumnRowCount. rang := ranger.Range{ LowVal: []types.Datum{idxPointRange.LowVal[i]}, - LowExclude: lowExclude, + LowExclude: false, HighVal: []types.Datum{idxPointRange.HighVal[i]}, - HighExclude: highExclude, + HighExclude: false, Collators: []collate.Collator{idxPointRange.Collators[i]}, }