statistics: fix wrong column stats loading after analyze twice (#42076)

close pingcap/tidb#42073
This commit is contained in:
Yifan Xu
2023-03-10 14:15:13 +08:00
committed by GitHub
parent 53f15f6ed7
commit fd45f737cc
2 changed files with 40 additions and 1 deletions

View File

@ -693,3 +693,29 @@ func TestSingleColumnIndexNDV(t *testing.T) {
require.Equal(t, expectedResults[i][2], row[7]) // null_count
}
}
// TestColumnStatsLazyLoad checks that, with column stats lazy load enabled,
// the stats of both columns of a table report IsAllEvicted after the first
// `analyze table` and still do so after the table is analyzed a second time.
func TestColumnStatsLazyLoad(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()

	// Column stats lazy load is enabled by setting `Lease` to `Millisecond`;
	// the previous lease is put back when the test returns.
	savedLease := statsHandle.Lease()
	defer statsHandle.SetLease(savedLease)
	statsHandle.SetLease(time.Millisecond)

	// Prepare a small two-column table.
	for _, stmt := range []string{
		"use test",
		"create table t(a int, b int)",
		"insert into t values (1,2), (3,4), (5,6), (7,8)",
	} {
		tk.MustExec(stmt)
	}
	// Consume the pending DDL event before the first analyze.
	require.NoError(t, statsHandle.HandleDDLEvent(<-statsHandle.DDLEventCh()))
	tk.MustExec("analyze table t")

	table, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	meta := table.Meta()
	colA, colB := meta.Columns[0], meta.Columns[1]

	// requireAllEvicted asserts that the stats of both columns are
	// reported as all-evicted by the stats handle.
	requireAllEvicted := func() {
		require.True(t, statsHandle.GetTableStats(meta).Columns[colA.ID].IsAllEvicted())
		require.True(t, statsHandle.GetTableStats(meta).Columns[colB.ID].IsAllEvicted())
	}

	requireAllEvicted()
	// The same must hold after analyzing a second time.
	tk.MustExec("analyze table t")
	requireAllEvicted()
}

View File

@ -334,10 +334,23 @@ func columnStatsFromStorage(reader *StatsReader, row chunk.Row, table *Table, ta
// 2. this column is not handle, and:
// 3. the column doesn't have any statistics before, and:
// 4. loadAll is false.
//
// Here is the explanation of the condition `!col.IsStatsInitialized() || col.IsAllEvicted()`.
// For one column:
// 1. If there are no stats for it in the storage (i.e., analyze has never been executed before), then its stats
// status would be `!col.IsStatsInitialized()`. In this case we should take the `notNeedLoad` path.
// 2. If there exist stats for it in the storage but its stats status is `col.IsAllEvicted()`, there are two
// sub-cases. One is that the column stats have never been used/needed by the optimizer, so they have
// never been loaded. The other is that the column stats were loaded and then evicted. In both sub-cases,
// we should take the `notNeedLoad` path.
// 3. If some parts (Histogram/TopN/CMSketch) of its stats currently exist in TiDB memory, we choose to load all of
// its new stats once we find that the stats version has been updated.
notNeedLoad := lease > 0 &&
!isHandle &&
(col == nil || !col.IsStatsInitialized() && col.LastUpdateVersion < histVer) &&
(col == nil || ((!col.IsStatsInitialized() || col.IsAllEvicted()) && col.LastUpdateVersion < histVer)) &&
!loadAll
// For example, if a column has no stats in the storage (analyze has never been executed),
// it takes the `notNeedLoad` path.
if notNeedLoad {
count, err := ColumnCountFromStorage(reader, table.PhysicalID, histID, statsVer)
if err != nil {