statistics: fix wrong column stats loading after analyze twice (#42076)
close pingcap/tidb#42073
This commit is contained in:
@ -693,3 +693,29 @@ func TestSingleColumnIndexNDV(t *testing.T) {
|
||||
require.Equal(t, expectedResults[i][2], row[7]) // null_count
|
||||
}
|
||||
}
|
||||
|
||||
func TestColumnStatsLazyLoad(t *testing.T) {
|
||||
store, dom := testkit.CreateMockStoreAndDomain(t)
|
||||
tk := testkit.NewTestKit(t, store)
|
||||
h := dom.StatsHandle()
|
||||
originLease := h.Lease()
|
||||
defer h.SetLease(originLease)
|
||||
// Set `Lease` to `Millisecond` to enable column stats lazy load.
|
||||
h.SetLease(time.Millisecond)
|
||||
tk.MustExec("use test")
|
||||
tk.MustExec("create table t(a int, b int)")
|
||||
tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
|
||||
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
|
||||
tk.MustExec("analyze table t")
|
||||
is := dom.InfoSchema()
|
||||
tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
|
||||
require.NoError(t, err)
|
||||
tblInfo := tbl.Meta()
|
||||
c1 := tblInfo.Columns[0]
|
||||
c2 := tblInfo.Columns[1]
|
||||
require.True(t, h.GetTableStats(tblInfo).Columns[c1.ID].IsAllEvicted())
|
||||
require.True(t, h.GetTableStats(tblInfo).Columns[c2.ID].IsAllEvicted())
|
||||
tk.MustExec("analyze table t")
|
||||
require.True(t, h.GetTableStats(tblInfo).Columns[c1.ID].IsAllEvicted())
|
||||
require.True(t, h.GetTableStats(tblInfo).Columns[c2.ID].IsAllEvicted())
|
||||
}
|
||||
|
||||
@ -334,10 +334,23 @@ func columnStatsFromStorage(reader *StatsReader, row chunk.Row, table *Table, ta
|
||||
// 2. this column is not handle, and:
|
||||
// 3. the column doesn't has any statistics before, and:
|
||||
// 4. loadAll is false.
|
||||
//
|
||||
// Here is the explanation of the condition `!col.IsStatsInitialized() || col.IsAllEvicted()`.
|
||||
// For one column:
|
||||
// 1. If there is no stats for it in the storage(i.e., analyze has never been executed before), then its stats status
|
||||
// would be `!col.IsStatsInitialized()`. In this case we should go the `notNeedLoad` path.
|
||||
// 2. If there exists stats for it in the storage but its stats status is `col.IsAllEvicted()`, there are two
|
||||
// sub cases for this case. One is that the column stats have never been used/needed by the optimizer so they have
|
||||
// never been loaded. The other is that the column stats were loaded and then evicted. For the both sub cases,
|
||||
// we should go the `notNeedLoad` path.
|
||||
// 3. If some parts(Histogram/TopN/CMSketch) of stats for it exist in TiDB memory currently, we choose to load all of
|
||||
// its new stats once we find stats version is updated.
|
||||
notNeedLoad := lease > 0 &&
|
||||
!isHandle &&
|
||||
(col == nil || !col.IsStatsInitialized() && col.LastUpdateVersion < histVer) &&
|
||||
(col == nil || ((!col.IsStatsInitialized() || col.IsAllEvicted()) && col.LastUpdateVersion < histVer)) &&
|
||||
!loadAll
|
||||
// Here is
|
||||
//For one column, if there is no stats for it in the storage(analyze is never)
|
||||
if notNeedLoad {
|
||||
count, err := ColumnCountFromStorage(reader, table.PhysicalID, histID, statsVer)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user