planner, statistics: refine stats loaded status and when to use pseudo stats (#37444)
close pingcap/tidb#37485
This commit is contained in:
@ -183,6 +183,9 @@ func (e *ShowExec) appendTableForStatsHistograms(dbName, tblName, partitionName
|
||||
col.StatsLoadedStatus.StatusToString(), col.MemoryUsage())
|
||||
}
|
||||
for _, idx := range stableIdxsStats(statsTbl.Indices) {
|
||||
if !idx.IsStatsInitialized() {
|
||||
continue
|
||||
}
|
||||
e.histogramToRow(dbName, tblName, partitionName, idx.Info.Name.O, 1, idx.Histogram, 0,
|
||||
idx.StatsLoadedStatus.StatusToString(), idx.MemoryUsage())
|
||||
}
|
||||
|
||||
@ -4144,15 +4144,20 @@ func getStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64)
|
||||
return statistics.PseudoTable(tblInfo)
|
||||
}
|
||||
|
||||
// 3. statistics is outdated.
|
||||
if ctx.GetSessionVars().GetEnablePseudoForOutdatedStats() {
|
||||
if statsTbl.IsOutdated() {
|
||||
tbl := *statsTbl
|
||||
tbl.Pseudo = true
|
||||
statsTbl = &tbl
|
||||
// 3. statistics is uninitialized or outdated.
|
||||
pseudoStatsForUninitialized := !statsTbl.IsInitialized()
|
||||
pseudoStatsForOutdated := ctx.GetSessionVars().GetEnablePseudoForOutdatedStats() && statsTbl.IsOutdated()
|
||||
if pseudoStatsForUninitialized || pseudoStatsForOutdated {
|
||||
tbl := *statsTbl
|
||||
tbl.Pseudo = true
|
||||
statsTbl = &tbl
|
||||
if pseudoStatsForUninitialized {
|
||||
pseudoEstimationNotAvailable.Inc()
|
||||
} else {
|
||||
pseudoEstimationOutdate.Inc()
|
||||
}
|
||||
}
|
||||
|
||||
return statsTbl
|
||||
}
|
||||
|
||||
|
||||
@ -112,14 +112,16 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache *stat
|
||||
}
|
||||
hist := statistics.NewHistogram(id, ndv, nullCount, version, types.NewFieldType(mysql.TypeBlob), chunk.InitialCapacity, 0)
|
||||
index := &statistics.Index{
|
||||
Histogram: *hist,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
Info: idxInfo,
|
||||
StatsVer: statsVer,
|
||||
Flag: row.GetInt64(10),
|
||||
PhysicalID: tblID,
|
||||
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
|
||||
Histogram: *hist,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
Info: idxInfo,
|
||||
StatsVer: statsVer,
|
||||
Flag: row.GetInt64(10),
|
||||
PhysicalID: tblID,
|
||||
}
|
||||
if statsVer != statistics.Version0 {
|
||||
index.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
}
|
||||
lastAnalyzePos.Copy(&index.LastAnalyzePos)
|
||||
table.Indices[hist.ID] = index
|
||||
@ -430,6 +432,18 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) (err error) {
|
||||
if err != nil {
|
||||
return errors.Trace(err)
|
||||
}
|
||||
// Set columns' stats status.
|
||||
for _, table := range cache.Values() {
|
||||
for _, col := range table.Columns {
|
||||
if col.StatsVer != statistics.Version0 || col.Count > 0 {
|
||||
if mysql.HasPriKeyFlag(col.Info.GetFlag()) {
|
||||
col.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
} else {
|
||||
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
cache.FreshMemUsage()
|
||||
h.updateStatsCache(cache)
|
||||
v := h.statsCache.Load()
|
||||
|
||||
@ -118,6 +118,7 @@ func TestDDLHistogram(t *testing.T) {
|
||||
tableInfo := tbl.Meta()
|
||||
statsTbl := do.StatsHandle().GetTableStats(tableInfo)
|
||||
require.False(t, statsTbl.Pseudo)
|
||||
require.True(t, statsTbl.Columns[tableInfo.Columns[2].ID].IsStatsInitialized())
|
||||
require.Equal(t, int64(2), statsTbl.Columns[tableInfo.Columns[2].ID].NullCount)
|
||||
require.Equal(t, int64(0), statsTbl.Columns[tableInfo.Columns[2].ID].Histogram.NDV)
|
||||
|
||||
@ -131,6 +132,7 @@ func TestDDLHistogram(t *testing.T) {
|
||||
tableInfo = tbl.Meta()
|
||||
statsTbl = do.StatsHandle().GetTableStats(tableInfo)
|
||||
require.False(t, statsTbl.Pseudo)
|
||||
require.True(t, statsTbl.Columns[tableInfo.Columns[3].ID].IsStatsInitialized())
|
||||
sctx := mock.NewContext()
|
||||
count, err := statsTbl.ColumnEqualRowCount(sctx, types.NewIntDatum(0), tableInfo.Columns[3].ID)
|
||||
require.NoError(t, err)
|
||||
@ -161,6 +163,7 @@ func TestDDLHistogram(t *testing.T) {
|
||||
tableInfo = tbl.Meta()
|
||||
statsTbl = do.StatsHandle().GetTableStats(tableInfo)
|
||||
require.False(t, statsTbl.Pseudo)
|
||||
require.True(t, statsTbl.Columns[tableInfo.Columns[5].ID].IsStatsInitialized())
|
||||
require.Equal(t, 3.0, statsTbl.Columns[tableInfo.Columns[5].ID].AvgColSize(statsTbl.Count, false))
|
||||
|
||||
testKit.MustExec("alter table t add column c6 varchar(15) DEFAULT '123', add column c7 varchar(15) DEFAULT '123'")
|
||||
|
||||
@ -699,19 +699,24 @@ func (h *Handle) loadNeededColumnHistograms(reader *statsReader, col model.Table
|
||||
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", col.TableID), zap.Int64("hist_id", col.ID))
|
||||
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v", col.TableID, col.ID))
|
||||
}
|
||||
statsVer := rows[0].GetInt64(0)
|
||||
colHist := &statistics.Column{
|
||||
PhysicalID: col.TableID,
|
||||
Histogram: *hg,
|
||||
Info: c.Info,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
IsHandle: c.IsHandle,
|
||||
StatsVer: rows[0].GetInt64(0),
|
||||
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
|
||||
PhysicalID: col.TableID,
|
||||
Histogram: *hg,
|
||||
Info: c.Info,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
IsHandle: c.IsHandle,
|
||||
StatsVer: statsVer,
|
||||
}
|
||||
// Column.Count is calculated by Column.TotalRowCount(). Hence we don't set Column.Count when initializing colHist.
|
||||
colHist.Count = int64(colHist.TotalRowCount())
|
||||
// When adding/modifying a column, we create its stats(all values are default values) without setting stats_ver.
|
||||
// So we need to add colHist.Count > 0 here.
|
||||
if statsVer != statistics.Version0 || colHist.Count > 0 {
|
||||
colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
}
|
||||
// Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions
|
||||
// like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already.
|
||||
oldCache = h.statsCache.Load().(statsCache)
|
||||
@ -835,6 +840,7 @@ func (h *Handle) indexStatsFromStorage(reader *statsReader, row chunk.Row, table
|
||||
distinct := row.GetInt64(3)
|
||||
histVer := row.GetUint64(4)
|
||||
nullCount := row.GetInt64(5)
|
||||
statsVer := row.GetInt64(7)
|
||||
idx := table.Indices[histID]
|
||||
errorRate := statistics.ErrorRate{}
|
||||
flag := row.GetInt64(8)
|
||||
@ -861,10 +867,20 @@ func (h *Handle) indexStatsFromStorage(reader *statsReader, row chunk.Row, table
|
||||
if err != nil {
|
||||
return errors.Trace(err)
|
||||
}
|
||||
idx = &statistics.Index{Histogram: *hg, CMSketch: cms, TopN: topN, FMSketch: fmSketch,
|
||||
Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7), Flag: flag,
|
||||
PhysicalID: table.PhysicalID,
|
||||
StatsLoadedStatus: statistics.NewStatsFullLoadStatus()}
|
||||
idx = &statistics.Index{
|
||||
Histogram: *hg,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fmSketch,
|
||||
Info: idxInfo,
|
||||
ErrorRate: errorRate,
|
||||
StatsVer: statsVer,
|
||||
Flag: flag,
|
||||
PhysicalID: table.PhysicalID,
|
||||
}
|
||||
if statsVer != statistics.Version0 {
|
||||
idx.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
}
|
||||
lastAnalyzePos.Copy(&idx.LastAnalyzePos)
|
||||
}
|
||||
break
|
||||
@ -923,6 +939,11 @@ func (h *Handle) columnStatsFromStorage(reader *statsReader, row chunk.Row, tabl
|
||||
Flag: flag,
|
||||
StatsVer: statsVer,
|
||||
}
|
||||
// When adding/modifying a column, we create its stats(all values are default values) without setting stats_ver.
|
||||
// So we need to add col.Count > 0 here.
|
||||
if statsVer != statistics.Version0 || col.Count > 0 {
|
||||
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
|
||||
}
|
||||
lastAnalyzePos.Copy(&col.LastAnalyzePos)
|
||||
col.Histogram.Correlation = correlation
|
||||
break
|
||||
@ -946,20 +967,24 @@ func (h *Handle) columnStatsFromStorage(reader *statsReader, row chunk.Row, tabl
|
||||
}
|
||||
}
|
||||
col = &statistics.Column{
|
||||
PhysicalID: table.PhysicalID,
|
||||
Histogram: *hg,
|
||||
Info: colInfo,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fmSketch,
|
||||
ErrorRate: errorRate,
|
||||
IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
|
||||
Flag: flag,
|
||||
StatsVer: statsVer,
|
||||
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
|
||||
PhysicalID: table.PhysicalID,
|
||||
Histogram: *hg,
|
||||
Info: colInfo,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fmSketch,
|
||||
ErrorRate: errorRate,
|
||||
IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
|
||||
Flag: flag,
|
||||
StatsVer: statsVer,
|
||||
}
|
||||
// Column.Count is calculated by Column.TotalRowCount(). Hence we don't set Column.Count when initializing col.
|
||||
col.Count = int64(col.TotalRowCount())
|
||||
// When adding/modifying a column, we create its stats(all values are default values) without setting stats_ver.
|
||||
// So we need to add col.Count > 0 here.
|
||||
if statsVer != statistics.Version0 || col.Count > 0 {
|
||||
col.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
}
|
||||
lastAnalyzePos.Copy(&col.LastAnalyzePos)
|
||||
break
|
||||
}
|
||||
|
||||
@ -345,34 +345,42 @@ func (h *Handle) readStatsForOneItem(item model.TableItemID, w *statsWrapper, re
|
||||
zap.Int64("hist_id", item.ID), zap.Bool("is_index", item.IsIndex))
|
||||
return nil, errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v, is_index:%v", item.TableID, item.ID, item.IsIndex))
|
||||
}
|
||||
statsVer := rows[0].GetInt64(0)
|
||||
if item.IsIndex {
|
||||
idxHist := &statistics.Index{
|
||||
Histogram: *hg,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
Info: index.Info,
|
||||
ErrorRate: index.ErrorRate,
|
||||
StatsVer: rows[0].GetInt64(0), Flag: index.Flag,
|
||||
PhysicalID: index.PhysicalID,
|
||||
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
|
||||
Histogram: *hg,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
Info: index.Info,
|
||||
ErrorRate: index.ErrorRate,
|
||||
StatsVer: statsVer,
|
||||
Flag: index.Flag,
|
||||
PhysicalID: index.PhysicalID,
|
||||
}
|
||||
if statsVer != statistics.Version0 {
|
||||
idxHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
}
|
||||
index.LastAnalyzePos.Copy(&idxHist.LastAnalyzePos)
|
||||
w.idx = idxHist
|
||||
} else {
|
||||
colHist := &statistics.Column{
|
||||
PhysicalID: item.TableID,
|
||||
Histogram: *hg,
|
||||
Info: c.Info,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
IsHandle: c.IsHandle,
|
||||
StatsVer: rows[0].GetInt64(0),
|
||||
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
|
||||
PhysicalID: item.TableID,
|
||||
Histogram: *hg,
|
||||
Info: c.Info,
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
IsHandle: c.IsHandle,
|
||||
StatsVer: statsVer,
|
||||
}
|
||||
// Column.Count is calculated by Column.TotalRowCount(). Hence, we don't set Column.Count when initializing colHist.
|
||||
colHist.Count = int64(colHist.TotalRowCount())
|
||||
// When adding/modifying a column, we create its stats(all values are default values) without setting stats_ver.
|
||||
// So we need to add colHist.Count > 0 here.
|
||||
if statsVer != statistics.Version0 || colHist.Count > 0 {
|
||||
colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
}
|
||||
w.col = colHist
|
||||
}
|
||||
return w, nil
|
||||
|
||||
@ -3357,3 +3357,38 @@ func TestAnalyzeTableLRUPut(t *testing.T) {
|
||||
tk.MustExec("analyze table test.t")
|
||||
require.Equal(t, tbl.Meta().ID, domain.GetDomain(tk.Session()).StatsHandle().GetStatsCacheFrontTable())
|
||||
}
|
||||
|
||||
func TestUninitializedStatsStatus(t *testing.T) {
|
||||
store, dom := testkit.CreateMockStoreAndDomain(t)
|
||||
dom.StatsHandle().SetLease(0)
|
||||
tk := testkit.NewTestKit(t, store)
|
||||
tk.MustExec("use test")
|
||||
tk.MustExec("drop table if exists t")
|
||||
tk.MustExec("create table t(a int, b int, c int, index idx_a(a))")
|
||||
h := dom.StatsHandle()
|
||||
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
|
||||
tk.MustExec("insert into t values (1,2,2), (3,4,4), (5,6,6), (7,8,8), (9,10,10)")
|
||||
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
|
||||
is := dom.InfoSchema()
|
||||
require.NoError(t, h.Update(is))
|
||||
tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
|
||||
require.NoError(t, err)
|
||||
tblInfo := tbl.Meta()
|
||||
tblStats := h.GetTableStats(tblInfo)
|
||||
for _, col := range tblStats.Columns {
|
||||
require.False(t, col.IsStatsInitialized())
|
||||
}
|
||||
for _, idx := range tblStats.Indices {
|
||||
require.False(t, idx.IsStatsInitialized())
|
||||
}
|
||||
tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Check(testkit.Rows())
|
||||
checkStatsPseudo := func() {
|
||||
rows := tk.MustQuery("explain select * from t").Rows()
|
||||
operatorInfo := rows[len(rows)-1][4].(string)
|
||||
require.True(t, strings.Contains(operatorInfo, "stats:pseudo"))
|
||||
}
|
||||
tk.MustExec("set @@tidb_enable_pseudo_for_outdated_stats = true")
|
||||
checkStatsPseudo()
|
||||
tk.MustExec("set @@tidb_enable_pseudo_for_outdated_stats = false")
|
||||
checkStatsPseudo()
|
||||
}
|
||||
|
||||
@ -1910,7 +1910,9 @@ func TestLoadHistCorrelation(t *testing.T) {
|
||||
h.Clear()
|
||||
require.NoError(t, h.Update(dom.InfoSchema()))
|
||||
result := testKit.MustQuery("show stats_histograms where Table_name = 't'")
|
||||
require.Len(t, result.Rows(), 0)
|
||||
// After https://github.com/pingcap/tidb/pull/37444, `show stats_histograms` displays the columns whose hist/topn/cmsketch
|
||||
// are not loaded and their stats status is allEvicted.
|
||||
require.Len(t, result.Rows(), 1)
|
||||
testKit.MustExec("explain select * from t where c = 1")
|
||||
require.NoError(t, h.LoadNeededHistograms())
|
||||
result = testKit.MustQuery("show stats_histograms where Table_name = 't'")
|
||||
|
||||
@ -1570,6 +1570,16 @@ func NewStatsFullLoadStatus() StatsLoadedStatus {
|
||||
}
|
||||
}
|
||||
|
||||
// NewStatsAllEvictedStatus returns the status that only loads count/nullCount/NDV and doesn't load CMSketch/TopN/Histogram.
|
||||
// When we load table stats, column stats is in allEvicted status by default. CMSketch/TopN/Histogram of column is only
|
||||
// loaded when we really need column stats.
|
||||
func NewStatsAllEvictedStatus() StatsLoadedStatus {
|
||||
return StatsLoadedStatus{
|
||||
statsInitialized: true,
|
||||
evictedStatus: allEvicted,
|
||||
}
|
||||
}
|
||||
|
||||
// IsStatsInitialized indicates whether the column/index's statistics was loaded from storage before.
|
||||
// Note that `IsStatsInitialized` can only be set during initialization.
|
||||
func (s StatsLoadedStatus) IsStatsInitialized() bool {
|
||||
|
||||
@ -20,6 +20,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/pingcap/failpoint"
|
||||
"github.com/pingcap/tidb/parser/model"
|
||||
@ -629,3 +630,27 @@ func TestCrossValidationSelectivity(t *testing.T) {
|
||||
"└─Selection 0.00 cop[tikv] gt(test.t.c, 1000)",
|
||||
" └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false"))
|
||||
}
|
||||
|
||||
func TestShowHistogramsLoadStatus(t *testing.T) {
|
||||
store, dom := testkit.CreateMockStoreAndDomain(t)
|
||||
tk := testkit.NewTestKit(t, store)
|
||||
h := dom.StatsHandle()
|
||||
origLease := h.Lease()
|
||||
h.SetLease(time.Second)
|
||||
defer func() { h.SetLease(origLease) }()
|
||||
tk.MustExec("use test")
|
||||
tk.MustExec("create table t(a int primary key, b int, c int, index idx(b, c))")
|
||||
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
|
||||
tk.MustExec("insert into t values (1,2,3), (4,5,6)")
|
||||
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
|
||||
tk.MustExec("analyze table t")
|
||||
require.NoError(t, h.Update(dom.InfoSchema()))
|
||||
rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Rows()
|
||||
for _, row := range rows {
|
||||
if row[3] == "a" || row[3] == "idx" {
|
||||
require.Equal(t, "allLoaded", row[10].(string))
|
||||
} else {
|
||||
require.Equal(t, "allEvicted", row[10].(string))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -463,6 +463,21 @@ func (n *neededStatsMap) Length() int {
|
||||
// and use pseudo estimation.
|
||||
var RatioOfPseudoEstimate = atomic.NewFloat64(0.7)
|
||||
|
||||
// IsInitialized returns true if any column/index stats of the table is initialized.
|
||||
func (t *Table) IsInitialized() bool {
|
||||
for _, col := range t.Columns {
|
||||
if col != nil && col.IsStatsInitialized() {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, idx := range t.Indices {
|
||||
if idx != nil && idx.IsStatsInitialized() {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsOutdated returns true if the table stats is outdated.
|
||||
func (t *Table) IsOutdated() bool {
|
||||
rowcount := t.GetColRowCount()
|
||||
|
||||
Reference in New Issue
Block a user