statistics: fix some problem related to stats async load (#57723)
close pingcap/tidb#57722, close pingcap/tidb#57735
This commit is contained in:
@ -82,7 +82,7 @@ go_test(
|
||||
data = glob(["testdata/**"]),
|
||||
embed = [":statistics"],
|
||||
flaky = True,
|
||||
shard_count = 37,
|
||||
shard_count = 38,
|
||||
deps = [
|
||||
"//pkg/config",
|
||||
"//pkg/meta/model",
|
||||
|
||||
@ -260,3 +260,13 @@ func (c *Column) StatsAvailable() bool {
|
||||
// StatsVer, so we check NDV > 0 || NullCount > 0 for the case.
|
||||
return c.IsAnalyzed() || c.NDV > 0 || c.NullCount > 0
|
||||
}
|
||||
|
||||
// EmptyColumn creates an empty column object. It may be used for pseudo estimation or to stop loading unexisting stats.
|
||||
func EmptyColumn(tid int64, pkIsHandle bool, colInfo *model.ColumnInfo) *Column {
|
||||
return &Column{
|
||||
PhysicalID: tid,
|
||||
Info: colInfo,
|
||||
Histogram: *NewHistogram(colInfo.ID, 0, 0, 0, &colInfo.FieldType, 0, 0),
|
||||
IsHandle: pkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
|
||||
}
|
||||
}
|
||||
|
||||
@ -632,30 +632,38 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int {
|
||||
}
|
||||
|
||||
func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
|
||||
tbl, ok := statsHandle.Get(col.TableID)
|
||||
statsTbl, ok := statsHandle.Get(col.TableID)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
var colInfo *model.ColumnInfo
|
||||
_, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true)
|
||||
if !loadNeeded || !analyzed {
|
||||
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
|
||||
return nil
|
||||
}
|
||||
|
||||
// The column info cannot be initialized in the ColAndIdxExistenceMap when lite-init-stats is disabled,
|
||||
// so we have to get the column info from the domain.
|
||||
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
|
||||
tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID)
|
||||
tbl, ok := statsHandle.TableInfoByID(is, col.TableID)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
colInfo = tblInfo.Meta().GetColumnByID(col.ID)
|
||||
tblInfo := tbl.Meta()
|
||||
colInfo := tblInfo.GetColumnByID(col.ID)
|
||||
if colInfo == nil {
|
||||
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
|
||||
return nil
|
||||
}
|
||||
|
||||
_, loadNeeded, analyzed := statsTbl.ColumnIsLoadNeeded(col.ID, true)
|
||||
if !loadNeeded || !analyzed {
|
||||
// If this column is not analyzed yet and we don't have it in memory.
|
||||
// We create a fake one for the pseudo estimation.
|
||||
// Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed.
|
||||
if loadNeeded && !analyzed {
|
||||
fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo)
|
||||
statsTbl.SetCol(col.ID, fakeCol)
|
||||
statsHandle.UpdateStatsCache([]*statistics.Table{statsTbl}, nil)
|
||||
}
|
||||
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
|
||||
return nil
|
||||
}
|
||||
|
||||
hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo)
|
||||
if hg == nil || err != nil {
|
||||
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
|
||||
@ -690,29 +698,29 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.
|
||||
CMSketch: cms,
|
||||
TopN: topN,
|
||||
FMSketch: fms,
|
||||
IsHandle: tblInfo.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
|
||||
IsHandle: tblInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
|
||||
StatsVer: statsVer,
|
||||
}
|
||||
// Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions
|
||||
// like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already.
|
||||
tbl, ok = statsHandle.Get(col.TableID)
|
||||
statsTbl, ok = statsHandle.Get(col.TableID)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
tbl = tbl.Copy()
|
||||
statsTbl = statsTbl.Copy()
|
||||
if colHist.StatsAvailable() {
|
||||
if fullLoad {
|
||||
colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
|
||||
} else {
|
||||
colHist.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
|
||||
}
|
||||
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, colHist.LastUpdateVersion)
|
||||
if statsVer != statistics.Version0 {
|
||||
tbl.StatsVer = int(statsVer)
|
||||
statsTbl.LastAnalyzeVersion = max(statsTbl.LastAnalyzeVersion, colHist.LastUpdateVersion)
|
||||
statsTbl.StatsVer = int(statsVer)
|
||||
}
|
||||
}
|
||||
tbl.SetCol(col.ID, colHist)
|
||||
statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil)
|
||||
statsTbl.SetCol(col.ID, colHist)
|
||||
statsHandle.UpdateStatsCache([]*statistics.Table{statsTbl}, nil)
|
||||
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
|
||||
if col.IsSyncLoadFailed {
|
||||
logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
|
||||
@ -771,9 +779,9 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, is infoschema.InfoSchema
|
||||
tbl = tbl.Copy()
|
||||
if idxHist.StatsVer != statistics.Version0 {
|
||||
tbl.StatsVer = int(idxHist.StatsVer)
|
||||
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)
|
||||
}
|
||||
tbl.SetIdx(idx.ID, idxHist)
|
||||
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)
|
||||
statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil)
|
||||
if idx.IsSyncLoadFailed {
|
||||
logutil.BgLogger().Warn("Hist for index should already be loaded as sync but not found.",
|
||||
|
||||
@ -357,13 +357,9 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err
|
||||
|
||||
// If this column is not analyzed yet and we don't have it in memory.
|
||||
// We create a fake one for the pseudo estimation.
|
||||
// Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed.
|
||||
if loadNeeded && !analyzed {
|
||||
wrapper.col = &statistics.Column{
|
||||
PhysicalID: item.TableID,
|
||||
Info: wrapper.colInfo,
|
||||
Histogram: *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0),
|
||||
IsHandle: isPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()),
|
||||
}
|
||||
wrapper.col = statistics.EmptyColumn(item.TableID, isPkIsHandle, wrapper.colInfo)
|
||||
s.updateCachedItem(tblInfo, item, wrapper.col, wrapper.idx, task.Item.FullLoad)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -590,3 +590,25 @@ func TestGlobalIndexWithAnalyzeVersion1AndHistoricalStats(t *testing.T) {
|
||||
// Each analyze will only generate one record
|
||||
tk.MustQuery(fmt.Sprintf("select count(*) from mysql.stats_history where table_id=%d", tblID)).Equal(testkit.Rows("10"))
|
||||
}
|
||||
|
||||
func TestLastAnalyzeVersionNotChangedWithAsyncStatsLoad(t *testing.T) {
|
||||
store, dom := testkit.CreateMockStoreAndDomain(t)
|
||||
tk := testkit.NewTestKit(t, store)
|
||||
|
||||
tk.MustExec("set @@tidb_stats_load_sync_wait = 0;")
|
||||
tk.MustExec("use test")
|
||||
tk.MustExec("create table t(a int, b int);")
|
||||
require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh()))
|
||||
require.NoError(t, dom.StatsHandle().Update(context.Background(), dom.InfoSchema()))
|
||||
tk.MustExec("insert into t values (1, 1);")
|
||||
err := dom.StatsHandle().DumpStatsDeltaToKV(true)
|
||||
require.NoError(t, err)
|
||||
tk.MustExec("alter table t add column c int default 1;")
|
||||
dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh())
|
||||
tk.MustExec("select * from t where a = 1 or b = 1 or c = 1;")
|
||||
require.NoError(t, dom.StatsHandle().LoadNeededHistograms(dom.InfoSchema()))
|
||||
result := tk.MustQuery("show stats_meta where table_name = 't'")
|
||||
require.Len(t, result.Rows(), 1)
|
||||
// The last analyze time.
|
||||
require.Equal(t, "<nil>", result.Rows()[0][6])
|
||||
}
|
||||
|
||||
@ -811,7 +811,7 @@ func (t *Table) GetStatsHealthy() (int64, bool) {
|
||||
}
|
||||
|
||||
// ColumnIsLoadNeeded checks whether the column needs trigger the async/sync load.
|
||||
// The Column should be visible in the table and really has analyzed statistics in the stroage.
|
||||
// The Column should be visible in the table and really has analyzed statistics in the storage.
|
||||
// Also, if the stats has been loaded into the memory, we also don't need to load it.
|
||||
// We return the Column together with the checking result, to avoid accessing the map multiple times.
|
||||
// The first bool is whether we need to load it into memory. The second bool is whether this column has stats in the system table or not.
|
||||
@ -820,7 +820,7 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool
|
||||
return nil, false, false
|
||||
}
|
||||
// when we use non-lite init stats, it cannot init the stats for common columns.
|
||||
// so we need to foce to load the stats.
|
||||
// so we need to force to load the stats.
|
||||
col, ok := t.columns[id]
|
||||
if !ok {
|
||||
return nil, true, true
|
||||
@ -828,15 +828,16 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool
|
||||
hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false)
|
||||
|
||||
// If it's not analyzed yet.
|
||||
// The real check condition: !ok && !hasAnalyzed.
|
||||
// After this check, we will always have ok && hasAnalyzed.
|
||||
if !hasAnalyzed {
|
||||
return nil, false, false
|
||||
}
|
||||
|
||||
// Restore the condition from the simplified form:
|
||||
// 1. !ok && hasAnalyzed => need load
|
||||
// 2. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load
|
||||
// 3. ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load
|
||||
if !ok || (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) {
|
||||
// 1. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load
|
||||
// 2. ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load
|
||||
if (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) {
|
||||
return col, true, true
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user