// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"encoding/json"
"strconv"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
"github.com/pingcap/tidb/pkg/statistics"
statslogutil "github.com/pingcap/tidb/pkg/statistics/handle/logutil"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/statistics/handle/util"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/memory"
"github.com/pingcap/tidb/pkg/util/sqlexec"
"go.uber.org/zap"
)
// StatsMetaCountAndModifyCount reads count and modify_count for the given table from mysql.stats_meta.
func StatsMetaCountAndModifyCount(sctx sessionctx.Context, tableID int64) (count, modifyCount int64, isNull bool, err error) {
rows, _, err := util.ExecRows(sctx, "select count, modify_count from mysql.stats_meta where table_id = %?", tableID)
if err != nil {
return 0, 0, false, err
}
if len(rows) == 0 {
return 0, 0, true, nil
}
count = int64(rows[0].GetUint64(0))
modifyCount = rows[0].GetInt64(1)
return count, modifyCount, false, nil
}
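// Illustrative usage sketch (not part of this package's code paths; sctx is assumed to come
// from the stats handle's session pool and physicalID is a physical table ID):
//
//	count, modifyCount, isNull, err := StatsMetaCountAndModifyCount(sctx, physicalID)
//	if err != nil {
//		return err
//	}
//	if isNull {
//		// No row in mysql.stats_meta for this table: it has no stats meta yet.
//	}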
// HistMetaFromStorage reads the meta info of the histogram from the storage.
func HistMetaFromStorage(sctx sessionctx.Context, item *model.TableItemID, possibleColInfo *model.ColumnInfo) (*statistics.Histogram, *types.Datum, int64, int64, error) {
isIndex := 0
var tp *types.FieldType
if item.IsIndex {
isIndex = 1
tp = types.NewFieldType(mysql.TypeBlob)
} else {
tp = &possibleColInfo.FieldType
}
rows, _, err := util.ExecRows(sctx,
"select distinct_count, version, null_count, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms where table_id = %? and hist_id = %? and is_index = %?",
item.TableID,
item.ID,
isIndex,
)
if err != nil {
return nil, nil, 0, 0, err
}
if len(rows) == 0 {
return nil, nil, 0, 0, nil
}
hist := statistics.NewHistogram(item.ID, rows[0].GetInt64(0), rows[0].GetInt64(2), rows[0].GetUint64(1), tp, chunk.InitialCapacity, rows[0].GetInt64(3))
hist.Correlation = rows[0].GetFloat64(5)
lastPos := rows[0].GetDatum(7, types.NewFieldType(mysql.TypeBlob))
return hist, &lastPos, rows[0].GetInt64(4), rows[0].GetInt64(6), nil
}
// HistogramFromStorage reads histogram from storage.
func HistogramFromStorage(sctx sessionctx.Context, tableID int64, colID int64, tp *types.FieldType, distinct int64, isIndex int, ver uint64, nullCount int64, totColSize int64, corr float64) (_ *statistics.Histogram, err error) {
rows, fields, err := util.ExecRows(sctx, "select count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id = %? and is_index = %? and hist_id = %? order by bucket_id", tableID, isIndex, colID)
if err != nil {
return nil, errors.Trace(err)
}
bucketSize := len(rows)
hg := statistics.NewHistogram(colID, distinct, nullCount, ver, tp, bucketSize, totColSize)
hg.Correlation = corr
totalCount := int64(0)
for i := 0; i < bucketSize; i++ {
count := rows[i].GetInt64(0)
repeats := rows[i].GetInt64(1)
var upperBound, lowerBound types.Datum
if isIndex == 1 {
lowerBound = rows[i].GetDatum(2, &fields[2].Column.FieldType)
upperBound = rows[i].GetDatum(3, &fields[3].Column.FieldType)
} else {
// Invalid date values may be inserted into the table under some relaxed SQL modes, and those values may exist in statistics.
// Hence, when reading statistics, we should skip the invalid date check. See #39336.
sc := stmtctx.NewStmtCtxWithTimeZone(time.UTC)
sc.SetTypeFlags(sc.TypeFlags().WithIgnoreInvalidDateErr(true).WithIgnoreZeroInDate(true))
d := rows[i].GetDatum(2, &fields[2].Column.FieldType)
// For new collation data, when storing the bounds of the histogram, we store the collate key instead of the
// original value.
// But there's additional conversion logic for new collation data, and the collate key might be longer than
// the FieldType.flen.
// If we used the original FieldType here, we might get errors like "Invalid utf8mb4 character string"
// or "Data too long".
// So we change it to TypeBlob to bypass that logic here.
if tp.EvalType() == types.ETString && tp.GetType() != mysql.TypeEnum && tp.GetType() != mysql.TypeSet {
tp = types.NewFieldType(mysql.TypeBlob)
}
lowerBound, err = d.ConvertTo(sc.TypeCtx(), tp)
if err != nil {
return nil, errors.Trace(err)
}
d = rows[i].GetDatum(3, &fields[3].Column.FieldType)
upperBound, err = d.ConvertTo(sc.TypeCtx(), tp)
if err != nil {
return nil, errors.Trace(err)
}
}
totalCount += count
hg.AppendBucketWithNDV(&lowerBound, &upperBound, totalCount, repeats, rows[i].GetInt64(4))
}
hg.PreCalculateScalar()
return hg, nil
}
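// Illustrative sketch of combining HistMetaFromStorage and HistogramFromStorage to load a
// column histogram with its buckets (a hypothetical caller; colItem is a model.TableItemID and
// colInfo its column info, both assumed to be prepared elsewhere; loadNeededColumnHistograms
// below follows the same pattern):
//
//	hgMeta, _, _, _, err := HistMetaFromStorage(sctx, &colItem, colInfo)
//	if err != nil || hgMeta == nil {
//		return err
//	}
//	hg, err := HistogramFromStorage(sctx, colItem.TableID, colItem.ID, &colInfo.FieldType,
//		hgMeta.NDV, 0 /* isIndex */, hgMeta.LastUpdateVersion, hgMeta.NullCount,
//		hgMeta.TotColSize, hgMeta.Correlation)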
// CMSketchAndTopNFromStorage reads CMSketch and TopN from storage.
func CMSketchAndTopNFromStorage(sctx sessionctx.Context, tblID int64, isIndex, histID int64) (_ *statistics.CMSketch, _ *statistics.TopN, err error) {
topNRows, _, err := util.ExecRows(sctx, "select HIGH_PRIORITY value, count from mysql.stats_top_n where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
if err != nil {
return nil, nil, err
}
rows, _, err := util.ExecRows(sctx, "select cm_sketch from mysql.stats_histograms where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
if err != nil {
return nil, nil, err
}
if len(rows) == 0 {
return statistics.DecodeCMSketchAndTopN(nil, topNRows)
}
return statistics.DecodeCMSketchAndTopN(rows[0].GetBytes(0), topNRows)
}
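// Illustrative usage sketch (hypothetical caller): CMSketch and TopN are decoded together,
// and when mysql.stats_histograms has no cm_sketch row for the item, only the TopN rows are decoded.
//
//	cms, topN, err := CMSketchAndTopNFromStorage(sctx, physicalID, 1 /* isIndex */, histID)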
// CMSketchFromStorage reads CMSketch from storage
func CMSketchFromStorage(sctx sessionctx.Context, tblID int64, isIndex int, histID int64) (_ *statistics.CMSketch, err error) {
rows, _, err := util.ExecRows(sctx, "select cm_sketch from mysql.stats_histograms where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
if err != nil || len(rows) == 0 {
return nil, err
}
return statistics.DecodeCMSketch(rows[0].GetBytes(0))
}
// TopNFromStorage reads TopN from storage
func TopNFromStorage(sctx sessionctx.Context, tblID int64, isIndex int, histID int64) (_ *statistics.TopN, err error) {
rows, _, err := util.ExecRows(sctx, "select HIGH_PRIORITY value, count from mysql.stats_top_n where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
if err != nil || len(rows) == 0 {
return nil, err
}
return statistics.DecodeTopN(rows), nil
}
// FMSketchFromStorage reads FMSketch from storage
func FMSketchFromStorage(sctx sessionctx.Context, tblID int64, isIndex, histID int64) (_ *statistics.FMSketch, err error) {
rows, _, err := util.ExecRows(sctx, "select value from mysql.stats_fm_sketch where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
if err != nil || len(rows) == 0 {
return nil, err
}
return statistics.DecodeFMSketch(rows[0].GetBytes(0))
}
// CheckSkipPartition checks if we can skip loading the partition.
func CheckSkipPartition(sctx sessionctx.Context, tblID int64, isIndex int) error {
rows, _, err := util.ExecRows(sctx, "select distinct_count from mysql.stats_histograms where table_id =%? and is_index = %?", tblID, isIndex)
if err != nil {
return err
}
if len(rows) == 0 {
return types.ErrPartitionStatsMissing
}
return nil
}
// CheckSkipColumnPartiion checks if we can skip loading the stats of the given column for the partition.
func CheckSkipColumnPartiion(sctx sessionctx.Context, tblID int64, isIndex int, histsID int64) error {
rows, _, err := util.ExecRows(sctx, "select distinct_count from mysql.stats_histograms where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histsID)
if err != nil {
return err
}
if len(rows) == 0 {
return types.ErrPartitionColumnStatsMissing
}
return nil
}
// ExtendedStatsFromStorage reads extended stats from storage.
func ExtendedStatsFromStorage(sctx sessionctx.Context, table *statistics.Table, tableID int64, loadAll bool) (*statistics.Table, error) {
failpoint.Inject("injectExtStatsLoadErr", func() {
failpoint.Return(nil, errors.New("gofail extendedStatsFromStorage error"))
})
lastVersion := uint64(0)
if table.ExtendedStats != nil && !loadAll {
lastVersion = table.ExtendedStats.LastUpdateVersion
} else {
table.ExtendedStats = statistics.NewExtendedStatsColl()
}
rows, _, err := util.ExecRows(sctx, "select name, status, type, column_ids, stats, version from mysql.stats_extended where table_id = %? and status in (%?, %?, %?) and version > %?",
tableID, statistics.ExtendedStatsInited, statistics.ExtendedStatsAnalyzed, statistics.ExtendedStatsDeleted, lastVersion)
if err != nil || len(rows) == 0 {
return table, nil
}
for _, row := range rows {
lastVersion = max(lastVersion, row.GetUint64(5))
name := row.GetString(0)
status := uint8(row.GetInt64(1))
if status == statistics.ExtendedStatsDeleted || status == statistics.ExtendedStatsInited {
delete(table.ExtendedStats.Stats, name)
} else {
item := &statistics.ExtendedStatsItem{
Tp: uint8(row.GetInt64(2)),
}
colIDs := row.GetString(3)
err := json.Unmarshal([]byte(colIDs), &item.ColIDs)
if err != nil {
statslogutil.StatsLogger().Error("decode column IDs failed", zap.String("column_ids", colIDs), zap.Error(err))
return nil, err
}
statsStr := row.GetString(4)
if item.Tp == ast.StatsTypeCardinality || item.Tp == ast.StatsTypeCorrelation {
if statsStr != "" {
item.ScalarVals, err = strconv.ParseFloat(statsStr, 64)
if err != nil {
statslogutil.StatsLogger().Error("parse scalar stats failed", zap.String("stats", statsStr), zap.Error(err))
return nil, err
}
}
} else {
item.StringVals = statsStr
}
table.ExtendedStats.Stats[name] = item
}
}
table.ExtendedStats.LastUpdateVersion = lastVersion
return table, nil
}
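// Illustrative example of how a mysql.stats_extended row is decoded above (made-up values):
// for cardinality/correlation items, column_ids holds a JSON array and stats holds the scalar
// value as text; for other types the stats string is kept verbatim in item.StringVals.
//
//	name       = "s1"
//	column_ids = "[1,2]"   // unmarshalled into item.ColIDs by json.Unmarshal
//	stats      = "0.98"    // parsed into item.ScalarVals by strconv.ParseFloat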
func indexStatsFromStorage(sctx sessionctx.Context, row chunk.Row, table *statistics.Table, tableInfo *model.TableInfo, loadAll bool, lease time.Duration, tracker *memory.Tracker) error {
histID := row.GetInt64(2)
distinct := row.GetInt64(3)
histVer := row.GetUint64(4)
nullCount := row.GetInt64(5)
statsVer := row.GetInt64(7)
idx := table.Indices[histID]
flag := row.GetInt64(8)
lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
for _, idxInfo := range tableInfo.Indices {
if histID != idxInfo.ID {
continue
}
table.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, idxInfo, statsVer != statistics.Version0)
// All the objects in the table share the same stats version.
// Update it here.
if statsVer != statistics.Version0 {
table.StatsVer = int(statsVer)
table.LastAnalyzeVersion = max(table.LastAnalyzeVersion, histVer)
}
// We will not load buckets, topn and cmsketch if:
// 1. lease > 0, and:
// 2. the index doesn't have any of buckets, topn, cmsketch in memory before, and:
// 3. loadAll is false, and:
// 4. lite-init-stats is true (remove this condition when lite init stats is GA).
notNeedLoad := lease > 0 &&
(idx == nil || ((!idx.IsStatsInitialized() || idx.IsAllEvicted()) && idx.LastUpdateVersion < histVer)) &&
!loadAll &&
config.GetGlobalConfig().Performance.LiteInitStats
if notNeedLoad {
// If we don't have this index in memory, skip it.
if idx == nil {
return nil
}
idx = &statistics.Index{
Histogram: *statistics.NewHistogram(histID, distinct, nullCount, histVer, types.NewFieldType(mysql.TypeBlob), 0, 0),
StatsVer: statsVer,
Info: idxInfo,
Flag: flag,
PhysicalID: table.PhysicalID,
}
if idx.IsAnalyzed() {
idx.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
}
lastAnalyzePos.Copy(&idx.LastAnalyzePos)
break
}
if idx == nil || idx.LastUpdateVersion < histVer || loadAll {
hg, err := HistogramFromStorage(sctx, table.PhysicalID, histID, types.NewFieldType(mysql.TypeBlob), distinct, 1, histVer, nullCount, 0, 0)
if err != nil {
return errors.Trace(err)
}
cms, topN, err := CMSketchAndTopNFromStorage(sctx, table.PhysicalID, 1, idxInfo.ID)
if err != nil {
return errors.Trace(err)
}
var fmSketch *statistics.FMSketch
if loadAll {
// FMSketch is only used when merging partition stats into global stats, and in that case
// we load all the statistics, i.e., loadAll is true.
fmSketch, err = FMSketchFromStorage(sctx, table.PhysicalID, 1, histID)
if err != nil {
return errors.Trace(err)
}
}
idx = &statistics.Index{
Histogram: *hg,
CMSketch: cms,
TopN: topN,
FMSketch: fmSketch,
Info: idxInfo,
StatsVer: statsVer,
Flag: flag,
PhysicalID: table.PhysicalID,
}
if statsVer != statistics.Version0 {
idx.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
}
lastAnalyzePos.Copy(&idx.LastAnalyzePos)
}
break
}
if idx != nil {
if tracker != nil {
tracker.Consume(idx.MemoryUsage().TotalMemoryUsage())
}
table.Indices[histID] = idx
} else {
logutil.BgLogger().Debug("we cannot find index id in table info. It may be deleted.", zap.Int64("indexID", histID), zap.String("table", tableInfo.Name.O))
}
return nil
}
func columnStatsFromStorage(sctx sessionctx.Context, row chunk.Row, table *statistics.Table, tableInfo *model.TableInfo, loadAll bool, lease time.Duration, tracker *memory.Tracker) error {
histID := row.GetInt64(2)
distinct := row.GetInt64(3)
histVer := row.GetUint64(4)
nullCount := row.GetInt64(5)
totColSize := row.GetInt64(6)
statsVer := row.GetInt64(7)
correlation := row.GetFloat64(9)
lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
col := table.Columns[histID]
flag := row.GetInt64(8)
for _, colInfo := range tableInfo.Columns {
if histID != colInfo.ID {
continue
}
table.ColAndIdxExistenceMap.InsertCol(histID, colInfo, statsVer != statistics.Version0 || distinct > 0 || nullCount > 0)
// All the objects in the table share the same stats version.
// Update it here.
if statsVer != statistics.Version0 {
table.StatsVer = int(statsVer)
table.LastAnalyzeVersion = max(table.LastAnalyzeVersion, histVer)
}
isHandle := tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag())
// We will not load buckets, topn and cmsketch if:
// 1. lease > 0, and:
// 2. this column is not the handle, or lite-init-stats is true (remove this condition when lite init stats is GA), and:
// 3. the column doesn't have any of buckets, topn, cmsketch in memory before, and:
// 4. loadAll is false.
//
// Here is the explanation of the condition `!col.IsStatsInitialized() || col.IsAllEvicted()`.
// For one column:
// 1. If there are no stats for it in storage (i.e., it has never been analyzed), its stats status
// would be `!col.IsStatsInitialized()`. In this case we should take the `notNeedLoad` path.
// 2. If stats for it exist in storage but its stats status is `col.IsAllEvicted()`, there are two
// sub-cases. One is that the column stats have never been used/needed by the optimizer, so they have
// never been loaded. The other is that the column stats were loaded and then evicted. In both sub-cases,
// we should take the `notNeedLoad` path.
// 3. If some parts (Histogram/TopN/CMSketch) of its stats currently exist in TiDB memory, we choose to load all of
// its new stats once we find the stats version has been updated.
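//
// Worked example of this decision (hypothetical values, not taken from a real cluster):
//
//	lease = 3s, loadAll = false, lite-init-stats = true
//	col is all evicted in memory, col.LastUpdateVersion = 90, histVer = 100
//
// Here notNeedLoad evaluates to true: only a bucket-less Column shell carrying the histogram
// meta and the all-evicted status is kept, and buckets/TopN/CMSketch are loaded later on demand.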
notNeedLoad := lease > 0 &&
(!isHandle || config.GetGlobalConfig().Performance.LiteInitStats) &&
(col == nil || ((!col.IsStatsInitialized() || col.IsAllEvicted()) && col.LastUpdateVersion < histVer)) &&
!loadAll
if notNeedLoad {
// If we don't have the column in memory currently, just skip it.
if col == nil {
return nil
}
col = &statistics.Column{
PhysicalID: table.PhysicalID,
Histogram: *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totColSize),
Info: colInfo,
IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
Flag: flag,
StatsVer: statsVer,
}
if col.StatsAvailable() {
col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
}
lastAnalyzePos.Copy(&col.LastAnalyzePos)
col.Histogram.Correlation = correlation
break
}
if col == nil || col.LastUpdateVersion < histVer || loadAll {
hg, err := HistogramFromStorage(sctx, table.PhysicalID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totColSize, correlation)
if err != nil {
return errors.Trace(err)
}
cms, topN, err := CMSketchAndTopNFromStorage(sctx, table.PhysicalID, 0, colInfo.ID)
if err != nil {
return errors.Trace(err)
}
var fmSketch *statistics.FMSketch
if loadAll {
// FMSketch is only used when merging partition stats into global stats, and in that case
// we load all the statistics, i.e., loadAll is true.
fmSketch, err = FMSketchFromStorage(sctx, table.PhysicalID, 0, histID)
if err != nil {
return errors.Trace(err)
}
}
col = &statistics.Column{
PhysicalID: table.PhysicalID,
Histogram: *hg,
Info: colInfo,
CMSketch: cms,
TopN: topN,
FMSketch: fmSketch,
IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
Flag: flag,
StatsVer: statsVer,
}
if col.StatsAvailable() {
col.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
}
lastAnalyzePos.Copy(&col.LastAnalyzePos)
break
}
if col.TotColSize != totColSize {
newCol := *col
newCol.TotColSize = totColSize
col = &newCol
}
break
}
if col != nil {
if tracker != nil {
tracker.Consume(col.MemoryUsage().TotalMemoryUsage())
}
table.Columns[col.ID] = col
} else {
// If we cannot find the Column or Index in tableInfo, we won't load the histogram for it.
// But don't worry: the DDL info will be refreshed within the next lease, and the same table will be
// loaded again then, so no error results.
logutil.BgLogger().Debug("we cannot find column in table info now. It may be deleted", zap.Int64("colID", histID), zap.String("table", tableInfo.Name.O))
}
return nil
}
// TableStatsFromStorage loads table stats info from storage.
func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo *model.TableInfo, tableID int64, loadAll bool, lease time.Duration, table *statistics.Table) (_ *statistics.Table, err error) {
tracker := memory.NewTracker(memory.LabelForAnalyzeMemory, -1)
tracker.AttachTo(sctx.GetSessionVars().MemTracker)
defer tracker.Detach()
// Even if the table stats are pseudo, we still need to copy them, since the column stats will be used when
// their average error rate is small.
if table == nil || snapshot > 0 {
histColl := statistics.HistColl{
PhysicalID: tableID,
HavePhysicalID: true,
Columns: make(map[int64]*statistics.Column, 4),
Indices: make(map[int64]*statistics.Index, 4),
}
table = &statistics.Table{
HistColl: histColl,
ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(len(tableInfo.Columns), len(tableInfo.Indices)),
}
} else {
// We copy it before writing to avoid race.
table = table.Copy()
}
table.Pseudo = false
realtimeCount, modifyCount, isNull, err := StatsMetaCountAndModifyCount(sctx, tableID)
if err != nil || isNull {
return nil, err
}
table.ModifyCount = modifyCount
table.RealtimeCount = realtimeCount
rows, _, err := util.ExecRows(sctx, "select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from mysql.stats_histograms where table_id = %?", tableID)
// Check deleted table.
if err != nil || len(rows) == 0 {
return nil, nil
}
for _, row := range rows {
if err := sctx.GetSessionVars().SQLKiller.HandleSignal(); err != nil {
return nil, err
}
if row.GetInt64(1) > 0 {
err = indexStatsFromStorage(sctx, row, table, tableInfo, loadAll, lease, tracker)
} else {
err = columnStatsFromStorage(sctx, row, table, tableInfo, loadAll, lease, tracker)
}
if err != nil {
return nil, err
}
}
return ExtendedStatsFromStorage(sctx, table, tableID, loadAll)
}
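// Illustrative usage sketch (hypothetical caller; tblInfo is the table's *model.TableInfo and
// cachedTbl may be nil when nothing is cached yet):
//
//	tbl, err := TableStatsFromStorage(sctx, 0 /* snapshot */, tblInfo, tblInfo.ID, false /* loadAll */, lease, cachedTbl)
//	if err != nil {
//		return err
//	}
//	if tbl == nil {
//		// Either the stats meta is missing or the table has been deleted.
//	}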
// LoadHistogram will load histogram from storage.
func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID int64, tableInfo *model.TableInfo) (*statistics.Histogram, error) {
row, _, err := util.ExecRows(sctx, "select distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from mysql.stats_histograms where table_id = %? and is_index = %? and hist_id = %?", tableID, isIndex, histID)
if err != nil || len(row) == 0 {
return nil, err
}
distinct := row[0].GetInt64(0)
histVer := row[0].GetUint64(1)
nullCount := row[0].GetInt64(2)
var totColSize int64
var corr float64
var tp types.FieldType
if isIndex == 0 {
totColSize = row[0].GetInt64(3)
corr = row[0].GetFloat64(6)
for _, colInfo := range tableInfo.Columns {
if histID != colInfo.ID {
continue
}
tp = colInfo.FieldType
break
}
return HistogramFromStorage(sctx, tableID, histID, &tp, distinct, isIndex, histVer, nullCount, totColSize, corr)
}
return HistogramFromStorage(sctx, tableID, histID, types.NewFieldType(mysql.TypeBlob), distinct, isIndex, histVer, nullCount, 0, 0)
}
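// Illustrative usage sketch (hypothetical caller): load the histogram of a single index
// directly from storage, without going through the stats cache.
//
//	hg, err := LoadHistogram(sctx, physicalID, 1 /* isIndex */, idxID, tblInfo)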
// LoadNeededHistograms will load histograms for those needed columns/indices.
func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, loadFMSketch bool) (err error) {
items := statistics.HistogramNeededItems.AllItems()
for _, item := range items {
if !item.IsIndex {
err = loadNeededColumnHistograms(sctx, statsCache, item, loadFMSketch)
} else {
err = loadNeededIndexHistograms(sctx, statsCache, item, loadFMSketch)
}
if err != nil {
return err
}
}
return nil
}
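// Illustrative usage sketch (hypothetical caller): drain the globally registered needed items
// and load them into the stats cache, skipping FMSketch:
//
//	if err := LoadNeededHistograms(sctx, statsCache, false /* loadFMSketch */); err != nil {
//		return err
//	}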
// CleanFakeItemsForShowHistInFlights cleans up the invalid items that were inserted and returns the number of items that really need to be loaded.
func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int {
items := statistics.HistogramNeededItems.AllItems()
reallyNeeded := 0
for _, item := range items {
tbl, ok := statsCache.Get(item.TableID)
if !ok {
statistics.HistogramNeededItems.Delete(item)
continue
}
loadNeeded := false
if item.IsIndex {
_, loadNeeded = tbl.IndexIsLoadNeeded(item.ID)
} else {
_, loadNeeded = tbl.ColumnIsLoadNeeded(item.ID, true)
}
if !loadNeeded {
statistics.HistogramNeededItems.Delete(item)
continue
}
reallyNeeded++
}
return reallyNeeded
}
func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, col model.TableItemID, loadFMSketch bool) (err error) {
tbl, ok := statsCache.Get(col.TableID)
if !ok {
return nil
}
var colInfo *model.ColumnInfo
_, loadNeeded := tbl.ColumnIsLoadNeeded(col.ID, true)
if !loadNeeded {
statistics.HistogramNeededItems.Delete(col)
return nil
}
colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID)
hgMeta, _, statsVer, _, err := HistMetaFromStorage(sctx, &col, colInfo)
if hgMeta == nil || err != nil {
statistics.HistogramNeededItems.Delete(col)
return err
}
hg, err := HistogramFromStorage(sctx, col.TableID, col.ID, &colInfo.FieldType, hgMeta.NDV, 0, hgMeta.LastUpdateVersion, hgMeta.NullCount, hgMeta.TotColSize, hgMeta.Correlation)
if err != nil {
return errors.Trace(err)
}
cms, topN, err := CMSketchAndTopNFromStorage(sctx, col.TableID, 0, col.ID)
if err != nil {
return errors.Trace(err)
}
var fms *statistics.FMSketch
if loadFMSketch {
fms, err = FMSketchFromStorage(sctx, col.TableID, 0, col.ID)
if err != nil {
return errors.Trace(err)
}
}
colHist := &statistics.Column{
PhysicalID: col.TableID,
Histogram: *hg,
Info: colInfo,
CMSketch: cms,
TopN: topN,
FMSketch: fms,
IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
StatsVer: statsVer,
}
// Reload the latest stats cache; otherwise `UpdateStatsCache` may fail with high probability, because functions
// like `GetPartitionStats` called in `FMSketchFromStorage` could have modified the stats cache already.
tbl, ok = statsCache.Get(col.TableID)
if !ok {
return nil
}
tbl = tbl.Copy()
if colHist.StatsAvailable() {
colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, colHist.LastUpdateVersion)
if statsVer != statistics.Version0 {
tbl.StatsVer = int(statsVer)
}
}
tbl.Columns[col.ID] = colHist
statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil)
statistics.HistogramNeededItems.Delete(col)
if col.IsSyncLoadFailed {
logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
zap.Int64("table_id", colHist.PhysicalID),
zap.Int64("column_id", colHist.Info.ID),
zap.String("column_name", colHist.Info.Name.O))
}
return nil
}
func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, idx model.TableItemID, loadFMSketch bool) (err error) {
tbl, ok := statsCache.Get(idx.TableID)
if !ok {
return nil
}
_, loadNeeded := tbl.IndexIsLoadNeeded(idx.ID)
if !loadNeeded {
statistics.HistogramNeededItems.Delete(idx)
return nil
}
hgMeta, lastAnalyzePos, statsVer, flag, err := HistMetaFromStorage(sctx, &idx, nil)
if hgMeta == nil || err != nil {
statistics.HistogramNeededItems.Delete(idx)
return err
}
idxInfo := tbl.ColAndIdxExistenceMap.GetIndex(idx.ID)
hg, err := HistogramFromStorage(sctx, idx.TableID, idx.ID, types.NewFieldType(mysql.TypeBlob), hgMeta.NDV, 1, hgMeta.LastUpdateVersion, hgMeta.NullCount, hgMeta.TotColSize, hgMeta.Correlation)
if err != nil {
return errors.Trace(err)
}
cms, topN, err := CMSketchAndTopNFromStorage(sctx, idx.TableID, 1, idx.ID)
if err != nil {
return errors.Trace(err)
}
var fms *statistics.FMSketch
if loadFMSketch {
fms, err = FMSketchFromStorage(sctx, idx.TableID, 1, idx.ID)
if err != nil {
return errors.Trace(err)
}
}
idxHist := &statistics.Index{Histogram: *hg, CMSketch: cms, TopN: topN, FMSketch: fms,
Info: idxInfo, StatsVer: statsVer,
Flag: flag, PhysicalID: idx.TableID,
StatsLoadedStatus: statistics.NewStatsFullLoadStatus()}
lastAnalyzePos.Copy(&idxHist.LastAnalyzePos)
tbl, ok = statsCache.Get(idx.TableID)
if !ok {
return nil
}
tbl = tbl.Copy()
if idxHist.StatsVer != statistics.Version0 {
tbl.StatsVer = int(idxHist.StatsVer)
}
tbl.Indices[idx.ID] = idxHist
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)
statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil)
if idx.IsSyncLoadFailed {
logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
zap.Int64("table_id", idx.TableID),
zap.Int64("column_id", idxHist.Info.ID),
zap.String("column_name", idxHist.Info.Name.O))
}
statistics.HistogramNeededItems.Delete(idx)
return nil
}
// StatsMetaByTableIDFromStorage gets the stats meta of a table from storage.
func StatsMetaByTableIDFromStorage(sctx sessionctx.Context, tableID int64, snapshot uint64) (version uint64, modifyCount, count int64, err error) {
var rows []chunk.Row
if snapshot == 0 {
rows, _, err = util.ExecRows(sctx,
"SELECT version, modify_count, count from mysql.stats_meta where table_id = %? order by version", tableID)
} else {
rows, _, err = util.ExecWithOpts(sctx,
[]sqlexec.OptionFuncAlias{sqlexec.ExecOptionWithSnapshot(snapshot), sqlexec.ExecOptionUseCurSession},
"SELECT version, modify_count, count from mysql.stats_meta where table_id = %? order by version", tableID)
}
if err != nil || len(rows) == 0 {
return
}
version = rows[0].GetUint64(0)
modifyCount = rows[0].GetInt64(1)
count = rows[0].GetInt64(2)
return
}
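// Illustrative usage sketch (hypothetical caller): read the stats meta of a table as of a
// specific TSO snapshot; pass snapshot == 0 to read the current data instead.
//
//	version, modifyCount, count, err := StatsMetaByTableIDFromStorage(sctx, physicalID, snapshotTS)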