stats: optimize stats table copy to save memory (#63004)

close pingcap/tidb#63076
This commit is contained in:
Wenqi Mou
2025-08-28 15:45:22 -04:00
committed by GitHub
parent 8d1a00e351
commit d1311c1931
11 changed files with 238 additions and 70 deletions

View File

@ -4186,7 +4186,7 @@ func getStatsTable(ctx base.PlanContext, tblInfo *model.TableInfo, pid int64) *s
allowPseudoTblTriggerLoading = true
}
// Copy it so we can modify the ModifyCount and the RealtimeCount safely.
statsTbl = statsTbl.ShallowCopy()
statsTbl = statsTbl.CopyAs(statistics.MetaOnly)
statsTbl.RealtimeCount = analyzeCount
statsTbl.ModifyCount = 0
}

View File

@ -83,7 +83,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":statistics"],
flaky = True,
shard_count = 40,
shard_count = 41,
deps = [
"//pkg/config",
"//pkg/meta/model",

View File

@ -139,7 +139,11 @@ func (*Handle) initStatsHistograms4ChunkLite(cache statstypes.StatsCache, iter *
if !ok {
continue
}
table = table.Copy()
// optimization: doesn't need to copy and can do in place changes since
// 1. initStatsHistograms4ChunkLite is a single thread populating entries in a local cache, and later the
// cache will be called h.Replace(cache) to be the global cache
// 2. following logic only modifies ColAndIdxExistenceMap, so it won't impact the cache memory cost
// calculation
}
isIndex := row.GetInt64(1)
id := row.GetInt64(2)
@ -189,7 +193,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats
if !ok {
continue
}
table = table.Copy()
table = table.CopyAs(statistics.BothMapsWritable)
}
// All the objects in the table share the same stats version.
if statsVer != statistics.Version0 {
@ -396,7 +400,8 @@ func (*Handle) initStatsTopN4Chunk(cache statstypes.StatsCache, iter *chunk.Iter
if !ok {
continue
}
table = table.Copy()
// existing idx histogram is modified have to use deep copy
table = table.CopyAs(statistics.AllDataWritable)
}
idx := table.GetIdx(row.GetInt64(1))
if idx == nil || (idx.CMSketch == nil && idx.StatsVer <= statistics.Version1) {
@ -569,7 +574,8 @@ func (*Handle) initStatsBuckets4Chunk(cache statstypes.StatsCache, iter *chunk.I
if !ok {
continue
}
table = table.Copy()
// existing idx histogram is modified have to use deep copy
table = table.CopyAs(statistics.AllDataWritable)
}
var lower, upper types.Datum
var hist *statistics.Histogram

View File

@ -159,7 +159,7 @@ func (s *LFU) dropMemory(item *ristretto.Item) {
// We do not need to calculate the cost during onEvict,
// because the onexit function is also called when the evict event occurs.
// TODO(hawkingrei): not copy the useless part.
table := item.Value.(*statistics.Table).Copy()
table := item.Value.(*statistics.Table).CopyAs(statistics.AllDataWritable)
table.DropEvicted()
s.resultKeySet.AddKeyValue(int64(item.Key), table)
after := table.MemoryUsage().TotalTrackingMemUsage()

View File

@ -206,7 +206,7 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema, t
// If the column/index stats has not been updated, we can reuse the old table stats.
// Only need to update the count and modify count.
if ok && latestHistUpdateVersion > 0 && oldTbl.LastStatsHistVersion >= latestHistUpdateVersion {
tbl = oldTbl.ShallowCopy()
tbl = oldTbl.CopyAs(statistics.MetaOnly)
// count and modify count is updated in finalProcess
needLoadColAndIdxStats = false
}

View File

@ -505,6 +505,10 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo *
tracker := memory.NewTracker(memory.LabelForAnalyzeMemory, -1)
tracker.AttachTo(sctx.GetSessionVars().MemTracker)
defer tracker.Detach()
// Delay copying until we know what needs to be modified
needsCopy := true
// If table stats is pseudo, we also need to copy it, since we will use the column stats when
// the average error rate of it is small.
if table == nil || snapshot > 0 {
@ -513,24 +517,35 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo *
HistColl: histColl,
ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(len(tableInfo.Columns), len(tableInfo.Indices)),
}
} else {
// We copy it before writing to avoid race.
table = table.Copy()
needsCopy = false
}
table.Pseudo = false
realtimeCount, modidyCount, isNull, err := StatsMetaCountAndModifyCount(util.StatsCtx, sctx, tableID)
realtimeCount, modifyCount, isNull, err := StatsMetaCountAndModifyCount(util.StatsCtx, sctx, tableID)
if err != nil || isNull {
return nil, err
}
table.ModifyCount = modidyCount
table.RealtimeCount = realtimeCount
rows, _, err := util.ExecRows(sctx, "select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation from mysql.stats_histograms where table_id = %?", tableID)
if err != nil {
return nil, err
}
// Check table has no index/column stats.
if needsCopy {
if len(rows) == 0 {
// Only metadata update needed
table = table.CopyAs(statistics.MetaOnly)
} else {
// Indexes and Columns maps potentially get updated
table = table.CopyAs(statistics.BothMapsWritable)
}
}
// Update metadata
table.Pseudo = false
table.ModifyCount = modifyCount
table.RealtimeCount = realtimeCount
// Early return if no histogram data to process
if len(rows) == 0 {
return table, nil
}
@ -569,7 +584,7 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo *
}
}
table.ColAndIdxExistenceMap.SetChecked()
return ExtendedStatsFromStorage(sctx, table, tableID, loadAll)
return ExtendedStatsFromStorage(sctx, table.CopyAs(statistics.ExtendedStatsWritable), tableID, loadAll)
}
// LoadHistogram will load histogram from storage.
@ -699,7 +714,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.
// Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed.
if loadNeeded && !analyzed {
fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo)
statsTbl = statsTbl.Copy()
statsTbl = statsTbl.CopyAs(statistics.ColumnMapWritable)
statsTbl.SetCol(col.ID, fakeCol)
statsHandle.UpdateStatsCache(statstypes.CacheUpdate{
Updated: []*statistics.Table{statsTbl},
@ -763,7 +778,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.
)
return nil
}
statsTbl = statsTbl.Copy()
statsTbl = statsTbl.CopyAs(statistics.ColumnMapWritable)
if colHist.StatsAvailable() {
if fullLoad {
colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
@ -876,7 +891,7 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, is infoschema.InfoSchema
)
return nil
}
tbl = tbl.Copy()
tbl = tbl.CopyAs(statistics.IndexMapWritable)
if idxHist.StatsVer != statistics.Version0 {
tbl.StatsVer = int(idxHist.StatsVer)
tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)

View File

@ -321,7 +321,7 @@ func (s *statsReadWriter) ReloadExtendedStatistics() error {
return util.CallWithSCtx(s.statsHandler.SPool(), func(sctx sessionctx.Context) error {
tables := make([]*statistics.Table, 0, s.statsHandler.Len())
for _, tbl := range s.statsHandler.Values() {
t, err := ExtendedStatsFromStorage(sctx, tbl.Copy(), tbl.PhysicalID, true)
t, err := ExtendedStatsFromStorage(sctx, tbl.CopyAs(statistics.ExtendedStatsWritable), tbl.PhysicalID, true)
if err != nil {
return err
}

View File

@ -261,7 +261,7 @@ func removeExtendedStatsItem(statsCache types.StatsCache,
if !ok || tbl.ExtendedStats == nil || len(tbl.ExtendedStats.Stats) == 0 {
return
}
newTbl := tbl.Copy()
newTbl := tbl.CopyAs(statistics.ExtendedStatsWritable)
delete(newTbl.ExtendedStats.Stats, statsName)
statsCache.UpdateStatsCache(types.CacheUpdate{
Updated: []*statistics.Table{newTbl},

View File

@ -586,8 +586,10 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.Ta
if !ok {
return false
}
var tableCopied bool
if !tbl.ColAndIdxExistenceMap.Checked() {
tbl = tbl.Copy()
tbl = tbl.CopyAs(statistics.BothMapsWritable)
tableCopied = true
for _, col := range tbl.HistColl.GetColSlice() {
if tblInfo.FindColumnByID(col.ID) == nil {
tbl.DelCol(col.ID)
@ -607,7 +609,9 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.Ta
if c != nil && (c.IsFullLoad() || !fullLoaded) {
return false
}
tbl = tbl.Copy()
if !tableCopied {
tbl = tbl.CopyAs(statistics.ColumnMapWritable)
}
tbl.SetCol(item.ID, colHist)
// If the column is analyzed we refresh the map for the possible change.
@ -627,7 +631,9 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.Ta
if index != nil && (index.IsFullLoad() || !fullLoaded) {
return true
}
tbl = tbl.Copy()
if !tableCopied {
tbl = tbl.CopyAs(statistics.IndexMapWritable)
}
tbl.SetIdx(item.ID, idxHist)
// If the index is analyzed we refresh the map for the possible change.
if idxHist.IsAnalyzed() {

View File

@ -41,6 +41,29 @@ const (
PseudoRowCount = 10000
)
// CopyIntent specifies what data structures are safe to modify in the copied table.
type CopyIntent uint8
const (
// MetaOnly shares all maps - only table metadata is safe to modify
MetaOnly CopyIntent = iota
// ColumnMapWritable clones columns map - safe to add/remove columns
ColumnMapWritable
// IndexMapWritable clones indices map - safe to add/remove indices
IndexMapWritable
// BothMapsWritable clones both maps - safe to add/remove columns and indices
BothMapsWritable
// ExtendedStatsWritable shares all maps - safe to modify ExtendedStats field
ExtendedStatsWritable
// AllDataWritable deep copies everything - safe to modify all data including histograms
AllDataWritable
)
// AutoAnalyzeMinCnt means if the count of table is less than this value, we don't need to do auto analyze.
// Exported for testing.
var AutoAnalyzeMinCnt int64 = 1000
@ -599,53 +622,70 @@ func (t *Table) MemoryUsage() *TableMemoryUsage {
return tMemUsage
}
// Copy copies the current table.
func (t *Table) Copy() *Table {
newHistColl := HistColl{
PhysicalID: t.PhysicalID,
RealtimeCount: t.RealtimeCount,
columns: make(map[int64]*Column, len(t.columns)),
indices: make(map[int64]*Index, len(t.indices)),
Pseudo: t.Pseudo,
ModifyCount: t.ModifyCount,
StatsVer: t.StatsVer,
}
for id, col := range t.columns {
newHistColl.columns[id] = col.Copy()
}
for id, idx := range t.indices {
newHistColl.indices[id] = idx.Copy()
}
nt := &Table{
HistColl: newHistColl,
Version: t.Version,
TblInfoUpdateTS: t.TblInfoUpdateTS,
LastAnalyzeVersion: t.LastAnalyzeVersion,
LastStatsHistVersion: t.LastStatsHistVersion,
}
if t.ExtendedStats != nil {
newExtStatsColl := &ExtendedStatsColl{
Stats: make(map[string]*ExtendedStatsItem),
LastUpdateVersion: t.ExtendedStats.LastUpdateVersion,
}
maps.Copy(newExtStatsColl.Stats, t.ExtendedStats.Stats)
nt.ExtendedStats = newExtStatsColl
}
if t.ColAndIdxExistenceMap != nil {
nt.ColAndIdxExistenceMap = t.ColAndIdxExistenceMap.Clone()
}
return nt
}
// CopyAs creates a copy of the table with the specified writability intent.
//
// PERFORMANCE NOTE: Choose the most minimal intent for your use case. Copying is heavily
// used at scale and unnecessary cloning causes significant memory pressure. Only use
// AllDataWritable when you truly need to modify histogram data.
//
// MetaOnly: Shares all maps, only metadata modifications are safe
// ColumnMapWritable: Clones columns map, safe to add/remove columns
// IndexMapWritable: Clones indices map, safe to add/remove indices
// BothMapsWritable: Clones both maps - safe to add/remove columns and indices
// ExtendedStatsWritable: Shares all maps, safe to modify ExtendedStats field
// AllDataWritable: Deep copies everything, safe to modify all data including histograms
func (t *Table) CopyAs(intent CopyIntent) *Table {
var columns map[int64]*Column
var indices map[int64]*Index
var existenceMap *ColAndIdxExistenceMap
switch intent {
case MetaOnly:
columns = t.columns
indices = t.indices
existenceMap = t.ColAndIdxExistenceMap
case ColumnMapWritable:
columns = maps.Clone(t.columns)
indices = t.indices
if t.ColAndIdxExistenceMap != nil {
existenceMap = t.ColAndIdxExistenceMap.Clone()
}
case IndexMapWritable:
columns = t.columns
indices = maps.Clone(t.indices)
if t.ColAndIdxExistenceMap != nil {
existenceMap = t.ColAndIdxExistenceMap.Clone()
}
case BothMapsWritable:
columns = maps.Clone(t.columns)
indices = maps.Clone(t.indices)
if t.ColAndIdxExistenceMap != nil {
existenceMap = t.ColAndIdxExistenceMap.Clone()
}
case ExtendedStatsWritable:
columns = t.columns
indices = t.indices
existenceMap = t.ColAndIdxExistenceMap
case AllDataWritable:
// For deep copy, create new maps and deep copy all content
columns = make(map[int64]*Column, len(t.columns))
for id, col := range t.columns {
columns[id] = col.Copy()
}
indices = make(map[int64]*Index, len(t.indices))
for id, idx := range t.indices {
indices[id] = idx.Copy()
}
if t.ColAndIdxExistenceMap != nil {
existenceMap = t.ColAndIdxExistenceMap.Clone()
}
}
// ShallowCopy copies the current table.
// It's different from Copy(). Only the struct Table (and also the embedded HistColl) is copied here.
// The internal containers, like t.Columns and t.Indices, and the stats, like TopN and Histogram are not copied.
func (t *Table) ShallowCopy() *Table {
newHistColl := HistColl{
PhysicalID: t.PhysicalID,
RealtimeCount: t.RealtimeCount,
columns: t.columns,
indices: t.indices,
columns: columns,
indices: indices,
Pseudo: t.Pseudo,
ModifyCount: t.ModifyCount,
StatsVer: t.StatsVer,
@ -654,11 +694,23 @@ func (t *Table) ShallowCopy() *Table {
HistColl: newHistColl,
Version: t.Version,
TblInfoUpdateTS: t.TblInfoUpdateTS,
ExtendedStats: t.ExtendedStats,
ColAndIdxExistenceMap: t.ColAndIdxExistenceMap,
ColAndIdxExistenceMap: existenceMap,
LastAnalyzeVersion: t.LastAnalyzeVersion,
LastStatsHistVersion: t.LastStatsHistVersion,
}
// Handle ExtendedStats for deep copy vs shallow copy
if (intent == AllDataWritable || intent == ExtendedStatsWritable) && t.ExtendedStats != nil {
newExtStatsColl := &ExtendedStatsColl{
Stats: make(map[string]*ExtendedStatsItem),
LastUpdateVersion: t.ExtendedStats.LastUpdateVersion,
}
maps.Copy(newExtStatsColl.Stats, t.ExtendedStats.Stats)
nt.ExtendedStats = newExtStatsColl
} else {
nt.ExtendedStats = t.ExtendedStats
}
return nt
}

View File

@ -29,3 +29,92 @@ func TestCloneColAndIdxExistenceMap(t *testing.T) {
m2 := m.Clone()
require.Equal(t, m, m2)
}
type ShareMode uint8
const (
Shared ShareMode = iota
Cloned
)
func TestCopyAs(t *testing.T) {
tests := []struct {
name string
intent CopyIntent
expectCols ShareMode
expectIdxs ShareMode
expectExist ShareMode
expectExtended ShareMode
}{
{"MetaOnly", MetaOnly, Shared, Shared, Shared, Shared},
{"ColumnMapWritable", ColumnMapWritable, Cloned, Shared, Cloned, Shared},
{"IndexMapWritable", IndexMapWritable, Shared, Cloned, Cloned, Shared},
{"BothMapsWritable", BothMapsWritable, Cloned, Cloned, Cloned, Shared},
{"ExtendedStatsWritable", ExtendedStatsWritable, Shared, Shared, Shared, Cloned},
{"AllDataWritable", AllDataWritable, Cloned, Cloned, Cloned, Cloned},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create original table with ExtendedStats for testing
originalStats := &ExtendedStatsColl{
Stats: make(map[string]*ExtendedStatsItem),
LastUpdateVersion: 1,
}
originalStats.Stats["test"] = &ExtendedStatsItem{StringVals: "original"}
table := &Table{
HistColl: *NewHistColl(1, 1, 1, 1, 1),
ColAndIdxExistenceMap: NewColAndIndexExistenceMap(1, 1),
ExtendedStats: originalStats,
}
copied := table.CopyAs(tt.intent)
// Test columns map sharing/cloning
copied.SetCol(1, &Column{PhysicalID: 123})
if tt.expectCols == Shared {
require.NotNil(t, table.GetCol(1), "shared columns: addition to copy should appear in original")
} else {
require.Nil(t, table.GetCol(1), "cloned columns: addition to copy should not appear in original")
}
// Test indices map sharing/cloning
copied.SetIdx(1, &Index{PhysicalID: 123})
if tt.expectIdxs == Shared {
require.NotNil(t, table.GetIdx(1), "shared indices: addition to copy should appear in original")
} else {
require.Nil(t, table.GetIdx(1), "cloned indices: addition to copy should not appear in original")
}
// Test existence map sharing/cloning
if tt.expectExist == Shared {
require.Same(t, table.ColAndIdxExistenceMap, copied.ColAndIdxExistenceMap)
} else {
require.NotSame(t, table.ColAndIdxExistenceMap, copied.ColAndIdxExistenceMap)
}
// Test ExtendedStats handling
if tt.expectExtended == Cloned {
// Should be able to modify ExtendedStats without affecting original
newStats := &ExtendedStatsColl{
Stats: make(map[string]*ExtendedStatsItem),
LastUpdateVersion: 2,
}
newStats.Stats["test"] = &ExtendedStatsItem{StringVals: "modified"}
copied.ExtendedStats = newStats
// Verify original is unchanged
require.Equal(t, uint64(1), table.ExtendedStats.LastUpdateVersion)
require.Equal(t, "original", table.ExtendedStats.Stats["test"].StringVals)
// Verify copy was modified
require.Equal(t, uint64(2), copied.ExtendedStats.LastUpdateVersion)
require.Equal(t, "modified", copied.ExtendedStats.Stats["test"].StringVals)
} else {
// For shared ExtendedStats
require.Same(t, table.ExtendedStats, copied.ExtendedStats)
}
})
}
}