diff --git a/pkg/planner/core/logical_plan_builder.go b/pkg/planner/core/logical_plan_builder.go index d15f9a01a0..7a207d8062 100644 --- a/pkg/planner/core/logical_plan_builder.go +++ b/pkg/planner/core/logical_plan_builder.go @@ -4186,7 +4186,7 @@ func getStatsTable(ctx base.PlanContext, tblInfo *model.TableInfo, pid int64) *s allowPseudoTblTriggerLoading = true } // Copy it so we can modify the ModifyCount and the RealtimeCount safely. - statsTbl = statsTbl.ShallowCopy() + statsTbl = statsTbl.CopyAs(statistics.MetaOnly) statsTbl.RealtimeCount = analyzeCount statsTbl.ModifyCount = 0 } diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index d964a13a0b..3cee0ca23c 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -83,7 +83,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 40, + shard_count = 41, deps = [ "//pkg/config", "//pkg/meta/model", diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index 18e6a3d3aa..51732746ea 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -139,7 +139,11 @@ func (*Handle) initStatsHistograms4ChunkLite(cache statstypes.StatsCache, iter * if !ok { continue } - table = table.Copy() + // optimization: doesn't need to copy and can do in place changes since + // 1. initStatsHistograms4ChunkLite is a single thread populating entries in a local cache, and later the + // cache will be called h.Replace(cache) to be the global cache + // 2. following logic only modifies ColAndIdxExistenceMap, so it won't impact the cache memory cost + // calculation } isIndex := row.GetInt64(1) id := row.GetInt64(2) @@ -189,7 +193,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats if !ok { continue } - table = table.Copy() + table = table.CopyAs(statistics.BothMapsWritable) } // All the objects in the table share the same stats version. if statsVer != statistics.Version0 { @@ -396,7 +400,8 @@ func (*Handle) initStatsTopN4Chunk(cache statstypes.StatsCache, iter *chunk.Iter if !ok { continue } - table = table.Copy() + // existing idx histogram is modified have to use deep copy + table = table.CopyAs(statistics.AllDataWritable) } idx := table.GetIdx(row.GetInt64(1)) if idx == nil || (idx.CMSketch == nil && idx.StatsVer <= statistics.Version1) { @@ -569,7 +574,8 @@ func (*Handle) initStatsBuckets4Chunk(cache statstypes.StatsCache, iter *chunk.I if !ok { continue } - table = table.Copy() + // existing idx histogram is modified have to use deep copy + table = table.CopyAs(statistics.AllDataWritable) } var lower, upper types.Datum var hist *statistics.Histogram diff --git a/pkg/statistics/handle/cache/internal/lfu/lfu_cache.go b/pkg/statistics/handle/cache/internal/lfu/lfu_cache.go index fd0ebeb023..e77c3486ef 100644 --- a/pkg/statistics/handle/cache/internal/lfu/lfu_cache.go +++ b/pkg/statistics/handle/cache/internal/lfu/lfu_cache.go @@ -159,7 +159,7 @@ func (s *LFU) dropMemory(item *ristretto.Item) { // We do not need to calculate the cost during onEvict, // because the onexit function is also called when the evict event occurs. // TODO(hawkingrei): not copy the useless part. - table := item.Value.(*statistics.Table).Copy() + table := item.Value.(*statistics.Table).CopyAs(statistics.AllDataWritable) table.DropEvicted() s.resultKeySet.AddKeyValue(int64(item.Key), table) after := table.MemoryUsage().TotalTrackingMemUsage() diff --git a/pkg/statistics/handle/cache/statscache.go b/pkg/statistics/handle/cache/statscache.go index 4a4e794a42..6a9ecf9d75 100644 --- a/pkg/statistics/handle/cache/statscache.go +++ b/pkg/statistics/handle/cache/statscache.go @@ -206,7 +206,7 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema, t // If the column/index stats has not been updated, we can reuse the old table stats. // Only need to update the count and modify count. if ok && latestHistUpdateVersion > 0 && oldTbl.LastStatsHistVersion >= latestHistUpdateVersion { - tbl = oldTbl.ShallowCopy() + tbl = oldTbl.CopyAs(statistics.MetaOnly) // count and modify count is updated in finalProcess needLoadColAndIdxStats = false } diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index 91dcbb399a..36503852d5 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -505,6 +505,10 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo * tracker := memory.NewTracker(memory.LabelForAnalyzeMemory, -1) tracker.AttachTo(sctx.GetSessionVars().MemTracker) defer tracker.Detach() + + // Delay copying until we know what needs to be modified + needsCopy := true + // If table stats is pseudo, we also need to copy it, since we will use the column stats when // the average error rate of it is small. if table == nil || snapshot > 0 { @@ -513,24 +517,35 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo * HistColl: histColl, ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(len(tableInfo.Columns), len(tableInfo.Indices)), } - } else { - // We copy it before writing to avoid race. - table = table.Copy() + needsCopy = false } - table.Pseudo = false - realtimeCount, modidyCount, isNull, err := StatsMetaCountAndModifyCount(util.StatsCtx, sctx, tableID) + realtimeCount, modifyCount, isNull, err := StatsMetaCountAndModifyCount(util.StatsCtx, sctx, tableID) if err != nil || isNull { return nil, err } - table.ModifyCount = modidyCount - table.RealtimeCount = realtimeCount rows, _, err := util.ExecRows(sctx, "select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation from mysql.stats_histograms where table_id = %?", tableID) if err != nil { return nil, err } - // Check table has no index/column stats. + + if needsCopy { + if len(rows) == 0 { + // Only metadata update needed + table = table.CopyAs(statistics.MetaOnly) + } else { + // Indexes and Columns maps potentially get updated + table = table.CopyAs(statistics.BothMapsWritable) + } + } + + // Update metadata + table.Pseudo = false + table.ModifyCount = modifyCount + table.RealtimeCount = realtimeCount + + // Early return if no histogram data to process if len(rows) == 0 { return table, nil } @@ -569,7 +584,7 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo * } } table.ColAndIdxExistenceMap.SetChecked() - return ExtendedStatsFromStorage(sctx, table, tableID, loadAll) + return ExtendedStatsFromStorage(sctx, table.CopyAs(statistics.ExtendedStatsWritable), tableID, loadAll) } // LoadHistogram will load histogram from storage. @@ -699,7 +714,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. if loadNeeded && !analyzed { fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo) - statsTbl = statsTbl.Copy() + statsTbl = statsTbl.CopyAs(statistics.ColumnMapWritable) statsTbl.SetCol(col.ID, fakeCol) statsHandle.UpdateStatsCache(statstypes.CacheUpdate{ Updated: []*statistics.Table{statsTbl}, @@ -763,7 +778,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. ) return nil } - statsTbl = statsTbl.Copy() + statsTbl = statsTbl.CopyAs(statistics.ColumnMapWritable) if colHist.StatsAvailable() { if fullLoad { colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus() @@ -876,7 +891,7 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, is infoschema.InfoSchema ) return nil } - tbl = tbl.Copy() + tbl = tbl.CopyAs(statistics.IndexMapWritable) if idxHist.StatsVer != statistics.Version0 { tbl.StatsVer = int(idxHist.StatsVer) tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion) diff --git a/pkg/statistics/handle/storage/stats_read_writer.go b/pkg/statistics/handle/storage/stats_read_writer.go index 10e4347d64..5ae41239d7 100644 --- a/pkg/statistics/handle/storage/stats_read_writer.go +++ b/pkg/statistics/handle/storage/stats_read_writer.go @@ -321,7 +321,7 @@ func (s *statsReadWriter) ReloadExtendedStatistics() error { return util.CallWithSCtx(s.statsHandler.SPool(), func(sctx sessionctx.Context) error { tables := make([]*statistics.Table, 0, s.statsHandler.Len()) for _, tbl := range s.statsHandler.Values() { - t, err := ExtendedStatsFromStorage(sctx, tbl.Copy(), tbl.PhysicalID, true) + t, err := ExtendedStatsFromStorage(sctx, tbl.CopyAs(statistics.ExtendedStatsWritable), tbl.PhysicalID, true) if err != nil { return err } diff --git a/pkg/statistics/handle/storage/update.go b/pkg/statistics/handle/storage/update.go index 975e07487c..fc9c9808fe 100644 --- a/pkg/statistics/handle/storage/update.go +++ b/pkg/statistics/handle/storage/update.go @@ -261,7 +261,7 @@ func removeExtendedStatsItem(statsCache types.StatsCache, if !ok || tbl.ExtendedStats == nil || len(tbl.ExtendedStats.Stats) == 0 { return } - newTbl := tbl.Copy() + newTbl := tbl.CopyAs(statistics.ExtendedStatsWritable) delete(newTbl.ExtendedStats.Stats, statsName) statsCache.UpdateStatsCache(types.CacheUpdate{ Updated: []*statistics.Table{newTbl}, diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index 1f1007e11d..58021e3c58 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -586,8 +586,10 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.Ta if !ok { return false } + var tableCopied bool if !tbl.ColAndIdxExistenceMap.Checked() { - tbl = tbl.Copy() + tbl = tbl.CopyAs(statistics.BothMapsWritable) + tableCopied = true for _, col := range tbl.HistColl.GetColSlice() { if tblInfo.FindColumnByID(col.ID) == nil { tbl.DelCol(col.ID) @@ -607,7 +609,9 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.Ta if c != nil && (c.IsFullLoad() || !fullLoaded) { return false } - tbl = tbl.Copy() + if !tableCopied { + tbl = tbl.CopyAs(statistics.ColumnMapWritable) + } tbl.SetCol(item.ID, colHist) // If the column is analyzed we refresh the map for the possible change. @@ -627,7 +631,9 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.Ta if index != nil && (index.IsFullLoad() || !fullLoaded) { return true } - tbl = tbl.Copy() + if !tableCopied { + tbl = tbl.CopyAs(statistics.IndexMapWritable) + } tbl.SetIdx(item.ID, idxHist) // If the index is analyzed we refresh the map for the possible change. if idxHist.IsAnalyzed() { diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 28aa13c90c..db73e30549 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -41,6 +41,29 @@ const ( PseudoRowCount = 10000 ) +// CopyIntent specifies what data structures are safe to modify in the copied table. +type CopyIntent uint8 + +const ( + // MetaOnly shares all maps - only table metadata is safe to modify + MetaOnly CopyIntent = iota + + // ColumnMapWritable clones columns map - safe to add/remove columns + ColumnMapWritable + + // IndexMapWritable clones indices map - safe to add/remove indices + IndexMapWritable + + // BothMapsWritable clones both maps - safe to add/remove columns and indices + BothMapsWritable + + // ExtendedStatsWritable shares all maps - safe to modify ExtendedStats field + ExtendedStatsWritable + + // AllDataWritable deep copies everything - safe to modify all data including histograms + AllDataWritable +) + // AutoAnalyzeMinCnt means if the count of table is less than this value, we don't need to do auto analyze. // Exported for testing. var AutoAnalyzeMinCnt int64 = 1000 @@ -599,53 +622,70 @@ func (t *Table) MemoryUsage() *TableMemoryUsage { return tMemUsage } -// Copy copies the current table. -func (t *Table) Copy() *Table { - newHistColl := HistColl{ - PhysicalID: t.PhysicalID, - RealtimeCount: t.RealtimeCount, - columns: make(map[int64]*Column, len(t.columns)), - indices: make(map[int64]*Index, len(t.indices)), - Pseudo: t.Pseudo, - ModifyCount: t.ModifyCount, - StatsVer: t.StatsVer, - } - for id, col := range t.columns { - newHistColl.columns[id] = col.Copy() - } - for id, idx := range t.indices { - newHistColl.indices[id] = idx.Copy() - } - nt := &Table{ - HistColl: newHistColl, - Version: t.Version, - TblInfoUpdateTS: t.TblInfoUpdateTS, - LastAnalyzeVersion: t.LastAnalyzeVersion, - LastStatsHistVersion: t.LastStatsHistVersion, - } - if t.ExtendedStats != nil { - newExtStatsColl := &ExtendedStatsColl{ - Stats: make(map[string]*ExtendedStatsItem), - LastUpdateVersion: t.ExtendedStats.LastUpdateVersion, - } - maps.Copy(newExtStatsColl.Stats, t.ExtendedStats.Stats) - nt.ExtendedStats = newExtStatsColl - } - if t.ColAndIdxExistenceMap != nil { - nt.ColAndIdxExistenceMap = t.ColAndIdxExistenceMap.Clone() - } - return nt -} +// CopyAs creates a copy of the table with the specified writability intent. +// +// PERFORMANCE NOTE: Choose the most minimal intent for your use case. Copying is heavily +// used at scale and unnecessary cloning causes significant memory pressure. Only use +// AllDataWritable when you truly need to modify histogram data. +// +// MetaOnly: Shares all maps, only metadata modifications are safe +// ColumnMapWritable: Clones columns map, safe to add/remove columns +// IndexMapWritable: Clones indices map, safe to add/remove indices +// BothMapsWritable: Clones both maps - safe to add/remove columns and indices +// ExtendedStatsWritable: Shares all maps, safe to modify ExtendedStats field +// AllDataWritable: Deep copies everything, safe to modify all data including histograms +func (t *Table) CopyAs(intent CopyIntent) *Table { + var columns map[int64]*Column + var indices map[int64]*Index + var existenceMap *ColAndIdxExistenceMap + + switch intent { + case MetaOnly: + columns = t.columns + indices = t.indices + existenceMap = t.ColAndIdxExistenceMap + case ColumnMapWritable: + columns = maps.Clone(t.columns) + indices = t.indices + if t.ColAndIdxExistenceMap != nil { + existenceMap = t.ColAndIdxExistenceMap.Clone() + } + case IndexMapWritable: + columns = t.columns + indices = maps.Clone(t.indices) + if t.ColAndIdxExistenceMap != nil { + existenceMap = t.ColAndIdxExistenceMap.Clone() + } + case BothMapsWritable: + columns = maps.Clone(t.columns) + indices = maps.Clone(t.indices) + if t.ColAndIdxExistenceMap != nil { + existenceMap = t.ColAndIdxExistenceMap.Clone() + } + case ExtendedStatsWritable: + columns = t.columns + indices = t.indices + existenceMap = t.ColAndIdxExistenceMap + case AllDataWritable: + // For deep copy, create new maps and deep copy all content + columns = make(map[int64]*Column, len(t.columns)) + for id, col := range t.columns { + columns[id] = col.Copy() + } + indices = make(map[int64]*Index, len(t.indices)) + for id, idx := range t.indices { + indices[id] = idx.Copy() + } + if t.ColAndIdxExistenceMap != nil { + existenceMap = t.ColAndIdxExistenceMap.Clone() + } + } -// ShallowCopy copies the current table. -// It's different from Copy(). Only the struct Table (and also the embedded HistColl) is copied here. -// The internal containers, like t.Columns and t.Indices, and the stats, like TopN and Histogram are not copied. -func (t *Table) ShallowCopy() *Table { newHistColl := HistColl{ PhysicalID: t.PhysicalID, RealtimeCount: t.RealtimeCount, - columns: t.columns, - indices: t.indices, + columns: columns, + indices: indices, Pseudo: t.Pseudo, ModifyCount: t.ModifyCount, StatsVer: t.StatsVer, @@ -654,11 +694,23 @@ func (t *Table) ShallowCopy() *Table { HistColl: newHistColl, Version: t.Version, TblInfoUpdateTS: t.TblInfoUpdateTS, - ExtendedStats: t.ExtendedStats, - ColAndIdxExistenceMap: t.ColAndIdxExistenceMap, + ColAndIdxExistenceMap: existenceMap, LastAnalyzeVersion: t.LastAnalyzeVersion, LastStatsHistVersion: t.LastStatsHistVersion, } + + // Handle ExtendedStats for deep copy vs shallow copy + if (intent == AllDataWritable || intent == ExtendedStatsWritable) && t.ExtendedStats != nil { + newExtStatsColl := &ExtendedStatsColl{ + Stats: make(map[string]*ExtendedStatsItem), + LastUpdateVersion: t.ExtendedStats.LastUpdateVersion, + } + maps.Copy(newExtStatsColl.Stats, t.ExtendedStats.Stats) + nt.ExtendedStats = newExtStatsColl + } else { + nt.ExtendedStats = t.ExtendedStats + } + return nt } diff --git a/pkg/statistics/table_test.go b/pkg/statistics/table_test.go index 7c4ed913f4..af7ba8a45f 100644 --- a/pkg/statistics/table_test.go +++ b/pkg/statistics/table_test.go @@ -29,3 +29,92 @@ func TestCloneColAndIdxExistenceMap(t *testing.T) { m2 := m.Clone() require.Equal(t, m, m2) } + +type ShareMode uint8 + +const ( + Shared ShareMode = iota + Cloned +) + +func TestCopyAs(t *testing.T) { + tests := []struct { + name string + intent CopyIntent + expectCols ShareMode + expectIdxs ShareMode + expectExist ShareMode + expectExtended ShareMode + }{ + {"MetaOnly", MetaOnly, Shared, Shared, Shared, Shared}, + {"ColumnMapWritable", ColumnMapWritable, Cloned, Shared, Cloned, Shared}, + {"IndexMapWritable", IndexMapWritable, Shared, Cloned, Cloned, Shared}, + {"BothMapsWritable", BothMapsWritable, Cloned, Cloned, Cloned, Shared}, + {"ExtendedStatsWritable", ExtendedStatsWritable, Shared, Shared, Shared, Cloned}, + {"AllDataWritable", AllDataWritable, Cloned, Cloned, Cloned, Cloned}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create original table with ExtendedStats for testing + originalStats := &ExtendedStatsColl{ + Stats: make(map[string]*ExtendedStatsItem), + LastUpdateVersion: 1, + } + originalStats.Stats["test"] = &ExtendedStatsItem{StringVals: "original"} + + table := &Table{ + HistColl: *NewHistColl(1, 1, 1, 1, 1), + ColAndIdxExistenceMap: NewColAndIndexExistenceMap(1, 1), + ExtendedStats: originalStats, + } + + copied := table.CopyAs(tt.intent) + + // Test columns map sharing/cloning + copied.SetCol(1, &Column{PhysicalID: 123}) + if tt.expectCols == Shared { + require.NotNil(t, table.GetCol(1), "shared columns: addition to copy should appear in original") + } else { + require.Nil(t, table.GetCol(1), "cloned columns: addition to copy should not appear in original") + } + + // Test indices map sharing/cloning + copied.SetIdx(1, &Index{PhysicalID: 123}) + if tt.expectIdxs == Shared { + require.NotNil(t, table.GetIdx(1), "shared indices: addition to copy should appear in original") + } else { + require.Nil(t, table.GetIdx(1), "cloned indices: addition to copy should not appear in original") + } + + // Test existence map sharing/cloning + if tt.expectExist == Shared { + require.Same(t, table.ColAndIdxExistenceMap, copied.ColAndIdxExistenceMap) + } else { + require.NotSame(t, table.ColAndIdxExistenceMap, copied.ColAndIdxExistenceMap) + } + + // Test ExtendedStats handling + if tt.expectExtended == Cloned { + // Should be able to modify ExtendedStats without affecting original + newStats := &ExtendedStatsColl{ + Stats: make(map[string]*ExtendedStatsItem), + LastUpdateVersion: 2, + } + newStats.Stats["test"] = &ExtendedStatsItem{StringVals: "modified"} + copied.ExtendedStats = newStats + + // Verify original is unchanged + require.Equal(t, uint64(1), table.ExtendedStats.LastUpdateVersion) + require.Equal(t, "original", table.ExtendedStats.Stats["test"].StringVals) + + // Verify copy was modified + require.Equal(t, uint64(2), copied.ExtendedStats.LastUpdateVersion) + require.Equal(t, "modified", copied.ExtendedStats.Stats["test"].StringVals) + } else { + // For shared ExtendedStats + require.Same(t, table.ExtendedStats, copied.ExtendedStats) + } + }) + } +}