statistics: implement batch size for auto analyze partition table in dynamic mode (#38153)

ref pingcap/tidb#37977
This commit is contained in:
Song Gao
2022-09-27 11:39:44 +08:00
committed by GitHub
parent 39aa4f59d9
commit 47739fff5c
4 changed files with 65 additions and 11 deletions

View File

@ -820,6 +820,15 @@ func TestSetVar(t *testing.T) {
tk.MustQuery("select @@global.tidb_opt_force_inline_cte").Check(testkit.Rows("0")) // default value is 0
tk.MustExec("set global tidb_opt_force_inline_cte=1")
tk.MustQuery("select @@global.tidb_opt_force_inline_cte").Check(testkit.Rows("1"))
// test tidb_auto_analyze_partition_batch_size
tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // default value is 1
tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 2")
tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("2"))
tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 0")
tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // min value is 1
tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999")
tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024
}
func TestGetSetNoopVars(t *testing.T) {

View File

@ -672,6 +672,19 @@ var defaultSysVars = []*SysVar{
TableCacheLease.Store(val)
return nil
}},
{Scope: ScopeGlobal, Name: TiDBAutoAnalyzePartitionBatchSize,
Value: strconv.Itoa(DefTiDBAutoAnalyzePartitionBatchSize),
Type: TypeUnsigned, MinValue: 1, MaxValue: 1024,
SetGlobal: func(vars *SessionVars, s string) error {
var val int64
val, err := strconv.ParseInt(s, 10, 64)
if err != nil {
return errors.Trace(err)
}
AutoAnalyzePartitionBatchSize.Store(val)
return nil
}},
// variable for top SQL feature.
// Enabling TopSQL is controlled only by the TopSQL pub/sub sinker.
// This global variable is only used to update the global config which is stored in PD (etcd).

View File

@ -625,6 +625,9 @@ const (
// TiDBAnalyzeVersion indicates how tidb collects the analyzed statistics and how to use it.
TiDBAnalyzeVersion = "tidb_analyze_version"
// TiDBAutoAnalyzePartitionBatchSize indicates the batch size for partition tables for auto analyze in dynamic mode
TiDBAutoAnalyzePartitionBatchSize = "tidb_auto_analyze_partition_batch_size"
// TiDBEnableIndexMergeJoin indicates whether to enable index merge join.
TiDBEnableIndexMergeJoin = "tidb_enable_index_merge_join"
@ -994,6 +997,7 @@ const (
DefTiDBEnable1PC = false
DefTiDBGuaranteeLinearizability = true
DefTiDBAnalyzeVersion = 2
DefTiDBAutoAnalyzePartitionBatchSize = 1
DefTiDBEnableIndexMergeJoin = false
DefTiDBTrackAggregateMemoryUsage = true
DefTiDBEnableExchangePartition = true
@ -1123,6 +1127,7 @@ var (
DDLForce2Queue = atomic.NewBool(false)
EnableNoopVariables = atomic.NewBool(DefTiDBEnableNoopVariables)
EnableMDL = atomic.NewBool(DefTiDBEnableMDL)
AutoAnalyzePartitionBatchSize = atomic.NewInt64(DefTiDBAutoAnalyzePartitionBatchSize)
// EnableFastReorg indicates whether to use lightning to enhance DDL reorg performance.
EnableFastReorg = atomic.NewBool(DefTiDBEnableFastReorg)
// DDLDiskQuota is the temporary variable for setting the disk quota for lightning

View File

@ -1105,7 +1105,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) (analyzed bool) {
continue
}
if pruneMode == variable.Dynamic {
analyzed := h.autoAnalyzePartitionTable(tblInfo, pi, db, autoAnalyzeRatio, analyzeSnapshot)
analyzed := h.autoAnalyzePartitionTableInDynamicMode(tblInfo, pi, db, autoAnalyzeRatio, analyzeSnapshot)
if analyzed {
return true
}
@ -1157,10 +1157,11 @@ func (h *Handle) autoAnalyzeTable(tblInfo *model.TableInfo, statsTbl *statistics
return false
}
func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.PartitionInfo, db string, ratio float64, analyzeSnapshot bool) bool {
func (h *Handle) autoAnalyzePartitionTableInDynamicMode(tblInfo *model.TableInfo, pi *model.PartitionInfo, db string, ratio float64, analyzeSnapshot bool) bool {
h.mu.RLock()
tableStatsVer := h.mu.ctx.GetSessionVars().AnalyzeVersion
h.mu.RUnlock()
analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load())
partitionNames := make([]interface{}, 0, len(pi.Definitions))
for _, def := range pi.Definitions {
partitionStatsTbl := h.GetPartitionStats(tblInfo, def.ID)
@ -1184,13 +1185,29 @@ func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.P
sqlBuilder.WriteString(suffix)
return sqlBuilder.String()
}
if len(partitionNames) < 1 {
return false
}
logutil.BgLogger().Info("[stats] start to auto analyze",
zap.String("table", tblInfo.Name.String()),
zap.Any("partitions", partitionNames),
zap.Int("analyze partition batch size", analyzePartitionBatchSize))
if len(partitionNames) > 0 {
logutil.BgLogger().Info("[stats] auto analyze triggered")
sql := getSQL("analyze table %n.%n partition", "", len(partitionNames))
params := append([]interface{}{db, tblInfo.Name.O}, partitionNames...)
statsTbl := h.GetTableStats(tblInfo)
statistics.CheckAnalyzeVerOnTable(statsTbl, &tableStatsVer)
h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
for i := 0; i < len(partitionNames); i += analyzePartitionBatchSize {
start := i
end := start + analyzePartitionBatchSize
if end >= len(partitionNames) {
end = len(partitionNames)
}
sql := getSQL("analyze table %n.%n partition", "", end-start)
params := append([]interface{}{db, tblInfo.Name.O}, partitionNames[start:end]...)
logutil.BgLogger().Info("[stats] auto analyze triggered",
zap.String("table", tblInfo.Name.String()),
zap.Any("partitions", partitionNames[start:end]))
h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
}
return true
}
for _, idx := range tblInfo.Indices {
@ -1205,13 +1222,23 @@ func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.P
}
}
if len(partitionNames) > 0 {
logutil.BgLogger().Info("[stats] auto analyze for unanalyzed")
sql := getSQL("analyze table %n.%n partition", " index %n", len(partitionNames))
params := append([]interface{}{db, tblInfo.Name.O}, partitionNames...)
params = append(params, idx.Name.O)
statsTbl := h.GetTableStats(tblInfo)
statistics.CheckAnalyzeVerOnTable(statsTbl, &tableStatsVer)
h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
for i := 0; i < len(partitionNames); i += analyzePartitionBatchSize {
start := i
end := start + analyzePartitionBatchSize
if end >= len(partitionNames) {
end = len(partitionNames)
}
sql := getSQL("analyze table %n.%n partition", " index %n", end-start)
params := append([]interface{}{db, tblInfo.Name.O}, partitionNames[start:end]...)
params = append(params, idx.Name.O)
logutil.BgLogger().Info("[stats] auto analyze for unanalyzed",
zap.String("table", tblInfo.Name.String()),
zap.String("index", idx.Name.String()),
zap.Any("partitions", partitionNames[start:end]))
h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
}
return true
}
}