statistics: implement batch size for auto analyze partition table in dynamic mode (#38153)
ref pingcap/tidb#37977
@@ -820,6 +820,15 @@ func TestSetVar(t *testing.T) {
 	tk.MustQuery("select @@global.tidb_opt_force_inline_cte").Check(testkit.Rows("0")) // default value is 0
 	tk.MustExec("set global tidb_opt_force_inline_cte=1")
 	tk.MustQuery("select @@global.tidb_opt_force_inline_cte").Check(testkit.Rows("1"))
+
+	// test tidb_auto_analyze_partition_batch_size
+	tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // default value is 1
+	tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 2")
+	tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("2"))
+	tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 0")
+	tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // min value is 1
+	tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999")
+	tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024
 }
 
 func TestGetSetNoopVars(t *testing.T) {
@@ -672,6 +672,19 @@ var defaultSysVars = []*SysVar{
 		TableCacheLease.Store(val)
 		return nil
 	}},
+	{Scope: ScopeGlobal, Name: TiDBAutoAnalyzePartitionBatchSize,
+		Value: strconv.Itoa(DefTiDBAutoAnalyzePartitionBatchSize),
+		Type: TypeUnsigned, MinValue: 1, MaxValue: 1024,
+		SetGlobal: func(vars *SessionVars, s string) error {
+			var val int64
+			val, err := strconv.ParseInt(s, 10, 64)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			AutoAnalyzePartitionBatchSize.Store(val)
+			return nil
+		}},
 
 	// variable for the top SQL feature.
 	// TopSQL enablement is only controlled by the TopSQL pub/sub sinker.
 	// This global variable is only used to update the global config stored in PD (etcd).
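A note on the MinValue: 1 / MaxValue: 1024 bounds registered above: for TypeUnsigned variables the sysvar framework truncates out-of-range inputs to the nearest bound before SetGlobal runs, which is why TestSetVar expects 0 to read back as 1 and 9999 as 1024. Below is a minimal standalone sketch of that clamping; the helper name is illustrative, not TiDB's actual code.

package main

import "fmt"

// clampUnsigned is an illustrative stand-in (not TiDB's helper) for the
// range check applied to TypeUnsigned system variables: out-of-range
// values are truncated to MinValue/MaxValue rather than rejected.
func clampUnsigned(v, minVal, maxVal uint64) uint64 {
	if v < minVal {
		return minVal
	}
	if v > maxVal {
		return maxVal
	}
	return v
}

func main() {
	// Mirrors the test expectations above: 0 is raised to the minimum 1,
	// and 9999 is truncated to the maximum 1024.
	fmt.Println(clampUnsigned(0, 1, 1024))    // 1
	fmt.Println(clampUnsigned(9999, 1, 1024)) // 1024
}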
@@ -625,6 +625,9 @@ const (
 	// TiDBAnalyzeVersion indicates how TiDB collects the analyzed statistics and how they are used.
 	TiDBAnalyzeVersion = "tidb_analyze_version"
 
+	// TiDBAutoAnalyzePartitionBatchSize indicates the batch size auto analyze uses for partitioned tables in dynamic mode
+	TiDBAutoAnalyzePartitionBatchSize = "tidb_auto_analyze_partition_batch_size"
+
 	// TiDBEnableIndexMergeJoin indicates whether to enable index merge join.
 	TiDBEnableIndexMergeJoin = "tidb_enable_index_merge_join"
@@ -994,6 +997,7 @@ const (
 	DefTiDBEnable1PC = false
 	DefTiDBGuaranteeLinearizability = true
 	DefTiDBAnalyzeVersion = 2
+	DefTiDBAutoAnalyzePartitionBatchSize = 1
 	DefTiDBEnableIndexMergeJoin = false
 	DefTiDBTrackAggregateMemoryUsage = true
 	DefTiDBEnableExchangePartition = true
@@ -1123,6 +1127,7 @@ var (
 	DDLForce2Queue = atomic.NewBool(false)
 	EnableNoopVariables = atomic.NewBool(DefTiDBEnableNoopVariables)
 	EnableMDL = atomic.NewBool(DefTiDBEnableMDL)
+	AutoAnalyzePartitionBatchSize = atomic.NewInt64(DefTiDBAutoAnalyzePartitionBatchSize)
 	// EnableFastReorg indicates whether to use lightning to enhance DDL reorg performance.
 	EnableFastReorg = atomic.NewBool(DefTiDBEnableFastReorg)
 	// DDLDiskQuota is the temporary variable used to set the disk quota for lightning
@@ -1105,7 +1105,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) (analyzed bool) {
 			continue
 		}
 		if pruneMode == variable.Dynamic {
-			analyzed := h.autoAnalyzePartitionTable(tblInfo, pi, db, autoAnalyzeRatio, analyzeSnapshot)
+			analyzed := h.autoAnalyzePartitionTableInDynamicMode(tblInfo, pi, db, autoAnalyzeRatio, analyzeSnapshot)
 			if analyzed {
 				return true
 			}
@@ -1157,10 +1157,11 @@ func (h *Handle) autoAnalyzeTable(tblInfo *model.TableInfo, statsTbl *statistics
 	return false
 }
 
-func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.PartitionInfo, db string, ratio float64, analyzeSnapshot bool) bool {
+func (h *Handle) autoAnalyzePartitionTableInDynamicMode(tblInfo *model.TableInfo, pi *model.PartitionInfo, db string, ratio float64, analyzeSnapshot bool) bool {
 	h.mu.RLock()
 	tableStatsVer := h.mu.ctx.GetSessionVars().AnalyzeVersion
 	h.mu.RUnlock()
+	analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load())
 	partitionNames := make([]interface{}, 0, len(pi.Definitions))
 	for _, def := range pi.Definitions {
 		partitionStatsTbl := h.GetPartitionStats(tblInfo, def.ID)
@@ -1184,13 +1185,29 @@ func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.P
 		sqlBuilder.WriteString(suffix)
 		return sqlBuilder.String()
 	}
+	if len(partitionNames) < 1 {
+		return false
+	}
+	logutil.BgLogger().Info("[stats] start to auto analyze",
+		zap.String("table", tblInfo.Name.String()),
+		zap.Any("partitions", partitionNames),
+		zap.Int("analyze partition batch size", analyzePartitionBatchSize))
 	if len(partitionNames) > 0 {
-		logutil.BgLogger().Info("[stats] auto analyze triggered")
-		sql := getSQL("analyze table %n.%n partition", "", len(partitionNames))
-		params := append([]interface{}{db, tblInfo.Name.O}, partitionNames...)
 		statsTbl := h.GetTableStats(tblInfo)
 		statistics.CheckAnalyzeVerOnTable(statsTbl, &tableStatsVer)
-		h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
+		for i := 0; i < len(partitionNames); i += analyzePartitionBatchSize {
+			start := i
+			end := start + analyzePartitionBatchSize
+			if end >= len(partitionNames) {
+				end = len(partitionNames)
+			}
+			sql := getSQL("analyze table %n.%n partition", "", end-start)
+			params := append([]interface{}{db, tblInfo.Name.O}, partitionNames[start:end]...)
+			logutil.BgLogger().Info("[stats] auto analyze triggered",
+				zap.String("table", tblInfo.Name.String()),
+				zap.Any("partitions", partitionNames[start:end]))
+			h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
+		}
 		return true
 	}
 	for _, idx := range tblInfo.Indices {
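The loop above is the heart of the change: instead of one ANALYZE statement naming every stale partition, the partition list is walked in steps of tidb_auto_analyze_partition_batch_size, issuing one statement per slice. Below is a standalone sketch of the same slicing arithmetic; the batches helper is mine, for illustration only.

package main

import "fmt"

// batches mirrors the loop added above: walk the partition list in steps
// of batchSize and clamp the final slice to the list length. (The diff
// uses end >= len before assigning; end > len is equivalent here.)
func batches(partitions []string, batchSize int) [][]string {
	var out [][]string
	for i := 0; i < len(partitions); i += batchSize {
		end := i + batchSize
		if end > len(partitions) {
			end = len(partitions)
		}
		out = append(out, partitions[i:end])
	}
	return out
}

func main() {
	// Five partitions with batch size 2 -> [p0 p1] [p2 p3] [p4],
	// i.e. three ANALYZE statements instead of one covering all five.
	fmt.Println(batches([]string{"p0", "p1", "p2", "p3", "p4"}, 2))
}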
@@ -1205,13 +1222,23 @@ func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.P
 			}
 		}
 		if len(partitionNames) > 0 {
-			logutil.BgLogger().Info("[stats] auto analyze for unanalyzed")
-			sql := getSQL("analyze table %n.%n partition", " index %n", len(partitionNames))
-			params := append([]interface{}{db, tblInfo.Name.O}, partitionNames...)
-			params = append(params, idx.Name.O)
 			statsTbl := h.GetTableStats(tblInfo)
 			statistics.CheckAnalyzeVerOnTable(statsTbl, &tableStatsVer)
-			h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
+			for i := 0; i < len(partitionNames); i += analyzePartitionBatchSize {
+				start := i
+				end := start + analyzePartitionBatchSize
+				if end >= len(partitionNames) {
+					end = len(partitionNames)
+				}
+				sql := getSQL("analyze table %n.%n partition", " index %n", end-start)
+				params := append([]interface{}{db, tblInfo.Name.O}, partitionNames[start:end]...)
+				params = append(params, idx.Name.O)
+				logutil.BgLogger().Info("[stats] auto analyze for unanalyzed",
+					zap.String("table", tblInfo.Name.String()),
+					zap.String("index", idx.Name.String()),
+					zap.Any("partitions", partitionNames[start:end]))
+				h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...)
+			}
 			return true
 		}
 	}
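Both batched paths call a local getSQL helper whose body lies mostly outside these hunks; only its tail (sqlBuilder.WriteString(suffix); return sqlBuilder.String()) is visible above. Below is a hedged reconstruction, assuming it emits one %n identifier placeholder per partition between the given prefix and suffix; treat it as a sketch, not the committed code.

package main

import (
	"fmt"
	"strings"
)

// getSQL is an assumed reconstruction of the local helper: it joins one
// "%n" identifier placeholder per partition between a prefix and an
// optional suffix. "%n" is TiDB's escaped-identifier placeholder, later
// bound by execAutoAnalyze to db, table, partition, and index names.
func getSQL(prefix, suffix string, numPartitions int) string {
	var sqlBuilder strings.Builder
	sqlBuilder.WriteString(prefix)
	for i := 0; i < numPartitions; i++ {
		if i != 0 {
			sqlBuilder.WriteString(",")
		}
		sqlBuilder.WriteString(" %n")
	}
	sqlBuilder.WriteString(suffix)
	return sqlBuilder.String()
}

func main() {
	// A batch of two partitions on an index yields the template:
	// "analyze table %n.%n partition %n, %n index %n"
	fmt.Println(getSQL("analyze table %n.%n partition", " index %n", 2))
}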