diff --git a/executor/set_test.go b/executor/set_test.go index e0b8860153..0f099e8182 100644 --- a/executor/set_test.go +++ b/executor/set_test.go @@ -820,6 +820,15 @@ func TestSetVar(t *testing.T) { tk.MustQuery("select @@global.tidb_opt_force_inline_cte").Check(testkit.Rows("0")) // default value is 0 tk.MustExec("set global tidb_opt_force_inline_cte=1") tk.MustQuery("select @@global.tidb_opt_force_inline_cte").Check(testkit.Rows("1")) + + // test tidb_auto_analyze_partition_batch_size + tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // default value is 1 + tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 2") + tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("2")) + tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 0") + tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1")) // min value is 1 + tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999") + tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024 } func TestGetSetNoopVars(t *testing.T) { diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 72d102b806..9e4409b138 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -672,6 +672,19 @@ var defaultSysVars = []*SysVar{ TableCacheLease.Store(val) return nil }}, + {Scope: ScopeGlobal, Name: TiDBAutoAnalyzePartitionBatchSize, + Value: strconv.Itoa(DefTiDBAutoAnalyzePartitionBatchSize), + Type: TypeUnsigned, MinValue: 1, MaxValue: 1024, + SetGlobal: func(vars *SessionVars, s string) error { + val, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return errors.Trace(err) + } + // Publish the new value to the process-wide atomic that the auto analyze code loads. + AutoAnalyzePartitionBatchSize.Store(val) + return nil + }}, + // variable for top SQL feature. // TopSQL enable only be controlled by TopSQL pub/sub sinker. 
// This global variable only uses to update the global config which store in PD(ETCD). diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 6a011a92f7..2d4c79954f 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -625,6 +625,9 @@ const ( // TiDBAnalyzeVersion indicates how tidb collects the analyzed statistics and how use to it. TiDBAnalyzeVersion = "tidb_analyze_version" + // TiDBAutoAnalyzePartitionBatchSize is the number of partitions analyzed per statement when auto analyze runs on a partitioned table in dynamic prune mode. + TiDBAutoAnalyzePartitionBatchSize = "tidb_auto_analyze_partition_batch_size" + // TiDBEnableIndexMergeJoin indicates whether to enable index merge join. TiDBEnableIndexMergeJoin = "tidb_enable_index_merge_join" @@ -994,6 +997,7 @@ const ( DefTiDBEnable1PC = false DefTiDBGuaranteeLinearizability = true DefTiDBAnalyzeVersion = 2 + DefTiDBAutoAnalyzePartitionBatchSize = 1 DefTiDBEnableIndexMergeJoin = false DefTiDBTrackAggregateMemoryUsage = true DefTiDBEnableExchangePartition = true @@ -1123,6 +1127,7 @@ var ( DDLForce2Queue = atomic.NewBool(false) EnableNoopVariables = atomic.NewBool(DefTiDBEnableNoopVariables) EnableMDL = atomic.NewBool(DefTiDBEnableMDL) + AutoAnalyzePartitionBatchSize = atomic.NewInt64(DefTiDBAutoAnalyzePartitionBatchSize) // EnableFastReorg indicates whether to use lightning to enhance DDL reorg performance. 
EnableFastReorg = atomic.NewBool(DefTiDBEnableFastReorg) // DDLDiskQuota is the temporary variable for set disk quota for lightning diff --git a/statistics/handle/update.go b/statistics/handle/update.go index b831259487..f6b8c31361 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -1105,7 +1105,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) (analyzed bool) { continue } if pruneMode == variable.Dynamic { - analyzed := h.autoAnalyzePartitionTable(tblInfo, pi, db, autoAnalyzeRatio, analyzeSnapshot) + analyzed := h.autoAnalyzePartitionTableInDynamicMode(tblInfo, pi, db, autoAnalyzeRatio, analyzeSnapshot) if analyzed { return true } @@ -1157,10 +1157,13 @@ func (h *Handle) autoAnalyzeTable(tblInfo *model.TableInfo, statsTbl *statistics return false } -func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.PartitionInfo, db string, ratio float64, analyzeSnapshot bool) bool { +// autoAnalyzePartitionTableInDynamicMode auto analyzes a partitioned table under dynamic prune mode, +// issuing one analyze statement per batch of at most tidb_auto_analyze_partition_batch_size partitions. +func (h *Handle) autoAnalyzePartitionTableInDynamicMode(tblInfo *model.TableInfo, pi *model.PartitionInfo, db string, ratio float64, analyzeSnapshot bool) bool { h.mu.RLock() tableStatsVer := h.mu.ctx.GetSessionVars().AnalyzeVersion h.mu.RUnlock() + analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load()) partitionNames := make([]interface{}, 0, len(pi.Definitions)) for _, def := range pi.Definitions { partitionStatsTbl := h.GetPartitionStats(tblInfo, def.ID) @@ -1184,13 +1187,27 @@ func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.P sqlBuilder.WriteString(suffix) return sqlBuilder.String() } + // Do not return early when no partition needs a full analyze: the per-index check below must still run. if len(partitionNames) > 0 { - logutil.BgLogger().Info("[stats] auto analyze triggered") - sql := getSQL("analyze table %n.%n 
partition", "", len(partitionNames)) - params := append([]interface{}{db, tblInfo.Name.O}, partitionNames...) + logutil.BgLogger().Info("[stats] start to auto analyze", + zap.String("table", tblInfo.Name.String()), + zap.Any("partitions", partitionNames), + zap.Int("analyze partition batch size", analyzePartitionBatchSize)) statsTbl := h.GetTableStats(tblInfo) statistics.CheckAnalyzeVerOnTable(statsTbl, &tableStatsVer) - h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...) + for i := 0; i < len(partitionNames); i += analyzePartitionBatchSize { + start := i + end := start + analyzePartitionBatchSize + if end >= len(partitionNames) { + end = len(partitionNames) + } + sql := getSQL("analyze table %n.%n partition", "", end-start) + params := append([]interface{}{db, tblInfo.Name.O}, partitionNames[start:end]...) + logutil.BgLogger().Info("[stats] auto analyze triggered", + zap.String("table", tblInfo.Name.String()), + zap.Any("partitions", partitionNames[start:end])) + h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...) + } return true } for _, idx := range tblInfo.Indices { @@ -1205,13 +1222,23 @@ func (h *Handle) autoAnalyzePartitionTable(tblInfo *model.TableInfo, pi *model.P } } if len(partitionNames) > 0 { - logutil.BgLogger().Info("[stats] auto analyze for unanalyzed") - sql := getSQL("analyze table %n.%n partition", " index %n", len(partitionNames)) - params := append([]interface{}{db, tblInfo.Name.O}, partitionNames...) - params = append(params, idx.Name.O) statsTbl := h.GetTableStats(tblInfo) statistics.CheckAnalyzeVerOnTable(statsTbl, &tableStatsVer) - h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...) + for i := 0; i < len(partitionNames); i += analyzePartitionBatchSize { + start := i + end := start + analyzePartitionBatchSize + if end >= len(partitionNames) { + end = len(partitionNames) + } + sql := getSQL("analyze table %n.%n partition", " index %n", end-start) + params := append([]interface{}{db, tblInfo.Name.O}, partitionNames[start:end]...) 
+ params = append(params, idx.Name.O) + logutil.BgLogger().Info("[stats] auto analyze for unanalyzed", + zap.String("table", tblInfo.Name.String()), + zap.String("index", idx.Name.String()), + zap.Any("partitions", partitionNames[start:end])) + h.execAutoAnalyze(tableStatsVer, analyzeSnapshot, sql, params...) + } return true } }