From d4a50aa47b6ba575aa0ef763caac5a72ef4fec79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8C=E6=89=8B=E6=8E=89=E5=8C=85=E5=B7=A5=E7=A8=8B?= =?UTF-8?q?=E5=B8=88?= Date: Mon, 14 Aug 2023 16:38:00 +0800 Subject: [PATCH] planner: refactor `getFullAnalyzeColumnsInfo` (#46044) --- executor/test/analyzetest/analyze_test.go | 4 +- planner/core/planbuilder.go | 152 ++++++++++++++-------- planner/core/planbuilder_test.go | 22 +++- 3 files changed, 117 insertions(+), 61 deletions(-) diff --git a/executor/test/analyzetest/analyze_test.go b/executor/test/analyzetest/analyze_test.go index 3f527be850..379f437bec 100644 --- a/executor/test/analyzetest/analyze_test.go +++ b/executor/test/analyzetest/analyze_test.go @@ -2126,9 +2126,9 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { // analyze version 1 doesn't support `ANALYZE COLUMNS c1, ..., cn`/`ANALYZE PREDICATE COLUMNS` currently tk.MustExec("set @@tidb_analyze_version = 1") err := tk.ExecToErr("analyze table t columns a") - require.Equal(t, "Only the analyze version 2 supports analyzing the specified columns", err.Error()) + require.Equal(t, "Only the version 2 of analyze supports analyzing the specified columns", err.Error()) err = tk.ExecToErr("analyze table t predicate columns") - require.Equal(t, "Only the analyze version 2 supports analyzing predicate columns", err.Error()) + require.Equal(t, "Only the version 2 of analyze supports analyzing predicate columns", err.Error()) tk.MustExec("set @@tidb_analyze_version = 2") // invalid column diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 50d1e30ba2..9b5e90e4ad 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -2325,15 +2325,7 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo( if mustAllColumns && warning && (columnChoice == model.PredicateColumns || columnChoice == model.ColumnList) { b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("Table %s.%s has version 1 statistics so all the columns must 
be analyzed to overwrite the current statistics", tbl.Schema.L, tbl.Name.L)) } - colSet2colList := func(colSet map[int64]struct{}) []*model.ColumnInfo { - colList := make([]*model.ColumnInfo, 0, len(colSet)) - for _, colInfo := range tbl.TableInfo.Columns { - if _, ok := colSet[colInfo.ID]; ok { - colList = append(colList, colInfo) - } - } - return colList - } + switch columnChoice { case model.DefaultChoice, model.AllColumns: return tbl.TableInfo.Columns, nil, nil @@ -2349,52 +2341,86 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo( if err != nil { return nil, nil, err } - colSet := make(map[int64]struct{}, len(predicate)+len(mustAnalyzed)) - for colID := range predicate { - colSet[colID] = struct{}{} - } - for colID := range mustAnalyzed { - colSet[colID] = struct{}{} - } - return colSet2colList(colSet), nil, nil + colSet := combineColumnSets(predicate, mustAnalyzed) + return getColumnListFromSet(tbl.TableInfo.Columns, colSet), nil, nil case model.ColumnList: - colSet := make(map[int64]struct{}, len(specifiedCols)) - for _, colInfo := range specifiedCols { - colSet[colInfo.ID] = struct{}{} - } + colSet := getColumnSetFromSpecifiedCols(specifiedCols) mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols) if err != nil { return nil, nil, err } if warning { - missing := make(map[int64]struct{}, len(mustAnalyzed)) - for colID := range mustAnalyzed { - if _, ok := colSet[colID]; !ok { - missing[colID] = struct{}{} - } - } + missing := getMissingColumns(colSet, mustAnalyzed) if len(missing) > 0 { - missingNames := make([]string, 0, len(missing)) - for _, col := range tbl.TableInfo.Columns { - if _, ok := missing[col.ID]; ok { - missingNames = append(missingNames, col.Name.O) - } - } - b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("Columns %s are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", strings.Join(missingNames, ","))) + missingNames := 
getColumnNamesFromIDs(tbl.TableInfo.Columns, missing) + warningMsg := fmt.Sprintf("Columns %s are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", strings.Join(missingNames, ",")) + b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.New(warningMsg)) } } - for colID := range mustAnalyzed { - colSet[colID] = struct{}{} - } - colList := colSet2colList(colSet) + colSet = combineColumnSets(colSet, mustAnalyzed) + colList := getColumnListFromSet(tbl.TableInfo.Columns, colSet) if mustAllColumns { return tbl.TableInfo.Columns, colList, nil } return colList, colList, nil } + return nil, nil, nil } +// Helper function to combine two column sets. +func combineColumnSets(sets ...map[int64]struct{}) map[int64]struct{} { + result := make(map[int64]struct{}) + for _, set := range sets { + for colID := range set { + result[colID] = struct{}{} + } + } + return result +} + +// Helper function to extract column IDs from specified columns. +func getColumnSetFromSpecifiedCols(cols []*model.ColumnInfo) map[int64]struct{} { + colSet := make(map[int64]struct{}, len(cols)) + for _, colInfo := range cols { + colSet[colInfo.ID] = struct{}{} + } + return colSet +} + +// Helper function to get missing columns from a set. +func getMissingColumns(colSet, mustAnalyzed map[int64]struct{}) map[int64]struct{} { + missing := make(map[int64]struct{}) + for colID := range mustAnalyzed { + if _, ok := colSet[colID]; !ok { + missing[colID] = struct{}{} + } + } + return missing +} + +// Helper function to get column names from IDs. +func getColumnNamesFromIDs(columns []*model.ColumnInfo, colIDs map[int64]struct{}) []string { + var missingNames []string + for _, col := range columns { + if _, ok := colIDs[col.ID]; ok { + missingNames = append(missingNames, col.Name.O) + } + } + return missingNames +} + +// Helper function to get a list of column infos from a set of column IDs. 
+func getColumnListFromSet(columns []*model.ColumnInfo, colSet map[int64]struct{}) []*model.ColumnInfo { + colList := make([]*model.ColumnInfo, 0, len(colSet)) + for _, colInfo := range columns { + if _, ok := colSet[colInfo.ID]; ok { + colList = append(colList, colInfo) + } + } + return colList +} + func getColOffsetForAnalyze(colsInfo []*model.ColumnInfo, colID int64) int { for i, col := range colsInfo { if colID == col.ID { @@ -2438,30 +2464,36 @@ func getModifiedIndexesInfoForAnalyze(sctx sessionctx.Context, tblInfo *model.Ta func (b *PlanBuilder) buildAnalyzeFullSamplingTask( as *ast.AnalyzeTableStmt, - taskSlice []AnalyzeColumnsTask, + tasks []AnalyzeColumnsTask, physicalIDs []int64, - names []string, + partitionNames []string, tbl *ast.TableName, version int, persistOpts bool, rsOptionsMap map[int64]V2AnalyzeOptions, ) ([]AnalyzeColumnsTask, error) { + // Version 2 doesn't support incremental analyze. + // And incremental analyze will be deprecated in the future. if as.Incremental { b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("The version 2 stats would ignore the INCREMENTAL keyword and do full sampling")) } - astOpts, err := parseAnalyzeOptionsV2(as.AnalyzeOpts) + + astOpts, err := handleAnalyzeOptionsV2(as.AnalyzeOpts) if err != nil { return nil, err } + // Get all column info which need to be analyzed. astColList, err := getAnalyzeColumnList(as.ColumnNames, tbl) if err != nil { return nil, err } + var predicateCols, mustAnalyzedCols calcOnceMap ver := version statsHandle := domain.GetDomain(b.ctx).StatsHandle() // If the statistics of the table is version 1, we must analyze all columns to overwrites all of old statistics. 
mustAllColumns := !statsHandle.CheckAnalyzeVersion(tbl.TableInfo, physicalIDs, &ver) + astColsInfo, _, err := b.getFullAnalyzeColumnsInfo(tbl, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns, true) if err != nil { return nil, err @@ -2474,6 +2506,7 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( for physicalID, opts := range optionsMap { rsOptionsMap[physicalID] = opts } + for i, id := range physicalIDs { physicalID := id if id == tbl.TableInfo.ID { @@ -2482,7 +2515,7 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( info := AnalyzeInfo{ DBName: tbl.Schema.O, TableName: tbl.Name.O, - PartitionName: names[i], + PartitionName: partitionNames[i], TableID: statistics.AnalyzeTableID{TableID: tbl.TableInfo.ID, PartitionID: id}, Incremental: false, StatsVersion: version, @@ -2541,9 +2574,10 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( newTask.ColsInfo = append(newTask.ColsInfo, extraCol) newTask.HandleCols = &IntHandleCols{col: colInfoToColumn(extraCol, len(newTask.ColsInfo)-1)} } - taskSlice = append(taskSlice, newTask) + tasks = append(tasks, newTask) } - return taskSlice, nil + + return tasks, nil } func (b *PlanBuilder) genV2AnalyzeOptions( @@ -2708,10 +2742,13 @@ func mergeColumnList(choice1 model.ColumnChoice, list1 []*model.ColumnInfo, choi return choice2, list2 } +// buildAnalyzeTable constructs analyze tasks for each table. func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64, version int) (Plan, error) { p := &Analyze{Opts: opts} p.OptionsMap = make(map[int64]V2AnalyzeOptions) usePersistedOptions := variable.PersistAnalyzeOptions.Load() + + // Construct tasks for each table. 
for _, tbl := range as.TableNames { if tbl.TableInfo.IsView() { return nil, errors.Errorf("analyze view %s is not supported now", tbl.Name.O) @@ -2719,15 +2756,16 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A if tbl.TableInfo.IsSequence() { return nil, errors.Errorf("analyze sequence %s is not supported now", tbl.Name.O) } + idxInfo, colInfo := getColsInfo(tbl) - physicalIDs, names, err := GetPhysicalIDsAndPartitionNames(tbl.TableInfo, as.PartitionNames) + physicalIDs, partitionNames, err := GetPhysicalIDsAndPartitionNames(tbl.TableInfo, as.PartitionNames) if err != nil { return nil, err } var commonHandleInfo *model.IndexInfo // If we want to analyze this table with analyze version 2 but the existing stats is version 1 and stats feedback is enabled, // we will switch back to analyze version 1. - if statistics.FeedbackProbability.Load() > 0 && version == 2 { + if statistics.FeedbackProbability.Load() > 0 && version == statistics.Version2 { statsHandle := domain.GetDomain(b.ctx).StatsHandle() versionIsSame := statsHandle.CheckAnalyzeVersion(tbl.TableInfo, physicalIDs, &version) if !versionIsSame { @@ -2736,18 +2774,21 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A "If you want to switch to version 2 statistics, please first disable query feedback by setting feedback-probability to 0.0 in the config file.", tbl.Name)) } } + if version == statistics.Version2 { - p.ColTasks, err = b.buildAnalyzeFullSamplingTask(as, p.ColTasks, physicalIDs, names, tbl, version, usePersistedOptions, p.OptionsMap) + p.ColTasks, err = b.buildAnalyzeFullSamplingTask(as, p.ColTasks, physicalIDs, partitionNames, tbl, version, usePersistedOptions, p.OptionsMap) if err != nil { return nil, err } continue } + + // Version 1 analyze. 
if as.ColumnChoice == model.PredicateColumns { - return nil, errors.Errorf("Only the analyze version 2 supports analyzing predicate columns") + return nil, errors.Errorf("Only the version 2 of analyze supports analyzing predicate columns") } if as.ColumnChoice == model.ColumnList { - return nil, errors.Errorf("Only the analyze version 2 supports analyzing the specified columns") + return nil, errors.Errorf("Only the version 2 of analyze supports analyzing the specified columns") } for _, idx := range idxInfo { // For prefix common handle. We don't use analyze mixed to handle it with columns. Because the full value @@ -2767,7 +2808,7 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A info := AnalyzeInfo{ DBName: tbl.Schema.O, TableName: tbl.Name.O, - PartitionName: names[i], + PartitionName: partitionNames[i], TableID: statistics.AnalyzeTableID{TableID: tbl.TableInfo.ID, PartitionID: id}, Incremental: as.Incremental, StatsVersion: version, @@ -2788,7 +2829,7 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A info := AnalyzeInfo{ DBName: tbl.Schema.O, TableName: tbl.Name.O, - PartitionName: names[i], + PartitionName: partitionNames[i], TableID: statistics.AnalyzeTableID{TableID: tbl.TableInfo.ID, PartitionID: id}, Incremental: as.Incremental, StatsVersion: version, @@ -2803,6 +2844,7 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A } } } + return p, nil } @@ -2971,7 +3013,9 @@ var analyzeOptionDefaultV2 = map[ast.AnalyzeOptionType]uint64{ ast.AnalyzeOptSampleRate: math.Float64bits(-1), } -func parseAnalyzeOptionsV2(opts []ast.AnalyzeOpt) (map[ast.AnalyzeOptionType]uint64, error) { +// handleAnalyzeOptionsV2 is very similar to handleAnalyzeOptions, but it's used for analyze version 2. +// Remove this function after we remove the support of analyze version 1. 
+func handleAnalyzeOptionsV2(opts []ast.AnalyzeOpt) (map[ast.AnalyzeOptionType]uint64, error) { optMap := make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault)) sampleNum, sampleRate := uint64(0), 0.0 for _, opt := range opts { @@ -3009,6 +3053,7 @@ func parseAnalyzeOptionsV2(opts []ast.AnalyzeOpt) (map[ast.AnalyzeOptionType]uin if sampleNum > 0 && sampleRate > 0 { return nil, errors.Errorf("You can only either set the value of the sample num or set the value of the sample rate. Don't set both of them") } + return optMap, nil } @@ -3074,7 +3119,8 @@ func handleAnalyzeOptions(opts []ast.AnalyzeOpt, statsVer int) (map[ast.AnalyzeO if sampleNum > 0 && sampleRate > 0 { return nil, errors.Errorf("You can only either set the value of the sample num or set the value of the sample rate. Don't set both of them") } - if optMap[ast.AnalyzeOptCMSketchWidth]*optMap[ast.AnalyzeOptCMSketchDepth] > CMSketchSizeLimit { + // Only version 1 has cmsketch. + if statsVer == statistics.Version1 && optMap[ast.AnalyzeOptCMSketchWidth]*optMap[ast.AnalyzeOptCMSketchDepth] > CMSketchSizeLimit { return nil, errors.Errorf("cm sketch size(depth * width) should not larger than %d", CMSketchSizeLimit) } return optMap, nil diff --git a/planner/core/planbuilder_test.go b/planner/core/planbuilder_test.go index 8608f50eaf..cf18bb5e27 100644 --- a/planner/core/planbuilder_test.go +++ b/planner/core/planbuilder_test.go @@ -542,7 +542,7 @@ func checkDeepClonedCore(v1, v2 reflect.Value, path string, whiteList []string, return nil } -func TestHandleAnalyzeOptions(t *testing.T) { +func TestHandleAnalyzeOptionsV1AndV2(t *testing.T) { require.Equal(t, len(analyzeOptionDefault), len(analyzeOptionDefaultV2), "analyzeOptionDefault and analyzeOptionDefaultV2 should have the same length") tests := []struct { @@ -559,7 +559,7 @@ func TestHandleAnalyzeOptions(t *testing.T) { Value: ast.NewValueExpr(16384+1, "", ""), }, }, - statsVer: 1, + statsVer: statistics.Version1, ExpectedErr: "Value of analyze 
option TOPN should not be larger than 16384", }, { @@ -570,7 +570,7 @@ func TestHandleAnalyzeOptions(t *testing.T) { Value: ast.NewValueExpr(1, "", ""), }, }, - statsVer: 1, + statsVer: statistics.Version1, ExpectedErr: "Version 1's statistics doesn't support the SAMPLERATE option, please set tidb_analyze_version to 2", }, { @@ -581,7 +581,7 @@ func TestHandleAnalyzeOptions(t *testing.T) { Value: ast.NewValueExpr(2, "", ""), }, }, - statsVer: 2, + statsVer: statistics.Version2, ExpectedErr: "Value of analyze option SAMPLERATE should not larger than 1.000000, and should be greater than 0", }, { @@ -607,7 +607,7 @@ func TestHandleAnalyzeOptions(t *testing.T) { Value: ast.NewValueExpr(0.1, "", ""), }, }, - statsVer: 2, + statsVer: statistics.Version2, ExpectedErr: "ou can only either set the value of the sample num or set the value of the sample rate. Don't set both of them", }, { @@ -622,7 +622,7 @@ func TestHandleAnalyzeOptions(t *testing.T) { Value: ast.NewValueExpr(2048, "", ""), }, }, - statsVer: 2, + statsVer: statistics.Version1, ExpectedErr: "cm sketch size(depth * width) should not larger than 1258291", }, } @@ -636,6 +636,16 @@ func TestHandleAnalyzeOptions(t *testing.T) { } else { require.NoError(t, err) } + + if tt.statsVer == statistics.Version2 { + _, err := handleAnalyzeOptionsV2(tt.opts) + if tt.ExpectedErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.ExpectedErr) + } else { + require.NoError(t, err) + } + } }) } }