From 550ca8eaeb5ae875e42c836a85d8af5fe4fda78c Mon Sep 17 00:00:00 2001 From: Chengpeng Yan <41809508+Reminiscent@users.noreply.github.com> Date: Mon, 1 Mar 2021 16:40:50 +0800 Subject: [PATCH] statistics: enables global-level stats to be generated in fast analyze mode (#22931) --- errno/errcode.go | 1 + errno/errname.go | 3 ++- errors.toml | 5 +++++ executor/analyze.go | 8 ++++++-- executor/analyze_test.go | 20 ++++++++++++++++++++ planner/core/planbuilder.go | 15 --------------- statistics/handle/handle.go | 10 +++++++--- statistics/table.go | 6 +++--- types/errors.go | 3 +++ 9 files changed, 47 insertions(+), 24 deletions(-) diff --git a/errno/errcode.go b/errno/errcode.go index 20837dad18..24dc9824df 100644 --- a/errno/errcode.go +++ b/errno/errcode.go @@ -991,6 +991,7 @@ const ( ErrInvalidTableSample = 8128 ErrJSONObjectKeyTooLong = 8129 ErrMultiStatementDisabled = 8130 + ErrBuildGlobalLevelStatsFailed = 8131 // Error codes used by TiDB ddl package ErrUnsupportedDDLOperation = 8200 diff --git a/errno/errname.go b/errno/errname.go index 76e385c255..bef12b1ef6 100644 --- a/errno/errname.go +++ b/errno/errname.go @@ -1032,7 +1032,8 @@ var MySQLErrName = map[uint16]*mysql.ErrMessage{ ErrInvalidTableSample: mysql.Message("Invalid TABLESAMPLE: %s", nil), - ErrJSONObjectKeyTooLong: mysql.Message("TiDB does not yet support JSON objects with the key length >= 65536", nil), + ErrJSONObjectKeyTooLong: mysql.Message("TiDB does not yet support JSON objects with the key length >= 65536", nil), + ErrBuildGlobalLevelStatsFailed: mysql.Message("Build global-level stats failed due to missing partition-level stats", nil), ErrInvalidPlacementSpec: mysql.Message("Invalid placement policy '%s': %s", nil), ErrPlacementPolicyCheck: mysql.Message("Placement policy didn't meet the constraint, reason: %s", nil), diff --git a/errors.toml b/errors.toml index 1848a80016..42a91c57c3 100644 --- a/errors.toml +++ b/errors.toml @@ -1571,6 +1571,11 @@ error = ''' TiDB does not yet support JSON 
objects with the key length >= 65536 ''' +["types:8131"] +error = ''' +Build global-level stats failed due to missing partition-level stats +''' + ["variable:1193"] error = ''' Unknown system variable '%-.64s' diff --git a/executor/analyze.go b/executor/analyze.go index 1bb8dc00b9..870140bbc2 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -171,6 +171,11 @@ func (e *AnalyzeExec) Next(ctx context.Context, req *chunk.Chunk) error { sc := e.ctx.GetSessionVars().StmtCtx globalStats, err := statsHandle.MergePartitionStats2GlobalStats(sc, infoschema.GetInfoSchema(e.ctx), globalStatsID.tableID, info.isIndex, info.idxID) if err != nil { + if types.ErrBuildGlobalLevelStatsFailed.Equal(err) { + // When we find some partition-level stats are missing, we need to report warning. + sc.AppendWarning(err) + continue + } return err } for i := 0; i < globalStats.Num; i++ { @@ -809,8 +814,7 @@ func (e *AnalyzeFastExec) calculateEstimateSampleStep() (err error) { sql := new(strings.Builder) sqlexec.MustFormatSQL(sql, "select count(*) from %n.%n", dbInfo.Name.L, e.tblInfo.Name.L) - pruneMode := variable.PartitionPruneMode(e.ctx.GetSessionVars().PartitionPruneMode.Load()) - if pruneMode != variable.Dynamic && e.tblInfo.ID != e.tableID.GetStatisticsID() { + if e.tblInfo.ID != e.tableID.GetStatisticsID() { for _, definition := range e.tblInfo.Partition.Definitions { if definition.ID == e.tableID.GetStatisticsID() { sqlexec.MustFormatSQL(sql, " partition(%n)", definition.Name.L) diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 5dc3403e79..55e8d1621f 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -473,6 +473,26 @@ func (s *testFastAnalyze) TestFastAnalyze(c *C) { "└─IndexRangeScan 2.00 cop[tikv] table:t3, partition:p1, index:k(v) range:[3,3], keep order:false", )) tk.MustExec(`set @@tidb_partition_prune_mode='` + string(variable.Dynamic) + `'`) + + // test fast analyze in dynamic mode + tk.MustExec("drop table if exists t4;") + 
tk.MustExec("create table t4(a int, b int) PARTITION BY HASH(a) PARTITIONS 2;") + tk.MustExec("insert into t4 values(1,1),(3,3),(4,4),(2,2),(5,5);") + // Because the statistics of partition p0 are still missing, the construction of global-level stats will fail. + tk.MustExec("analyze table t4 partition p1;") + tk.MustQuery("show warnings").Check(testkit.Rows("Warning 8131 Build global-level stats failed due to missing partition-level stats")) + // Although building the global-level stats failed, the partition-level stats for partition p1 were built successfully. + result := tk.MustQuery("show stats_meta where table_name = 't4'").Sort() + c.Assert(len(result.Rows()), Equals, 1) + c.Assert(result.Rows()[0][5], Equals, "3") + // Now we have the partition-level stats for partition p1. We still need the stats for partition p0 before the global-level stats can be built. + tk.MustExec("analyze table t4 partition p0;") + tk.MustQuery("show warnings").Check(testkit.Rows()) + result = tk.MustQuery("show stats_meta where table_name = 't4'").Sort() + c.Assert(len(result.Rows()), Equals, 3) + c.Assert(result.Rows()[0][5], Equals, "5") + c.Assert(result.Rows()[1][5], Equals, "2") + c.Assert(result.Rows()[2][5], Equals, "3") } func (s *testSuite1) TestIssue15993(c *C) { diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index b470b79850..8e413424cc 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -1636,11 +1636,6 @@ func getPhysicalIDsAndPartitionNames(tblInfo *model.TableInfo, partitionNames [] func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64, version int) (Plan, error) { p := &Analyze{Opts: opts} - pruneMode := variable.PartitionPruneMode(b.ctx.GetSessionVars().PartitionPruneMode.Load()) - if len(as.PartitionNames) > 0 && pruneMode == variable.Dynamic { - logutil.BgLogger().Info("analyze partition didn't affect in dynamic-prune-mode", zap.String("partitions", as.PartitionNames[0].L)) - return p, nil - } 
for _, tbl := range as.TableNames { if tbl.TableInfo.IsView() { return nil, errors.Errorf("analyze view %s is not supported now.", tbl.Name.O) @@ -1713,11 +1708,6 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64, version int) (Plan, error) { p := &Analyze{Opts: opts} tblInfo := as.TableNames[0].TableInfo - pruneMode := variable.PartitionPruneMode(b.ctx.GetSessionVars().PartitionPruneMode.Load()) - if len(as.PartitionNames) > 0 && pruneMode == variable.Dynamic { - logutil.BgLogger().Info("analyze partition didn't affect in dynamic-prune-mode", zap.String("table", tblInfo.Name.L), zap.String("partitions", as.PartitionNames[0].L)) - return p, nil - } physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames) if err != nil { return nil, err @@ -1778,11 +1768,6 @@ func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt, opts map[ast.A func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64, version int) (Plan, error) { p := &Analyze{Opts: opts} tblInfo := as.TableNames[0].TableInfo - pruneMode := variable.PartitionPruneMode(b.ctx.GetSessionVars().PartitionPruneMode.Load()) - if len(as.PartitionNames) > 0 && pruneMode == variable.Dynamic { - logutil.BgLogger().Info("analyze partition didn't affect in dynamic-prune-mode", zap.String("table", tblInfo.Name.L), zap.String("partitions", as.PartitionNames[0].L)) - return p, nil - } physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames) if err != nil { return nil, err diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 8949e147b8..82963963db 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -354,18 +354,22 @@ func (h *Handle) MergePartitionStats2GlobalStats(sc *stmtctx.StatementContext, i if err != nil { return } + // if 
the err == nil && partitionStats == nil, it means we lack the partition-level stats which the physicalID is equal to partitionID. if partitionStats == nil { - err = errors.Errorf("[stats] error occurred when read partition-level stats of the table with tableID %d and partitionID %d", physicalID, partitionID) + err = types.ErrBuildGlobalLevelStatsFailed return } - globalStats.Count += partitionStats.Count for i := 0; i < globalStats.Num; i++ { ID := tableInfo.Columns[i].ID if isIndex != 0 { // If the statistics is the index stats, we should use the index ID to replace the column ID. ID = idxID } - hg, cms, topN, fms := partitionStats.GetStatsInfo(ID, isIndex == 1) + count, hg, cms, topN, fms := partitionStats.GetStatsInfo(ID, isIndex == 1) + if i == 0 { + // In a partition, we will only update globalStats.Count once + globalStats.Count += count + } allHg[i] = append(allHg[i], hg) allCms[i] = append(allCms[i], cms) allTopN[i] = append(allTopN[i], topN) diff --git a/statistics/table.go b/statistics/table.go index 94f41879f0..69a71ee448 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -198,13 +198,13 @@ func (t *Table) ColumnByName(colName string) *Column { } // GetStatsInfo returns their statistics according to the ID of the column or index, including histogram, CMSketch, TopN and FMSketch. 
-func (t *Table) GetStatsInfo(ID int64, isIndex bool) (*Histogram, *CMSketch, *TopN, *FMSketch) { +func (t *Table) GetStatsInfo(ID int64, isIndex bool) (int64, *Histogram, *CMSketch, *TopN, *FMSketch) { if isIndex { idxStatsInfo := t.Indices[ID] - return idxStatsInfo.Histogram.Copy(), idxStatsInfo.CMSketch.Copy(), idxStatsInfo.TopN.Copy(), nil + return int64(idxStatsInfo.TotalRowCount()), idxStatsInfo.Histogram.Copy(), idxStatsInfo.CMSketch.Copy(), idxStatsInfo.TopN.Copy(), nil } colStatsInfo := t.Columns[ID] - return colStatsInfo.Histogram.Copy(), colStatsInfo.CMSketch.Copy(), colStatsInfo.TopN.Copy(), colStatsInfo.FMSketch.Copy() + return int64(colStatsInfo.TotalRowCount()), colStatsInfo.Histogram.Copy(), colStatsInfo.CMSketch.Copy(), colStatsInfo.TopN.Copy(), colStatsInfo.FMSketch.Copy() } type tableColumnID struct { diff --git a/types/errors.go b/types/errors.go index 21d6a88a25..2fbf4e1e5f 100644 --- a/types/errors.go +++ b/types/errors.go @@ -83,4 +83,7 @@ var ( ErrWrongValue = dbterror.ClassTypes.NewStdErr(mysql.ErrTruncatedWrongValue, mysql.MySQLErrName[mysql.ErrWrongValue]) // ErrWrongValueForType is returned when the input value is in wrong format for function. ErrWrongValueForType = dbterror.ClassTypes.NewStdErr(mysql.ErrWrongValueForType, mysql.MySQLErrName[mysql.ErrWrongValueForType]) + // ErrBuildGlobalLevelStatsFailed is returned when building the global-level stats fails because some partition-level stats are missing. + // This error is defined here to avoid an import cycle (`import cycle not allowed`). + ErrBuildGlobalLevelStatsFailed = dbterror.ClassTypes.NewStd(mysql.ErrBuildGlobalLevelStatsFailed) )