From 8a9544fef0db61047d21146e3323dfd4c6705bf8 Mon Sep 17 00:00:00 2001 From: Rustin Liu Date: Tue, 19 Dec 2023 12:43:53 +0800 Subject: [PATCH] statistics: delete old partitions info after reorganize partitions (#49522) close pingcap/tidb#48226, close pingcap/tidb#48231, close pingcap/tidb#48233 --- pkg/statistics/handle/ddl/BUILD.bazel | 3 +- pkg/statistics/handle/ddl/ddl.go | 9 +- pkg/statistics/handle/ddl/ddl_test.go | 84 +++++++++++++++---- .../handle/ddl/reorganize_partition.go | 47 +++++++++++ 4 files changed, 120 insertions(+), 23 deletions(-) create mode 100644 pkg/statistics/handle/ddl/reorganize_partition.go diff --git a/pkg/statistics/handle/ddl/BUILD.bazel b/pkg/statistics/handle/ddl/BUILD.bazel index 2e0709a587..1cbc60d88a 100644 --- a/pkg/statistics/handle/ddl/BUILD.bazel +++ b/pkg/statistics/handle/ddl/BUILD.bazel @@ -6,6 +6,7 @@ go_library( "ddl.go", "drop_partition.go", "exchange_partition.go", + "reorganize_partition.go", "truncate_partition.go", ], importpath = "github.com/pingcap/tidb/pkg/statistics/handle/ddl", @@ -30,7 +31,7 @@ go_test( timeout = "short", srcs = ["ddl_test.go"], flaky = True, - shard_count = 9, + shard_count = 10, deps = [ "//pkg/parser/model", "//pkg/planner/cardinality", diff --git a/pkg/statistics/handle/ddl/ddl.go b/pkg/statistics/handle/ddl/ddl.go index 6645ad934a..b8f1038538 100644 --- a/pkg/statistics/handle/ddl/ddl.go +++ b/pkg/statistics/handle/ddl/ddl.go @@ -125,13 +125,8 @@ func (h *ddlHandlerImpl) HandleDDLEvent(t *util.DDLEvent) error { return err } case model.ActionReorganizePartition: - globalTableInfo, addedPartInfo, _ := t.GetReorganizePartitionInfo() - for _, def := range addedPartInfo.Definitions { - // TODO: Should we trigger analyze instead of adding 0s? - if err := h.statsWriter.InsertTableStats2KV(globalTableInfo, def.ID); err != nil { - return err - } - // Do not update global stats, since the data have not changed! + if err := h.onReorganizePartitions(t); err != nil { + return err } case model.ActionAlterTablePartitioning: globalTableInfo, addedPartInfo := t.GetAddPartitioningInfo() diff --git a/pkg/statistics/handle/ddl/ddl_test.go b/pkg/statistics/handle/ddl/ddl_test.go index 5799479556..6d4ce7a9d7 100644 --- a/pkg/statistics/handle/ddl/ddl_test.go +++ b/pkg/statistics/handle/ddl/ddl_test.go @@ -254,24 +254,78 @@ PARTITION BY RANGE ( a ) ( statsTbl := h.GetPartitionStats(tableInfo, def.ID) require.False(t, statsTbl.Pseudo) } - - reorganizePartition := "alter table t reorganize partition p0,p1 into (partition p0 values less than (11))" - testKit.MustExec(reorganizePartition) - is = do.InfoSchema() - tbl, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - require.NoError(t, err) - tableInfo = tbl.Meta() - err = h.HandleDDLEvent(<-h.DDLEventCh()) - require.NoError(t, err) - require.Nil(t, h.Update(is)) - pi = tableInfo.GetPartitionInfo() - for _, def := range pi.Definitions { - statsTbl := h.GetPartitionStats(tableInfo, def.ID) - require.False(t, statsTbl.Pseudo) - } } } +func TestReorgPartitions(t *testing.T) { + store, do := testkit.CreateMockStoreAndDomain(t) + testKit := testkit.NewTestKit(t, store) + h := do.StatsHandle() + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec(` + create table t ( + a int, + b int, + primary key(a), + index idx(b) + ) + partition by range (a) ( + partition p0 values less than (6), + partition p1 values less than (11), + partition p2 values less than (16), + partition p3 values less than (21) + ) + `) + testKit.MustExec("insert into t values (1,2),(2,2),(6,2),(11,2),(16,2)") + testKit.MustExec("analyze table t") + is := do.InfoSchema() + tbl, err := is.TableByName( + model.NewCIStr("test"), model.NewCIStr("t"), + ) + require.NoError(t, err) + tableInfo := tbl.Meta() + pi := tableInfo.GetPartitionInfo() + for _, def := range pi.Definitions { + statsTbl := h.GetPartitionStats(tableInfo, def.ID) + require.False(t, statsTbl.Pseudo) + } + err = h.Update(is) + require.NoError(t, err) + // Get all the partition IDs. + partitionIDs := make(map[int64]struct{}, len(pi.Definitions)) + for _, def := range pi.Definitions { + partitionIDs[def.ID] = struct{}{} + } + + // Get partition p0 and p1's stats update version. + partitionP0ID := pi.Definitions[0].ID + partitionP1ID := pi.Definitions[1].ID + // Get it from stats_meat first. + rows := testKit.MustQuery( + "select version from mysql.stats_meta where table_id in (?, ?) order by table_id", partitionP0ID, partitionP1ID, + ).Rows() + require.Len(t, rows, 2) + versionP0 := rows[0][0].(string) + versionP1 := rows[1][0].(string) + + // Reorganize two partitions. + testKit.MustExec("alter table t reorganize partition p0, p1 into (partition p0 values less than (11))") + // Find the reorganize partition event. + reorganizePartitionEvent := findEvent(h.DDLEventCh(), model.ActionReorganizePartition) + err = h.HandleDDLEvent(reorganizePartitionEvent) + require.NoError(t, err) + require.Nil(t, h.Update(is)) + + // Check the version again. + rows = testKit.MustQuery( + "select version from mysql.stats_meta where table_id in (?, ?) order by table_id", partitionP0ID, partitionP1ID, + ).Rows() + require.Len(t, rows, 2) + require.NotEqual(t, versionP0, rows[0][0].(string)) + require.NotEqual(t, versionP1, rows[1][0].(string)) +} + func TestTruncateAPartition(t *testing.T) { store, do := testkit.CreateMockStoreAndDomain(t) testKit := testkit.NewTestKit(t, store) diff --git a/pkg/statistics/handle/ddl/reorganize_partition.go b/pkg/statistics/handle/ddl/reorganize_partition.go new file mode 100644 index 0000000000..1cc301b7b0 --- /dev/null +++ b/pkg/statistics/handle/ddl/reorganize_partition.go @@ -0,0 +1,47 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ddl + +import ( + "github.com/pingcap/tidb/pkg/statistics/handle/util" +) + +func (h *ddlHandlerImpl) onReorganizePartitions(t *util.DDLEvent) error { + globalTableInfo, + addedPartInfo, + droppedPartitionInfo := t.GetReorganizePartitionInfo() + // Avoid updating global stats as the data remains unchanged. + // For new partitions, it's crucial to correctly insert the count and modify count correctly. + // However, this is challenging due to the need to know the count of the new partitions. + // Given that a partition can be split into two, determining the count of the new partitions is so hard. + // It's acceptable to not update it immediately, + // as the new partitions will be analyzed shortly due to the absence of statistics for them. + // Therefore, the auto-analyze worker will handle them in the near future. + for _, def := range addedPartInfo.Definitions { + if err := h.statsWriter.InsertTableStats2KV(globalTableInfo, def.ID); err != nil { + return err + } + } + + // Reset the partition stats. + // It's OK to put those operations in different transactions. Because it will not affect the correctness. + for _, def := range droppedPartitionInfo.Definitions { + if err := h.statsWriter.ResetTableStats2KVForDrop(def.ID); err != nil { + return err + } + } + + return nil +}