441 lines
16 KiB
Go
441 lines
16 KiB
Go
// Copyright 2018 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package executor_test
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"strconv"
|
|
"strings"
|
|
"sync/atomic"
|
|
"testing"
|
|
|
|
"github.com/pingcap/tidb/domain"
|
|
"github.com/pingcap/tidb/executor"
|
|
"github.com/pingcap/tidb/infoschema"
|
|
"github.com/pingcap/tidb/parser/ast"
|
|
"github.com/pingcap/tidb/parser/model"
|
|
"github.com/pingcap/tidb/parser/mysql"
|
|
"github.com/pingcap/tidb/planner/core"
|
|
"github.com/pingcap/tidb/session"
|
|
"github.com/pingcap/tidb/sessionctx"
|
|
"github.com/pingcap/tidb/sessionctx/stmtctx"
|
|
"github.com/pingcap/tidb/sessionctx/variable"
|
|
"github.com/pingcap/tidb/statistics"
|
|
"github.com/pingcap/tidb/store/mockstore"
|
|
"github.com/pingcap/tidb/table"
|
|
"github.com/pingcap/tidb/testkit"
|
|
"github.com/pingcap/tidb/types"
|
|
"github.com/pingcap/tidb/util/codec"
|
|
"github.com/pingcap/tidb/util/collate"
|
|
"github.com/stretchr/testify/require"
|
|
"github.com/tikv/client-go/v2/testutils"
|
|
)
|
|
|
|
func TestAnalyzeFastSample(t *testing.T) {
|
|
var cls testutils.Cluster
|
|
store, err := mockstore.NewMockStore(
|
|
mockstore.WithClusterInspector(func(c testutils.Cluster) {
|
|
mockstore.BootstrapWithSingleStore(c)
|
|
cls = c
|
|
}),
|
|
)
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
err := store.Close()
|
|
require.NoError(t, err)
|
|
}()
|
|
var dom *domain.Domain
|
|
session.DisableStats4Test()
|
|
session.SetSchemaLease(0)
|
|
dom, err = session.BootstrapSession(store)
|
|
require.NoError(t, err)
|
|
defer dom.Close()
|
|
tk := testkit.NewTestKit(t, store)
|
|
atomic.StoreInt64(&executor.RandSeed, 123)
|
|
|
|
tk.MustExec("use test")
|
|
tk.MustExec("drop table if exists t")
|
|
tk.MustExec("create table t(a int primary key, b int, index index_b(b))")
|
|
tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
|
|
require.NoError(t, err)
|
|
tblInfo := tbl.Meta()
|
|
tid := tblInfo.ID
|
|
|
|
// construct 5 regions split by {12, 24, 36, 48}
|
|
splitKeys := generateTableSplitKeyForInt(tid, []int{12, 24, 36, 48})
|
|
manipulateCluster(cls, splitKeys)
|
|
|
|
for i := 0; i < 60; i++ {
|
|
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
|
|
}
|
|
|
|
handleCols := core.BuildHandleColsForAnalyze(tk.Session(), tblInfo, true, nil)
|
|
var colsInfo []*model.ColumnInfo //nolint: prealloc
|
|
var indicesInfo []*model.IndexInfo
|
|
for _, col := range tblInfo.Columns {
|
|
if mysql.HasPriKeyFlag(col.GetFlag()) {
|
|
continue
|
|
}
|
|
colsInfo = append(colsInfo, col)
|
|
}
|
|
for _, idx := range tblInfo.Indices {
|
|
if idx.State == model.StatePublic {
|
|
indicesInfo = append(indicesInfo, idx)
|
|
}
|
|
}
|
|
opts := make(map[ast.AnalyzeOptionType]uint64)
|
|
opts[ast.AnalyzeOptNumSamples] = 20
|
|
// Get a start_ts later than the above inserts.
|
|
tk.MustExec("begin")
|
|
txn, err := tk.Session().Txn(false)
|
|
require.NoError(t, err)
|
|
ts := txn.StartTS()
|
|
tk.MustExec("commit")
|
|
mockExec := &executor.AnalyzeTestFastExec{
|
|
Ctx: tk.Session().(sessionctx.Context),
|
|
HandleCols: handleCols,
|
|
ColsInfo: colsInfo,
|
|
IdxsInfo: indicesInfo,
|
|
Concurrency: 1,
|
|
Snapshot: ts,
|
|
TableID: statistics.AnalyzeTableID{
|
|
PartitionID: -1,
|
|
TableID: tbl.(table.PhysicalTable).GetPhysicalID(),
|
|
},
|
|
TblInfo: tblInfo,
|
|
Opts: opts,
|
|
}
|
|
err = mockExec.TestFastSample()
|
|
require.NoError(t, err)
|
|
require.Len(t, mockExec.Collectors, 3)
|
|
for i := 0; i < 2; i++ {
|
|
samples := mockExec.Collectors[i].Samples
|
|
require.Len(t, samples, 20)
|
|
for j := 1; j < 20; j++ {
|
|
cmp, err := samples[j].Value.Compare(tk.Session().GetSessionVars().StmtCtx, &samples[j-1].Value, collate.GetBinaryCollator())
|
|
require.NoError(t, err)
|
|
require.Greater(t, cmp, 0)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestFastAnalyze(t *testing.T) {
|
|
t.Skip("Skip this unstable test(#25782) and bring it back before 2021-07-29.")
|
|
var cls testutils.Cluster
|
|
store, err := mockstore.NewMockStore(
|
|
mockstore.WithClusterInspector(func(c testutils.Cluster) {
|
|
mockstore.BootstrapWithSingleStore(c)
|
|
cls = c
|
|
}),
|
|
)
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
err := store.Close()
|
|
require.NoError(t, err)
|
|
}()
|
|
var dom *domain.Domain
|
|
session.DisableStats4Test()
|
|
session.SetSchemaLease(0)
|
|
dom, err = session.BootstrapSession(store)
|
|
require.NoError(t, err)
|
|
dom.SetStatsUpdating(true)
|
|
defer dom.Close()
|
|
tk := testkit.NewTestKit(t, store)
|
|
atomic.StoreInt64(&executor.RandSeed, 123)
|
|
|
|
tk.MustExec("use test")
|
|
tk.MustExec("drop table if exists t")
|
|
tk.MustExec("create table t(a int primary key, b int, c char(10), index index_b(b))")
|
|
tk.MustExec("set @@session.tidb_enable_fast_analyze=1")
|
|
tk.MustExec("set @@session.tidb_build_stats_concurrency=1")
|
|
tk.MustExec("set @@tidb_analyze_version = 1")
|
|
// Should not panic.
|
|
tk.MustExec("analyze table t")
|
|
tblInfo, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
|
|
require.NoError(t, err)
|
|
tid := tblInfo.Meta().ID
|
|
|
|
// construct 6 regions split by {10, 20, 30, 40, 50}
|
|
splitKeys := generateTableSplitKeyForInt(tid, []int{10, 20, 30, 40, 50})
|
|
manipulateCluster(cls, splitKeys)
|
|
|
|
for i := 0; i < 20; i++ {
|
|
tk.MustExec(fmt.Sprintf(`insert into t values (%d, %d, "char")`, i*3, i*3))
|
|
}
|
|
tk.MustExec("analyze table t with 5 buckets, 6 samples")
|
|
|
|
is := tk.Session().(sessionctx.Context).GetInfoSchema().(infoschema.InfoSchema)
|
|
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
|
|
require.NoError(t, err)
|
|
tableInfo := table.Meta()
|
|
tbl := dom.StatsHandle().GetTableStats(tableInfo)
|
|
// TODO(tangenta): add stats_meta.row_count assertion.
|
|
for _, col := range tbl.Columns {
|
|
ok, err := checkHistogram(tk.Session().GetSessionVars().StmtCtx, &col.Histogram)
|
|
require.NoError(t, err)
|
|
require.True(t, ok)
|
|
}
|
|
for _, idx := range tbl.Indices {
|
|
ok, err := checkHistogram(tk.Session().GetSessionVars().StmtCtx, &idx.Histogram)
|
|
require.NoError(t, err)
|
|
require.True(t, ok)
|
|
}
|
|
|
|
// Test CM Sketch built from fast analyze.
|
|
tk.MustExec("create table t1(a int, b int, index idx(a, b))")
|
|
// Should not panic.
|
|
tk.MustExec("analyze table t1")
|
|
tk.MustExec("insert into t1 values (1,1),(1,1),(1,2),(1,2)")
|
|
tk.MustExec("analyze table t1")
|
|
tk.MustQuery("explain format = 'brief' select a from t1 where a = 1").Check(testkit.Rows(
|
|
"IndexReader 4.00 root index:IndexRangeScan",
|
|
"└─IndexRangeScan 4.00 cop[tikv] table:t1, index:idx(a, b) range:[1,1], keep order:false"))
|
|
tk.MustQuery("explain format = 'brief' select a, b from t1 where a = 1 and b = 1").Check(testkit.Rows(
|
|
"IndexReader 2.00 root index:IndexRangeScan",
|
|
"└─IndexRangeScan 2.00 cop[tikv] table:t1, index:idx(a, b) range:[1 1,1 1], keep order:false"))
|
|
tk.MustQuery("explain format = 'brief' select a, b from t1 where a = 1 and b = 2").Check(testkit.Rows(
|
|
"IndexReader 2.00 root index:IndexRangeScan",
|
|
"└─IndexRangeScan 2.00 cop[tikv] table:t1, index:idx(a, b) range:[1 2,1 2], keep order:false"))
|
|
|
|
tk.MustExec("create table t2 (a bigint unsigned, primary key(a))")
|
|
tk.MustExec("insert into t2 values (0), (18446744073709551615)")
|
|
tk.MustExec("analyze table t2")
|
|
tk.MustQuery("show stats_buckets where table_name = 't2'").Check(testkit.Rows(
|
|
"test t2 a 0 0 1 1 0 0 0",
|
|
"test t2 a 0 1 2 1 18446744073709551615 18446744073709551615 0"))
|
|
|
|
tk.MustExec(`set @@tidb_partition_prune_mode='` + string(variable.Static) + `'`)
|
|
tk.MustExec(`create table t3 (id int, v int, primary key(id), index k(v)) partition by hash (id) partitions 4`)
|
|
tk.MustExec(`insert into t3 values(1, 1), (2, 2), (5, 1), (9, 3), (13, 3), (17, 5), (3, 0)`)
|
|
tk.MustExec(`analyze table t3`)
|
|
tk.MustQuery(`explain format = 'brief' select v from t3 partition(p1) where v = 3`).Check(testkit.Rows(
|
|
"IndexReader 2.00 root index:IndexRangeScan",
|
|
"└─IndexRangeScan 2.00 cop[tikv] table:t3, partition:p1, index:k(v) range:[3,3], keep order:false",
|
|
))
|
|
//nolint:revive,all_revive
|
|
tk.MustExec(`set @@tidb_partition_prune_mode='` + string(variable.Dynamic) + `'`)
|
|
|
|
// global-stats depends on stats-ver2, but stats-ver2 is not compatible with fast-analyze, so forbid using global-stats with fast-analyze now.
|
|
// TODO: add more test cases about global-stats with fast-analyze after resolving the compatibility problem.
|
|
/*
|
|
// test fast analyze in dynamic mode
|
|
tk.MustExec("drop table if exists t4;")
|
|
tk.MustExec("create table t4(a int, b int) PARTITION BY HASH(a) PARTITIONS 2;")
|
|
tk.MustExec("insert into t4 values(1,1),(3,3),(4,4),(2,2),(5,5);")
|
|
// Because the statistics of partition p1 are missing, the construction of global-level stats will fail.
|
|
tk.MustExec("analyze table t4 partition p1;")
|
|
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 8131 Build global-level stats failed due to missing partition-level stats"))
|
|
// Although the global-level stats build failed, we build partition-level stats for partition p1 success.
|
|
result := tk.MustQuery("show stats_meta where table_name = 't4'").Sort()
|
|
c.Assert(len(result.Rows()), Equals, 1)
|
|
c.Assert(result.Rows()[0][5], Equals, "3")
|
|
// Now, we have the partition-level stats for partition p0. We need get the stats for partition p1. And build the global-level stats.
|
|
tk.MustExec("analyze table t4 partition p0;")
|
|
tk.MustQuery("show warnings").Check(testkit.Rows())
|
|
result = tk.MustQuery("show stats_meta where table_name = 't4'").Sort()
|
|
c.Assert(len(result.Rows()), Equals, 3)
|
|
c.Assert(result.Rows()[0][5], Equals, "5")
|
|
c.Assert(result.Rows()[1][5], Equals, "2")
|
|
c.Assert(result.Rows()[2][5], Equals, "3")
|
|
*/
|
|
}
|
|
|
|
func checkHistogram(sc *stmtctx.StatementContext, hg *statistics.Histogram) (bool, error) {
|
|
for i := 0; i < len(hg.Buckets); i++ {
|
|
lower, upper := hg.GetLower(i), hg.GetUpper(i)
|
|
cmp, err := upper.Compare(sc, lower, collate.GetBinaryCollator())
|
|
if cmp < 0 || err != nil {
|
|
return false, err
|
|
}
|
|
if i == 0 {
|
|
continue
|
|
}
|
|
previousUpper := hg.GetUpper(i - 1)
|
|
cmp, err = lower.Compare(sc, previousUpper, collate.GetBinaryCollator())
|
|
if cmp <= 0 || err != nil {
|
|
return false, err
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
func TestAnalyzeIndexExtractTopN(t *testing.T) {
|
|
_ = checkHistogram
|
|
t.Skip("unstable, skip it and fix it before 20210618")
|
|
store, err := mockstore.NewMockStore()
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
err := store.Close()
|
|
require.NoError(t, err)
|
|
}()
|
|
var dom *domain.Domain
|
|
session.DisableStats4Test()
|
|
session.SetSchemaLease(0)
|
|
dom, err = session.BootstrapSession(store)
|
|
require.NoError(t, err)
|
|
defer dom.Close()
|
|
tk := testkit.NewTestKit(t, store)
|
|
|
|
tk.MustExec("create database test_index_extract_topn")
|
|
tk.MustExec("use test_index_extract_topn")
|
|
tk.MustExec("drop table if exists t")
|
|
tk.MustExec("create table t(a int, b int, index idx(a, b))")
|
|
tk.MustExec("insert into t values(1, 1), (1, 1), (1, 2), (1, 2)")
|
|
tk.MustExec("set @@session.tidb_analyze_version=2")
|
|
tk.MustExec("analyze table t")
|
|
|
|
is := tk.Session().(sessionctx.Context).GetInfoSchema().(infoschema.InfoSchema)
|
|
table, err := is.TableByName(model.NewCIStr("test_index_extract_topn"), model.NewCIStr("t"))
|
|
require.NoError(t, err)
|
|
tableInfo := table.Meta()
|
|
tbl := dom.StatsHandle().GetTableStats(tableInfo)
|
|
|
|
// Construct TopN, should be (1, 1) -> 2 and (1, 2) -> 2
|
|
topn := statistics.NewTopN(2)
|
|
{
|
|
key1, err := codec.EncodeKey(tk.Session().GetSessionVars().StmtCtx, nil, types.NewIntDatum(1), types.NewIntDatum(1))
|
|
require.NoError(t, err)
|
|
topn.AppendTopN(key1, 2)
|
|
key2, err := codec.EncodeKey(tk.Session().GetSessionVars().StmtCtx, nil, types.NewIntDatum(1), types.NewIntDatum(2))
|
|
require.NoError(t, err)
|
|
topn.AppendTopN(key2, 2)
|
|
}
|
|
for _, idx := range tbl.Indices {
|
|
ok, err := checkHistogram(tk.Session().GetSessionVars().StmtCtx, &idx.Histogram)
|
|
require.NoError(t, err)
|
|
require.True(t, ok)
|
|
require.True(t, idx.TopN.Equal(topn))
|
|
}
|
|
}
|
|
|
|
func TestAnalyzePartitionTableForFloat(t *testing.T) {
|
|
store := testkit.CreateMockStore(t)
|
|
tk := testkit.NewTestKit(t, store)
|
|
tk.MustExec("set @@tidb_partition_prune_mode='dynamic'")
|
|
tk.MustExec("use test")
|
|
tk.MustExec("CREATE TABLE t1 ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, num float(9,8) DEFAULT NULL, PRIMARY KEY (id) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin PARTITION BY HASH (id) PARTITIONS 128;")
|
|
// To reproduce the error we meet in https://github.com/pingcap/tidb/issues/35910, we should use the data provided in this issue
|
|
b, err := ioutil.ReadFile("testdata/analyze_test_data.sql")
|
|
require.NoError(t, err)
|
|
sqls := strings.Split(string(b), ";")
|
|
for _, sql := range sqls {
|
|
if len(sql) < 1 {
|
|
continue
|
|
}
|
|
tk.MustExec(sql)
|
|
}
|
|
tk.MustExec("analyze table t1")
|
|
}
|
|
|
|
func TestAnalyzePartitionTableByConcurrencyInDynamic(t *testing.T) {
|
|
store := testkit.CreateMockStore(t)
|
|
tk := testkit.NewTestKit(t, store)
|
|
tk.MustExec("set @@tidb_partition_prune_mode='dynamic'")
|
|
tk.MustExec("use test")
|
|
tk.MustExec("create table t(id int) partition by hash(id) partitions 4")
|
|
testcases := []struct {
|
|
concurrency string
|
|
}{
|
|
{
|
|
concurrency: "1",
|
|
},
|
|
{
|
|
concurrency: "2",
|
|
},
|
|
{
|
|
concurrency: "3",
|
|
},
|
|
{
|
|
concurrency: "4",
|
|
},
|
|
{
|
|
concurrency: "5",
|
|
},
|
|
}
|
|
// assert empty table
|
|
for _, tc := range testcases {
|
|
concurrency := tc.concurrency
|
|
fmt.Println("testcase ", concurrency)
|
|
tk.MustExec(fmt.Sprintf("set @@tidb_merge_partition_stats_concurrency=%v", concurrency))
|
|
tk.MustQuery("select @@tidb_merge_partition_stats_concurrency").Check(testkit.Rows(concurrency))
|
|
tk.MustExec(fmt.Sprintf("set @@tidb_analyze_partition_concurrency=%v", concurrency))
|
|
tk.MustQuery("select @@tidb_analyze_partition_concurrency").Check(testkit.Rows(concurrency))
|
|
|
|
tk.MustExec("analyze table t")
|
|
tk.MustQuery("show stats_topn where partition_name = 'global' and table_name = 't'")
|
|
}
|
|
|
|
for i := 1; i <= 500; i++ {
|
|
for j := 1; j <= 20; j++ {
|
|
tk.MustExec(fmt.Sprintf("insert into t (id) values (%v)", j))
|
|
}
|
|
}
|
|
var expected [][]interface{}
|
|
for i := 1; i <= 20; i++ {
|
|
expected = append(expected, []interface{}{
|
|
strconv.FormatInt(int64(i), 10), "500",
|
|
})
|
|
}
|
|
testcases = []struct {
|
|
concurrency string
|
|
}{
|
|
{
|
|
concurrency: "1",
|
|
},
|
|
{
|
|
concurrency: "2",
|
|
},
|
|
{
|
|
concurrency: "3",
|
|
},
|
|
{
|
|
concurrency: "4",
|
|
},
|
|
{
|
|
concurrency: "5",
|
|
},
|
|
}
|
|
for _, tc := range testcases {
|
|
concurrency := tc.concurrency
|
|
fmt.Println("testcase ", concurrency)
|
|
tk.MustExec(fmt.Sprintf("set @@tidb_merge_partition_stats_concurrency=%v", concurrency))
|
|
tk.MustQuery("select @@tidb_merge_partition_stats_concurrency").Check(testkit.Rows(concurrency))
|
|
tk.MustExec("analyze table t")
|
|
tk.MustQuery("show stats_topn where partition_name = 'global' and table_name = 't'").CheckAt([]int{5, 6}, expected)
|
|
}
|
|
}
|
|
|
|
func TestMergeGlobalStatsWithUnAnalyzedPartition(t *testing.T) {
|
|
store := testkit.CreateMockStore(t)
|
|
tk := testkit.NewTestKit(t, store)
|
|
tk.MustExec("use test")
|
|
tk.MustExec("set tidb_partition_prune_mode=dynamic;")
|
|
tk.MustExec("CREATE TABLE `t` ( `id` int(11) DEFAULT NULL, `a` int(11) DEFAULT NULL, `b` int(11) DEFAULT NULL, `c` int(11) DEFAULT NULL ) PARTITION BY RANGE (`id`) (PARTITION `p0` VALUES LESS THAN (3), PARTITION `p1` VALUES LESS THAN (7), PARTITION `p2` VALUES LESS THAN (11));")
|
|
tk.MustExec("insert into t values (1,1,1,1),(2,2,2,2),(4,4,4,4),(5,5,5,5),(6,6,6,6),(8,8,8,8),(9,9,9,9);")
|
|
tk.MustExec("create index idxa on t (a);")
|
|
tk.MustExec("create index idxb on t (b);")
|
|
tk.MustExec("create index idxc on t (c);")
|
|
tk.MustExec("analyze table t partition p0 index idxa;")
|
|
tk.MustExec("analyze table t partition p1 index idxb;")
|
|
tk.MustExec("analyze table t partition p2 index idxc;")
|
|
tk.MustQuery("show warnings").Check(testkit.Rows(
|
|
"Warning 1105 The version 2 would collect all statistics not only the selected indexes",
|
|
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2"))
|
|
tk.MustExec("analyze table t partition p0;")
|
|
tk.MustQuery("show warnings").Check(testkit.Rows(
|
|
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0"))
|
|
}
|