tidb/statistics/histogram_test.go

// Copyright 2018 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package statistics

import (
"fmt"
"testing"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/mock"
"github.com/pingcap/tidb/util/ranger"
"github.com/stretchr/testify/require"
)
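
// TestNewHistogramBySelectivity builds int, string, and index histograms,
// prunes them with NewHistCollBySelectivity over a set of ranges, and
// compares the pruned histograms against expected string dumps.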
func TestNewHistogramBySelectivity(t *testing.T) {
coll := &HistColl{
Count: 330,
Columns: make(map[int64]*Column),
Indices: make(map[int64]*Index),
}
ctx := mock.NewContext()
sc := ctx.GetSessionVars().StmtCtx
intCol := &Column{}
intCol.Histogram = *NewHistogram(1, 30, 30, 0, types.NewFieldType(mysql.TypeLonglong), chunk.InitialCapacity, 0)
intCol.IsHandle = true
intCol.StatsLoadedStatus = NewStatsFullLoadStatus()
for i := 0; i < 10; i++ {
intCol.Bounds.AppendInt64(0, int64(i*3))
intCol.Bounds.AppendInt64(0, int64(i*3+2))
intCol.Buckets = append(intCol.Buckets, Bucket{Repeat: 10, Count: int64(30*i + 30)})
}
coll.Columns[1] = intCol
node := &StatsNode{ID: 1, Tp: PkType, Selectivity: 0.56}
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(nil), HighVal: types.MakeDatums(nil), Collators: collate.GetBinaryCollatorSlice(1)})
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: []types.Datum{types.MinNotNullDatum()}, HighVal: types.MakeDatums(2), Collators: collate.GetBinaryCollatorSlice(1)})
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(5), HighVal: types.MakeDatums(6), Collators: collate.GetBinaryCollatorSlice(1)})
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(8), HighVal: types.MakeDatums(10), Collators: collate.GetBinaryCollatorSlice(1)})
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(13), HighVal: types.MakeDatums(13), Collators: collate.GetBinaryCollatorSlice(1)})
node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(25), HighVal: []types.Datum{types.MaxValueDatum()}, Collators: collate.GetBinaryCollatorSlice(1)})
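// Expected dump of the int column's histogram after pruning by the ranges above.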
intColResult := `column:1 ndv:16 totColSize:0
num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 ndv: 0
num: 11 lower_bound: 6 upper_bound: 8 repeats: 0 ndv: 0
num: 30 lower_bound: 9 upper_bound: 11 repeats: 0 ndv: 0
num: 1 lower_bound: 12 upper_bound: 14 repeats: 0 ndv: 0
num: 30 lower_bound: 27 upper_bound: 29 repeats: 0 ndv: 0`
stringCol := &Column{}
stringCol.StatsLoadedStatus = NewStatsFullLoadStatus()
stringCol.Histogram = *NewHistogram(2, 15, 30, 0, types.NewFieldType(mysql.TypeString), chunk.InitialCapacity, 0)
stringCol.Bounds.AppendString(0, "a")
stringCol.Bounds.AppendString(0, "aaaabbbb")
stringCol.Buckets = append(stringCol.Buckets, Bucket{Repeat: 10, Count: 60})
stringCol.Bounds.AppendString(0, "bbbb")
stringCol.Bounds.AppendString(0, "fdsfdsfds")
stringCol.Buckets = append(stringCol.Buckets, Bucket{Repeat: 10, Count: 120})
stringCol.Bounds.AppendString(0, "kkkkk")
stringCol.Bounds.AppendString(0, "ooooo")
stringCol.Buckets = append(stringCol.Buckets, Bucket{Repeat: 10, Count: 180})
stringCol.Bounds.AppendString(0, "oooooo")
stringCol.Bounds.AppendString(0, "sssss")
stringCol.Buckets = append(stringCol.Buckets, Bucket{Repeat: 10, Count: 240})
stringCol.Bounds.AppendString(0, "ssssssu")
stringCol.Bounds.AppendString(0, "yyyyy")
stringCol.Buckets = append(stringCol.Buckets, Bucket{Repeat: 10, Count: 300})
stringCol.PreCalculateScalar()
coll.Columns[2] = stringCol
node2 := &StatsNode{ID: 2, Tp: ColType, Selectivity: 0.6}
node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums(nil), HighVal: types.MakeDatums(nil), Collators: collate.GetBinaryCollatorSlice(1)})
node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: []types.Datum{types.MinNotNullDatum()}, HighVal: types.MakeDatums("aaa"), Collators: collate.GetBinaryCollatorSlice(1)})
node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("aaaaaaaaaaa"), HighVal: types.MakeDatums("aaaaaaaaaaaaaa"), Collators: collate.GetBinaryCollatorSlice(1)})
node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("bbb"), HighVal: types.MakeDatums("cccc"), Collators: collate.GetBinaryCollatorSlice(1)})
node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ddd"), HighVal: types.MakeDatums("fff"), Collators: collate.GetBinaryCollatorSlice(1)})
node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ggg"), HighVal: []types.Datum{types.MaxValueDatum()}, Collators: collate.GetBinaryCollatorSlice(1)})
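// Expected dump of the string column's histogram after pruning by the ranges above.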
stringColResult := `column:2 ndv:9 totColSize:0
num: 60 lower_bound: a upper_bound: aaaabbbb repeats: 0 ndv: 0
num: 52 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 0 ndv: 0
num: 54 lower_bound: kkkkk upper_bound: ooooo repeats: 0 ndv: 0
num: 60 lower_bound: oooooo upper_bound: sssss repeats: 0 ndv: 0
num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0 ndv: 0`
newColl := coll.NewHistCollBySelectivity(ctx, []*StatsNode{node, node2})
require.Equal(t, intColResult, newColl.Columns[1].String())
require.Equal(t, stringColResult, newColl.Columns[2].String())
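
// Prune an index histogram the same way.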
idx := &Index{Info: &model.IndexInfo{Columns: []*model.IndexColumn{{Name: model.NewCIStr("a"), Offset: 0}}}}
coll.Indices[0] = idx
idx.Histogram = *NewHistogram(0, 15, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0)
for i := 0; i < 5; i++ {
low, err1 := codec.EncodeKey(sc, nil, types.NewIntDatum(int64(i*3)))
require.NoError(t, err1)
high, err2 := codec.EncodeKey(sc, nil, types.NewIntDatum(int64(i*3+2)))
require.NoError(t, err2)
idx.Bounds.AppendBytes(0, low)
idx.Bounds.AppendBytes(0, high)
idx.Buckets = append(idx.Buckets, Bucket{Repeat: 10, Count: int64(30*i + 30)})
}
idx.PreCalculateScalar()
node3 := &StatsNode{ID: 0, Tp: IndexType, Selectivity: 0.47}
node3.Ranges = append(node3.Ranges, &ranger.Range{LowVal: types.MakeDatums(2), HighVal: types.MakeDatums(3), Collators: collate.GetBinaryCollatorSlice(1)})
node3.Ranges = append(node3.Ranges, &ranger.Range{LowVal: types.MakeDatums(10), HighVal: types.MakeDatums(13), Collators: collate.GetBinaryCollatorSlice(1)})
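// Expected dump of the index histogram after pruning by the two ranges above.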
idxResult := `index:0 ndv:7
num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 ndv: 0
num: 30 lower_bound: 3 upper_bound: 5 repeats: 10 ndv: 0
num: 30 lower_bound: 9 upper_bound: 11 repeats: 10 ndv: 0
num: 30 lower_bound: 12 upper_bound: 14 repeats: 10 ndv: 0`
newColl = coll.NewHistCollBySelectivity(ctx, []*StatsNode{node3})
require.Equal(t, idxResult, newColl.Indices[0].String())
}
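
// TestTruncateHistogram checks that truncating a histogram to its current
// bucket count is a no-op and that truncating to zero buckets empties it.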
func TestTruncateHistogram(t *testing.T) {
hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLonglong), 1, 0)
low, high := types.NewIntDatum(0), types.NewIntDatum(1)
hist.AppendBucket(&low, &high, 0, 1)
newHist := hist.TruncateHistogram(1)
require.True(t, HistogramEqual(hist, newHist, true))
newHist = hist.TruncateHistogram(0)
require.Equal(t, 0, newHist.Len())
}
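
// TestValueToString4InvalidKey verifies that ValueToString still renders an
// encoded key whose trailing byte is not a valid type flag, printing the
// invalid suffix verbatim.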
func TestValueToString4InvalidKey(t *testing.T) {
bytes, err := codec.EncodeKey(nil, nil, types.NewDatum(1), types.NewDatum(0.5))
require.NoError(t, err)
// Append invalid flag.
bytes = append(bytes, 20)
datum := types.NewDatum(bytes)
res, err := ValueToString(nil, &datum, 3, nil)
require.NoError(t, err)
require.Equal(t, "(1, 0.5, \x14)", res)
}
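
// bucket4Test and topN4Test describe histogram buckets and popped TopN
// entries used as fixtures by the merge tests below.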
type bucket4Test struct {
lower int64
upper int64
count int64
repeat int64
ndv int64
}

type topN4Test struct {
data int64
count int64
}
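
// genHist4Test builds a blob-typed histogram whose buckets use the encoded
// int64 bounds, counts, repeats, and NDVs from the given fixtures.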
func genHist4Test(t *testing.T, buckets []*bucket4Test, totColSize int64) *Histogram {
h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeBlob), len(buckets), totColSize)
for _, bucket := range buckets {
lower, err := codec.EncodeKey(nil, nil, types.NewIntDatum(bucket.lower))
require.NoError(t, err)
upper, err := codec.EncodeKey(nil, nil, types.NewIntDatum(bucket.upper))
require.NoError(t, err)
di, du := types.NewBytesDatum(lower), types.NewBytesDatum(upper)
h.AppendBucketWithNDV(&di, &du, bucket.count, bucket.repeat, bucket.ndv)
}
return h
}
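
// TestMergePartitionLevelHist merges per-partition histograms together with
// popped TopN values into a global histogram and checks the merged bucket
// bounds, counts, repeats, NDVs, and total column size.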
func TestMergePartitionLevelHist(t *testing.T) {
type testCase struct {
partitionHists [][]*bucket4Test
totColSize []int64
popedTopN []topN4Test
expHist []*bucket4Test
expBucketNumber int64
}
tests := []testCase{
{
partitionHists: [][]*bucket4Test{
{
// Col(1) = [1, 4,|| 6, 9, 9,|| 12, 12, 12,|| 13, 14, 15]
{
lower: 1,
upper: 4,
count: 2,
repeat: 1,
ndv: 2,
},
{
lower: 6,
upper: 9,
count: 5,
repeat: 2,
ndv: 2,
},
{
lower: 12,
upper: 12,
count: 8,
repeat: 3,
ndv: 1,
},
{
lower: 13,
upper: 15,
count: 11,
repeat: 1,
ndv: 3,
},
},
// Col(2) = [2, 5,|| 6, 7, 7,|| 11, 11, 11,|| 13, 14, 17]
{
{
lower: 2,
upper: 5,
count: 2,
repeat: 1,
ndv: 2,
},
{
lower: 6,
upper: 7,
count: 5,
repeat: 2,
ndv: 2,
},
{
lower: 11,
upper: 11,
count: 8,
repeat: 3,
ndv: 1,
},
{
lower: 13,
upper: 17,
count: 11,
repeat: 1,
ndv: 3,
},
},
},
totColSize: []int64{11, 11},
popedTopN: []topN4Test{},
expHist: []*bucket4Test{
{
lower: 1,
upper: 7,
count: 7,
repeat: 3,
ndv: 5,
},
{
lower: 7,
upper: 11,
count: 13,
repeat: 3,
ndv: 3,
},
{
lower: 11,
upper: 17,
count: 22,
repeat: 1,
ndv: 6,
},
},
expBucketNumber: 3,
},
{
partitionHists: [][]*bucket4Test{
{
// Col(1) = [1, 4,|| 6, 9, 9,|| 12, 12, 12,|| 13, 14, 15]
{
lower: 1,
upper: 4,
count: 2,
repeat: 1,
ndv: 2,
},
{
lower: 6,
upper: 9,
count: 5,
repeat: 2,
ndv: 2,
},
{
lower: 12,
upper: 12,
count: 8,
repeat: 3,
ndv: 1,
},
{
lower: 13,
upper: 15,
count: 11,
repeat: 1,
ndv: 3,
},
},
// Col(2) = [2, 5,|| 6, 7, 7,|| 11, 11, 11,|| 13, 14, 17]
{
{
lower: 2,
upper: 5,
count: 2,
repeat: 1,
ndv: 2,
},
{
lower: 6,
upper: 7,
count: 5,
repeat: 2,
ndv: 2,
},
{
lower: 11,
upper: 11,
count: 8,
repeat: 3,
ndv: 1,
},
{
lower: 13,
upper: 17,
count: 11,
repeat: 1,
ndv: 3,
},
},
},
totColSize: []int64{11, 11},
popedTopN: []topN4Test{
{
data: 18,
count: 5,
},
{
data: 4,
count: 6,
},
},
expHist: []*bucket4Test{
{
lower: 1,
upper: 5,
count: 10,
repeat: 1,
ndv: 3,
},
{
lower: 5,
upper: 12,
count: 22,
repeat: 3,
ndv: 6,
},
{
lower: 12,
upper: 18,
count: 33,
repeat: 5,
ndv: 6,
},
},
expBucketNumber: 3,
},
}
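
// Build the per-partition histograms, encode the popped TopN values, merge
// them into a global histogram, and compare each bucket with expectations.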
for _, tt := range tests {
var expTotColSize int64
hists := make([]*Histogram, 0, len(tt.partitionHists))
for i := range tt.partitionHists {
hists = append(hists, genHist4Test(t, tt.partitionHists[i], tt.totColSize[i]))
expTotColSize += tt.totColSize[i]
}
ctx := mock.NewContext()
sc := ctx.GetSessionVars().StmtCtx
poped := make([]TopNMeta, 0, len(tt.popedTopN))
for _, top := range tt.popedTopN {
b, err := codec.EncodeKey(sc, nil, types.NewIntDatum(top.data))
require.NoError(t, err)
tmp := TopNMeta{
Encoded: b,
Count: uint64(top.count),
}
poped = append(poped, tmp)
}
globalHist, err := MergePartitionHist2GlobalHist(sc, hists, poped, tt.expBucketNumber, true)
require.NoError(t, err)
for i, b := range tt.expHist {
lo, err := ValueToString(ctx.GetSessionVars(), globalHist.GetLower(i), 1, []byte{types.KindInt64})
require.NoError(t, err)
up, err := ValueToString(ctx.GetSessionVars(), globalHist.GetUpper(i), 1, []byte{types.KindInt64})
require.NoError(t, err)
require.Equal(t, lo, fmt.Sprintf("%v", b.lower))
require.Equal(t, up, fmt.Sprintf("%v", b.upper))
require.Equal(t, globalHist.Buckets[i].Count, b.count)
require.Equal(t, globalHist.Buckets[i].Repeat, b.repeat)
require.Equal(t, globalHist.Buckets[i].NDV, b.ndv)
}
require.Equal(t, expTotColSize, globalHist.TotColSize)
}
}
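
// genBucket4Merging4Test builds a bucket4Merging with int64 bounds and the
// given NDV and disjoint NDV for the merge cases below.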
func genBucket4Merging4Test(lower, upper, ndv, disjointNDV int64) bucket4Merging {
l := types.NewIntDatum(lower)
r := types.NewIntDatum(upper)
return bucket4Merging{
lower: &l,
upper: &r,
Bucket: Bucket{
NDV: ndv,
},
disjointNDV: disjointNDV,
}
}
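
// TestMergeBucketNDV exercises mergeBucketNDV on identical, overlapping, and
// disjoint bucket pairs, checking the merged bounds, NDV, and disjointNDV.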
func TestMergeBucketNDV(t *testing.T) {
type testData struct {
left bucket4Merging
right bucket4Merging
result bucket4Merging
}
tests := []testData{
{
left: genBucket4Merging4Test(1, 2, 2, 0),
right: genBucket4Merging4Test(1, 2, 3, 0),
result: genBucket4Merging4Test(1, 2, 3, 0),
},
{
left: genBucket4Merging4Test(1, 3, 2, 0),
right: genBucket4Merging4Test(2, 3, 2, 0),
result: genBucket4Merging4Test(1, 3, 3, 0),
},
{
left: genBucket4Merging4Test(1, 3, 2, 0),
right: genBucket4Merging4Test(4, 6, 2, 2),
result: genBucket4Merging4Test(1, 3, 2, 4),
},
{
left: genBucket4Merging4Test(1, 5, 5, 0),
right: genBucket4Merging4Test(2, 6, 5, 0),
result: genBucket4Merging4Test(1, 6, 6, 0),
},
{
left: genBucket4Merging4Test(3, 5, 3, 0),
right: genBucket4Merging4Test(2, 6, 4, 0),
result: genBucket4Merging4Test(2, 6, 5, 0),
},
}
sc := mock.NewContext().GetSessionVars().StmtCtx
for _, tt := range tests {
res, err := mergeBucketNDV(sc, &tt.left, &tt.right)
require.NoError(t, err)
require.Equal(t, res.lower.GetInt64(), tt.result.lower.GetInt64())
require.Equal(t, res.upper.GetInt64(), tt.result.upper.GetInt64())
require.Equal(t, res.NDV, tt.result.NDV)
require.Equal(t, res.disjointNDV, tt.result.disjointNDV)
}
}
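
// TestIndexQueryBytes checks Index.QueryBytes with no CMSketch: a value that
// is not the bucket's upper bound is estimated as Count/NDV, while the upper
// bound returns the bucket's Repeat.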
func TestIndexQueryBytes(t *testing.T) {
ctx := mock.NewContext()
sc := ctx.GetSessionVars().StmtCtx
idx := &Index{Info: &model.IndexInfo{Columns: []*model.IndexColumn{{Name: model.NewCIStr("a"), Offset: 0}}}}
idx.Histogram = *NewHistogram(0, 15, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0)
low, err1 := codec.EncodeKey(sc, nil, types.NewBytesDatum([]byte("0")))
require.NoError(t, err1)
high, err2 := codec.EncodeKey(sc, nil, types.NewBytesDatum([]byte("3")))
require.NoError(t, err2)
idx.Bounds.AppendBytes(0, low)
idx.Bounds.AppendBytes(0, high)
idx.Buckets = append(idx.Buckets, Bucket{Repeat: 10, Count: 20, NDV: 20})
idx.PreCalculateScalar()
idx.CMSketch = nil
// Count / NDV
require.Equal(t, idx.QueryBytes(low), uint64(1))
// Repeat
require.Equal(t, idx.QueryBytes(high), uint64(10))
}