// Copyright 2018 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package statistics
|
|
|
|
import (
|
|
"bytes"
|
|
"testing"
|
|
|
|
"github.com/pingcap/log"
|
|
"github.com/pingcap/tidb/parser/mysql"
|
|
"github.com/pingcap/tidb/types"
|
|
"github.com/pingcap/tidb/util/codec"
|
|
"github.com/stretchr/testify/require"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
func newFeedback(lower, upper, count, ndv int64) Feedback {
|
|
low, upp := types.NewIntDatum(lower), types.NewIntDatum(upper)
|
|
return Feedback{&low, &upp, count, 0, ndv}
|
|
}
|
|
|
|
func genFeedbacks(lower, upper int64) []Feedback {
|
|
var feedbacks []Feedback
|
|
for i := lower; i < upper; i++ {
|
|
feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1, upper-i+1))
|
|
}
|
|
return feedbacks
|
|
}
|
|
|
|
func appendBucket(h *Histogram, l, r int64) {
|
|
lower, upper := types.NewIntDatum(l), types.NewIntDatum(r)
|
|
h.AppendBucketWithNDV(&lower, &upper, 0, 0, 0)
|
|
}
|
|
|
|
func genHistogram() *Histogram {
|
|
h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0)
|
|
appendBucket(h, 1, 1)
|
|
appendBucket(h, 2, 3)
|
|
appendBucket(h, 5, 7)
|
|
appendBucket(h, 10, 20)
|
|
appendBucket(h, 30, 50)
|
|
return h
|
|
}
|
|
|
|
func TestUpdateHistogram(t *testing.T) {
|
|
feedbacks := []Feedback{
|
|
newFeedback(0, 1, 10000, 1),
|
|
newFeedback(1, 2, 1, 1),
|
|
newFeedback(2, 3, 3, 1),
|
|
newFeedback(4, 5, 2, 1),
|
|
newFeedback(5, 7, 4, 1),
|
|
}
|
|
feedbacks = append(feedbacks, genFeedbacks(8, 20)...)
|
|
feedbacks = append(feedbacks, genFeedbacks(21, 60)...)
|
|
|
|
q := NewQueryFeedback(0, genHistogram(), 0, false)
|
|
q.Feedback = feedbacks
|
|
require.Equal(t,
|
|
"column:0 ndv:10053 totColSize:0\n"+
|
|
"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0 ndv: 2\n"+
|
|
"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 2\n"+
|
|
"num: 4 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 1\n"+
|
|
"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0 ndv: 11\n"+
|
|
"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0 ndv: 19\n"+
|
|
"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0 ndv: 11",
|
|
UpdateHistogramWithBucketCount(q.Hist, q, Version2, 7).ToString(0))
|
|
}
|
|
|
|
func TestSplitBuckets(t *testing.T) {
|
|
// test bucket split
|
|
feedbacks := []Feedback{newFeedback(0, 1, 1, 1)}
|
|
for i := 0; i < 100; i++ {
|
|
feedbacks = append(feedbacks, newFeedback(10, 15, 5, 5))
|
|
}
|
|
q := NewQueryFeedback(0, genHistogram(), 0, false)
|
|
q.Feedback = feedbacks
|
|
oldCnts := make([]int64, q.Hist.Len())
|
|
for i := range q.Hist.Buckets {
|
|
oldCnts[i] = q.Hist.bucketCount(i)
|
|
}
|
|
oldNdvs := make([]int64, q.Hist.Len())
|
|
for i := range q.Hist.Buckets {
|
|
oldNdvs[i] = q.Hist.Buckets[i].NDV
|
|
}
|
|
log.Warn("in test", zap.Int64s("ndvs", oldNdvs), zap.Int64s("cnts", oldCnts))
|
|
buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q, defaultBucketCount)
|
|
ndvs := make([]int64, len(buckets))
|
|
for i := range buckets {
|
|
ndvs[i] = buckets[i].Ndv
|
|
}
|
|
log.Warn("in test", zap.Int64s("ndvs", ndvs))
|
|
require.Equal(t,
|
|
"column:0 ndv:0 totColSize:0\n"+
|
|
"num: 1 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+
|
|
"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0 ndv: 0\n"+
|
|
"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 0\n"+
|
|
"num: 5 lower_bound: 10 upper_bound: 15 repeats: 0 ndv: 5\n"+
|
|
"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0 ndv: 0\n"+
|
|
"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0 ndv: 0",
|
|
buildNewHistogram(q.Hist, buckets).ToString(0))
|
|
require.Equal(t, []bool{false, false, false, true, true, false}, isNewBuckets)
|
|
require.Equal(t, int64(6), totalCount)
|
|
|
|
// test do not split if the bucket count is too small
|
|
feedbacks = []Feedback{newFeedback(0, 1, 100000, 1)}
|
|
for i := 0; i < 100; i++ {
|
|
feedbacks = append(feedbacks, newFeedback(10, 15, 1, 1))
|
|
}
|
|
q = NewQueryFeedback(0, genHistogram(), 0, false)
|
|
q.Feedback = feedbacks
|
|
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
|
|
require.Equal(t,
|
|
"column:0 ndv:0 totColSize:0\n"+
|
|
"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+
|
|
"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0 ndv: 0\n"+
|
|
"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 0\n"+
|
|
"num: 1 lower_bound: 10 upper_bound: 15 repeats: 0 ndv: 1\n"+
|
|
"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0 ndv: 0\n"+
|
|
"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0 ndv: 0",
|
|
buildNewHistogram(q.Hist, buckets).ToString(0))
|
|
require.Equal(t, []bool{false, false, false, true, true, false}, isNewBuckets)
|
|
require.Equal(t, int64(100001), totalCount)
|
|
|
|
// test do not split if the result bucket count is too small
|
|
h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0)
|
|
appendBucket(h, 0, 1000000)
|
|
h.Buckets[0].Count = 1000000
|
|
h.Buckets[0].NDV = 1000000
|
|
feedbacks = feedbacks[:0]
|
|
for i := 0; i < 100; i++ {
|
|
feedbacks = append(feedbacks, newFeedback(0, 10, 1, 1))
|
|
}
|
|
q = NewQueryFeedback(0, h, 0, false)
|
|
q.Feedback = feedbacks
|
|
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
|
|
require.Equal(t,
|
|
"column:0 ndv:0 totColSize:0\n"+
|
|
"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0 ndv: 1000000",
|
|
buildNewHistogram(q.Hist, buckets).ToString(0))
|
|
require.Equal(t, []bool{false}, isNewBuckets)
|
|
require.Equal(t, int64(1000000), totalCount)
|
|
|
|
// test split even if the feedback range is too small
|
|
h = NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0)
|
|
appendBucket(h, 0, 1000000)
|
|
feedbacks = feedbacks[:0]
|
|
for i := 0; i < 100; i++ {
|
|
feedbacks = append(feedbacks, newFeedback(0, 10, 1, 1))
|
|
}
|
|
q = NewQueryFeedback(0, h, 0, false)
|
|
q.Feedback = feedbacks
|
|
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
|
|
require.Equal(t,
|
|
"column:0 ndv:0 totColSize:0\n"+
|
|
"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0 ndv: 1\n"+
|
|
"num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0 ndv: 0",
|
|
buildNewHistogram(q.Hist, buckets).ToString(0))
|
|
require.Equal(t, []bool{true, true}, isNewBuckets)
|
|
require.Equal(t, int64(1), totalCount)
|
|
|
|
// test merge the non-overlapped feedbacks.
|
|
h = NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0)
|
|
appendBucket(h, 0, 10000)
|
|
feedbacks = feedbacks[:0]
|
|
feedbacks = append(feedbacks, newFeedback(0, 4000, 4000, 4000))
|
|
feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000, 1000))
|
|
q = NewQueryFeedback(0, h, 0, false)
|
|
q.Feedback = feedbacks
|
|
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
|
|
require.Equal(t,
|
|
"column:0 ndv:0 totColSize:0\n"+
|
|
"num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0 ndv: 5001",
|
|
buildNewHistogram(q.Hist, buckets).ToString(0))
|
|
require.Equal(t, []bool{false}, isNewBuckets)
|
|
require.Equal(t, int64(5001), totalCount)
|
|
}
|
|
|
|
func TestMergeBuckets(t *testing.T) {
|
|
tests := []struct {
|
|
points []int64
|
|
counts []int64
|
|
ndvs []int64
|
|
isNewBuckets []bool
|
|
bucketCount int
|
|
result string
|
|
}{
|
|
{
|
|
points: []int64{1, 2},
|
|
counts: []int64{1},
|
|
ndvs: []int64{1},
|
|
isNewBuckets: []bool{false},
|
|
bucketCount: 1,
|
|
result: "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0 ndv: 1",
|
|
},
|
|
{
|
|
points: []int64{1, 2, 2, 3, 3, 4},
|
|
counts: []int64{100000, 1, 1},
|
|
ndvs: []int64{1, 1, 1},
|
|
isNewBuckets: []bool{false, false, false},
|
|
bucketCount: 2,
|
|
result: "column:0 ndv:0 totColSize:0\n" +
|
|
"num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0 ndv: 1\n" +
|
|
"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2",
|
|
},
|
|
// test do not Merge if the result bucket count is too large
|
|
{
|
|
points: []int64{1, 2, 2, 3, 3, 4, 4, 5},
|
|
counts: []int64{1, 1, 100000, 100000},
|
|
ndvs: []int64{1, 1, 1, 1},
|
|
isNewBuckets: []bool{false, false, false, false},
|
|
bucketCount: 3,
|
|
result: "column:0 ndv:0 totColSize:0\n" +
|
|
"num: 2 lower_bound: 1 upper_bound: 3 repeats: 0 ndv: 2\n" +
|
|
"num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0 ndv: 1\n" +
|
|
"num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0 ndv: 1",
|
|
},
|
|
}
|
|
for _, tt := range tests {
|
|
require.Equal(t, len(tt.ndvs), len(tt.counts))
|
|
bkts := make([]bucket, 0, len(tt.counts))
|
|
totalCount := int64(0)
|
|
for i := 0; i < len(tt.counts); i++ {
|
|
lower, upper := types.NewIntDatum(tt.points[2*i]), types.NewIntDatum(tt.points[2*i+1])
|
|
bkts = append(bkts, bucket{&lower, &upper, tt.counts[i], 0, tt.ndvs[i]})
|
|
totalCount += tt.counts[i]
|
|
}
|
|
bkts = mergeBuckets(bkts, tt.isNewBuckets, tt.bucketCount, float64(totalCount))
|
|
result := buildNewHistogram(&Histogram{Tp: types.NewFieldType(mysql.TypeLong)}, bkts).ToString(0)
|
|
require.Equal(t, tt.result, result)
|
|
}
|
|
}
|
|
|
|
func encodeInt(v int64) *types.Datum {
|
|
val := codec.EncodeInt(nil, v)
|
|
d := types.NewBytesDatum(val)
|
|
return &d
|
|
}
|
|
|
|
func TestFeedbackEncoding(t *testing.T) {
|
|
hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 0, 0)
|
|
q := &QueryFeedback{Hist: hist, Tp: PkType}
|
|
q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0, 1})
|
|
q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0, 1})
|
|
val, err := EncodeFeedback(q)
|
|
require.NoError(t, err)
|
|
rq := &QueryFeedback{}
|
|
require.NoError(t, DecodeFeedback(val, rq, nil, nil, hist.Tp))
|
|
for _, fb := range rq.Feedback {
|
|
fb.Lower.SetBytes(codec.EncodeInt(nil, fb.Lower.GetInt64()))
|
|
fb.Upper.SetBytes(codec.EncodeInt(nil, fb.Upper.GetInt64()))
|
|
}
|
|
require.True(t, q.Equal(rq))
|
|
|
|
hist.Tp = types.NewFieldType(mysql.TypeBlob)
|
|
q = &QueryFeedback{Hist: hist}
|
|
q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0, 1})
|
|
q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0, 1})
|
|
val, err = EncodeFeedback(q)
|
|
require.NoError(t, err)
|
|
rq = &QueryFeedback{}
|
|
cms := NewCMSketch(4, 4)
|
|
require.NoError(t, DecodeFeedback(val, rq, cms, nil, hist.Tp))
|
|
require.Equal(t, uint64(1), cms.QueryBytes(codec.EncodeInt(nil, 0)))
|
|
q.Feedback = q.Feedback[:1]
|
|
require.True(t, q.Equal(rq))
|
|
}
|
|
|
|
// Equal tests if two query feedback equal, it is only used in test.
|
|
func (q *QueryFeedback) Equal(rq *QueryFeedback) bool {
|
|
if len(q.Feedback) != len(rq.Feedback) {
|
|
return false
|
|
}
|
|
for i, fb := range q.Feedback {
|
|
rfb := rq.Feedback[i]
|
|
if fb.Count != rfb.Count {
|
|
return false
|
|
}
|
|
if fb.Ndv != rfb.Ndv {
|
|
return false
|
|
}
|
|
if fb.Lower.Kind() == types.KindInt64 {
|
|
if fb.Lower.GetInt64() != rfb.Lower.GetInt64() {
|
|
return false
|
|
}
|
|
if fb.Upper.GetInt64() != rfb.Upper.GetInt64() {
|
|
return false
|
|
}
|
|
} else {
|
|
if !bytes.Equal(fb.Lower.GetBytes(), rfb.Lower.GetBytes()) {
|
|
return false
|
|
}
|
|
if !bytes.Equal(fb.Upper.GetBytes(), rfb.Upper.GetBytes()) {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
}
|