// Copyright 2023 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package statistics import ( "math/rand" "testing" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/memory" "github.com/pingcap/tidb/pkg/util/mock" "github.com/stretchr/testify/require" ) // BenchmarkBuildHistAndTopN is used to benchmark the performance of BuildHistAndTopN. // go test -benchmem -run=^$ -bench ^BenchmarkBuildHistAndTopN$ github.com/pingcap/tidb/pkg/statistics // * The NDV is 1000000 func BenchmarkBuildHistAndTopN(b *testing.B) { ctx := mock.NewContext() const cnt = 1000_000 sketch := NewFMSketch(cnt) data := make([]*SampleItem, 0, 8) for i := 1; i <= cnt; i++ { d := types.NewIntDatum(int64(i)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } for i := 1; i < 10; i++ { d := types.NewIntDatum(int64(2)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } for i := 1; i < 7; i++ { d := types.NewIntDatum(int64(4)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } for i := 1; i < 5; i++ { d := types.NewIntDatum(int64(7)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } for i := 1; i < 3; i++ { d := types.NewIntDatum(int64(11)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } collector := &SampleCollector{ Samples: data, NullCount: 0, Count: int64(len(data)), FMSketch: sketch, TotalSize: int64(len(data)) * 8, } filedType := types.NewFieldType(mysql.TypeLong) memoryTracker := memory.NewTracker(10, 1024*1024*1024) b.ResetTimer() for i := 0; i < b.N; i++ { _, _, _ = BuildHistAndTopN(ctx, 256, 500, 0, collector, filedType, true, memoryTracker, false) } } // BenchmarkBuildHistAndTopNWithLowNDV is used to benchmark the performance of BuildHistAndTopN with low NDV. // go test -benchmem -run=^$ -bench ^BenchmarkBuildHistAndTopNWithLowNDV github.com/pingcap/tidb/pkg/statistics // * NDV is 102 func BenchmarkBuildHistAndTopNWithLowNDV(b *testing.B) { ctx := mock.NewContext() const cnt = 1000_000 sketch := NewFMSketch(cnt) data := make([]*SampleItem, 0, 8) total := 0 for i := 1; i <= 1_000; i++ { total++ d := types.NewIntDatum(int64(1000)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } for i := 1; i <= 1_000; i++ { total++ d := types.NewIntDatum(int64(2000)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } end := total / 2 for range end { total++ d := types.NewIntDatum(rand.Int63n(50)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } end = cnt - total for range end { d := types.NewIntDatum(rand.Int63n(100)) err := sketch.InsertValue(ctx.GetSessionVars().StmtCtx, d) require.NoError(b, err) data = append(data, &SampleItem{Value: d}) } collector := &SampleCollector{ Samples: data, NullCount: 0, Count: int64(len(data)), FMSketch: sketch, TotalSize: int64(len(data)) * 8, } filedType := types.NewFieldType(mysql.TypeLong) memoryTracker := memory.NewTracker(10, 1024*1024*1024) b.ResetTimer() for i := 0; i < b.N; i++ { _, _, _ = BuildHistAndTopN(ctx, 256, 500, 0, collector, filedType, true, memoryTracker, false) } } // SequentialRangeChecker tests func TestSequentialRangeChecker(t *testing.T) { ranges := []TopNWithRange{ {TopNMeta: TopNMeta{Count: 10}, startIdx: 5, endIdx: 8}, // range [5,8] {TopNMeta: TopNMeta{Count: 15}, startIdx: 12, endIdx: 15}, // range [12,15] {TopNMeta: TopNMeta{Count: 8}, startIdx: 20, endIdx: 22}, // range [20,22] } checker := NewSequentialRangeChecker(ranges) // test basic functionality require.False(t, checker.IsIndexInTopNRange(4)) // before first range require.True(t, checker.IsIndexInTopNRange(6)) // in first range require.False(t, checker.IsIndexInTopNRange(10)) // between ranges require.True(t, checker.IsIndexInTopNRange(13)) // in second range require.True(t, checker.IsIndexInTopNRange(21)) // in third range require.False(t, checker.IsIndexInTopNRange(25)) // after all ranges // test edge cases emptyChecker := NewSequentialRangeChecker([]TopNWithRange{}) require.False(t, emptyChecker.IsIndexInTopNRange(0)) // test unsorted input - should be sorted internally unsortedRanges := []TopNWithRange{ {TopNMeta: TopNMeta{Count: 8}, startIdx: 20, endIdx: 22}, // third {TopNMeta: TopNMeta{Count: 10}, startIdx: 5, endIdx: 8}, // first {TopNMeta: TopNMeta{Count: 15}, startIdx: 12, endIdx: 15}, // second } unsortedChecker := NewSequentialRangeChecker(unsortedRanges) require.True(t, unsortedChecker.IsIndexInTopNRange(6)) require.True(t, unsortedChecker.IsIndexInTopNRange(13)) require.True(t, unsortedChecker.IsIndexInTopNRange(21)) }