From bebd5c59e4c8eb0e4e626ec67914766e270608db Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 10 Jul 2017 15:40:46 +0800 Subject: [PATCH] statistics: fix bug when calculate lowerbound (#3677) --- statistics/builder.go | 7 ++++--- statistics/histogram.go | 2 +- statistics/statistics_test.go | 18 ++++++++++++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/statistics/builder.go b/statistics/builder.go index 7fac5b5ebe..8ad8c39a67 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -74,6 +74,9 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R hg.Buckets[bucketIdx].Count++ hg.Buckets[bucketIdx].UpperBound = data hg.Buckets[bucketIdx].Repeats = 1 + if bucketIdx == 0 && hg.Buckets[0].Count == 1 { + hg.Buckets[0].LowerBound = data + } hg.NDV++ } else { // All buckets are full, we should merge buckets. @@ -137,6 +140,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int } bucketIdx := 0 var lastCount int64 + hg.Buckets[0].LowerBound = samples[0] for i := int64(0); i < int64(len(samples)); i++ { cmp, err := hg.Buckets[bucketIdx].UpperBound.CompareDatum(sc, samples[i]) if err != nil { @@ -158,9 +162,6 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int hg.Buckets[bucketIdx].Count = int64(totalCount) hg.Buckets[bucketIdx].UpperBound = samples[i] hg.Buckets[bucketIdx].Repeats = int64(ndvFactor) - if bucketIdx == 0 { - hg.Buckets[bucketIdx].LowerBound = samples[i] - } } else { lastCount = hg.Buckets[bucketIdx].Count // The bucket is full, store the item in the next bucket. diff --git a/statistics/histogram.go b/statistics/histogram.go index 10e99dd94d..8c8b12d0c0 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -238,7 +238,7 @@ func (hg *Histogram) lessRowCount(sc *variable.StatementContext, value types.Dat if err != nil { return 0, errors.Trace(err) } - if c < 0 { + if c <= 0 { return prevCount, nil } return (prevCount + lessThanBucketValueCount) / 2, nil diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index db7aa38512..b8940bfdd3 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -70,7 +70,11 @@ func (r *recordSet) Close() error { func (s *testStatisticsSuite) SetUpSuite(c *C) { s.count = 100000 samples := make([]types.Datum, 10000) - start := 1000 // 1000 values is null + start := 1000 + samples[0].SetInt64(0) + for i := 1; i < start; i++ { + samples[i].SetInt64(2) + } for i := start; i < len(samples); i++ { samples[i].SetInt64(int64(i)) } @@ -90,6 +94,10 @@ func (s *testStatisticsSuite) SetUpSuite(c *C) { count: s.count, cursor: 0, } + rc.data[0].SetInt64(0) + for i := 1; i < start; i++ { + rc.data[i].SetInt64(2) + } for i := int64(start); i < rc.count; i++ { rc.data[i].SetInt64(int64(i)) } @@ -152,6 +160,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) { count, err = col.betweenRowCount(sc, types.NewIntDatum(3000), types.NewIntDatum(3500)) c.Check(err, IsNil) c.Check(int(count), Equals, 5075) + count, err = col.lessRowCount(sc, types.NewIntDatum(1)) + c.Check(err, IsNil) + c.Check(int(count), Equals, 9) tblCount, col, err := BuildIndex(ctx, bucketCount, 1, ast.RecordSet(s.rc)) c.Check(err, IsNil) @@ -165,6 +176,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) { count, err = col.betweenRowCount(sc, encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000))) c.Check(err, IsNil) c.Check(int(count), Equals, 4618) + count, err = col.lessRowCount(sc, encodeKey(types.NewIntDatum(0))) + c.Check(err, IsNil) + c.Check(int(count), Equals, 0) tblCount, col, err = BuildPK(ctx, bucketCount, 4, ast.RecordSet(s.pk)) c.Check(err, IsNil) @@ -186,7 +200,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { c.Check(int(count), Equals, 100000) count, err = col.lessAndEqRowCount(sc, types.Datum{}) c.Check(err, IsNil) - c.Check(int(count), Equals, 256) + c.Check(int(count), Equals, 0) count, err = col.greaterRowCount(sc, types.NewIntDatum(1001)) c.Check(err, IsNil) c.Check(int(count), Equals, 99231)