statistics: fix bug when calculating the lower bound (#3677)

This commit is contained in:
Haibin Xie
2017-07-10 15:40:46 +08:00
committed by Han Fei
parent bd64339e93
commit bebd5c59e4
3 changed files with 21 additions and 6 deletions

View File

@ -74,6 +74,9 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R
hg.Buckets[bucketIdx].Count++
hg.Buckets[bucketIdx].UpperBound = data
hg.Buckets[bucketIdx].Repeats = 1
if bucketIdx == 0 && hg.Buckets[0].Count == 1 {
hg.Buckets[0].LowerBound = data
}
hg.NDV++
} else {
// All buckets are full, we should merge buckets.
@ -137,6 +140,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int
}
bucketIdx := 0
var lastCount int64
hg.Buckets[0].LowerBound = samples[0]
for i := int64(0); i < int64(len(samples)); i++ {
cmp, err := hg.Buckets[bucketIdx].UpperBound.CompareDatum(sc, samples[i])
if err != nil {
@ -158,9 +162,6 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int
hg.Buckets[bucketIdx].Count = int64(totalCount)
hg.Buckets[bucketIdx].UpperBound = samples[i]
hg.Buckets[bucketIdx].Repeats = int64(ndvFactor)
if bucketIdx == 0 {
hg.Buckets[bucketIdx].LowerBound = samples[i]
}
} else {
lastCount = hg.Buckets[bucketIdx].Count
// The bucket is full, store the item in the next bucket.

View File

@ -238,7 +238,7 @@ func (hg *Histogram) lessRowCount(sc *variable.StatementContext, value types.Dat
if err != nil {
return 0, errors.Trace(err)
}
if c < 0 {
if c <= 0 {
return prevCount, nil
}
return (prevCount + lessThanBucketValueCount) / 2, nil

View File

@ -70,7 +70,11 @@ func (r *recordSet) Close() error {
func (s *testStatisticsSuite) SetUpSuite(c *C) {
s.count = 100000
samples := make([]types.Datum, 10000)
start := 1000 // 1000 values is null
start := 1000
samples[0].SetInt64(0)
for i := 1; i < start; i++ {
samples[i].SetInt64(2)
}
for i := start; i < len(samples); i++ {
samples[i].SetInt64(int64(i))
}
@ -90,6 +94,10 @@ func (s *testStatisticsSuite) SetUpSuite(c *C) {
count: s.count,
cursor: 0,
}
rc.data[0].SetInt64(0)
for i := 1; i < start; i++ {
rc.data[i].SetInt64(2)
}
for i := int64(start); i < rc.count; i++ {
rc.data[i].SetInt64(int64(i))
}
@ -152,6 +160,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
count, err = col.betweenRowCount(sc, types.NewIntDatum(3000), types.NewIntDatum(3500))
c.Check(err, IsNil)
c.Check(int(count), Equals, 5075)
count, err = col.lessRowCount(sc, types.NewIntDatum(1))
c.Check(err, IsNil)
c.Check(int(count), Equals, 9)
tblCount, col, err := BuildIndex(ctx, bucketCount, 1, ast.RecordSet(s.rc))
c.Check(err, IsNil)
@ -165,6 +176,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
count, err = col.betweenRowCount(sc, encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000)))
c.Check(err, IsNil)
c.Check(int(count), Equals, 4618)
count, err = col.lessRowCount(sc, encodeKey(types.NewIntDatum(0)))
c.Check(err, IsNil)
c.Check(int(count), Equals, 0)
tblCount, col, err = BuildPK(ctx, bucketCount, 4, ast.RecordSet(s.pk))
c.Check(err, IsNil)
@ -186,7 +200,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
c.Check(int(count), Equals, 100000)
count, err = col.lessAndEqRowCount(sc, types.Datum{})
c.Check(err, IsNil)
c.Check(int(count), Equals, 256)
c.Check(int(count), Equals, 0)
count, err = col.greaterRowCount(sc, types.NewIntDatum(1001))
c.Check(err, IsNil)
c.Check(int(count), Equals, 99231)