statistics: fix bug when calculating the lower bound (#3677)
This commit is contained in:
@ -74,6 +74,9 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R
|
||||
hg.Buckets[bucketIdx].Count++
|
||||
hg.Buckets[bucketIdx].UpperBound = data
|
||||
hg.Buckets[bucketIdx].Repeats = 1
|
||||
if bucketIdx == 0 && hg.Buckets[0].Count == 1 {
|
||||
hg.Buckets[0].LowerBound = data
|
||||
}
|
||||
hg.NDV++
|
||||
} else {
|
||||
// All buckets are full, we should merge buckets.
|
||||
@ -137,6 +140,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int
|
||||
}
|
||||
bucketIdx := 0
|
||||
var lastCount int64
|
||||
hg.Buckets[0].LowerBound = samples[0]
|
||||
for i := int64(0); i < int64(len(samples)); i++ {
|
||||
cmp, err := hg.Buckets[bucketIdx].UpperBound.CompareDatum(sc, samples[i])
|
||||
if err != nil {
|
||||
@ -158,9 +162,6 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int
|
||||
hg.Buckets[bucketIdx].Count = int64(totalCount)
|
||||
hg.Buckets[bucketIdx].UpperBound = samples[i]
|
||||
hg.Buckets[bucketIdx].Repeats = int64(ndvFactor)
|
||||
if bucketIdx == 0 {
|
||||
hg.Buckets[bucketIdx].LowerBound = samples[i]
|
||||
}
|
||||
} else {
|
||||
lastCount = hg.Buckets[bucketIdx].Count
|
||||
// The bucket is full, store the item in the next bucket.
|
||||
|
||||
@ -238,7 +238,7 @@ func (hg *Histogram) lessRowCount(sc *variable.StatementContext, value types.Dat
|
||||
if err != nil {
|
||||
return 0, errors.Trace(err)
|
||||
}
|
||||
if c < 0 {
|
||||
if c <= 0 {
|
||||
return prevCount, nil
|
||||
}
|
||||
return (prevCount + lessThanBucketValueCount) / 2, nil
|
||||
|
||||
@ -70,7 +70,11 @@ func (r *recordSet) Close() error {
|
||||
func (s *testStatisticsSuite) SetUpSuite(c *C) {
|
||||
s.count = 100000
|
||||
samples := make([]types.Datum, 10000)
|
||||
start := 1000 // 1000 values is null
|
||||
start := 1000
|
||||
samples[0].SetInt64(0)
|
||||
for i := 1; i < start; i++ {
|
||||
samples[i].SetInt64(2)
|
||||
}
|
||||
for i := start; i < len(samples); i++ {
|
||||
samples[i].SetInt64(int64(i))
|
||||
}
|
||||
@ -90,6 +94,10 @@ func (s *testStatisticsSuite) SetUpSuite(c *C) {
|
||||
count: s.count,
|
||||
cursor: 0,
|
||||
}
|
||||
rc.data[0].SetInt64(0)
|
||||
for i := 1; i < start; i++ {
|
||||
rc.data[i].SetInt64(2)
|
||||
}
|
||||
for i := int64(start); i < rc.count; i++ {
|
||||
rc.data[i].SetInt64(int64(i))
|
||||
}
|
||||
@ -152,6 +160,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
|
||||
count, err = col.betweenRowCount(sc, types.NewIntDatum(3000), types.NewIntDatum(3500))
|
||||
c.Check(err, IsNil)
|
||||
c.Check(int(count), Equals, 5075)
|
||||
count, err = col.lessRowCount(sc, types.NewIntDatum(1))
|
||||
c.Check(err, IsNil)
|
||||
c.Check(int(count), Equals, 9)
|
||||
|
||||
tblCount, col, err := BuildIndex(ctx, bucketCount, 1, ast.RecordSet(s.rc))
|
||||
c.Check(err, IsNil)
|
||||
@ -165,6 +176,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
|
||||
count, err = col.betweenRowCount(sc, encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000)))
|
||||
c.Check(err, IsNil)
|
||||
c.Check(int(count), Equals, 4618)
|
||||
count, err = col.lessRowCount(sc, encodeKey(types.NewIntDatum(0)))
|
||||
c.Check(err, IsNil)
|
||||
c.Check(int(count), Equals, 0)
|
||||
|
||||
tblCount, col, err = BuildPK(ctx, bucketCount, 4, ast.RecordSet(s.pk))
|
||||
c.Check(err, IsNil)
|
||||
@ -186,7 +200,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
|
||||
c.Check(int(count), Equals, 100000)
|
||||
count, err = col.lessAndEqRowCount(sc, types.Datum{})
|
||||
c.Check(err, IsNil)
|
||||
c.Check(int(count), Equals, 256)
|
||||
c.Check(int(count), Equals, 0)
|
||||
count, err = col.greaterRowCount(sc, types.NewIntDatum(1001))
|
||||
c.Check(err, IsNil)
|
||||
c.Check(int(count), Equals, 99231)
|
||||
|
||||
Reference in New Issue
Block a user