statistics: BinarySearchRemoveVal should use decoded val instead of encoded (#59131)
close pingcap/tidb#59130
This commit is contained in:
@ -83,7 +83,7 @@ go_test(
|
||||
data = glob(["testdata/**"]),
|
||||
embed = [":statistics"],
|
||||
flaky = True,
|
||||
shard_count = 38,
|
||||
shard_count = 37,
|
||||
deps = [
|
||||
"//pkg/config",
|
||||
"//pkg/meta/model",
|
||||
|
||||
@ -363,10 +363,39 @@ func testIssues24349(t *testing.T, testKit *testkit.TestKit, store kv.Storage) {
|
||||
"test t global b 0 2 4",
|
||||
))
|
||||
testKit.MustExec("explain select * from t where a > 0 and b > 0")
|
||||
testKit.MustQuery("show stats_buckets where partition_name='global'").Check(testkit.Rows(
|
||||
testKit.MustQuery("show stats_topn where table_name = 't'").Sort().Check(testkit.Rows(
|
||||
"test t global a 0 1 6",
|
||||
"test t global b 0 2 4",
|
||||
"test t p0 a 0 0 4",
|
||||
"test t p0 b 0 3 3",
|
||||
"test t p1 a 0 1 6",
|
||||
"test t p1 b 0 2 3",
|
||||
"test t p2 a 0 2 2",
|
||||
"test t p2 b 0 1 2",
|
||||
))
|
||||
// column a is trival.
|
||||
// column b:
|
||||
// TopN:
|
||||
// p0: b=3, occurs 3 times
|
||||
// p1: b=2, occurs 3 times
|
||||
// p2: b=1, occurs 2 times
|
||||
// Histogram:
|
||||
// p0: hist of b: [2, 2] count=repeat=2
|
||||
// p1: hist of b: [1, 3] count=2, repeat=3. [4, 4] count==repeat=1
|
||||
// After merging global TopN, it should be 2 with 4 as the repeat.(constructed by p1's TopN and p0's histogram)
|
||||
// Kicking it out, the remained buckets for b are:(consider TopN as a bucket whose lower bound is the same as upper bound and count is the same as repeat)
|
||||
// [3, 3] count=repeat=4
|
||||
// [1, 1] count=repeat=2
|
||||
// [1, 3] count=1, repeat=0(merged into TopN)
|
||||
// [4, 4] count=repeat=1
|
||||
// Finally, get one global bucket [1, 4] count=8, repeat=1
|
||||
testKit.MustQuery("show stats_buckets where table_name='t'").Sort().Check(testkit.Rows(
|
||||
"test t global a 0 0 4 4 0 0 0",
|
||||
"test t global a 0 1 6 2 2 2 0",
|
||||
"test t global b 0 0 10 1 1 4 0",
|
||||
"test t global b 0 0 8 1 1 4 0",
|
||||
"test t p0 b 0 0 1 1 2 2 0",
|
||||
"test t p1 b 0 0 2 1 1 3 0",
|
||||
"test t p1 b 0 1 3 1 4 4 0",
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@ -150,7 +150,7 @@ func (worker *topnStatsMergeWorker) Run(timeZone *time.Location, isIndex bool, v
|
||||
count, _ := allHists[j].EqualRowCount(nil, datum, isIndex)
|
||||
if count != 0 {
|
||||
// Remove the value corresponding to encodedVal from the histogram.
|
||||
worker.statsWrapper.AllHg[j].BinarySearchRemoveVal(statistics.TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)})
|
||||
worker.statsWrapper.AllHg[j].BinarySearchRemoveVal(&datum, int64(count))
|
||||
}
|
||||
worker.shardMutex[j].Unlock()
|
||||
if count != 0 {
|
||||
|
||||
@ -194,7 +194,7 @@ func MergePartTopN2GlobalTopN(
|
||||
if count != 0 {
|
||||
counter[encodedVal] += count
|
||||
// Remove the value corresponding to encodedVal from the histogram.
|
||||
hists[j].BinarySearchRemoveVal(statistics.TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)})
|
||||
hists[j].BinarySearchRemoveVal(&datum, int64(count))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -296,15 +296,14 @@ func (hg *Histogram) BucketToString(bktID, idxCols int) string {
|
||||
}
|
||||
|
||||
// BinarySearchRemoveVal removes the value from the TopN using binary search.
|
||||
func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
|
||||
func (hg *Histogram) BinarySearchRemoveVal(val *types.Datum, count int64) {
|
||||
lowIdx, highIdx := 0, hg.Len()-1
|
||||
column := hg.Bounds.Column(0)
|
||||
// if hg is too small, we don't need to check the branch. because the cost is more than binary search.
|
||||
if hg.Len() > 4 {
|
||||
if cmpResult := bytes.Compare(column.GetRaw(highIdx*2+1), valCntPairs.Encoded); cmpResult < 0 {
|
||||
if cmpResult := chunk.Compare(hg.Bounds.GetRow(highIdx*2+1), 0, val); cmpResult < 0 {
|
||||
return
|
||||
}
|
||||
if cmpResult := bytes.Compare(column.GetRaw(lowIdx), valCntPairs.Encoded); cmpResult > 0 {
|
||||
if cmpResult := chunk.Compare(hg.Bounds.GetRow(lowIdx), 0, val); cmpResult > 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
@ -312,12 +311,12 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
|
||||
var found bool
|
||||
for lowIdx <= highIdx {
|
||||
midIdx = (lowIdx + highIdx) / 2
|
||||
cmpResult := bytes.Compare(column.GetRaw(midIdx*2), valCntPairs.Encoded)
|
||||
cmpResult := chunk.Compare(hg.Bounds.GetRow(midIdx*2), 0, val)
|
||||
if cmpResult > 0 {
|
||||
highIdx = midIdx - 1
|
||||
continue
|
||||
}
|
||||
cmpResult = bytes.Compare(column.GetRaw(midIdx*2+1), valCntPairs.Encoded)
|
||||
cmpResult = chunk.Compare(hg.Bounds.GetRow(midIdx*2+1), 0, val)
|
||||
if cmpResult < 0 {
|
||||
lowIdx = midIdx + 1
|
||||
continue
|
||||
@ -330,7 +329,7 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
|
||||
if cmpResult == 0 {
|
||||
midbucket.Repeat = 0
|
||||
}
|
||||
midbucket.Count -= int64(valCntPairs.Count)
|
||||
midbucket.Count -= count
|
||||
if midbucket.Count < 0 {
|
||||
midbucket.Count = 0
|
||||
}
|
||||
@ -339,7 +338,7 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
|
||||
}
|
||||
if found {
|
||||
for midIdx++; midIdx <= hg.Len()-1; midIdx++ {
|
||||
hg.Buckets[midIdx].Count -= int64(valCntPairs.Count)
|
||||
hg.Buckets[midIdx].Count -= count
|
||||
if hg.Buckets[midIdx].Count < 0 {
|
||||
hg.Buckets[midIdx].Count = 0
|
||||
}
|
||||
|
||||
@ -712,21 +712,3 @@ func generateData(t *testing.T) *Histogram {
|
||||
}
|
||||
return genHist4Test(t, data, 0)
|
||||
}
|
||||
|
||||
func TestVerifyHistsBinarySearchRemoveValAndRemoveVals(t *testing.T) {
|
||||
data1 := generateData(t)
|
||||
data2 := generateData(t)
|
||||
|
||||
require.Equal(t, data1, data2)
|
||||
ctx := mock.NewContext()
|
||||
sc := ctx.GetSessionVars().StmtCtx
|
||||
b, err := codec.EncodeKey(sc.TimeZone(), nil, types.NewIntDatum(150))
|
||||
require.NoError(t, err)
|
||||
tmp := TopNMeta{
|
||||
Encoded: b,
|
||||
Count: 2,
|
||||
}
|
||||
data1.RemoveVals([]TopNMeta{tmp})
|
||||
data2.BinarySearchRemoveVal(tmp)
|
||||
require.Equal(t, data1, data2)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user