stats: log detailed stats info for query feedback (#7293)
This commit is contained in:
@ -14,6 +14,8 @@
|
||||
package statistics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/juju/errors"
|
||||
"github.com/pingcap/tidb/infoschema"
|
||||
"github.com/pingcap/tidb/model"
|
||||
@ -49,6 +51,7 @@ func (h *Handle) initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache
|
||||
tbl := &Table{
|
||||
HistColl: newHistColl,
|
||||
Version: row.GetUint64(0),
|
||||
name: getFullTableName(is, tableInfo),
|
||||
}
|
||||
tables[physicalID] = tbl
|
||||
}
|
||||
@ -257,3 +260,14 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) error {
|
||||
h.statsCache.Store(tables)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getFullTableName(is infoschema.InfoSchema, tblInfo *model.TableInfo) string {
|
||||
for _, schema := range is.AllSchemas() {
|
||||
if t, err := is.TableByName(schema.Name, tblInfo.Name); err == nil {
|
||||
if t.Meta().ID == tblInfo.ID {
|
||||
return schema.Name.O + "." + tblInfo.Name.O
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("%d", tblInfo.ID)
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@ package statistics
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"sort"
|
||||
@ -102,11 +103,11 @@ func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error) {
|
||||
if isIndex {
|
||||
var err error
|
||||
// As we do not know the origin length, just use a custom value here.
|
||||
lowVal, err = codec.Decode(low.GetBytes(), 4)
|
||||
lowVal, err = codec.DecodeRange(low.GetBytes(), 4)
|
||||
if err != nil {
|
||||
return nil, errors.Trace(err)
|
||||
}
|
||||
highVal, err = codec.Decode(high.GetBytes(), 4)
|
||||
highVal, err = codec.DecodeRange(high.GetBytes(), 4)
|
||||
if err != nil {
|
||||
return nil, errors.Trace(err)
|
||||
}
|
||||
@ -759,3 +760,135 @@ func splitFeedbackByQueryType(feedbacks []feedback) ([]feedback, []feedback) {
|
||||
}
|
||||
return eqFB, ranFB
|
||||
}
|
||||
|
||||
// formatBuckets formats bucket from lowBkt to highBkt.
|
||||
func formatBuckets(hg *Histogram, lowBkt, highBkt, idxCols int) string {
|
||||
if lowBkt == highBkt {
|
||||
return hg.bucketToString(lowBkt, idxCols)
|
||||
}
|
||||
if lowBkt+1 == highBkt {
|
||||
return fmt.Sprintf("%s, %s", hg.bucketToString(lowBkt, 0), hg.bucketToString(highBkt, 0))
|
||||
}
|
||||
// do not care the middle buckets
|
||||
return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.bucketToString(lowBkt, 0),
|
||||
highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.bucketToString(highBkt, 0))
|
||||
}
|
||||
|
||||
func colRangeToStr(c *Column, ran *ranger.Range, actual int64, factor float64) string {
|
||||
lowCount, lowBkt := c.lessRowCountWithBktIdx(ran.LowVal[0])
|
||||
highCount, highBkt := c.lessRowCountWithBktIdx(ran.HighVal[0])
|
||||
return fmt.Sprintf("range: %s, actual: %d, expected: %d, buckets: {%s}", ran.String(), actual,
|
||||
int64((highCount-lowCount)*factor), formatBuckets(&c.Histogram, lowBkt, highBkt, 0))
|
||||
}
|
||||
|
||||
func logForPK(prefix string, c *Column, ranges []*ranger.Range, actual []int64, factor float64) {
|
||||
for i, ran := range ranges {
|
||||
if ran.LowVal[0].GetInt64()+1 >= ran.HighVal[0].GetInt64() {
|
||||
continue
|
||||
}
|
||||
log.Debugf("%s column: %s, %s", prefix, c.Info.Name, colRangeToStr(c, ran, actual[i], factor))
|
||||
}
|
||||
}
|
||||
|
||||
func logForIndexRange(idx *Index, ran *ranger.Range, actual int64, factor float64) string {
|
||||
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
|
||||
lb, err := codec.EncodeKey(sc, nil, ran.LowVal...)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
rb, err := codec.EncodeKey(sc, nil, ran.HighVal...)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if idx.CMSketch != nil && bytes.Compare(kv.Key(lb).PrefixNext(), rb) >= 0 {
|
||||
str, err := types.DatumsToString(ran.LowVal, true)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf("value: %s, actual: %d, expected: %d", str, actual, int64(float64(idx.QueryBytes(lb))*factor))
|
||||
}
|
||||
l, r := types.NewBytesDatum(lb), types.NewBytesDatum(rb)
|
||||
lowCount, lowBkt := idx.lessRowCountWithBktIdx(l)
|
||||
highCount, highBkt := idx.lessRowCountWithBktIdx(r)
|
||||
return fmt.Sprintf("range: %s, actual: %d, expected: %d, histogram: {%s}", ran.String(), actual,
|
||||
int64((highCount-lowCount)*factor), formatBuckets(&idx.Histogram, lowBkt, highBkt, len(idx.Info.Columns)))
|
||||
}
|
||||
|
||||
func logForIndex(prefix string, t *Table, idx *Index, ranges []*ranger.Range, actual []int64, factor float64) {
|
||||
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
|
||||
if idx.CMSketch == nil || idx.statsVer != version1 {
|
||||
for i, ran := range ranges {
|
||||
log.Debugf("%s index: %s, %s", prefix, idx.Info.Name.O, logForIndexRange(idx, ran, actual[i], factor))
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, ran := range ranges {
|
||||
rangePosition := getOrdinalOfRangeCond(sc, ran)
|
||||
// only contains range or equality query
|
||||
if rangePosition == 0 || rangePosition == len(ran.LowVal) {
|
||||
log.Debugf("%s index: %s, %s", prefix, idx.Info.Name.O, logForIndexRange(idx, ran, actual[i], factor))
|
||||
continue
|
||||
}
|
||||
equalityString, err := types.DatumsToString(ran.LowVal[:rangePosition], true)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
equalityCount := idx.CMSketch.QueryBytes(bytes)
|
||||
rang := ranger.Range{
|
||||
LowVal: []types.Datum{ran.LowVal[rangePosition]},
|
||||
HighVal: []types.Datum{ran.HighVal[rangePosition]},
|
||||
}
|
||||
colName := idx.Info.Columns[rangePosition].Name.L
|
||||
var rangeString string
|
||||
// prefer index stats over column stats
|
||||
if idx, ok := t.colName2Idx[colName]; ok {
|
||||
if t.Indices[idx] == nil {
|
||||
return
|
||||
}
|
||||
rangeString = logForIndexRange(t.Indices[idx], &rang, -1, factor)
|
||||
} else {
|
||||
id := t.colName2ID[colName]
|
||||
if t.Columns[id] == nil {
|
||||
return
|
||||
}
|
||||
rangeString = colRangeToStr(t.Columns[t.colName2ID[colName]], &rang, -1, factor)
|
||||
}
|
||||
log.Debugf("%s index: %s, actual: %d, equality: %s, expected equality: %d, %s", prefix, idx.Info.Name.O,
|
||||
actual[i], equalityString, equalityCount, rangeString)
|
||||
}
|
||||
}
|
||||
|
||||
func (q *QueryFeedback) logDetailedInfo(h *Handle) {
|
||||
t, ok := h.statsCache.Load().(statsCache)[q.tableID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
isIndex := q.hist.isIndexHist()
|
||||
ranges, err := q.DecodeToRanges(isIndex)
|
||||
if err != nil {
|
||||
log.Debug(err)
|
||||
return
|
||||
}
|
||||
actual := make([]int64, 0, len(q.feedback))
|
||||
for _, fb := range q.feedback {
|
||||
actual = append(actual, fb.count)
|
||||
}
|
||||
logPrefix := fmt.Sprintf("[stats-feedback] %s,", t.name)
|
||||
if isIndex {
|
||||
idx := t.Indices[q.hist.ID]
|
||||
if idx == nil {
|
||||
return
|
||||
}
|
||||
logForIndex(logPrefix, t, idx, ranges, actual, idx.getIncreaseFactor(t.Count))
|
||||
} else {
|
||||
c := t.Columns[q.hist.ID]
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
logForPK(logPrefix, c, ranges, actual, c.getIncreaseFactor(t.Count))
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,13 +71,13 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
|
||||
defer func() { defaultBucketCount = originBucketCount }()
|
||||
c.Assert(UpdateHistogram(q.Hist(), q).ToString(0), Equals,
|
||||
"column:0 ndv:0 totColSize:0\n"+
|
||||
"num: 10000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
|
||||
"num: 10008\tlower_bound: 2\tupper_bound: 7\trepeats: 0\n"+
|
||||
"num: 10019\tlower_bound: 8\tupper_bound: 19\trepeats: 0\n"+
|
||||
"num: 10019\tlower_bound: 20\tupper_bound: 20\trepeats: 0\n"+
|
||||
"num: 10037\tlower_bound: 21\tupper_bound: 39\trepeats: 0\n"+
|
||||
"num: 10055\tlower_bound: 40\tupper_bound: 58\trepeats: 0\n"+
|
||||
"num: 10057\tlower_bound: 59\tupper_bound: 60\trepeats: 0")
|
||||
"num: 10000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
|
||||
"num: 8 lower_bound: 2 upper_bound: 7 repeats: 0\n"+
|
||||
"num: 11 lower_bound: 8 upper_bound: 19 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 20 upper_bound: 20 repeats: 0\n"+
|
||||
"num: 18 lower_bound: 21 upper_bound: 39 repeats: 0\n"+
|
||||
"num: 18 lower_bound: 40 upper_bound: 58 repeats: 0\n"+
|
||||
"num: 2 lower_bound: 59 upper_bound: 60 repeats: 0")
|
||||
}
|
||||
|
||||
func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
|
||||
@ -91,12 +91,12 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
|
||||
buckets, isNewBuckets, totalCount := splitBuckets(q.Hist(), q)
|
||||
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
|
||||
"column:0 ndv:0 totColSize:0\n"+
|
||||
"num: 1\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
|
||||
"num: 1\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
|
||||
"num: 1\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+
|
||||
"num: 6\tlower_bound: 10\tupper_bound: 15\trepeats: 0\n"+
|
||||
"num: 6\tlower_bound: 16\tupper_bound: 20\trepeats: 0\n"+
|
||||
"num: 6\tlower_bound: 30\tupper_bound: 50\trepeats: 0")
|
||||
"num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
|
||||
"num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
|
||||
c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
|
||||
c.Assert(totalCount, Equals, int64(6))
|
||||
|
||||
@ -110,12 +110,12 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
|
||||
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
|
||||
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
|
||||
"column:0 ndv:0 totColSize:0\n"+
|
||||
"num: 100000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
|
||||
"num: 100000\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
|
||||
"num: 100000\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+
|
||||
"num: 100001\tlower_bound: 10\tupper_bound: 15\trepeats: 0\n"+
|
||||
"num: 100001\tlower_bound: 16\tupper_bound: 20\trepeats: 0\n"+
|
||||
"num: 100001\tlower_bound: 30\tupper_bound: 50\trepeats: 0")
|
||||
"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
|
||||
"num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
|
||||
c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
|
||||
c.Assert(totalCount, Equals, int64(100001))
|
||||
|
||||
@ -132,7 +132,7 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
|
||||
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
|
||||
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
|
||||
"column:0 ndv:0 totColSize:0\n"+
|
||||
"num: 1000000\tlower_bound: 0\tupper_bound: 1000000\trepeats: 0")
|
||||
"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0")
|
||||
c.Assert(isNewBuckets, DeepEquals, []bool{false})
|
||||
c.Assert(totalCount, Equals, int64(1000000))
|
||||
|
||||
@ -148,8 +148,8 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
|
||||
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
|
||||
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
|
||||
"column:0 ndv:0 totColSize:0\n"+
|
||||
"num: 1\tlower_bound: 0\tupper_bound: 10\trepeats: 0\n"+
|
||||
"num: 1\tlower_bound: 11\tupper_bound: 1000000\trepeats: 0")
|
||||
"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+
|
||||
"num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0")
|
||||
c.Assert(isNewBuckets, DeepEquals, []bool{true, true})
|
||||
c.Assert(totalCount, Equals, int64(1))
|
||||
}
|
||||
@ -169,7 +169,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
|
||||
counts: []int64{1},
|
||||
isNewBuckets: []bool{false},
|
||||
bucketCount: 1,
|
||||
result: "column:0 ndv:0 totColSize:0\nnum: 1\tlower_bound: 1\tupper_bound: 2\trepeats: 0",
|
||||
result: "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0",
|
||||
},
|
||||
{
|
||||
points: []int64{1, 2, 2, 3, 3, 4},
|
||||
@ -177,8 +177,8 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
|
||||
isNewBuckets: []bool{false, false, false},
|
||||
bucketCount: 2,
|
||||
result: "column:0 ndv:0 totColSize:0\n" +
|
||||
"num: 100000\tlower_bound: 1\tupper_bound: 2\trepeats: 0\n" +
|
||||
"num: 100002\tlower_bound: 2\tupper_bound: 4\trepeats: 0",
|
||||
"num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" +
|
||||
"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0",
|
||||
},
|
||||
// test do not merge if the result bucket count is too large
|
||||
{
|
||||
@ -187,9 +187,9 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
|
||||
isNewBuckets: []bool{false, false, false, false},
|
||||
bucketCount: 3,
|
||||
result: "column:0 ndv:0 totColSize:0\n" +
|
||||
"num: 2\tlower_bound: 1\tupper_bound: 3\trepeats: 0\n" +
|
||||
"num: 100002\tlower_bound: 3\tupper_bound: 4\trepeats: 0\n" +
|
||||
"num: 200002\tlower_bound: 4\tupper_bound: 5\trepeats: 0",
|
||||
"num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" +
|
||||
"num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" +
|
||||
"num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0",
|
||||
},
|
||||
}
|
||||
for _, t := range tests {
|
||||
|
||||
@ -159,6 +159,7 @@ func (h *Handle) Update(is infoschema.InfoSchema) error {
|
||||
tbl.Version = version
|
||||
tbl.Count = count
|
||||
tbl.ModifyCount = modifyCount
|
||||
tbl.name = getFullTableName(is, tableInfo)
|
||||
tables = append(tables, tbl)
|
||||
}
|
||||
h.mu.Lock()
|
||||
|
||||
@ -341,7 +341,7 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) {
|
||||
if idxCols == 0 {
|
||||
return value.ToString()
|
||||
}
|
||||
decodedVals, err := codec.Decode(value.GetBytes(), idxCols)
|
||||
decodedVals, err := codec.DecodeRange(value.GetBytes(), idxCols)
|
||||
if err != nil {
|
||||
return "", errors.Trace(err)
|
||||
}
|
||||
@ -352,6 +352,14 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) {
|
||||
return str, nil
|
||||
}
|
||||
|
||||
func (hg *Histogram) bucketToString(bktID, idxCols int) string {
|
||||
upperVal, err := ValueToString(hg.GetUpper(bktID), idxCols)
|
||||
terror.Log(errors.Trace(err))
|
||||
lowerVal, err := ValueToString(hg.GetLower(bktID), idxCols)
|
||||
terror.Log(errors.Trace(err))
|
||||
return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat)
|
||||
}
|
||||
|
||||
// ToString gets the string representation for the histogram.
|
||||
func (hg *Histogram) ToString(idxCols int) string {
|
||||
strs := make([]string, 0, hg.Len()+1)
|
||||
@ -361,11 +369,7 @@ func (hg *Histogram) ToString(idxCols int) string {
|
||||
strs = append(strs, fmt.Sprintf("column:%d ndv:%d totColSize:%d", hg.ID, hg.NDV, hg.TotColSize))
|
||||
}
|
||||
for i := 0; i < hg.Len(); i++ {
|
||||
upperVal, err := ValueToString(hg.GetUpper(i), idxCols)
|
||||
terror.Log(errors.Trace(err))
|
||||
lowerVal, err := ValueToString(hg.GetLower(i), idxCols)
|
||||
terror.Log(errors.Trace(err))
|
||||
strs = append(strs, fmt.Sprintf("num: %d\tlower_bound: %s\tupper_bound: %s\trepeats: %d", hg.Buckets[i].Count, lowerVal, upperVal, hg.Buckets[i].Repeat))
|
||||
strs = append(strs, hg.bucketToString(i, idxCols))
|
||||
}
|
||||
return strings.Join(strs, "\n")
|
||||
}
|
||||
@ -405,14 +409,14 @@ func (hg *Histogram) greaterAndEqRowCount(value types.Datum) float64 {
|
||||
}
|
||||
|
||||
// lessRowCount estimates the row count where the column less than value.
|
||||
func (hg *Histogram) lessRowCount(value types.Datum) float64 {
|
||||
func (hg *Histogram) lessRowCountWithBktIdx(value types.Datum) (float64, int) {
|
||||
// all the values is null
|
||||
if hg.Bounds == nil {
|
||||
return 0
|
||||
return 0, 0
|
||||
}
|
||||
index, match := hg.Bounds.LowerBound(0, &value)
|
||||
if index == hg.Bounds.NumRows() {
|
||||
return hg.totalRowCount()
|
||||
return hg.totalRowCount(), hg.Len() - 1
|
||||
}
|
||||
// Since we store the lower and upper bound together, so dividing the index by 2 will get the bucket index.
|
||||
bucketIdx := index / 2
|
||||
@ -423,11 +427,16 @@ func (hg *Histogram) lessRowCount(value types.Datum) float64 {
|
||||
}
|
||||
if index%2 == 1 {
|
||||
if match {
|
||||
return curCount - curRepeat
|
||||
return curCount - curRepeat, bucketIdx
|
||||
}
|
||||
return preCount + hg.calcFraction(bucketIdx, &value)*(curCount-curRepeat-preCount)
|
||||
return preCount + hg.calcFraction(bucketIdx, &value)*(curCount-curRepeat-preCount), bucketIdx
|
||||
}
|
||||
return preCount
|
||||
return preCount, bucketIdx
|
||||
}
|
||||
|
||||
func (hg *Histogram) lessRowCount(value types.Datum) float64 {
|
||||
result, _ := hg.lessRowCountWithBktIdx(value)
|
||||
return result
|
||||
}
|
||||
|
||||
// lessAndEqRowCount estimates the row count where the column less than or equal to value.
|
||||
|
||||
@ -46,6 +46,7 @@ const (
|
||||
type Table struct {
|
||||
HistColl
|
||||
Version uint64
|
||||
name string
|
||||
}
|
||||
|
||||
// HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity.
|
||||
@ -88,6 +89,7 @@ func (t *Table) copy() *Table {
|
||||
nt := &Table{
|
||||
HistColl: newHistColl,
|
||||
Version: t.Version,
|
||||
name: t.name,
|
||||
}
|
||||
return nt
|
||||
}
|
||||
|
||||
@ -163,6 +163,13 @@ func mergeQueryFeedback(lq []*QueryFeedback, rq []*QueryFeedback) []*QueryFeedba
|
||||
return lq
|
||||
}
|
||||
|
||||
// Thresholds a query feedback must reach before its detailed information is
// logged. They are variables rather than constants so tests can override them.
var (
	// MinLogScanCount is the minimum scan count for a feedback to be logged.
	MinLogScanCount int64 = 1000
	// MinLogErrorRate is the minimum error rate for a feedback to be logged.
	MinLogErrorRate = 0.5
)
|
||||
|
||||
// StoreQueryFeedback merges the feedback into the stats collector.
|
||||
func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{}, h *Handle) error {
|
||||
q := feedback.(*QueryFeedback)
|
||||
@ -185,6 +192,9 @@ func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{}, h *Hand
|
||||
} else {
|
||||
rate = math.Abs(expected-float64(q.actual)) / float64(q.actual)
|
||||
}
|
||||
if rate >= MinLogErrorRate && (q.actual >= MinLogScanCount || q.expected >= MinLogScanCount) {
|
||||
q.logDetailedInfo(h)
|
||||
}
|
||||
metrics.StatsInaccuracyRate.Observe(rate)
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
@ -20,6 +20,7 @@ import (
|
||||
|
||||
. "github.com/pingcap/check"
|
||||
"github.com/pingcap/tidb/domain"
|
||||
"github.com/pingcap/tidb/executor"
|
||||
"github.com/pingcap/tidb/kv"
|
||||
"github.com/pingcap/tidb/model"
|
||||
"github.com/pingcap/tidb/mysql"
|
||||
@ -30,6 +31,7 @@ import (
|
||||
"github.com/pingcap/tidb/util/ranger"
|
||||
"github.com/pingcap/tidb/util/testkit"
|
||||
"github.com/pingcap/tidb/util/testleak"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var _ = Suite(&testStatsUpdateSuite{})
|
||||
@ -555,25 +557,25 @@ func (s *testStatsUpdateSuite) TestQueryFeedback(c *C) {
|
||||
// test primary key feedback
|
||||
sql: "select * from t where t.a <= 5",
|
||||
hist: "column:1 ndv:3 totColSize:0\n" +
|
||||
"num: 1\tlower_bound: -9223372036854775808\tupper_bound: 1\trepeats: 0\n" +
|
||||
"num: 2\tlower_bound: 2\tupper_bound: 2\trepeats: 1\n" +
|
||||
"num: 4\tlower_bound: 3\tupper_bound: 5\trepeats: 0",
|
||||
"num: 1 lower_bound: -9223372036854775808 upper_bound: 1 repeats: 0\n" +
|
||||
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n" +
|
||||
"num: 2 lower_bound: 3 upper_bound: 5 repeats: 0",
|
||||
idxCols: 0,
|
||||
},
|
||||
{
|
||||
// test index feedback by double read
|
||||
sql: "select * from t use index(idx) where t.b <= 5",
|
||||
hist: "index:1 ndv:2\n" +
|
||||
"num: 2\tlower_bound: \tupper_bound: 2\trepeats: 0\n" +
|
||||
"num: 4\tlower_bound: 3\tupper_bound: 6\trepeats: 0",
|
||||
"num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n" +
|
||||
"num: 2 lower_bound: 3 upper_bound: 6 repeats: 0",
|
||||
idxCols: 1,
|
||||
},
|
||||
{
|
||||
// test index feedback by single read
|
||||
sql: "select b from t use index(idx) where t.b <= 5",
|
||||
hist: "index:1 ndv:2\n" +
|
||||
"num: 2\tlower_bound: \tupper_bound: 2\trepeats: 0\n" +
|
||||
"num: 4\tlower_bound: 3\tupper_bound: 6\trepeats: 0",
|
||||
"num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n" +
|
||||
"num: 2 lower_bound: 3 upper_bound: 6 repeats: 0",
|
||||
idxCols: 1,
|
||||
},
|
||||
}
|
||||
@ -710,10 +712,9 @@ func (s *testStatsUpdateSuite) TestUpdateStatsByLocalFeedback(c *C) {
|
||||
tbl = h.GetTableStats(tblInfo)
|
||||
|
||||
c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
|
||||
"num: 1\tlower_bound: 1\tupper_bound: 1\trepeats: 1\n"+
|
||||
"num: 2\tlower_bound: 2\tupper_bound: 2\trepeats: 1\n"+
|
||||
"num: 4\tlower_bound: 3\tupper_bound: 9223372036854775807\trepeats: 0")
|
||||
|
||||
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+
|
||||
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n"+
|
||||
"num: 2 lower_bound: 3 upper_bound: 9223372036854775807 repeats: 0")
|
||||
sc := &stmtctx.StatementContext{TimeZone: time.Local}
|
||||
low, err := codec.EncodeKey(sc, nil, types.NewIntDatum(5))
|
||||
c.Assert(err, IsNil)
|
||||
@ -721,9 +722,85 @@ func (s *testStatsUpdateSuite) TestUpdateStatsByLocalFeedback(c *C) {
|
||||
c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint32(2))
|
||||
|
||||
c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+
|
||||
"num: 2\tlower_bound: \tupper_bound: 2\trepeats: 0\n"+
|
||||
"num: 4\tlower_bound: 3\tupper_bound: 6\trepeats: 0")
|
||||
"num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n"+
|
||||
"num: 2 lower_bound: 3 upper_bound: 6 repeats: 0")
|
||||
|
||||
// Test that it won't cause panic after update.
|
||||
testKit.MustQuery("select * from t use index(idx) where b > 0")
|
||||
}
|
||||
|
||||
type logHook struct {
|
||||
results string
|
||||
}
|
||||
|
||||
func (hook *logHook) Levels() []log.Level {
|
||||
return []log.Level{log.DebugLevel}
|
||||
}
|
||||
|
||||
func (hook *logHook) Fire(entry *log.Entry) error {
|
||||
message := entry.Message
|
||||
if idx := strings.Index(message, "[stats"); idx != -1 {
|
||||
hook.results = hook.results + message[idx:]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
|
||||
defer cleanEnv(c, s.store, s.do)
|
||||
|
||||
oriProbability := statistics.FeedbackProbability
|
||||
oriMinLogCount := statistics.MinLogScanCount
|
||||
oriMinError := statistics.MinLogErrorRate
|
||||
oriLevel := log.GetLevel()
|
||||
oriBucketNum := executor.GetMaxBucketSizeForTest()
|
||||
defer func() {
|
||||
statistics.FeedbackProbability = oriProbability
|
||||
statistics.MinLogScanCount = oriMinLogCount
|
||||
statistics.MinLogErrorRate = oriMinError
|
||||
executor.SetMaxBucketSizeForTest(oriBucketNum)
|
||||
log.SetLevel(oriLevel)
|
||||
}()
|
||||
executor.SetMaxBucketSizeForTest(4)
|
||||
statistics.FeedbackProbability = 1
|
||||
statistics.MinLogScanCount = 0
|
||||
statistics.MinLogErrorRate = 0
|
||||
|
||||
testKit := testkit.NewTestKit(c, s.store)
|
||||
testKit.MustExec("use test")
|
||||
testKit.MustExec("create table t (a bigint(64), b bigint(64), primary key(a), index idx(b), index idx_ba(b,a))")
|
||||
for i := 0; i < 20; i++ {
|
||||
testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
|
||||
}
|
||||
testKit.MustExec("analyze table t")
|
||||
tests := []struct {
|
||||
sql string
|
||||
result string
|
||||
}{
|
||||
{
|
||||
sql: "select * from t where t.a <= 15",
|
||||
result: "[stats-feedback] test.t, column: a, range: [-inf,7), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}" +
|
||||
"[stats-feedback] test.t, column: a, range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}",
|
||||
},
|
||||
{
|
||||
sql: "select * from t use index(idx) where t.b <= 15",
|
||||
result: "[stats-feedback] test.t, index: idx, range: [-inf,7), actual: 8, expected: 7, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}" +
|
||||
"[stats-feedback] test.t, index: idx, range: [8,15), actual: 8, expected: 7, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}",
|
||||
},
|
||||
{
|
||||
sql: "select b from t use index(idx_ba) where b = 1 and a <= 5",
|
||||
result: "[stats-feedback] test.t, index: idx_ba, actual: 1, equality: 1, expected equality: 1, range: [-inf,6], actual: -1, expected: 6, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}",
|
||||
},
|
||||
{
|
||||
sql: "select b from t use index(idx_ba) where b = 1",
|
||||
result: "[stats-feedback] test.t, index: idx_ba, value: 1, actual: 1, expected: 1",
|
||||
},
|
||||
}
|
||||
log.SetLevel(log.DebugLevel)
|
||||
var hook logHook
|
||||
log.AddHook(&hook)
|
||||
for _, t := range tests {
|
||||
hook.results = ""
|
||||
testKit.MustQuery(t.sql)
|
||||
c.Assert(hook.results, Equals, t.result)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1827,12 +1827,21 @@ func handleTruncateError(sc *stmtctx.StatementContext) error {
|
||||
}
|
||||
|
||||
// DatumsToString converts several datums to formatted string.
|
||||
func DatumsToString(datums []Datum, handleNULL bool) (string, error) {
|
||||
func DatumsToString(datums []Datum, handleSpecialValue bool) (string, error) {
|
||||
var strs []string
|
||||
for _, datum := range datums {
|
||||
if datum.Kind() == KindNull && handleNULL {
|
||||
strs = append(strs, "NULL")
|
||||
continue
|
||||
if handleSpecialValue {
|
||||
switch datum.Kind() {
|
||||
case KindNull:
|
||||
strs = append(strs, "NULL")
|
||||
continue
|
||||
case KindMinNotNull:
|
||||
strs = append(strs, "-inf")
|
||||
continue
|
||||
case KindMaxValue:
|
||||
strs = append(strs, "+inf")
|
||||
continue
|
||||
}
|
||||
}
|
||||
str, err := datum.ToString()
|
||||
if err != nil {
|
||||
|
||||
@ -311,6 +311,43 @@ func Decode(b []byte, size int) ([]types.Datum, error) {
|
||||
return values, nil
|
||||
}
|
||||
|
||||
// DecodeRange decodes the range values from a byte slice that generated by EncodeKey.
|
||||
// It handles some special values like `MinNotNull` and `MaxValueDatum`.
|
||||
func DecodeRange(b []byte, size int) ([]types.Datum, error) {
|
||||
if len(b) < 1 {
|
||||
return nil, errors.New("invalid encoded key: length of key is zero")
|
||||
}
|
||||
|
||||
var (
|
||||
err error
|
||||
values = make([]types.Datum, 0, size)
|
||||
)
|
||||
|
||||
for len(b) > 1 {
|
||||
var d types.Datum
|
||||
b, d, err = DecodeOne(b)
|
||||
if err != nil {
|
||||
return nil, errors.Trace(err)
|
||||
}
|
||||
values = append(values, d)
|
||||
}
|
||||
|
||||
if len(b) == 1 {
|
||||
switch b[0] {
|
||||
case NilFlag:
|
||||
values = append(values, types.Datum{})
|
||||
case bytesFlag:
|
||||
values = append(values, types.MinNotNullDatum())
|
||||
// `maxFlag + 1` for PrefixNext
|
||||
case maxFlag, maxFlag + 1:
|
||||
values = append(values, types.MaxValueDatum())
|
||||
default:
|
||||
return nil, errors.Errorf("invalid encoded key flag %v", b[0])
|
||||
}
|
||||
}
|
||||
return values, nil
|
||||
}
|
||||
|
||||
// DecodeOne decodes one datum from a byte slice generated with EncodeKey or EncodeValue.
|
||||
func DecodeOne(b []byte) (remain []byte, d types.Datum, err error) {
|
||||
if len(b) < 1 {
|
||||
|
||||
Reference in New Issue
Block a user