diff --git a/statistics/boostrap.go b/statistics/boostrap.go index 0a6862394a..fb28b42cda 100644 --- a/statistics/boostrap.go +++ b/statistics/boostrap.go @@ -14,6 +14,8 @@ package statistics import ( + "fmt" + "github.com/juju/errors" "github.com/pingcap/tidb/infoschema" "github.com/pingcap/tidb/model" @@ -49,6 +51,7 @@ func (h *Handle) initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache tbl := &Table{ HistColl: newHistColl, Version: row.GetUint64(0), + name: getFullTableName(is, tableInfo), } tables[physicalID] = tbl } @@ -257,3 +260,14 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) error { h.statsCache.Store(tables) return nil } + +func getFullTableName(is infoschema.InfoSchema, tblInfo *model.TableInfo) string { + for _, schema := range is.AllSchemas() { + if t, err := is.TableByName(schema.Name, tblInfo.Name); err == nil { + if t.Meta().ID == tblInfo.ID { + return schema.Name.O + "." + tblInfo.Name.O + } + } + } + return fmt.Sprintf("%d", tblInfo.ID) +} diff --git a/statistics/feedback.go b/statistics/feedback.go index 451e076878..652b02386f 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -16,6 +16,7 @@ package statistics import ( "bytes" "encoding/gob" + "fmt" "math" "math/rand" "sort" @@ -102,11 +103,11 @@ func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error) { if isIndex { var err error // As we do not know the origin length, just use a custom value here. - lowVal, err = codec.Decode(low.GetBytes(), 4) + lowVal, err = codec.DecodeRange(low.GetBytes(), 4) if err != nil { return nil, errors.Trace(err) } - highVal, err = codec.Decode(high.GetBytes(), 4) + highVal, err = codec.DecodeRange(high.GetBytes(), 4) if err != nil { return nil, errors.Trace(err) } @@ -759,3 +760,135 @@ func splitFeedbackByQueryType(feedbacks []feedback) ([]feedback, []feedback) { } return eqFB, ranFB } + +// formatBuckets formats bucket from lowBkt to highBkt. +func formatBuckets(hg *Histogram, lowBkt, highBkt, idxCols int) string { + if lowBkt == highBkt { + return hg.bucketToString(lowBkt, idxCols) + } + if lowBkt+1 == highBkt { + return fmt.Sprintf("%s, %s", hg.bucketToString(lowBkt, 0), hg.bucketToString(highBkt, 0)) + } + // do not care the middle buckets + return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.bucketToString(lowBkt, 0), + highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.bucketToString(highBkt, 0)) +} + +func colRangeToStr(c *Column, ran *ranger.Range, actual int64, factor float64) string { + lowCount, lowBkt := c.lessRowCountWithBktIdx(ran.LowVal[0]) + highCount, highBkt := c.lessRowCountWithBktIdx(ran.HighVal[0]) + return fmt.Sprintf("range: %s, actual: %d, expected: %d, buckets: {%s}", ran.String(), actual, + int64((highCount-lowCount)*factor), formatBuckets(&c.Histogram, lowBkt, highBkt, 0)) +} + +func logForPK(prefix string, c *Column, ranges []*ranger.Range, actual []int64, factor float64) { + for i, ran := range ranges { + if ran.LowVal[0].GetInt64()+1 >= ran.HighVal[0].GetInt64() { + continue + } + log.Debugf("%s column: %s, %s", prefix, c.Info.Name, colRangeToStr(c, ran, actual[i], factor)) + } +} + +func logForIndexRange(idx *Index, ran *ranger.Range, actual int64, factor float64) string { + sc := &stmtctx.StatementContext{TimeZone: time.UTC} + lb, err := codec.EncodeKey(sc, nil, ran.LowVal...) + if err != nil { + return "" + } + rb, err := codec.EncodeKey(sc, nil, ran.HighVal...) + if err != nil { + return "" + } + if idx.CMSketch != nil && bytes.Compare(kv.Key(lb).PrefixNext(), rb) >= 0 { + str, err := types.DatumsToString(ran.LowVal, true) + if err != nil { + return "" + } + return fmt.Sprintf("value: %s, actual: %d, expected: %d", str, actual, int64(float64(idx.QueryBytes(lb))*factor)) + } + l, r := types.NewBytesDatum(lb), types.NewBytesDatum(rb) + lowCount, lowBkt := idx.lessRowCountWithBktIdx(l) + highCount, highBkt := idx.lessRowCountWithBktIdx(r) + return fmt.Sprintf("range: %s, actual: %d, expected: %d, histogram: {%s}", ran.String(), actual, + int64((highCount-lowCount)*factor), formatBuckets(&idx.Histogram, lowBkt, highBkt, len(idx.Info.Columns))) +} + +func logForIndex(prefix string, t *Table, idx *Index, ranges []*ranger.Range, actual []int64, factor float64) { + sc := &stmtctx.StatementContext{TimeZone: time.UTC} + if idx.CMSketch == nil || idx.statsVer != version1 { + for i, ran := range ranges { + log.Debugf("%s index: %s, %s", prefix, idx.Info.Name.O, logForIndexRange(idx, ran, actual[i], factor)) + } + return + } + for i, ran := range ranges { + rangePosition := getOrdinalOfRangeCond(sc, ran) + // only contains range or equality query + if rangePosition == 0 || rangePosition == len(ran.LowVal) { + log.Debugf("%s index: %s, %s", prefix, idx.Info.Name.O, logForIndexRange(idx, ran, actual[i], factor)) + continue + } + equalityString, err := types.DatumsToString(ran.LowVal[:rangePosition], true) + if err != nil { + continue + } + bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...) + if err != nil { + continue + } + equalityCount := idx.CMSketch.QueryBytes(bytes) + rang := ranger.Range{ + LowVal: []types.Datum{ran.LowVal[rangePosition]}, + HighVal: []types.Datum{ran.HighVal[rangePosition]}, + } + colName := idx.Info.Columns[rangePosition].Name.L + var rangeString string + // prefer index stats over column stats + if idx, ok := t.colName2Idx[colName]; ok { + if t.Indices[idx] == nil { + return + } + rangeString = logForIndexRange(t.Indices[idx], &rang, -1, factor) + } else { + id := t.colName2ID[colName] + if t.Columns[id] == nil { + return + } + rangeString = colRangeToStr(t.Columns[t.colName2ID[colName]], &rang, -1, factor) + } + log.Debugf("%s index: %s, actual: %d, equality: %s, expected equality: %d, %s", prefix, idx.Info.Name.O, + actual[i], equalityString, equalityCount, rangeString) + } +} + +func (q *QueryFeedback) logDetailedInfo(h *Handle) { + t, ok := h.statsCache.Load().(statsCache)[q.tableID] + if !ok { + return + } + isIndex := q.hist.isIndexHist() + ranges, err := q.DecodeToRanges(isIndex) + if err != nil { + log.Debug(err) + return + } + actual := make([]int64, 0, len(q.feedback)) + for _, fb := range q.feedback { + actual = append(actual, fb.count) + } + logPrefix := fmt.Sprintf("[stats-feedback] %s,", t.name) + if isIndex { + idx := t.Indices[q.hist.ID] + if idx == nil { + return + } + logForIndex(logPrefix, t, idx, ranges, actual, idx.getIncreaseFactor(t.Count)) + } else { + c := t.Columns[q.hist.ID] + if c == nil { + return + } + logForPK(logPrefix, c, ranges, actual, c.getIncreaseFactor(t.Count)) + } +} diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go index 17493375a1..4f78a133c8 100644 --- a/statistics/feedback_test.go +++ b/statistics/feedback_test.go @@ -71,13 +71,13 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) { defer func() { defaultBucketCount = originBucketCount }() c.Assert(UpdateHistogram(q.Hist(), q).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 10000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+ - "num: 10008\tlower_bound: 2\tupper_bound: 7\trepeats: 0\n"+ - "num: 10019\tlower_bound: 8\tupper_bound: 19\trepeats: 0\n"+ - "num: 10019\tlower_bound: 20\tupper_bound: 20\trepeats: 0\n"+ - "num: 10037\tlower_bound: 21\tupper_bound: 39\trepeats: 0\n"+ - "num: 10055\tlower_bound: 40\tupper_bound: 58\trepeats: 0\n"+ - "num: 10057\tlower_bound: 59\tupper_bound: 60\trepeats: 0") + "num: 10000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ + "num: 8 lower_bound: 2 upper_bound: 7 repeats: 0\n"+ + "num: 11 lower_bound: 8 upper_bound: 19 repeats: 0\n"+ + "num: 0 lower_bound: 20 upper_bound: 20 repeats: 0\n"+ + "num: 18 lower_bound: 21 upper_bound: 39 repeats: 0\n"+ + "num: 18 lower_bound: 40 upper_bound: 58 repeats: 0\n"+ + "num: 2 lower_bound: 59 upper_bound: 60 repeats: 0") } func (s *testFeedbackSuite) TestSplitBuckets(c *C) { @@ -91,12 +91,12 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { buckets, isNewBuckets, totalCount := splitBuckets(q.Hist(), q) c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 1\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+ - "num: 1\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+ - "num: 1\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+ - "num: 6\tlower_bound: 10\tupper_bound: 15\trepeats: 0\n"+ - "num: 6\tlower_bound: 16\tupper_bound: 20\trepeats: 0\n"+ - "num: 6\tlower_bound: 30\tupper_bound: 50\trepeats: 0") + "num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ + "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+ + "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ + "num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+ + "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+ + "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0") c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false}) c.Assert(totalCount, Equals, int64(6)) @@ -110,12 +110,12 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q) c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 100000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+ - "num: 100000\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+ - "num: 100000\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+ - "num: 100001\tlower_bound: 10\tupper_bound: 15\trepeats: 0\n"+ - "num: 100001\tlower_bound: 16\tupper_bound: 20\trepeats: 0\n"+ - "num: 100001\tlower_bound: 30\tupper_bound: 50\trepeats: 0") + "num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ + "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+ + "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ + "num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+ + "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+ + "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0") c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false}) c.Assert(totalCount, Equals, int64(100001)) @@ -132,7 +132,7 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q) c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 1000000\tlower_bound: 0\tupper_bound: 1000000\trepeats: 0") + "num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0") c.Assert(isNewBuckets, DeepEquals, []bool{false}) c.Assert(totalCount, Equals, int64(1000000)) @@ -148,8 +148,8 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q) c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 1\tlower_bound: 0\tupper_bound: 10\trepeats: 0\n"+ - "num: 1\tlower_bound: 11\tupper_bound: 1000000\trepeats: 0") + "num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+ + "num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0") c.Assert(isNewBuckets, DeepEquals, []bool{true, true}) c.Assert(totalCount, Equals, int64(1)) } @@ -169,7 +169,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) { counts: []int64{1}, isNewBuckets: []bool{false}, bucketCount: 1, - result: "column:0 ndv:0 totColSize:0\nnum: 1\tlower_bound: 1\tupper_bound: 2\trepeats: 0", + result: "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0", }, { points: []int64{1, 2, 2, 3, 3, 4}, @@ -177,8 +177,8 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) { isNewBuckets: []bool{false, false, false}, bucketCount: 2, result: "column:0 ndv:0 totColSize:0\n" + - "num: 100000\tlower_bound: 1\tupper_bound: 2\trepeats: 0\n" + - "num: 100002\tlower_bound: 2\tupper_bound: 4\trepeats: 0", + "num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" + + "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0", }, // test do not merge if the result bucket count is too large { @@ -187,9 +187,9 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) { isNewBuckets: []bool{false, false, false, false}, bucketCount: 3, result: "column:0 ndv:0 totColSize:0\n" + - "num: 2\tlower_bound: 1\tupper_bound: 3\trepeats: 0\n" + - "num: 100002\tlower_bound: 3\tupper_bound: 4\trepeats: 0\n" + - "num: 200002\tlower_bound: 4\tupper_bound: 5\trepeats: 0", + "num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" + + "num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" + + "num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0", }, } for _, t := range tests { diff --git a/statistics/handle.go b/statistics/handle.go index 9785b2a6ee..c0ddaec4e0 100644 --- a/statistics/handle.go +++ b/statistics/handle.go @@ -159,6 +159,7 @@ func (h *Handle) Update(is infoschema.InfoSchema) error { tbl.Version = version tbl.Count = count tbl.ModifyCount = modifyCount + tbl.name = getFullTableName(is, tableInfo) tables = append(tables, tbl) } h.mu.Lock() diff --git a/statistics/histogram.go b/statistics/histogram.go index 13195dc15f..372a2ff8f9 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -341,7 +341,7 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) { if idxCols == 0 { return value.ToString() } - decodedVals, err := codec.Decode(value.GetBytes(), idxCols) + decodedVals, err := codec.DecodeRange(value.GetBytes(), idxCols) if err != nil { return "", errors.Trace(err) } @@ -352,6 +352,14 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) { return str, nil } +func (hg *Histogram) bucketToString(bktID, idxCols int) string { + upperVal, err := ValueToString(hg.GetUpper(bktID), idxCols) + terror.Log(errors.Trace(err)) + lowerVal, err := ValueToString(hg.GetLower(bktID), idxCols) + terror.Log(errors.Trace(err)) + return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat) +} + // ToString gets the string representation for the histogram. func (hg *Histogram) ToString(idxCols int) string { strs := make([]string, 0, hg.Len()+1) @@ -361,11 +369,7 @@ func (hg *Histogram) ToString(idxCols int) string { strs = append(strs, fmt.Sprintf("column:%d ndv:%d totColSize:%d", hg.ID, hg.NDV, hg.TotColSize)) } for i := 0; i < hg.Len(); i++ { - upperVal, err := ValueToString(hg.GetUpper(i), idxCols) - terror.Log(errors.Trace(err)) - lowerVal, err := ValueToString(hg.GetLower(i), idxCols) - terror.Log(errors.Trace(err)) - strs = append(strs, fmt.Sprintf("num: %d\tlower_bound: %s\tupper_bound: %s\trepeats: %d", hg.Buckets[i].Count, lowerVal, upperVal, hg.Buckets[i].Repeat)) + strs = append(strs, hg.bucketToString(i, idxCols)) } return strings.Join(strs, "\n") } @@ -405,14 +409,14 @@ func (hg *Histogram) greaterAndEqRowCount(value types.Datum) float64 { } // lessRowCount estimates the row count where the column less than value. -func (hg *Histogram) lessRowCount(value types.Datum) float64 { +func (hg *Histogram) lessRowCountWithBktIdx(value types.Datum) (float64, int) { // all the values is null if hg.Bounds == nil { - return 0 + return 0, 0 } index, match := hg.Bounds.LowerBound(0, &value) if index == hg.Bounds.NumRows() { - return hg.totalRowCount() + return hg.totalRowCount(), hg.Len() - 1 } // Since we store the lower and upper bound together, so dividing the index by 2 will get the bucket index. bucketIdx := index / 2 @@ -423,11 +427,16 @@ func (hg *Histogram) lessRowCount(value types.Datum) float64 { } if index%2 == 1 { if match { - return curCount - curRepeat + return curCount - curRepeat, bucketIdx } - return preCount + hg.calcFraction(bucketIdx, &value)*(curCount-curRepeat-preCount) + return preCount + hg.calcFraction(bucketIdx, &value)*(curCount-curRepeat-preCount), bucketIdx } - return preCount + return preCount, bucketIdx +} + +func (hg *Histogram) lessRowCount(value types.Datum) float64 { + result, _ := hg.lessRowCountWithBktIdx(value) + return result } // lessAndEqRowCount estimates the row count where the column less than or equal to value. diff --git a/statistics/table.go b/statistics/table.go index 7ab1288237..e6d23b13a8 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -46,6 +46,7 @@ const ( type Table struct { HistColl Version uint64 + name string } // HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. @@ -88,6 +89,7 @@ func (t *Table) copy() *Table { nt := &Table{ HistColl: newHistColl, Version: t.Version, + name: t.name, } return nt } diff --git a/statistics/update.go b/statistics/update.go index 6d8631d6d1..fa4f732343 100644 --- a/statistics/update.go +++ b/statistics/update.go @@ -163,6 +163,13 @@ func mergeQueryFeedback(lq []*QueryFeedback, rq []*QueryFeedback) []*QueryFeedba return lq } +var ( + // MinLogScanCount is the minimum scan count for a feedback to be logged. + MinLogScanCount = int64(1000) + // MinLogErrorRate is the minimum error rate for a feedback to be logged. + MinLogErrorRate = 0.5 +) + // StoreQueryFeedback will merges the feedback into stats collector. func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{}, h *Handle) error { q := feedback.(*QueryFeedback) @@ -185,6 +192,9 @@ func (s *SessionStatsCollector) StoreQueryFeedback(feedback interface{}, h *Hand } else { rate = math.Abs(expected-float64(q.actual)) / float64(q.actual) } + if rate >= MinLogErrorRate && (q.actual >= MinLogScanCount || q.expected >= MinLogScanCount) { + q.logDetailedInfo(h) + } metrics.StatsInaccuracyRate.Observe(rate) s.Lock() defer s.Unlock() diff --git a/statistics/update_test.go b/statistics/update_test.go index 651d1e6982..5d41462039 100644 --- a/statistics/update_test.go +++ b/statistics/update_test.go @@ -20,6 +20,7 @@ import ( . "github.com/pingcap/check" "github.com/pingcap/tidb/domain" + "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/mysql" @@ -30,6 +31,7 @@ import ( "github.com/pingcap/tidb/util/ranger" "github.com/pingcap/tidb/util/testkit" "github.com/pingcap/tidb/util/testleak" + log "github.com/sirupsen/logrus" ) var _ = Suite(&testStatsUpdateSuite{}) @@ -555,25 +557,25 @@ func (s *testStatsUpdateSuite) TestQueryFeedback(c *C) { // test primary key feedback sql: "select * from t where t.a <= 5", hist: "column:1 ndv:3 totColSize:0\n" + - "num: 1\tlower_bound: -9223372036854775808\tupper_bound: 1\trepeats: 0\n" + - "num: 2\tlower_bound: 2\tupper_bound: 2\trepeats: 1\n" + - "num: 4\tlower_bound: 3\tupper_bound: 5\trepeats: 0", + "num: 1 lower_bound: -9223372036854775808 upper_bound: 1 repeats: 0\n" + + "num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n" + + "num: 2 lower_bound: 3 upper_bound: 5 repeats: 0", idxCols: 0, }, { // test index feedback by double read sql: "select * from t use index(idx) where t.b <= 5", hist: "index:1 ndv:2\n" + - "num: 2\tlower_bound: \tupper_bound: 2\trepeats: 0\n" + - "num: 4\tlower_bound: 3\tupper_bound: 6\trepeats: 0", + "num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n" + + "num: 2 lower_bound: 3 upper_bound: 6 repeats: 0", idxCols: 1, }, { // test index feedback by single read sql: "select b from t use index(idx) where t.b <= 5", hist: "index:1 ndv:2\n" + - "num: 2\tlower_bound: \tupper_bound: 2\trepeats: 0\n" + - "num: 4\tlower_bound: 3\tupper_bound: 6\trepeats: 0", + "num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n" + + "num: 2 lower_bound: 3 upper_bound: 6 repeats: 0", idxCols: 1, }, } @@ -710,10 +712,9 @@ func (s *testStatsUpdateSuite) TestUpdateStatsByLocalFeedback(c *C) { tbl = h.GetTableStats(tblInfo) c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+ - "num: 1\tlower_bound: 1\tupper_bound: 1\trepeats: 1\n"+ - "num: 2\tlower_bound: 2\tupper_bound: 2\trepeats: 1\n"+ - "num: 4\tlower_bound: 3\tupper_bound: 9223372036854775807\trepeats: 0") - + "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+ + "num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n"+ + "num: 2 lower_bound: 3 upper_bound: 9223372036854775807 repeats: 0") sc := &stmtctx.StatementContext{TimeZone: time.Local} low, err := codec.EncodeKey(sc, nil, types.NewIntDatum(5)) c.Assert(err, IsNil) @@ -721,9 +722,85 @@ func (s *testStatsUpdateSuite) TestUpdateStatsByLocalFeedback(c *C) { c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint32(2)) c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+ - "num: 2\tlower_bound: \tupper_bound: 2\trepeats: 0\n"+ - "num: 4\tlower_bound: 3\tupper_bound: 6\trepeats: 0") + "num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n"+ + "num: 2 lower_bound: 3 upper_bound: 6 repeats: 0") // Test that it won't cause panic after update. testKit.MustQuery("select * from t use index(idx) where b > 0") } + +type logHook struct { + results string +} + +func (hook *logHook) Levels() []log.Level { + return []log.Level{log.DebugLevel} +} + +func (hook *logHook) Fire(entry *log.Entry) error { + message := entry.Message + if idx := strings.Index(message, "[stats"); idx != -1 { + hook.results = hook.results + message[idx:] + } + return nil +} + +func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) { + defer cleanEnv(c, s.store, s.do) + + oriProbability := statistics.FeedbackProbability + oriMinLogCount := statistics.MinLogScanCount + oriMinError := statistics.MinLogErrorRate + oriLevel := log.GetLevel() + oriBucketNum := executor.GetMaxBucketSizeForTest() + defer func() { + statistics.FeedbackProbability = oriProbability + statistics.MinLogScanCount = oriMinLogCount + statistics.MinLogErrorRate = oriMinError + executor.SetMaxBucketSizeForTest(oriBucketNum) + log.SetLevel(oriLevel) + }() + executor.SetMaxBucketSizeForTest(4) + statistics.FeedbackProbability = 1 + statistics.MinLogScanCount = 0 + statistics.MinLogErrorRate = 0 + + testKit := testkit.NewTestKit(c, s.store) + testKit.MustExec("use test") + testKit.MustExec("create table t (a bigint(64), b bigint(64), primary key(a), index idx(b), index idx_ba(b,a))") + for i := 0; i < 20; i++ { + testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i)) + } + testKit.MustExec("analyze table t") + tests := []struct { + sql string + result string + }{ + { + sql: "select * from t where t.a <= 15", + result: "[stats-feedback] test.t, column: a, range: [-inf,7), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}" + + "[stats-feedback] test.t, column: a, range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}", + }, + { + sql: "select * from t use index(idx) where t.b <= 15", + result: "[stats-feedback] test.t, index: idx, range: [-inf,7), actual: 8, expected: 7, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}" + + "[stats-feedback] test.t, index: idx, range: [8,15), actual: 8, expected: 7, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}", + }, + { + sql: "select b from t use index(idx_ba) where b = 1 and a <= 5", + result: "[stats-feedback] test.t, index: idx_ba, actual: 1, equality: 1, expected equality: 1, range: [-inf,6], actual: -1, expected: 6, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}", + }, + { + sql: "select b from t use index(idx_ba) where b = 1", + result: "[stats-feedback] test.t, index: idx_ba, value: 1, actual: 1, expected: 1", + }, + } + log.SetLevel(log.DebugLevel) + var hook logHook + log.AddHook(&hook) + for _, t := range tests { + hook.results = "" + testKit.MustQuery(t.sql) + c.Assert(hook.results, Equals, t.result) + } +} diff --git a/types/datum.go b/types/datum.go index 228e91d3dc..cf4e886a0f 100644 --- a/types/datum.go +++ b/types/datum.go @@ -1827,12 +1827,21 @@ func handleTruncateError(sc *stmtctx.StatementContext) error { } // DatumsToString converts several datums to formatted string. -func DatumsToString(datums []Datum, handleNULL bool) (string, error) { +func DatumsToString(datums []Datum, handleSpecialValue bool) (string, error) { var strs []string for _, datum := range datums { - if datum.Kind() == KindNull && handleNULL { - strs = append(strs, "NULL") - continue + if handleSpecialValue { + switch datum.Kind() { + case KindNull: + strs = append(strs, "NULL") + continue + case KindMinNotNull: + strs = append(strs, "-inf") + continue + case KindMaxValue: + strs = append(strs, "+inf") + continue + } } str, err := datum.ToString() if err != nil { diff --git a/util/codec/codec.go b/util/codec/codec.go index 57b462f3e3..1af3817445 100644 --- a/util/codec/codec.go +++ b/util/codec/codec.go @@ -311,6 +311,43 @@ func Decode(b []byte, size int) ([]types.Datum, error) { return values, nil } +// DecodeRange decodes the range values from a byte slice that generated by EncodeKey. +// It handles some special values like `MinNotNull` and `MaxValueDatum`. +func DecodeRange(b []byte, size int) ([]types.Datum, error) { + if len(b) < 1 { + return nil, errors.New("invalid encoded key: length of key is zero") + } + + var ( + err error + values = make([]types.Datum, 0, size) + ) + + for len(b) > 1 { + var d types.Datum + b, d, err = DecodeOne(b) + if err != nil { + return nil, errors.Trace(err) + } + values = append(values, d) + } + + if len(b) == 1 { + switch b[0] { + case NilFlag: + values = append(values, types.Datum{}) + case bytesFlag: + values = append(values, types.MinNotNullDatum()) + // `maxFlag + 1` for PrefixNext + case maxFlag, maxFlag + 1: + values = append(values, types.MaxValueDatum()) + default: + return nil, errors.Errorf("invalid encoded key flag %v", b[0]) + } + } + return values, nil +} + // DecodeOne decodes on datum from a byte slice generated with EncodeKey or EncodeValue. func DecodeOne(b []byte) (remain []byte, d types.Datum, err error) { if len(b) < 1 {