planner: create variables for recognizing over and underestimation risk (#62910)
ref pingcap/tidb#59333
This commit is contained in:
@ -170,7 +170,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
|
||||
if idxExists && len(idxIDs) > 0 {
|
||||
idxID = idxIDs[0]
|
||||
}
|
||||
rangeCounts, _, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
|
||||
rangeCounts, _, _, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
|
||||
if !ok {
|
||||
return 0, false, corr
|
||||
}
|
||||
@ -180,7 +180,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
|
||||
}
|
||||
var rangeCount float64
|
||||
if idxExists {
|
||||
rangeCount, _, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges, nil)
|
||||
rangeCount, _, _, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges, nil)
|
||||
} else {
|
||||
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
|
||||
}
|
||||
@ -196,30 +196,30 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
|
||||
}
|
||||
|
||||
// getColumnRangeCounts estimates row count for each range respectively.
|
||||
func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) ([]float64, float64, bool) {
|
||||
func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) (rangeCounts []float64, minCount float64, maxCount float64, ok bool) {
|
||||
var err error
|
||||
var count, corrCount float64
|
||||
rangeCounts := make([]float64, len(ranges))
|
||||
var count float64
|
||||
rangeCounts = make([]float64, len(ranges))
|
||||
for i, ran := range ranges {
|
||||
if idxID >= 0 {
|
||||
idxHist := histColl.GetIdx(idxID)
|
||||
if statistics.IndexStatsIsInvalid(sctx, idxHist, histColl, idxID) {
|
||||
return nil, 0, false
|
||||
return nil, 0, 0, false
|
||||
}
|
||||
count, corrCount, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran}, nil)
|
||||
count, minCount, maxCount, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran}, nil)
|
||||
} else {
|
||||
colHist := histColl.GetCol(colID)
|
||||
if statistics.ColumnStatsIsInvalid(colHist, sctx, histColl, colID) {
|
||||
return nil, 0, false
|
||||
return nil, 0, 0, false
|
||||
}
|
||||
count, err = GetRowCountByColumnRanges(sctx, histColl, colID, []*ranger.Range{ran})
|
||||
}
|
||||
if err != nil {
|
||||
return nil, 0, false
|
||||
return nil, 0, 0, false
|
||||
}
|
||||
rangeCounts[i] = count
|
||||
}
|
||||
return rangeCounts, corrCount, true
|
||||
return rangeCounts, minCount, maxCount, true
|
||||
}
|
||||
|
||||
// convertRangeFromExpectedCnt builds new ranges used to estimate row count we need to scan in table scan before finding specified
|
||||
|
||||
@ -412,7 +412,7 @@ func ColumnEqualRowCount(sctx planctx.PlanContext, t *statistics.Table, value ty
|
||||
|
||||
// getPseudoRowCountWithPartialStats calculates the row count if there are no statistics on the index, but there are column stats available.
|
||||
func getPseudoRowCountWithPartialStats(sctx planctx.PlanContext, coll *statistics.HistColl, indexRanges []*ranger.Range,
|
||||
tableRowCount float64, idxCols []*expression.Column) (totalCount float64, corrCount float64, err error) {
|
||||
tableRowCount float64, idxCols []*expression.Column) (totalCount float64, maxCount float64, err error) {
|
||||
if tableRowCount == 0 {
|
||||
return 0, 0, nil
|
||||
}
|
||||
@ -433,7 +433,7 @@ func getPseudoRowCountWithPartialStats(sctx planctx.PlanContext, coll *statistic
|
||||
colID int64
|
||||
)
|
||||
totalCount = float64(0)
|
||||
corrCount = float64(0)
|
||||
maxCount = float64(0)
|
||||
for _, indexRange := range indexRanges {
|
||||
selectivity := float64(1.0)
|
||||
corrSelectivity := float64(1.0)
|
||||
@ -456,8 +456,8 @@ func getPseudoRowCountWithPartialStats(sctx planctx.PlanContext, coll *statistic
|
||||
corrSelectivity = min(corrSelectivity, tempSelectivity)
|
||||
}
|
||||
totalCount += selectivity * tableRowCount
|
||||
corrCount += corrSelectivity * tableRowCount
|
||||
maxCount += corrSelectivity * tableRowCount
|
||||
}
|
||||
totalCount = mathutil.Clamp(totalCount, 1, tableRowCount)
|
||||
return totalCount, corrCount, nil
|
||||
return totalCount, maxCount, nil
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ import (
|
||||
|
||||
// GetRowCountByIndexRanges estimates the row count by a slice of Range.
|
||||
// idxCols is used when index statistics are invalid, because coll may not have index info; it can be nil whenever index statistics are valid.
|
||||
func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range, idxCols []*expression.Column) (result float64, corrResult float64, err error) {
|
||||
func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range, idxCols []*expression.Column) (result float64, minResult float64, maxResult float64, err error) {
|
||||
var name string
|
||||
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
|
||||
debugtrace.EnterContextCommon(sctx)
|
||||
@ -63,10 +63,9 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
|
||||
}
|
||||
}
|
||||
recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID)
|
||||
corrResult = float64(0)
|
||||
if statistics.IndexStatsIsInvalid(sctx, idx, coll, idxID) {
|
||||
if hasColumnStats(sctx, coll, idxCols) {
|
||||
result, corrResult, err = getPseudoRowCountWithPartialStats(sctx, coll, indexRanges, float64(coll.RealtimeCount), idxCols)
|
||||
result, maxResult, err = getPseudoRowCountWithPartialStats(sctx, coll, indexRanges, float64(coll.RealtimeCount), idxCols)
|
||||
} else {
|
||||
colsLen := -1
|
||||
if idx != nil && idx.Info.Unique {
|
||||
@ -77,7 +76,7 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
|
||||
ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result))
|
||||
}
|
||||
}
|
||||
return result, corrResult, err
|
||||
return result, minResult, maxResult, err
|
||||
}
|
||||
realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
|
||||
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
|
||||
@ -90,12 +89,12 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
|
||||
if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 {
|
||||
result, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges)
|
||||
} else {
|
||||
result, corrResult, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
|
||||
result, minResult, maxResult, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
|
||||
}
|
||||
if sc.EnableOptimizerCETrace {
|
||||
ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
|
||||
}
|
||||
return result, corrResult, errors.Trace(err)
|
||||
return result, minResult, maxResult, errors.Trace(err)
|
||||
}
|
||||
|
||||
func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (float64, error) {
|
||||
@ -125,7 +124,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC
|
||||
// values in this case.
|
||||
if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) {
|
||||
realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
|
||||
count, _, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
|
||||
count, _, _, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
|
||||
if err != nil {
|
||||
return 0, errors.Trace(err)
|
||||
}
|
||||
@ -189,7 +188,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC
|
||||
// prefer index stats over column stats
|
||||
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
|
||||
idxID := idxIDs[0]
|
||||
count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}, nil)
|
||||
} else {
|
||||
count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
|
||||
}
|
||||
@ -223,7 +222,7 @@ func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool {
|
||||
}
|
||||
|
||||
// It uses the modifyCount to validate, and realtimeRowCount to adjust the influence of modifications on the table.
|
||||
func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (totalCount float64, corrCount float64, err error) {
|
||||
func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (totalCount, minCount, maxCount float64, err error) {
|
||||
sc := sctx.GetSessionVars().StmtCtx
|
||||
debugTrace := sc.EnableOptimizerDebugTrace
|
||||
if debugTrace {
|
||||
@ -237,12 +236,12 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
|
||||
lb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
|
||||
err = sc.HandleError(err)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, 0, err
|
||||
}
|
||||
rb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
|
||||
err = sc.HandleError(err)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, 0, err
|
||||
}
|
||||
if debugTrace {
|
||||
debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount)
|
||||
@ -301,14 +300,15 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
|
||||
// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything.
|
||||
// If the first column's range is point.
|
||||
if rangePosition := getOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer >= statistics.Version2 && coll != nil {
|
||||
var expBackoffSel, corrSel float64
|
||||
expBackoffSel, corrSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
|
||||
var expBackoffSel, minSel, maxSel float64
|
||||
expBackoffSel, minSel, maxSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, 0, err
|
||||
}
|
||||
if expBackoffSuccess {
|
||||
expBackoffCnt := expBackoffSel * idx.TotalRowCount()
|
||||
corrCnt := corrSel * idx.TotalRowCount()
|
||||
minCnt := minSel * idx.TotalRowCount()
|
||||
maxCnt := maxSel * idx.TotalRowCount()
|
||||
|
||||
upperLimit := expBackoffCnt
|
||||
// Use the multi-column stats to calculate the max possible row count of [l, r)
|
||||
@ -335,7 +335,8 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
|
||||
expBackoffCnt = upperLimit
|
||||
}
|
||||
count += expBackoffCnt
|
||||
corrCount += corrCnt
|
||||
minCount += minCnt
|
||||
maxCount += maxCnt
|
||||
}
|
||||
}
|
||||
if !expBackoffSuccess {
|
||||
@ -345,7 +346,8 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
|
||||
// If the current table row count has changed, we should scale the row count accordingly.
|
||||
increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
|
||||
count *= increaseFactor
|
||||
corrCount *= increaseFactor
|
||||
minCount *= increaseFactor
|
||||
maxCount *= increaseFactor
|
||||
|
||||
// handling the out-of-range part
|
||||
if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
|
||||
@ -387,7 +389,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
|
||||
// Don't allow the final result to go below 1 row
|
||||
totalCount = mathutil.Clamp(totalCount, 1, float64(realtimeRowCount))
|
||||
}
|
||||
return totalCount, corrCount, nil
|
||||
return totalCount, minCount, maxCount, nil
|
||||
}
|
||||
|
||||
var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil))
|
||||
@ -476,7 +478,7 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b
|
||||
}
|
||||
|
||||
// expBackoffEstimation estimates the multi-column case using Exponential Backoff. See the comment below for details.
|
||||
func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, corrSel float64, success bool, err error) {
|
||||
func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, minSel float64, maxSel float64, success bool, err error) {
|
||||
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
|
||||
debugtrace.EnterContextCommon(sctx)
|
||||
defer func() {
|
||||
@ -497,6 +499,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
|
||||
}
|
||||
colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID]
|
||||
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal))
|
||||
minSel = float64(1)
|
||||
// The following code uses Exponential Backoff to reduce the impact of the independence assumption. It works as follows:
|
||||
// 1. Calc the selectivity of each column.
|
||||
// 2. Sort them and choose the 4 most selective filters; the corresponding selectivities are sel_1, sel_2, sel_3, sel_4 where i < j => sel_i < sel_j.
|
||||
@ -532,7 +535,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
|
||||
continue
|
||||
}
|
||||
foundStats = true
|
||||
count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan, nil)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
@ -544,9 +547,10 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return 0, 0, false, err
|
||||
return 0, 0, 0, false, err
|
||||
}
|
||||
singleColumnEstResults = append(singleColumnEstResults, selectivity)
|
||||
minSel *= selectivity
|
||||
}
|
||||
// Sort them.
|
||||
slices.Sort(singleColumnEstResults)
|
||||
@ -556,9 +560,9 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
|
||||
l = 0
|
||||
})
|
||||
if l == 1 {
|
||||
return singleColumnEstResults[0], singleColumnEstResults[0], true, nil
|
||||
return singleColumnEstResults[0], singleColumnEstResults[0], singleColumnEstResults[0], true, nil
|
||||
} else if l == 0 {
|
||||
return 0, 0, false, nil
|
||||
return 0, 0, 0, false, nil
|
||||
}
|
||||
// Do not allow the exponential backoff to go below the available index bound. If the number of predicates
|
||||
// is less than the number of index columns - use 90% of the bound to differentiate a subset from full index match.
|
||||
@ -571,21 +575,23 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
|
||||
if l < len(idx.Info.Columns) {
|
||||
idxLowBound /= 0.9
|
||||
}
|
||||
// corrSel is the selectivity of the most filtering column
|
||||
corrSel = max(idxLowBound, singleColumnEstResults[0])
|
||||
// maxSel assumes correlation, so is the selectivity of the most filtering column
|
||||
maxSel = max(idxLowBound, singleColumnEstResults[0])
|
||||
// minSel assumes independence between columns, so is the product of all single column selectivities.
|
||||
minSel = max(idxLowBound, minSel)
|
||||
minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound)
|
||||
multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1])
|
||||
if l == 2 {
|
||||
return max(minTwoCol, multTwoCol), corrSel, true, nil
|
||||
return max(minTwoCol, multTwoCol), minSel, maxSel, true, nil
|
||||
}
|
||||
minThreeCol := min(minTwoCol, singleColumnEstResults[2])
|
||||
multThreeCol := multTwoCol * math.Sqrt(math.Sqrt(singleColumnEstResults[2]))
|
||||
if l == 3 {
|
||||
return max(minThreeCol, multThreeCol), corrSel, true, nil
|
||||
return max(minThreeCol, multThreeCol), minSel, maxSel, true, nil
|
||||
}
|
||||
minFourCol := min(minThreeCol, singleColumnEstResults[3])
|
||||
multFourCol := multThreeCol * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3])))
|
||||
return max(minFourCol, multFourCol), corrSel, true, nil
|
||||
return max(minFourCol, multFourCol), minSel, maxSel, true, nil
|
||||
}
|
||||
|
||||
// outOfRangeOnIndex checks if the datum is out of the range.
|
||||
|
||||
@ -203,12 +203,13 @@ func Selectivity(
|
||||
if err != nil {
|
||||
return 0, nil, errors.Trace(err)
|
||||
}
|
||||
cnt, corrCnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges, nil)
|
||||
cnt, minCnt, maxCnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges, nil)
|
||||
if err != nil {
|
||||
return 0, nil, errors.Trace(err)
|
||||
}
|
||||
selectivity := cnt / float64(coll.RealtimeCount)
|
||||
corrSelectivity := corrCnt / float64(coll.RealtimeCount)
|
||||
minSelectivity := minCnt / float64(coll.RealtimeCount)
|
||||
maxSelectivity := maxCnt / float64(coll.RealtimeCount)
|
||||
nodes = append(nodes, &StatsNode{
|
||||
Tp: IndexType,
|
||||
ID: id,
|
||||
@ -216,7 +217,8 @@ func Selectivity(
|
||||
Ranges: ranges,
|
||||
numCols: len(idxStats.Info.Columns),
|
||||
Selectivity: selectivity,
|
||||
CorrSelectivity: corrSelectivity,
|
||||
MinSelectivity: minSelectivity,
|
||||
MaxSelectivity: maxSelectivity,
|
||||
partCover: partCover,
|
||||
minAccessCondsForDNFCond: minAccessCondsForDNFCond,
|
||||
})
|
||||
@ -553,10 +555,12 @@ type StatsNode struct {
|
||||
mask int64
|
||||
// Selectivity indicates the Selectivity of this column/index.
|
||||
Selectivity float64
|
||||
// CorrSelectivity indicates the Selectivity of this column/index with correlated column.
|
||||
// That is - it is the selectivity assuming the most filtering index column only, and all other
|
||||
// columns are correlated with this column.
|
||||
CorrSelectivity float64
|
||||
// MinSelectivity indicates the Selectivity of this column/index for the least rows that can qualify.
|
||||
// It takes into account situations that would decrease the row count, such as fully independent columns.
|
||||
MinSelectivity float64
|
||||
// MaxSelectivity indicates the Selectivity of this column/index for the most rows that can qualify.
|
||||
// It takes into account situations that would increase the row count, such as correlated columns.
|
||||
MaxSelectivity float64
|
||||
// numCols is the number of columns contained in the index or column(which is always 1).
|
||||
numCols int
|
||||
// partCover indicates whether the bit in the mask is for a full cover or partial cover. It is only true
|
||||
|
||||
@ -253,11 +253,11 @@ func TestEstimationForUnknownValues(t *testing.T) {
|
||||
require.Equal(t, 4.7, count)
|
||||
|
||||
idxID := table.Meta().Indices[0].ID
|
||||
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30), nil)
|
||||
count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30), nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0.1, count)
|
||||
|
||||
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30), nil)
|
||||
count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30), nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 4.5, count)
|
||||
|
||||
@ -287,7 +287,7 @@ func TestEstimationForUnknownValues(t *testing.T) {
|
||||
require.Equal(t, 1.0, count)
|
||||
|
||||
idxID = table.Meta().Indices[0].ID
|
||||
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2), nil)
|
||||
count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2), nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0.0, count)
|
||||
}
|
||||
@ -440,11 +440,11 @@ func TestEstimationUniqueKeyEqualConds(t *testing.T) {
|
||||
|
||||
sctx := mock.NewContext()
|
||||
idxID := table.Meta().Indices[0].ID
|
||||
count, _, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7), nil)
|
||||
count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7), nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1.0, count)
|
||||
|
||||
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1.0, count)
|
||||
|
||||
@ -1075,12 +1075,12 @@ func TestIssue39593(t *testing.T) {
|
||||
sctx := testKit.Session()
|
||||
idxID := tblInfo.Indices[0].ID
|
||||
vals := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}
|
||||
count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil)
|
||||
count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil)
|
||||
require.NoError(t, err)
|
||||
// estimated row count without any changes, use range to reduce test flakiness
|
||||
require.InDelta(t, float64(462.6), count, float64(1))
|
||||
statsTbl.RealtimeCount *= 10
|
||||
count, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil)
|
||||
count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil)
|
||||
require.NoError(t, err)
|
||||
// estimated row count after mock modify on the table, use range to reduce test flakiness
|
||||
require.InDelta(t, float64(3702.6), count, float64(1))
|
||||
@ -1569,15 +1569,15 @@ func TestRiskEqSkewRatio(t *testing.T) {
|
||||
// Search for the value "6" which will not be found in the histogram buckets, and since
|
||||
// there are NO topN values - the value will be considered skewed based upon skew ratio.
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 0")
|
||||
count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err)
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 0.5")
|
||||
count2, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count2, _, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err2)
|
||||
// Result of count2 should be larger than count because the risk ratio is higher
|
||||
require.Less(t, count, count2)
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 1")
|
||||
count3, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count3, _, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err3)
|
||||
// Result of count3 should be larger because the risk ratio is higher
|
||||
require.Less(t, count2, count3)
|
||||
@ -1589,27 +1589,27 @@ func TestRiskEqSkewRatio(t *testing.T) {
|
||||
require.NoError(t, h.DumpStatsDeltaToKV(true))
|
||||
// Rerun tests with 1 value in the TopN
|
||||
statsTbl = h.GetTableStats(tb.Meta())
|
||||
count, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err)
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 0.5")
|
||||
count2, _, err2 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count2, _, _, err2 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err2)
|
||||
// Result of count2 should be larger than count because the risk ratio is higher
|
||||
require.Less(t, count, count2)
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 1")
|
||||
count3, _, err3 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count3, _, _, err3 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err3)
|
||||
// Result of count3 should be larger than count2 because the risk ratio is higher
|
||||
require.Less(t, count2, count3)
|
||||
// Repeat the prior test by setting the global variable instead of the session variable. This should have no effect.
|
||||
testKit.MustExec("set @@global.tidb_opt_risk_eq_skew_ratio = 0.5")
|
||||
count4, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count4, _, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err4)
|
||||
require.Less(t, count2, count4)
|
||||
// Repeat the prior test by setting the session variable to the default. Count4 should inherit the global
|
||||
// variable and be less than count3.
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = default")
|
||||
count4, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
count4, _, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil)
|
||||
require.NoError(t, err4)
|
||||
require.Less(t, count4, count3)
|
||||
// Reset global variable to default.
|
||||
@ -1641,27 +1641,27 @@ func TestRiskRangeSkewRatioWithinBucket(t *testing.T) {
|
||||
// Search for the range from 2 to 3, since there is only one bucket it will be a query within
|
||||
// a bucket.
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 0")
|
||||
count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
require.NoError(t, err)
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 0.5")
|
||||
count2, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
count2, _, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
require.NoError(t, err2)
|
||||
// Result of count2 should be larger than count because the risk ratio is higher
|
||||
require.Less(t, count, count2)
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 1")
|
||||
count3, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
count3, _, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
require.NoError(t, err3)
|
||||
// Result of count3 should be larger because the risk ratio is higher
|
||||
require.Less(t, count2, count3)
|
||||
// Repeat the prior test by setting the global variable instead of the session variable. This should have no effect.
|
||||
testKit.MustExec("set @@global.tidb_opt_risk_range_skew_ratio = 0.5")
|
||||
count4, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
count4, _, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
require.NoError(t, err4)
|
||||
require.Less(t, count2, count4)
|
||||
// Repeat the prior test by setting the session variable to the default. Count4 should inherit the global
|
||||
// variable and be less than count3.
|
||||
testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = default")
|
||||
count4, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
count4, _, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil)
|
||||
require.NoError(t, err4)
|
||||
require.Less(t, count4, count3)
|
||||
// Reset global variable to default.
|
||||
@ -1800,11 +1800,11 @@ func TestLastBucketEndValueHeuristic(t *testing.T) {
|
||||
// Test index estimation as well
|
||||
idx := statsTbl.GetIdx(table.Meta().Indices[0].ID)
|
||||
if idx != nil {
|
||||
idxEnhancedCount, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(11, 11), nil)
|
||||
idxEnhancedCount, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(11, 11), nil)
|
||||
require.NoError(t, err)
|
||||
require.InDelta(t, 100.09, idxEnhancedCount, 0.1, "Index enhanced count should be approximately 100.09")
|
||||
|
||||
idxOtherCount, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(3, 3), nil)
|
||||
idxOtherCount, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(3, 3), nil)
|
||||
require.NoError(t, err)
|
||||
require.InDelta(t, 109.99, idxOtherCount, 0.1, "Index other count should be approximately 109.99")
|
||||
}
|
||||
|
||||
@ -227,14 +227,15 @@ func stabilizeGetStatsTblInfo(info *getStatsTblInfo) {
|
||||
*/
|
||||
|
||||
type accessPathForDebugTrace struct {
|
||||
IndexName string `json:",omitempty"`
|
||||
AccessConditions []string
|
||||
IndexFilters []string
|
||||
TableFilters []string
|
||||
PartialPaths []accessPathForDebugTrace `json:",omitempty"`
|
||||
CountAfterAccess float64
|
||||
CorrCountAfterAccess float64
|
||||
CountAfterIndex float64
|
||||
IndexName string `json:",omitempty"`
|
||||
AccessConditions []string
|
||||
IndexFilters []string
|
||||
TableFilters []string
|
||||
PartialPaths []accessPathForDebugTrace `json:",omitempty"`
|
||||
CountAfterAccess float64
|
||||
MinCountAfterAccess float64
|
||||
MaxCountAfterAccess float64
|
||||
CountAfterIndex float64
|
||||
}
|
||||
|
||||
func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.AccessPath, out *accessPathForDebugTrace) {
|
||||
@ -245,7 +246,8 @@ func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.Acces
|
||||
out.IndexFilters = expression.ExprsToStringsForDisplay(ctx, path.IndexFilters)
|
||||
out.TableFilters = expression.ExprsToStringsForDisplay(ctx, path.TableFilters)
|
||||
out.CountAfterAccess = path.CountAfterAccess
|
||||
out.CorrCountAfterAccess = path.CorrCountAfterAccess
|
||||
out.MaxCountAfterAccess = path.MaxCountAfterAccess
|
||||
out.MinCountAfterAccess = path.MinCountAfterAccess
|
||||
out.CountAfterIndex = path.CountAfterIndex
|
||||
out.PartialPaths = make([]accessPathForDebugTrace, len(path.PartialIndexPaths))
|
||||
for i, partialPath := range path.PartialIndexPaths {
|
||||
|
||||
@ -1513,11 +1513,12 @@ func constructDS2IndexScanTask(
|
||||
rowCount = math.Min(rowCount, 1.0)
|
||||
}
|
||||
tmpPath := &util.AccessPath{
|
||||
IndexFilters: indexConds,
|
||||
TableFilters: tblConds,
|
||||
CountAfterIndex: rowCount,
|
||||
CountAfterAccess: rowCount,
|
||||
CorrCountAfterAccess: 0,
|
||||
IndexFilters: indexConds,
|
||||
TableFilters: tblConds,
|
||||
CountAfterIndex: rowCount,
|
||||
CountAfterAccess: rowCount,
|
||||
MinCountAfterAccess: 0,
|
||||
MaxCountAfterAccess: 0,
|
||||
}
|
||||
// Assume equal conditions used by index join and other conditions are independent.
|
||||
if len(tblConds) > 0 {
|
||||
|
||||
@ -1084,24 +1084,22 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int {
|
||||
return compareBool(lhs.path.Index.Global, rhs.path.Index.Global)
|
||||
}
|
||||
|
||||
func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) {
|
||||
lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
|
||||
// CorrCountAfterAccess tracks the "CountAfterAccess" only including the most selective index column, thus
|
||||
// lhs/rhsCorrRatio represents the "risk" of the CountAfterAccess value - lower value means less risk that
|
||||
// we do NOT know about actual correlation between indexed columns
|
||||
// TODO - corrCountAfterAccess is only currently used to compete 2 indexes - since they are the only paths
|
||||
// that potentially go through expBackOffEstimation
|
||||
if lhs.path.CorrCountAfterAccess > 0 && rhs.path.CorrCountAfterAccess > 0 {
|
||||
lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
|
||||
rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
|
||||
func compareRiskRatio(lhs, rhs *candidatePath) (int, float64) {
|
||||
lhsRiskRatio, rhsRiskRatio := 0.0, 0.0
|
||||
// MaxCountAfterAccess tracks the worst case "CountAfterAccess", accounting for scenarios that could
|
||||
// increase our row estimation, thus lhs/rhsRiskRatio represents the "risk" of the CountAfterAccess value.
|
||||
// Lower value means less risk that the actual row count is higher than the estimated one.
|
||||
if lhs.path.MaxCountAfterAccess > 0 && rhs.path.MaxCountAfterAccess > 0 {
|
||||
lhsRiskRatio = lhs.path.MaxCountAfterAccess / lhs.path.CountAfterAccess
|
||||
rhsRiskRatio = rhs.path.MaxCountAfterAccess / rhs.path.CountAfterAccess
|
||||
}
|
||||
// lhs has lower risk
|
||||
if lhsCorrRatio < rhsCorrRatio && lhs.path.CountAfterAccess < rhs.path.CountAfterAccess {
|
||||
return 1, lhsCorrRatio
|
||||
if lhsRiskRatio < rhsRiskRatio && lhs.path.CountAfterAccess < rhs.path.CountAfterAccess {
|
||||
return 1, lhsRiskRatio
|
||||
}
|
||||
// rhs has lower risk
|
||||
if rhsCorrRatio < lhsCorrRatio && rhs.path.CountAfterAccess < lhs.path.CountAfterAccess {
|
||||
return -1, rhsCorrRatio
|
||||
if rhsRiskRatio < lhsRiskRatio && rhs.path.CountAfterAccess < lhs.path.CountAfterAccess {
|
||||
return -1, rhsRiskRatio
|
||||
}
|
||||
return 0, 0
|
||||
}
|
||||
@ -1150,8 +1148,8 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
|
||||
matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
|
||||
accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
|
||||
scanResult, comparable2 := compareIndexBack(lhs, rhs)
|
||||
// TODO: corrResult is not added to sum to limit change to existing logic. Further testing required.
|
||||
corrResult, _ := compareCorrRatio(lhs, rhs)
|
||||
// TODO: riskResult is not added to sum to limit change to existing logic. Further testing required.
|
||||
riskResult, _ := compareRiskRatio(lhs, rhs)
|
||||
sum := accessResult + scanResult + matchResult + globalResult
|
||||
|
||||
// First rules apply when an index doesn't have statistics and another object (index or table) has statistics
|
||||
@ -1159,11 +1157,13 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
|
||||
// If one index has statistics and the other does not, choose the index with statistics if it
|
||||
// has the same or higher number of equal/IN predicates, and its CountAfterAccess is below the other path's MaxCountAfterAccess (when that is set).
|
||||
if !lhsPseudo && globalResult >= 0 && sum >= 0 &&
|
||||
lhs.path.EqOrInCondCount > 0 && lhs.path.EqOrInCondCount >= rhs.path.EqOrInCondCount {
|
||||
lhs.path.EqOrInCondCount > 0 && lhs.path.EqOrInCondCount >= rhs.path.EqOrInCondCount &&
|
||||
(rhs.path.MaxCountAfterAccess <= 0 || lhs.path.CountAfterAccess < rhs.path.MaxCountAfterAccess) {
|
||||
return 1, lhsPseudo // left wins and has statistics (lhsPseudo==false)
|
||||
}
|
||||
if !rhsPseudo && globalResult <= 0 && sum <= 0 &&
|
||||
rhs.path.EqOrInCondCount > 0 && rhs.path.EqOrInCondCount >= lhs.path.EqOrInCondCount {
|
||||
rhs.path.EqOrInCondCount > 0 && rhs.path.EqOrInCondCount >= lhs.path.EqOrInCondCount &&
|
||||
(lhs.path.MaxCountAfterAccess <= 0 || rhs.path.CountAfterAccess < lhs.path.MaxCountAfterAccess) {
|
||||
return -1, rhsPseudo // right wins and has statistics (rhsPseudo==false)
|
||||
}
|
||||
if preferRange {
|
||||
@ -1191,10 +1191,10 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
|
||||
if threshold > 0 { // set it to 0 to disable this rule
|
||||
// riskResult is included to ensure we don't give preference to a higher risk plan given that
|
||||
// this rule does not check the other criteria included below.
|
||||
if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold && corrResult <= 0 {
|
||||
if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold && riskResult <= 0 {
|
||||
return -1, rhsPseudo // right wins - also return whether it has statistics (pseudo) or not
|
||||
}
|
||||
if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold && corrResult >= 0 {
|
||||
if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold && riskResult >= 0 {
|
||||
return 1, lhsPseudo // left wins - also return whether it has statistics (pseudo) or not
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,7 +177,8 @@ func fillIndexPath(ds *logicalop.DataSource, path *util.AccessPath, conds []expr
|
||||
}
|
||||
path.Ranges = ranger.FullRange()
|
||||
path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
|
||||
path.CorrCountAfterAccess = 0
|
||||
path.MinCountAfterAccess = 0
|
||||
path.MaxCountAfterAccess = 0
|
||||
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
|
||||
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
|
||||
if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) {
|
||||
@ -415,10 +416,7 @@ func detachCondAndBuildRangeForPath(
|
||||
if len(indexCols) > len(path.Index.Columns) { // remove clustered primary key if it has been added to path.IdxCols
|
||||
indexCols = indexCols[0:len(path.Index.Columns)]
|
||||
}
|
||||
path.CountAfterAccess, path.CorrCountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges, indexCols)
|
||||
if path.CorrCountAfterAccess == 0 {
|
||||
path.CorrCountAfterAccess = path.CountAfterAccess
|
||||
}
|
||||
path.CountAfterAccess, path.MinCountAfterAccess, path.MaxCountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges, indexCols)
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@ -42,12 +42,16 @@ type AccessPath struct {
|
||||
// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
|
||||
// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
|
||||
CountAfterAccess float64
|
||||
// CorrCountAfterAccess is the row count after only applying the most filtering index columns.
|
||||
// against the index. This is used when we don't have a full index statistics
|
||||
// and we need to use the exponential backoff to estimate the row count.
|
||||
// Case CorrCountAfterAccess > 0 : we use the exponential backoff to estimate the row count (such as we don't have a full index statistics)
|
||||
// Default CorrCountAfterAccess = 0 : we use index of table estimate row count directly (such as table full scan, point get etc)
|
||||
CorrCountAfterAccess float64
|
||||
// MinCountAfterAccess is a lower bound on CountAfterAccess, accounting for risks that could
|
||||
// lead to overestimation, such as assuming correlation with exponential backoff when columns are actually independent.
|
||||
// Case MinCountAfterAccess > 0 : we've encountered risky scenarios and have a potential lower row count estimation
|
||||
// Default MinCountAfterAccess = 0 : we have not identified risks that could lead to lower row count
|
||||
MinCountAfterAccess float64
|
||||
// MaxCountAfterAccess is an upper bound on the CountAfterAccess, accounting for risks that could
|
||||
// lead to underestimation, such as assuming independence between non-index columns.
|
||||
// Case MaxCountAfterAccess > 0 : we've encountered risky scenarios and have a potential greater row count estimation
|
||||
// Default MaxCountAfterAccess = 0 : we have not identified risks that could lead to greater row count
|
||||
MaxCountAfterAccess float64
|
||||
// CountAfterIndex is the row count after we apply filters on index and before we apply the table filters.
|
||||
CountAfterIndex float64
|
||||
AccessConds []expression.Expression
|
||||
@ -139,7 +143,8 @@ func (path *AccessPath) Clone() *AccessPath {
|
||||
ConstCols: slices.Clone(path.ConstCols),
|
||||
Ranges: CloneRanges(path.Ranges),
|
||||
CountAfterAccess: path.CountAfterAccess,
|
||||
CorrCountAfterAccess: path.CorrCountAfterAccess,
|
||||
MinCountAfterAccess: path.MinCountAfterAccess,
|
||||
MaxCountAfterAccess: path.MaxCountAfterAccess,
|
||||
CountAfterIndex: path.CountAfterIndex,
|
||||
AccessConds: CloneExprs(path.AccessConds),
|
||||
EqCondCount: path.EqCondCount,
|
||||
|
||||
@ -393,51 +393,51 @@ func SubTestIndexRanges() func(*testing.T) {
|
||||
HighVal: []types.Datum{types.MaxValueDatum()},
|
||||
Collators: collate.GetBinaryCollatorSlice(1),
|
||||
}}
|
||||
count, _, err := GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err := GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 99900, int(count))
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1000)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(2000)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 2500, int(count))
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1001)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(1999)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 2500, int(count))
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1000)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(1000)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 100, int(count))
|
||||
|
||||
tbl.SetIdx(0, &Index{Info: &model.IndexInfo{Columns: []*model.IndexColumn{{Offset: 0}}, Unique: true}})
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1000)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(1000)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, int(count))
|
||||
|
||||
tbl.SetIdx(0, idx)
|
||||
ran[0].LowVal[0] = types.MinNotNullDatum()
|
||||
ran[0].HighVal[0] = types.MaxValueDatum()
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 100000, int(count))
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1000)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(2000)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1000, int(count))
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1001)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(1990)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 989, int(count))
|
||||
ran[0].LowVal[0] = types.NewIntDatum(1000)
|
||||
ran[0].HighVal[0] = types.NewIntDatum(1000)
|
||||
count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, int(count))
|
||||
}
|
||||
|
||||
@ -50,7 +50,7 @@ var (
|
||||
// Note: all functions below will be removed after finishing moving all estimation functions into the cardinality package.
|
||||
|
||||
// GetRowCountByIndexRanges is a function type to get row count by index ranges.
|
||||
GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range, idxCol []*expression.Column) (result float64, corrResult float64, err error)
|
||||
GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range, idxCol []*expression.Column) (result float64, minResult float64, maxResult float64, err error)
|
||||
|
||||
// GetRowCountByIntColumnRanges is a function type to get row count by int column ranges.
|
||||
GetRowCountByIntColumnRanges func(sctx planctx.PlanContext, coll *HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error)
|
||||
|
||||
Reference in New Issue
Block a user