From 1a88fd99dbdba07e107e49aaa5515fdca06e7166 Mon Sep 17 00:00:00 2001 From: Isabel Chen <92058925+ichen032@users.noreply.github.com> Date: Sat, 9 Aug 2025 10:01:45 -0700 Subject: [PATCH] planner: create variables for recognizing over and underestimation risk (#62910) ref pingcap/tidb#59333 --- pkg/planner/cardinality/cross_estimation.go | 20 +++---- pkg/planner/cardinality/row_count_column.go | 8 +-- pkg/planner/cardinality/row_count_index.go | 62 +++++++++++---------- pkg/planner/cardinality/selectivity.go | 18 +++--- pkg/planner/cardinality/selectivity_test.go | 44 +++++++-------- pkg/planner/core/debugtrace.go | 20 ++++--- pkg/planner/core/exhaust_physical_plans.go | 11 ++-- pkg/planner/core/find_best_task.go | 40 ++++++------- pkg/planner/core/stats.go | 8 +-- pkg/planner/util/path.go | 19 ++++--- pkg/statistics/statistics_test.go | 18 +++--- pkg/statistics/table.go | 2 +- 12 files changed, 143 insertions(+), 127 deletions(-) diff --git a/pkg/planner/cardinality/cross_estimation.go b/pkg/planner/cardinality/cross_estimation.go index 6101dc5bc7..c6c06cd2b2 100644 --- a/pkg/planner/cardinality/cross_estimation.go +++ b/pkg/planner/cardinality/cross_estimation.go @@ -170,7 +170,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext, if idxExists && len(idxIDs) > 0 { idxID = idxIDs[0] } - rangeCounts, _, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID) + rangeCounts, _, _, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID) if !ok { return 0, false, corr } @@ -180,7 +180,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext, } var rangeCount float64 if idxExists { - rangeCount, _, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges, nil) + rangeCount, _, _, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges, nil) } else { rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges) } @@ -196,30 +196,30 @@ func crossEstimateRowCount(sctx planctx.PlanContext, } // getColumnRangeCounts estimates row count for each range respectively. 
-func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) ([]float64, float64, bool) { +func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) (rangeCounts []float64, minCount float64, maxCount float64, ok bool) { var err error - var count, corrCount float64 - rangeCounts := make([]float64, len(ranges)) + var count float64 + rangeCounts = make([]float64, len(ranges)) for i, ran := range ranges { if idxID >= 0 { idxHist := histColl.GetIdx(idxID) if statistics.IndexStatsIsInvalid(sctx, idxHist, histColl, idxID) { - return nil, 0, false + return nil, 0, 0, false } - count, corrCount, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran}, nil) + count, minCount, maxCount, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran}, nil) } else { colHist := histColl.GetCol(colID) if statistics.ColumnStatsIsInvalid(colHist, sctx, histColl, colID) { - return nil, 0, false + return nil, 0, 0, false } count, err = GetRowCountByColumnRanges(sctx, histColl, colID, []*ranger.Range{ran}) } if err != nil { - return nil, 0, false + return nil, 0, 0, false } rangeCounts[i] = count } - return rangeCounts, corrCount, true + return rangeCounts, minCount, maxCount, true } // convertRangeFromExpectedCnt builds new ranges used to estimate row count we need to scan in table scan before finding specified diff --git a/pkg/planner/cardinality/row_count_column.go b/pkg/planner/cardinality/row_count_column.go index f61e6a7e42..70f3dd9af1 100644 --- a/pkg/planner/cardinality/row_count_column.go +++ b/pkg/planner/cardinality/row_count_column.go @@ -412,7 +412,7 @@ func ColumnEqualRowCount(sctx planctx.PlanContext, t *statistics.Table, value ty // getPseudoRowCountWithPartialStats calculates the row count if there are no statistics on the index, but there are column stats available. func getPseudoRowCountWithPartialStats(sctx planctx.PlanContext, coll *statistics.HistColl, indexRanges []*ranger.Range, - tableRowCount float64, idxCols []*expression.Column) (totalCount float64, corrCount float64, err error) { + tableRowCount float64, idxCols []*expression.Column) (totalCount float64, maxCount float64, err error) { if tableRowCount == 0 { return 0, 0, nil } @@ -433,7 +433,7 @@ func getPseudoRowCountWithPartialStats(sctx planctx.PlanContext, coll *statistic colID int64 ) totalCount = float64(0) - corrCount = float64(0) + maxCount = float64(0) for _, indexRange := range indexRanges { selectivity := float64(1.0) corrSelectivity := float64(1.0) @@ -456,8 +456,8 @@ func getPseudoRowCountWithPartialStats(sctx planctx.PlanContext, coll *statistic corrSelectivity = min(corrSelectivity, tempSelectivity) } totalCount += selectivity * tableRowCount - corrCount += corrSelectivity * tableRowCount + maxCount += corrSelectivity * tableRowCount } totalCount = mathutil.Clamp(totalCount, 1, tableRowCount) - return totalCount, corrCount, nil + return totalCount, maxCount, nil } diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go index 3908cde361..5c9c8d18e4 100644 --- a/pkg/planner/cardinality/row_count_index.go +++ b/pkg/planner/cardinality/row_count_index.go @@ -41,7 +41,7 @@ import ( // GetRowCountByIndexRanges estimates the row count by a slice of Range. // idxCols used when index statistics are invalid, because coll may not have index info, can be nil whenever index statistics are valid. 
-func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range, idxCols []*expression.Column) (result float64, corrResult float64, err error) { +func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range, idxCols []*expression.Column) (result float64, minResult float64, maxResult float64, err error) { var name string if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) @@ -63,10 +63,9 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol } } recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID) - corrResult = float64(0) if statistics.IndexStatsIsInvalid(sctx, idx, coll, idxID) { if hasColumnStats(sctx, coll, idxCols) { - result, corrResult, err = getPseudoRowCountWithPartialStats(sctx, coll, indexRanges, float64(coll.RealtimeCount), idxCols) + result, maxResult, err = getPseudoRowCountWithPartialStats(sctx, coll, indexRanges, float64(coll.RealtimeCount), idxCols) } else { colsLen := -1 if idx != nil && idx.Info.Unique { @@ -77,7 +76,7 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result)) } } - return result, corrResult, err + return result, minResult, maxResult, err } realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx) if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { @@ -90,12 +89,12 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 { result, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges) } else { - result, corrResult, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount) + result, minResult, maxResult, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount) } if sc.EnableOptimizerCETrace { ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result)) } - return result, corrResult, errors.Trace(err) + return result, minResult, maxResult, errors.Trace(err) } func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (float64, error) { @@ -125,7 +124,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC // values in this case. 
if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) { realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx) - count, _, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount) + count, _, _, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount) if err != nil { return 0, errors.Trace(err) } @@ -189,7 +188,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC // prefer index stats over column stats if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 { idxID := idxIDs[0] - count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}, nil) + count, _, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}, nil) } else { count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang}) } @@ -223,7 +222,7 @@ func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool { } // It uses the modifyCount to validate, and realtimeRowCount to adjust the influence of modifications on the table. -func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (totalCount float64, corrCount float64, err error) { +func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (totalCount, minCount, maxCount float64, err error) { sc := sctx.GetSessionVars().StmtCtx debugTrace := sc.EnableOptimizerDebugTrace if debugTrace { @@ -237,12 +236,12 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, lb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...) err = sc.HandleError(err) if err != nil { - return 0, 0, err + return 0, 0, 0, err } rb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...) err = sc.HandleError(err) if err != nil { - return 0, 0, err + return 0, 0, 0, err } if debugTrace { debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount) @@ -301,14 +300,15 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, // Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything. // If the first column's range is point. 
if rangePosition := getOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer >= statistics.Version2 && coll != nil { - var expBackoffSel, corrSel float64 - expBackoffSel, corrSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange) + var expBackoffSel, minSel, maxSel float64 + expBackoffSel, minSel, maxSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange) if err != nil { - return 0, 0, err + return 0, 0, 0, err } if expBackoffSuccess { expBackoffCnt := expBackoffSel * idx.TotalRowCount() - corrCnt := corrSel * idx.TotalRowCount() + minCnt := minSel * idx.TotalRowCount() + maxCnt := maxSel * idx.TotalRowCount() upperLimit := expBackoffCnt // Use the multi-column stats to calculate the max possible row count of [l, r) @@ -335,7 +335,8 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, expBackoffCnt = upperLimit } count += expBackoffCnt - corrCount += corrCnt + minCount += minCnt + maxCount += maxCnt } } if !expBackoffSuccess { @@ -345,7 +346,8 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, // If the current table row count has changed, we should scale the row count accordingly. increaseFactor := idx.GetIncreaseFactor(realtimeRowCount) count *= increaseFactor - corrCount *= increaseFactor + minCount *= increaseFactor + maxCount *= increaseFactor // handling the out-of-range part if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) { @@ -387,7 +389,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, // Don't allow the final result to go below 1 row totalCount = mathutil.Clamp(totalCount, 1, float64(realtimeRowCount)) } - return totalCount, corrCount, nil + return totalCount, minCount, maxCount, nil } var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil)) @@ -476,7 +478,7 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b } // expBackoffEstimation estimate the multi-col cases following the Exponential Backoff. See comment below for details. -func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, corrSel float64, success bool, err error) { +func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, minSel float64, maxSel float64, success bool, err error) { if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { debugtrace.EnterContextCommon(sctx) defer func() { @@ -497,6 +499,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll } colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID] singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal)) + minSel = float64(1) // The following codes uses Exponential Backoff to reduce the impact of independent assumption. It works like: // 1. Calc the selectivity of each column. // 2. Sort them and choose the first 4 most selective filter and the corresponding selectivity is sel_1, sel_2, sel_3, sel_4 where i < j => sel_i < sel_j. 
@@ -532,7 +535,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll continue } foundStats = true - count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan, nil) + count, _, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan, nil) if err == nil { break } @@ -544,9 +547,10 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll continue } if err != nil { - return 0, 0, false, err + return 0, 0, 0, false, err } singleColumnEstResults = append(singleColumnEstResults, selectivity) + minSel *= selectivity } // Sort them. slices.Sort(singleColumnEstResults) @@ -556,9 +560,9 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll l = 0 }) if l == 1 { - return singleColumnEstResults[0], singleColumnEstResults[0], true, nil + return singleColumnEstResults[0], singleColumnEstResults[0], singleColumnEstResults[0], true, nil } else if l == 0 { - return 0, 0, false, nil + return 0, 0, 0, false, nil } // Do not allow the exponential backoff to go below the available index bound. If the number of predicates // is less than the number of index columns - use 90% of the bound to differentiate a subset from full index match. @@ -571,21 +575,23 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll if l < len(idx.Info.Columns) { idxLowBound /= 0.9 } - // corrSel is the selectivity of the most filtering column - corrSel = max(idxLowBound, singleColumnEstResults[0]) + // maxSel assumes correlation, so is the selectivity of the most filtering column + maxSel = max(idxLowBound, singleColumnEstResults[0]) + // minSel assumes independence between columns, so is the product of all single column selectivities. + minSel = max(idxLowBound, minSel) minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound) multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1]) if l == 2 { - return max(minTwoCol, multTwoCol), corrSel, true, nil + return max(minTwoCol, multTwoCol), minSel, maxSel, true, nil } minThreeCol := min(minTwoCol, singleColumnEstResults[2]) multThreeCol := multTwoCol * math.Sqrt(math.Sqrt(singleColumnEstResults[2])) if l == 3 { - return max(minThreeCol, multThreeCol), corrSel, true, nil + return max(minThreeCol, multThreeCol), minSel, maxSel, true, nil } minFourCol := min(minThreeCol, singleColumnEstResults[3]) multFourCol := multThreeCol * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3]))) - return max(minFourCol, multFourCol), corrSel, true, nil + return max(minFourCol, multFourCol), minSel, maxSel, true, nil } // outOfRangeOnIndex checks if the datum is out of the range. 
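Editorial note on the expBackoffEstimation changes above: minSel and maxSel bracket the backoff point estimate, with minSel assuming full independence (the product of the per-column selectivities) and maxSel assuming full correlation (the most filtering column alone), both floored by the index-derived lower bound. The standalone sketch below reproduces only that arithmetic with assumed inputs; it is not the TiDB function and omits the zero/one-column short-circuits, the 0.9 adjustment for partial index matches, and the multi-column-stats upper limit applied by the caller.

package main

import (
	"fmt"
	"math"
	"slices"
)

// backoffBand returns the exponential-backoff point estimate together with the
// min/max selectivities that bracket it, following the formulas in this patch.
// colSels are per-column selectivities; idxLowBound stands in for the lower bound
// derived from the index statistics (its computation is outside this sketch).
func backoffBand(colSels []float64, idxLowBound float64) (sel, minSel, maxSel float64) {
	slices.Sort(colSels) // most filtering (smallest) selectivity first
	// minSel: independence assumption -> product of all single-column selectivities.
	minSel = 1.0
	for _, s := range colSels {
		minSel *= s
	}
	minSel = math.Max(idxLowBound, minSel)
	// maxSel: correlation assumption -> the most filtering column alone.
	maxSel = math.Max(idxLowBound, colSels[0])
	// Point estimate: sel_1 * sqrt(sel_2) * sqrt(sqrt(sel_3)) * sqrt(sqrt(sqrt(sel_4))),
	// using at most the four most filtering columns and never dropping below the
	// smallest selectivity involved or the index lower bound.
	if len(colSels) > 4 {
		colSels = colSels[:4]
	}
	sel = colSels[0]
	floor := math.Min(idxLowBound, colSels[0])
	for i, s := range colSels[1:] {
		sel *= math.Pow(s, 1/math.Exp2(float64(i+1))) // sqrt, then sqrt(sqrt), ...
		floor = math.Min(floor, s)
	}
	return math.Max(floor, sel), minSel, maxSel
}

func main() {
	// Assumed inputs: three predicate selectivities and a hypothetical index lower bound.
	sel, minSel, maxSel := backoffBand([]float64{0.05, 0.2, 0.4}, 0.001)
	fmt.Printf("est=%.4f min=%.4f max=%.4f\n", sel, minSel, maxSel)
}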
diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go index a9e196bfcc..74a7e05a21 100644 --- a/pkg/planner/cardinality/selectivity.go +++ b/pkg/planner/cardinality/selectivity.go @@ -203,12 +203,13 @@ func Selectivity( if err != nil { return 0, nil, errors.Trace(err) } - cnt, corrCnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges, nil) + cnt, minCnt, maxCnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges, nil) if err != nil { return 0, nil, errors.Trace(err) } selectivity := cnt / float64(coll.RealtimeCount) - corrSelectivity := corrCnt / float64(coll.RealtimeCount) + minSelectivity := minCnt / float64(coll.RealtimeCount) + maxSelectivity := maxCnt / float64(coll.RealtimeCount) nodes = append(nodes, &StatsNode{ Tp: IndexType, ID: id, @@ -216,7 +217,8 @@ func Selectivity( Ranges: ranges, numCols: len(idxStats.Info.Columns), Selectivity: selectivity, - CorrSelectivity: corrSelectivity, + MinSelectivity: minSelectivity, + MaxSelectivity: maxSelectivity, partCover: partCover, minAccessCondsForDNFCond: minAccessCondsForDNFCond, }) @@ -553,10 +555,12 @@ type StatsNode struct { mask int64 // Selectivity indicates the Selectivity of this column/index. Selectivity float64 - // CorrSelectivity indicates the Selectivity of this column/index with correlated column. - // That is - it is the selectivity assuming the most filtering index column only, and all other - // columns are correlated with this column. - CorrSelectivity float64 + // MinSelectivity indicates the Selectivity of this column/index for the least rows that can qualify. + // It takes into account situations that would decrease the row count, such as fully independent columns. + MinSelectivity float64 + // MaxSelectivity indicates the Selectivity of this column/index for the most rows that can qualify. + // It takes into account situations that would increase the row count, such as correlated columns. + MaxSelectivity float64 // numCols is the number of columns contained in the index or column(which is always 1). numCols int // partCover indicates whether the bit in the mask is for a full cover or partial cover. 
It is only true diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index e4de90ada1..1882075549 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -253,11 +253,11 @@ func TestEstimationForUnknownValues(t *testing.T) { require.Equal(t, 4.7, count) idxID := table.Meta().Indices[0].ID - count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30), nil) + count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30), nil) require.NoError(t, err) require.Equal(t, 0.1, count) - count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30), nil) + count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30), nil) require.NoError(t, err) require.Equal(t, 4.5, count) @@ -287,7 +287,7 @@ func TestEstimationForUnknownValues(t *testing.T) { require.Equal(t, 1.0, count) idxID = table.Meta().Indices[0].ID - count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2), nil) + count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2), nil) require.NoError(t, err) require.Equal(t, 0.0, count) } @@ -440,11 +440,11 @@ func TestEstimationUniqueKeyEqualConds(t *testing.T) { sctx := mock.NewContext() idxID := table.Meta().Indices[0].ID - count, _, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7), nil) + count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7), nil) require.NoError(t, err) require.Equal(t, 1.0, count) - count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err) require.Equal(t, 1.0, count) @@ -1075,12 +1075,12 @@ func TestIssue39593(t *testing.T) { sctx := testKit.Session() idxID := tblInfo.Indices[0].ID vals := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20} - count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil) + count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil) require.NoError(t, err) // estimated row count without any changes, use range to reduce test flakiness require.InDelta(t, float64(462.6), count, float64(1)) statsTbl.RealtimeCount *= 10 - count, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil) + count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals), nil) require.NoError(t, err) // estimated row count after mock modify on the table, use range to reduce test flakiness require.InDelta(t, float64(3702.6), count, float64(1)) @@ -1569,15 +1569,15 @@ func TestRiskEqSkewRatio(t *testing.T) { // Search for the value "6" which will not be found in the histogram buckets, and since // there are NO topN values - the value will be considered skewed based upon skew ratio. 
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 0") - count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err) testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 0.5") - count2, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count2, _, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err2) // Result of count2 should be larger than count because the risk ratio is higher require.Less(t, count, count2) testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 1") - count3, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count3, _, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err3) // Result of count3 should be larger because the risk ratio is higher require.Less(t, count2, count3) @@ -1589,27 +1589,27 @@ func TestRiskEqSkewRatio(t *testing.T) { require.NoError(t, h.DumpStatsDeltaToKV(true)) // Rerun tests with 1 value in the TopN statsTbl = h.GetTableStats(tb.Meta()) - count, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count, _, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err) testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 0.5") - count2, _, err2 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count2, _, _, err2 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err2) // Result of count2 should be larger than count because the risk ratio is higher require.Less(t, count, count2) testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = 1") - count3, _, err3 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count3, _, _, err3 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err3) // Result of count3 should be larger than count because the risk ratio is higher require.Less(t, count2, count3) // Repeat the prior test by setting the global variable instead of the session variable. This should have no effect. testKit.MustExec("set @@global.tidb_opt_risk_eq_skew_ratio = 0.5") - count4, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count4, _, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err4) require.Less(t, count2, count4) // Repeat the prior test by setting the session variable to the default. Count4 should inherit the global // variable and be less than count3. 
testKit.MustExec("set @@session.tidb_opt_risk_eq_skew_ratio = default") - count4, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) + count4, _, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(6, 6), nil) require.NoError(t, err4) require.Less(t, count4, count3) // Reset global variable to default. @@ -1641,27 +1641,27 @@ func TestRiskRangeSkewRatioWithinBucket(t *testing.T) { // Search for the range from 2 to 3, since there is only one bucket it will be a query within // a bucket. testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 0") - count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) + count, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) require.NoError(t, err) testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 0.5") - count2, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) + count2, _, _, err2 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) require.NoError(t, err2) // Result of count2 should be larger than count because the risk ratio is higher require.Less(t, count, count2) testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = 1") - count3, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) + count3, _, _, err3 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) require.NoError(t, err3) // Result of count3 should be larger because the risk ratio is higher require.Less(t, count2, count3) // Repeat the prior test by setting the global variable instead of the session variable. This should have no effect. testKit.MustExec("set @@global.tidb_opt_risk_range_skew_ratio = 0.5") - count4, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) + count4, _, _, err4 := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) require.NoError(t, err4) require.Less(t, count2, count4) // Repeat the prior test by setting the session variable to the default. Count4 should inherit the global // variable and be less than count3. testKit.MustExec("set @@session.tidb_opt_risk_range_skew_ratio = default") - count4, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) + count4, _, _, err4 = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRange(2, 3), nil) require.NoError(t, err4) require.Less(t, count4, count3) // Reset global variable to default. 
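Stepping back from the test updates: earlier in this patch, Selectivity turns the (count, minCount, maxCount) triple returned by GetRowCountByIndexRanges into a selectivity band on StatsNode by dividing each count by coll.RealtimeCount. A minimal standalone sketch of that derivation with made-up numbers; the struct below is an illustration, not the planner's StatsNode type.

package main

import "fmt"

// statsNodeBand mirrors only the new StatsNode fields relevant here.
type statsNodeBand struct {
	Selectivity    float64 // point estimate
	MinSelectivity float64 // least rows that could qualify (e.g. fully independent columns)
	MaxSelectivity float64 // most rows that could qualify (e.g. correlated columns)
}

func main() {
	// Hypothetical outputs of GetRowCountByIndexRanges for one index path.
	cnt, minCnt, maxCnt := 480.0, 120.0, 2400.0
	realtimeCount := 100000.0 // coll.RealtimeCount in the real code

	node := statsNodeBand{
		Selectivity:    cnt / realtimeCount,
		MinSelectivity: minCnt / realtimeCount,
		MaxSelectivity: maxCnt / realtimeCount,
	}
	// A wide band signals a riskier estimate for the downstream path comparison.
	fmt.Printf("selectivity=%.5f band=[%.5f, %.5f]\n",
		node.Selectivity, node.MinSelectivity, node.MaxSelectivity)
}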
@@ -1800,11 +1800,11 @@ func TestLastBucketEndValueHeuristic(t *testing.T) { // Test index estimation as well idx := statsTbl.GetIdx(table.Meta().Indices[0].ID) if idx != nil { - idxEnhancedCount, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(11, 11), nil) + idxEnhancedCount, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(11, 11), nil) require.NoError(t, err) require.InDelta(t, 100.09, idxEnhancedCount, 0.1, "Index enhanced count should be approximately 100.09") - idxOtherCount, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(3, 3), nil) + idxOtherCount, _, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, table.Meta().Indices[0].ID, getRange(3, 3), nil) require.NoError(t, err) require.InDelta(t, 109.99, idxOtherCount, 0.1, "Index other count should be approximately 109.99") } diff --git a/pkg/planner/core/debugtrace.go b/pkg/planner/core/debugtrace.go index b2ee3166e6..2f58a668ee 100644 --- a/pkg/planner/core/debugtrace.go +++ b/pkg/planner/core/debugtrace.go @@ -227,14 +227,15 @@ func stabilizeGetStatsTblInfo(info *getStatsTblInfo) { */ type accessPathForDebugTrace struct { - IndexName string `json:",omitempty"` - AccessConditions []string - IndexFilters []string - TableFilters []string - PartialPaths []accessPathForDebugTrace `json:",omitempty"` - CountAfterAccess float64 - CorrCountAfterAccess float64 - CountAfterIndex float64 + IndexName string `json:",omitempty"` + AccessConditions []string + IndexFilters []string + TableFilters []string + PartialPaths []accessPathForDebugTrace `json:",omitempty"` + CountAfterAccess float64 + MinCountAfterAccess float64 + MaxCountAfterAccess float64 + CountAfterIndex float64 } func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.AccessPath, out *accessPathForDebugTrace) { @@ -245,7 +246,8 @@ func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.Acces out.IndexFilters = expression.ExprsToStringsForDisplay(ctx, path.IndexFilters) out.TableFilters = expression.ExprsToStringsForDisplay(ctx, path.TableFilters) out.CountAfterAccess = path.CountAfterAccess - out.CorrCountAfterAccess = path.CorrCountAfterAccess + out.MaxCountAfterAccess = path.MaxCountAfterAccess + out.MinCountAfterAccess = path.MinCountAfterAccess out.CountAfterIndex = path.CountAfterIndex out.PartialPaths = make([]accessPathForDebugTrace, len(path.PartialIndexPaths)) for i, partialPath := range path.PartialIndexPaths { diff --git a/pkg/planner/core/exhaust_physical_plans.go b/pkg/planner/core/exhaust_physical_plans.go index 3249a5d54a..2a04d3c700 100644 --- a/pkg/planner/core/exhaust_physical_plans.go +++ b/pkg/planner/core/exhaust_physical_plans.go @@ -1513,11 +1513,12 @@ func constructDS2IndexScanTask( rowCount = math.Min(rowCount, 1.0) } tmpPath := &util.AccessPath{ - IndexFilters: indexConds, - TableFilters: tblConds, - CountAfterIndex: rowCount, - CountAfterAccess: rowCount, - CorrCountAfterAccess: 0, + IndexFilters: indexConds, + TableFilters: tblConds, + CountAfterIndex: rowCount, + CountAfterAccess: rowCount, + MinCountAfterAccess: 0, + MaxCountAfterAccess: 0, } // Assume equal conditions used by index join and other conditions are independent. 
if len(tblConds) > 0 { diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 27fb7b445e..a2c45aa62e 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -1084,24 +1084,22 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int { return compareBool(lhs.path.Index.Global, rhs.path.Index.Global) } -func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) { - lhsCorrRatio, rhsCorrRatio := 0.0, 0.0 - // CorrCountAfterAccess tracks the "CountAfterAccess" only including the most selective index column, thus - // lhs/rhsCorrRatio represents the "risk" of the CountAfterAccess value - lower value means less risk that - // we do NOT know about actual correlation between indexed columns - // TODO - corrCountAfterAccess is only currently used to compete 2 indexes - since they are the only paths - // that potentially go through expBackOffEstimation - if lhs.path.CorrCountAfterAccess > 0 && rhs.path.CorrCountAfterAccess > 0 { - lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess - rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess +func compareRiskRatio(lhs, rhs *candidatePath) (int, float64) { + lhsRiskRatio, rhsRiskRatio := 0.0, 0.0 + // MaxCountAfterAccess tracks the worst case "CountAfterAccess", accounting for scenarios that could + // increase our row estimation, thus lhs/rhsRiskRatio represents the "risk" of the CountAfterAccess value. + // Lower value means less risk that the actual row count is higher than the estimated one. + if lhs.path.MaxCountAfterAccess > 0 && rhs.path.MaxCountAfterAccess > 0 { + lhsRiskRatio = lhs.path.MaxCountAfterAccess / lhs.path.CountAfterAccess + rhsRiskRatio = rhs.path.MaxCountAfterAccess / rhs.path.CountAfterAccess } // lhs has lower risk - if lhsCorrRatio < rhsCorrRatio && lhs.path.CountAfterAccess < rhs.path.CountAfterAccess { - return 1, lhsCorrRatio + if lhsRiskRatio < rhsRiskRatio && lhs.path.CountAfterAccess < rhs.path.CountAfterAccess { + return 1, lhsRiskRatio } // rhs has lower risk - if rhsCorrRatio < lhsCorrRatio && rhs.path.CountAfterAccess < lhs.path.CountAfterAccess { - return -1, rhsCorrRatio + if rhsRiskRatio < lhsRiskRatio && rhs.path.CountAfterAccess < lhs.path.CountAfterAccess { + return -1, rhsRiskRatio } return 0, 0 } @@ -1150,8 +1148,8 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs) accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap) scanResult, comparable2 := compareIndexBack(lhs, rhs) - // TODO: corrResult is not added to sum to limit change to existing logic. Further testing required. - corrResult, _ := compareCorrRatio(lhs, rhs) + // TODO: riskResult is not added to sum to limit change to existing logic. Further testing required. + riskResult, _ := compareRiskRatio(lhs, rhs) sum := accessResult + scanResult + matchResult + globalResult // First rules apply when an index doesn't have statistics and another object (index or table) has statistics @@ -1159,11 +1157,13 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI // If one index has statistics and the other does not, choose the index with statistics if it // has the same or higher number of equal/IN predicates. 
if !lhsPseudo && globalResult >= 0 && sum >= 0 && - lhs.path.EqOrInCondCount > 0 && lhs.path.EqOrInCondCount >= rhs.path.EqOrInCondCount { + lhs.path.EqOrInCondCount > 0 && lhs.path.EqOrInCondCount >= rhs.path.EqOrInCondCount && + (rhs.path.MaxCountAfterAccess <= 0 || lhs.path.CountAfterAccess < rhs.path.MaxCountAfterAccess) { return 1, lhsPseudo // left wins and has statistics (lhsPseudo==false) } if !rhsPseudo && globalResult <= 0 && sum <= 0 && - rhs.path.EqOrInCondCount > 0 && rhs.path.EqOrInCondCount >= lhs.path.EqOrInCondCount { + rhs.path.EqOrInCondCount > 0 && rhs.path.EqOrInCondCount >= lhs.path.EqOrInCondCount && + (lhs.path.MaxCountAfterAccess <= 0 || rhs.path.CountAfterAccess < lhs.path.MaxCountAfterAccess) { return -1, rhsPseudo // right wins and has statistics (rhsPseudo==false) } if preferRange { @@ -1191,10 +1191,10 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI if threshold > 0 { // set it to 0 to disable this rule // corrResult is included to ensure we don't preference to a higher risk plan given that // this rule does not check the other criteria included below. - if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold && corrResult <= 0 { + if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold && riskResult <= 0 { return -1, rhsPseudo // right wins - also return whether it has statistics (pseudo) or not } - if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold && corrResult >= 0 { + if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold && riskResult >= 0 { return 1, lhsPseudo // left wins - also return whether it has statistics (pseudo) or not } } diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go index 943842628b..05e83356bc 100644 --- a/pkg/planner/core/stats.go +++ b/pkg/planner/core/stats.go @@ -177,7 +177,8 @@ func fillIndexPath(ds *logicalop.DataSource, path *util.AccessPath, conds []expr } path.Ranges = ranger.FullRange() path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount) - path.CorrCountAfterAccess = 0 + path.MinCountAfterAccess = 0 + path.MaxCountAfterAccess = 0 path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index) path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index) if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) { @@ -415,10 +416,7 @@ func detachCondAndBuildRangeForPath( if len(indexCols) > len(path.Index.Columns) { // remove clustered primary key if it has been added to path.IdxCols indexCols = indexCols[0:len(path.Index.Columns)] } - path.CountAfterAccess, path.CorrCountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges, indexCols) - if path.CorrCountAfterAccess == 0 { - path.CorrCountAfterAccess = path.CountAfterAccess - } + path.CountAfterAccess, path.MinCountAfterAccess, path.MaxCountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges, indexCols) return err } diff --git a/pkg/planner/util/path.go b/pkg/planner/util/path.go index 846f1e8dae..996a92e36f 100644 --- a/pkg/planner/util/path.go +++ b/pkg/planner/util/path.go @@ -42,12 +42,16 @@ type AccessPath struct { // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. // For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters. 
CountAfterAccess float64 - // CorrCountAfterAccess is the row count after only applying the most filtering index columns. - // against the index. This is used when we don't have a full index statistics - // and we need to use the exponential backoff to estimate the row count. - // Case CorrCountAfterAccess > 0 : we use the exponential backoff to estimate the row count (such as we don't have a full index statistics) - // Default CorrCountAfterAccess = 0 : we use index of table estimate row coun directly (such as table full scan, point get etc) - CorrCountAfterAccess float64 + // MinCountAfterAccess is a lower bound on CountAfterAccess, accounting for risks that could + // lead to overestimation, such as assuming correlation with exponential backoff when columns are actually independent. + // Case MinCountAfterAccess > 0 : we've encountered risky scenarios and have a potential lower row count estimation + // Default MinCountAfterAccess = 0 : we have not identified risks that could lead to lower row count + MinCountAfterAccess float64 + // MaxCountAfterAccess is an upper bound on the CountAfterAccess, accounting for risks that could + // lead to underestimation, such as assuming independence between non-index columns. + // Case MaxCountAfterAccess > 0 : we've encountered risky scenarios and have a potential greater row count estimation + // Default MaxCountAfterAccess = 0 : we have not identified risks that could lead to greater row count + MaxCountAfterAccess float64 // CountAfterIndex is the row count after we apply filters on index and before we apply the table filters. CountAfterIndex float64 AccessConds []expression.Expression @@ -139,7 +143,8 @@ func (path *AccessPath) Clone() *AccessPath { ConstCols: slices.Clone(path.ConstCols), Ranges: CloneRanges(path.Ranges), CountAfterAccess: path.CountAfterAccess, - CorrCountAfterAccess: path.CorrCountAfterAccess, + MinCountAfterAccess: path.MinCountAfterAccess, + MaxCountAfterAccess: path.MaxCountAfterAccess, CountAfterIndex: path.CountAfterIndex, AccessConds: CloneExprs(path.AccessConds), EqCondCount: path.EqCondCount, diff --git a/pkg/statistics/statistics_test.go b/pkg/statistics/statistics_test.go index 3b7a924d47..507a271c19 100644 --- a/pkg/statistics/statistics_test.go +++ b/pkg/statistics/statistics_test.go @@ -393,51 +393,51 @@ func SubTestIndexRanges() func(*testing.T) { HighVal: []types.Datum{types.MaxValueDatum()}, Collators: collate.GetBinaryCollatorSlice(1), }} - count, _, err := GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err := GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 99900, int(count)) ran[0].LowVal[0] = types.NewIntDatum(1000) ran[0].HighVal[0] = types.NewIntDatum(2000) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 2500, int(count)) ran[0].LowVal[0] = types.NewIntDatum(1001) ran[0].HighVal[0] = types.NewIntDatum(1999) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 2500, int(count)) ran[0].LowVal[0] = types.NewIntDatum(1000) ran[0].HighVal[0] = types.NewIntDatum(1000) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) 
require.Equal(t, 100, int(count)) tbl.SetIdx(0, &Index{Info: &model.IndexInfo{Columns: []*model.IndexColumn{{Offset: 0}}, Unique: true}}) ran[0].LowVal[0] = types.NewIntDatum(1000) ran[0].HighVal[0] = types.NewIntDatum(1000) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 1, int(count)) tbl.SetIdx(0, idx) ran[0].LowVal[0] = types.MinNotNullDatum() ran[0].HighVal[0] = types.MaxValueDatum() - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 100000, int(count)) ran[0].LowVal[0] = types.NewIntDatum(1000) ran[0].HighVal[0] = types.NewIntDatum(2000) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 1000, int(count)) ran[0].LowVal[0] = types.NewIntDatum(1001) ran[0].HighVal[0] = types.NewIntDatum(1990) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 989, int(count)) ran[0].LowVal[0] = types.NewIntDatum(1000) ran[0].HighVal[0] = types.NewIntDatum(1000) - count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) + count, _, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran, nil) require.NoError(t, err) require.Equal(t, 1, int(count)) } diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index a0f910f24b..288d8f94f3 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -50,7 +50,7 @@ var ( // Note: all functions below will be removed after finishing moving all estimation functions into the cardinality package. // GetRowCountByIndexRanges is a function type to get row count by index ranges. - GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range, idxCol []*expression.Column) (result float64, corrResult float64, err error) + GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range, idxCol []*expression.Column) (result float64, minResult float64, maxResult float64, err error) // GetRowCountByIntColumnRanges is a function type to get row count by int column ranges. GetRowCountByIntColumnRanges func(sctx planctx.PlanContext, coll *HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error)
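To illustrate how MaxCountAfterAccess feeds the access-path comparison, the sketch below restates the rule from the compareRiskRatio hunk in find_best_task.go over two hypothetical paths. The struct and the numbers are stand-ins; only the comparison logic comes from this patch.

package main

import "fmt"

// pathEst is a stand-in for the few AccessPath fields used by compareRiskRatio.
type pathEst struct {
	CountAfterAccess    float64 // point estimate after applying access conditions
	MaxCountAfterAccess float64 // upper bound; 0 means no underestimation risk was identified
}

// compareRisk returns 1 if lhs is the lower-risk path, -1 if rhs is, and 0 if undecided,
// following compareRiskRatio: a path only wins when both its risk ratio
// (MaxCountAfterAccess / CountAfterAccess) and its point estimate are smaller.
func compareRisk(lhs, rhs pathEst) (int, float64) {
	lhsRatio, rhsRatio := 0.0, 0.0
	if lhs.MaxCountAfterAccess > 0 && rhs.MaxCountAfterAccess > 0 {
		lhsRatio = lhs.MaxCountAfterAccess / lhs.CountAfterAccess
		rhsRatio = rhs.MaxCountAfterAccess / rhs.CountAfterAccess
	}
	if lhsRatio < rhsRatio && lhs.CountAfterAccess < rhs.CountAfterAccess {
		return 1, lhsRatio
	}
	if rhsRatio < lhsRatio && rhs.CountAfterAccess < lhs.CountAfterAccess {
		return -1, rhsRatio
	}
	return 0, 0
}

func main() {
	// Hypothetical: the left path estimates fewer rows and carries less underestimation risk.
	idxA := pathEst{CountAfterAccess: 500, MaxCountAfterAccess: 1000} // risk ratio 2.0
	idxB := pathEst{CountAfterAccess: 800, MaxCountAfterAccess: 8000} // risk ratio 10.0
	result, ratio := compareRisk(idxA, idxB)
	fmt.Println(result, ratio) // 1 2 -> the left path wins
}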