planner: add tolerance for row count floating point comparison (#59241)

close pingcap/tidb#59133
This commit is contained in:
tpp
2025-02-05 22:39:49 -08:00
committed by GitHub
parent fc8bdb54c6
commit 430fa8fc6a
6 changed files with 20 additions and 11 deletions

View File

@ -391,8 +391,8 @@
"SQL": "SELECT 1 FROM t_inlist_test FORCE INDEX (twoColIndex) WHERE a1 IN (44, 70, 76) AND (a1 > 70 OR (a1 = 70 AND b1 > 41));",
"Plan": [
"Projection 43.33 root 1->Column#5",
"└─IndexReader 54.17 root index:IndexRangeScan",
" └─IndexRangeScan 54.17 cop[tikv] table:t_inlist_test, index:twoColIndex(a1, b1) range:(70 41,70 +inf], [76,76], keep order:false, stats:pseudo"
"└─IndexReader 43.33 root index:IndexRangeScan",
" └─IndexRangeScan 43.33 cop[tikv] table:t_inlist_test, index:twoColIndex(a1, b1) range:(70 41,70 +inf], [76,76], keep order:false, stats:pseudo"
],
"Result": null
},

View File

@ -428,8 +428,8 @@
"Plan": [
" TableReader root ",
" └─ExchangeSender cop[tiflash] ",
" └─Selection cop[tiflash] gt(test.t1.c, ?)",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.a, ?), gt(test.t1.b, ?), keep order:false"
" └─Selection cop[tiflash] gt(test.t1.a, ?)",
" └─TableFullScan cop[tiflash] table:t1, range:[?,?], pushed down filter:gt(test.t1.b, ?), gt(test.t1.c, ?), keep order:false"
]
},
{

View File

@ -24,6 +24,10 @@ const (
SelectionFactor = 0.8
DistinctFactor = 0.8
// ToleranceFactor is an arbitrary small value added in (some) floating-point
// comparisons to absorb precision errors.
ToleranceFactor = 0.00001
)
// AggFuncFactor is the basic factor for aggregation.

View File

@ -1704,7 +1704,8 @@ func convertToIndexMergeScan(ds *logicalop.DataSource, prop *property.PhysicalPr
scans = append(scans, scan)
}
totalRowCount := path.CountAfterAccess
if prop.ExpectedCnt < ds.StatsInfo().RowCount {
// Add an arbitrary tolerance factor to absorb floating-point precision error in the comparison.
if (prop.ExpectedCnt + cost.ToleranceFactor) < ds.StatsInfo().RowCount {
totalRowCount *= prop.ExpectedCnt / ds.StatsInfo().RowCount
}
ts, remainingFilters2, moreColumn, err := buildIndexMergeTableScan(ds, path.TableFilters, totalRowCount, candidate.isMatchProp)
@ -2948,7 +2949,8 @@ func getOriginalPhysicalTableScan(ds *logicalop.DataSource, prop *property.Physi
}.Init(ds.SCtx(), ds.QueryBlockOffset())
ts.SetSchema(ds.Schema().Clone())
rowCount := path.CountAfterAccess
if prop.ExpectedCnt < ds.StatsInfo().RowCount {
// Add an arbitrary tolerance factor to absorb floating-point precision error in the comparison.
if (prop.ExpectedCnt + cost.ToleranceFactor) < ds.StatsInfo().RowCount {
rowCount = cardinality.AdjustRowCountForTableScanByLimit(ds.SCtx(),
ds.StatsInfo(), ds.TableStats, ds.StatisticTable,
path, prop.ExpectedCnt, isMatchProp && prop.SortItems[0].Desc)

View File

@ -233,7 +233,8 @@ func deriveIndexPathStats(ds *logicalop.DataSource, path *util.AccessPath, _ []e
path.IndexFilters = append(path.IndexFilters, indexFilters...)
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
// Add an arbitrary tolerance factor to absorb floating-point precision error in the comparison.
if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
}
if path.IndexFilters != nil {
@ -332,7 +333,8 @@ func deriveTablePathStats(ds *logicalop.DataSource, path *util.AccessPath, conds
path.CountAfterAccess, err = cardinality.GetRowCountByIntColumnRanges(ds.SCtx(), &ds.StatisticTable.HistColl, pkCol.ID, path.Ranges)
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
// Add an arbitrary tolerance factor to absorb floating-point precision error in the comparison.
if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
}
return err
@ -370,7 +372,8 @@ func deriveCommonHandleTablePathStats(ds *logicalop.DataSource, path *util.Acces
}
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.CountAfterAccess < ds.StatsInfo().RowCount && !isIm {
// Add an arbitrary tolerance factor to absorb floating-point precision error in the comparison.
if (path.CountAfterAccess+cost.ToleranceFactor) < ds.StatsInfo().RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.StatsInfo().RowCount/cost.SelectionFactor, float64(ds.StatisticTable.RealtimeCount))
}
return nil

View File

@ -109,11 +109,11 @@ id estRows task access object operator info
StreamAgg_17 1.00 root funcs:count(Column#8)->Column#6
└─IndexReader_18 1.00 root index:StreamAgg_9
└─StreamAgg_9 1.00 cop[tikv] funcs:count(1)->Column#8
└─IndexRangeScan_16 133.89 cop[tikv] table:tbl_0, index:idx_3(col_0) range:[803163,+inf], keep order:false
└─IndexRangeScan_16 107.12 cop[tikv] table:tbl_0, index:idx_3(col_0) range:[803163,+inf], keep order:false
explain select count(*) from wout_cluster_index.tbl_0 where col_0 >= 803163 ;
id estRows task access object operator info
StreamAgg_17 1.00 root funcs:count(Column#9)->Column#7
└─IndexReader_18 1.00 root index:StreamAgg_9
└─StreamAgg_9 1.00 cop[tikv] funcs:count(1)->Column#9
└─IndexRangeScan_16 133.89 cop[tikv] table:tbl_0, index:idx_3(col_0) range:[803163,+inf], keep order:false
└─IndexRangeScan_16 107.12 cop[tikv] table:tbl_0, index:idx_3(col_0) range:[803163,+inf], keep order:false
set @@tidb_enable_outer_join_reorder=false;