planner: set a lower-bound for NDV used in out-of-range estimation for EQ conditions when the Histogram is empty (#64139)
close pingcap/tidb#64137
This commit is contained in:
@ -64,7 +64,7 @@ go_test(
|
||||
data = glob(["testdata/**"]),
|
||||
embed = [":cardinality"],
|
||||
flaky = True,
|
||||
shard_count = 44,
|
||||
shard_count = 45,
|
||||
deps = [
|
||||
"//pkg/config",
|
||||
"//pkg/config/kerneltype",
|
||||
|
||||
@ -429,8 +429,8 @@ func estimateRowCountWithUniformDistribution(
|
||||
increaseFactor := stats.GetIncreaseFactor(realtimeRowCount)
|
||||
notNullCount := histogram.NotNullCount()
|
||||
|
||||
// Branch 1: all NDV's are in TopN, and no histograms.
|
||||
if histNDV <= 0 || notNullCount == 0 {
|
||||
var avgRowEstimate float64
|
||||
if histNDV <= 0 || notNullCount == 0 { // Branch 1: all NDV's are in TopN, and no histograms.
|
||||
// We have no histograms, but c.Histogram.NDV > c.TopN.Num().
|
||||
// This can happen when sampling collects fewer than all NDV.
|
||||
if histNDV > 0 && modifyCount == 0 {
|
||||
@ -441,12 +441,11 @@ func estimateRowCountWithUniformDistribution(
|
||||
if notNullCount <= 0 {
|
||||
notNullCount = totalRowCount - float64(histogram.NullCount)
|
||||
}
|
||||
outOfRangeCnt := outOfRangeFullNDV(float64(histogram.NDV), totalRowCount, notNullCount, float64(realtimeRowCount), increaseFactor, modifyCount)
|
||||
return statistics.DefaultRowEst(outOfRangeCnt)
|
||||
avgRowEstimate = outOfRangeFullNDV(float64(histogram.NDV), totalRowCount, notNullCount, float64(realtimeRowCount), increaseFactor, modifyCount)
|
||||
} else { // Branch 2: some NDV's are in histograms
|
||||
// Calculate the average histogram rows (which excludes topN) and NDV that excluded topN
|
||||
avgRowEstimate = notNullCount / histNDV
|
||||
}
|
||||
// branch 2: some NDV's are in histograms
|
||||
// Calculate the average histogram rows (which excludes topN) and NDV that excluded topN
|
||||
avgRowEstimate := notNullCount / histNDV
|
||||
|
||||
// skewRatio determines how much of the potential skew should be considered
|
||||
skewRatio := sctx.GetSessionVars().RiskEqSkewRatio
|
||||
|
||||
@ -1173,6 +1173,7 @@ func outOfRangeEQSelectivity(sctx planctx.PlanContext, ndv, realtimeRowCount, co
|
||||
// outOfRangeFullNDV estimates the number of qualified rows when the topN represents all NDV values
|
||||
// and the searched value does not appear in the topN
|
||||
func outOfRangeFullNDV(ndv, origRowCount, notNullCount, realtimeRowCount, increaseFactor float64, modifyCount int64) (result float64) {
|
||||
// TODO: align or merge this out-of-range-est methods with `Histogram.OutOfRangeRowCount`.
|
||||
// If the table hasn't been modified, it's safe to return 0.
|
||||
if modifyCount == 0 {
|
||||
return 0
|
||||
@ -1189,7 +1190,7 @@ func outOfRangeFullNDV(ndv, origRowCount, notNullCount, realtimeRowCount, increa
|
||||
if newRows < 0 {
|
||||
newRows = min(notNullCount, realtimeRowCount)
|
||||
}
|
||||
// if no NDV - derive an NDV using sqrt
|
||||
// if no NDV - derive an NDV using sqrt, this could happen for unanalyzed tables
|
||||
if ndv <= 0 {
|
||||
ndv = math.Sqrt(max(notNullCount, realtimeRowCount))
|
||||
} else {
|
||||
@ -1197,6 +1198,13 @@ func outOfRangeFullNDV(ndv, origRowCount, notNullCount, realtimeRowCount, increa
|
||||
// the caller of the function
|
||||
ndv *= increaseFactor
|
||||
}
|
||||
// If topN represents all NDV values, the NDV should be relatively small.
|
||||
// Small NDV could cause extremely inaccurate result, use `outOfRangeBetweenRate` to smooth the result.
|
||||
// For example, TopN = {(value:1, rows: 10000), (2, 10000), (3, 10000)} and newRows = 15000, we should assume most
|
||||
// newly added rows are 1, 2 or 3. Then for an out-of-range estimation like `where col=9999`, the result should be
|
||||
// close to 0, but if we still use the original NDV, the result could be extremely large: 15000/3 = 5000.
|
||||
// See #64137 for a concrete example.
|
||||
ndv = max(ndv, float64(outOfRangeBetweenRate)) // avoid inaccurate estimate caused by small NDV
|
||||
return max(1, newRows/ndv)
|
||||
}
|
||||
|
||||
|
||||
@ -574,8 +574,8 @@ func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
|
||||
countEst, err = cardinality.GetColumnRowCount(sctx, col, getRange(15, 15), statsTblNew.RealtimeCount, statsTblNew.ModifyCount, false)
|
||||
count = countEst.Est
|
||||
require.NoError(t, err)
|
||||
require.Truef(t, count < 40, "expected: between 20 to 40, got: %v", count)
|
||||
require.Truef(t, count > 20, "expected: between 20 to 40, got: %v", count)
|
||||
require.Truef(t, count < 40, "expected: between 10 to 40, got: %v", count)
|
||||
require.Truef(t, count > 10, "expected: between 10 to 40, got: %v", count)
|
||||
}
|
||||
|
||||
func TestNewIndexWithoutStats(t *testing.T) {
|
||||
@ -2094,6 +2094,37 @@ func TestLastBucketEndValueHeuristic(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIssue64137(t *testing.T) {
|
||||
store, dom := testkit.CreateMockStoreAndDomain(t)
|
||||
h := dom.StatsHandle()
|
||||
tk := testkit.NewTestKit(t, store)
|
||||
tk.MustExec(`use test`)
|
||||
tk.MustExec(`create table t (a int, key(a))`)
|
||||
tk.MustExec(`set @@cte_max_recursion_depth=10000`)
|
||||
tk.MustExec(`insert into t select * from (with recursive cte as (
|
||||
select 1 as a, 1 as num union all
|
||||
select 1 as a, num+1 as num from cte where num < 10000
|
||||
) select a from cte) tt;`) // insert 10000 rows with a=1
|
||||
require.NoError(t, h.DumpStatsDeltaToKV(true))
|
||||
tk.MustQuery(`select count(1) from t`).Check(testkit.Rows("10000"))
|
||||
tk.MustExec(`analyze table t`)
|
||||
tk.MustQuery(`show stats_topn where is_index=1`).Check(testkit.Rows("test t a 1 1 10000")) // 1 topN value with count 10000
|
||||
|
||||
tk.MustExec(`insert into t select * from t limit 2000`) // insert 2000 rows with a=1
|
||||
require.NoError(t, h.DumpStatsDeltaToKV(true))
|
||||
h.Update(context.Background(), dom.InfoSchema())
|
||||
statsMeta := tk.MustQuery(`show stats_meta`).Rows()[0]
|
||||
require.Equal(t, statsMeta[4], "2000") // modify_count = 2000
|
||||
require.Equal(t, statsMeta[5], "12000") // row_count = 10000+2000
|
||||
|
||||
tk.MustQuery(`explain select * from t where a=99999999`).Check(testkit.Rows(
|
||||
`IndexReader_7 24.00 root index:IndexRangeScan_6`, // out-of-range est for small NDV, result should close to zero
|
||||
`└─IndexRangeScan_6 24.00 cop[tikv] table:t, index:a(a) range:[99999999,99999999], keep order:false`))
|
||||
tk.MustQuery(`explain select * from t where a=1`).Check(testkit.Rows(
|
||||
`IndexReader_7 12000.00 root index:IndexRangeScan_6`, // in-range est for small NDV
|
||||
`└─IndexRangeScan_6 12000.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false`))
|
||||
}
|
||||
|
||||
func TestUninitializedStats(t *testing.T) {
|
||||
store, _ := testkit.CreateMockStoreAndDomain(t)
|
||||
tk := testkit.NewTestKit(t, store)
|
||||
|
||||
@ -526,12 +526,14 @@
|
||||
"SQL": "EXPLAIN format = 'verbose' SELECT * FROM `tbl_cardcore_transaction` `transactio0_` WHERE `transactio0_`.`period` = '202502' AND `transactio0_`.`account_number` = '1901040107462200' ORDER BY `transactio0_`.`transaction_status`, `transactio0_`.`account_number`, `transactio0_`.`entry_date` ASC, `transactio0_`.`id` ASC;",
|
||||
"Plan": [
|
||||
"Sort_5 1.00 39640.65 root cardcore_issuing.tbl_cardcore_transaction.transaction_status, cardcore_issuing.tbl_cardcore_transaction.account_number, cardcore_issuing.tbl_cardcore_transaction.entry_date, cardcore_issuing.tbl_cardcore_transaction.id",
|
||||
"└─IndexLookUp_15 1.00 39619.45 root ",
|
||||
" ├─IndexRangeScan_12(Build) 16.16 4094.52 cop[tikv] table:transactio0_, index:tbl_cardcore_transaction_ix10(account_number, entry_date, value_date) range:[\"1901040107462200\",\"1901040107462200\"], keep order:false",
|
||||
" └─Selection_14(Probe) 1.00 5431.27 cop[tikv] eq(cardcore_issuing.tbl_cardcore_transaction.period, \"202502\")",
|
||||
" └─TableRowIDScan_13 16.16 4624.68 cop[tikv] table:transactio0_ keep order:false"
|
||||
"└─IndexLookUp_12 1.00 39619.45 root ",
|
||||
" ├─IndexRangeScan_9(Build) 16.16 4094.52 cop[tikv] table:transactio0_, index:tbl_cardcore_transaction_ix10(account_number, entry_date, value_date) range:[\"1901040107462200\",\"1901040107462200\"], keep order:false",
|
||||
" └─Selection_11(Probe) 1.00 5431.27 cop[tikv] eq(cardcore_issuing.tbl_cardcore_transaction.period, \"202502\")",
|
||||
" └─TableRowIDScan_10 16.16 4624.68 cop[tikv] table:transactio0_ keep order:false"
|
||||
],
|
||||
"Warn": null
|
||||
"Warn": [
|
||||
"Note 1105 [tbl_cardcore_transaction_ix10,tbl_cardcore_transaction_ix17] remain after pruning paths for transactio0_ given Prop{SortItems: [], TaskTp: rootTask}"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -576,9 +578,9 @@
|
||||
{
|
||||
"SQL": "explain select * from tbl_cardcore_statement s where s.latest_stmt_print_date = '2024-10-16';",
|
||||
"Plan": [
|
||||
"IndexLookUp_11 169960.89 root ",
|
||||
"├─IndexRangeScan_9(Build) 169960.89 cop[tikv] table:s, index:tbl_cardcore_statement_ix7(latest_stmt_print_date) range:[2024-10-16,2024-10-16], keep order:false",
|
||||
"└─TableRowIDScan_10(Probe) 169960.89 cop[tikv] table:s keep order:false"
|
||||
"IndexLookUp_11 53778.89 root ",
|
||||
"├─IndexRangeScan_9(Build) 53778.89 cop[tikv] table:s, index:tbl_cardcore_statement_ix7(latest_stmt_print_date) range:[2024-10-16,2024-10-16], keep order:false",
|
||||
"└─TableRowIDScan_10(Probe) 53778.89 cop[tikv] table:s keep order:false"
|
||||
],
|
||||
"Warn": null
|
||||
}
|
||||
|
||||
@ -526,12 +526,14 @@
|
||||
"SQL": "EXPLAIN format = 'verbose' SELECT * FROM `tbl_cardcore_transaction` `transactio0_` WHERE `transactio0_`.`period` = '202502' AND `transactio0_`.`account_number` = '1901040107462200' ORDER BY `transactio0_`.`transaction_status`, `transactio0_`.`account_number`, `transactio0_`.`entry_date` ASC, `transactio0_`.`id` ASC;",
|
||||
"Plan": [
|
||||
"Sort_5 1.00 39640.65 root cardcore_issuing.tbl_cardcore_transaction.transaction_status, cardcore_issuing.tbl_cardcore_transaction.account_number, cardcore_issuing.tbl_cardcore_transaction.entry_date, cardcore_issuing.tbl_cardcore_transaction.id",
|
||||
"└─IndexLookUp_15 1.00 39619.45 root ",
|
||||
" ├─IndexRangeScan_12(Build) 16.16 4094.52 cop[tikv] table:transactio0_, index:tbl_cardcore_transaction_ix10(account_number, entry_date, value_date) range:[\"1901040107462200\",\"1901040107462200\"], keep order:false",
|
||||
" └─Selection_14(Probe) 1.00 5431.27 cop[tikv] eq(cardcore_issuing.tbl_cardcore_transaction.period, \"202502\")",
|
||||
" └─TableRowIDScan_13 16.16 4624.68 cop[tikv] table:transactio0_ keep order:false"
|
||||
"└─IndexLookUp_12 1.00 39619.45 root ",
|
||||
" ├─IndexRangeScan_9(Build) 16.16 4094.52 cop[tikv] table:transactio0_, index:tbl_cardcore_transaction_ix10(account_number, entry_date, value_date) range:[\"1901040107462200\",\"1901040107462200\"], keep order:false",
|
||||
" └─Selection_11(Probe) 1.00 5431.27 cop[tikv] eq(cardcore_issuing.tbl_cardcore_transaction.period, \"202502\")",
|
||||
" └─TableRowIDScan_10 16.16 4624.68 cop[tikv] table:transactio0_ keep order:false"
|
||||
],
|
||||
"Warn": null
|
||||
"Warn": [
|
||||
"Note 1105 [tbl_cardcore_transaction_ix10,tbl_cardcore_transaction_ix17] remain after pruning paths for transactio0_ given Prop{SortItems: [], TaskTp: rootTask}"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -576,9 +578,9 @@
|
||||
{
|
||||
"SQL": "explain select * from tbl_cardcore_statement s where s.latest_stmt_print_date = '2024-10-16';",
|
||||
"Plan": [
|
||||
"IndexLookUp_11 169960.89 root ",
|
||||
"├─IndexRangeScan_9(Build) 169960.89 cop[tikv] table:s, index:tbl_cardcore_statement_ix7(latest_stmt_print_date) range:[2024-10-16,2024-10-16], keep order:false",
|
||||
"└─TableRowIDScan_10(Probe) 169960.89 cop[tikv] table:s keep order:false"
|
||||
"IndexLookUp_11 53778.89 root ",
|
||||
"├─IndexRangeScan_9(Build) 53778.89 cop[tikv] table:s, index:tbl_cardcore_statement_ix7(latest_stmt_print_date) range:[2024-10-16,2024-10-16], keep order:false",
|
||||
"└─TableRowIDScan_10(Probe) 53778.89 cop[tikv] table:s keep order:false"
|
||||
],
|
||||
"Warn": null
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user