!6047 Fix the performance problem caused by a large samplescan row-estimation error that led to the wrong execution plan being chosen
Merge pull request !6047 from laishenghao/samplescan
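The TABLESAMPLE percentage reaches the planner as a float8 Const (the expected plans below print it as `::double precision`), but the row-estimation helpers read it with DatumGetFloat4 into float4 variables, so the derived sample fraction, and with it the estimated row count, could be far off and steer the planner toward a poor join strategy. The patch widens the whole path to float8. Below is a minimal sketch of the corrected helper, mirroring the first hunk; it assumes the usual openGauss/PostgreSQL planner headers, and the fallback value of 1.0 is illustrative only (the hunk cuts off inside the else branch):

    /* Sketch of the corrected helper; names mirror the patch below. */
    static float8 get_samplefract(Node* pctnode)
    {
        float8 samplefract = 1.0;   /* illustrative default, not taken from the hunk */

        if (IsA(pctnode, Const) && !((Const*)pctnode)->constisnull) {
            /* The percent argument is a float8 Const, so read the Datum as float8. */
            samplefract = DatumGetFloat8(((Const*)pctnode)->constvalue);
            if (samplefract >= 0.0 && samplefract <= 100.0 && !isnan(samplefract)) {
                samplefract /= 100.0;       /* percent -> fraction in [0, 1] */
            } else {
                samplefract = 1.0;          /* unusable value: assume the whole table */
            }
        }
        return samplefract;
    }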
@@ -6230,14 +6230,14 @@ void finalize_dml_cost(ModifyTable* plan)
  * Parameters:
  * @in pctnode: node of percent args.
  *
- * Return: float4
+ * Return: float8
  */
-static float4 get_samplefract(Node* pctnode)
+static float8 get_samplefract(Node* pctnode)
 {
-    float4 samplefract;
+    float8 samplefract;
 
     if (IsA(pctnode, Const) && !((Const*)pctnode)->constisnull) {
-        samplefract = DatumGetFloat4(((Const*)pctnode)->constvalue);
+        samplefract = DatumGetFloat8(((Const*)pctnode)->constvalue);
         if (samplefract >= 0.0 && samplefract <= 100.0 && !isnan(samplefract)) {
             samplefract /= 100.0f;
         } else {
@@ -6265,7 +6265,7 @@ static float4 get_samplefract(Node* pctnode)
 void system_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List* paramexprs)
 {
     Node* pctnode = NULL;
-    float4 samplefract;
+    float8 samplefract;
 
     /* Try to extract an estimate for the sample percentage */
     pctnode = (Node*)linitial(paramexprs);
@@ -6292,7 +6292,7 @@ void system_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List
 void bernoulli_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List* paramexprs)
 {
     Node* pctnode = NULL;
-    float4 samplefract;
+    float8 samplefract;
 
     /* Try to extract an estimate for the sample percentage */
     pctnode = (Node*)linitial(paramexprs);
@@ -6325,7 +6325,7 @@ void hybrid_samplescangetsamplesize(PlannerInfo* root, RelOptInfo* baserel, List
     foreach (lc, paramexprs) {
         Node* paramnode = (Node*)lfirst(lc);
         Node* pctnode = estimate_expression_value(root, paramnode);
-        float4 samplefract = 0.0;
+        float8 samplefract = 0.0;
         if (likely(pctnode)) {
            samplefract = get_samplefract(pctnode);
         } else {
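The new regression cases below sample 20000-row tables at 99.999999 percent and use `rows=\d{5}` patterns to check that the planner's estimate stays a five-digit number instead of collapsing. Assuming the estimate is roughly reltuples multiplied by the sample fraction and then rounded, as PostgreSQL's clamp_row_est does, a quick standalone check of that arithmetic (not part of the patch):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        double reltuples = 20000.0;     /* rows inserted into each ss_rows_t* table */
        double pct = 99.999999;         /* TABLESAMPLE percentage used by the tests */
        double samplefract = pct / 100.0;
        double rows = rint(reltuples * samplefract);
        printf("estimated rows = %.0f\n", rows);  /* prints 20000, a five-digit count */
        return 0;
    }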
@@ -20,15 +20,16 @@ select count(*) from ((select * from test_tablesample tablesample SYSTEM(20) REP
 (1 row)
 
 explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
-                                      QUERY PLAN
-----------------------------------------------------------------------------------------
- Nested Loop Anti Join
-   Join Filter: (test_tablesample.id = test_tablesample2.id)
+                                         QUERY PLAN
+----------------------------------------------------------------------------------------------
+ Hash Anti Join
+   Hash Cond: (test_tablesample.id = test_tablesample2.id)
    ->  Sample Scan on test_tablesample
          Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-   ->  Sample Scan on test_tablesample2
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-(6 rows)
+   ->  Hash
+         ->  Sample Scan on test_tablesample2
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+(7 rows)
 
 select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
  id | name | salary | id | name | salary
@@ -37,15 +38,19 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j
 
 set enable_hashjoin to off;
 explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
-                                      QUERY PLAN
-----------------------------------------------------------------------------------------
- Nested Loop Anti Join
-   Join Filter: (test_tablesample.id = test_tablesample2.id)
-   ->  Sample Scan on test_tablesample
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-   ->  Sample Scan on test_tablesample2
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-(6 rows)
+                                         QUERY PLAN
+----------------------------------------------------------------------------------------------
+ Merge Anti Join
+   Merge Cond: (test_tablesample.id = test_tablesample2.id)
+   ->  Sort
+         Sort Key: test_tablesample.id
+         ->  Sample Scan on test_tablesample
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+   ->  Sort
+         Sort Key: test_tablesample2.id
+         ->  Sample Scan on test_tablesample2
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+(10 rows)
 
 select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
  id | name | salary | id | name | salary
@@ -54,15 +59,16 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j
 
 set enable_mergejoin to off;
 explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
-                                      QUERY PLAN
-----------------------------------------------------------------------------------------
+                                         QUERY PLAN
+----------------------------------------------------------------------------------------------
  Nested Loop Anti Join
    Join Filter: (test_tablesample.id = test_tablesample2.id)
    ->  Sample Scan on test_tablesample
          Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-   ->  Sample Scan on test_tablesample2
-         Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
-(6 rows)
+   ->  Materialize
+         ->  Sample Scan on test_tablesample2
+               Sampling: bernoulli (50::double precision) REPEATABLE (200::double precision)
+(7 rows)
 
 select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
  id | name | salary | id | name | salary
@@ -315,8 +321,61 @@ select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join
 ----+------+--------+----+------+--------
 (0 rows)
 
+-- test rows estimation of samplescan
+set enable_hashjoin to on;
+create table ss_rows_t1 (a int);
+create table ss_rows_t2 (b int);
+create table ss_rows_t3 (c int);
+insert into ss_rows_t1 values (generate_series(1, 20000));
+insert into ss_rows_t2 values (generate_series(1, 20000));
+insert into ss_rows_t3 values (generate_series(1, 20000));
+explain select a from ss_rows_t1 tablesample system (99.999999);
+--?.*
+--?.*
+--? Sample Scan on ss_rows_t1 (cost=.* rows=\d{5} width=4)
+Sampling: system (99.999999::double precision)
+(2 rows)
+
+explain select b from ss_rows_t2 tablesample bernoulli (99.999999);
+--?.*
+--?.*
+--? Sample Scan on ss_rows_t2 (cost=.* rows=\d{5} width=4)
+Sampling: bernoulli (99.999999::double precision)
+(2 rows)
+
+explain select c from ss_rows_t3 tablesample hybrid (99.999999,99.999999);
+--?.*
+--?.*
+--? Sample Scan on ss_rows_t3 (cost=.* rows=\d{5} width=4)
+Sampling: hybrid (99.999999::double precision, 99.999999::double precision)
+(2 rows)
+
+explain select a, b, c from
+ss_rows_t1 tablesample system (99.999999) repeatable (325),
+ss_rows_t2 tablesample bernoulli (99.999999) repeatable (0),
+ss_rows_t3 tablesample hybrid (99.999999,99.999999) repeatable (510)
+where a = b and b = c;
+--?.*
+--?.*
+--? Hash Join (.* width=12)
+Hash Cond: (ss_rows_t1.a = ss_rows_t3.c)
+--? -> Hash Join (.* width=8)
+Hash Cond: (ss_rows_t1.a = ss_rows_t2.b)
+--? -> Sample Scan on ss_rows_t1 (cost=.* rows=\d{5} width=4)
+Sampling: system (99.999999::double precision) REPEATABLE (325::double precision)
+--? -> Hash (cost=.* rows=\d{5} width=4)
+--? -> Sample Scan on ss_rows_t2 (cost=.* rows=\d{5} width=4)
+Sampling: bernoulli (99.999999::double precision) REPEATABLE (0::double precision)
+--? -> Hash (cost=.* rows=\d{5} width=4)
+--? -> Sample Scan on ss_rows_t3 (cost=.* rows=\d{5} width=4)
+Sampling: hybrid (99.999999::double precision, 99.999999::double precision) REPEATABLE (510::double precision)
+(12 rows)
+
 reset search_path;
 drop schema tablesample_schema4 cascade;
-NOTICE: drop cascades to 2 other objects
+NOTICE: drop cascades to 5 other objects
 DETAIL: drop cascades to table tablesample_schema4.test_tablesample
 drop cascades to table tablesample_schema4.test_tablesample2
+drop cascades to table tablesample_schema4.ss_rows_t1
+drop cascades to table tablesample_schema4.ss_rows_t2
+drop cascades to table tablesample_schema4.ss_rows_t3
@@ -95,5 +95,24 @@ select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left j
 explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
 select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL;
 
+-- test rows estimation of samplescan
+set enable_hashjoin to on;
+create table ss_rows_t1 (a int);
+create table ss_rows_t2 (b int);
+create table ss_rows_t3 (c int);
+insert into ss_rows_t1 values (generate_series(1, 20000));
+insert into ss_rows_t2 values (generate_series(1, 20000));
+insert into ss_rows_t3 values (generate_series(1, 20000));
+
+explain select a from ss_rows_t1 tablesample system (99.999999);
+explain select b from ss_rows_t2 tablesample bernoulli (99.999999);
+explain select c from ss_rows_t3 tablesample hybrid (99.999999,99.999999);
+
+explain select a, b, c from
+ss_rows_t1 tablesample system (99.999999) repeatable (325),
+ss_rows_t2 tablesample bernoulli (99.999999) repeatable (0),
+ss_rows_t3 tablesample hybrid (99.999999,99.999999) repeatable (510)
+where a = b and b = c;
+
 reset search_path;
 drop schema tablesample_schema4 cascade;