From 8865e5fc822d130fc8bac21422af52945240051e Mon Sep 17 00:00:00 2001 From: totaj Date: Mon, 28 Nov 2022 21:18:07 +0800 Subject: [PATCH] Fix sample scan bug. --- src/common/port/erand48.cpp | 23 +- .../runtime/executor/nodeSamplescan.cpp | 20 +- src/include/executor/node/nodeSamplescan.h | 1 + src/include/port.h | 5 +- src/test/regress/expected/tablesample_3.out | 328 ++++++++++++++++++ src/test/regress/parallel_schedule0 | 2 +- src/test/regress/sql/tablesample_3.sql | 99 ++++++ 7 files changed, 458 insertions(+), 20 deletions(-) create mode 100755 src/test/regress/expected/tablesample_3.out create mode 100755 src/test/regress/sql/tablesample_3.sql diff --git a/src/common/port/erand48.cpp b/src/common/port/erand48.cpp index e13380d75..54466e7ef 100644 --- a/src/common/port/erand48.cpp +++ b/src/common/port/erand48.cpp @@ -69,17 +69,19 @@ double pg_erand48(unsigned short xseed[3]) return ldexp((double)xseed[0], -48) + ldexp((double)xseed[1], -32) + ldexp((double)xseed[2], -16); } -long pg_lrand48(void) +long pg_lrand48(unsigned short rand48_seed[3]) { - _dorand48(_rand48_seed); - return ((long)_rand48_seed[2] << 15) + ((long)_rand48_seed[1] >> 1); + unsigned short *rand_seed = rand48_seed == NULL ? _rand48_seed : rand48_seed; + _dorand48(rand_seed); + return ((long)rand_seed[2] << 15) + ((long)rand_seed[1] >> 1); } -void pg_srand48(long seed) +void pg_srand48(long seed, unsigned short rand48_seed[3]) { - _rand48_seed[0] = RAND48_SEED_0; - _rand48_seed[1] = (unsigned short)seed; - _rand48_seed[2] = (unsigned short)((unsigned long)seed >> 16); + unsigned short *rand_seed = rand48_seed == NULL ? _rand48_seed : rand48_seed; + rand_seed[0] = RAND48_SEED_0; + rand_seed[1] = (unsigned short)seed; + rand_seed[2] = (unsigned short)((unsigned long)seed >> 16); _rand48_mult[0] = RAND48_MULT_0; _rand48_mult[1] = RAND48_MULT_1; _rand48_mult[2] = RAND48_MULT_2; @@ -97,3 +99,10 @@ unsigned short* pg_get_srand48() { return _rand48_seed; } + +void pg_srand48_default(unsigned short rand48_seed[3]) +{ + rand48_seed[0] = RAND48_SEED_0; + rand48_seed[1] = RAND48_SEED_1; + rand48_seed[2] = RAND48_SEED_2; +} diff --git a/src/gausskernel/runtime/executor/nodeSamplescan.cpp b/src/gausskernel/runtime/executor/nodeSamplescan.cpp index e3db6c80f..f696f017e 100644 --- a/src/gausskernel/runtime/executor/nodeSamplescan.cpp +++ b/src/gausskernel/runtime/executor/nodeSamplescan.cpp @@ -45,8 +45,6 @@ #include "nodes/execnodes.h" #include "access/ustore/knl_uscan.h" -static double sample_random_fract(void); - /* * Description: Initialize relation descriptor for sample table scan. * @@ -220,7 +218,9 @@ void BaseTableSample::getSeed() } if (seed > 0) { - gs_srandom(seed); + pg_srand48(seed, rand48Seed); + } else { + pg_srand48_default(rand48Seed); } } @@ -273,7 +273,7 @@ void BaseTableSample::system_nextsampleblock() /* We should start from currentBlock + 1. */ for (blockindex = currentBlock + 1; blockindex < totalBlockNum; blockindex++) { - if (sample_random_fract() < percent[SYSTEM_SAMPLE]) { + if ((((double)pg_lrand48(rand48Seed) + 1) / ((double)MAX_RANDOM_VALUE + 2)) < percent[SYSTEM_SAMPLE]) { break; } } @@ -343,7 +343,7 @@ void BaseTableSample::bernoulli_nextsampletuple() * block. */ for (; tupoffset <= curBlockMaxoffset; tupoffset++) { - if (sample_random_fract() < percent[BERNOULLI_SAMPLE]) { + if ((((double)pg_lrand48(rand48Seed) + 1) / ((double)MAX_RANDOM_VALUE + 2)) < percent[BERNOULLI_SAMPLE]) { break; } } @@ -429,6 +429,11 @@ void BaseTableSample::resetSampleScan() currentBlock = InvalidBlockNumber; curBlockMaxoffset = InvalidOffsetNumber; finished = false; + if (seed > 0) { + pg_srand48(seed, rand48Seed); + } else { + pg_srand48_default(rand48Seed); + } } /* @@ -1185,8 +1190,3 @@ void ColumnTableSample::scanVecSample(VectorBatch* pOutBatch) } } } - -static double sample_random_fract(void) -{ - return ((double)gs_random() + 1) / ((double)MAX_RANDOM_VALUE + 2); -} diff --git a/src/include/executor/node/nodeSamplescan.h b/src/include/executor/node/nodeSamplescan.h index 6e7820e14..58e02e36c 100644 --- a/src/include/executor/node/nodeSamplescan.h +++ b/src/include/executor/node/nodeSamplescan.h @@ -64,6 +64,7 @@ public: /* how many tuples in current page. */ OffsetNumber curBlockMaxoffset; bool finished; + unsigned short rand48Seed[3]; void (BaseTableSample::*nextSampleBlock_function)(); void (BaseTableSample::*nextSampleTuple_function)(); diff --git a/src/include/port.h b/src/include/port.h index 8ce5044ee..7fc793d2b 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -304,8 +304,9 @@ extern off_t ftello(FILE* stream); #endif extern double pg_erand48(unsigned short xseed[3]); -extern long pg_lrand48(void); -extern void pg_srand48(long seed); +extern long pg_lrand48(unsigned short rand48_seed[3] = NULL); +extern void pg_srand48(long seed, unsigned short rand48_seed[3] = NULL); +extern void pg_srand48_default(unsigned short rand48_seed[3]); extern long free_list_lrand48(void); extern void free_list_srand48(long seed); extern void pg_reset_srand48(unsigned short xseed[3]); diff --git a/src/test/regress/expected/tablesample_3.out b/src/test/regress/expected/tablesample_3.out new file mode 100755 index 000000000..fa6cff64c --- /dev/null +++ b/src/test/regress/expected/tablesample_3.out @@ -0,0 +1,328 @@ +create schema tablesample_schema4; +set current_schema = tablesample_schema4; +create table test_tablesample(id int, name text, salary numeric); +create table test_tablesample2(id int, name text, salary numeric); +insert into test_tablesample select generate_series(1, 3000), 'row'|| generate_series(1,3000), generate_series(1, 3000); +insert into test_tablesample2 select * from test_tablesample; +analyze test_tablesample; +analyze test_tablesample2; +-- union same query, should return only 1 row +select count(*) from ((select * from test_tablesample tablesample BERNOULLI(5) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample BERNOULLI(5) REPEATABLE (200) limit 1)); + count +------- + 1 +(1 row) + +select count(*) from ((select * from test_tablesample tablesample SYSTEM(20) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample SYSTEM(20) REPEATABLE (200) limit 1)); + count +------- + 1 +(1 row) + +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Hash Anti Join + Hash Cond: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Hash + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_hashjoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Merge Anti Join + Merge Cond: (test_tablesample.id = test_tablesample2.id) + -> Sort + Sort Key: test_tablesample.id + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Sort + Sort Key: test_tablesample2.id + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(10 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_mergejoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Materialize + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_material to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: system (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: system (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +drop table test_tablesample; +drop table test_tablesample2; +create table test_tablesample(id int, name text, salary numeric) with (orientation=column); +create table test_tablesample2(id int, name text, salary numeric) with (orientation=column); +insert into test_tablesample values(generate_series(1, 50), 'row'|| generate_series(1,50), generate_series(1, 50)); +insert into test_tablesample2 values(generate_series(1, 50), 'row'|| generate_series(1,50), generate_series(1, 50)); +analyze test_tablesample; +analyze test_tablesample2; +-- union same query, should return only 1 row +select count(*) from ((select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1)); + count +------- + 1 +(1 row) + +select count(*) from ((select * from test_tablesample tablesample SYSTEM(30) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample SYSTEM(30) REPEATABLE (200) limit 1)); + count +------- + 1 +(1 row) + +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Row Adapter + -> Vector Nest Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> VecSample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> VecSample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_hashjoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Row Adapter + -> Vector Nest Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> VecSample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> VecSample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_mergejoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Row Adapter + -> Vector Nest Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> VecSample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> VecSample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_material to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------------- + Row Adapter + -> Vector Nest Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> VecSample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> VecSample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Row Adapter + -> Vector Nest Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> VecSample Scan on test_tablesample + Sampling: system (50::real) REPEATABLE (200::double precision) + -> VecSample Scan on test_tablesample2 + Sampling: system (50::real) REPEATABLE (200::double precision) +(7 rows) + +select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +drop table test_tablesample; +drop table test_tablesample2; +create table test_tablesample(id int, name text, salary numeric) with (storage_type=ustore); +create table test_tablesample2(id int, name text, salary numeric) with (storage_type=ustore); +insert into test_tablesample select generate_series(1, 500), 'row'|| generate_series(1,500), generate_series(1, 500); +insert into test_tablesample2 select * from test_tablesample; +analyze test_tablesample; +analyze test_tablesample2; +-- union same query, should return only 1 row +select count(*) from ((select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1)); + count +------- + 1 +(1 row) + +select count(*) from ((select * from test_tablesample tablesample SYSTEM(70) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample SYSTEM(70) REPEATABLE (200) limit 1)); + count +------- + 1 +(1 row) + +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_hashjoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_mergejoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +set enable_material to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: bernoulli (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop Anti Join + Join Filter: (test_tablesample.id = test_tablesample2.id) + -> Sample Scan on test_tablesample + Sampling: system (50::real) REPEATABLE (200::double precision) + -> Sample Scan on test_tablesample2 + Sampling: system (50::real) REPEATABLE (200::double precision) +(6 rows) + +select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + id | name | salary | id | name | salary +----+------+--------+----+------+-------- +(0 rows) + +reset search_path; +drop schema tablesample_schema4 cascade; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table tablesample_schema4.test_tablesample +drop cascades to table tablesample_schema4.test_tablesample2 diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0 index 2b348de77..fe4d68084 100644 --- a/src/test/regress/parallel_schedule0 +++ b/src/test/regress/parallel_schedule0 @@ -661,7 +661,7 @@ test: bypass_preparedexecute_support test: sqlbypass_partition test: sqlbypass_partition_prepare -test: string_digit_to_numeric +test: string_digit_to_numeric tablesample_3 # Another group of parallel tests # ---------- #test: collate tablesample tablesample_1 tablesample_2 matview diff --git a/src/test/regress/sql/tablesample_3.sql b/src/test/regress/sql/tablesample_3.sql new file mode 100755 index 000000000..696fef561 --- /dev/null +++ b/src/test/regress/sql/tablesample_3.sql @@ -0,0 +1,99 @@ +create schema tablesample_schema4; +set current_schema = tablesample_schema4; + +create table test_tablesample(id int, name text, salary numeric); +create table test_tablesample2(id int, name text, salary numeric); + +insert into test_tablesample select generate_series(1, 3000), 'row'|| generate_series(1,3000), generate_series(1, 3000); +insert into test_tablesample2 select * from test_tablesample; +analyze test_tablesample; +analyze test_tablesample2; + +-- union same query, should return only 1 row +select count(*) from ((select * from test_tablesample tablesample BERNOULLI(5) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample BERNOULLI(5) REPEATABLE (200) limit 1)); +select count(*) from ((select * from test_tablesample tablesample SYSTEM(20) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample SYSTEM(20) REPEATABLE (200) limit 1)); + +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_hashjoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_mergejoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_material to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +drop table test_tablesample; +drop table test_tablesample2; +create table test_tablesample(id int, name text, salary numeric) with (orientation=column); +create table test_tablesample2(id int, name text, salary numeric) with (orientation=column); + +insert into test_tablesample values(generate_series(1, 50), 'row'|| generate_series(1,50), generate_series(1, 50)); +insert into test_tablesample2 values(generate_series(1, 50), 'row'|| generate_series(1,50), generate_series(1, 50)); +analyze test_tablesample; +analyze test_tablesample2; + +-- union same query, should return only 1 row +select count(*) from ((select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1)); +select count(*) from ((select * from test_tablesample tablesample SYSTEM(30) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample SYSTEM(30) REPEATABLE (200) limit 1)); + +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_hashjoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_mergejoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_material to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +drop table test_tablesample; +drop table test_tablesample2; +create table test_tablesample(id int, name text, salary numeric) with (storage_type=ustore); +create table test_tablesample2(id int, name text, salary numeric) with (storage_type=ustore); + +insert into test_tablesample select generate_series(1, 500), 'row'|| generate_series(1,500), generate_series(1, 500); +insert into test_tablesample2 select * from test_tablesample; +analyze test_tablesample; +analyze test_tablesample2; + +-- union same query, should return only 1 row +select count(*) from ((select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample BERNOULLI(10) REPEATABLE (200) limit 1)); +select count(*) from ((select * from test_tablesample tablesample SYSTEM(70) REPEATABLE (200) limit 1) union (select * from test_tablesample tablesample SYSTEM(70) REPEATABLE (200) limit 1)); + +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_hashjoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_mergejoin to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +set enable_material to off; +explain (costs off) select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample BERNOULLI(50) REPEATABLE (200) left join test_tablesample2 tablesample BERNOULLI(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +explain (costs off) select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; +select * from test_tablesample tablesample SYSTEM(50) REPEATABLE (200) left join test_tablesample2 tablesample SYSTEM(50) REPEATABLE (200) on test_tablesample.id=test_tablesample2.id where test_tablesample2.id is NULL; + +reset search_path; +drop schema tablesample_schema4 cascade;