// Patch summary. This file is a unified git diff covering three files; it is not a C++ source file.
//
// src/common/backend/utils/adt/corr_sk.cpp
//   * Adds the file-level constant HALF (0.5).
//   * In corr_s_final_fn and corr_k_final_fn the two-sided p-value is no longer
//     2 * one_sided_p_value_pos. It is now 2 * one_sided_p_value, where
//     one_sided_p_value is one_sided_p_value_pos when that value is below HALF and
//     one_sided_p_value_neg otherwise.
//   * NOTE(review): HALF is declared `float` while the p-values are `float8`; 0.5 is
//     exactly representable, so the comparison is presumably unaffected -- confirm.
//
// src/test/regress/sql/agg.sql and src/test/regress/expected/agg.out
//   * Adds table customers1 (3 rows) and queries corr_k in the modes ONE_SIDED_SIG,
//     ONE_SIDED_SIG_POS, ONE_SIDED_SIG_NEG and TWO_SIDED_SIG.
//   * Adds table EMP (14 rows) and queries CORR_K(sal, comm, ...) in the modes
//     COEFFICIENT and TWO_SIDED_SIG.
//   * Drops both new tables in the cleanup section.
//
// NOTE(review): the hunks below appear with their line breaks collapsed into a few long
// lines; presumably the original newlines must be restored before the patch can be
// applied -- confirm against the upstream commit.
diff --git a/src/common/backend/utils/adt/corr_sk.cpp b/src/common/backend/utils/adt/corr_sk.cpp index 8d33281e8..11c50fbd2 100644 --- a/src/common/backend/utils/adt/corr_sk.cpp +++ b/src/common/backend/utils/adt/corr_sk.cpp @@ -18,6 +18,8 @@ constexpr int INIT_CORR_ARRAY_LENGTH = 64; constexpr int MAX_CORR_ARRAY_LENGTH = 524288; +constexpr float HALF = 0.5; + enum class ModeType { COEFFICIENT, ONE_SIDED_SIG, @@ -303,7 +305,13 @@ Datum corr_s_final_fn(PG_FUNCTION_ARGS) float8 one_sided_p_value_pos = 1 - boost::math::cdf(t_dist, t_stat); float8 one_sided_p_value_neg = 1 - one_sided_p_value_pos; - float8 two_sided_p_value = 2 * one_sided_p_value_pos; + float8 one_sided_p_value; + if (one_sided_p_value_pos < HALF) { + one_sided_p_value = one_sided_p_value_pos; + } else { + one_sided_p_value = one_sided_p_value_neg; + } + float8 two_sided_p_value = 2 * one_sided_p_value; pfree(x_ranks); pfree(y_ranks); @@ -380,7 +388,13 @@ Datum corr_k_final_fn(PG_FUNCTION_ARGS) boost::math::normal_distribution<> normal_dist(0.0, 1.0); float8 one_sided_p_value_pos = 1.0 - boost::math::cdf(normal_dist, z_stat); float8 one_sided_p_value_neg = boost::math::cdf(normal_dist, z_stat); - float8 two_sided_p_value = 2 * one_sided_p_value_pos; + float8 one_sided_p_value; + if (one_sided_p_value_pos < HALF) { + one_sided_p_value = one_sided_p_value_pos; + } else { + one_sided_p_value = one_sided_p_value_neg; + } + float8 two_sided_p_value = 2 * one_sided_p_value; pfree(x_ranks); pfree(y_ranks); diff --git a/src/test/regress/expected/agg.out b/src/test/regress/expected/agg.out index dd7a6cd8f..61e24dfdd 100644 --- a/src/test/regress/expected/agg.out +++ b/src/test/regress/expected/agg.out @@ -299,10 +299,99 @@ SELECT corr_s(column_x, column_y, 'COEFFICIENT') FROM null_table3; .8 (1 row) +create table customers1(customer_id number,id number, cust_last_name varchar2(50)); +insert into customers1 values(001,1,'张生'); +insert into customers1 values(002,2,'刘生'); +insert into customers1 
values(001,3,'李生'); +select corr_k(customer_id,id,'ONE_SIDED_SIG') from customers1; + corr_k +-------- + .5 +(1 row) + +select corr_k(customer_id,id,'ONE_SIDED_SIG_POS') from customers1; + corr_k +-------- + .5 +(1 row) + +select corr_k(customer_id,id,'ONE_SIDED_SIG_NEG') from customers1; + corr_k +-------- + .5 +(1 row) + +select corr_k(customer_id,id,'TWO_SIDED_SIG') from customers1; + corr_k +-------- + 1 +(1 row) + +CREATE TABLE EMP +(EMPNO NUMBER(4) NOT NULL, +ENAME VARCHAR2(10), +JOB VARCHAR2(9), +MGR NUMBER(4), +HIREDATE DATE, +SAL NUMBER(7, 2), +COMM NUMBER(7, 2), +DEPTNO NUMBER(2)); +INSERT INTO EMP VALUES +(7369, 'SMITH', 'CLERK', 7902, +TO_DATE('17-DEC-1980', 'DD-MON-YYYY'), 800, NULL, 20); +INSERT INTO EMP VALUES +(7499, 'ALLEN', 'SALESMAN', 7698, +TO_DATE('20-FEB-1981', 'DD-MON-YYYY'), 1600, 300, 30); +INSERT INTO EMP VALUES +(7521, 'WARD', 'SALESMAN', 7698, +TO_DATE('22-FEB-1981', 'DD-MON-YYYY'), 1250, 500, 30); +INSERT INTO EMP VALUES +(7566, 'JONES', 'MANAGER', 7839, +TO_DATE('2-APR-1981', 'DD-MON-YYYY'), 2975, NULL, 20); +INSERT INTO EMP VALUES +(7654, 'MARTIN', 'SALESMAN', 7698, +TO_DATE('28-SEP-1981', 'DD-MON-YYYY'), 1250, 1400, 30); +INSERT INTO EMP VALUES +(7698, 'BLAKE', 'MANAGER', 7839, +TO_DATE('1-MAY-1981', 'DD-MON-YYYY'), 2850, NULL, 30); +INSERT INTO EMP VALUES +(7782, 'CLARK', 'MANAGER', 7839, +TO_DATE('9-JUN-1981', 'DD-MON-YYYY'), 2450, NULL, 10); +INSERT INTO EMP VALUES +(7788, 'SCOTT', 'ANALYST', 7566, +TO_DATE('09-DEC-1982', 'DD-MON-YYYY'), 3000, NULL, 20); +INSERT INTO EMP VALUES +(7839, 'KING', 'PRESIDENT', NULL, +TO_DATE('17-NOV-1981', 'DD-MON-YYYY'), 5000, NULL, 10); +INSERT INTO EMP VALUES +(7844, 'TURNER', 'SALESMAN', 7698, +TO_DATE('8-SEP-1981', 'DD-MON-YYYY'), 1500, 0, 30); +INSERT INTO EMP VALUES +(7876, 'ADAMS', 'CLERK', 7788, +TO_DATE('12-JAN-1983', 'DD-MON-YYYY'), 1100, NULL, 20); +INSERT INTO EMP VALUES +(7900, 'JAMES', 'CLERK', 7698, +TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 950, NULL, 30); +INSERT INTO EMP VALUES +(7902, 
'FORD', 'ANALYST', 7566, +TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 3000, NULL, 20); +INSERT INTO EMP VALUES +(7934, 'MILLER', 'CLERK', 7782, +TO_DATE('23-JAN-1982', 'DD-MON-YYYY'), 1300, NULL, 10); +SELECT CORR_K(sal, comm, 'COEFFICIENT') coefficient, +CORR_K(sal, comm, 'TWO_SIDED_SIG') two_sided_p_value +FROM EMP; + coefficient | two_sided_p_value +-------------------+------------------- + -.547722557505166 | .264288345226028 +(1 row) + drop table test_table; drop table null_table1; drop table null_table2; drop table null_table3; +drop table customers1; +drop table EMP; drop table t1; drop schema aggregate CASCADE; NOTICE: drop cascades to 3 other objects diff --git a/src/test/regress/sql/agg.sql b/src/test/regress/sql/agg.sql index 5f1d3d442..81371f783 100644 --- a/src/test/regress/sql/agg.sql +++ b/src/test/regress/sql/agg.sql @@ -132,9 +132,78 @@ INSERT INTO null_table3(column_x, column_y) VALUES (51, 39); INSERT INTO null_table3(column_x, column_y) VALUES (49, 32); SELECT corr_s(column_x, column_y, 'COEFFICIENT') FROM null_table3; +create table customers1(customer_id number,id number, cust_last_name varchar2(50)); +insert into customers1 values(001,1,'张生'); +insert into customers1 values(002,2,'刘生'); +insert into customers1 values(001,3,'李生'); + +select corr_k(customer_id,id,'ONE_SIDED_SIG') from customers1; +select corr_k(customer_id,id,'ONE_SIDED_SIG_POS') from customers1; +select corr_k(customer_id,id,'ONE_SIDED_SIG_NEG') from customers1; +select corr_k(customer_id,id,'TWO_SIDED_SIG') from customers1; + +CREATE TABLE EMP +(EMPNO NUMBER(4) NOT NULL, +ENAME VARCHAR2(10), +JOB VARCHAR2(9), +MGR NUMBER(4), +HIREDATE DATE, +SAL NUMBER(7, 2), +COMM NUMBER(7, 2), +DEPTNO NUMBER(2)); + +INSERT INTO EMP VALUES +(7369, 'SMITH', 'CLERK', 7902, +TO_DATE('17-DEC-1980', 'DD-MON-YYYY'), 800, NULL, 20); +INSERT INTO EMP VALUES +(7499, 'ALLEN', 'SALESMAN', 7698, +TO_DATE('20-FEB-1981', 'DD-MON-YYYY'), 1600, 300, 30); +INSERT INTO EMP VALUES +(7521, 'WARD', 'SALESMAN', 7698, 
+TO_DATE('22-FEB-1981', 'DD-MON-YYYY'), 1250, 500, 30); +INSERT INTO EMP VALUES +(7566, 'JONES', 'MANAGER', 7839, +TO_DATE('2-APR-1981', 'DD-MON-YYYY'), 2975, NULL, 20); +INSERT INTO EMP VALUES +(7654, 'MARTIN', 'SALESMAN', 7698, +TO_DATE('28-SEP-1981', 'DD-MON-YYYY'), 1250, 1400, 30); +INSERT INTO EMP VALUES +(7698, 'BLAKE', 'MANAGER', 7839, +TO_DATE('1-MAY-1981', 'DD-MON-YYYY'), 2850, NULL, 30); +INSERT INTO EMP VALUES +(7782, 'CLARK', 'MANAGER', 7839, +TO_DATE('9-JUN-1981', 'DD-MON-YYYY'), 2450, NULL, 10); +INSERT INTO EMP VALUES +(7788, 'SCOTT', 'ANALYST', 7566, +TO_DATE('09-DEC-1982', 'DD-MON-YYYY'), 3000, NULL, 20); +INSERT INTO EMP VALUES +(7839, 'KING', 'PRESIDENT', NULL, +TO_DATE('17-NOV-1981', 'DD-MON-YYYY'), 5000, NULL, 10); +INSERT INTO EMP VALUES +(7844, 'TURNER', 'SALESMAN', 7698, +TO_DATE('8-SEP-1981', 'DD-MON-YYYY'), 1500, 0, 30); +INSERT INTO EMP VALUES +(7876, 'ADAMS', 'CLERK', 7788, +TO_DATE('12-JAN-1983', 'DD-MON-YYYY'), 1100, NULL, 20); +INSERT INTO EMP VALUES +(7900, 'JAMES', 'CLERK', 7698, +TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 950, NULL, 30); +INSERT INTO EMP VALUES +(7902, 'FORD', 'ANALYST', 7566, +TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 3000, NULL, 20); +INSERT INTO EMP VALUES +(7934, 'MILLER', 'CLERK', 7782, +TO_DATE('23-JAN-1982', 'DD-MON-YYYY'), 1300, NULL, 10); + +SELECT CORR_K(sal, comm, 'COEFFICIENT') coefficient, +CORR_K(sal, comm, 'TWO_SIDED_SIG') two_sided_p_value +FROM EMP; + drop table test_table; drop table null_table1; drop table null_table2; drop table null_table3; +drop table customers1; +drop table EMP; drop table t1; drop schema aggregate CASCADE;