!6658 修复 corr_* 中 two_sided_p_value > 1 的缺陷

Merge pull request !6658 from yigecheng/corr_dev
This commit is contained in:
opengauss_bot
2024-11-14 07:55:24 +00:00
committed by Gitee
3 changed files with 174 additions and 2 deletions

View File

@ -18,6 +18,8 @@
constexpr int INIT_CORR_ARRAY_LENGTH = 64;
constexpr int MAX_CORR_ARRAY_LENGTH = 524288;
constexpr float HALF = 0.5;
enum class ModeType {
COEFFICIENT,
ONE_SIDED_SIG,
@ -303,7 +305,13 @@ Datum corr_s_final_fn(PG_FUNCTION_ARGS)
float8 one_sided_p_value_pos = 1 - boost::math::cdf(t_dist, t_stat);
float8 one_sided_p_value_neg = 1 - one_sided_p_value_pos;
float8 two_sided_p_value = 2 * one_sided_p_value_pos;
float8 one_sided_p_value;
if (one_sided_p_value_pos < HALF) {
one_sided_p_value = one_sided_p_value_pos;
} else {
one_sided_p_value = one_sided_p_value_neg;
}
float8 two_sided_p_value = 2 * one_sided_p_value;
pfree(x_ranks);
pfree(y_ranks);
@ -380,7 +388,13 @@ Datum corr_k_final_fn(PG_FUNCTION_ARGS)
boost::math::normal_distribution<> normal_dist(0.0, 1.0);
float8 one_sided_p_value_pos = 1.0 - boost::math::cdf(normal_dist, z_stat);
float8 one_sided_p_value_neg = boost::math::cdf(normal_dist, z_stat);
float8 two_sided_p_value = 2 * one_sided_p_value_pos;
float8 one_sided_p_value;
if (one_sided_p_value_pos < HALF) {
one_sided_p_value = one_sided_p_value_pos;
} else {
one_sided_p_value = one_sided_p_value_neg;
}
float8 two_sided_p_value = 2 * one_sided_p_value;
pfree(x_ranks);
pfree(y_ranks);

View File

@ -299,10 +299,99 @@ SELECT corr_s(column_x, column_y, 'COEFFICIENT') FROM null_table3;
.8
(1 row)
create table customers1(customer_id number,id number, cust_last_name varchar2(50));
insert into customers1 values(001,1,'张生');
insert into customers1 values(002,2,'刘生');
insert into customers1 values(001,3,'李生');
select corr_k(customer_id,id,'ONE_SIDED_SIG') from customers1;
corr_k
--------
.5
(1 row)
select corr_k(customer_id,id,'ONE_SIDED_SIG_POS') from customers1;
corr_k
--------
.5
(1 row)
select corr_k(customer_id,id,'ONE_SIDED_SIG_NEG') from customers1;
corr_k
--------
.5
(1 row)
select corr_k(customer_id,id,'TWO_SIDED_SIG') from customers1;
corr_k
--------
1
(1 row)
CREATE TABLE EMP
(EMPNO NUMBER(4) NOT NULL,
ENAME VARCHAR2(10),
JOB VARCHAR2(9),
MGR NUMBER(4),
HIREDATE DATE,
SAL NUMBER(7, 2),
COMM NUMBER(7, 2),
DEPTNO NUMBER(2));
INSERT INTO EMP VALUES
(7369, 'SMITH', 'CLERK', 7902,
TO_DATE('17-DEC-1980', 'DD-MON-YYYY'), 800, NULL, 20);
INSERT INTO EMP VALUES
(7499, 'ALLEN', 'SALESMAN', 7698,
TO_DATE('20-FEB-1981', 'DD-MON-YYYY'), 1600, 300, 30);
INSERT INTO EMP VALUES
(7521, 'WARD', 'SALESMAN', 7698,
TO_DATE('22-FEB-1981', 'DD-MON-YYYY'), 1250, 500, 30);
INSERT INTO EMP VALUES
(7566, 'JONES', 'MANAGER', 7839,
TO_DATE('2-APR-1981', 'DD-MON-YYYY'), 2975, NULL, 20);
INSERT INTO EMP VALUES
(7654, 'MARTIN', 'SALESMAN', 7698,
TO_DATE('28-SEP-1981', 'DD-MON-YYYY'), 1250, 1400, 30);
INSERT INTO EMP VALUES
(7698, 'BLAKE', 'MANAGER', 7839,
TO_DATE('1-MAY-1981', 'DD-MON-YYYY'), 2850, NULL, 30);
INSERT INTO EMP VALUES
(7782, 'CLARK', 'MANAGER', 7839,
TO_DATE('9-JUN-1981', 'DD-MON-YYYY'), 2450, NULL, 10);
INSERT INTO EMP VALUES
(7788, 'SCOTT', 'ANALYST', 7566,
TO_DATE('09-DEC-1982', 'DD-MON-YYYY'), 3000, NULL, 20);
INSERT INTO EMP VALUES
(7839, 'KING', 'PRESIDENT', NULL,
TO_DATE('17-NOV-1981', 'DD-MON-YYYY'), 5000, NULL, 10);
INSERT INTO EMP VALUES
(7844, 'TURNER', 'SALESMAN', 7698,
TO_DATE('8-SEP-1981', 'DD-MON-YYYY'), 1500, 0, 30);
INSERT INTO EMP VALUES
(7876, 'ADAMS', 'CLERK', 7788,
TO_DATE('12-JAN-1983', 'DD-MON-YYYY'), 1100, NULL, 20);
INSERT INTO EMP VALUES
(7900, 'JAMES', 'CLERK', 7698,
TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 950, NULL, 30);
INSERT INTO EMP VALUES
(7902, 'FORD', 'ANALYST', 7566,
TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 3000, NULL, 20);
INSERT INTO EMP VALUES
(7934, 'MILLER', 'CLERK', 7782,
TO_DATE('23-JAN-1982', 'DD-MON-YYYY'), 1300, NULL, 10);
SELECT CORR_K(sal, comm, 'COEFFICIENT') coefficient,
CORR_K(sal, comm, 'TWO_SIDED_SIG') two_sided_p_value
FROM EMP;
coefficient | two_sided_p_value
-------------------+-------------------
-.547722557505166 | .264288345226028
(1 row)
drop table test_table;
drop table null_table1;
drop table null_table2;
drop table null_table3;
drop table customers1;
drop table EMP;
drop table t1;
drop schema aggregate CASCADE;
NOTICE: drop cascades to 3 other objects

View File

@ -132,9 +132,78 @@ INSERT INTO null_table3(column_x, column_y) VALUES (51, 39);
INSERT INTO null_table3(column_x, column_y) VALUES (49, 32);
SELECT corr_s(column_x, column_y, 'COEFFICIENT') FROM null_table3;
-- Regression case: corr_k significance modes on a three-row fixture
-- (customer_id 001, 002, 001 paired with id 1, 2, 3).
-- The recorded expected output for this data is .5 for ONE_SIDED_SIG,
-- ONE_SIDED_SIG_POS and ONE_SIDED_SIG_NEG, and exactly 1 for TWO_SIDED_SIG,
-- so this case pins the two-sided p-value at its upper bound of 1.
-- cust_last_name is populated but not referenced by any query below.
create table customers1(customer_id number,id number, cust_last_name varchar2(50));
insert into customers1 values(001,1,'张生');
insert into customers1 values(002,2,'刘生');
insert into customers1 values(001,3,'李生');
-- Same input pair evaluated under each of the four significance modes.
select corr_k(customer_id,id,'ONE_SIDED_SIG') from customers1;
select corr_k(customer_id,id,'ONE_SIDED_SIG_POS') from customers1;
select corr_k(customer_id,id,'ONE_SIDED_SIG_NEG') from customers1;
select corr_k(customer_id,id,'TWO_SIDED_SIG') from customers1;
-- Regression case: corr_k over (sal, comm) on a 14-row employee fixture.
-- NOTE(review): the rows appear to mirror the well-known Oracle EMP demo
-- table; confirm if the provenance matters.
-- COMM is NULL in 10 of the 14 rows; only ALLEN (300), WARD (500),
-- MARTIN (1400) and TURNER (0) carry a commission. Two of those four rows
-- share SAL = 1250, so the data also contains a tie in the first argument.
CREATE TABLE EMP
(EMPNO NUMBER(4) NOT NULL,
ENAME VARCHAR2(10),
JOB VARCHAR2(9),
MGR NUMBER(4),
HIREDATE DATE,
SAL NUMBER(7, 2),
COMM NUMBER(7, 2),
DEPTNO NUMBER(2));
-- Positional inserts: values follow the column order declared above
-- (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, COMM, DEPTNO).
INSERT INTO EMP VALUES
(7369, 'SMITH', 'CLERK', 7902,
TO_DATE('17-DEC-1980', 'DD-MON-YYYY'), 800, NULL, 20);
INSERT INTO EMP VALUES
(7499, 'ALLEN', 'SALESMAN', 7698,
TO_DATE('20-FEB-1981', 'DD-MON-YYYY'), 1600, 300, 30);
INSERT INTO EMP VALUES
(7521, 'WARD', 'SALESMAN', 7698,
TO_DATE('22-FEB-1981', 'DD-MON-YYYY'), 1250, 500, 30);
INSERT INTO EMP VALUES
(7566, 'JONES', 'MANAGER', 7839,
TO_DATE('2-APR-1981', 'DD-MON-YYYY'), 2975, NULL, 20);
INSERT INTO EMP VALUES
(7654, 'MARTIN', 'SALESMAN', 7698,
TO_DATE('28-SEP-1981', 'DD-MON-YYYY'), 1250, 1400, 30);
INSERT INTO EMP VALUES
(7698, 'BLAKE', 'MANAGER', 7839,
TO_DATE('1-MAY-1981', 'DD-MON-YYYY'), 2850, NULL, 30);
INSERT INTO EMP VALUES
(7782, 'CLARK', 'MANAGER', 7839,
TO_DATE('9-JUN-1981', 'DD-MON-YYYY'), 2450, NULL, 10);
INSERT INTO EMP VALUES
(7788, 'SCOTT', 'ANALYST', 7566,
TO_DATE('09-DEC-1982', 'DD-MON-YYYY'), 3000, NULL, 20);
-- KING is the only row with a NULL MGR.
INSERT INTO EMP VALUES
(7839, 'KING', 'PRESIDENT', NULL,
TO_DATE('17-NOV-1981', 'DD-MON-YYYY'), 5000, NULL, 10);
-- TURNER has COMM = 0, which is a real value and distinct from NULL.
INSERT INTO EMP VALUES
(7844, 'TURNER', 'SALESMAN', 7698,
TO_DATE('8-SEP-1981', 'DD-MON-YYYY'), 1500, 0, 30);
INSERT INTO EMP VALUES
(7876, 'ADAMS', 'CLERK', 7788,
TO_DATE('12-JAN-1983', 'DD-MON-YYYY'), 1100, NULL, 20);
INSERT INTO EMP VALUES
(7900, 'JAMES', 'CLERK', 7698,
TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 950, NULL, 30);
INSERT INTO EMP VALUES
(7902, 'FORD', 'ANALYST', 7566,
TO_DATE('3-DEC-1981', 'DD-MON-YYYY'), 3000, NULL, 20);
INSERT INTO EMP VALUES
(7934, 'MILLER', 'CLERK', 7782,
TO_DATE('23-JAN-1982', 'DD-MON-YYYY'), 1300, NULL, 10);
-- Coefficient and two-sided p-value in one row. The recorded expected output
-- is -.547722557505166 and .264288345226028. The coefficient equals
-- -3/sqrt(30), which is the tau-b of the four non-NULL (sal, comm) pairs;
-- this is consistent with rows having a NULL COMM being ignored
-- (TODO confirm against the aggregate's transition function).
SELECT CORR_K(sal, comm, 'COEFFICIENT') coefficient,
CORR_K(sal, comm, 'TWO_SIDED_SIG') two_sided_p_value
FROM EMP;
-- Teardown: remove every fixture table used by this script, then the schema.
-- customers1 and EMP are created by the cases directly above; test_table,
-- null_table1..3 and t1 are presumably created earlier in the script
-- (not visible here).
drop table test_table;
drop table null_table1;
drop table null_table2;
drop table null_table3;
drop table customers1;
drop table EMP;
drop table t1;
-- CASCADE also drops whatever still lives in the schema; the recorded
-- expected output reports "drop cascades to 3 other objects" at this point.
drop schema aggregate CASCADE;