Files
openGauss-OM/script/gspylib/inspection/lib/checkblacklist/GetTableSkew.sql
hangjin2020 a2679f0cf7 om仓迁移
2020-12-16 17:25:24 +08:00

98 lines
3.7 KiB
PL/PgSQL

analyze pg_catalog.pg_class;
analyze pg_catalog.pg_namespace;
analyze pg_catalog.pgxc_class;
analyze pg_catalog.pg_statistic;
--sqlblock
DROP FUNCTION IF EXISTS PUBLIC.pgxc_analyzed_tuples() CASCADE;
--sqlblock
CREATE OR REPLACE FUNCTION PUBLIC.pgxc_analyzed_tuples
(
OUT schemaname text,
OUT tablename text,
OUT dn_name text,
OUT tuples real
)
RETURNS SETOF record
AS $$
DECLARE
datanode_rd record;
fetch_tuples record;
fetch_dn text;
fetch_tuple_str text;
BEGIN
fetch_dn := 'SELECT node_name FROM pg_catalog.pgxc_node WHERE node_type=''D'' order by node_name';
FOR datanode_rd IN EXECUTE(fetch_dn) LOOP
dn_name := datanode_rd.node_name;
fetch_tuple_str := 'EXECUTE DIRECT ON (' || dn_name || ') ''SELECT
n.nspname,
c.relname,
c.reltuples
FROM pg_catalog.pg_class c
INNER JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
where c.oid >16384 AND c.relkind = ''''r'''' and n.nspname <> ''''cstore'''' and n.nspname <> ''''pmk'''' and n.nspname <> ''''pg_catalog''''
''';
FOR fetch_tuples IN EXECUTE(fetch_tuple_str) LOOP
tuples := fetch_tuples.reltuples;
schemaname := fetch_tuples.nspname;
tablename := fetch_tuples.relname;
return next;
END LOOP;
RAISE INFO 'Finished fetching stats info from DataNode % at %',dn_name, clock_timestamp();
END LOOP;
return;
END; $$
LANGUAGE 'plpgsql'
ROWS 1000000;
--sqlblock
DROP VIEW IF EXISTS PUBLIC.pgxc_analyzed_skewness;
--sqlblock
CREATE VIEW PUBLIC.pgxc_analyzed_skewness
AS
SELECT
schemaname,
tablename,
(min(ratio)::numeric(6,3)) AS ratio_min,
(max(ratio)::numeric(6,3) ) AS ratio_max,
(max(ratio) - min(ratio))::numeric(6,3) AS skewness_ratio,
((max(ratio) - min(ratio)) * total_tuples / 100)::numeric(35) as skewness_tuple,
((max(ratio) - min(ratio)) * relwidth * total_tuples / 100)::numeric(35) as skewness_size,
(stddev_samp(ratio)::numeric(6,3)) AS skewness_stddev
FROM
(
WITH udt AS
(
SELECT
n.nspname AS schemaname,
c.relname AS tablename,
relwidth
FROM pg_catalog.pg_class c
INNER JOIN pg_catalog.pg_namespace n ON (n.oid = c.relnamespace)
INNER JOIN (SELECT sum(stawidth) as relwidth, starelid FROM pg_catalog.pg_statistic GROUP BY starelid)s ON s.starelid = c.oid
INNER JOIN pg_catalog.pgxc_class x ON c.oid = x.pcrelid
WHERE x.pclocatortype = 'H' AND c.reltuples > 500
)
SELECT
schemaname,
tablename,
total_tuples,
relwidth,
(round(tuples/total_tuples, 4) * 100)AS ratio
FROM
(
SELECT
t.schemaname,
t.tablename,
t.dn_name,
t.tuples,
relwidth,
sum(tuples) OVER (PARTITION BY t.schemaname, t.tablename) AS total_tuples
FROM PUBLIC.pgxc_analyzed_tuples() t
INNER JOIN udt u on (u.schemaname = t.schemaname and u.tablename = t.tablename)
)
)
GROUP BY schemaname, tablename, total_tuples, relwidth;
--sqlblock
SELECT * FROM PUBLIC.pgxc_analyzed_skewness
WHERE skewness_tuple > 100000
ORDER BY skewness_tuple DESC, skewness_ratio DESC, skewness_size DESC;