From 55b7512b436ff6c3633ff764a7af24ba81ae0d82 Mon Sep 17 00:00:00 2001 From: humengyao Date: Thu, 14 Mar 2024 00:15:01 -0700 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81gms=5Fstats=E5=8C=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile.in | 1 + build/script/aarch64_opengauss_list | 3 + .../opengauss_release_list_ubuntu_single | 3 + build/script/x86_64_opengauss_list | 3 + contrib/CMakeLists.txt | 2 + contrib/gms_stats/CMakeLists.txt | 21 ++++ contrib/gms_stats/Makefile | 25 ++++ contrib/gms_stats/data/dummy.txt | 1 + contrib/gms_stats/expected/gms_stats.out | 57 +++++++++ contrib/gms_stats/gms_stats--1.0.sql | 42 +++++++ contrib/gms_stats/gms_stats.control | 5 + contrib/gms_stats/gms_stats.cpp | 113 ++++++++++++++++++ contrib/gms_stats/gms_stats.h | 15 +++ contrib/gms_stats/sql/gms_stats.sql | 26 ++++ 14 files changed, 317 insertions(+) create mode 100644 contrib/gms_stats/CMakeLists.txt create mode 100644 contrib/gms_stats/Makefile create mode 100644 contrib/gms_stats/data/dummy.txt create mode 100644 contrib/gms_stats/expected/gms_stats.out create mode 100644 contrib/gms_stats/gms_stats--1.0.sql create mode 100644 contrib/gms_stats/gms_stats.control create mode 100644 contrib/gms_stats/gms_stats.cpp create mode 100644 contrib/gms_stats/gms_stats.h create mode 100644 contrib/gms_stats/sql/gms_stats.sql diff --git a/GNUmakefile.in b/GNUmakefile.in index fa61a14ed..8867d87a6 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -99,6 +99,7 @@ install: @if test -d contrib/dolphin; then $(MAKE) -C contrib/dolphin $@; fi @if test -d contrib/age; then $(MAKE) -C contrib/age $@; fi @if test -d contrib/datavec; then $(MAKE) -C contrib/datavec $@; fi + @if test -d contrib/gms_stats; then $(MAKE) -C contrib/gms_stats $@; fi @if test -d contrib/gms_profiler; then $(MAKE) -C contrib/gms_profiler $@; fi +@echo "openGauss installation complete." endif diff --git a/build/script/aarch64_opengauss_list b/build/script/aarch64_opengauss_list index 7d66c8903..0ea08d5ec 100644 --- a/build/script/aarch64_opengauss_list +++ b/build/script/aarch64_opengauss_list @@ -112,6 +112,8 @@ ./share/postgresql/extension/dblink--1.0.sql ./share/postgresql/extension/dblink--unpackaged--1.0.sql ./share/postgresql/extension/dblink.control +./share/postgresql/extension/gms_stats--1.0.sql +./share/postgresql/extension/gms_stats.control ./share/postgresql/extension/gms_profiler--1.0.sql ./share/postgresql/extension/gms_profiler.control ./share/postgresql/timezone/GB-Eire @@ -808,6 +810,7 @@ ./lib/postgresql/dblink.so ./lib/postgresql/pgoutput.so ./lib/postgresql/assessment.so +./lib/postgresql/gms_stats.so ./lib/postgresql/gms_profiler.so ./lib/libpljava.so ./lib/libpq.a diff --git a/build/script/opengauss_release_list_ubuntu_single b/build/script/opengauss_release_list_ubuntu_single index 6911a2989..df3a30224 100644 --- a/build/script/opengauss_release_list_ubuntu_single +++ b/build/script/opengauss_release_list_ubuntu_single @@ -101,6 +101,8 @@ ./share/postgresql/extension/dblink--1.0.sql ./share/postgresql/extension/dblink--unpackaged--1.0.sql ./share/postgresql/extension/dblink.control +./share/postgresql/extension/gms_stats--1.0.sql +./share/postgresql/extension/gms_stats.control ./share/postgresql/extension/gms_profiler--1.0.sql ./share/postgresql/extension/gms_profiler.control ./share/postgresql/timezone/GB-Eire @@ -779,6 +781,7 @@ ./lib/postgresql/java/pljava.jar ./lib/postgresql/postgres_fdw.so ./lib/postgresql/dblink.so +./lib/postgresql/gms_stats.so ./lib/postgresql/gms_profiler.so ./lib/libpljava.so ./lib/libpq.a diff --git a/build/script/x86_64_opengauss_list b/build/script/x86_64_opengauss_list index e407e109b..64a001940 100644 --- a/build/script/x86_64_opengauss_list +++ b/build/script/x86_64_opengauss_list @@ -112,6 +112,8 @@ ./share/postgresql/extension/dblink--1.0.sql ./share/postgresql/extension/dblink--unpackaged--1.0.sql ./share/postgresql/extension/dblink.control +./share/postgresql/extension/gms_stats--1.0.sql +./share/postgresql/extension/gms_stats.control ./share/postgresql/extension/gms_profiler--1.0.sql ./share/postgresql/extension/gms_profiler.control ./share/postgresql/timezone/GB-Eire @@ -806,6 +808,7 @@ ./lib/postgresql/java/pljava.jar ./lib/postgresql/postgres_fdw.so ./lib/postgresql/dblink.so +./lib/postgresql/gms_stats.so ./lib/postgresql/pgoutput.so ./lib/postgresql/assessment.so ./lib/postgresql/gms_profiler.so diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index a7917aa1c..bce18b0e6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -24,6 +24,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/gc_fdw ${CMAKE_CURRENT_SOURCE_DIR}/ndpplugin ${CMAKE_CURRENT_SOURCE_DIR}/spq_plugin + ${CMAKE_CURRENT_SOURCE_DIR}/gms_stats ${CMAKE_CURRENT_SOURCE_DIR}/gms_profiler ) @@ -42,6 +43,7 @@ add_subdirectory(pagehack) add_subdirectory(pg_xlogdump) add_subdirectory(file_fdw) add_subdirectory(log_fdw) +add_subdirectory(gms_stats) if("${ENABLE_MULTIPLE_NODES}" STREQUAL "OFF") add_subdirectory(gc_fdw) endif() diff --git a/contrib/gms_stats/CMakeLists.txt b/contrib/gms_stats/CMakeLists.txt new file mode 100644 index 000000000..ea8a6a719 --- /dev/null +++ b/contrib/gms_stats/CMakeLists.txt @@ -0,0 +1,21 @@ +#This is the main CMAKE for build all gms_stats. +# gms_stats +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_gms_stats_SRC) +set(TGT_gms_stats_INC + ${PROJECT_OPENGS_DIR}/contrib/gms_stats + ${PROJECT_OPENGS_DIR}/contrib +) + +set(gms_stats_DEF_OPTIONS ${MACRO_OPTIONS}) +set(gms_stats_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${LIB_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(gms_stats_LINK_OPTIONS ${LIB_LINK_OPTIONS}) +add_shared_libtarget(gms_stats TGT_gms_stats_SRC TGT_gms_stats_INC "${gms_stats_DEF_OPTIONS}" "${gms_stats_COMPILE_OPTIONS}" "${gms_stats_LINK_OPTIONS}") +set_target_properties(gms_stats PROPERTIES PREFIX "") + +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gms_stats.control + DESTINATION share/postgresql/extension/ +) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gms_stats--1.0.sql + DESTINATION share/postgresql/extension/ +) +install(TARGETS gms_stats DESTINATION lib/postgresql) diff --git a/contrib/gms_stats/Makefile b/contrib/gms_stats/Makefile new file mode 100644 index 000000000..ff09558da --- /dev/null +++ b/contrib/gms_stats/Makefile @@ -0,0 +1,25 @@ +# contrib/gms_stats/Makefile +MODULE_big = gms_stats +OBJS = gms_stats.o + +EXTENSION = gms_stats +DATA = gms_stats--1.0.sql + +REGRESS = gms_stats + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/gms_stats +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +regress_home = $(top_builddir)/src/test/regress +REGRESS_OPTS = -c 0 -d 1 -r 1 -p 25632 --single_node -w --keep_last_data=false \ + --regconf=$(regress_home)/regress.conf \ + --temp-config=$(regress_home)/make_fastcheck_postgresql.conf +include $(top_srcdir)/contrib/contrib-global.mk +endif + +gms_stats.o: gms_stats.cpp diff --git a/contrib/gms_stats/data/dummy.txt b/contrib/gms_stats/data/dummy.txt new file mode 100644 index 000000000..8e09a4f6c --- /dev/null +++ b/contrib/gms_stats/data/dummy.txt @@ -0,0 +1 @@ +The openGauss regression needs this file to run. diff --git a/contrib/gms_stats/expected/gms_stats.out b/contrib/gms_stats/expected/gms_stats.out new file mode 100644 index 000000000..24df6425b --- /dev/null +++ b/contrib/gms_stats/expected/gms_stats.out @@ -0,0 +1,57 @@ +create extension gms_stats; +create schema gms_stats_test; +set search_path=gms_stats_test; +create table normal_table(a int, b char(10)); +insert into normal_table select generate_series(1,500), 'abc'; +create table partition_table(a int) partition by range(a) (partition p1 values less than(100),partition p2 values less than(maxvalue)); +insert into partition_table select generate_series(1,600); +create materialized view mv_tb as select * from normal_table; +select schemaname, tablename, attname, avg_width, most_common_vals, most_common_freqs from pg_stats where schemaname='gms_stats_test' order by tablename, attname; + schemaname | tablename | attname | avg_width | most_common_vals | most_common_freqs +------------+-----------+---------+-----------+------------------+------------------- +(0 rows) + +begin +gms_stats.gather_schema_stats('gms_stats_test'); +end; +/ +NOTICE: PL/SQL procedure successfully completed. +CONTEXT: SQL statement "CALL gms_stats.gather_schema_stats('gms_stats_test')" +PL/pgSQL function inline_code_block line 3 at PERFORM +select schemaname, tablename, attname, avg_width, most_common_vals, most_common_freqs from pg_stats where schemaname='gms_stats_test' order by tablename, attname; + schemaname | tablename | attname | avg_width | most_common_vals | most_common_freqs +----------------+-----------------+---------+-----------+------------------+------------------- + gms_stats_test | mv_tb | a | 4 | | + gms_stats_test | mv_tb | b | 11 | {"abc "} | {1} + gms_stats_test | normal_table | a | 4 | | + gms_stats_test | normal_table | b | 11 | {"abc "} | {1} + gms_stats_test | partition_table | a | 4 | | +(5 rows) + +create table normal_table2(a int, b char(10)); +insert into normal_table2 select generate_series(1,700), 'abc'; +call gms_stats.gather_schema_stats('gms_stats_test'); +NOTICE: PL/SQL procedure successfully completed. + gather_schema_stats +--------------------- + +(1 row) + +select schemaname, tablename, attname, avg_width, most_common_vals, most_common_freqs from pg_stats where schemaname='gms_stats_test' order by tablename, attname; + schemaname | tablename | attname | avg_width | most_common_vals | most_common_freqs +----------------+-----------------+---------+-----------+------------------+------------------- + gms_stats_test | mv_tb | a | 4 | | + gms_stats_test | mv_tb | b | 11 | {"abc "} | {1} + gms_stats_test | normal_table | a | 4 | | + gms_stats_test | normal_table | b | 11 | {"abc "} | {1} + gms_stats_test | normal_table2 | a | 4 | | + gms_stats_test | normal_table2 | b | 11 | {"abc "} | {1} + gms_stats_test | partition_table | a | 4 | | +(7 rows) + +drop schema gms_stats_test cascade; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table normal_table +drop cascades to table partition_table +drop cascades to materialized view mv_tb +drop cascades to table normal_table2 diff --git a/contrib/gms_stats/gms_stats--1.0.sql b/contrib/gms_stats/gms_stats--1.0.sql new file mode 100644 index 000000000..ad9e008b7 --- /dev/null +++ b/contrib/gms_stats/gms_stats--1.0.sql @@ -0,0 +1,42 @@ +/* contrib/gms_stats/gms_stats--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION gms_stats" to load this file. \quit + +-- gms_stats package begin +CREATE SCHEMA gms_stats; +GRANT USAGE ON SCHEMA gms_stats TO PUBLIC; +CREATE TYPE objecttab AS ( + ownname varchar2(30), + objtype varchar2(6), + objname varchar2(30), + partname varchar2(30), + subpartname varchar2(30)); + +CREATE FUNCTION gms_stats.gs_analyze_schema_tables(schemaname varchar2) + RETURNS void +AS 'MODULE_PATHNAME','gs_analyze_schema_tables' +LANGUAGE C VOLATILE NOT FENCED; + +CREATE OR REPLACE PROCEDURE gms_stats.gather_schema_stats( + ownname varchar2, + estimate_percent number default 100, + block_sample boolean default false, + method_opt varchar2 default 'FOR ALL COLUMNS SIZE AUTO', + degree number default null, + granularity varchar2 default 'GLOBAL', + cascade boolean default false, + stattab varchar2 default null, + statid varchar2 default null, + options varchar2 default 'GATHER', + objlist ObjectTab default null, + statown varchar2 default null, + no_invalidate boolean default false, + force boolean default false, + obj_filter_list objecttab default null) IS +BEGIN + perform gms_stats.gs_analyze_schema_tables(ownname); + raise notice 'PL/SQL procedure successfully completed.'; +END; + +-- gms_stats package end diff --git a/contrib/gms_stats/gms_stats.control b/contrib/gms_stats/gms_stats.control new file mode 100644 index 000000000..d83876ad1 --- /dev/null +++ b/contrib/gms_stats/gms_stats.control @@ -0,0 +1,5 @@ +# gms_stats extension +comment = 'collection of stats data for PL/SQL applications' +default_version = '1.0' +module_pathname = '$libdir/gms_stats' +relocatable = true diff --git a/contrib/gms_stats/gms_stats.cpp b/contrib/gms_stats/gms_stats.cpp new file mode 100644 index 000000000..fef7cd591 --- /dev/null +++ b/contrib/gms_stats/gms_stats.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * -------------------------------------------------------------------------------------- + * + * gms_stats.cpp + * gms_stats can effectively estimate statistical data. + * + * + * IDENTIFICATION + * contrib/gms_stats/gms_stats.cpp + * + * -------------------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "funcapi.h" +#include "fmgr.h" + +#include "access/skey.h" +#include "access/heapam.h" +#include "catalog/indexing.h" +#include "commands/sqladvisor.h" +#include "commands/vacuum.h" +#include "executor/spi.h" +#include "lib/stringinfo.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "gms_stats.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(gs_analyze_schema_tables); + +static List* GetRelationsInSchema(char *namespc) +{ + Relation pg_class_rel = NULL; + ScanKeyData skey[1]; + SysScanDesc sysscan; + HeapTuple tuple; + char* relname; + List* tbl_relnames = NIL; + Oid nspid; + + nspid = get_namespace_oid(namespc, true); + if (!OidIsValid(nspid)) + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), errmsg("schema \"%s\" does not exists", namespc))); + + ScanKeyInit(&skey[0], Anum_pg_class_relnamespace, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(nspid)); + pg_class_rel = heap_open(RelationRelationId, AccessShareLock); + sysscan = systable_beginscan(pg_class_rel, ClassNameNspIndexId, true, SnapshotNow, 1, skey); + while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { + Form_pg_class reltup = (Form_pg_class)GETSTRUCT(tuple); + if (reltup->relkind == RELKIND_RELATION || reltup->relkind == RELKIND_MATVIEW) { + relname = reltup->relname.data; + tbl_relnames = lappend(tbl_relnames, relname); + } + } + systable_endscan(sysscan); + heap_close(pg_class_rel, AccessShareLock); + return tbl_relnames; +} + +static void analyze_tables(char *namespc, List *relnames_list) +{ + StringInfo execute_sql; + ListCell* lc; + VacuumStmt* stmt; + execute_sql = makeStringInfo(); + foreach(lc, relnames_list) + { + char* relnames = (char*)lfirst(lc); + appendStringInfo(execute_sql, "ANALYZE %s.%s;", quote_identifier(namespc), quote_identifier(relnames)); + + List* parsetree_list = NULL; + ListCell* parsetree_item = NULL; + parsetree_list = raw_parser(execute_sql->data, NULL); + foreach (parsetree_item, parsetree_list) { + Node* parsetree = (Node*)lfirst(parsetree_item); + stmt = (VacuumStmt*)parsetree; + } + vacuum(stmt, InvalidOid, true, NULL, true); + list_free(parsetree_list); + resetStringInfo(execute_sql); + } + DestroyStringInfo(execute_sql); +} + +Datum +gs_analyze_schema_tables(PG_FUNCTION_ARGS) +{ + char *schema_name = text_to_cstring(PG_GETARG_TEXT_P(0)); + List* relnames_list; + + relnames_list = GetRelationsInSchema(schema_name); + analyze_tables(schema_name, relnames_list); + + pfree_ext(schema_name); + list_free(relnames_list); + + PG_RETURN_VOID(); +} + diff --git a/contrib/gms_stats/gms_stats.h b/contrib/gms_stats/gms_stats.h new file mode 100644 index 000000000..5e4c65eab --- /dev/null +++ b/contrib/gms_stats/gms_stats.h @@ -0,0 +1,15 @@ +/*---------------------------------------------------------------------------------------* + * gms_stats.h + * + * Definition about gms_stats package. + * + * IDENTIFICATION + * contrib/gms_stats/gms_stats.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef GMS_STATS_H +#define GMS_STATS_H + +extern "C" Datum gs_analyze_schema_tables(PG_FUNCTION_ARGS); +#endif diff --git a/contrib/gms_stats/sql/gms_stats.sql b/contrib/gms_stats/sql/gms_stats.sql new file mode 100644 index 000000000..4cc3b86df --- /dev/null +++ b/contrib/gms_stats/sql/gms_stats.sql @@ -0,0 +1,26 @@ +create extension gms_stats; +create schema gms_stats_test; +set search_path=gms_stats_test; +create table normal_table(a int, b char(10)); +insert into normal_table select generate_series(1,500), 'abc'; + +create table partition_table(a int) partition by range(a) (partition p1 values less than(100),partition p2 values less than(maxvalue)); +insert into partition_table select generate_series(1,600); + +create materialized view mv_tb as select * from normal_table; + +select schemaname, tablename, attname, avg_width, most_common_vals, most_common_freqs from pg_stats where schemaname='gms_stats_test' order by tablename, attname; + +begin +gms_stats.gather_schema_stats('gms_stats_test'); +end; +/ +select schemaname, tablename, attname, avg_width, most_common_vals, most_common_freqs from pg_stats where schemaname='gms_stats_test' order by tablename, attname; + +create table normal_table2(a int, b char(10)); +insert into normal_table2 select generate_series(1,700), 'abc'; +call gms_stats.gather_schema_stats('gms_stats_test'); + +select schemaname, tablename, attname, avg_width, most_common_vals, most_common_freqs from pg_stats where schemaname='gms_stats_test' order by tablename, attname; + +drop schema gms_stats_test cascade;