diff --git a/src/bin/pg_config/pg_config.cpp b/src/bin/pg_config/pg_config.cpp
index a9ad371e0..bc86cc22b 100644
--- a/src/bin/pg_config/pg_config.cpp
+++ b/src/bin/pg_config/pg_config.cpp
@@ -292,9 +292,11 @@ static void show_version(int alls)
     if (alls) {
         printf("VERSION = ");
     }
-#ifdef ENABLE_MULTIPLE_NODES
     printf("PostgreSQL 9.2.4\n");
-#else
+#ifndef ENABLE_MULTIPLE_NODES
+    if (alls) {
+        printf("OPENGAUSS_VERSION = ");
+    }
     printf("openGauss 3.0.0\n");
 #endif
 }
diff --git a/src/gausskernel/dbmind/tools/anomaly_detection/structure.png b/src/gausskernel/dbmind/tools/anomaly_detection/structure.png
deleted file mode 100644
index 8f692355e..000000000
Binary files a/src/gausskernel/dbmind/tools/anomaly_detection/structure.png and /dev/null differ
diff --git a/src/gausskernel/dbmind/tools/app/diagnosis/query/__init__.py b/src/gausskernel/dbmind/tools/app/diagnosis/query/__init__.py
index 666956fa1..538a1d4be 100644
--- a/src/gausskernel/dbmind/tools/app/diagnosis/query/__init__.py
+++ b/src/gausskernel/dbmind/tools/app/diagnosis/query/__init__.py
@@ -12,10 +12,19 @@
 # See the Mulan PSL v2 for more details.
 import logging
 
+from dbmind.common.platform import LINUX
 from dbmind.common.types.root_cause import RootCause
+
 from .slow_sql.analyzer import SlowSQLAnalyzer
 
-_analyzer = SlowSQLAnalyzer()
+if LINUX:
+    from dbmind.common.dispatcher.task_worker import get_mp_sync_manager
+
+    shared_sql_buffer = get_mp_sync_manager().list()
+else:
+    shared_sql_buffer = None
+
+_analyzer = SlowSQLAnalyzer(buffer=shared_sql_buffer)
 
 
 def diagnose_query(slow_query):
@@ -25,3 +34,4 @@ def diagnose_query(slow_query):
         slow_query.add_cause(RootCause.get('LACK_INFORMATION'))
         logging.exception(e)
     return slow_query
+
diff --git a/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/analyzer.py b/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/analyzer.py
index dddc4509d..4cada0ec6 100644
--- a/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/analyzer.py
+++ b/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/analyzer.py
@@ -93,13 +93,13 @@ class SlowSQLAnalyzer:
     Classes for diagnosing slow SQL
     """
 
-    def __init__(self, topk: int = 3, buffer_capacity: int = 500):
+    def __init__(self, topk: int = 3, buffer_capacity: int = 500, buffer=None):
         """
         :param topk: The number of output root causes
         :param buffer_capacity: The length of slow SQL buffer queue
        """
         self.topk = topk
-        self.sql_buffers = []
+        self.sql_buffers = buffer if buffer is not None else []
         self.buffer_capacity = buffer_capacity
 
     def run(self, slow_query_instance: SlowQuery) -> [SlowQuery, None]:
@@ -128,13 +128,37 @@ class SlowSQLAnalyzer:
             self.sql_buffers.append(diagnosed_flag)
         return False
 
+    @staticmethod
+    def associate_table_with_schema(schema_infos: Dict, query: str, schema_name: str, exist_tables: Dict):
+        """
+        Find schema and table in query, there are the following three situations:
+        1. schema.table: We can match out table information based on regularity.
+        2. find table information based on pg_class if SLOW_SQL's schema in pg_class(schema_infos).
+        3. if the second step is not found, then find out possible table information from pg_class(schema_infos).
+ """ + regex_result = re.findall(r"([\w\d_]+)\.([\w\d_]+)", query) + if regex_result: + for schema, table in regex_result: + exist_tables[schema].append(table) + query.replace("%s.%s" % (schema, table), ' ') + if schema_name in schema_infos: + for table in schema_infos[schema_name]: + if table in query: + exist_tables[schema_name].append(table) + else: + for schema, tables in schema_infos.items(): + for table in tables: + if table.upper() in query.upper(): + exist_tables[schema].append(table) + return + def _analyze(self, slow_sql_instance: SlowQuery, data_factory: query_info_source.QueryContext, schema_infos: Dict) -> [SlowQuery, None]: """Slow SQL diagnosis main process""" logging.debug(f"[SLOW QUERY] Diagnosing SQL: {slow_sql_instance.query}") exist_tables = defaultdict(list) - if slow_sql_instance.query.upper() == 'COMMIT' or slow_sql_instance.query.upper().startswith('SET'): + if slow_sql_instance.query.strip().upper() == 'COMMIT' or slow_sql_instance.query.strip().upper().startswith('SET'): title = FEATURES_CAUSE_MAPPER.get('C_UNKNOWN') root_cause = RootCause.get(title) slow_sql_instance.add_cause(root_cause) @@ -147,16 +171,8 @@ class SlowSQLAnalyzer: root_cause = RootCause.get(FEATURES_CAUSE_MAPPER.get('C_SQL')) slow_sql_instance.add_cause(root_cause) return - if schema_infos: - query = slow_sql_instance.query - regex_result = re.findall(r"([\w\d_]+)\.([\w\d_]+)", slow_sql_instance.query) - if regex_result: - for schema, table in regex_result: - exist_tables[schema].append(table) - query.replace("%s.%s" % (schema, table), ' ') - for table in schema_infos[slow_sql_instance.schema_name]: - if table in query: - exist_tables[slow_sql_instance.schema_name].append(table) + query = slow_sql_instance.query + self.associate_table_with_schema(schema_infos, query, slow_sql_instance.schema_name, exist_tables) slow_sql_instance.tables_name = exist_tables feature_generator = QueryFeature(slow_sql_instance, data_factory) feature_generator.initialize_metrics() diff --git a/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_feature.py b/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_feature.py index f7bdccc0d..5c98a524a 100644 --- a/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_feature.py +++ b/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_feature.py @@ -150,7 +150,9 @@ class QueryFeature: return False live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in self.table_structure} - if (fetched_tuples + returned_tuples) / max(live_tuples_list.values()) > _get_threshold('fetch_rate_limit'): + if (fetched_tuples + returned_tuples) / max(live_tuples_list.values()) > _get_threshold( + 'fetch_rate_limit') or (fetched_tuples + returned_tuples) > _get_threshold( + 'fetch_tuples_limit'): self.detail['fetched_tuples'] = fetched_tuples + returned_tuples self.detail['fetched_tuples_rate'] = round( (fetched_tuples + returned_tuples) / max(live_tuples_list.values()), 4) @@ -171,7 +173,7 @@ class QueryFeature: live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in self.table_structure} if returned_rows / max(live_tuples_list.values()) > _get_threshold( - 'returned_rate_limit'): + 'returned_rate_limit') or returned_rows > _get_threshold('returned_rows_limit'): self.detail['returned_rows'] = returned_rows self.detail['returned_rows_rate'] = round(returned_rows / max(live_tuples_list.values()), 4) return True @@ -231,7 +233,7 @@ class QueryFeature: live_tuples_list = 
{f"{item.schema_name}:{item.table_name}": item.live_tuples for item in self.table_structure} if updated_tuples / max(live_tuples_list.values()) > _get_threshold( - 'updated_rate_limit'): + 'updated_rate_limit') or updated_tuples > _get_threshold('updated_tuples_limit'): self.detail['updated_tuples'] = updated_tuples self.detail['updated_tuples_rate'] = round(updated_tuples / max(live_tuples_list.values()), 4) return True @@ -251,7 +253,7 @@ class QueryFeature: live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in self.table_structure} if inserted_tuples / max(live_tuples_list.values()) > _get_threshold( - 'inserted_rate_limit'): + 'inserted_rate_limit') or inserted_tuples > _get_threshold('inserted_tuples_limit'): self.detail['inserted_tuples'] = inserted_tuples self.detail['inserted_tuples_rate'] = round(inserted_tuples / max(live_tuples_list.values()), 4) return True @@ -291,7 +293,7 @@ class QueryFeature: live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in self.table_structure} if deleted_tuples / max(live_tuples_list.values()) > _get_threshold( - 'deleted_rate_limit'): + 'deleted_rate_limit') or deleted_tuples > _get_threshold('deleted_tuples_limit'): self.detail['deleted_tuples'] = deleted_tuples self.detail['deleted_tuples_rate'] = round(deleted_tuples / max(live_tuples_list.values()), 4) return True diff --git a/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_info_source.py b/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_info_source.py index 1c9064034..96b13bd41 100644 --- a/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_info_source.py +++ b/src/gausskernel/dbmind/tools/app/diagnosis/query/slow_sql/query_info_source.py @@ -18,7 +18,7 @@ from dbmind.common.parser.sql_parsing import is_num, str2int from dbmind.common.utils import ExceptionCatch from dbmind.service import dai -excetpion_catcher = ExceptionCatch(strategy='exit', name='SLOW QUERY') +exception_catcher = ExceptionCatch(strategy='warn', name='SLOW QUERY') class TableStructure: @@ -74,7 +74,7 @@ class SystemInfo: def __init__(self): self.db_host = None self.db_port = None - self.iops = 0.0 + self.iops = 0 self.ioutils = {} self.iocapacity = 0.0 self.iowait = 0.0 @@ -86,7 +86,7 @@ class SystemInfo: class QueryContext: """The object of slow query data processing factory""" - def __init__(self, slow_sql_instance, default_fetch_interval=15, expansion_factor=5, + def __init__(self, slow_sql_instance, default_fetch_interval=15, expansion_factor=8, retrieval_time=5): """ :param slow_sql_instance: The instance of slow query @@ -108,13 +108,13 @@ class QueryContext: logging.debug('[SLOW QUERY] fetch start time: %s, fetch end time: %s', self.query_start_time, self.query_end_time) logging.debug('[SLOW QUERY] fetch interval: %s', self.fetch_interval) - - @excetpion_catcher + @exception_catcher def acquire_pg_class(self) -> Dict: """Get all object information in the database""" pg_class = {} sequences = dai.get_metric_sequence('pg_class_relsize', self.query_start_time, self.query_end_time).from_server( f"{self.slow_sql_instance.db_host}:{self.slow_sql_instance.db_port}").fetchall() + sequences = [sequence for sequence in sequences if sequence.labels] for sequence in sequences: pg_class['db_host'] = self.slow_sql_instance.db_host pg_class['db_port'] = self.slow_sql_instance.db_port @@ -132,7 +132,7 @@ class QueryContext: pg_class[db_name][schema_name].append(table_name) return pg_class - @excetpion_catcher + 
@exception_catcher def acquire_fetch_interval(self) -> int: """Get data source collection frequency""" sequence = dai.get_latest_metric_sequence("os_disk_iops", self.retrieval_time).from_server( @@ -143,7 +143,7 @@ class QueryContext: self.fetch_interval = int(timestamps[-1]) // 1000 - int(timestamps[-2]) // 1000 return self.fetch_interval - @excetpion_catcher + @exception_catcher def acquire_lock_info(self) -> LockInfo: """Get lock information during slow SQL execution""" blocks_info = LockInfo() @@ -152,6 +152,7 @@ class QueryContext: f"{self.slow_sql_instance.db_host}:{self.slow_sql_instance.db_port}").fetchall() logging.debug('[SLOW QUERY] acquire_lock_info: %s.', locks_sequences) locked_query, locked_query_start, locker_query, locker_query_start = [], [], [], [] + locks_sequences = [sequence for sequence in locks_sequences if sequence.labels] for locks_sequence in locks_sequences: logging.debug('[SLOW QUERY] acquire_lock_info: %s.', locks_sequence) locked_query.append(locks_sequence.labels.get('locked_query', 'Unknown')) @@ -165,7 +166,7 @@ class QueryContext: return blocks_info - @excetpion_catcher + @exception_catcher def acquire_tables_structure_info(self) -> List: """Acquire table structure information related to slow query""" table_structure = [] @@ -224,12 +225,12 @@ class QueryContext: if index_number_info: table_info.index = [item.labels['relname'] for item in index_number_info if item.labels] if redundant_index_info: - table_info.redundant_index = [item.labels['indexrelname'] for item in redundant_index_info] + table_info.redundant_index = [item.labels['indexrelname'] for item in redundant_index_info if item.labels] table_structure.append(table_info) return table_structure - @excetpion_catcher + @exception_catcher def acquire_database_info(self) -> DatabaseInfo: """Acquire table database information related to slow query""" database_info = DatabaseInfo() @@ -262,7 +263,7 @@ class QueryContext: return database_info - @excetpion_catcher + @exception_catcher def acquire_system_info(self) -> SystemInfo: """Acquire system information on the database server """ system_info = SystemInfo() @@ -288,16 +289,23 @@ class QueryContext: self.query_end_time).from_server( f"{self.slow_sql_instance.db_host}").fetchone() logging.debug('[SLOW QUERY] acquire_database_info[mem_usage]: %s.', mem_usage_info) - load_average_info = dai.get_metric_sequence("node_load1", self.query_start_time, self.query_end_time).filter( - instance=f"{self.slow_sql_instance.db_host}:9100").fetchone() + load_average_info = dai.get_metric_sequence("load_average", self.query_start_time, self.query_end_time).from_server( + f"{self.slow_sql_instance.db_host}").fetchone() logging.debug('[SLOW QUERY] acquire_database_info[load_average]: %s.', load_average_info) - system_info.iops = int(max(iops_info.values)) - ioutils_dict = {item.labels['device']: round(float(max(item.values)), 4) for item in ioutils_info} - system_info.ioutils = ioutils_dict - system_info.iocapacity = round(float(max(iocapacity_info.values)), 4) - system_info.iowait = round(float(max(iowait_info.values)), 4) - system_info.cpu_usage = round(float(max(cpu_usage_info.values)), 4) - system_info.mem_usage = round(float(max(mem_usage_info.values)), 4) - system_info.load_average = round(float(max(load_average_info.values)), 4) + if iops_info.values: + system_info.iops = int(max(iops_info.values)) + if ioutils_info: + ioutils_dict = {item.labels['device']: round(float(max(item.values)), 4) for item in ioutils_info if item.labels} + system_info.ioutils = 
ioutils_dict + if iocapacity_info.values: + system_info.iocapacity = round(float(max(iocapacity_info.values)), 4) + if iowait_info.values: + system_info.iowait = round(float(max(iowait_info.values)), 4) + if cpu_usage_info.values: + system_info.cpu_usage = round(float(max(cpu_usage_info.values)), 4) + if mem_usage_info.values: + system_info.mem_usage = round(float(max(mem_usage_info.values)), 4) + if load_average_info.values: + system_info.load_average = round(float(max(load_average_info.values)), 4) return system_info diff --git a/src/gausskernel/dbmind/tools/app/timed_app.py b/src/gausskernel/dbmind/tools/app/timed_app.py index 8945cd969..28c5dc152 100644 --- a/src/gausskernel/dbmind/tools/app/timed_app.py +++ b/src/gausskernel/dbmind/tools/app/timed_app.py @@ -11,6 +11,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. import logging +from datetime import timedelta, datetime from dbmind import constants from dbmind import global_vars @@ -44,30 +45,6 @@ golden_kpi = list(map( ).split(',') )) - -def quickly_forecast_wrapper(sequence, forecasting_minutes): - forecast_result = quickly_forecast(sequence, forecasting_minutes) - metric_value_range = metric_value_range_map.get(sequence.name) - if metric_value_range and forecast_result: - metric_value_range = metric_value_range.split(",") - try: - metric_value_low = float(metric_value_range[0]) - metric_value_high = float(metric_value_range[1]) - except ValueError as ex: - logging.warning("quickly_forecast_wrapper value error:%s," - " so forecast_result will not be cliped." % ex) - return forecast_result - - f_values = list(forecast_result.values) - for i in range(len(f_values)): - if f_values[i] < metric_value_low: - f_values[i] = metric_value_low - if f_values[i] > metric_value_high: - f_values[i] = metric_value_high - forecast_result.values = tuple(f_values) - return forecast_result - - @timer(detection_interval) def self_monitoring(): # diagnose for slow queries @@ -96,10 +73,19 @@ def forecast_kpi(): ) return + start = datetime.now() - timedelta(minutes=enough_history_minutes) + end = datetime.now() for metric in golden_kpi: - last_sequences = dai.get_latest_metric_sequence(metric, enough_history_minutes).fetchall() + last_sequences = dai.get_metric_sequence(metric, start, end).fetchall() + + try: + metric_value_range = global_vars.metric_value_range_map.get(metric) + lower, upper = map(float, metric_value_range.split(',')) + except Exception: + lower, upper = 0, float("inf") + future_sequences = global_vars.worker.parallel_execute( - quickly_forecast_wrapper, ((sequence, how_long_to_forecast_minutes) + quickly_forecast, ((sequence, how_long_to_forecast_minutes, lower, upper) for sequence in last_sequences) ) detect_materials = list() diff --git a/src/gausskernel/dbmind/tools/cmd/config_utils.py b/src/gausskernel/dbmind/tools/cmd/config_utils.py index e1c518186..a277f8cde 100644 --- a/src/gausskernel/dbmind/tools/cmd/config_utils.py +++ b/src/gausskernel/dbmind/tools/cmd/config_utils.py @@ -17,6 +17,7 @@ from configparser import NoSectionError, NoOptionError from dbmind import constants from dbmind.common import security from dbmind.common.exceptions import InvalidPasswordException, ConfigSettingError +from dbmind.common.security import check_ip_valid, check_port_valid from dbmind.common.utils import write_to_terminal from dbmind.metadatabase.dao.dynamic_config import dynamic_config_get, dynamic_config_set @@ -55,34 +56,54 @@ CONFIG_OPTIONS = { 'LOG-level': ['DEBUG', 'INFO', 'WARNING', 'ERROR'] 
} +# Used by check_config_validity(). +INTEGER_CONFIG = ['SELF-MONITORING-detection_interval', + 'SELF-MONITORING-last_detection_time', + 'SELF-MONITORING-forecasting_future_time', + 'LOG-maxbytes', + 'LOG-backupcount'] -def check_config_validity(section, option, value): + +def check_config_validity(section, option, value, silent=False): config_item = '%s-%s' % (section, option) # exceptional cases: - if config_item == 'METADATABASE-port': - return True, None + if config_item in ('METADATABASE-port', 'METADATABASE-host'): + if value.strip() == '' or value == NULL_TYPE: + return True, None # normal inspection process: if 'port' in option: - valid_port = str.isdigit(value) and 0 < int(value) <= 65535 + valid_port = check_port_valid(value) if not valid_port: - return False, 'Invalid port %s' % value + return False, 'Invalid port for %s: %s(1024-65535)' % (config_item, value) + if 'host' in option: + valid_host = check_ip_valid(value) + if not valid_host: + return False, 'Invalid IP Address for %s: %s' % (config_item, value) if 'database' in option: if value == NULL_TYPE or value.strip() == '': return False, 'Unspecified database name' - + if config_item in INTEGER_CONFIG: + if not str.isdigit(value) or int(value) <= 0: + return False, 'Invalid value for %s: %s' % (config_item, value) options = CONFIG_OPTIONS.get(config_item) if options and value not in options: - return False, 'Invalid choice: %s' % value + return False, 'Invalid choice for %s: %s' % (config_item, value) - if 'dbtype' in option and value == 'opengauss': + if 'dbtype' in option and value == 'opengauss' and not silent: write_to_terminal( 'WARN: default PostgreSQL connector (psycopg2-binary) does not support openGauss.\n' 'It would help if you compiled psycopg2 with openGauss manually or ' 'created a connection user after setting the GUC password_encryption_type to 1.', color='yellow' ) - + if 'dbtype' in option and value == 'sqlite' and not silent: + write_to_terminal( + 'NOTE: SQLite currently only supports local deployment, so you only need to provide ' + 'METADATABASE-database information. if you provide other information, DBMind will ' + 'ignore them.', + color='yellow' + ) # Add more checks here. return True, None @@ -115,11 +136,28 @@ def load_sys_configs(confile): s2 = dynamic_config_get('dbmind_config', 'cipher_s2') iv = dynamic_config_get('iv_table', '%s-%s' % (section, option)) try: - value = security.decrypt(s1, s2, iv, value.lstrip(ENCRYPTED_SIGNAL)) + real_value = value[len(ENCRYPTED_SIGNAL):] if value.startswith(ENCRYPTED_SIGNAL) else value + value = security.decrypt(s1, s2, iv, real_value) except Exception as e: raise InvalidPasswordException(e) + + else: + valid, reason = check_config_validity(section, option, value, silent=True) + if not valid: + raise ConfigSettingError('DBMind failed to start due to %s.' % reason) + return value + @staticmethod + def getint(section, option, *args, **kwargs): + """Faked getint() for ConfigParser class.""" + value = configs.get(section, option, *args, **kwargs) + valid, reason = check_config_validity(section, option, value, silent=True) + if not valid: + raise ConfigSettingError('DBMind failed to start due to %s.' 
% reason) + + return int(value) + return ConfigWrapper() diff --git a/src/gausskernel/dbmind/tools/cmd/setup.py b/src/gausskernel/dbmind/tools/cmd/setup.py index b49f3f113..179e88dbf 100644 --- a/src/gausskernel/dbmind/tools/cmd/setup.py +++ b/src/gausskernel/dbmind/tools/cmd/setup.py @@ -23,7 +23,7 @@ from dbmind.cmd.config_utils import ( ) from dbmind.cmd.edbmind import SKIP_LIST from dbmind.common import utils, security -from dbmind.common.exceptions import SetupError, SQLExecutionError +from dbmind.common.exceptions import SetupError, SQLExecutionError, DuplicateTableError from dbmind.metadatabase import ( create_dynamic_config_schema, create_metadatabase_schema, @@ -103,7 +103,8 @@ def initialize_and_check_config(confpath, interactive=False): utils.write_to_terminal('Starting to connect to meta-database and create tables...', color='green') try: create_metadatabase_schema(check_first=False) - except SQLExecutionError: + utils.write_to_terminal('The setup process finished successfully.', color='green') + except DuplicateTableError: utils.write_to_terminal('The given database has duplicate tables. ' 'If you want to reinitialize the database, press [R]. ' 'If you want to keep the existent tables, press [K].', color='red') @@ -117,7 +118,10 @@ def initialize_and_check_config(confpath, interactive=False): create_metadatabase_schema(check_first=True) if input_char == 'K': utils.write_to_terminal('Ignoring...', color='green') - utils.write_to_terminal('The setup process finished successfully.', color='green') + utils.write_to_terminal('The setup process finished successfully.', color='green') + except SQLExecutionError: + utils.write_to_terminal('Failed to link metadatabase due to unknown error, ' + 'please check the database and its configuration.', color='red') def setup_directory_interactive(confpath): diff --git a/src/gausskernel/dbmind/tools/common/algorithm/basic.py b/src/gausskernel/dbmind/tools/common/algorithm/basic.py index 9776ee9b4..b829695b0 100644 --- a/src/gausskernel/dbmind/tools/common/algorithm/basic.py +++ b/src/gausskernel/dbmind/tools/common/algorithm/basic.py @@ -34,7 +34,7 @@ def binary_search(L, target): return -1 -def how_many_lesser_elements(L, target): +def binary_search_leftmost(L, target): """The function bases on finding the leftmost element with binary search. About Binary Search @@ -68,7 +68,18 @@ def how_many_lesser_elements(L, target): return lo -def how_many_larger_elements(L, target): +def binary_search_left(L, target): + """Wrap the function ``how_many_lesser_elements(L, target)`` by adding + a check for return target. + + :return -1 when not found the target target. + """ + lo = binary_search_leftmost(L, target) + return -1 if lo >= len(L) or L[lo] != target else lo + + +def binary_search_rightmost(L, target): + """Similar to above function.""" if len(L) == 0: return -1 # [0, length - 1] @@ -86,18 +97,16 @@ def how_many_larger_elements(L, target): return hi -def binary_search_left(L, target): - """Wrap the function ``how_many_lesser_elements(L, target)`` by adding - a check for return target. - - :return -1 when not found the target target. 
- """ - lo = how_many_lesser_elements(L, target) - return -1 if lo >= len(L) or L[lo] != target else lo - - def binary_search_right(L, target): - """Similar to above function.""" - hi = how_many_larger_elements(L, target) + hi = binary_search_rightmost(L, target) return -1 if hi < 0 or L[hi] != target else hi + +how_many_lesser_elements = binary_search_leftmost + + +def how_many_larger_elements(L, target): + right_most = binary_search_right(L, target) + if right_most >= 0: + return len(L) - 1 - right_most + return len(L) - binary_search_leftmost(L, target) diff --git a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_alg.py b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_alg.py index 48d7b5ca4..7ae0a66f3 100644 --- a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_alg.py +++ b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_alg.py @@ -10,16 +10,34 @@ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. - +"""\ +Some snippets and implementations refer to Python library statsmodels (BSD-3 license). +But we cannot import the library directly because this library includes +so many statistical modeling and econometric algorithms that we never use. +On the other hand, this library introduces other heavy dependencies, leading to lightweight DBMind loss. +""" +import itertools from types import SimpleNamespace -import numpy as np import logging -import time + +import numpy as np from numpy import dot, log, zeros, pi from scipy import optimize from scipy import signal from scipy.signal import lfilter + from .arima_common import lagmat, OLS +from ..forcasting_algorithm import ForecastingAlgorithm + +MAX_AR_ORDER = 5 +MAX_MA_ORDER = 5 +K_AR_MIN = K_DIFF_MIN = K_MA_MIN = 0 +K_DIFF_MAX = 2 +MIN_DATA_LENGTH = max(MAX_AR_ORDER, MAX_MA_ORDER) + + +class InvalidParameter(Exception): + pass def _ar_transparams(params): @@ -76,7 +94,6 @@ def _ma_transparams(params): def _ma_invtransparams(macoefs): """ return the inverse of the ma params. 
- :param params: type->np.array :return invmacoefs: type->np.array """ tmp = macoefs.copy() @@ -91,7 +108,7 @@ def _ma_invtransparams(macoefs): class DummyArray: - """ support __array_interface__ and base""" + """support __array_interface__ and base""" def __init__(self, interface, base=None): self.__array_interface__ = interface @@ -182,15 +199,17 @@ def yule_walker(x_raw, order=1): adj_needed = method == "adjusted" if x_raw.ndim > 1 and x_raw.shape[1] != 1: - raise ValueError("expecting a vector to estimate ar parameters") + raise InvalidParameter("expecting a vector to estimate ar parameters") r_raw = np.zeros(order + 1, np.float64) r_raw[0] = (x_raw ** 2).sum() / num for k in range(1, order + 1): r_raw[k] = (x_raw[0:-k] * x_raw[k:]).sum() / (num - k * adj_needed) r_tope = _toeplitz(r_raw[:-1]) - rho = np.linalg.solve(r_tope, r_raw[1:]) - - return rho + try: + rho = np.linalg.solve(r_tope, r_raw[1:]) + return rho + except np.linalg.LinAlgError as e: + raise InvalidParameter(e) def _arma_impulse_response(new_ar_coeffs, new_ma_coeffs, leads=100): @@ -435,12 +454,15 @@ def _compute_start_ar_ma_coeffs(k_ar, k_ma, y_raw): _x_mat, _y_mat = lagmat(y_raw, ar_order, original="sep") _y_mat = _y_mat[ar_order:] _x_mat = _x_mat[ar_order:] - ols_mod = OLS(_y_mat, _x_mat) + try: + ols_mod = OLS(_y_mat, _x_mat) + except ValueError as e: + raise InvalidParameter(e) ols_res = ols_mod.fit() arcoefs_tmp = ols_res if ar_order + k_ma >= len(y_raw): - raise ValueError("start ar order is not valid") + raise InvalidParameter("start ar order is not valid") lag_endog, lag_resid = _get_lag_data_and_resid(y_raw, ar_order, @@ -480,21 +502,14 @@ def _get_errors(params, raw_data, order): return errors -class ARIMA: - """ARIMA model can forecast series according to history series""" +class ARIMA(ForecastingAlgorithm): + """ARIMA model can forecast series according to historical series""" - def __init__(self, y_raw, order): - """ - :param y_raw: type->np.array - :param order: type->tuple - """ - k_ar, k_diff, k_ma = order - self.order = SimpleNamespace(k_ar=k_ar, k_diff=k_diff, k_ma=k_ma) - y_raw = np.asarray(y_raw) if isinstance(y_raw, (list, tuple)) else y_raw - y_fit = np.diff(y_raw, n=k_diff) - x_fit = np.ones((len(y_fit), 1)) - self.raw_data = SimpleNamespace(x=x_fit, y=y_fit, raw_y=y_raw, k_trend=1) - self.nobs = len(y_fit) - k_ar + def __init__(self): + self.order = None + self.given_data = None + self.once_data = None + self.nobs = None self.is_transparams = True self.resid = None self.params = None @@ -508,8 +523,8 @@ class ARIMA: k_ar, k_ma, k_trend = order start_params = zeros((k_ar + k_ma + k_trend)) - y_raw = np.array(self.raw_data.y, np.float64) - x_raw = self.raw_data.x + y_raw = np.array(self.once_data.y, np.float64) + x_raw = self.once_data.x if k_trend != 0: ols_params = OLS(y_raw, x_raw).fit() start_params[:k_trend] = ols_params @@ -529,10 +544,10 @@ class ARIMA: if k_ar and not np.all(np.abs(np.roots(np.r_[1, -start_params[k_trend:k_trend + k_ar]] )) < 1): - raise ValueError("the ar start coeffs is invalid") + raise InvalidParameter("the ar start coeffs %s is invalid" % k_ar) if k_ma and not np.all(np.abs(np.roots(np.r_[1, start_params[k_trend + k_ar:]] )) < 1): - raise ValueError("the ma start coeffs is invalid") + raise InvalidParameter("the ma start coeffs %s is invalid." 
% k_ma) return self._invtransparams(start_params) @@ -547,7 +562,7 @@ class ARIMA: newparams = self._transparams(params) else: newparams = params - errors = _get_errors(newparams, self.raw_data, self.order) + errors = _get_errors(newparams, self.once_data, self.order) ssr = np.dot(errors, errors) sigma2 = ssr / nobs @@ -561,7 +576,7 @@ class ARIMA: :return newparams: type->np.array """ k_ar, k_ma = self.order.k_ar, self.order.k_ma - k = self.raw_data.k_trend + k = self.once_data.k_trend newparams = np.zeros_like(params) if k != 0: @@ -581,7 +596,7 @@ class ARIMA: :return newparams: type->np.array """ k_ar, k_ma = self.order.k_ar, self.order.k_ma - k = self.raw_data.k_trend + k = self.once_data.k_trend newparams = start_params.copy() arcoefs = newparams[k:k + k_ar] macoefs = newparams[k + k_ar:] @@ -602,16 +617,61 @@ class ARIMA: def bic(self): """the BIC is for optimal parameters:(p d q)""" nobs = self.nobs - df_model = self.raw_data.k_trend + self.order.k_ar + self.order.k_ma + df_model = self.once_data.k_trend + self.order.k_ar + self.order.k_ma return -2 * self.llf + np.log(nobs) * (df_model + 1) - def fit(self, sequence=None): + def fit(self, sequence): + self.given_data = np.array(sequence.values).astype('float32') + + min_bic = np.inf + optimal_ar = optimal_ma = 0 + + diff_data = np.diff(self.given_data) + # Look for the optimal parameters. + for k_ar, k_diff, k_ma in \ + itertools.product(range(K_AR_MIN, MAX_AR_ORDER + 1, 2), + range(K_DIFF_MIN, K_DIFF_MAX + 1), + range(K_MA_MIN, MAX_MA_ORDER + 1, 2)): + if k_ar == 0 and k_diff == 0 and k_ma == 0: + continue + + try: + self.is_transparams = True + self.fit_once(diff_data, k_ar, k_diff, k_ma) + if not np.isnan(self.bic) and self.bic < min_bic: + min_bic = self.bic + optimal_ar = k_ar + optimal_ma = k_ma + except InvalidParameter: + """Ignore while InvalidParameter occurred.""" + + self.is_transparams = True + try: + self.fit_once(self.given_data, optimal_ar, 1, optimal_ma) + except InvalidParameter: + logging.warning('[ARIMA] Found invalid parameters for forecasting metric %s: ar %d, diff 1, ma %d.', + sequence.name, optimal_ar, optimal_ma, exc_info=True) + self.is_transparams = True + self.fit_once(self.given_data, 2, 1, 0) + + def fit_once(self, y_raw, k_ar, k_diff, k_ma): """ fit trend_coeffs, ar_coeffs, ma_coeffs for ARIMA model. 
:return None """ - k = self.raw_data.k_trend - nobs = self.raw_data.y.shape[0] + """ + :param y_raw: type->np.array + :param order: type->tuple + """ + self.order = SimpleNamespace(k_ar=k_ar, k_diff=k_diff, k_ma=k_ma) + y_fit = np.array(np.diff(y_raw, n=k_diff)) + x_fit = np.ones((len(y_fit), 1)) + self.once_data = SimpleNamespace(x=x_fit, y=y_fit, raw_y=y_raw, k_trend=1) + self.nobs = len(y_fit) - k_ar + self.params = None + + k = self.once_data.k_trend + nobs = self.once_data.y.shape[0] start_params = self._fit_start_coeffs((self.order.k_ar, self.order.k_ma, k)) def loglike(params, *args): @@ -633,20 +693,14 @@ class ARIMA: :param steps: type->int :return forecast: type->np.array """ - ctime = int(time.time()) - logging.debug("[ARIMA:forecast:%s]: steps:%s, order:%s, coeffs:%s" % - (ctime, steps, self.order, self.params)) - logging.debug("[ARIMA:forecast:%s]: raw_data:%s" % (ctime, self.raw_data.y)) - self.resid = _get_errors(self.params, self.raw_data, self.order).squeeze() + self.resid = _get_errors(self.params, self.once_data, self.order).squeeze() forecast = _arma_predict_out_of_sample(self.params, steps, self.resid, - self.order, self.raw_data) - + self.order, self.once_data) forecast = unintegrate( forecast, unintegrate_levels( - self.raw_data.raw_y[-self.order.k_diff:], + self.once_data.raw_y[-self.order.k_diff:], self.order.k_diff ) )[self.order.k_diff:] - logging.debug("[ARIMA:forecast:%s]: forecast result: %s" % (ctime, forecast)) return forecast diff --git a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_common.py b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_common.py index 4b9e97ce0..649c6b071 100644 --- a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_common.py +++ b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/arima_model/arima_common.py @@ -91,8 +91,6 @@ def lagmat(x_raw, maxlag, trim='forward', original='ex'): lags = lmat[startobs:stopobs, dropidx:] if original == 'sep': leads = lmat[startobs:stopobs, :dropidx] - - if original == 'sep': return lags, leads return lags @@ -129,7 +127,7 @@ def get_k_constant(x_raw): return k_constant -class OLS(): +class OLS: """The OLS can compute linear correlation coefficient about x and y""" def __init__(self, y_raw, x_raw): self._x = np.asarray(x_raw) diff --git a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/forcasting_algorithm.py b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/forcasting_algorithm.py index 501122c3c..b62314f60 100644 --- a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/forcasting_algorithm.py +++ b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/forcasting_algorithm.py @@ -12,53 +12,19 @@ # See the Mulan PSL v2 for more details. import logging -import numpy as np -import itertools from types import SimpleNamespace -from ...types import Sequence -from ..statistics import sequence_interpolate, trim_head_and_tail_nan +import threading +from typing import Union, List + +import numpy as np + from .. 
import seasonal as seasonal_interface +from ..stat_utils import sequence_interpolate, trim_head_and_tail_nan +from ...types import Sequence +from dbmind.common.utils import dbmind_assert -MAX_AR_ORDER = 5 -MAX_MA_ORDER = 5 -MIN_DATA_LENGTH = max(MAX_AR_ORDER, MAX_MA_ORDER) - - -def estimate_order_of_model_parameters(raw_data, k_ar_min=0, k_diff_min=0, - k_ma_min=0, k_diff_max=0): - """return model type and model order""" - diff_data = np.diff(raw_data) - algorithm_name = "linear" - k_ar_valid, k_ma_valid = 0, 0 - min_bic = np.inf - bic_result_list = [] - for k_ar, k_diff, k_ma in \ - itertools.product(range(k_ar_min, MAX_AR_ORDER + 1), - range(k_diff_min, k_diff_max + 1), - range(k_ma_min, MAX_MA_ORDER + 1)): - if k_ar == 0 and k_diff == 0 and k_ma == 0: - continue - - try: - from .arima_model.arima_alg import ARIMA - - model = ARIMA(diff_data, order=(k_ar, k_diff, k_ma), ) - model.fit() - bic_result = model.bic - bic_result_list.append(bic_result) - if not np.isnan(bic_result) and bic_result < min_bic: - algorithm_name = "arima" - min_bic = bic_result - k_ar_valid = k_ar - k_ma_valid = k_ma - except ValueError: - """Ignore while ValueError occurred.""" - except Exception as e: - logging.warning("Warning occurred when estimate order of model parameters, " - "warning_msg is: %s", e) - order = (k_ar_valid, 1, k_ma_valid) - return algorithm_name, order +LINEAR_THRESHOLD = 0.80 class ForecastingAlgorithm: @@ -68,37 +34,48 @@ class ForecastingAlgorithm: """the subclass should implement, tarin model param""" pass - def forecast(self, forecast_length: int) -> Sequence: + def forecast(self, forecast_length: int) -> Union[List, np.array]: """the subclass should implement, forecast series according history series""" pass class ForecastingFactory: """the ForecastingFactory can create forecast model""" - _CACHE = {} # Reuse an instantiated object. + _CACHE = threading.local() # Reuse an instantiated object. 
@staticmethod - def get_instance(raw_data) -> ForecastingAlgorithm: - """return forecast model according algorithm_name""" - algorithm_name, order = estimate_order_of_model_parameters(raw_data) - logging.debug('Choose %s algorithm to forecast.', algorithm_name) - if algorithm_name == "linear": - from .simple_forecasting import SimpleLinearFitting - ForecastingFactory._CACHE[algorithm_name] = SimpleLinearFitting() - elif algorithm_name == "arima" or algorithm_name is None: - from .arima_model.arima_alg import ARIMA - ForecastingFactory._CACHE[algorithm_name] = ARIMA(raw_data, order) - else: - raise NotImplementedError(f'Failed to load {algorithm_name} algorithm.') + def _get(algorithm_name): + if not hasattr(ForecastingFactory._CACHE, algorithm_name): + if algorithm_name == 'linear': + from .simple_forecasting import SimpleLinearFitting + setattr(ForecastingFactory._CACHE, algorithm_name, SimpleLinearFitting(avoid_repetitive_fitting=True)) + elif algorithm_name == 'arima': + from .arima_model.arima_alg import ARIMA + setattr(ForecastingFactory._CACHE, algorithm_name, ARIMA()) + else: + raise NotImplementedError(f'Failed to load {algorithm_name} algorithm.') - return ForecastingFactory._CACHE[algorithm_name] + return getattr(ForecastingFactory._CACHE, algorithm_name) + + @staticmethod + def get_instance(sequence) -> ForecastingAlgorithm: + """Return a forecast model according to the feature of given sequence.""" + linear = ForecastingFactory._get('linear') + linear.refit() + linear.fit(sequence) + if linear.r2_score >= LINEAR_THRESHOLD: + logging.debug('Choose linear fitting algorithm to forecast.') + return linear + logging.debug('Choose ARIMA algorithm to forecast.') + return ForecastingFactory._get('arima') def _check_forecasting_minutes(forecasting_minutes): """ - check input params: forecasting_minutes whether is valid. - :param forecasting_minutes: type->int or float + check whether input params forecasting_minutes is valid. + :param forecasting_minutes: int or float :return: None + :exception: raise ValueError if given parameter is invalid. 
""" check_result = True message = "" @@ -118,66 +95,96 @@ def _check_forecasting_minutes(forecasting_minutes): def decompose_sequence(sequence): seasonal_data = None - raw_data = np.array(list(sequence.values)) - is_seasonal, period = seasonal_interface.is_seasonal_series(raw_data) + raw_data = np.array(sequence.values) + is_seasonal, period = seasonal_interface.is_seasonal_series( + raw_data, + high_ac_threshold=0.5, + min_seasonal_freq=3 + ) if is_seasonal: - decompose_results = seasonal_interface.seasonal_decompose(raw_data, period=period) - seasonal = decompose_results[0] - trend = decompose_results[1] - resid = decompose_results[2] + seasonal, trend, residual = seasonal_interface.seasonal_decompose(raw_data, period=period) train_sequence = Sequence(timestamps=sequence.timestamps, values=trend) train_sequence = sequence_interpolate(train_sequence) seasonal_data = SimpleNamespace(is_seasonal=is_seasonal, seasonal=seasonal, trend=trend, - resid=resid, + resid=residual, period=period) else: train_sequence = sequence return seasonal_data, train_sequence -def compose_sequence(seasonal_data, train_sequence, forecast_length, forecast_data): +def compose_sequence(seasonal_data, train_sequence, forecast_values): + forecast_length = len(forecast_values) if seasonal_data and seasonal_data.is_seasonal: start_index = len(train_sequence) % seasonal_data.period - forecast_data = seasonal_data.seasonal[start_index: start_index + forecast_length] + \ - forecast_data + \ - seasonal_data.resid[start_index: start_index + forecast_length] - forecast_timestamps = [train_sequence.timestamps[-1] + train_sequence.step * (i + 1) - for i in range(int(forecast_length))] - return Sequence(timestamps=forecast_timestamps, values=forecast_data) + seasonal = seasonal_data.seasonal + resid = seasonal_data.resid + dbmind_assert(len(seasonal) == len(resid)) + + if len(seasonal) - start_index < forecast_length: + # pad it. + padding_length = forecast_length - (len(seasonal) - start_index) + seasonal = np.pad(seasonal, (0, padding_length), mode='wrap') + resid = np.pad(resid, (0, padding_length), mode='wrap') + seasonal = seasonal[start_index: start_index + forecast_length] + resid = resid[start_index: start_index + forecast_length] + forecast_values = seasonal + forecast_values + resid + + forecast_timestamps = [train_sequence.timestamps[-1] + train_sequence.step * i + for i in range(1, forecast_length + 1)] + return forecast_timestamps, forecast_values -def quickly_forecast(sequence, forecasting_minutes): +def quickly_forecast(sequence, forecasting_minutes, lower=0, upper=float('inf')): """ - return forecast sequence in forecasting_minutes from raw sequnece + Return forecast sequence in forecasting_minutes from raw sequence. :param sequence: type->Sequence :param forecasting_minutes: type->int or float - :return: forecase sequence: type->Sequence + :param lower: The lower limit of the forecast result + :param upper: The upper limit of the forecast result. + :return: forecast sequence: type->Sequence """ - # 1 check forecasting minutes + if len(sequence) <= 1: + return Sequence() + + # 1. check for forecasting minutes _check_forecasting_minutes(forecasting_minutes) - forecasting_length = int(forecasting_minutes * 60 * 1000 // sequence.step) + forecasting_length = int(forecasting_minutes * 60 * 1000 / sequence.step) if forecasting_length == 0 or forecasting_minutes == 0: return Sequence() - # 2 interpolate - sequence = sequence_interpolate(sequence) + # 2. 
interpolate + interpolated_sequence = sequence_interpolate(sequence) - # 3 decompose sequence - seasonal_data, train_sequence = decompose_sequence(sequence) + # 3. decompose sequence + seasonal_data, train_sequence = decompose_sequence(interpolated_sequence) - # 4 get model from ForecastingFactory - model = ForecastingFactory.get_instance(list(train_sequence.values)) + # 4. get model from ForecastingFactory + model = ForecastingFactory.get_instance(train_sequence) - # 5 model fit and forecast + # 5. fit and forecast model.fit(train_sequence) + forecast_data = model.forecast(forecasting_length) forecast_data = trim_head_and_tail_nan(forecast_data) + dbmind_assert(len(forecast_data) == forecasting_length) - # 6 compose sequence - forecast_sequence = compose_sequence(seasonal_data, - train_sequence, - forecasting_length, - forecast_data) - return forecast_sequence + # 6. compose sequence + forecast_timestamps, forecast_values = compose_sequence( + seasonal_data, + train_sequence, + forecast_data + ) + + for i in range(len(forecast_values)): + forecast_values[i] = min(forecast_values[i], upper) + forecast_values[i] = max(forecast_values[i], lower) + + return Sequence( + timestamps=forecast_timestamps, + values=forecast_values, + name=sequence.name, + labels=sequence.labels + ) diff --git a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/simple_forecasting.py b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/simple_forecasting.py index 0629b545e..8777d46c9 100644 --- a/src/gausskernel/dbmind/tools/common/algorithm/forecasting/simple_forecasting.py +++ b/src/gausskernel/dbmind/tools/common/algorithm/forecasting/simple_forecasting.py @@ -10,29 +10,90 @@ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. - import numpy as np from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split from sklearn.preprocessing import PolynomialFeatures -from ...types import Sequence +from dbmind.common.types import Sequence from .forcasting_algorithm import ForecastingAlgorithm -def series_to_supervised(sequence: Sequence, test_split=.0, poly_degree=None): +def series_to_supervised(sequence: Sequence, test_size=.1, poly_degree=None, + random_state=None, shuffle=False): x, y = sequence.to_2d_array() - length = sequence.length - test_length = int(length * test_split) - x_train, x_test = x[:length - test_length], x[length - test_length:] - y_train, y_test = y[:length - test_length], y[length - test_length:] + x_train, x_test, y_train, y_test = train_test_split( + x, y, test_size=test_size, shuffle=shuffle, random_state=random_state + ) + if poly_degree: poly = PolynomialFeatures(degree=poly_degree).fit(x) x_train = poly.transform(x_train) x_test = poly.transform(x_test) + return x_train, x_test, y_train, y_test class SimpleLinearFitting(ForecastingAlgorithm): + def __init__(self, avoid_repetitive_fitting=False): + self._a = None + self._b = None + self._r2 = None + self._last_x = None + self._step = None + self._fitted = False + self._avoid_repetitive_fitting = avoid_repetitive_fitting + + def refit(self): + self._fitted = False + + def fit(self, sequence: Sequence): + # `self._fitted` is a flag to control whether performing the fitting process because + # this fitting algorithm can estimate the linear degree. And if the class has + # estimated a sequence, it should not fit one more. So, we use this flag to + # prevent fitting again. 
+ if self._avoid_repetitive_fitting and self._fitted: + return + + if sequence.length < 2: + raise ValueError('Unable to fit the sequence due to short length.') + + n = len(sequence) + sx = sy = sxx = syy = sxy = 0 + # timestamp acts x-axis, values acts y-axis. + for t, v in sequence: + sx += t + sy += v + sxx += t * t + syy += v * v + sxy += t * v + a = (sy * sx / n - sxy) / (sx * sx / n - sxx) + b = (sy - a * sx) / n + numerator = syy + a * a * sxx + b * b * n + 2 * a * b * sx - 2 * a * sxy - 2 * b * sy + denominator = syy - sy * sy / n + 1e-9 + r2 = 1 - numerator / denominator + + self._a = a + self._b = b + self._r2 = r2 + self._last_x = sequence.timestamps[-1] + self._step = sequence.step + self._fitted = True + + def forecast(self, forecast_length): + future = [] + for i in range(1, forecast_length + 1): + t = self._last_x + i * self._step + v = self._a * t + self._b + future.append(v) + return future + + @property + def r2_score(self): + return self._r2 + + +class SimpleLinearRegression(ForecastingAlgorithm): def __init__(self): self.model = LinearRegression(copy_X=False) self.interval = None @@ -46,7 +107,6 @@ class SimpleLinearFitting(ForecastingAlgorithm): self.interval = x[1] - x[0] self.last_x = x[-1] x = np.reshape(x, newshape=(-1, 1)) - self.model.fit(x, y) def forecast(self, forecast_length): @@ -91,5 +151,4 @@ class SupervisedModel(ForecastingAlgorithm): if self.bias: bias = y_pred.flatten()[0] - self.sequence.values[-1] y_pred -= bias - return Sequence(timestamps=x_pred.flatten().tolist(), - values=y_pred.flatten().tolist()) + return y_pred.flatten().tolist() diff --git a/src/gausskernel/dbmind/tools/common/algorithm/seasonal.py b/src/gausskernel/dbmind/tools/common/algorithm/seasonal.py index 655c85c5a..da55da9e2 100644 --- a/src/gausskernel/dbmind/tools/common/algorithm/seasonal.py +++ b/src/gausskernel/dbmind/tools/common/algorithm/seasonal.py @@ -13,18 +13,22 @@ import numpy as np from scipy import signal -from .statistics import trim_head_and_tail_nan +from .stat_utils import trim_head_and_tail_nan import warnings + warnings.filterwarnings("ignore") -def acf(x_raw: np, nlags=None): - """the acf can compute correlation from x[t] and x[t -k]""" - x_raw = np.array(x_raw) - x_diff = x_raw - x_raw.mean() - n_x = len(x_raw) - d_param = n_x * np.ones(2 * n_x - 1) - acov = np.correlate(x_diff, x_diff, "full")[n_x - 1:] / d_param[n_x - 1:] - return acov[: nlags + 1] / acov[0] + +def acf(x_raw: np.array, nlags=None): + x = np.array(x_raw) + n = x.shape[0] + if nlags is None: + nlags = min(int(10 * np.log10(n)), n - 1) + + x_diff = x - x.mean() + avf = np.correlate(x_diff, x_diff, "full")[n - 1:] / n + res = avf[: nlags + 1] / avf[0] + return res def _padding_nans(x_raw, trim_head=None, trim_tail=None): @@ -41,50 +45,47 @@ def _padding_nans(x_raw, trim_head=None, trim_tail=None): return result -def _get_trend(x_raw, filt): - """"use filt to extract trend component""" - trim_head = int(np.ceil(len(filt) / 2.) - 1) or None - trim_tail = int(np.ceil(len(filt) / 2.) 
- len(filt) % 2) or None - result = signal.convolve(x_raw, filt, mode='valid') +def _get_trend(x_raw, filter_): + """"use the filter to extract trend component""" + length = len(filter_) + trim_tail = (length - 1) // 2 or None + trim_head = length - 1 - trim_tail or None + result = signal.convolve(x_raw, filter_, mode='valid') result = _padding_nans(result, trim_head, trim_tail) - return result def is_seasonal_series(s_values, high_ac_threshold: float = 0.7, min_seasonal_freq=3): - """judge series whether is seasonal with acf alg""" - result = False - period = None + """Judge whether the series is seasonal by using the acf alg""" s_ac = acf(s_values, nlags=len(s_values)) diff_ac = np.diff(s_ac) - high_ac_peak_pos = (1 + np.argwhere((diff_ac[:-1] > 0) & (diff_ac[1:] < 0) - & (s_ac[1: -1] > high_ac_threshold)).flatten()) + high_ac_peak_pos = 1 + np.argwhere( + (diff_ac[:-1] > 0) & (diff_ac[1:] < 0) & (s_ac[1: -1] > high_ac_threshold) + ).flatten() for i in high_ac_peak_pos: if i > min_seasonal_freq: - period = high_ac_peak_pos[np.argmax(s_ac[high_ac_peak_pos])] - result = True - break - return result, period + return True, high_ac_peak_pos[np.argmax(s_ac[high_ac_peak_pos])] + + return False, None -def get_seasonal_period(s_values, high_ac_threshold: float = 0.5): +def get_seasonal_period(s_values, high_ac_threshold: float = 0.5, min_seasonal_freq=3): """"return seasonal period""" - result = is_seasonal_series(s_values, high_ac_threshold) - return result[1] + return is_seasonal_series(s_values, high_ac_threshold, min_seasonal_freq)[1] -def _get_filt(period): +def _get_filter(period): """the filter to extract trend component""" if period % 2 == 0: - filt = np.array([.5] + [1] * (period - 1) + [.5]) / period + filter_ = np.array([.5] + [1] * (period - 1) + [.5]) / period else: - filt = np.repeat(1. / period, period) - return filt + filter_ = np.repeat(1. 
/ period, period) + return filter_ def _get_seasonal(x_raw, detrended, period): - """"return seasonal component from x_raw, detrended and period""" + """"return the seasonal component from x_raw, detrended and period""" nobs = len(x_raw) period_averages = np.array([np.nanmean(detrended[i::period]) for i in range(period)]) period_averages -= np.mean(period_averages, axis=0) @@ -94,23 +95,22 @@ def _get_seasonal(x_raw, detrended, period): def seasonal_decompose(x_raw, period=None): - """seasonal series can decompose three component: trend, seasonal, resid""" - pfreq = period + """decompose a series into three components: seasonal, trend, residual""" if np.ndim(x_raw) > 1: - raise ValueError("x ndim > 1 not implemented") + raise ValueError("The input data must be 1-D array.") if period is None: - raise ValueError("preiod must not None") + raise ValueError("You must specify a period.") if not np.all(np.isfinite(x_raw)): - raise ValueError("the x has no valid values") + raise ValueError("The input data has infinite value or nan.") + + if x_raw.shape[0] < 2 * period: + raise ValueError(f"The input data should be longer than two periods:{2 * period} at least") - if x_raw.shape[0] < 2 * pfreq: - raise ValueError(f"the x length:{x_raw.shape[0]} not meet 2 preiod:{2 * pfreq}") x_raw = trim_head_and_tail_nan(x_raw) - filt = _get_filt(period) - trend = _get_trend(x_raw, filt) + trend = _get_trend(x_raw, _get_filter(period)) trend = trim_head_and_tail_nan(trend) detrended = x_raw - trend diff --git a/src/gausskernel/dbmind/tools/common/algorithm/stat_utils.py b/src/gausskernel/dbmind/tools/common/algorithm/stat_utils.py new file mode 100644 index 000000000..218c95ac9 --- /dev/null +++ b/src/gausskernel/dbmind/tools/common/algorithm/stat_utils.py @@ -0,0 +1,229 @@ +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +from types import SimpleNamespace + +import numpy as np +from scipy.interpolate import interp1d + +from dbmind.common.types import Sequence + + +def double_padding(values, window): + left_idx = window - 1 - (window - 1) // 2 + right_idx = len(values) - 1 - (window - 1) // 2 + values[:left_idx] = values[left_idx] # padding left + values[right_idx + 1:] = values[right_idx] # padding right + return values + + +def np_shift(values, shift_distance=1): + """shift values a shift_distance""" + if len(values) < 2: + return values + shifted_values = np.roll(values, shift_distance) + for i in range(shift_distance): + shifted_values[i] = shifted_values[shift_distance] + return shifted_values + + +def np_moving_avg(values, window=5, mode="same"): + """Computes the moving average for sequence + and returns a new sequence padded with valid + value at both ends. + """ + moving_avg_values = np.convolve(values, np.ones((window,)) / window, mode=mode) + moving_avg_values = double_padding(moving_avg_values, window) + return moving_avg_values + + +def np_moving_std(values, window=10): + """Computes the standard deviation for sequence + and returns a new sequence padded with valid + value at both ends. 
+ """ + sequence_length = len(values) + moving_std_values = np.zeros(sequence_length) + left_idx = window - 1 - (window - 1) // 2 + for i in range(sequence_length - window + 1): + moving_std_values[left_idx+i] = np.std(values[i:i + window]) + + moving_std_values = double_padding(moving_std_values, window) + return moving_std_values + + +def np_double_rolling(values, window1=5, window2=1, diff_mode="diff"): + values_length = len(values) + window1 = 1 if values_length < window1 else window1 + window2 = 1 if values_length < window2 else window2 + + left_rolling = np_moving_avg(np_shift(values), window=window1) + right_rolling = np_moving_avg(values[::-1], window=window2)[::-1] + r_data = right_rolling - left_rolling + + functions = { + 'abs': lambda x: np.abs(x), + 'rel': lambda x: x / left_rolling + } + methods = diff_mode.split('_')[:-1] + for method in methods: + r_data = functions[method](r_data) + + r_data = double_padding(r_data, max(window1, window2)) + return r_data + + +def measure_head_and_tail_nan(data): + data_not_nan = -1 * np.isnan(data) + left = data_not_nan.argmax() + right = data_not_nan[::-1].argmax() + return left, right + + +def trim_head_and_tail_nan(data): + """ + when there are nan value at head or tail of forecast_data, + this function will fill value with near value + :param data: type->np.array or list + :return data: type->same type as the input 'data' + """ + length = len(data) + if length == 0: + return data + + data_not_nan = np.isnan(data) + if data_not_nan.all(): + data[:] = [0] * length + return data + + left, right = measure_head_and_tail_nan(data) + + data[:left] = [data[left]] * left + data[length - right:] = [data[length - right - 1]] * right + return data + + +def _valid_value(v): + return not (np.isnan(v) or np.isinf(v)) + + +def _init_interpolate_param(sequence): + """"init interpolate param for sequence_interpolate function""" + length = len(sequence) + if length == 0: + return sequence + + x = np.array(range(len(sequence))) + y = np.array(sequence.values) + left, right = measure_head_and_tail_nan(y) + na_param = SimpleNamespace(head_na_index=range(left), tail_na_index=range(length-right, length), + head_start_nona_value=y[left], + tail_start_nona_value=y[length-right-1]) + return x[left:length - right], y[left:length - right], na_param + + +def tidy_up_sequence(sequence): + """Fill up missing values for sequence and + align sequence's timestamps. + """ + if sequence.step <= 0: + return sequence + + def estimate_error(a, b): + return (a - b) / b + + timestamps = list(sequence.timestamps) + values = list(sequence.values) + + i = 1 + while i < len(timestamps): + real_interval = timestamps[i] - timestamps[i - 1] + error = estimate_error(real_interval, sequence.step) + if error < 0: + # This is because the current timestamp is lesser than the previous one. + # We should remove one to keep monotonic. + if not _valid_value(values[i - 1]): + values[i - 1] = values[i] + timestamps.pop(i) + values.pop(i) + i -= 1 # We have removed an element so we have to decrease the cursor. + elif error == 0: + """Everything is normal, skipping.""" + elif 0 < error < 1: + # Align the current timestamp. + timestamps[i] = timestamps[i - 1] + sequence.step + else: + # Fill up missing value with NaN. 
+ next_ = timestamps[i - 1] + sequence.step + timestamps.insert(i, next_) + values.insert(i, float('nan')) + i += 1 + + return Sequence(timestamps, values) + + +def sequence_interpolate(sequence: Sequence, fit_method="cubic", strip_details=True): + """interpolate with scipy interp1d""" + filled_sequence = tidy_up_sequence(sequence) + has_defined = [_valid_value(v) for v in filled_sequence.values] + + if all(has_defined): + if strip_details: + return filled_sequence + else: + return Sequence( + timestamps=filled_sequence.timestamps, + values=filled_sequence.values, + name=sequence.name, + step=sequence.step, + labels=sequence.labels + ) + + if True not in has_defined: + raise ValueError("All of sequence values are undefined.") + + y_raw = np.array(filled_sequence.values) + y_nona = [] + x_nona = [] + na_index = [] + + x_new, y_new, na_param = _init_interpolate_param(filled_sequence) + + # prepare x_nona and y_nona for interp1d + for i in range(len(y_new)): + if _valid_value(y_new[i]): + y_nona.append(y_new[i]) + x_nona.append(x_new[i]) + else: + na_index.append(i) + + fit_func = interp1d(x_nona, y_nona, kind=fit_method) + y_new = fit_func(x_new) + + # replace the nan with interp1d value for raw y + for i in na_index: + raw_index = i + len(na_param.head_na_index) + y_raw[raw_index] = y_new[i] + + y_raw[na_param.head_na_index] = na_param.head_start_nona_value + y_raw[na_param.tail_na_index] = na_param.tail_start_nona_value + if strip_details: + return Sequence(timestamps=filled_sequence.timestamps, values=y_raw) + else: + return Sequence( + timestamps=filled_sequence.timestamps, + values=y_raw, + name=sequence.name, + step=sequence.step, + labels=sequence.labels + ) diff --git a/src/gausskernel/dbmind/tools/common/algorithm/statistics.py b/src/gausskernel/dbmind/tools/common/algorithm/statistics.py deleted file mode 100644 index 3eb18b684..000000000 --- a/src/gausskernel/dbmind/tools/common/algorithm/statistics.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright (c) 2020 Huawei Technologies Co.,Ltd. -# -# openGauss is licensed under Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# -# http://license.coscl.org.cn/MulanPSL2 -# -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, -# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -# See the Mulan PSL v2 for more details. 
- -from types import SimpleNamespace -import numpy as np -from scipy.interpolate import interp1d -from dbmind.common.types import Sequence - - -def np_quantile(values, quantile): - """return the quantile of values""" - return np.nanpercentile(values, quantile) - - -def np_shift(values, shift_distance=1): - """shift values a shift_distance""" - shifted_values = np.roll(values, shift_distance) - for i in range(shift_distance): - shifted_values[i] = shifted_values[shift_distance] - return shifted_values - - -def np_moving_avg(values, window=5, mode="same"): - """Compute the moving average for sequence - and create a new sequence as the return value.""" - moving_avg_values = np.convolve(values, np.ones((window,)) / window, mode=mode) - start_idx = len(values) - window - moving_avg_values[start_idx:] = moving_avg_values[start_idx] # padding other remaining value - return moving_avg_values - - -def np_moving_std(values, window=10): - """Compute and return the standard deviation for sequence.""" - sequence_length = len(values) - calculation_length = sequence_length - window - moving_std_values = [np.std(values[i:i + window]) for i in range(calculation_length)] - # padding - for _ in range(window): - moving_std_values.append(moving_std_values[-1]) - - return np.array(moving_std_values) - - -def np_double_rolling(values, agg="mean", window1=5, window2=1, diff_mode="diff"): - """double rolling the values""" - if agg == "mean": - left_rolling = np_moving_avg(np_shift(values), window=window1) - right_rolling = np_moving_avg(values[::-1], window=window2)[::-1] - elif agg == "std": - left_rolling = np_moving_std(np_shift(values), window=window1) - right_rolling = np_moving_std(values[::-1], window=window2)[::-1] - else: - return values - diff_mode_map = { - "diff": (right_rolling - left_rolling), - "abs_diff": np.abs(right_rolling - left_rolling), - "rel_diff": (right_rolling - left_rolling) / left_rolling, - "abs_rel_diff": np.abs(right_rolling - left_rolling) / left_rolling - } - r_data = diff_mode_map.get(diff_mode) - values_length = len(values) - window = max(window1, window2) - tail_length = int(window / 2) - for i in range(tail_length): - r_data[values_length - i - 1] = r_data[values_length - tail_length - 1] - return r_data - - -def trim_head_and_tail_nan(data): - """ - when there are nan value at head or tail of forecast_data, - this function will fill value with near value - :param data: type->np.array - :return:data: type->np.array - """ - head_start_nona_value = 0 - head_na_index = [] - tail_start_nona_value = 0 - tail_na_index = [] - - if len(data) == 0: - return data - - for i in range(len(data)): - if not np.isnan(data[0]): - break - if not np.isnan(data[i]): - head_start_nona_value = data[i] - break - else: - head_na_index.append(i) - - for i in range(len(data) - 1, 1, -1): - if not np.isnan(data[-1]): - break - if not np.isnan(data[i]): - tail_start_nona_value = data[i] - break - else: - tail_na_index.append(i) - - for i in head_na_index: - data[i] = head_start_nona_value - - for i in tail_na_index: - data[i] = tail_start_nona_value - - return data - - -def _init_interpolate_param(sequence): - """"init interpolate param for sequence_interpolate function""" - x_raw = np.array(list(range(len(sequence.timestamps)))) - y_raw = np.array(sequence.values) - head_na_index = [] - head_start_nona_value = None - tail_na_index = [] - tail_start_nona_value = None - x_new = list(x_raw) - y_new = list(y_raw) - - #init head_start_nona_value, head_na_index - for i in range(len(y_raw)): - if not 
np.isnan(y_raw[0]): - break - if not np.isnan(y_raw[i]): - head_start_nona_value = y_raw[i] - break - else: - head_na_index.append(i) - - #init tail_start_nona_value, tail_na_index - for i in range(len(y_raw) - 1, 1, -1): - if not np.isnan(y_raw[-1]): - break - if not np.isnan(y_raw[i]): - tail_start_nona_value = y_raw[i] - break - else: - tail_na_index.append(i) - - #pop the nan from head and tail of data - for i in range(len(head_na_index)): - x_new.pop(0) - y_new.pop(0) - - for i in range(len(tail_na_index)): - x_new.pop(-1) - y_new.pop(-1) - - na_param = SimpleNamespace(head_na_index=head_na_index, tail_na_index=tail_na_index, - head_start_nona_value=head_start_nona_value, - tail_start_nona_value=tail_start_nona_value) - return x_new, y_new, na_param - - -def sequence_interpolate(sequence: Sequence, fit_method="cubic"): - """interpolate with scipy interp1d""" - nan_exist_result = [True if not np.isnan(i) else False for i in sequence.values] - if all(nan_exist_result): - return sequence - if True not in nan_exist_result: - raise ValueError("sequence values are all nan") - - y_raw = np.array(sequence.values) - y_nona = [] - x_nona = [] - na_index = [] - - x_new, y_new, na_param = _init_interpolate_param(sequence) - - #prepare x_nona and y_nona for interp1d - for i in range(len(y_new)): - if not np.isnan(y_new[i]): - y_nona.append(y_new[i]) - x_nona.append(x_new[i]) - else: - na_index.append(i) - - fit_func = interp1d(x_nona, y_nona, kind=fit_method) - y_new = fit_func(x_new) - - #replace the nan with interp1d value for raw y - for i in na_index: - raw_index = i + len(na_param.head_na_index) - y_raw[raw_index] = y_new[i] - - y_raw[na_param.head_na_index] = na_param.head_start_nona_value - y_raw[na_param.tail_na_index] = na_param.tail_start_nona_value - return Sequence(timestamps=sequence.timestamps, values=y_raw) diff --git a/src/gausskernel/dbmind/tools/common/dispatcher/task_scheduler.py b/src/gausskernel/dbmind/tools/common/dispatcher/task_scheduler.py index 9123e3100..d1f58c6ff 100644 --- a/src/gausskernel/dbmind/tools/common/dispatcher/task_scheduler.py +++ b/src/gausskernel/dbmind/tools/common/dispatcher/task_scheduler.py @@ -29,7 +29,7 @@ class RepeatedTimer(Thread): self._args = args self._kwargs = kwargs self._finished = Event() - Thread.__init__(self) + Thread.__init__(self, daemon=True) def run(self): while not self._finished.is_set(): diff --git a/src/gausskernel/dbmind/tools/common/dispatcher/task_worker.py b/src/gausskernel/dbmind/tools/common/dispatcher/task_worker.py index 60797d97a..18bde284f 100644 --- a/src/gausskernel/dbmind/tools/common/dispatcher/task_worker.py +++ b/src/gausskernel/dbmind/tools/common/dispatcher/task_worker.py @@ -11,14 +11,13 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. 
+import concurrent import logging import os import signal from abc import ABC, abstractmethod -from concurrent.futures.process import ProcessPoolExecutor from concurrent.futures import as_completed, wait -import concurrent -from multiprocessing import Event +from concurrent.futures.process import ProcessPoolExecutor from dbmind.common import utils from dbmind.common.platform import WIN32 @@ -26,6 +25,8 @@ from dbmind.common.platform import WIN32 IN_PROCESS = 'DBMind [Worker Process] [IN PROCESS]' PENDING = 'DBMind [Worker Process] [IDLE]' +_mp_sync_mgr_instance = None + def _initializer(): signal.signal(signal.SIGTERM, signal.SIG_IGN) @@ -77,19 +78,44 @@ class AbstractWorker(ABC): self.status = self.CLOSED +def get_mp_sync_manager(): + global _mp_sync_mgr_instance + + from multiprocessing.managers import DictProxy, SyncManager + from collections import defaultdict + + class MPSyncManager(SyncManager): + __proc_title__ = 'DBMind [SyncManager Process]' + + @staticmethod + def _initializer(): + utils.set_proc_title(MPSyncManager.__proc_title__) + + def start(self): + super().start(initializer=MPSyncManager._initializer) + + MPSyncManager.register('defaultdict', defaultdict, DictProxy) + if not _mp_sync_mgr_instance: + _mp_sync_mgr_instance = MPSyncManager() + _mp_sync_mgr_instance.start() + return _mp_sync_mgr_instance + + class _ProcessPoolExecutor(ProcessPoolExecutor): @staticmethod def _wait_for_notify(event): + # Set the status of the current work process. + _initializer() event.wait() - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, worker_num): + super().__init__(worker_num) # Make the process pool is a fixed process pool, which creates many idle processes and waits for the # scheduler's task. Why not use lazy-loading mode? Because the worker process forked from the master process, # the master process maybe have some running backend threads while forking. This action will cause unexpected # behaviors, such as timed backend threads also being forked and run in the child process. 
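# --- Editor's illustration (not part of the patch): a minimal, standalone sketch of the
# --- "fixed pool" pre-warm pattern the comment above describes, using only the standard
# --- library. Submitting one blocking task per worker forces the executor to start every
# --- worker up front, before the parent process spawns its backend threads; a manager-backed
# --- Event is used for the release signal, mirroring the swap made on the next patch line.
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager


def _park(event):
    event.wait()  # hold the freshly created worker until the pool is fully populated


if __name__ == '__main__':
    manager = Manager()
    release = manager.Event()
    pool = ProcessPoolExecutor(max_workers=4)
    for _ in range(4):
        pool.submit(_park, release)  # one parked task per worker
    release.set()                    # every worker now exists; let them all go idle
    pool.shutdown(wait=True)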
- event = Event() + event = get_mp_sync_manager().Event() for _ in range(self._max_workers): self.submit(self._wait_for_notify, event) event.set() @@ -121,7 +147,7 @@ class ProcessWorker(AbstractWorker): from concurrent.futures.thread import ThreadPoolExecutor self.pool = ThreadPoolExecutor(worker_num) else: - self.pool = _ProcessPoolExecutor(worker_num, initializer=_initializer) + self.pool = _ProcessPoolExecutor(worker_num) super().__init__(worker_num) @@ -167,7 +193,7 @@ class ProcessWorker(AbstractWorker): self.pool.shutdown(True, cancel_futures=cancel_futures) -def get_worker_instance(_type, process_num, hosts=None) -> AbstractWorker: +def get_worker_instance(_type, process_num) -> AbstractWorker: if _type == 'local': return ProcessWorker(process_num) elif _type == 'dist': diff --git a/src/gausskernel/dbmind/tools/common/exceptions.py b/src/gausskernel/dbmind/tools/common/exceptions.py index 6c9843143..f23db1520 100644 --- a/src/gausskernel/dbmind/tools/common/exceptions.py +++ b/src/gausskernel/dbmind/tools/common/exceptions.py @@ -33,3 +33,6 @@ class SQLExecutionError(Exception): class ConfigSettingError(Exception): pass + +class DuplicateTableError(Exception): + pass diff --git a/src/gausskernel/dbmind/tools/common/security.py b/src/gausskernel/dbmind/tools/common/security.py index d7f47ec4b..8f264ba2c 100644 --- a/src/gausskernel/dbmind/tools/common/security.py +++ b/src/gausskernel/dbmind/tools/common/security.py @@ -15,6 +15,7 @@ import hmac import random import secrets import string +import re from Crypto.Cipher import AES from Crypto.Util.Padding import pad, unpad @@ -35,6 +36,25 @@ def check_path_valid(path): return True +def check_ip_valid(value): + ip_pattern = re.compile(r'^(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[1-9])\.' + '(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.' + '(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.' + '(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)$') + if ip_pattern.match(value): + return True + return False + + +def check_port_valid(value): + if isinstance(value, str): + return str.isdigit(value) and 1023 < int(value) <= 65535 + elif isinstance(value, int): + return 1023 < value <= 65535 + else: + return False + + def unsafe_random_string(length): """Used to generate a fixed-length random string which is not used in the sensitive scenarios.""" diff --git a/src/gausskernel/dbmind/tools/common/types/sequence.py b/src/gausskernel/dbmind/tools/common/types/sequence.py index 76004833d..5fdb09505 100644 --- a/src/gausskernel/dbmind/tools/common/types/sequence.py +++ b/src/gausskernel/dbmind/tools/common/types/sequence.py @@ -12,8 +12,7 @@ # See the Mulan PSL v2 for more details. from typing import Optional -from dbmind.common.algorithm.basic import binary_search -from dbmind.common.algorithm.basic import how_many_lesser_elements, how_many_larger_elements +from dbmind.common.algorithm.basic import binary_search, binary_search_leftmost, binary_search_rightmost from ..either import OptionalContainer, OptionalValue from ..utils import cached_property @@ -140,8 +139,8 @@ class Sequence: # ``how_many_larger_elements()`` can ensure that # the position of the searching element always stays # at the position of the last element not greater than it in the array. 
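# --- Editor's illustration (not part of the patch): the slice semantics the replacement
# --- lines below rely on, expressed with the standard bisect module. The mapping is an
# --- assumption: binary_search_leftmost is read as "first index with value >= target"
# --- (bisect_left) and binary_search_rightmost as "last index with value <= target"
# --- (bisect_right - 1), so that [leftmost(ts_start) : rightmost(ts_end) + 1] selects
# --- every timestamp inside the closed interval [ts_start, ts_end].
from bisect import bisect_left, bisect_right

timestamps = [10, 20, 30, 40, 50]
ts_start, ts_end = 20, 40

start_position = bisect_left(timestamps, ts_start)    # 1 -> first timestamp >= ts_start
end_position = bisect_right(timestamps, ts_end) - 1   # 3 -> last timestamp <= ts_end
assert timestamps[start_position:end_position + 1] == [20, 30, 40]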
- start_position = how_many_lesser_elements(timestamps, ts_start) - end_position = how_many_larger_elements(timestamps, ts_end) + start_position = binary_search_leftmost(timestamps, ts_start) + end_position = binary_search_rightmost(timestamps, ts_end) return end_position - start_position + 1 def to_2d_array(self): @@ -151,15 +150,15 @@ class Sequence: def values(self): """The property will generate a copy.""" timestamps, values, ts_start, ts_end = self._get_entity() - return values[how_many_lesser_elements(timestamps, ts_start): - how_many_larger_elements(timestamps, ts_end) + 1] + return values[binary_search_leftmost(timestamps, ts_start): + binary_search_rightmost(timestamps, ts_end) + 1] - @cached_property + @property def timestamps(self): """The property will generate a copy.""" timestamps, values, ts_start, ts_end = self._get_entity() - return timestamps[how_many_lesser_elements(timestamps, ts_start): - how_many_larger_elements(timestamps, ts_end) + 1] + return timestamps[binary_search_leftmost(timestamps, ts_start): + binary_search_rightmost(timestamps, ts_end) + 1] @cached_property def step(self): diff --git a/src/gausskernel/dbmind/tools/components/extract_log.py b/src/gausskernel/dbmind/tools/components/extract_log.py index c989aea2e..f31c03095 100644 --- a/src/gausskernel/dbmind/tools/components/extract_log.py +++ b/src/gausskernel/dbmind/tools/components/extract_log.py @@ -11,15 +11,14 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. -import re -import os -import sys import argparse import json +import os import random +import re +import sys import time from collections import deque -from subprocess import Popen, PIPE SQL_TYPE = ['select ', 'delete ', 'insert ', 'update '] SQL_AMOUNT = 0 @@ -27,9 +26,10 @@ PLACEHOLDER = r'@@@' SAMPLE_NUM = 5 IS_ALL_LATEST_SQL = False FILEHANDLES = 500 -SQL_PATTERN = [r'\((\s*(\d+(\.\d+)?\s*)[,]?)+\)', # match integer set in the IN collection - r'([^\\])\'((\')|(.*?([^\\])\'))', # match all content in single quotes - r'(([^<>]\s*=\s*)|([^<>]\s+))(\d+)(\.\d+)?'] # match single integer +SQL_PATTERN = [r'([^\\])\'((\')|(.*?([^\\])\'))', # match all content in single quotes + r'\((\s*(\-|\+)?\d+(\.\d+)?\s*)(,\s*(\-|\+)?\d+(\.\d+)?\s*)*\)', + # match integer set in the IN collection + r'(([<>=]+\s*)|(\s+))(\-|\+)?\d+(\.\d+)?'] # match single integer def truncate_template(templates, update_time, avg_update): @@ -104,7 +104,8 @@ def get_workload_template(templates, sqls, args): def output_valid_sql(sql): is_quotation_valid = sql.count("'") % 2 - if re.search(r'=([\s]+)?\$', sql): + if re.search(r'=([\s]+)?\$', sql) or re.search(r'[\s]+\((([\s]+)?\$[\d]+([\s]+)?)((,([\s]+)?\$[\d]+([\s]+)?)+)?\)', + sql): return '' if 'from pg_' in sql.lower() or 'gs_index_advise' in sql.lower() or is_quotation_valid: return '' @@ -169,7 +170,7 @@ def get_parsed_sql(file, filter_config, log_info_position): SQL_AMOUNT += 1 sql_record.sqllist = [] sql = '' if len(sql.lower().strip(';').split(';', 1)) == 1 else \ - sql.lower().strip(';').split(';', 1)[1] + sql.lower().strip(';').split(';', 1)[1] if sql.lower().strip().strip(';').strip().endswith(('commit', 'rollback')) \ and threadid_position: output_sql = output_valid_sql(sql.lower().strip().strip(';') \ @@ -214,17 +215,18 @@ def get_parsed_sql(file, filter_config, log_info_position): def get_start_position(start_time, file_path): - while start_time: - cmd = 'head -n $(cat %s | grep -m 1 -n "^%s" | awk -F : \'{print $1}\') %s | wc -c' % \ - (file_path, start_time, file_path) - 
proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) - std, err_msg = proc.communicate() - if proc.returncode == 0 and not err_msg: - return int(std) - elif len(start_time) > 13: - start_time = start_time[0: -3] - else: - break + time_pattern = re.compile(r'\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}') + start_time_stamp = int(time.mktime(time.strptime(start_time, '%Y-%m-%d %H:%M:%S'))) + start_position = 0 + for line in open(file_path, 'r', errors='ignore'): + match_result = time_pattern.match(line) + if match_result: + matched_time = match_result.group() + current_time_stamp = int(time.mktime(time.strptime(matched_time, '%Y-%m-%d %H:%M:%S'))) + if current_time_stamp >= start_time_stamp: + return start_position + start_position += len(line) + return -1 @@ -257,6 +259,17 @@ class threadid_info: self.fileh.write(content) +def generate_line(file): + templine = '' + for line in file: + if line.endswith('\r'): + templine += line[:-1] + else: + templine += line + yield templine + templine = '' + + # split the log to different files groupby the threadid with file handles below FILEHANDLES def group_log_by_threadid(f, threadid_position): threadid = '000000' @@ -266,14 +279,16 @@ def group_log_by_threadid(f, threadid_position): threadid_log_files = [] try: - for line in f: + for line in generate_line(f): if not line.startswith('\t') and threadid_position: - try: + if len(line.strip().split()) > threadid_position: threadid = line.strip().split()[threadid_position] - except IndexError: - raise ValueError(f'wrong format for log line:{line.strip()}') + else: + print(f'wrong format for log line:{line.strip()}') + continue if not threadid.isdigit(): - raise ValueError(f'invalid int value {threadid} for %p') + print(f'wrong format for log line:{line.strip()}') + continue if not threadid in threadid_log: threadid_log_file = get_tempfile_name(threadid) threadid_log_files.append(threadid_log_file) @@ -339,7 +354,7 @@ def record_sql(valid_files, args, log_info_position, output_obj): start_position = get_start_position(args.start_time, file_path) if start_position == -1: continue - with open(file_path) as f: + with open(file_path, errors='ignore') as f: f.seek(start_position, 0) threadid_log_files = group_log_by_threadid(f, log_info_position.get('p')) try: @@ -350,17 +365,15 @@ def record_sql(valid_files, args, log_info_position, output_obj): for threadid_log_file in threadid_log_files: if os.path.isfile(threadid_log_file): os.remove(threadid_log_file) + filter_config = {'user': args.U, 'database': args.d, + 'sql_amount': args.sql_amount, 'statement': args.statement} try: - with open(merged_log_file, mode='r') as f: + with open(merged_log_file, mode='r', errors='ignore') as f: if isinstance(output_obj, dict): get_workload_template(output_obj, split_transaction( - get_parsed_sql(f, args.U, args.d, - args.sql_amount, - args.statement), + get_parsed_sql(f, filter_config, log_info_position) ), args) else: - filter_config = {'user': args.U, 'database': args.d, - 'sql_amount': args.sql_amount, 'statement': args.statement} for sql in get_parsed_sql(f, filter_config, log_info_position): output_obj.write(sql + '\n') except Exception as ex: @@ -383,7 +396,7 @@ def extract_sql_from_log(args): valid_files.insert(0, file) if args.json: try: - with open(args.f, 'r') as output_file: + with open(args.f, 'r', errors='ignore') as output_file: templates = json.load(output_file) except (json.JSONDecodeError, FileNotFoundError) as e: templates = {} @@ -417,11 +430,11 @@ def main(argv): args = arg_parser.parse_args(argv) if 
args.U: - if not 'u' in args.p: + if 'u' not in args.p: raise argparse.ArgumentTypeError(f"input parameter p '{args.p}' does not contain" " '%u' and U is not allowed.") if args.d: - if not 'd' in args.p: + if 'd' not in args.p: raise argparse.ArgumentTypeError(f"input parameter p '{args.p}' does not contain" " '%d' and d is not allowed.") if args.start_time: @@ -430,19 +443,19 @@ def main(argv): time.strptime(args.start_time, '%Y-%m-%d %H:%M:%S') ) - if not 'm' in args.p: + if 'm' not in args.p: raise argparse.ArgumentTypeError(f"input parameter p '{args.p}' does not contain" " '%m' and start_time is not allowed.") if args.sql_amount is not None and args.sql_amount <= 0: raise argparse.ArgumentTypeError("sql_amount %s is an invalid positive int value" % args.sql_amount) - if args.max_reserved_period and args.max_reserved_period <= 0: + if args.max_reserved_period is not None and args.max_reserved_period <= 0: raise argparse.ArgumentTypeError("max_reserved_period %s is an invalid positive int value" % args.max_reserved_period) - if args.max_template_num and args.max_template_num <= 0: + if args.max_template_num is not None and args.max_template_num <= 0: raise argparse.ArgumentTypeError("max_template_num %s is an invalid positive int value" % args.max_template_num) - elif args.max_template_num and args.max_template_num > 5000: + elif args.max_template_num is not None and args.max_template_num > 5000: print('max_template_num %d above 5000 is not advised for time cost' % args.max_template_num) if not args.max_reserved_period: args.max_reserved_period = float('inf') diff --git a/src/gausskernel/dbmind/tools/components/index_advisor/README.md b/src/gausskernel/dbmind/tools/components/index_advisor/README.md index e12630e2c..306ce324d 100644 --- a/src/gausskernel/dbmind/tools/components/index_advisor/README.md +++ b/src/gausskernel/dbmind/tools/components/index_advisor/README.md @@ -11,15 +11,6 @@ benefit of it for the workload. [-W PASSWORD] [--schema SCHEMA] [--max_index_num MAX_INDEX_NUM] [--max_index_storage MAX_INDEX_STORAGE] [--multi_iter_mode] [--multi_node] [--json] [--driver] [--show_detail] -# Extract_log - -**extract_log** is a tool for extracting business data from pg_log. 
- -## Usage - - python extract_log.py [l LOG_DIRECTORY] [f OUTPUT_FILE] [-d DATABASE] [-U USERNAME] [--start_time] - [--sql_amount] [--statement] [--json] - ## Dependencies python3.x diff --git a/src/gausskernel/dbmind/tools/components/index_advisor/dao/driver_execute.py b/src/gausskernel/dbmind/tools/components/index_advisor/dao/driver_execute.py index 62fe9bf9b..742ccdb23 100644 --- a/src/gausskernel/dbmind/tools/components/index_advisor/dao/driver_execute.py +++ b/src/gausskernel/dbmind/tools/components/index_advisor/dao/driver_execute.py @@ -49,7 +49,7 @@ class DriverExecute(ExecuteFactory): def is_multi_node(self): self.init_conn_handle() try: - self.cur.execute("select count(*) from pgxc_node where node_type='C';") + self.cur.execute("select pg_catalog.count(*) from pg_catalog.pgxc_node where node_type='C';") self.conn.commit() return self.cur.fetchall()[0][0] > 0 finally: @@ -100,17 +100,17 @@ class DriverExecute(ExecuteFactory): # create hypo-indexes if self.schema: sqls = 'SET current_schema = %s;' % self.schema - sqls += 'SET enable_hypo_index = on;SELECT hypopg_reset_index();' + sqls += 'SET enable_hypo_index = on;SELECT pg_catalog.hypopg_reset_index();' if multi_node: sqls += 'SET enable_fast_query_shipping = off;SET enable_stream_operator = on;' for table in query_index_dict.keys(): for columns_tulpe in query_index_dict[table]: if columns_tulpe != '': - content = "SELECT hypopg_create_index('CREATE INDEX ON %s(%s) %s');" % \ + content = "SELECT pg_catalog.hypopg_create_index('CREATE INDEX ON %s(%s) %s');" % \ (table, columns_tulpe[0], columns_tulpe[1]) content = content.replace('""', '') sqls += content - sqls += 'SELECT * from hypopg_display_index();' + sqls += 'SELECT * from pg_catalog.hypopg_display_index();' result = self.execute(sqls) if not result: return valid_indexes @@ -122,21 +122,21 @@ class DriverExecute(ExecuteFactory): match_flag, table_name = ExecuteFactory.match_table_name(table_name, query_index_dict) if not match_flag: - self.execute('SELECT hypopg_reset_index()') + self.execute('SELECT pg_catalog.hypopg_reset_index()') return valid_indexes hypoid_table_column[str(item[1])] = \ table_name + ':' + item[3].strip('()') sqls = "SET explain_perf_mode = 'normal'; explain %s" % query result = self.execute(sqls) if not result: - self.execute('SELECT hypopg_reset_index()') + self.execute('SELECT pg_catalog.hypopg_reset_index()') return valid_indexes # parse the result of explain plan for item in result: if 'Index' in item[0] and 'Scan' in item[0] and 'btree' in item[0]: super().get_valid_indexes( item[0], hypoid_table_column, valid_indexes) - self.execute('SELECT hypopg_reset_index()') + self.execute('SELECT pg_catalog.hypopg_reset_index()') return valid_indexes @staticmethod @@ -164,7 +164,7 @@ class DriverExecute(ExecuteFactory): return cost_total def update_index_storage(self, index_id, index_config, hypo_index_num): - index_size_sql = 'select * from hypopg_estimate_size(%s);' % index_id + index_size_sql = 'select * from pg_catalog.hypopg_estimate_size(%s);' % index_id res = self.execute(index_size_sql) if res: index_config[hypo_index_num].storage = float( @@ -181,7 +181,7 @@ class DriverExecute(ExecuteFactory): # create hypo-indexes self.execute('SET enable_hypo_index = on') for index in index_config: - res = self.execute("SELECT * from hypopg_create_index('CREATE INDEX ON %s(%s) %s')" % + res = self.execute("SELECT * from pg_catalog.hypopg_create_index('CREATE INDEX ON %s(%s) %s')" % (index.table, index.columns, index.index_type)) if self.max_index_storage and 
res: self.update_index_storage( @@ -206,13 +206,14 @@ class DriverExecute(ExecuteFactory): res, index_config, ori_indexes_name) query_cost *= workload[ind].frequency workload[ind].cost_list.append(query_cost) + # update positive_pos and negative_pos if index_config and len(index_config) == 1 and query_cost < workload[ind].cost_list[0]: - index_config[0].positive_pos.append(ind) + index_config[0].update_positive_pos(ind) total_cost += query_cost else: workload[ind].cost_list.append(0) if index_config: - self.execute('SELECT hypopg_reset_index()') + self.execute('SELECT pg_catalog.hypopg_reset_index()') return total_cost def check_useless_index(self, history_indexes, history_invalid_indexes): @@ -221,7 +222,7 @@ class DriverExecute(ExecuteFactory): whole_indexes = list() redundant_indexes = list() for schema in schemas: - table_sql = "select tablename from pg_tables where schemaname = '%s'" % schema + table_sql = "select tablename from pg_catalog.pg_tables where schemaname = '%s'" % schema table_res = self.execute(table_sql) if not table_res: continue @@ -229,10 +230,10 @@ class DriverExecute(ExecuteFactory): tables_string = ','.join(["'%s'" % table for table in tables]) # query all table index information and primary key information sql = "set current_schema = %s; SELECT c.relname AS tablename, i.relname AS indexname, " \ - "pg_get_indexdef(i.oid) AS indexdef, p.contype AS pkey from " \ - "pg_index x JOIN pg_class c ON c.oid = x.indrelid JOIN " \ - "pg_class i ON i.oid = x.indexrelid LEFT JOIN pg_namespace n " \ - "ON n.oid = c.relnamespace LEFT JOIN pg_constraint p ON (i.oid = p.conindid " \ + "pg_catalog.pg_get_indexdef(i.oid) AS indexdef, p.contype AS pkey from " \ + "pg_catalog.pg_index x JOIN pg_catalog.pg_class c ON c.oid = x.indrelid JOIN " \ + "pg_catalog.pg_class i ON i.oid = x.indexrelid LEFT JOIN pg_catalog.pg_namespace n " \ + "ON n.oid = c.relnamespace LEFT JOIN pg_catalog.pg_constraint p ON (i.oid = p.conindid " \ "AND p.contype = 'p') WHERE (c.relkind = ANY (ARRAY['r'::\"char\", " \ "'m'::\"char\"])) AND (i.relkind = ANY (ARRAY['i'::\"char\", 'I'::\"char\"])) " \ "AND n.nspname = '%s' AND c.relname in (%s) order by c.relname;" % \ diff --git a/src/gausskernel/dbmind/tools/components/index_advisor/dao/gsql_execute.py b/src/gausskernel/dbmind/tools/components/index_advisor/dao/gsql_execute.py index 074a721ea..bea6c0708 100644 --- a/src/gausskernel/dbmind/tools/components/index_advisor/dao/gsql_execute.py +++ b/src/gausskernel/dbmind/tools/components/index_advisor/dao/gsql_execute.py @@ -48,7 +48,7 @@ class GSqlExecute(ExecuteFactory): proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) (stdout, stderr) = proc.communicate() - stdout, stderr = stdout.decode(), stderr.decode() + stdout, stderr = stdout.decode(errors='ignore'), stderr.decode(errors='ignore') if 'gsql: FATAL:' in stderr or 'failed to connect' in stderr: raise ConnectionError("An error occurred while connecting to the database.\n" + "Details: " + stderr) @@ -61,9 +61,9 @@ class GSqlExecute(ExecuteFactory): ret = subprocess.check_output( shlex.split(cmd), stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: - print(e.output.decode(), file=sys.stderr) + print(e.output.decode(errors='ignore'), file=sys.stderr) - return ret.decode() + return ret.decode(errors='ignore') def is_multi_node(self): cmd = BASE_CMD + " -c " + shlex.quote("select count(*) from pgxc_node where node_type='C';") @@ -71,8 +71,8 @@ class GSqlExecute(ExecuteFactory): ret = subprocess.check_output( 
shlex.split(cmd), stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: - print(e.output.decode(), file=sys.stderr) - return int(ret.decode().strip().split()[2]) > 0 + print(e.output.decode(errors='ignore'), file=sys.stderr) + return int(ret.decode(errors='ignore').strip().split()[2]) > 0 @staticmethod def parse_single_advisor_result(res, table_index_dict): @@ -199,8 +199,9 @@ class GSqlExecute(ExecuteFactory): query_cost = GSqlExecute.parse_plan_cost(line) query_cost *= workload[select_sql_pos[i]].frequency workload[select_sql_pos[i]].cost_list.append(query_cost) + # update positive_pos and negative_pos if index_config and len(index_config) == 1 and query_cost < workload[select_sql_pos[i]].cost_list[0]: - index_config[0].positive_pos.append(select_sql_pos[i]) + index_config[0].update_positive_pos(select_sql_pos[i]) total_cost += query_cost found_plan = False i += 1 @@ -303,8 +304,9 @@ class GSqlExecute(ExecuteFactory): elif re.match(r'\(\d+ rows?\)', line): continue elif '|' in line: - temptable, tempindex, indexdef, temppkey = [ - item.strip() for item in line.split('|')] + temptable, tempindex = [item.strip() for item in line.split('|')[:2]] + indexdef = ('|'.join(line.split('|')[2:-1])).strip() + temppkey = line.split('|')[-1].strip() if temptable and tempindex: table, index, pkey = temptable, tempindex, temppkey if line.strip().endswith(('+| p', '+|')): diff --git a/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/default.yml b/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/default.yml index 6b6f908e2..0a3a87891 100644 --- a/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/default.yml +++ b/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/default.yml @@ -19,11 +19,13 @@ pg_db: sql: |- SELECT d.datid,d.datname,numbackends, xact_commit,xact_rollback,xact_rollback + xact_commit AS xact_total, - blks_read,blks_hit,blks_read + blks_hit AS blks_access, + blks_read,blks_hit,(blks_hit / (blks_read+blks_hit+0.001)) AS blks_access, tup_returned,tup_fetched,tup_inserted,tup_updated,tup_deleted,tup_inserted + tup_updated + tup_deleted AS tup_modified, conflicts,temp_files,temp_bytes,deadlocks, blk_read_time,blk_write_time, extract(epoch from stats_reset) as stats_reset, - confl_tablespace,confl_lock,confl_snapshot,confl_bufferpin,confl_deadlock + confl_tablespace,confl_lock,confl_snapshot,confl_bufferpin,confl_deadlock, + tup_returned / (extract (epoch from (pg_catalog.now() - stats_reset))) AS read_tup_speed, + (tup_inserted + tup_updated + tup_deleted) / (extract (epoch from (pg_catalog.now() - stats_reset))) AS write_tup_speed FROM pg_stat_database d,pg_stat_database_conflicts pdc WHERE pdc.datname = d.datname and d.datname NOT IN ('postgres', 'template0', 'template1'); version: '>=0.0.0' @@ -53,6 +55,9 @@ pg_db: - name: blks_hit description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the OpenGauss buffer cache, not the operating system's file system cache) usage: COUNTER + - name: blks_access + description: hit rate of database + usage: GAUGE - name: tup_returned description: Number of rows returned by queries in this database usage: COUNTER @@ -104,6 +109,12 @@ pg_db: - name: confl_deadlock description: Number of queries in this database that have been canceled due to deadlocks usage: COUNTER + - name: read_tup_speed + description: read_tup_speed + usage: COUNTER + - name: write_tup_speed + description: write_tup_speed + usage: 
COUNTER status: enable ttl: -1 timeout: 1 @@ -115,11 +126,11 @@ pg_meta: query: - name: pg_meta sql: |- - SELECT (SELECT system_identifier FROM pg_control_system()) AS cluster_id, - current_setting('port') AS listen_port, - current_setting('wal_level') AS wal_level, - current_setting('server_version') AS version, - current_setting('server_version_num') AS ver_num, + SELECT (SELECT system_identifier FROM pg_catalog.pg_control_system()) AS cluster_id, + pg_catalog.current_setting('port') AS listen_port, + pg_catalog.current_setting('wal_level') AS wal_level, + pg_catalog.current_setting('server_version') AS version, + pg_catalog.current_setting('server_version_num') AS ver_num, 'N/A' AS primary_conninfo, 1 AS info; version: '>=0.0.0' @@ -158,21 +169,24 @@ pg_connections: desc: OpenGauss database connections query: - name: pg_connections - sql: select max_conn,used_conn,max_conn-used_conn res_for_normal from (select count(*) used_conn from pg_stat_activity) t1,(select setting::int max_conn from pg_settings where name='max_connections') t2; - version: '>=0.0.0' + sql: select t1.used_conn, t2.enqueue_sql, t3.idle_session from (select pg_catalog.count(*) used_conn from pg_stat_activity) t1, + (select pg_catalog.count(*) enqueue_sql from pg_stat_activity where enqueue is not NULL) t2, + (select pg_catalog.count(*) idle_session from pg_stat_activity where state='idle') t3; + version: '>=0.0.0' timeout: 1 status: enable dbRole: "" metrics: - - name: max_conn - description: total of connections - usage: GAUGE - name: used_conn description: used of connections usage: GAUGE - - name: res_for_normal - description: reserve of connections + - name: idle_session + description: idle session number usage: GAUGE + - name: enqueue_sql + description: running sql + usage: GAUGE + status: enable ttl: 60 timeout: 1 @@ -183,7 +197,7 @@ pg_session_connection: desc: OpenGauss backend activity group by state query: - name: pg_session_connection - sql: select client_addr,state,count(1) as count from pg_stat_activity group by client_addr,state order by 3 desc limit 20 ; + sql: select client_addr,state,pg_catalog.count(1) as count from pg_stat_activity group by client_addr,state order by 3 desc limit 20 ; version: '>=0.0.0' timeout: 1 status: enable @@ -204,7 +218,6 @@ pg_session_connection: public: true - # ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # ┃ pg_stat_activity # ┃ OpenGauss backend activity group by state @@ -238,15 +251,15 @@ pg_activity: coalesce(max_conn_duration, 0) AS max_conn_duration FROM (SELECT d.oid AS database, d.datname, a.state FROM pg_database d, - unnest(ARRAY ['active','idle','idle in transaction','idle in transaction (aborted)','fastpath function call','disabled']) a(state) + pg_catalog.unnest(ARRAY ['active','idle','idle in transaction','idle in transaction (aborted)','fastpath function call','disabled']) a(state) WHERE d.datname NOT IN ('template0','template1')) base LEFT JOIN ( SELECT datname, state, - count(*) AS count, - max(extract(epoch from now() - state_change)) AS max_duration, - max(extract(epoch from now() - xact_start)) AS max_tx_duration, - max(extract(epoch from now() - backend_start)) AS max_conn_duration - FROM pg_stat_activity WHERE pid <> pg_backend_pid() + pg_catalog.count(*) AS count, + pg_catalog.max(extract(epoch from pg_catalog.now() - state_change)) AS max_duration, + pg_catalog.max(extract(epoch from pg_catalog.now() - xact_start)) AS max_tx_duration, + pg_catalog.max(extract(epoch from pg_catalog.now() - 
backend_start)) AS max_conn_duration + FROM pg_stat_activity WHERE pid <> pg_catalog.pg_backend_pid() GROUP BY datname, state ) a USING (datname, state); version: '>=1.0.0' @@ -297,8 +310,8 @@ pg_downstream: - name: pg_downstream sql: | SELECT l.state, coalesce(count, 0 ) AS count - FROM unnest(ARRAY ['Streaming','Startup','Catchup', 'Backup', 'Stopping']) l(state) - LEFT JOIN (SELECT state, count(*) AS count FROM pg_stat_replication GROUP BY state)r ON l.state = r.state + FROM pg_catalog.unnest(ARRAY ['Streaming','Startup','Catchup', 'Backup', 'Stopping']) l(state) + LEFT JOIN (SELECT state, pg_catalog.count(*) AS count FROM pg_stat_replication GROUP BY state)r ON l.state = r.state version: '>=0.0.0' timeout: 0.5 ttl: 10 @@ -371,13 +384,13 @@ pg_replication: from ( select pr.pid,client_addr,application_name,pr.state,pr.sync_state, - pg_xlog_location_diff (case when pg_is_in_recovery() then pg_last_xlog_receive_location() else pg_current_xlog_location() end, '0/0') as lsn, - pg_xlog_location_diff(pr.sender_sent_location,'0/0') as sent_location, - pg_xlog_location_diff(pr.receiver_write_location,'0/0') as write_location, - pg_xlog_location_diff(pr.receiver_flush_location,'0/0') as flush_location, - pg_xlog_location_diff(pr.receiver_replay_location,'0/0') as replay_location, - pg_xlog_location_diff(pr.receiver_replay_location, pg_current_xlog_location()) as replay_lag, - extract(EPOCH from now() - backend_start) as backend_uptime,pr.sync_priority + pg_xlog_location_diff (case when pg_catalog.pg_is_in_recovery() then pg_catalog.pg_last_xlog_receive_location() else pg_catalog.pg_current_xlog_location() end, '0/0') as lsn, + pg_catalog.pg_xlog_location_diff(pr.sender_sent_location,'0/0') as sent_location, + pg_catalog.pg_xlog_location_diff(pr.receiver_write_location,'0/0') as write_location, + pg_catalog.pg_xlog_location_diff(pr.receiver_flush_location,'0/0') as flush_location, + pg_catalog.pg_xlog_location_diff(pr.receiver_replay_location,'0/0') as replay_location, + pg_catalog.pg_xlog_location_diff(pr.receiver_replay_location, pg_catalog.pg_current_xlog_location()) as replay_lag, + extract(EPOCH from pg_catalog.now() - backend_start) as backend_uptime,pr.sync_priority from pg_stat_replication pr ); @@ -460,10 +473,10 @@ pg_replication: # ┃ LABEL xmin replication xid # ┃ LABEL catalog_xmin logical decode xid # ┃ LABEL restart_lsn Xlog info -# ┃ GAUGE delay_lsn delay lsn from pg_current_xlog_location() +# ┃ GAUGE delay_lsn delay lsn from pg_catalog.pg_current_xlog_location() # ┃ DISCARD dummy_standby Is real standby # ┣┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ -# ┃ pg_replication_slots_delay_lsn{slot_name,plugin,slot_type,datoid,database,active,xmin,catalog_xmin,restart_lsn} GAUGE delay lsn from pg_current_xlog_location() +# ┃ pg_replication_slots_delay_lsn{slot_name,plugin,slot_type,datoid,database,active,xmin,catalog_xmin,restart_lsn} GAUGE delay lsn from pg_catalog.pg_current_xlog_location() # ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ pg_slot: name: pg_replication_slots @@ -476,8 +489,8 @@ pg_slot: (case active when 't' then 1 else 0 end)as active, coalesce(xmin,'_') as xmin, dummy_standby, - pg_xlog_location_diff(CASE WHEN pg_is_in_recovery() THEN restart_lsn - ELSE pg_current_xlog_location() END , restart_lsn) AS delay_lsn + pg_catalog.pg_xlog_location_diff(CASE WHEN pg_catalog.pg_is_in_recovery() THEN restart_lsn + ELSE pg_catalog.pg_current_xlog_location() END , 
restart_lsn) AS delay_lsn from pg_replication_slots; version: '>=1.0.0' timeout: 1 @@ -507,7 +520,7 @@ pg_slot: description: replication xid usage: LABEL - name: delay_lsn - description: delay lsn from pg_current_xlog_location() + description: delay lsn from pg_catalog.pg_current_xlog_location() usage: GAUGE - name: dummy_standby description: Is real standby @@ -526,14 +539,14 @@ pg_slot: # ┣┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ # ┃ LABEL datname Name of this database # ┃ GAUGE size_bytes Disk space used by the database -# ┃ GAUGE age database age calculated by age(datfrozenxid64) +# ┃ GAUGE age database age calculated by pg_catalog.age(datfrozenxid64) # ┃ GAUGE is_template 1 for template db and 0 for normal db # ┃ GAUGE allow_conn 1 allow connection and 0 does not allow # ┃ GAUGE conn_limit connection limit, -1 for no limit # ┃ GAUGE frozen_xid tuple with xmin below this will always be visable (until wrap around) # ┣┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ # ┃ pg_database_size_bytes{datname} GAUGE Disk space used by the database -# ┃ pg_database_age{datname} GAUGE database age calculated by age(datfrozenxid64) +# ┃ pg_database_age{datname} GAUGE database age calculated by pg_catalog.age(datfrozenxid64) # ┃ pg_database_is_template{datname} GAUGE 1 for template db and 0 for normal db # ┃ pg_database_allow_conn{datname} GAUGE 1 allow connection and 0 does not allow # ┃ pg_database_conn_limit{datname} GAUGE connection limit, -1 for no limit @@ -546,8 +559,8 @@ pg_database: - name: pg_database sql: |- SELECT datname, - pg_database_size(pg_database.datname) as size_bytes, - age(datfrozenxid64) AS age, + pg_catalog.pg_database_size(pg_database.datname) as size_bytes, + pg_catalog.age(datfrozenxid64) AS age, datistemplate AS is_template, datallowconn AS allow_conn, datconnlimit AS conn_limit, @@ -562,8 +575,8 @@ pg_database: - name: pg_database sql: |- SELECT datname, - pg_database_size(pg_database.datname) as size_bytes, - age(datfrozenxid64) AS age, + pg_catalog.pg_database_size(pg_database.datname) as size_bytes, + pg_catalog.age(datfrozenxid64) AS age, datistemplate AS is_template, datallowconn AS allow_conn, datconnlimit AS conn_limit, @@ -583,7 +596,7 @@ pg_database: description: Disk space used by the database usage: GAUGE - name: age - description: database age calculated by age(datfrozenxid64) + description: database age calculated by pg_catalog.age(datfrozenxid64) usage: GAUGE - name: is_template description: 1 for template db and 0 for normal db @@ -668,8 +681,8 @@ pg_checkpoint: oldest_xid_dbid::text::BIGINT, oldest_active_xid::text::BIGINT, checkpoint_time AS time, - extract(epoch from now() - checkpoint_time) AS elapse - FROM pg_control_checkpoint(); + extract(epoch from pg_catalog.now() - checkpoint_time) AS elapse + FROM pg_catalog.pg_control_checkpoint(); version: '>=0.0.0' timeout: 1 ttl: 5 @@ -757,7 +770,7 @@ pg_run_times: desc: OpenGauss database run times query: - name: pg_run_times - sql: select 'cluster_runtime' as run_name,(case pg_is_in_recovery() when 'f' then 1 else 0 end) as db_role,extract(epoch from(now() - pg_postmaster_start_time())) as run_time; + sql: select 'cluster_runtime' as run_name,(case pg_catalog.pg_is_in_recovery() when 'f' then 1 else 0 end) as db_role,extract(epoch from(pg_catalog.now() - pg_catalog.pg_postmaster_start_time())) as run_time; version: '>=0.0.0' timeout: 1 ttl: 60 @@ -823,22 +836,24 @@ pg: desc: 'primary 
database ' sql: | SELECT extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp, - extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime, - extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time, - pg_xlog_location_diff(pg_current_xlog_location() , '0/0') AS lsn, - pg_xlog_location_diff(pg_current_xlog_insert_location(),'0/0') AS insert_lsn, - pg_xlog_location_diff(pg_current_xlog_location() , '0/0') AS write_lsn, - pg_xlog_location_diff(pg_current_xlog_location() , '0/0') AS flush_lsn, + extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_postmaster_start_time()) AS uptime, + extract(EPOCH FROM pg_catalog.pg_postmaster_start_time()) AS boot_time, + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location() , '0/0') AS lsn, + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_insert_location(),'0/0') AS insert_lsn, + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location() , '0/0') AS write_lsn, + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location() , '0/0') AS flush_lsn, NULL::BIGINT AS receive_lsn, NULL::BIGINT AS replay_lsn, - extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time, + extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_conf_load_time()) AS conf_reload_time, NULL::FLOAT AS last_replay_time, 0::FLOAT AS lag, - pg_is_in_recovery() AS is_in_recovery, - FALSE AS is_wal_replay_paused + pg_catalog.pg_is_in_recovery() AS is_in_recovery, + FALSE AS is_wal_replay_paused, + extract(epoch from pg_catalog.avg(pg_catalog.now()-backend_start)) AS avg_time + from pg_stat_activity where client_port is not null ; version: '>=0.0.0' - timeout: 0.1 + timeout: 1 ttl: 10 status: enable dbRole: primary @@ -846,22 +861,24 @@ pg: desc: 'standby database ' sql: | SELECT extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp, - extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime, - extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time, - pg_xlog_location_diff(pg_last_xlog_receive_location() , '0/0') AS lsn, + extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_postmaster_start_time()) AS uptime, + extract(EPOCH FROM pg_catalog.pg_postmaster_start_time()) AS boot_time, + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_last_xlog_receive_location() , '0/0') AS lsn, NULL::BIGINT AS insert_lsn, NULL::BIGINT AS write_lsn, NULL::BIGINT AS flush_lsn, - pg_xlog_location_diff(pg_last_xlog_receive_location() , '0/0') AS receive_lsn, - pg_xlog_location_diff(pg_last_xlog_receive_location() , '0/0') AS replay_lsn, - extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time, - extract(EPOCH FROM pg_last_xact_replay_timestamp()) AS last_replay_time, - pg_is_in_recovery() AS is_in_recovery + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_last_xlog_receive_location() , '0/0') AS receive_lsn, + pg_catalog.pg_xlog_location_diff(pg_catalog.pg_last_xlog_receive_location() , '0/0') AS replay_lsn, + extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_conf_load_time()) AS conf_reload_time, + extract(EPOCH FROM pg_catalog.pg_last_xact_replay_timestamp()) AS last_replay_time, + pg_catalog.pg_is_in_recovery() AS is_in_recovery, + extract(epoch from pg_catalog.avg(now()-backend_start)) AS avg_time + from pg_stat_activity where client_port is not null ; version: '>=0.0.0' - timeout: 0.1 + timeout: 1 ttl: 10 - status: enable + status: disable dbRole: standby metrics: - name: timestamp @@ -906,6 +923,9 @@ pg: - name: is_wal_replay_paused description: 1 if wal play is paused usage: GAUGE + - name: avg_time + description: avg_time 
+ usage: GAUGE status: enable ttl: 60 timeout: 0.1 @@ -917,13 +937,13 @@ pg_setting: query: - name: pg_setting sql: |- - SELECT current_setting('max_connections') AS max_connections, - current_setting('max_prepared_transactions') AS max_prepared_transactions, - current_setting('max_replication_slots') AS max_replication_slots, - current_setting('max_wal_senders') AS max_wal_senders, - current_setting('max_locks_per_transaction') AS max_locks_per_transaction, - current_setting('block_size') AS block_size, - CASE current_setting('wal_log_hints') WHEN 'on' THEN 1 ELSE 0 END AS wal_log_hints; + SELECT pg_catalog.current_setting('max_connections') AS max_connections, + pg_catalog.current_setting('max_prepared_transactions') AS max_prepared_transactions, + pg_catalog.current_setting('max_replication_slots') AS max_replication_slots, + pg_catalog.current_setting('max_wal_senders') AS max_wal_senders, + pg_catalog.current_setting('max_locks_per_transaction') AS max_locks_per_transaction, + pg_catalog.current_setting('block_size') AS block_size, + CASE pg_catalog.current_setting('wal_log_hints') WHEN 'on' THEN 1 ELSE 0 END AS wal_log_hints; version: '>=0.0.0' timeout: 1 ttl: 60 @@ -963,7 +983,7 @@ pg_class: - name: pg_class sql: |- SELECT CURRENT_CATALOG AS datname,(select nspname from pg_namespace where oid=relnamespace) as nspname,relname,relkind,relpages,reltuples, - CASE WHEN relkind = 'i' THEN NULL ELSE age(relfrozenxid64) END AS relage,pg_relation_size(oid) AS relsize + CASE WHEN relkind = 'i' THEN NULL ELSE pg_catalog.age(relfrozenxid64) END AS relage,pg_catalog.pg_relation_size(oid) AS relsize FROM pg_class WHERE relkind = 'r' and relname not like 'pg_%' and relname not like 'gs_%' and nspname not in ('information_schema', 'pg_catalog') ORDER BY relpages DESC LIMIT 32; @@ -975,7 +995,7 @@ pg_class: - name: pg_class sql: |- SELECT CURRENT_CATALOG AS datname,(select nspname from pg_namespace where oid=relnamespace) as nspname,relname,relkind,relpages,reltuples, - CASE WHEN relkind = 'i' THEN NULL ELSE age(relfrozenxid64) END AS relage,pg_relation_size(oid) AS relsize + CASE WHEN relkind = 'i' THEN NULL ELSE pg_catalog.age(relfrozenxid64) END AS relage,pg_catalog.pg_relation_size(oid) AS relsize FROM pg_class WHERE relkind = 'r' and relname not like 'pg_%' and relname not like 'gs_%' and nspname not in ('information_schema', 'pg_catalog') ORDER BY relpages DESC LIMIT 32; @@ -1036,9 +1056,9 @@ pg_lock: SELECT datname, mode, coalesce(count, 0) AS count FROM ( SELECT d.oid AS database, d.datname, l.mode - FROM pg_database d,unnest(ARRAY ['AccessShareLock','RowShareLock','RowExclusiveLock','ShareUpdateExclusiveLock','ShareLock','ShareRowExclusiveLock','ExclusiveLock','AccessExclusiveLock']) l(mode) + FROM pg_database d,pg_catalog.unnest(ARRAY ['AccessShareLock','RowShareLock','RowExclusiveLock','ShareUpdateExclusiveLock','ShareLock','ShareRowExclusiveLock','ExclusiveLock','AccessExclusiveLock']) l(mode) WHERE d.datname NOT IN ('template0','template1')) base - LEFT JOIN (SELECT database, mode, count(1) AS count FROM pg_locks WHERE database IS NOT NULL GROUP BY database, mode) cnt + LEFT JOIN (SELECT database, mode, pg_catalog.count(1) AS count FROM pg_locks WHERE database IS NOT NULL GROUP BY database, mode) cnt USING (database, mode); version: '>=0.0.0' timeout: 1 @@ -1070,7 +1090,7 @@ pg_lock: # with tl as (select usename,granted,locktag,query_start,query # from pg_locks l,pg_stat_activity a # where l.pid=a.pid and locktag in(select locktag from pg_locks where granted='f')) -# select ts.usename 
locker_user,ts.query_start locker_query_start,ts.granted locker_granted,ts.query locker_query,tt.query locked_query,tt.query_start locked_query_start,tt.granted locked_granted,tt.usename locked_user,extract(epoch from now() - tt.query_start) as locked_times +# select ts.usename locker_user,ts.query_start locker_query_start,ts.granted locker_granted,ts.query locker_query,tt.query locked_query,tt.query_start locked_query_start,tt.granted locked_granted,tt.usename locked_user,extract(epoch from pg_catalog.now() - tt.query_start) as locked_times # from (select * from tl where granted='t') as ts,(select * from tl where granted='f') tt # where ts.locktag=tt.locktag order by 1; # version: '>=0.0.0' @@ -1120,7 +1140,7 @@ pg_locker: with tl as (select usename,granted,locktag,query_start,query from pg_locks l,pg_stat_activity a where l.pid=a.pid and locktag in(select locktag from pg_locks where granted='f')) - select usename,query_start,granted,query,count(query) count + select usename,query_start,granted,query,pg_catalog.count(query) count from tl where granted='t' group by usename,query_start,granted,query order by 5 desc; version: '>=0.0.0' timeout: 1 @@ -1153,7 +1173,7 @@ pg_active_slowsql: desc: OpenGauss active slow query query: - name: pg_active_slowsql - sql: select datname,usename,client_addr,pid,query_start::text,extract(epoch from (now() - query_start)) as query_runtime,xact_start::text,extract(epoch from(now() - xact_start)) as xact_runtime,state,query from pg_stat_activity where state not in('idle') and query_start is not null; + sql: select datname,usename,client_addr,pid,query_start::text,extract(epoch from (pg_catalog.now() - query_start)) as query_runtime,xact_start::text,extract(epoch from(pg_catalog.now() - xact_start)) as xact_runtime,state,query from pg_stat_activity where state not in('idle') and query_start is not null; version: '>=0.0.0' timeout: 1 ttl: 60 @@ -1241,8 +1261,8 @@ pg_table: n_tup_ins,n_tup_upd,n_tup_del,(n_tup_ins + n_tup_upd + n_tup_del) AS n_tup_mod, n_tup_hot_upd,n_live_tup,n_dead_tup, nvl(last_vacuum::text,'1970-01-01') as last_vacuum,nvl(last_autovacuum::text,'1970-01-01') as last_autovacuum,nvl(last_analyze::text,'1970-01-01') as last_analyze,nvl(last_autoanalyze::text,'1970-01-01') as last_autoanalyze,vacuum_count,autovacuum_count,analyze_count,autoanalyze_count, - extract(epoch from now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay, - extract(epoch from now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay, + extract(epoch from pg_catalog.now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay, + extract(epoch from pg_catalog.now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay, heap_blks_read,heap_blks_hit,idx_blks_read,idx_blks_hit, toast_blks_read,toast_blks_hit,tidx_blks_read,tidx_blks_hit FROM pg_stat_user_tables psut,pg_statio_user_tables psio @@ -1259,8 +1279,8 @@ pg_table: n_tup_ins,n_tup_upd,n_tup_del,(n_tup_ins + n_tup_upd + n_tup_del) AS n_tup_mod, n_tup_hot_upd,n_live_tup,n_dead_tup, nvl(last_vacuum::text,'1970-01-01') as 
last_vacuum,nvl(last_autovacuum::text,'1970-01-01') as last_autovacuum,nvl(last_analyze::text,'1970-01-01') as last_analyze,nvl(last_autoanalyze::text,'1970-01-01') as last_autoanalyze,vacuum_count,autovacuum_count,analyze_count,autoanalyze_count, - extract(epoch from now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay, - extract(epoch from now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay, + extract(epoch from pg_catalog.now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay, + extract(epoch from pg_catalog.now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay, heap_blks_read,heap_blks_hit,idx_blks_read,idx_blks_hit, toast_blks_read,toast_blks_hit,tidx_blks_read,tidx_blks_hit FROM pg_stat_user_tables psut,pg_statio_user_tables psio @@ -1386,7 +1406,7 @@ pg_index: query: - name: pg_index sql: |- - SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname, + SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname, pg_get_indexdef(psui.indexrelid) AS indexdef, idx_scan, idx_tup_read,idx_tup_fetch,idx_blks_read,idx_blks_hit FROM pg_stat_user_indexes psui,pg_statio_user_indexes psio WHERE psio.indexrelid = psui.indexrelid and psui.schemaname not in ('pg_catalog', 'information_schema','snapshot') @@ -1398,7 +1418,7 @@ pg_index: status: enable - name: pg_index sql: |- - SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname, + SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname, pg_get_indexdef(psui.indexrelid) AS indexdef, idx_scan,idx_tup_read,idx_tup_fetch,idx_blks_read,idx_blks_hit FROM pg_stat_user_indexes psui,pg_statio_user_indexes psio WHERE psio.indexrelid = psui.indexrelid and psui.schemaname not in ('pg_catalog', 'information_schema','snapshot') @@ -1421,6 +1441,9 @@ pg_index: - name: relname description: index name of this relation usage: LABEL + - name: indexdef + description: index definition of this relation + usage: LABEL - name: idx_scan description: index scans initiated on this index usage: GAUGE @@ -1447,10 +1470,10 @@ pg_tables_size: - name: pg_tables_size sql: |- SELECT CURRENT_CATALOG AS datname,nsp.nspname,rel.relname, - pg_total_relation_size(rel.oid) AS bytes, - pg_relation_size(rel.oid) AS relsize, - pg_indexes_size(rel.oid) AS indexsize, - pg_total_relation_size(reltoastrelid) AS toastsize + pg_catalog.pg_total_relation_size(rel.oid) AS bytes, + pg_catalog.pg_relation_size(rel.oid) AS relsize, + pg_catalog.pg_indexes_size(rel.oid) AS indexsize, + pg_catalog.pg_total_relation_size(reltoastrelid) AS toastsize FROM pg_namespace nsp JOIN pg_class rel ON nsp.oid = rel.relnamespace WHERE nspname NOT IN ('pg_catalog', 'information_schema','snapshot') AND rel.relkind = 'r' order by 4 desc limit 100; @@ -1462,10 +1485,10 @@ pg_tables_size: - name: pg_tables_size sql: |- SELECT CURRENT_CATALOG AS datname,nsp.nspname,rel.relname, - 
pg_total_relation_size(rel.oid) AS bytes, - pg_relation_size(rel.oid) AS relsize, - pg_indexes_size(rel.oid) AS indexsize, - pg_total_relation_size(reltoastrelid) AS toastsize + pg_catalog.pg_total_relation_size(rel.oid) AS bytes, + pg_catalog.pg_relation_size(rel.oid) AS relsize, + pg_catalog.pg_indexes_size(rel.oid) AS indexsize, + pg_catalog.pg_total_relation_size(reltoastrelid) AS toastsize FROM pg_namespace nsp JOIN pg_class rel ON nsp.oid = rel.relnamespace WHERE nspname NOT IN ('pg_catalog', 'information_schema','snapshot') AND rel.relkind = 'r' order by 4 desc limit 100; @@ -1506,7 +1529,7 @@ pg_indexes_size: query: - name: pg_indexes_size sql: |- - select schemaname schema_name,relname table_name,indexrelname index_name,pg_table_size(indexrelid) as index_size + select schemaname schema_name,relname table_name,indexrelname index_name,pg_catalog.pg_table_size(indexrelid) as index_size from pg_stat_user_indexes where schemaname not in('pg_catalog', 'information_schema','snapshot') order by 4 desc limit 100; @@ -1517,7 +1540,7 @@ pg_indexes_size: status: disable - name: pg_indexes_size sql: |- - select schemaname schema_name,relname table_name,indexrelname index_name,pg_table_size(indexrelid) as index_size + select schemaname schema_name,relname table_name,indexrelname index_name,pg_catalog.pg_table_size(indexrelid) as index_size from pg_stat_user_indexes where schemaname not in('pg_catalog', 'information_schema','snapshot') order by 4 desc limit 100; @@ -1549,9 +1572,9 @@ pg_need_indexes: query: - name: pg_need_indexes sql: |- - select schemaname||'.'||relname as tablename, pg_size_pretty(pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate + select schemaname||'.'||relname as tablename, pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate from pg_stat_user_tables - where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90 + where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_catalog.pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90 order by seq_scan desc limit 10; version: '>=0.0.0' timeout: 10 @@ -1560,9 +1583,9 @@ pg_need_indexes: status: enable - name: pg_need_indexes sql: |- - select schemaname||'.'||relname as tablename, pg_size_pretty(pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate + select schemaname||'.'||relname as tablename, pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate from pg_stat_user_tables - where schemaname not in('pg_catalog', 'information_schema','snapshot') and 
pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90 + where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_catalog.pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90 order by seq_scan desc limit 10; version: '>=0.0.0' timeout: 10 @@ -1601,7 +1624,7 @@ pg_never_used_indexes: query: - name: pg_never_used_indexes sql: |- - select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_table_size(pi.indexrelid) as index_size + select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_catalog.pg_table_size(pi.indexrelid) as index_size from pg_indexes pis join pg_stat_user_indexes pi on pis.schemaname = pi.schemaname and pis.tablename = pi.relname and pis.indexname = pi.indexrelname @@ -1610,8 +1633,8 @@ pg_never_used_indexes: where pco.contype is distinct from 'p' and pco.contype is distinct from 'u' and (idx_scan,idx_tup_read,idx_tup_fetch) = (0,0,0) and pis.indexdef !~ ' UNIQUE INDEX ' - and pis.schemaname not in('pg_catalog', 'information_schema','snapshot') - order by pg_table_size(indexrelid) desc; + and pis.schemaname not in('pg_catalog', 'information_schema','snapshot', 'dbe_pldeveloper') + order by pg_catalog.pg_table_size(indexrelid) desc; version: '>=0.0.0' timeout: 10 ttl: 3600 @@ -1619,7 +1642,7 @@ pg_never_used_indexes: status: enable - name: pg_never_used_indexes sql: |- - select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_table_size(pi.indexrelid) as index_size + select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_catalog.pg_table_size(pi.indexrelid) as index_size from pg_indexes pis join pg_stat_user_indexes pi on pis.schemaname = pi.schemaname and pis.tablename = pi.relname and pis.indexname = pi.indexrelname @@ -1628,8 +1651,8 @@ pg_never_used_indexes: where pco.contype is distinct from 'p' and pco.contype is distinct from 'u' and (idx_scan,idx_tup_read,idx_tup_fetch) = (0,0,0) and pis.indexdef !~ ' UNIQUE INDEX ' - and pis.schemaname not in('pg_catalog', 'information_schema','snapshot') - order by pg_table_size(indexrelid) desc; + and pis.schemaname not in('pg_catalog', 'information_schema','snapshot', 'dbe_pldeveloper') + order by pg_catalog.pg_table_size(indexrelid) desc; version: '>=0.0.0' timeout: 10 ttl: 3600 @@ -1661,15 +1684,11 @@ pg_tables_expansion_rate: query: - name: pg_tables_expansion_rate sql: |- - select CURRENT_CATALOG as datname, schemaname,relname,n_live_tup,n_dead_tup,round((n_dead_tup/(n_dead_tup+n_live_tup) *100),2) as dead_rate, - extract(epoch from coalesce(last_vacuum,'1970-01-01')::text) as last_vacuum, - extract(epoch from coalesce(last_autovacuum,'1970-01-01')::text) as last_autovacuum , - extract(epoch from coalesce(last_analyze,'1970-01-01')::text) as last_analyze, - extract(epoch from coalesce(last_autoanalyze,'1970-01-01')::text) as last_autoanalyze, - vacuum_count,autovacuum_count,analyze_count,autoanalyze_count - from pg_stat_user_tables - where n_live_tup > 0 - order by 5 asc; + select t1.*, t2.column_number from (select CURRENT_CATALOG as datname, schemaname,relname,n_live_tup,n_dead_tup,round(n_dead_tup/(n_live_tup+1),2) as dead_rate, + last_vacuum, last_autovacuum, last_analyze, last_autoanalyze, vacuum_count,autovacuum_count,analyze_count,autoanalyze_count + from pg_stat_user_tables where n_live_tup > 0 order by 5 asc) t1 
join + (select table_schema, table_name, count(column_name) as column_number from information_schema.columns group by table_schema, table_name) + t2 on t1.schemaname=t2.table_schema and t1.relname=t2.table_name; version: '>=0.0.0' timeout: 1 status: enable @@ -1689,6 +1708,9 @@ pg_tables_expansion_rate: - name: n_dead_tup description: dead tup of table usage: LABEL + - name: column_number + description: column number of table + usage: label - name: dead_rate description: Dead rate of table usage: GAUGE @@ -1742,7 +1764,7 @@ pg_lock_sql: (locked_act.xact_start)::text as locked_xact_start, (locker_act.query_start)::text as locker_query_start, (locked_act.query_start)::text as locked_query_start, - extract(epoch from now() - locked_act.query_start) as locked_times, + extract(epoch from pg_catalog.now() - locked_act.query_start) as locked_times, locker_act.query as locker_query, locked_act.query as locked_query from pg_locks locked, @@ -1828,7 +1850,7 @@ og_memory_info: desc: OpenGauss memory usage informations query: - name: og_memory_info - sql: select memorytype,memorymbytes from pv_total_memory_detail(); + sql: select memorytype,memorymbytes from pg_catalog.pv_total_memory_detail(); version: '>=0.0.0' timeout: 1 ttl: 60 @@ -1895,14 +1917,14 @@ og_context_memory: name: og_context_memory desc: OpenGauss context use memory information query: - - name: og_session_memory + - name: og_context_memory sql: |- select contextname, - sum(usedsize)::bigint as usedsize, - sum(totalsize)::bigint as totalsize + pg_catalog.sum(usedsize)::bigint as usedsize, + pg_catalog.sum(totalsize)::bigint as totalsize from gs_session_memory_detail group by contextname - order by sum(totalsize) desc limit 10; + order by pg_catalog.sum(totalsize) desc limit 10; version: '>=0.0.0' timeout: 30 ttl: 600 @@ -1956,7 +1978,7 @@ og_cpu_load: desc: OpenGauss cpu load query: - name: og_cpu_load - sql: select 'og_total_cpu' og_total_cpu,total_cpu() total_cpu; + sql: select 'og_total_cpu' og_total_cpu,pg_catalog.total_cpu() total_cpu; version: '>=0.0.0' timeout: 1 ttl: 10 @@ -1974,3 +1996,334 @@ og_cpu_load: timeout: 1 public: true + + +pg_thread_pool: + name: pg_thread_pool + desc: OpenGauss thread pool + query: + - name: pf_thread_pool + sql: select group_id, listener, worker_info, session_info from dbe_perf.GLOBAL_THREADPOOL_STATUS; + version: '>=0.0.0' + timeout: 1 + ttl: 10 + status: enable + dbRole: "" + metrics: + - name: group_id + description: group id + usage: LABEL + - name: listener + description: listener + usage: GAUGE + - name: worker_info + description: worker info + usage: LABEL + - name: session_info + description: session info + usage: LABEL + status: enable + ttl: 10 + timeout: 1 + public: true + + +pg_recovery_status: + name: pg_recovery_status + desc: pg recovery status + query: + - name: pg_recovery_status + sql: SELECT standby_node_name, current_sleep_time, current_rto FROM dbe_perf.global_recovery_status; + version: '>=0.0.0' + timeout: 1 + ttl: 10 + status: enable + dbRole: "" + metrics: + - name: standby_node_name + description: node name + usage: LABEL + - name: current_sleep_time + description: current sleep time + usage: LABEL + - name: current_rto + description: current rto + usage: GAUGE + status: enable + ttl: 10 + timeout: 1 + public: true + + +pg_stat_get_wal_senders: + name: pg_stat_get_wal_senders + desc: pg stat get wal senders + query: + - name: pg_stat_get_wal_senders + sql: SELECT pid, sender_flush_location, 
receiver_replay_location,pg_catalog.pg_xlog_location_diff(sender_flush_location,receiver_replay_location) as xlog_location_diff FROM pg_catalog.pg_stat_get_wal_senders(); + version: '>=0.0.0' + timeout: 1 + ttl: 10 + status: enable + dbRole: "" + metrics: + - name: pid + description: pid + usage: GAUGE + - name: sender_flush_location + description: sender flush location + usage: LABEL + - name: receiver_replay_location + description: receiver replay location + usage: LABEL + - name: xlog_location_diff + description: xlog_location_diff + usage: LABEL + status: enable + ttl: 10 + timeout: 1 + public: true + + +statement_responsetime_percentile: + name: statement_responsetime_percentile + desc: statement responsetime percentile + query: + - name: statement_responsetime_percentile + sql: SELECT p80, p95 FROM dbe_perf.statement_responsetime_percentile; + version: '>=0.0.0' + timeout: 1 + ttl: 10 + status: enable + dbRole: "" + metrics: + - name: p80 + description: 80percent SQL rt + usage: GAUGE + - name: p95 + description: 95percent SQL rt + usage: GAUGE + status: enable + ttl: 10 + timeout: 1 + public: true + +pg_node_info: + name: pg_node_info + desc: the information of current node + query: + - name: pg_node_info + sql: SELECT CURRENT_CATALOG AS datname, CASE WHEN pg_catalog.pg_is_in_recovery() THEN 'Y' ELSE 'N' END AS is_slave, node_name, installpath, datapath, EXTRACT(EPOCH FROM pg_catalog.now() - pg_catalog.pg_postmaster_start_time()) AS uptime, pg_catalog.version() FROM pg_catalog.pg_stat_get_env(); + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: is_slave + description: is slave node? + usage: LABEL + - name: node_name + description: node name + usage: LABEL + - name: installpath + description: install path + usage: LABEL + - name: datapath + description: data path + usage: LABEL + - name: uptime + description: uptime + usage: GAUGE + - name: version + description: database version + usage: LABEL + - name: datname + description: current connecting database + usage: LABEL + status: enable + ttl: 100 + timeout: 1 + public: true + +pg_stat_bgwriter: + name: pg_stat_bgwriter + desc: the information of current node + query: + - name: pg_stat_bgwriter + sql: |- + select checkpoint_sync_time / (checkpoints_timed + checkpoints_req) AS checkpoint_avg_sync_time, + checkpoints_req / (checkpoints_timed + checkpoints_req) AS checkpoint_proactive_triggering_ratio, + buffers_checkpoint, buffers_clean, buffers_backend, buffers_alloc from pg_stat_bgwriter; + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: checkpoint_avg_sync_time + description: checkpoint_avg_sync_time + usage: GAUGE + - name: checkpoint_proactive_triggering_ratio + description: checkpoint_proactive_triggering_ratio + usage: GAUGE + - name: buffers_checkpoint + description: number of checkpoint write buffers + usage: LABEL + - name: buffers_clean + description: number of write buffers of the backend writer process + usage: LABEL + - name: buffers_backend + description: number of direct write buffers through the backend + usage: LABEL + - name: buffers_alloc + description: number of buffers allocated + usage: LABEL + +pg_statio_all_tables: + name: pg_statio_all_tables + desc: the information of current node + query: + - name: pg_statio_all_tables + sql: |- + select pg_catalog.sum(heap_blks_hit)*100/(pg_catalog.sum(heap_blks_read)+pg_catalog.sum(heap_blks_hit)+1) AS shared_buffer_heap_hit_rate, + 
pg_catalog.sum(toast_blks_hit)*100/(pg_catalog.sum(toast_blks_read)+pg_catalog.sum(toast_blks_hit)+1) AS shared_buffer_toast_hit_rate, + pg_catalog.sum(tidx_blks_hit)*100/(pg_catalog.sum(tidx_blks_read)+pg_catalog.sum(tidx_blks_hit)+1) AS shared_buffer_tidx_hit_rate, + pg_catalog.sum(idx_blks_hit)*100/(pg_catalog.sum(idx_blks_read)+pg_catalog.sum(idx_blks_hit)+1) AS shared_buffer_idx_hit_rate + from pg_statio_all_tables ; + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: shared_buffer_heap_hit_rate + description: shared_buffer_heap_hit_rate + usage: GAUGE + - name: shared_buffer_toast_hit_rate + description: shared_buffer_toast_hit_rate + usage: GAUGE + - name: shared_buffer_tidx_hit_rate + description: shared_buffer_tidx_hit_rate + usage: GAUGE + - name: shared_buffer_idx_hit_rate + description: shared_buffer_idx_hit_rate + usage: GAUGE + status: enable + ttl: 100 + timeout: 1 + public: true + +pg_prepared_xacts: + name: pg_prepared_xacts + desc: the information of current node + query: + - name: pg_prepared_xacts + sql: select pg_catalog.count(1) AS count from pg_prepared_xacts; + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: count + description: current_prepared_xacts_count + usage: GAUGE + status: enable + ttl: 100 + timeout: 1 + public: true + +pg_stat_database: + name: pg_stat_database + desc: the information of current node + query: + - name: pg_stat_database + sql: select pg_catalog.max(temp_bytes / temp_files) / 1024 AS temp_file_size from pg_stat_database where temp_files > 0; + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: temp_file_size + description: temp_file_size + usage: GAUGE + status: enable + ttl: 100 + timeout: 1 + public: true + + +pg_lock_time_info: + name: pg_lock_time_info + desc: pg_lock_time_info + query: + - name: pg_lock_time_info + sql: SELECT + d.datname, + pg_catalog.sum(extract(epoch + FROM pg_catalog.now() - s.xact_start)) AS holding_time + FROM pg_locks AS l + INNER JOIN pg_database AS d ON l.database = d.oid + INNER JOIN pg_stat_activity AS s ON l.pid = s.pid + WHERE s.pid != pg_catalog.pg_backend_pid() group by d.datname; + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: datname + description: database name + usage: LABEL + - name: holding_time + description: database lock holding time + usage: GAUGE + status: enable + ttl: 100 + timeout: 1 + public: true + +gs_sql_count: + name: gs_sql_count + desc: gs_sql_count + + query: + - name: gs_sql_count + sql: |- + select sum(select_count) as select, sum(update_count) as update, sum(insert_count) as insert, sum(delete_count) as delete, sum(mergeinto_count) as mergeinto, + sum(ddl_count) as ddl, sum(dml_count) as dml, sum(dcl_count) as dcl from gs_sql_count; + version: '>=0.0.0' + timeout: 1 + ttl: 100 + status: enable + dbRole: "" + metrics: + - name: node_name + description: the name of node + usage: LABEL + - name: select + description: the count of select sql + usage: LABEL + - name: insert + description: the count of insert sql + usage: LABEL + - name: delete + description: the count of delete sql + usage: LABEL + - name: update + description: the count of update sql + usage: LABEL + - name: mergeinto + description: the count of mergeinto sql + usage: LABEL + - name: ddl + description: the count of ddl sql + usage: LABEL + - name: dml + description: the count of dml sql + usage: LABEL + 
- name: dcl + description: the count of dcl sql + usage: GAUGE diff --git a/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/statements.yml b/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/statements.yml index cfece2a1a..4c3480baf 100644 --- a/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/statements.yml +++ b/src/gausskernel/dbmind/tools/components/opengauss_exporter/yamls/statements.yml @@ -17,45 +17,47 @@ pg_sql_statement_history: query: - name: pg_sql_statement_history sql: " - SELECT H.unique_query_id, - H.db_name AS datname, - H.schema_name AS SCHEMA, - H.query, - (extract(epoch - FROM H.start_time) * 1000)::bigint as start_time, - (extract(epoch - FROM H.finish_time) * 1000)::bigint as finish_time, - extract(epoch - FROM H.finish_time - H.start_time)* 1000 AS exc_time, - H.cpu_time, - H.data_io_time, - H.n_returned_rows, - H.n_tuples_fetched, - H.n_tuples_returned, - H.n_tuples_inserted, - H.n_tuples_updated, - H.n_tuples_deleted, - (H.n_blocks_hit / (H.n_blocks_fetched+0.01)) AS hit_rate, - (H.n_blocks_fetched / (H.n_blocks_hit+0.01)) AS fetch_rate, - H.lock_wait_count, - H.lwlock_wait_count, - S.n_calls, - S.sort_count / S.n_calls AS sort_count, - S.sort_mem_used / S.n_calls AS sort_mem_used, - S.sort_spill_count / S.n_calls AS sort_spill_count, - S.hash_count / S.n_calls AS hash_count, - S.hash_mem_used / S.n_calls AS hash_mem_used, - S.hash_spill_count / S.n_calls AS hash_spill_count - FROM dbe_perf.statement_history H inner join dbe_perf.statement S - on H.unique_query_id = S.unique_sql_id - WHERE H.query !='COMMIT' - AND H.application_name != 'gs_clean' - AND S.n_calls > 1 - AND (H.start_time > now() - (1 / 24 / 60 / 60) * ({scrape_interval} / 1000) - OR (exc_time > {scrape_interval} AND H.finish_time > now() - (1 / 24 / 60 / 60) * ({scrape_interval} / 1000)) - ) - ORDER BY H.start_time DESC - LIMIT 50;" + SELECT H.unique_query_id, + H.db_name AS datname, + H.schema_name AS SCHEMA, + H.query, + H.query_plan, + (extract(epoch + FROM H.start_time) * 1000)::bigint as start_time, + (extract(epoch + FROM H.finish_time) * 1000)::bigint as finish_time, + extract(epoch + FROM H.finish_time - H.start_time) * 1000 AS exc_time, + H.cpu_time, + H.data_io_time, + H.n_returned_rows, + H.n_tuples_fetched, + H.n_tuples_returned, + H.n_tuples_inserted, + H.n_tuples_updated, + H.n_tuples_deleted, + (H.n_blocks_hit / (H.n_blocks_fetched+0.01)) AS hit_rate, + (H.n_blocks_fetched / (H.n_blocks_hit+0.01)) AS fetch_rate, + H.lock_wait_count, + H.lwlock_wait_count, + S.n_calls, + S.sort_count / S.n_calls AS sort_count, + S.sort_mem_used / S.n_calls AS sort_mem_used, + S.sort_spill_count / S.n_calls AS sort_spill_count, + S.hash_count / S.n_calls AS hash_count, + S.hash_mem_used / S.n_calls AS hash_mem_used, + S.hash_spill_count / S.n_calls AS hash_spill_count + FROM dbe_perf.statement_history H inner join dbe_perf.statement S + on H.unique_query_id = S.unique_sql_id + WHERE H.query !='COMMIT' + AND H.application_name != 'gs_clean' + AND S.n_calls > 1 + AND (H.start_time > extract(epoch from now()) * 1000 - (1 / 24 / 60 / 60) * ({scrape_interval}) + OR (exc_time > {scrape_interval} + AND H.finish_time > extract(epoch from now()) * 1000 - (1 / 24 / 60 / 60) * ({scrape_interval})) + ) + ORDER BY H.start_time DESC + LIMIT 50;" version: '>=0.0.0' timeout: 10 status: enable @@ -75,6 +77,9 @@ pg_sql_statement_history: - name: query description: query text usage: LABEL + - name: query_plan + description: query plan text + usage: LABEL + - name: 
unique_query_id description: unique query id usage: LABEL @@ -144,3 +149,49 @@ pg_sql_statement_history: status: enable ttl: 0 timeout: 1 + + +pg_sql_statement_full: + name: pg_sql_statement_full + desc: Extract full SQL statements from openGauss, which is a sampling process. + query: + - name: pg_sql_statement_full + sql: " + SELECT count(1), + datname, + application_name, + query + FROM pg_stat_activity + WHERE application_name NOT IN ('Asp', + 'statement flush thread', + 'JobScheduler', + 'WDRSnapshot', + 'PercentileJob', + 'gs_clean') + AND Pg_current_sessid() != sessionid + AND query ilike '%FROM%' + AND ( + query_start > now() - (1 / 24 / 60 / 60) * ({scrape_interval} / 1000)) + GROUP BY datname, + application_name, + query; + " + version: '>=0.0.0' + timeout: 10 + status: enable + metrics: + - name: count + description: count + usage: GAUGE + - name: datname + description: Name of database + usage: LABEL + - name: application_name + description: Name of applacation + usage: LABEL + - name: query + description: SQL statement + usage: LABEL + status: enable + ttl: 0 + timeout: 1 diff --git a/src/gausskernel/dbmind/tools/components/reprocessing_exporter/reprocessing_exporter.yml b/src/gausskernel/dbmind/tools/components/reprocessing_exporter/reprocessing_exporter.yml index 1435ec6ef..801b5abfd 100644 --- a/src/gausskernel/dbmind/tools/components/reprocessing_exporter/reprocessing_exporter.yml +++ b/src/gausskernel/dbmind/tools/components/reprocessing_exporter/reprocessing_exporter.yml @@ -476,3 +476,31 @@ gaussdb_qps_by_instance: ttl: 60 timeout: 0.1 +load_average: + name: load_average + desc: load average for database server. + query: + - name: load_average + promql: " + label_replace( + node_load1, + 'instance', '$1', 'instance', '(.*):.*') + " + version: '>=0.0.0' + timeout: 0.1 + ttl: 10 + status: enable + dbRole: "" + metrics: + - name: from_job + label: job + description: from job + usage: LABEL + - name: from_instance + label: instance + description: from instance + usage: LABEL + status: enable + ttl: 60 + timeout: 0.1 + diff --git a/src/gausskernel/dbmind/tools/components/sqldiag/result.png b/src/gausskernel/dbmind/tools/components/sqldiag/result.png index aaabd4bf3..b616bb6e8 100644 Binary files a/src/gausskernel/dbmind/tools/components/sqldiag/result.png and b/src/gausskernel/dbmind/tools/components/sqldiag/result.png differ diff --git a/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/__init__.py b/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/__init__.py index 266b1b9f1..3f47a9c7c 100644 --- a/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/__init__.py +++ b/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/__init__.py @@ -30,7 +30,11 @@ local_ssh = ExecutorFactory() \ def get_benchmark_instance(script, path, cmd, db_info): - name = script.rstrip('.py') + if script.endswith('.py'): + name = script[:-len('.py')] + else: + name = script + if not os.path.exists(os.path.join(os.path.dirname(__file__), name + '.py')): raise ConfigureError('Incorrect configuration option benchmark_script. 
' 'Enter the filename of the script in the benchmark directory ' diff --git a/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/period.py b/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/period.py index 9e53e4875..2aa378adc 100644 --- a/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/period.py +++ b/src/gausskernel/dbmind/tools/components/xtuner/tuner/benchmark/period.py @@ -22,7 +22,7 @@ from tuner.exceptions import ExecutionError path = '' # Measure current total committed transactions that do not include xact_rollback. cmd = "gsql -U {user} -W {password} -d postgres -p {port} -c " \ - "\"SELECT sum(xact_commit) FROM pg_stat_database where datname = '{db}';\"" + "\"SELECT pg_catalog.sum(xact_commit) FROM pg_catalog.pg_stat_database where datname = '{db}';\"" # This script captures the performance indicators in the user's periodic execution task, and measures the quality diff --git a/src/gausskernel/dbmind/tools/components/xtuner/tuner/character.py b/src/gausskernel/dbmind/tools/components/xtuner/tuner/character.py index 6174765fa..c0b85e31f 100644 --- a/src/gausskernel/dbmind/tools/components/xtuner/tuner/character.py +++ b/src/gausskernel/dbmind/tools/components/xtuner/tuner/character.py @@ -61,7 +61,7 @@ class OpenGaussMetric: # main mem: max_connections * (work_mem + temp_buffers) + shared_buffers + wal_buffers sql = "select " \ "setting " \ - "from pg_settings " \ + "from pg_catalog.pg_settings " \ "where name in ('max_connections', 'work_mem', 'temp_buffers', 'shared_buffers', 'wal_buffers') " \ "order by name;" res = self._db.exec_statement(sql) @@ -75,29 +75,29 @@ class OpenGaussMetric: # You could define used internal state here. # this is a demo, cache_hit_rate, we will use it while tuning shared_buffer. 
cache_hit_rate_sql = "select blks_hit / (blks_read + blks_hit + 0.001) " \ - "from pg_stat_database " \ + "from pg_catalog.pg_stat_database " \ "where datname = '{}';".format(self._db.db_name) return self._get_numeric_metric(cache_hit_rate_sql) @property def uptime(self): return self._get_numeric_metric( - "select extract(epoch from now()-pg_postmaster_start_time()) / 60 / 60;") # unit: hour + "select extract(epoch from pg_catalog.now()-pg_catalog.pg_postmaster_start_time()) / 60 / 60;") # unit: hour @property def current_connections(self): return self._get_numeric_metric( - "select count(1) from pg_stat_activity where client_port is not null;") + "select pg_catalog.count(1) from pg_catalog.pg_stat_activity where client_port is not null;") @property def average_connection_age(self): - return self._get_numeric_metric("select extract(epoch from avg(now()-backend_start)) as age " - "from pg_stat_activity where client_port is not null;") # unit: second + return self._get_numeric_metric("select extract(epoch from pg_catalog.avg(pg_catalog.now()-backend_start)) as age " + "from pg_catalog.pg_stat_activity where client_port is not null;") # unit: second @property def all_database_size(self): return self._get_numeric_metric( - "select sum(pg_database_size(datname)) / 1024 from pg_database;") # unit: kB + "select sum(pg_catalog.pg_database_size(datname)) / 1024 from pg_catalog.pg_database;") # unit: kB @property def max_processes(self): @@ -109,12 +109,12 @@ class OpenGaussMetric: @property def current_prepared_xacts_count(self): - return self._get_numeric_metric("select count(1) from pg_prepared_xacts;") + return self._get_numeric_metric("select pg_catalog.count(1) from pg_catalog.pg_prepared_xacts;") @property def current_locks_count(self): return self._get_numeric_metric( - "select count(1) from pg_locks where transactionid in (select transaction from pg_prepared_xacts)") + "select pg_catalog.count(1) from pg_catalog.pg_locks where transactionid in (select transaction from pg_catalog.pg_prepared_xacts)") @property def checkpoint_dirty_writing_time_window(self): @@ -123,84 +123,84 @@ class OpenGaussMetric: @property def checkpoint_proactive_triggering_ratio(self): return self._get_numeric_metric( - "select checkpoints_req / (checkpoints_timed + checkpoints_req) from pg_stat_bgwriter;" + "select checkpoints_req / (checkpoints_timed + checkpoints_req) from pg_catalog.pg_stat_bgwriter;" ) @property def checkpoint_avg_sync_time(self): return self._get_numeric_metric( - "select checkpoint_sync_time / (checkpoints_timed + checkpoints_req) from pg_stat_bgwriter;" + "select checkpoint_sync_time / (checkpoints_timed + checkpoints_req) from pg_catalog.pg_stat_bgwriter;" ) @property def shared_buffer_heap_hit_rate(self): return self._get_numeric_metric( - "select sum(heap_blks_hit)*100/(sum(heap_blks_read)+sum(heap_blks_hit)+1) from pg_statio_all_tables ;") + "select pg_catalog.sum(heap_blks_hit)*100/(pg_catalog.sum(heap_blks_read)+pg_catalog.sum(heap_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;") @property def shared_buffer_toast_hit_rate(self): return self._get_numeric_metric( - "select sum(toast_blks_hit)*100/(sum(toast_blks_read)+sum(toast_blks_hit)+1) from pg_statio_all_tables ;" + "select pg_catalog.sum(toast_blks_hit)*100/(pg_catalog.sum(toast_blks_read)+pg_catalog.sum(toast_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;" ) @property def shared_buffer_tidx_hit_rate(self): return self._get_numeric_metric( - "select sum(tidx_blks_hit)*100/(sum(tidx_blks_read)+sum(tidx_blks_hit)+1) from 
pg_statio_all_tables ;" + "select pg_catalog.sum(tidx_blks_hit)*100/(pg_catalog.sum(tidx_blks_read)+pg_catalog.sum(tidx_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;" ) @property def shared_buffer_idx_hit_rate(self): return self._get_numeric_metric( - "select sum(idx_blks_hit)*100/(sum(idx_blks_read)+sum(idx_blks_hit)+1) from pg_statio_all_tables ;" + "select pg_catalog.sum(idx_blks_hit)*100/(pg_catalog.sum(idx_blks_read)+pg_catalog.sum(idx_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;" ) @property def temp_file_size(self): return self._get_numeric_metric( - "select max(temp_bytes / temp_files) / 1024 from pg_stat_database where temp_files > 0;" + "select pg_catalog.max(temp_bytes / temp_files) / 1024 from pg_catalog.pg_stat_database where temp_files > 0;" ) # unit: kB @property def read_write_ratio(self): return self._get_numeric_metric( "select tup_returned / (tup_inserted + tup_updated + tup_deleted + 0.001) " - "from pg_stat_database where datname = '%s';" % self._db.db_name + "from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name ) @property def search_modify_ratio(self): return self._get_numeric_metric( "select (tup_returned + tup_inserted) / (tup_updated + tup_deleted + 0.01) " - "from pg_stat_database where datname = '%s';" % self._db.db_name + "from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name ) @property def fetched_returned_ratio(self): return self._get_numeric_metric( "select tup_fetched / (tup_returned + 0.01) " - "from pg_stat_database where datname = '%s';" % self._db.db_name + "from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name ) @property def rollback_commit_ratio(self): return self._get_numeric_metric( "select xact_rollback / (xact_commit + 0.01) " - "from pg_stat_database where datname = '%s';" % self._db.db_name + "from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name ) @property def read_tup_speed(self): return self._get_numeric_metric( - "select tup_returned / (extract (epoch from (now() - stats_reset))) " - "from pg_stat_database where datname = '%s';" % self._db.db_name + "select tup_returned / (extract (epoch from (pg_catalog.now() - stats_reset))) " + "from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name ) @property def write_tup_speed(self): return self._get_numeric_metric( - "select (tup_inserted + tup_updated + tup_deleted) / (extract (epoch from (now() - stats_reset))) " - "from pg_stat_database where datname = '%s';" % self._db.db_name + "select (tup_inserted + tup_updated + tup_deleted) / (extract (epoch from (pg_catalog.now() - stats_reset))) " + "from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name ) @cached_property @@ -244,7 +244,7 @@ class OpenGaussMetric: @cached_property def block_size(self): return self._get_numeric_metric( - "select setting / 1024 from pg_settings where name = 'block_size';" + "select setting / 1024 from pg_catalog.pg_settings where name = 'block_size';" ) # unit: kB @property @@ -350,7 +350,7 @@ class OpenGaussMetric: @cached_property def enable_autovacuum(self): setting = self._db.exec_statement( - "select setting from pg_settings where name = 'autovacuum';" + "select setting from pg_catalog.pg_settings where name = 'autovacuum';" )[0][0] return setting == 'on' @@ -358,7 +358,7 @@ class OpenGaussMetric: return [self.cache_hit_rate, self.load_average[0]] def reset(self): - self._db.exec_statement("SELECT pg_stat_reset();") + self._db.exec_statement("SELECT pg_catalog.pg_stat_reset();") 
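A note on the pg_catalog. prefixes added throughout these monitoring queries: unqualified names such as now(), sum() or pg_settings are resolved through the session's search_path, so if pg_catalog is not explicitly first, a same-named user object can shadow the system one and change what the collector actually executes. Schema-qualifying every system function and catalog makes the queries independent of search_path. The sketch below only illustrates that idea and is not part of the patch; the helper name and the function list are hypothetical.

import re

# Hypothetical helper (illustration only): prefix bare calls to a few well-known
# system functions with pg_catalog so they cannot be shadowed via search_path.
SYSTEM_FUNCTIONS = ("now", "sum", "count", "avg", "max", "pg_table_size")

def qualify_system_functions(sql: str) -> str:
    # Match a bare function name followed by '(' that is not already preceded
    # by a schema qualifier or another identifier character.
    pattern = r"(?<![\w.])(%s)\s*\(" % "|".join(SYSTEM_FUNCTIONS)
    return re.sub(pattern, r"pg_catalog.\1(", sql, flags=re.IGNORECASE)

# Example:
# qualify_system_functions("select count(1) from pg_stat_activity where backend_start < now();")
# -> "select pg_catalog.count(1) from pg_stat_activity where backend_start < pg_catalog.now();"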
def to_dict(self): rv = dict() diff --git a/src/gausskernel/dbmind/tools/components/xtuner/tuner/executor.py b/src/gausskernel/dbmind/tools/components/xtuner/tuner/executor.py index 0e165ecf9..d8b447b8a 100644 --- a/src/gausskernel/dbmind/tools/components/xtuner/tuner/executor.py +++ b/src/gausskernel/dbmind/tools/components/xtuner/tuner/executor.py @@ -259,7 +259,7 @@ class LocalExec(Executor): # Have to use the `cwd` argument. # Otherwise, we can not change the current directory. if line.strip().startswith('cd '): - cwd = line.lstrip('cd ') + cwd = line.strip()[len('cd'):] continue proc = subprocess.Popen(shlex.split(line), diff --git a/src/gausskernel/dbmind/tools/components/xtuner/tuner/main.py b/src/gausskernel/dbmind/tools/components/xtuner/tuner/main.py index 2cca6cea8..97454f31f 100644 --- a/src/gausskernel/dbmind/tools/components/xtuner/tuner/main.py +++ b/src/gausskernel/dbmind/tools/components/xtuner/tuner/main.py @@ -29,7 +29,7 @@ from .exceptions import OptionError from .xtuner import procedure_main from . import utils -__version__ = '3.0.0' +__version__ = '2.1.0' __description__ = 'X-Tuner: a self-tuning tool integrated by openGauss.' @@ -189,8 +189,11 @@ def get_config(filepath): break benchmark_script = cp['Benchmark'].get('benchmark_script', '') - if benchmark_script.rstrip('.py') + '.py' not in benchmarks: + if benchmark_script.endswith('.py'): + benchmark_script = benchmark_script[:-len('.py')] + if benchmark_script + '.py' not in benchmarks: raise OptionError(invalid_opt_msg % ('benchmark_script', benchmarks)) + config['benchmark_path'] = cp['Benchmark'].get('benchmark_path', '') config['benchmark_cmd'] = cp['Benchmark'].get('benchmark_cmd', '') benchmark_period = cp['Benchmark'].get('benchmark_period', '0') diff --git a/src/gausskernel/dbmind/tools/metadatabase/__init__.py b/src/gausskernel/dbmind/tools/metadatabase/__init__.py index c582d38cc..d159672c2 100644 --- a/src/gausskernel/dbmind/tools/metadatabase/__init__.py +++ b/src/gausskernel/dbmind/tools/metadatabase/__init__.py @@ -13,11 +13,10 @@ import sqlalchemy from sqlalchemy.engine import create_engine -from sqlalchemy.exc import ProgrammingError from .base import Base, DynamicConfig from .schema import load_all_schema_models -from ..common.exceptions import SQLExecutionError +from ..common.exceptions import SQLExecutionError, DuplicateTableError def create_metadatabase_schema(check_first=True): @@ -32,6 +31,8 @@ def create_metadatabase_schema(check_first=True): checkfirst=check_first ) except Exception as e: + if 'DuplicateTable' in str(e): + raise DuplicateTableError(e) raise SQLExecutionError(e) @@ -46,6 +47,8 @@ def destroy_metadatabase(): session_clz.get('engine') ) except Exception as e: + if 'DuplicateTable' in str(e): + raise DuplicateTableError(e) raise SQLExecutionError(e) diff --git a/src/gausskernel/dbmind/tools/metadatabase/business_db.py b/src/gausskernel/dbmind/tools/metadatabase/business_db.py index 388078ffe..5c8c9afa0 100644 --- a/src/gausskernel/dbmind/tools/metadatabase/business_db.py +++ b/src/gausskernel/dbmind/tools/metadatabase/business_db.py @@ -28,6 +28,13 @@ def update_session_clz_from_configs(): port = global_vars.configs.get('METADATABASE', 'port') username = global_vars.configs.get('METADATABASE', 'username') password = global_vars.configs.get('METADATABASE', 'password') + if db_type in ('opengauss', 'postgres'): + valid_port = port.strip() != '' and port is not None + valid_host = host.strip() != '' and host is not None + if not valid_port: + raise ValueError('Invalid port for 
metadatabase %s: %s.' % (db_type, port)) + if not valid_host: + raise ValueError('Invalid host for metadatabase %s: %s.' % (db_type, host)) dsn = create_dsn(db_type, database, host, port, username, password) postgres_dsn = create_dsn(db_type, 'postgres', host, port, username, password) diff --git a/src/gausskernel/dbmind/tools/metadatabase/schema/slow_queries.py b/src/gausskernel/dbmind/tools/metadatabase/schema/slow_queries.py index f8af2dca1..598cf3444 100644 --- a/src/gausskernel/dbmind/tools/metadatabase/schema/slow_queries.py +++ b/src/gausskernel/dbmind/tools/metadatabase/schema/slow_queries.py @@ -10,7 +10,7 @@ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. -from sqlalchemy import Column, String, Integer, BigInteger, Float, Index +from sqlalchemy import Column, String, Integer, BigInteger, Float, Index, TEXT from .. import Base @@ -21,7 +21,7 @@ class SlowQueries(Base): slow_query_id = Column(Integer, primary_key=True, autoincrement=True) schema_name = Column(String(64), nullable=False) db_name = Column(String(64), nullable=False) - query = Column(String(1024), nullable=False) + query = Column(TEXT, nullable=False) template_id = Column(BigInteger) start_at = Column(BigInteger, nullable=False) duration_time = Column(Float, nullable=False) diff --git a/src/gausskernel/dbmind/tools/misc/grafana-template-slow-query-analysis.json b/src/gausskernel/dbmind/tools/misc/grafana-template-slow-query-analysis.json new file mode 100644 index 000000000..e8334d2a1 --- /dev/null +++ b/src/gausskernel/dbmind/tools/misc/grafana-template-slow-query-analysis.json @@ -0,0 +1,1476 @@ +{ + "__inputs": [ + { + "name": "DS_DBMIND-METADATABASE-OPENGAUSS", + "label": "DBMind-metadatabase-openGauss", + "description": "", + "type": "datasource", + "pluginId": "postgres", + "pluginName": "PostgreSQL" + } + ], + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.2.3" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "postgres", + "name": "PostgreSQL", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 20 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": 
"value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT pg_catalog.count(1) FROM (SELECT distinct schema_name, db_name, query FROM tb_slow_queries);", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Number of Unique Slow Queries", + "type": "stat" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "allValues" + ], + "fields": "/^threshold$/", + "values": true + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "hide": false, + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT setting::bigint as threshold FROM pg_settings WHERE name = 'log_min_duration_statement';", + "refId": "A", + "select": [ + [ + { + "params": [ + "setting" + ], + "type": "column" + } + ] + ], + "table": "pg_settings", + "timeColumn": "none", + "timeColumnType": "float8", + "where": [ + { + "name": "", + "params": [ + "name", + "=", + "log_min_duration_statement" + ], + "type": "expression" + } + ] + } + ], + "title": "Slow Query Threshold", + "type": "stat" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1000 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^avg$/", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select pg_catalog.avg(duration_time) from tb_slow_queries;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Mean Duration for Slow Query", + "type": "stat" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 
4, + "x": 12, + "y": 0 + }, + "id": 6, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "values": true + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT db_name, pg_catalog.count(1) FROM tb_slow_queries GROUP BY db_name;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Statistics For Database", + "type": "piechart" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 8, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "values": true + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT schema_name, pg_catalog.count(1) FROM tb_slow_queries GROUP BY schema_name;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Statistics For Schema", + "type": "piechart" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 14, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select (select pg_catalog.count(1) from tb_slow_queries where query like '%pg\\_%') as system_table, (select pg_catalog.count(1) from tb_slow_queries) - system_table as bussiness_table;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "System Table Rate In Slow Queries", + "type": "piechart" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 6 + }, + "id": 16, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select (select pg_catalog.count(1) from tb_slow_queries where query like '%select%') as SELECT, (select pg_catalog.count(1) from tb_slow_queries where query like '%delete%') as DELETE, (select pg_catalog.count(1) from tb_slow_queries where query like '%insert%') as INSERT, (select pg_catalog.count(1) from tb_slow_queries where query like '%update%') as UPDATE;\n", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Distribution of Slow Query", + "type": "bargauge" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 14, + "x": 4, + "y": 6 + }, + "id": 30, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "right", + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.5.5", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "WITH t_causes AS (SELECT trim(pg_catalog.split_part(pg_catalog.split_part(cause, ':', '2'), ',', '2')) AS title,\r\n trim(pg_catalog.split_part(pg_catalog.split_part(cause, ':', '2'), ',', '1'))::float AS prob\r\n FROM\r\n (SELECT pg_catalog.unnest(pg_catalog.regexp_split_to_array(root_cause, '\\n')) AS cause\r\n FROM tb_slow_queries))\r\nSELECT title,\r\n pg_catalog.sum(prob)\r\nFROM\r\n t_causes\r\nGROUP BY title \r\nhaving title is not null;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Statistics for Root Cause", + "type": "piechart" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 6 + }, + "id": 18, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT (pg_catalog.avg(cpu_time) / 1000 / 1000) FROM tb_slow_queries;\n", + "refId": "A", + 
"select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Mean CPU Time ", + "type": "gauge" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 6 + }, + "id": 19, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT (pg_catalog.avg(data_io_time) / 1000 / 1000) FROM tb_slow_queries;\n", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Mean IO Time ", + "type": "gauge" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "noValue": "-1", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 20 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 10 + }, + "id": 20, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT (pg_catalog.avg(hit_rate) * 100) FROM tb_slow_queries;\n", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Mean Buffer Hit Rate", + "type": "gauge" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 200 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 10 + }, + "id": 21, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT (pg_catalog.avg(fetch_rate) * 100) FROM tb_slow_queries;\n", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + 
"where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Mean Fetch Rate", + "type": "gauge" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "format": "time_series", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select (start_at / 1000)::bigint as time, pg_catalog.count(1) from tb_slow_queries group by time order by time;\n", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Slow Query Trend", + "type": "timeseries" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 23 + }, + "id": 23, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "values": true + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select template_id::text, pg_catalog.count(1) from tb_slow_queries group by template_id;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Statistics for Slow Query Template (template_id)", + "type": "piechart" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "template_id" + }, + "properties": [ + { + "id": "custom.width", + "value": 163 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "count" + }, + "properties": [ + { + "id": 
"custom.width", + "value": 134 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 13, + "x": 11, + "y": 23 + }, + "id": 25, + "options": { + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select distinct t1.template_id, t1.count, t2.query from (select template_id, pg_catalog.count(1) as count from tb_slow_queries group by template_id) t1 inner join tb_slow_queries t2 on t1.template_id = t2.template_id order by t1.count desc;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Slow Query Templates", + "type": "table" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 34, + "options": { + "showHeader": true + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT db_name, query, root_cause, suggestion, pg_catalog.count(query) FROM tb_slow_queries GROUP BY db_name, query, root_cause, suggestion;\n", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Slow Query Statistics", + "type": "table" + }, + { + "datasource": "${DS_DBMIND-METADATABASE-OPENGAUSS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 19, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 2, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "start_at" + } + ] + }, + "pluginVersion": "8.2.3", + "targets": [ + { + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select * from tb_slow_queries order by start_at desc limit 50;", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Recent 50 Slow Queries", + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 31, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "DBMind-slow-query-analysis", + "uid": "KowbonKnz", + "version": 42 +} + diff --git a/src/gausskernel/dbmind/tools/requirements-aarch64.txt b/src/gausskernel/dbmind/tools/requirements-aarch64.txt index caea1e5db..d2deee98a 100644 --- a/src/gausskernel/dbmind/tools/requirements-aarch64.txt +++ 
diff --git a/src/gausskernel/dbmind/tools/requirements-aarch64.txt b/src/gausskernel/dbmind/tools/requirements-aarch64.txt
index caea1e5db..d2deee98a 100644
--- a/src/gausskernel/dbmind/tools/requirements-aarch64.txt
+++ b/src/gausskernel/dbmind/tools/requirements-aarch64.txt
@@ -17,4 +17,5 @@ pyyaml
 prometheus-client
 ## X-Tuner ##
 bayesian-optimization
-ptable
+prettytable>=2.5.0
+
diff --git a/src/gausskernel/dbmind/tools/requirements-x86.txt b/src/gausskernel/dbmind/tools/requirements-x86.txt
index 64b206871..b69dcab2d 100644
--- a/src/gausskernel/dbmind/tools/requirements-x86.txt
+++ b/src/gausskernel/dbmind/tools/requirements-x86.txt
@@ -15,4 +15,5 @@ pyyaml
 prometheus-client
 ## X-Tuner ##
 bayesian-optimization
-ptable
+prettytable>=2.5.0
+
diff --git a/src/gausskernel/dbmind/tools/service/dai.py b/src/gausskernel/dbmind/tools/service/dai.py
index 78df0e0f2..d828ea691 100644
--- a/src/gausskernel/dbmind/tools/service/dai.py
+++ b/src/gausskernel/dbmind/tools/service/dai.py
@@ -140,26 +140,28 @@ def get_latest_metric_value(metric_name):


 def save_forecast_sequence(metric_name, host, sequence):
-    dao.forecasting_metrics.batch_insert_forecasting_metric(
-        metric_name, host, sequence.values, sequence.timestamps,
-        metric_type=get_metric_type(metric_name),
-        node_id=None
-    )
+    if sequence is not None:
+        dao.forecasting_metrics.batch_insert_forecasting_metric(
+            metric_name, host, sequence.values, sequence.timestamps,
+            metric_type=get_metric_type(metric_name),
+            node_id=None
+        )


 def save_slow_queries(slow_queries):
     for slow_query in slow_queries:
-        dao.slow_queries.insert_slow_query(
-            schema_name=slow_query.schema_name,
-            db_name=slow_query.db_name,
-            query=slow_query.query,
-            start_at=slow_query.start_at,
-            duration_time=slow_query.duration_time,
-            hit_rate=slow_query.hit_rate, fetch_rate=slow_query.fetch_rate,
-            cpu_time=slow_query.cpu_time, data_io_time=slow_query.data_io_time,
-            root_cause=slow_query.root_causes, suggestion=slow_query.suggestions,
-            template_id=slow_query.template_id
-        )
+        if slow_query is not None:
+            dao.slow_queries.insert_slow_query(
+                schema_name=slow_query.schema_name,
+                db_name=slow_query.db_name,
+                query=slow_query.query,
+                start_at=slow_query.start_at,
+                duration_time=slow_query.duration_time,
+                hit_rate=slow_query.hit_rate, fetch_rate=slow_query.fetch_rate,
+                cpu_time=slow_query.cpu_time, data_io_time=slow_query.data_io_time,
+                root_cause=slow_query.root_causes, suggestion=slow_query.suggestions,
+                template_id=slow_query.template_id
+            )


 def get_all_slow_queries(minutes):
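The dai.py hunk above makes the persistence helpers tolerant of missing data: a forecast sequence that could not be produced, or a None placeholder among the collected slow queries, is now skipped instead of raising an AttributeError inside the DAO call. A small standalone sketch of the same None-guard pattern follows; the FakeDao class is a hypothetical in-memory stand-in used only to keep the example self-contained, not DBMind's real dao module.

```python
# Illustration of the None-guard pattern used in save_forecast_sequence /
# save_slow_queries above, with a hypothetical in-memory DAO stand-in.
class FakeDao:
    def __init__(self):
        self.rows = []

    def insert_slow_query(self, **fields):
        # Record the inserted fields instead of writing to a database.
        self.rows.append(fields)


def save_slow_queries(dao, slow_queries):
    for slow_query in slow_queries:
        if slow_query is None:  # skip placeholders instead of crashing
            continue
        dao.insert_slow_query(query=slow_query["query"],
                              db_name=slow_query["db_name"])


if __name__ == "__main__":
    dao = FakeDao()
    save_slow_queries(dao, [{"query": "select 1", "db_name": "postgres"}, None])
    print(len(dao.rows))  # prints 1 -- the None entry was silently dropped
```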