!1883 Fix #I5DL5V: Fix some known bugs of DBMind

Merge pull request !1883 from wangtq/master
This commit is contained in:
opengauss-bot
2022-06-22 12:10:45 +00:00
committed by Gitee
42 changed files with 2997 additions and 791 deletions

View File

@ -292,9 +292,11 @@ static void show_version(int alls)
if (alls) {
printf("VERSION = ");
}
#ifdef ENABLE_MULTIPLE_NODES
printf("PostgreSQL 9.2.4\n");
#else
#ifndef ENABLE_MULTIPLE_NODES
if (alls) {
printf("OPENGAUSS_VERSION = ");
}
printf("openGauss 3.0.0\n");
#endif
}

Binary file not shown (image, 26 KiB).

View File

@ -12,10 +12,19 @@
# See the Mulan PSL v2 for more details.
import logging
from dbmind.common.platform import LINUX
from dbmind.common.types.root_cause import RootCause
from .slow_sql.analyzer import SlowSQLAnalyzer
_analyzer = SlowSQLAnalyzer()
if LINUX:
from dbmind.common.dispatcher.task_worker import get_mp_sync_manager
shared_sql_buffer = get_mp_sync_manager().list()
else:
shared_sql_buffer = None
_analyzer = SlowSQLAnalyzer(buffer=shared_sql_buffer)
def diagnose_query(slow_query):
@ -25,3 +34,4 @@ def diagnose_query(slow_query):
slow_query.add_cause(RootCause.get('LACK_INFORMATION'))
logging.exception(e)
return slow_query
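
For context on the change above: a plain Python list created in the parent process is not visible to worker processes, while a SyncManager proxy list is. A minimal standalone sketch of that behaviour using only the standard multiprocessing API (names here are illustrative, not DBMind's dispatcher code):

from multiprocessing import Manager, Process

def record(buffer, item):
    # The proxy forwards the append to the manager process, so it is shared.
    buffer.append(item)

if __name__ == '__main__':
    manager = Manager()
    shared_buffer = manager.list()   # analogous to get_mp_sync_manager().list()
    workers = [Process(target=record, args=(shared_buffer, i)) for i in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(sorted(shared_buffer))     # [0, 1, 2]: all workers wrote into one buffer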

View File

@ -93,13 +93,13 @@ class SlowSQLAnalyzer:
Classes for diagnosing slow SQL
"""
def __init__(self, topk: int = 3, buffer_capacity: int = 500):
def __init__(self, topk: int = 3, buffer_capacity: int = 500, buffer=None):
"""
:param topk: The number of output root causes
:param buffer_capacity: The length of slow SQL buffer queue
"""
self.topk = topk
self.sql_buffers = []
self.sql_buffers = buffer if buffer is not None else []
self.buffer_capacity = buffer_capacity
def run(self, slow_query_instance: SlowQuery) -> [SlowQuery, None]:
@ -128,13 +128,37 @@ class SlowSQLAnalyzer:
self.sql_buffers.append(diagnosed_flag)
return False
@staticmethod
def associate_table_with_schema(schema_infos: Dict, query: str, schema_name: str, exist_tables: Dict):
"""
Find the schema and table in the query. There are three situations:
1. schema.table: the table information can be matched directly with a regular expression.
2. If the slow SQL's schema is present in pg_class (schema_infos), look up its tables there.
3. Otherwise, search all schemas in pg_class (schema_infos) for possible table matches.
"""
regex_result = re.findall(r"([\w\d_]+)\.([\w\d_]+)", query)
if regex_result:
for schema, table in regex_result:
exist_tables[schema].append(table)
query.replace("%s.%s" % (schema, table), ' ')
if schema_name in schema_infos:
for table in schema_infos[schema_name]:
if table in query:
exist_tables[schema_name].append(table)
else:
for schema, tables in schema_infos.items():
for table in tables:
if table.upper() in query.upper():
exist_tables[schema].append(table)
return
def _analyze(self, slow_sql_instance: SlowQuery, data_factory: query_info_source.QueryContext,
schema_infos: Dict) -> [SlowQuery,
None]:
"""Slow SQL diagnosis main process"""
logging.debug(f"[SLOW QUERY] Diagnosing SQL: {slow_sql_instance.query}")
exist_tables = defaultdict(list)
if slow_sql_instance.query.upper() == 'COMMIT' or slow_sql_instance.query.upper().startswith('SET'):
if slow_sql_instance.query.strip().upper() == 'COMMIT' or slow_sql_instance.query.strip().upper().startswith('SET'):
title = FEATURES_CAUSE_MAPPER.get('C_UNKNOWN')
root_cause = RootCause.get(title)
slow_sql_instance.add_cause(root_cause)
@ -147,16 +171,8 @@ class SlowSQLAnalyzer:
root_cause = RootCause.get(FEATURES_CAUSE_MAPPER.get('C_SQL'))
slow_sql_instance.add_cause(root_cause)
return
if schema_infos:
query = slow_sql_instance.query
regex_result = re.findall(r"([\w\d_]+)\.([\w\d_]+)", slow_sql_instance.query)
if regex_result:
for schema, table in regex_result:
exist_tables[schema].append(table)
query.replace("%s.%s" % (schema, table), ' ')
for table in schema_infos[slow_sql_instance.schema_name]:
if table in query:
exist_tables[slow_sql_instance.schema_name].append(table)
query = slow_sql_instance.query
self.associate_table_with_schema(schema_infos, query, slow_sql_instance.schema_name, exist_tables)
slow_sql_instance.tables_name = exist_tables
feature_generator = QueryFeature(slow_sql_instance, data_factory)
feature_generator.initialize_metrics()
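
To illustrate the first matching rule of associate_table_with_schema, a tiny standalone run of the same schema.table regular expression (the query text is invented for the example):

import re
from collections import defaultdict

query = "SELECT count(*) FROM public.orders, sales.items"
exist_tables = defaultdict(list)
# Rule 1: explicit schema.table references are captured directly by the regex.
for schema, table in re.findall(r"([\w\d_]+)\.([\w\d_]+)", query):
    exist_tables[schema].append(table)
print(dict(exist_tables))   # {'public': ['orders'], 'sales': ['items']}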

View File

@ -150,7 +150,9 @@ class QueryFeature:
return False
live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in
self.table_structure}
if (fetched_tuples + returned_tuples) / max(live_tuples_list.values()) > _get_threshold('fetch_rate_limit'):
if (fetched_tuples + returned_tuples) / max(live_tuples_list.values()) > _get_threshold(
'fetch_rate_limit') or (fetched_tuples + returned_tuples) > _get_threshold(
'fetch_tuples_limit'):
self.detail['fetched_tuples'] = fetched_tuples + returned_tuples
self.detail['fetched_tuples_rate'] = round(
(fetched_tuples + returned_tuples) / max(live_tuples_list.values()), 4)
@ -171,7 +173,7 @@ class QueryFeature:
live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in
self.table_structure}
if returned_rows / max(live_tuples_list.values()) > _get_threshold(
'returned_rate_limit'):
'returned_rate_limit') or returned_rows > _get_threshold('returned_rows_limit'):
self.detail['returned_rows'] = returned_rows
self.detail['returned_rows_rate'] = round(returned_rows / max(live_tuples_list.values()), 4)
return True
@ -231,7 +233,7 @@ class QueryFeature:
live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in
self.table_structure}
if updated_tuples / max(live_tuples_list.values()) > _get_threshold(
'updated_rate_limit'):
'updated_rate_limit') or updated_tuples > _get_threshold('updated_tuples_limit'):
self.detail['updated_tuples'] = updated_tuples
self.detail['updated_tuples_rate'] = round(updated_tuples / max(live_tuples_list.values()), 4)
return True
@ -251,7 +253,7 @@ class QueryFeature:
live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in
self.table_structure}
if inserted_tuples / max(live_tuples_list.values()) > _get_threshold(
'inserted_rate_limit'):
'inserted_rate_limit') or inserted_tuples > _get_threshold('inserted_tuples_limit'):
self.detail['inserted_tuples'] = inserted_tuples
self.detail['inserted_tuples_rate'] = round(inserted_tuples / max(live_tuples_list.values()), 4)
return True
@ -291,7 +293,7 @@ class QueryFeature:
live_tuples_list = {f"{item.schema_name}:{item.table_name}": item.live_tuples for item in
self.table_structure}
if deleted_tuples / max(live_tuples_list.values()) > _get_threshold(
'deleted_rate_limit'):
'deleted_rate_limit') or deleted_tuples > _get_threshold('deleted_tuples_limit'):
self.detail['deleted_tuples'] = deleted_tuples
self.detail['deleted_tuples_rate'] = round(deleted_tuples / max(live_tuples_list.values()), 4)
return True
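
The pattern introduced in this file is the same in every branch: trigger the feature either on a high ratio against the live tuples or on a large absolute count. A minimal sketch with made-up threshold values (the real ones come from _get_threshold):

# Hypothetical threshold values, for illustration only.
THRESHOLDS = {'fetch_rate_limit': 0.3, 'fetch_tuples_limit': 100000}

def exceeds_fetch_limit(fetched_tuples, returned_tuples, max_live_tuples):
    total = fetched_tuples + returned_tuples
    # Either a high ratio of live tuples or a large absolute count triggers the feature.
    return (total / max_live_tuples > THRESHOLDS['fetch_rate_limit']
            or total > THRESHOLDS['fetch_tuples_limit'])

print(exceeds_fetch_limit(2000, 500, 5000))       # True: ratio 0.5 exceeds 0.3
print(exceeds_fetch_limit(10, 10, 1000000))       # False: small ratio and small count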

View File

@ -18,7 +18,7 @@ from dbmind.common.parser.sql_parsing import is_num, str2int
from dbmind.common.utils import ExceptionCatch
from dbmind.service import dai
excetpion_catcher = ExceptionCatch(strategy='exit', name='SLOW QUERY')
exception_catcher = ExceptionCatch(strategy='warn', name='SLOW QUERY')
class TableStructure:
@ -74,7 +74,7 @@ class SystemInfo:
def __init__(self):
self.db_host = None
self.db_port = None
self.iops = 0.0
self.iops = 0
self.ioutils = {}
self.iocapacity = 0.0
self.iowait = 0.0
@ -86,7 +86,7 @@ class SystemInfo:
class QueryContext:
"""The object of slow query data processing factory"""
def __init__(self, slow_sql_instance, default_fetch_interval=15, expansion_factor=5,
def __init__(self, slow_sql_instance, default_fetch_interval=15, expansion_factor=8,
retrieval_time=5):
"""
:param slow_sql_instance: The instance of slow query
@ -108,13 +108,13 @@ class QueryContext:
logging.debug('[SLOW QUERY] fetch start time: %s, fetch end time: %s', self.query_start_time, self.query_end_time)
logging.debug('[SLOW QUERY] fetch interval: %s', self.fetch_interval)
@excetpion_catcher
@exception_catcher
def acquire_pg_class(self) -> Dict:
"""Get all object information in the database"""
pg_class = {}
sequences = dai.get_metric_sequence('pg_class_relsize', self.query_start_time, self.query_end_time).from_server(
f"{self.slow_sql_instance.db_host}:{self.slow_sql_instance.db_port}").fetchall()
sequences = [sequence for sequence in sequences if sequence.labels]
for sequence in sequences:
pg_class['db_host'] = self.slow_sql_instance.db_host
pg_class['db_port'] = self.slow_sql_instance.db_port
@ -132,7 +132,7 @@ class QueryContext:
pg_class[db_name][schema_name].append(table_name)
return pg_class
@excetpion_catcher
@exception_catcher
def acquire_fetch_interval(self) -> int:
"""Get data source collection frequency"""
sequence = dai.get_latest_metric_sequence("os_disk_iops", self.retrieval_time).from_server(
@ -143,7 +143,7 @@ class QueryContext:
self.fetch_interval = int(timestamps[-1]) // 1000 - int(timestamps[-2]) // 1000
return self.fetch_interval
@excetpion_catcher
@exception_catcher
def acquire_lock_info(self) -> LockInfo:
"""Get lock information during slow SQL execution"""
blocks_info = LockInfo()
@ -152,6 +152,7 @@ class QueryContext:
f"{self.slow_sql_instance.db_host}:{self.slow_sql_instance.db_port}").fetchall()
logging.debug('[SLOW QUERY] acquire_lock_info: %s.', locks_sequences)
locked_query, locked_query_start, locker_query, locker_query_start = [], [], [], []
locks_sequences = [sequence for sequence in locks_sequences if sequence.labels]
for locks_sequence in locks_sequences:
logging.debug('[SLOW QUERY] acquire_lock_info: %s.', locks_sequence)
locked_query.append(locks_sequence.labels.get('locked_query', 'Unknown'))
@ -165,7 +166,7 @@ class QueryContext:
return blocks_info
@excetpion_catcher
@exception_catcher
def acquire_tables_structure_info(self) -> List:
"""Acquire table structure information related to slow query"""
table_structure = []
@ -224,12 +225,12 @@ class QueryContext:
if index_number_info:
table_info.index = [item.labels['relname'] for item in index_number_info if item.labels]
if redundant_index_info:
table_info.redundant_index = [item.labels['indexrelname'] for item in redundant_index_info]
table_info.redundant_index = [item.labels['indexrelname'] for item in redundant_index_info if item.labels]
table_structure.append(table_info)
return table_structure
@excetpion_catcher
@exception_catcher
def acquire_database_info(self) -> DatabaseInfo:
"""Acquire table database information related to slow query"""
database_info = DatabaseInfo()
@ -262,7 +263,7 @@ class QueryContext:
return database_info
@excetpion_catcher
@exception_catcher
def acquire_system_info(self) -> SystemInfo:
"""Acquire system information on the database server """
system_info = SystemInfo()
@ -288,16 +289,23 @@ class QueryContext:
self.query_end_time).from_server(
f"{self.slow_sql_instance.db_host}").fetchone()
logging.debug('[SLOW QUERY] acquire_database_info[mem_usage]: %s.', mem_usage_info)
load_average_info = dai.get_metric_sequence("node_load1", self.query_start_time, self.query_end_time).filter(
instance=f"{self.slow_sql_instance.db_host}:9100").fetchone()
load_average_info = dai.get_metric_sequence("load_average", self.query_start_time, self.query_end_time).from_server(
f"{self.slow_sql_instance.db_host}").fetchone()
logging.debug('[SLOW QUERY] acquire_database_info[load_average]: %s.', load_average_info)
system_info.iops = int(max(iops_info.values))
ioutils_dict = {item.labels['device']: round(float(max(item.values)), 4) for item in ioutils_info}
system_info.ioutils = ioutils_dict
system_info.iocapacity = round(float(max(iocapacity_info.values)), 4)
system_info.iowait = round(float(max(iowait_info.values)), 4)
system_info.cpu_usage = round(float(max(cpu_usage_info.values)), 4)
system_info.mem_usage = round(float(max(mem_usage_info.values)), 4)
system_info.load_average = round(float(max(load_average_info.values)), 4)
if iops_info.values:
system_info.iops = int(max(iops_info.values))
if ioutils_info:
ioutils_dict = {item.labels['device']: round(float(max(item.values)), 4) for item in ioutils_info if item.labels}
system_info.ioutils = ioutils_dict
if iocapacity_info.values:
system_info.iocapacity = round(float(max(iocapacity_info.values)), 4)
if iowait_info.values:
system_info.iowait = round(float(max(iowait_info.values)), 4)
if cpu_usage_info.values:
system_info.cpu_usage = round(float(max(cpu_usage_info.values)), 4)
if mem_usage_info.values:
system_info.mem_usage = round(float(max(mem_usage_info.values)), 4)
if load_average_info.values:
system_info.load_average = round(float(max(load_average_info.values)), 4)
return system_info
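
The added guards all follow one rule: only aggregate a metric sequence when the fetch actually returned samples. A stripped-down sketch of that rule with a stand-in sequence object (the real objects come from dai.get_metric_sequence):

from types import SimpleNamespace

def max_or_default(sequence, default=0.0, ndigits=4):
    # Aggregate only when the fetched sequence carries samples; otherwise keep the default.
    if sequence.values:
        return round(float(max(sequence.values)), ndigits)
    return default

iowait_info = SimpleNamespace(values=(0.01, 0.08, 0.03))
empty_info = SimpleNamespace(values=())
print(max_or_default(iowait_info))   # 0.08
print(max_or_default(empty_info))    # 0.0 -- an empty fetch no longer raises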

View File

@ -11,6 +11,7 @@
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
import logging
from datetime import timedelta, datetime
from dbmind import constants
from dbmind import global_vars
@ -44,30 +45,6 @@ golden_kpi = list(map(
).split(',')
))
def quickly_forecast_wrapper(sequence, forecasting_minutes):
forecast_result = quickly_forecast(sequence, forecasting_minutes)
metric_value_range = metric_value_range_map.get(sequence.name)
if metric_value_range and forecast_result:
metric_value_range = metric_value_range.split(",")
try:
metric_value_low = float(metric_value_range[0])
metric_value_high = float(metric_value_range[1])
except ValueError as ex:
logging.warning("quickly_forecast_wrapper value error:%s,"
" so forecast_result will not be cliped." % ex)
return forecast_result
f_values = list(forecast_result.values)
for i in range(len(f_values)):
if f_values[i] < metric_value_low:
f_values[i] = metric_value_low
if f_values[i] > metric_value_high:
f_values[i] = metric_value_high
forecast_result.values = tuple(f_values)
return forecast_result
@timer(detection_interval)
def self_monitoring():
# diagnose for slow queries
@ -96,10 +73,19 @@ def forecast_kpi():
)
return
start = datetime.now() - timedelta(minutes=enough_history_minutes)
end = datetime.now()
for metric in golden_kpi:
last_sequences = dai.get_latest_metric_sequence(metric, enough_history_minutes).fetchall()
last_sequences = dai.get_metric_sequence(metric, start, end).fetchall()
try:
metric_value_range = global_vars.metric_value_range_map.get(metric)
lower, upper = map(float, metric_value_range.split(','))
except Exception:
lower, upper = 0, float("inf")
future_sequences = global_vars.worker.parallel_execute(
quickly_forecast_wrapper, ((sequence, how_long_to_forecast_minutes)
quickly_forecast, ((sequence, how_long_to_forecast_minutes, lower, upper)
for sequence in last_sequences)
)
detect_materials = list()
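
The replacement logic above moves range clipping into quickly_forecast: each metric may declare a 'lower,upper' range, and anything missing or unparsable falls back to (0, inf). A standalone sketch of just that parsing and clipping step (the range string is invented):

def parse_value_range(range_text):
    # Fall back to an unbounded range when the option is missing or malformed.
    try:
        lower, upper = map(float, range_text.split(','))
    except Exception:
        lower, upper = 0, float('inf')
    return lower, upper

lower, upper = parse_value_range('0,100')
clipped = [min(max(v, lower), upper) for v in [-5.0, 42.0, 130.0]]
print(clipped)   # [0.0, 42.0, 100.0]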

View File

@ -17,6 +17,7 @@ from configparser import NoSectionError, NoOptionError
from dbmind import constants
from dbmind.common import security
from dbmind.common.exceptions import InvalidPasswordException, ConfigSettingError
from dbmind.common.security import check_ip_valid, check_port_valid
from dbmind.common.utils import write_to_terminal
from dbmind.metadatabase.dao.dynamic_config import dynamic_config_get, dynamic_config_set
@ -55,34 +56,54 @@ CONFIG_OPTIONS = {
'LOG-level': ['DEBUG', 'INFO', 'WARNING', 'ERROR']
}
# Used by check_config_validity().
INTEGER_CONFIG = ['SELF-MONITORING-detection_interval',
'SELF-MONITORING-last_detection_time',
'SELF-MONITORING-forecasting_future_time',
'LOG-maxbytes',
'LOG-backupcount']
def check_config_validity(section, option, value):
def check_config_validity(section, option, value, silent=False):
config_item = '%s-%s' % (section, option)
# exceptional cases:
if config_item == 'METADATABASE-port':
return True, None
if config_item in ('METADATABASE-port', 'METADATABASE-host'):
if value.strip() == '' or value == NULL_TYPE:
return True, None
# normal inspection process:
if 'port' in option:
valid_port = str.isdigit(value) and 0 < int(value) <= 65535
valid_port = check_port_valid(value)
if not valid_port:
return False, 'Invalid port %s' % value
return False, 'Invalid port for %s: %s(1024-65535)' % (config_item, value)
if 'host' in option:
valid_host = check_ip_valid(value)
if not valid_host:
return False, 'Invalid IP Address for %s: %s' % (config_item, value)
if 'database' in option:
if value == NULL_TYPE or value.strip() == '':
return False, 'Unspecified database name'
if config_item in INTEGER_CONFIG:
if not str.isdigit(value) or int(value) <= 0:
return False, 'Invalid value for %s: %s' % (config_item, value)
options = CONFIG_OPTIONS.get(config_item)
if options and value not in options:
return False, 'Invalid choice: %s' % value
return False, 'Invalid choice for %s: %s' % (config_item, value)
if 'dbtype' in option and value == 'opengauss':
if 'dbtype' in option and value == 'opengauss' and not silent:
write_to_terminal(
'WARN: default PostgreSQL connector (psycopg2-binary) does not support openGauss.\n'
'It would help if you compiled psycopg2 with openGauss manually or '
'created a connection user after setting the GUC password_encryption_type to 1.',
color='yellow'
)
if 'dbtype' in option and value == 'sqlite' and not silent:
write_to_terminal(
'NOTE: SQLite currently only supports local deployment, so you only need to provide '
'METADATABASE-database information. If you provide other information, DBMind will '
'ignore them.',
color='yellow'
)
# Add more checks here.
return True, None
@ -115,11 +136,28 @@ def load_sys_configs(confile):
s2 = dynamic_config_get('dbmind_config', 'cipher_s2')
iv = dynamic_config_get('iv_table', '%s-%s' % (section, option))
try:
value = security.decrypt(s1, s2, iv, value.lstrip(ENCRYPTED_SIGNAL))
real_value = value[len(ENCRYPTED_SIGNAL):] if value.startswith(ENCRYPTED_SIGNAL) else value
value = security.decrypt(s1, s2, iv, real_value)
except Exception as e:
raise InvalidPasswordException(e)
else:
valid, reason = check_config_validity(section, option, value, silent=True)
if not valid:
raise ConfigSettingError('DBMind failed to start due to %s.' % reason)
return value
@staticmethod
def getint(section, option, *args, **kwargs):
"""Faked getint() for ConfigParser class."""
value = configs.get(section, option, *args, **kwargs)
valid, reason = check_config_validity(section, option, value, silent=True)
if not valid:
raise ConfigSettingError('DBMind failed to start due to %s.' % reason)
return int(value)
return ConfigWrapper()
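
A rough standalone approximation of the stricter checks referenced above. The real check_port_valid and check_ip_valid live in dbmind.common.security; these stand-ins only mimic what the error messages imply (ports 1024-65535, syntactically valid IP addresses):

import ipaddress

def check_port_valid(value):
    # Approximation: reject privileged ports below 1024, matching the hint text.
    return value.isdigit() and 1024 <= int(value) <= 65535

def check_ip_valid(value):
    try:
        ipaddress.ip_address(value)
        return True
    except ValueError:
        return False

print(check_port_valid('8080'), check_port_valid('80'))          # True False
print(check_ip_valid('127.0.0.1'), check_ip_valid('999.1.1.1'))  # True False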

View File

@ -23,7 +23,7 @@ from dbmind.cmd.config_utils import (
)
from dbmind.cmd.edbmind import SKIP_LIST
from dbmind.common import utils, security
from dbmind.common.exceptions import SetupError, SQLExecutionError
from dbmind.common.exceptions import SetupError, SQLExecutionError, DuplicateTableError
from dbmind.metadatabase import (
create_dynamic_config_schema,
create_metadatabase_schema,
@ -103,7 +103,8 @@ def initialize_and_check_config(confpath, interactive=False):
utils.write_to_terminal('Starting to connect to meta-database and create tables...', color='green')
try:
create_metadatabase_schema(check_first=False)
except SQLExecutionError:
utils.write_to_terminal('The setup process finished successfully.', color='green')
except DuplicateTableError:
utils.write_to_terminal('The given database has duplicate tables. '
'If you want to reinitialize the database, press [R]. '
'If you want to keep the existent tables, press [K].', color='red')
@ -117,7 +118,10 @@ def initialize_and_check_config(confpath, interactive=False):
create_metadatabase_schema(check_first=True)
if input_char == 'K':
utils.write_to_terminal('Ignoring...', color='green')
utils.write_to_terminal('The setup process finished successfully.', color='green')
utils.write_to_terminal('The setup process finished successfully.', color='green')
except SQLExecutionError:
utils.write_to_terminal('Failed to link metadatabase due to unknown error, '
'please check the database and its configuration.', color='red')
def setup_directory_interactive(confpath):

View File

@ -34,7 +34,7 @@ def binary_search(L, target):
return -1
def how_many_lesser_elements(L, target):
def binary_search_leftmost(L, target):
"""The function bases on finding the leftmost element with binary search.
About Binary Search
@ -68,7 +68,18 @@ def how_many_lesser_elements(L, target):
return lo
def how_many_larger_elements(L, target):
def binary_search_left(L, target):
"""Wrap the function ``how_many_lesser_elements(L, target)`` by adding
a check for return target.
:return -1 when not found the target target.
"""
lo = binary_search_leftmost(L, target)
return -1 if lo >= len(L) or L[lo] != target else lo
def binary_search_rightmost(L, target):
"""Similar to above function."""
if len(L) == 0:
return -1
# [0, length - 1]
@ -86,18 +97,16 @@ def how_many_larger_elements(L, target):
return hi
def binary_search_left(L, target):
"""Wrap the function ``how_many_lesser_elements(L, target)`` by adding
a check for return target.
:return -1 when not found the target target.
"""
lo = how_many_lesser_elements(L, target)
return -1 if lo >= len(L) or L[lo] != target else lo
def binary_search_right(L, target):
"""Similar to above function."""
hi = how_many_larger_elements(L, target)
hi = binary_search_rightmost(L, target)
return -1 if hi < 0 or L[hi] != target else hi
how_many_lesser_elements = binary_search_leftmost
def how_many_larger_elements(L, target):
right_most = binary_search_right(L, target)
if right_most >= 0:
return len(L) - 1 - right_most
return len(L) - binary_search_leftmost(L, target)
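
The renamed helpers mirror the standard bisect semantics on a sorted list; a quick self-contained check of the values the wrappers are expected to produce (bisect is used here only as a reference, it is not the module's implementation):

import bisect

L = [1, 2, 2, 2, 5, 7]
leftmost = bisect.bisect_left(L, 2)       # counterpart of binary_search_leftmost
rightmost = bisect.bisect_right(L, 2) - 1
print(leftmost)                  # 1 -> how_many_lesser_elements(L, 2)
print(len(L) - 1 - rightmost)    # 2 -> how_many_larger_elements(L, 2)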

View File

@ -10,16 +10,34 @@
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
"""\
Some snippets and implementations refer to the Python library statsmodels (BSD-3 license).
We cannot import that library directly because it bundles many statistical modeling and
econometric algorithms that we never use, and it would pull in other heavy dependencies,
costing DBMind its lightweight footprint.
"""
import itertools
from types import SimpleNamespace
import numpy as np
import logging
import time
import numpy as np
from numpy import dot, log, zeros, pi
from scipy import optimize
from scipy import signal
from scipy.signal import lfilter
from .arima_common import lagmat, OLS
from ..forcasting_algorithm import ForecastingAlgorithm
MAX_AR_ORDER = 5
MAX_MA_ORDER = 5
K_AR_MIN = K_DIFF_MIN = K_MA_MIN = 0
K_DIFF_MAX = 2
MIN_DATA_LENGTH = max(MAX_AR_ORDER, MAX_MA_ORDER)
class InvalidParameter(Exception):
pass
def _ar_transparams(params):
@ -76,7 +94,6 @@ def _ma_transparams(params):
def _ma_invtransparams(macoefs):
"""
return the inverse of the ma params.
:param params: type->np.array
:return invmacoefs: type->np.array
"""
tmp = macoefs.copy()
@ -91,7 +108,7 @@ def _ma_invtransparams(macoefs):
class DummyArray:
""" support __array_interface__ and base"""
"""support __array_interface__ and base"""
def __init__(self, interface, base=None):
self.__array_interface__ = interface
@ -182,15 +199,17 @@ def yule_walker(x_raw, order=1):
adj_needed = method == "adjusted"
if x_raw.ndim > 1 and x_raw.shape[1] != 1:
raise ValueError("expecting a vector to estimate ar parameters")
raise InvalidParameter("expecting a vector to estimate ar parameters")
r_raw = np.zeros(order + 1, np.float64)
r_raw[0] = (x_raw ** 2).sum() / num
for k in range(1, order + 1):
r_raw[k] = (x_raw[0:-k] * x_raw[k:]).sum() / (num - k * adj_needed)
r_tope = _toeplitz(r_raw[:-1])
rho = np.linalg.solve(r_tope, r_raw[1:])
return rho
try:
rho = np.linalg.solve(r_tope, r_raw[1:])
return rho
except np.linalg.LinAlgError as e:
raise InvalidParameter(e)
def _arma_impulse_response(new_ar_coeffs, new_ma_coeffs, leads=100):
@ -435,12 +454,15 @@ def _compute_start_ar_ma_coeffs(k_ar, k_ma, y_raw):
_x_mat, _y_mat = lagmat(y_raw, ar_order, original="sep")
_y_mat = _y_mat[ar_order:]
_x_mat = _x_mat[ar_order:]
ols_mod = OLS(_y_mat, _x_mat)
try:
ols_mod = OLS(_y_mat, _x_mat)
except ValueError as e:
raise InvalidParameter(e)
ols_res = ols_mod.fit()
arcoefs_tmp = ols_res
if ar_order + k_ma >= len(y_raw):
raise ValueError("start ar order is not valid")
raise InvalidParameter("start ar order is not valid")
lag_endog, lag_resid = _get_lag_data_and_resid(y_raw,
ar_order,
@ -480,21 +502,14 @@ def _get_errors(params, raw_data, order):
return errors
class ARIMA:
"""ARIMA model can forecast series according to history series"""
class ARIMA(ForecastingAlgorithm):
"""ARIMA model can forecast series according to historical series"""
def __init__(self, y_raw, order):
"""
:param y_raw: type->np.array
:param order: type->tuple
"""
k_ar, k_diff, k_ma = order
self.order = SimpleNamespace(k_ar=k_ar, k_diff=k_diff, k_ma=k_ma)
y_raw = np.asarray(y_raw) if isinstance(y_raw, (list, tuple)) else y_raw
y_fit = np.diff(y_raw, n=k_diff)
x_fit = np.ones((len(y_fit), 1))
self.raw_data = SimpleNamespace(x=x_fit, y=y_fit, raw_y=y_raw, k_trend=1)
self.nobs = len(y_fit) - k_ar
def __init__(self):
self.order = None
self.given_data = None
self.once_data = None
self.nobs = None
self.is_transparams = True
self.resid = None
self.params = None
@ -508,8 +523,8 @@ class ARIMA:
k_ar, k_ma, k_trend = order
start_params = zeros((k_ar + k_ma + k_trend))
y_raw = np.array(self.raw_data.y, np.float64)
x_raw = self.raw_data.x
y_raw = np.array(self.once_data.y, np.float64)
x_raw = self.once_data.x
if k_trend != 0:
ols_params = OLS(y_raw, x_raw).fit()
start_params[:k_trend] = ols_params
@ -529,10 +544,10 @@ class ARIMA:
if k_ar and not np.all(np.abs(np.roots(np.r_[1, -start_params[k_trend:k_trend + k_ar]]
)) < 1):
raise ValueError("the ar start coeffs is invalid")
raise InvalidParameter("the ar start coeffs %s is invalid" % k_ar)
if k_ma and not np.all(np.abs(np.roots(np.r_[1, start_params[k_trend + k_ar:]]
)) < 1):
raise ValueError("the ma start coeffs is invalid")
raise InvalidParameter("the ma start coeffs %s is invalid." % k_ma)
return self._invtransparams(start_params)
@ -547,7 +562,7 @@ class ARIMA:
newparams = self._transparams(params)
else:
newparams = params
errors = _get_errors(newparams, self.raw_data, self.order)
errors = _get_errors(newparams, self.once_data, self.order)
ssr = np.dot(errors, errors)
sigma2 = ssr / nobs
@ -561,7 +576,7 @@ class ARIMA:
:return newparams: type->np.array
"""
k_ar, k_ma = self.order.k_ar, self.order.k_ma
k = self.raw_data.k_trend
k = self.once_data.k_trend
newparams = np.zeros_like(params)
if k != 0:
@ -581,7 +596,7 @@ class ARIMA:
:return newparams: type->np.array
"""
k_ar, k_ma = self.order.k_ar, self.order.k_ma
k = self.raw_data.k_trend
k = self.once_data.k_trend
newparams = start_params.copy()
arcoefs = newparams[k:k + k_ar]
macoefs = newparams[k + k_ar:]
@ -602,16 +617,61 @@ class ARIMA:
def bic(self):
"""the BIC is for optimal parameters:(p d q)"""
nobs = self.nobs
df_model = self.raw_data.k_trend + self.order.k_ar + self.order.k_ma
df_model = self.once_data.k_trend + self.order.k_ar + self.order.k_ma
return -2 * self.llf + np.log(nobs) * (df_model + 1)
def fit(self, sequence=None):
def fit(self, sequence):
self.given_data = np.array(sequence.values).astype('float32')
min_bic = np.inf
optimal_ar = optimal_ma = 0
diff_data = np.diff(self.given_data)
# Look for the optimal parameters.
for k_ar, k_diff, k_ma in \
itertools.product(range(K_AR_MIN, MAX_AR_ORDER + 1, 2),
range(K_DIFF_MIN, K_DIFF_MAX + 1),
range(K_MA_MIN, MAX_MA_ORDER + 1, 2)):
if k_ar == 0 and k_diff == 0 and k_ma == 0:
continue
try:
self.is_transparams = True
self.fit_once(diff_data, k_ar, k_diff, k_ma)
if not np.isnan(self.bic) and self.bic < min_bic:
min_bic = self.bic
optimal_ar = k_ar
optimal_ma = k_ma
except InvalidParameter:
"""Ignore while InvalidParameter occurred."""
self.is_transparams = True
try:
self.fit_once(self.given_data, optimal_ar, 1, optimal_ma)
except InvalidParameter:
logging.warning('[ARIMA] Found invalid parameters for forecasting metric %s: ar %d, diff 1, ma %d.',
sequence.name, optimal_ar, optimal_ma, exc_info=True)
self.is_transparams = True
self.fit_once(self.given_data, 2, 1, 0)
def fit_once(self, y_raw, k_ar, k_diff, k_ma):
"""
fit trend_coeffs, ar_coeffs, ma_coeffs for ARIMA model.
:return None
"""
k = self.raw_data.k_trend
nobs = self.raw_data.y.shape[0]
"""
:param y_raw: type->np.array
:param order: type->tuple
"""
self.order = SimpleNamespace(k_ar=k_ar, k_diff=k_diff, k_ma=k_ma)
y_fit = np.array(np.diff(y_raw, n=k_diff))
x_fit = np.ones((len(y_fit), 1))
self.once_data = SimpleNamespace(x=x_fit, y=y_fit, raw_y=y_raw, k_trend=1)
self.nobs = len(y_fit) - k_ar
self.params = None
k = self.once_data.k_trend
nobs = self.once_data.y.shape[0]
start_params = self._fit_start_coeffs((self.order.k_ar, self.order.k_ma, k))
def loglike(params, *args):
@ -633,20 +693,14 @@ class ARIMA:
:param steps: type->int
:return forecast: type->np.array
"""
ctime = int(time.time())
logging.debug("[ARIMA:forecast:%s]: steps:%s, order:%s, coeffs:%s" %
(ctime, steps, self.order, self.params))
logging.debug("[ARIMA:forecast:%s]: raw_data:%s" % (ctime, self.raw_data.y))
self.resid = _get_errors(self.params, self.raw_data, self.order).squeeze()
self.resid = _get_errors(self.params, self.once_data, self.order).squeeze()
forecast = _arma_predict_out_of_sample(self.params, steps, self.resid,
self.order, self.raw_data)
self.order, self.once_data)
forecast = unintegrate(
forecast,
unintegrate_levels(
self.raw_data.raw_y[-self.order.k_diff:],
self.once_data.raw_y[-self.order.k_diff:],
self.order.k_diff
)
)[self.order.k_diff:]
logging.debug("[ARIMA:forecast:%s]: forecast result: %s" % (ctime, forecast))
return forecast
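
The new fit() above searches the (p, q) grid and keeps the candidate with the lowest BIC, where bic = -2*llf + log(nobs)*(df_model + 1). A tiny numeric illustration of that selection rule with made-up log-likelihood values:

import numpy as np

def bic(llf, nobs, df_model):
    # Same expression as the ARIMA.bic property.
    return -2 * llf + np.log(nobs) * (df_model + 1)

# Made-up candidates: (k_ar, k_ma, log-likelihood) for a series of 120 points.
candidates = [(0, 2, -310.0), (2, 0, -305.5), (2, 2, -304.9)]
scores = {(p, q): bic(llf, nobs=120, df_model=1 + p + q) for p, q, llf in candidates}
print(min(scores, key=scores.get))   # the (k_ar, k_ma) pair with the smallest BIC wins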

View File

@ -91,8 +91,6 @@ def lagmat(x_raw, maxlag, trim='forward', original='ex'):
lags = lmat[startobs:stopobs, dropidx:]
if original == 'sep':
leads = lmat[startobs:stopobs, :dropidx]
if original == 'sep':
return lags, leads
return lags
@ -129,7 +127,7 @@ def get_k_constant(x_raw):
return k_constant
class OLS():
class OLS:
"""The OLS can compute linear correlation coefficient about x and y"""
def __init__(self, y_raw, x_raw):
self._x = np.asarray(x_raw)

View File

@ -12,53 +12,19 @@
# See the Mulan PSL v2 for more details.
import logging
import numpy as np
import itertools
from types import SimpleNamespace
from ...types import Sequence
from ..statistics import sequence_interpolate, trim_head_and_tail_nan
import threading
from typing import Union, List
import numpy as np
from .. import seasonal as seasonal_interface
from ..stat_utils import sequence_interpolate, trim_head_and_tail_nan
from ...types import Sequence
from dbmind.common.utils import dbmind_assert
MAX_AR_ORDER = 5
MAX_MA_ORDER = 5
MIN_DATA_LENGTH = max(MAX_AR_ORDER, MAX_MA_ORDER)
def estimate_order_of_model_parameters(raw_data, k_ar_min=0, k_diff_min=0,
k_ma_min=0, k_diff_max=0):
"""return model type and model order"""
diff_data = np.diff(raw_data)
algorithm_name = "linear"
k_ar_valid, k_ma_valid = 0, 0
min_bic = np.inf
bic_result_list = []
for k_ar, k_diff, k_ma in \
itertools.product(range(k_ar_min, MAX_AR_ORDER + 1),
range(k_diff_min, k_diff_max + 1),
range(k_ma_min, MAX_MA_ORDER + 1)):
if k_ar == 0 and k_diff == 0 and k_ma == 0:
continue
try:
from .arima_model.arima_alg import ARIMA
model = ARIMA(diff_data, order=(k_ar, k_diff, k_ma), )
model.fit()
bic_result = model.bic
bic_result_list.append(bic_result)
if not np.isnan(bic_result) and bic_result < min_bic:
algorithm_name = "arima"
min_bic = bic_result
k_ar_valid = k_ar
k_ma_valid = k_ma
except ValueError:
"""Ignore while ValueError occurred."""
except Exception as e:
logging.warning("Warning occurred when estimate order of model parameters, "
"warning_msg is: %s", e)
order = (k_ar_valid, 1, k_ma_valid)
return algorithm_name, order
LINEAR_THRESHOLD = 0.80
class ForecastingAlgorithm:
@ -68,37 +34,48 @@ class ForecastingAlgorithm:
"""the subclass should implement, tarin model param"""
pass
def forecast(self, forecast_length: int) -> Sequence:
def forecast(self, forecast_length: int) -> Union[List, np.array]:
"""the subclass should implement, forecast series according history series"""
pass
class ForecastingFactory:
"""the ForecastingFactory can create forecast model"""
_CACHE = {} # Reuse an instantiated object.
_CACHE = threading.local() # Reuse an instantiated object.
@staticmethod
def get_instance(raw_data) -> ForecastingAlgorithm:
"""return forecast model according algorithm_name"""
algorithm_name, order = estimate_order_of_model_parameters(raw_data)
logging.debug('Choose %s algorithm to forecast.', algorithm_name)
if algorithm_name == "linear":
from .simple_forecasting import SimpleLinearFitting
ForecastingFactory._CACHE[algorithm_name] = SimpleLinearFitting()
elif algorithm_name == "arima" or algorithm_name is None:
from .arima_model.arima_alg import ARIMA
ForecastingFactory._CACHE[algorithm_name] = ARIMA(raw_data, order)
else:
raise NotImplementedError(f'Failed to load {algorithm_name} algorithm.')
def _get(algorithm_name):
if not hasattr(ForecastingFactory._CACHE, algorithm_name):
if algorithm_name == 'linear':
from .simple_forecasting import SimpleLinearFitting
setattr(ForecastingFactory._CACHE, algorithm_name, SimpleLinearFitting(avoid_repetitive_fitting=True))
elif algorithm_name == 'arima':
from .arima_model.arima_alg import ARIMA
setattr(ForecastingFactory._CACHE, algorithm_name, ARIMA())
else:
raise NotImplementedError(f'Failed to load {algorithm_name} algorithm.')
return ForecastingFactory._CACHE[algorithm_name]
return getattr(ForecastingFactory._CACHE, algorithm_name)
@staticmethod
def get_instance(sequence) -> ForecastingAlgorithm:
"""Return a forecast model according to the feature of given sequence."""
linear = ForecastingFactory._get('linear')
linear.refit()
linear.fit(sequence)
if linear.r2_score >= LINEAR_THRESHOLD:
logging.debug('Choose linear fitting algorithm to forecast.')
return linear
logging.debug('Choose ARIMA algorithm to forecast.')
return ForecastingFactory._get('arima')
def _check_forecasting_minutes(forecasting_minutes):
"""
check input params: forecasting_minutes whether is valid.
:param forecasting_minutes: type->int or float
check whether input params forecasting_minutes is valid.
:param forecasting_minutes: int or float
:return: None
:exception: raise ValueError if given parameter is invalid.
"""
check_result = True
message = ""
@ -118,66 +95,96 @@ def _check_forecasting_minutes(forecasting_minutes):
def decompose_sequence(sequence):
seasonal_data = None
raw_data = np.array(list(sequence.values))
is_seasonal, period = seasonal_interface.is_seasonal_series(raw_data)
raw_data = np.array(sequence.values)
is_seasonal, period = seasonal_interface.is_seasonal_series(
raw_data,
high_ac_threshold=0.5,
min_seasonal_freq=3
)
if is_seasonal:
decompose_results = seasonal_interface.seasonal_decompose(raw_data, period=period)
seasonal = decompose_results[0]
trend = decompose_results[1]
resid = decompose_results[2]
seasonal, trend, residual = seasonal_interface.seasonal_decompose(raw_data, period=period)
train_sequence = Sequence(timestamps=sequence.timestamps, values=trend)
train_sequence = sequence_interpolate(train_sequence)
seasonal_data = SimpleNamespace(is_seasonal=is_seasonal,
seasonal=seasonal,
trend=trend,
resid=resid,
resid=residual,
period=period)
else:
train_sequence = sequence
return seasonal_data, train_sequence
def compose_sequence(seasonal_data, train_sequence, forecast_length, forecast_data):
def compose_sequence(seasonal_data, train_sequence, forecast_values):
forecast_length = len(forecast_values)
if seasonal_data and seasonal_data.is_seasonal:
start_index = len(train_sequence) % seasonal_data.period
forecast_data = seasonal_data.seasonal[start_index: start_index + forecast_length] + \
forecast_data + \
seasonal_data.resid[start_index: start_index + forecast_length]
forecast_timestamps = [train_sequence.timestamps[-1] + train_sequence.step * (i + 1)
for i in range(int(forecast_length))]
return Sequence(timestamps=forecast_timestamps, values=forecast_data)
seasonal = seasonal_data.seasonal
resid = seasonal_data.resid
dbmind_assert(len(seasonal) == len(resid))
if len(seasonal) - start_index < forecast_length:
# pad it.
padding_length = forecast_length - (len(seasonal) - start_index)
seasonal = np.pad(seasonal, (0, padding_length), mode='wrap')
resid = np.pad(resid, (0, padding_length), mode='wrap')
seasonal = seasonal[start_index: start_index + forecast_length]
resid = resid[start_index: start_index + forecast_length]
forecast_values = seasonal + forecast_values + resid
forecast_timestamps = [train_sequence.timestamps[-1] + train_sequence.step * i
for i in range(1, forecast_length + 1)]
return forecast_timestamps, forecast_values
def quickly_forecast(sequence, forecasting_minutes):
def quickly_forecast(sequence, forecasting_minutes, lower=0, upper=float('inf')):
"""
return forecast sequence in forecasting_minutes from raw sequnece
Return forecast sequence in forecasting_minutes from raw sequence.
:param sequence: type->Sequence
:param forecasting_minutes: type->int or float
:return: forecase sequence: type->Sequence
:param lower: The lower limit of the forecast result
:param upper: The upper limit of the forecast result.
:return: forecast sequence: type->Sequence
"""
# 1 check forecasting minutes
if len(sequence) <= 1:
return Sequence()
# 1. check for forecasting minutes
_check_forecasting_minutes(forecasting_minutes)
forecasting_length = int(forecasting_minutes * 60 * 1000 // sequence.step)
forecasting_length = int(forecasting_minutes * 60 * 1000 / sequence.step)
if forecasting_length == 0 or forecasting_minutes == 0:
return Sequence()
# 2 interpolate
sequence = sequence_interpolate(sequence)
# 2. interpolate
interpolated_sequence = sequence_interpolate(sequence)
# 3 decompose sequence
seasonal_data, train_sequence = decompose_sequence(sequence)
# 3. decompose sequence
seasonal_data, train_sequence = decompose_sequence(interpolated_sequence)
# 4 get model from ForecastingFactory
model = ForecastingFactory.get_instance(list(train_sequence.values))
# 4. get model from ForecastingFactory
model = ForecastingFactory.get_instance(train_sequence)
# 5 model fit and forecast
# 5. fit and forecast
model.fit(train_sequence)
forecast_data = model.forecast(forecasting_length)
forecast_data = trim_head_and_tail_nan(forecast_data)
dbmind_assert(len(forecast_data) == forecasting_length)
# 6 compose sequence
forecast_sequence = compose_sequence(seasonal_data,
train_sequence,
forecasting_length,
forecast_data)
return forecast_sequence
# 6. compose sequence
forecast_timestamps, forecast_values = compose_sequence(
seasonal_data,
train_sequence,
forecast_data
)
for i in range(len(forecast_values)):
forecast_values[i] = min(forecast_values[i], upper)
forecast_values[i] = max(forecast_values[i], lower)
return Sequence(
timestamps=forecast_timestamps,
values=forecast_values,
name=sequence.name,
labels=sequence.labels
)
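
One detail worth calling out in the new compose_sequence: when the forecast horizon runs past the stored seasonal component, numpy's 'wrap' padding repeats the pattern cyclically. A two-line illustration:

import numpy as np

seasonal = np.array([1.0, 2.0, 3.0, 4.0])
# 'wrap' repeats the array cyclically, which suits a seasonal component.
print(np.pad(seasonal, (0, 3), mode='wrap'))   # [1. 2. 3. 4. 1. 2. 3.]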

View File

@ -10,29 +10,90 @@
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from ...types import Sequence
from dbmind.common.types import Sequence
from .forcasting_algorithm import ForecastingAlgorithm
def series_to_supervised(sequence: Sequence, test_split=.0, poly_degree=None):
def series_to_supervised(sequence: Sequence, test_size=.1, poly_degree=None,
random_state=None, shuffle=False):
x, y = sequence.to_2d_array()
length = sequence.length
test_length = int(length * test_split)
x_train, x_test = x[:length - test_length], x[length - test_length:]
y_train, y_test = y[:length - test_length], y[length - test_length:]
x_train, x_test, y_train, y_test = train_test_split(
x, y, test_size=test_size, shuffle=shuffle, random_state=random_state
)
if poly_degree:
poly = PolynomialFeatures(degree=poly_degree).fit(x)
x_train = poly.transform(x_train)
x_test = poly.transform(x_test)
return x_train, x_test, y_train, y_test
class SimpleLinearFitting(ForecastingAlgorithm):
def __init__(self, avoid_repetitive_fitting=False):
self._a = None
self._b = None
self._r2 = None
self._last_x = None
self._step = None
self._fitted = False
self._avoid_repetitive_fitting = avoid_repetitive_fitting
def refit(self):
self._fitted = False
def fit(self, sequence: Sequence):
# `self._fitted` controls whether the fitting process runs, because this algorithm is also
# used to estimate how linear a sequence is. Once the class has estimated a sequence, it
# should not fit it again, so this flag prevents repeated fitting.
if self._avoid_repetitive_fitting and self._fitted:
return
if sequence.length < 2:
raise ValueError('Unable to fit the sequence due to short length.')
n = len(sequence)
sx = sy = sxx = syy = sxy = 0
# timestamp acts x-axis, values acts y-axis.
for t, v in sequence:
sx += t
sy += v
sxx += t * t
syy += v * v
sxy += t * v
a = (sy * sx / n - sxy) / (sx * sx / n - sxx)
b = (sy - a * sx) / n
numerator = syy + a * a * sxx + b * b * n + 2 * a * b * sx - 2 * a * sxy - 2 * b * sy
denominator = syy - sy * sy / n + 1e-9
r2 = 1 - numerator / denominator
self._a = a
self._b = b
self._r2 = r2
self._last_x = sequence.timestamps[-1]
self._step = sequence.step
self._fitted = True
def forecast(self, forecast_length):
future = []
for i in range(1, forecast_length + 1):
t = self._last_x + i * self._step
v = self._a * t + self._b
future.append(v)
return future
@property
def r2_score(self):
return self._r2
class SimpleLinearRegression(ForecastingAlgorithm):
def __init__(self):
self.model = LinearRegression(copy_X=False)
self.interval = None
@ -46,7 +107,6 @@ class SimpleLinearFitting(ForecastingAlgorithm):
self.interval = x[1] - x[0]
self.last_x = x[-1]
x = np.reshape(x, newshape=(-1, 1))
self.model.fit(x, y)
def forecast(self, forecast_length):
@ -91,5 +151,4 @@ class SupervisedModel(ForecastingAlgorithm):
if self.bias:
bias = y_pred.flatten()[0] - self.sequence.values[-1]
y_pred -= bias
return Sequence(timestamps=x_pred.flatten().tolist(),
values=y_pred.flatten().tolist())
return y_pred.flatten().tolist()
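
The new SimpleLinearFitting uses the closed-form least-squares solution on (timestamp, value) pairs. Re-deriving it on a perfectly linear toy series (invented data) confirms the slope, intercept, and an r2 of 1:

timestamps = [1000, 2000, 3000, 4000, 5000]
values = [3.0, 5.0, 7.0, 9.0, 11.0]          # exactly v = 0.002 * t + 1

n = len(values)
sx, sy = sum(timestamps), sum(values)
sxx = sum(t * t for t in timestamps)
syy = sum(v * v for v in values)
sxy = sum(t * v for t, v in zip(timestamps, values))
a = (sy * sx / n - sxy) / (sx * sx / n - sxx)
b = (sy - a * sx) / n
numerator = syy + a * a * sxx + b * b * n + 2 * a * b * sx - 2 * a * sxy - 2 * b * sy
r2 = 1 - numerator / (syy - sy * sy / n + 1e-9)
print(round(a, 6), round(b, 3), round(r2, 6))   # 0.002 1.0 1.0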

View File

@ -13,18 +13,22 @@
import numpy as np
from scipy import signal
from .statistics import trim_head_and_tail_nan
from .stat_utils import trim_head_and_tail_nan
import warnings
warnings.filterwarnings("ignore")
def acf(x_raw: np, nlags=None):
"""the acf can compute correlation from x[t] and x[t -k]"""
x_raw = np.array(x_raw)
x_diff = x_raw - x_raw.mean()
n_x = len(x_raw)
d_param = n_x * np.ones(2 * n_x - 1)
acov = np.correlate(x_diff, x_diff, "full")[n_x - 1:] / d_param[n_x - 1:]
return acov[: nlags + 1] / acov[0]
def acf(x_raw: np.array, nlags=None):
x = np.array(x_raw)
n = x.shape[0]
if nlags is None:
nlags = min(int(10 * np.log10(n)), n - 1)
x_diff = x - x.mean()
avf = np.correlate(x_diff, x_diff, "full")[n - 1:] / n
res = avf[: nlags + 1] / avf[0]
return res
def _padding_nans(x_raw, trim_head=None, trim_tail=None):
@ -41,50 +45,47 @@ def _padding_nans(x_raw, trim_head=None, trim_tail=None):
return result
def _get_trend(x_raw, filt):
""""use filt to extract trend component"""
trim_head = int(np.ceil(len(filt) / 2.) - 1) or None
trim_tail = int(np.ceil(len(filt) / 2.) - len(filt) % 2) or None
result = signal.convolve(x_raw, filt, mode='valid')
def _get_trend(x_raw, filter_):
""""use the filter to extract trend component"""
length = len(filter_)
trim_tail = (length - 1) // 2 or None
trim_head = length - 1 - trim_tail or None
result = signal.convolve(x_raw, filter_, mode='valid')
result = _padding_nans(result, trim_head, trim_tail)
return result
def is_seasonal_series(s_values, high_ac_threshold: float = 0.7, min_seasonal_freq=3):
"""judge series whether is seasonal with acf alg"""
result = False
period = None
"""Judge whether the series is seasonal by using the acf alg"""
s_ac = acf(s_values, nlags=len(s_values))
diff_ac = np.diff(s_ac)
high_ac_peak_pos = (1 + np.argwhere((diff_ac[:-1] > 0) & (diff_ac[1:] < 0)
& (s_ac[1: -1] > high_ac_threshold)).flatten())
high_ac_peak_pos = 1 + np.argwhere(
(diff_ac[:-1] > 0) & (diff_ac[1:] < 0) & (s_ac[1: -1] > high_ac_threshold)
).flatten()
for i in high_ac_peak_pos:
if i > min_seasonal_freq:
period = high_ac_peak_pos[np.argmax(s_ac[high_ac_peak_pos])]
result = True
break
return result, period
return True, high_ac_peak_pos[np.argmax(s_ac[high_ac_peak_pos])]
return False, None
def get_seasonal_period(s_values, high_ac_threshold: float = 0.5):
def get_seasonal_period(s_values, high_ac_threshold: float = 0.5, min_seasonal_freq=3):
""""return seasonal period"""
result = is_seasonal_series(s_values, high_ac_threshold)
return result[1]
return is_seasonal_series(s_values, high_ac_threshold, min_seasonal_freq)[1]
def _get_filt(period):
def _get_filter(period):
"""the filter to extract trend component"""
if period % 2 == 0:
filt = np.array([.5] + [1] * (period - 1) + [.5]) / period
filter_ = np.array([.5] + [1] * (period - 1) + [.5]) / period
else:
filt = np.repeat(1. / period, period)
return filt
filter_ = np.repeat(1. / period, period)
return filter_
def _get_seasonal(x_raw, detrended, period):
""""return seasonal component from x_raw, detrended and period"""
""""return the seasonal component from x_raw, detrended and period"""
nobs = len(x_raw)
period_averages = np.array([np.nanmean(detrended[i::period]) for i in range(period)])
period_averages -= np.mean(period_averages, axis=0)
@ -94,23 +95,22 @@ def _get_seasonal(x_raw, detrended, period):
def seasonal_decompose(x_raw, period=None):
"""seasonal series can decompose three component: trend, seasonal, resid"""
pfreq = period
"""decompose a series into three components: seasonal, trend, residual"""
if np.ndim(x_raw) > 1:
raise ValueError("x ndim > 1 not implemented")
raise ValueError("The input data must be 1-D array.")
if period is None:
raise ValueError("preiod must not None")
raise ValueError("You must specify a period.")
if not np.all(np.isfinite(x_raw)):
raise ValueError("the x has no valid values")
raise ValueError("The input data has infinite value or nan.")
if x_raw.shape[0] < 2 * period:
raise ValueError(f"The input data should be longer than two periods:{2 * period} at least")
if x_raw.shape[0] < 2 * pfreq:
raise ValueError(f"the x length:{x_raw.shape[0]} not meet 2 preiod:{2 * pfreq}")
x_raw = trim_head_and_tail_nan(x_raw)
filt = _get_filt(period)
trend = _get_trend(x_raw, filt)
trend = _get_trend(x_raw, _get_filter(period))
trend = trim_head_and_tail_nan(trend)
detrended = x_raw - trend
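
A minimal end-to-end check of the autocorrelation-based period detection above, run on a synthetic periodic series; the acf computation mirrors the definition in the diff, trimmed to essentials:

import numpy as np

x = np.tile(np.array([0., 1., 2., 3., 2., 1.]), 10)   # period 6, repeated 10 times
n = x.shape[0]
x_diff = x - x.mean()
avf = np.correlate(x_diff, x_diff, "full")[n - 1:] / n
s_ac = avf / avf[0]                                    # autocorrelation function
diff_ac = np.diff(s_ac)
# Peaks of the acf above the 0.5 threshold are candidate seasonal periods.
peaks = 1 + np.argwhere((diff_ac[:-1] > 0) & (diff_ac[1:] < 0) & (s_ac[1:-1] > 0.5)).flatten()
print(peaks[np.argmax(s_ac[peaks])])                   # 6, the injected period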

View File

@ -0,0 +1,229 @@
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
from types import SimpleNamespace
import numpy as np
from scipy.interpolate import interp1d
from dbmind.common.types import Sequence
def double_padding(values, window):
left_idx = window - 1 - (window - 1) // 2
right_idx = len(values) - 1 - (window - 1) // 2
values[:left_idx] = values[left_idx] # padding left
values[right_idx + 1:] = values[right_idx] # padding right
return values
def np_shift(values, shift_distance=1):
"""shift values a shift_distance"""
if len(values) < 2:
return values
shifted_values = np.roll(values, shift_distance)
for i in range(shift_distance):
shifted_values[i] = shifted_values[shift_distance]
return shifted_values
def np_moving_avg(values, window=5, mode="same"):
"""Computes the moving average for sequence
and returns a new sequence padded with valid
value at both ends.
"""
moving_avg_values = np.convolve(values, np.ones((window,)) / window, mode=mode)
moving_avg_values = double_padding(moving_avg_values, window)
return moving_avg_values
def np_moving_std(values, window=10):
"""Computes the standard deviation for sequence
and returns a new sequence padded with valid
value at both ends.
"""
sequence_length = len(values)
moving_std_values = np.zeros(sequence_length)
left_idx = window - 1 - (window - 1) // 2
for i in range(sequence_length - window + 1):
moving_std_values[left_idx+i] = np.std(values[i:i + window])
moving_std_values = double_padding(moving_std_values, window)
return moving_std_values
def np_double_rolling(values, window1=5, window2=1, diff_mode="diff"):
values_length = len(values)
window1 = 1 if values_length < window1 else window1
window2 = 1 if values_length < window2 else window2
left_rolling = np_moving_avg(np_shift(values), window=window1)
right_rolling = np_moving_avg(values[::-1], window=window2)[::-1]
r_data = right_rolling - left_rolling
functions = {
'abs': lambda x: np.abs(x),
'rel': lambda x: x / left_rolling
}
methods = diff_mode.split('_')[:-1]
for method in methods:
r_data = functions[method](r_data)
r_data = double_padding(r_data, max(window1, window2))
return r_data
def measure_head_and_tail_nan(data):
data_not_nan = -1 * np.isnan(data)
left = data_not_nan.argmax()
right = data_not_nan[::-1].argmax()
return left, right
def trim_head_and_tail_nan(data):
"""
when there are NaN values at the head or tail of forecast_data,
this function fills them with the nearest valid value
:param data: type->np.array or list
:return data: type->same type as the input 'data'
"""
length = len(data)
if length == 0:
return data
data_not_nan = np.isnan(data)
if data_not_nan.all():
data[:] = [0] * length
return data
left, right = measure_head_and_tail_nan(data)
data[:left] = [data[left]] * left
data[length - right:] = [data[length - right - 1]] * right
return data
def _valid_value(v):
return not (np.isnan(v) or np.isinf(v))
def _init_interpolate_param(sequence):
""""init interpolate param for sequence_interpolate function"""
length = len(sequence)
if length == 0:
return sequence
x = np.array(range(len(sequence)))
y = np.array(sequence.values)
left, right = measure_head_and_tail_nan(y)
na_param = SimpleNamespace(head_na_index=range(left), tail_na_index=range(length-right, length),
head_start_nona_value=y[left],
tail_start_nona_value=y[length-right-1])
return x[left:length - right], y[left:length - right], na_param
def tidy_up_sequence(sequence):
"""Fill up missing values for sequence and
align sequence's timestamps.
"""
if sequence.step <= 0:
return sequence
def estimate_error(a, b):
return (a - b) / b
timestamps = list(sequence.timestamps)
values = list(sequence.values)
i = 1
while i < len(timestamps):
real_interval = timestamps[i] - timestamps[i - 1]
error = estimate_error(real_interval, sequence.step)
if error < 0:
# This is because the current timestamp is lesser than the previous one.
# We should remove one to keep monotonic.
if not _valid_value(values[i - 1]):
values[i - 1] = values[i]
timestamps.pop(i)
values.pop(i)
i -= 1 # We have removed an element so we have to decrease the cursor.
elif error == 0:
"""Everything is normal, skipping."""
elif 0 < error < 1:
# Align the current timestamp.
timestamps[i] = timestamps[i - 1] + sequence.step
else:
# Fill up missing value with NaN.
next_ = timestamps[i - 1] + sequence.step
timestamps.insert(i, next_)
values.insert(i, float('nan'))
i += 1
return Sequence(timestamps, values)
def sequence_interpolate(sequence: Sequence, fit_method="cubic", strip_details=True):
"""interpolate with scipy interp1d"""
filled_sequence = tidy_up_sequence(sequence)
has_defined = [_valid_value(v) for v in filled_sequence.values]
if all(has_defined):
if strip_details:
return filled_sequence
else:
return Sequence(
timestamps=filled_sequence.timestamps,
values=filled_sequence.values,
name=sequence.name,
step=sequence.step,
labels=sequence.labels
)
if True not in has_defined:
raise ValueError("All of sequence values are undefined.")
y_raw = np.array(filled_sequence.values)
y_nona = []
x_nona = []
na_index = []
x_new, y_new, na_param = _init_interpolate_param(filled_sequence)
# prepare x_nona and y_nona for interp1d
for i in range(len(y_new)):
if _valid_value(y_new[i]):
y_nona.append(y_new[i])
x_nona.append(x_new[i])
else:
na_index.append(i)
fit_func = interp1d(x_nona, y_nona, kind=fit_method)
y_new = fit_func(x_new)
# replace the nan with interp1d value for raw y
for i in na_index:
raw_index = i + len(na_param.head_na_index)
y_raw[raw_index] = y_new[i]
y_raw[na_param.head_na_index] = na_param.head_start_nona_value
y_raw[na_param.tail_na_index] = na_param.tail_start_nona_value
if strip_details:
return Sequence(timestamps=filled_sequence.timestamps, values=y_raw)
else:
return Sequence(
timestamps=filled_sequence.timestamps,
values=y_raw,
name=sequence.name,
step=sequence.step,
labels=sequence.labels
)
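
The core of the new sequence_interpolate is fitting scipy's interp1d on the valid points and evaluating it at every position; a stripped-down illustration on a plain array (not the Sequence type itself):

import numpy as np
from scipy.interpolate import interp1d

y = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
x = np.arange(len(y))
mask = ~np.isnan(y)
# Fit on the valid samples only, then evaluate at every position.
fit_func = interp1d(x[mask], y[mask], kind='cubic')
print(fit_func(x))   # [1. 2. 3. 4. 5.] -- the gap at index 2 is filled smoothly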

View File

@ -1,200 +0,0 @@
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
from types import SimpleNamespace
import numpy as np
from scipy.interpolate import interp1d
from dbmind.common.types import Sequence
def np_quantile(values, quantile):
"""return the quantile of values"""
return np.nanpercentile(values, quantile)
def np_shift(values, shift_distance=1):
"""shift values a shift_distance"""
shifted_values = np.roll(values, shift_distance)
for i in range(shift_distance):
shifted_values[i] = shifted_values[shift_distance]
return shifted_values
def np_moving_avg(values, window=5, mode="same"):
"""Compute the moving average for sequence
and create a new sequence as the return value."""
moving_avg_values = np.convolve(values, np.ones((window,)) / window, mode=mode)
start_idx = len(values) - window
moving_avg_values[start_idx:] = moving_avg_values[start_idx] # padding other remaining value
return moving_avg_values
def np_moving_std(values, window=10):
"""Compute and return the standard deviation for sequence."""
sequence_length = len(values)
calculation_length = sequence_length - window
moving_std_values = [np.std(values[i:i + window]) for i in range(calculation_length)]
# padding
for _ in range(window):
moving_std_values.append(moving_std_values[-1])
return np.array(moving_std_values)
def np_double_rolling(values, agg="mean", window1=5, window2=1, diff_mode="diff"):
"""double rolling the values"""
if agg == "mean":
left_rolling = np_moving_avg(np_shift(values), window=window1)
right_rolling = np_moving_avg(values[::-1], window=window2)[::-1]
elif agg == "std":
left_rolling = np_moving_std(np_shift(values), window=window1)
right_rolling = np_moving_std(values[::-1], window=window2)[::-1]
else:
return values
diff_mode_map = {
"diff": (right_rolling - left_rolling),
"abs_diff": np.abs(right_rolling - left_rolling),
"rel_diff": (right_rolling - left_rolling) / left_rolling,
"abs_rel_diff": np.abs(right_rolling - left_rolling) / left_rolling
}
r_data = diff_mode_map.get(diff_mode)
values_length = len(values)
window = max(window1, window2)
tail_length = int(window / 2)
for i in range(tail_length):
r_data[values_length - i - 1] = r_data[values_length - tail_length - 1]
return r_data
def trim_head_and_tail_nan(data):
"""
when there are nan value at head or tail of forecast_data,
this function will fill value with near value
:param data: type->np.array
:return:data: type->np.array
"""
head_start_nona_value = 0
head_na_index = []
tail_start_nona_value = 0
tail_na_index = []
if len(data) == 0:
return data
for i in range(len(data)):
if not np.isnan(data[0]):
break
if not np.isnan(data[i]):
head_start_nona_value = data[i]
break
else:
head_na_index.append(i)
for i in range(len(data) - 1, 1, -1):
if not np.isnan(data[-1]):
break
if not np.isnan(data[i]):
tail_start_nona_value = data[i]
break
else:
tail_na_index.append(i)
for i in head_na_index:
data[i] = head_start_nona_value
for i in tail_na_index:
data[i] = tail_start_nona_value
return data
def _init_interpolate_param(sequence):
""""init interpolate param for sequence_interpolate function"""
x_raw = np.array(list(range(len(sequence.timestamps))))
y_raw = np.array(sequence.values)
head_na_index = []
head_start_nona_value = None
tail_na_index = []
tail_start_nona_value = None
x_new = list(x_raw)
y_new = list(y_raw)
#init head_start_nona_value, head_na_index
for i in range(len(y_raw)):
if not np.isnan(y_raw[0]):
break
if not np.isnan(y_raw[i]):
head_start_nona_value = y_raw[i]
break
else:
head_na_index.append(i)
#init tail_start_nona_value, tail_na_index
for i in range(len(y_raw) - 1, 1, -1):
if not np.isnan(y_raw[-1]):
break
if not np.isnan(y_raw[i]):
tail_start_nona_value = y_raw[i]
break
else:
tail_na_index.append(i)
#pop the nan from head and tail of data
for i in range(len(head_na_index)):
x_new.pop(0)
y_new.pop(0)
for i in range(len(tail_na_index)):
x_new.pop(-1)
y_new.pop(-1)
na_param = SimpleNamespace(head_na_index=head_na_index, tail_na_index=tail_na_index,
head_start_nona_value=head_start_nona_value,
tail_start_nona_value=tail_start_nona_value)
return x_new, y_new, na_param
def sequence_interpolate(sequence: Sequence, fit_method="cubic"):
"""interpolate with scipy interp1d"""
    not_nan_flags = [not np.isnan(v) for v in sequence.values]
    if all(not_nan_flags):
        return sequence
    if not any(not_nan_flags):
        raise ValueError("sequence values are all nan")
y_raw = np.array(sequence.values)
y_nona = []
x_nona = []
na_index = []
x_new, y_new, na_param = _init_interpolate_param(sequence)
    # prepare x_nona and y_nona for interp1d
for i in range(len(y_new)):
if not np.isnan(y_new[i]):
y_nona.append(y_new[i])
x_nona.append(x_new[i])
else:
na_index.append(i)
fit_func = interp1d(x_nona, y_nona, kind=fit_method)
y_new = fit_func(x_new)
    # replace the NaN values in raw y with the interpolated values
for i in na_index:
raw_index = i + len(na_param.head_na_index)
y_raw[raw_index] = y_new[i]
y_raw[na_param.head_na_index] = na_param.head_start_nona_value
y_raw[na_param.tail_na_index] = na_param.tail_start_nona_value
return Sequence(timestamps=sequence.timestamps, values=y_raw)
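A minimal usage sketch of the interpolation path above (it assumes Sequence accepts plain Python lists and that scipy is installed; the sample data is hypothetical):

seq = Sequence(timestamps=[1, 2, 3, 4, 5],
               values=[1.0, float('nan'), 3.0, float('nan'), 5.0])
filled = sequence_interpolate(seq, fit_method="linear")
print(filled.values)  # roughly [1.0, 2.0, 3.0, 4.0, 5.0]: the NaN gaps are interpolated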

View File

@ -29,7 +29,7 @@ class RepeatedTimer(Thread):
self._args = args
self._kwargs = kwargs
self._finished = Event()
Thread.__init__(self)
Thread.__init__(self, daemon=True)
def run(self):
while not self._finished.is_set():

View File

@ -11,14 +11,13 @@
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
import concurrent
import logging
import os
import signal
from abc import ABC, abstractmethod
from concurrent.futures.process import ProcessPoolExecutor
from concurrent.futures import as_completed, wait
import concurrent
from multiprocessing import Event
from concurrent.futures.process import ProcessPoolExecutor
from dbmind.common import utils
from dbmind.common.platform import WIN32
@ -26,6 +25,8 @@ from dbmind.common.platform import WIN32
IN_PROCESS = 'DBMind [Worker Process] [IN PROCESS]'
PENDING = 'DBMind [Worker Process] [IDLE]'
_mp_sync_mgr_instance = None
def _initializer():
signal.signal(signal.SIGTERM, signal.SIG_IGN)
@ -77,19 +78,44 @@ class AbstractWorker(ABC):
self.status = self.CLOSED
def get_mp_sync_manager():
global _mp_sync_mgr_instance
from multiprocessing.managers import DictProxy, SyncManager
from collections import defaultdict
class MPSyncManager(SyncManager):
__proc_title__ = 'DBMind [SyncManager Process]'
@staticmethod
def _initializer():
utils.set_proc_title(MPSyncManager.__proc_title__)
def start(self):
super().start(initializer=MPSyncManager._initializer)
MPSyncManager.register('defaultdict', defaultdict, DictProxy)
if not _mp_sync_mgr_instance:
_mp_sync_mgr_instance = MPSyncManager()
_mp_sync_mgr_instance.start()
return _mp_sync_mgr_instance
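A small sketch of how the shared manager above can hand proxy objects to worker processes (illustrative only; list() and Event() come from the standard SyncManager API):

mgr = get_mp_sync_manager()
shared_list = mgr.list()   # proxy list visible to every worker process
done = mgr.Event()         # proxy Event, like the one passed to the fixed pool below
shared_list.append('slow-sql-fingerprint')
print(len(shared_list), done.is_set())  # 1 False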
class _ProcessPoolExecutor(ProcessPoolExecutor):
@staticmethod
def _wait_for_notify(event):
# Set the status of the current work process.
_initializer()
event.wait()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, worker_num):
super().__init__(worker_num)
        # Make the process pool a fixed pool: create all idle worker processes up front and let them wait for
        # the scheduler's tasks. Why not use lazy-loading mode? Because worker processes are forked from the
        # master process, and the master may already be running backend threads at fork time, which can cause
        # unexpected behaviors, such as timed backend threads also being forked and run in the child process.
event = Event()
event = get_mp_sync_manager().Event()
for _ in range(self._max_workers):
self.submit(self._wait_for_notify, event)
event.set()
@ -121,7 +147,7 @@ class ProcessWorker(AbstractWorker):
from concurrent.futures.thread import ThreadPoolExecutor
self.pool = ThreadPoolExecutor(worker_num)
else:
self.pool = _ProcessPoolExecutor(worker_num, initializer=_initializer)
self.pool = _ProcessPoolExecutor(worker_num)
super().__init__(worker_num)
@ -167,7 +193,7 @@ class ProcessWorker(AbstractWorker):
self.pool.shutdown(True, cancel_futures=cancel_futures)
def get_worker_instance(_type, process_num, hosts=None) -> AbstractWorker:
def get_worker_instance(_type, process_num) -> AbstractWorker:
if _type == 'local':
return ProcessWorker(process_num)
elif _type == 'dist':

View File

@ -33,3 +33,6 @@ class SQLExecutionError(Exception):
class ConfigSettingError(Exception):
pass
class DuplicateTableError(Exception):
pass

View File

@ -15,6 +15,7 @@ import hmac
import random
import secrets
import string
import re
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
@ -35,6 +36,25 @@ def check_path_valid(path):
return True
def check_ip_valid(value):
    ip_pattern = re.compile(r'^(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[1-9])\.'
                            r'(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.'
                            r'(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.'
                            r'(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)$')
if ip_pattern.match(value):
return True
return False
def check_port_valid(value):
if isinstance(value, str):
return str.isdigit(value) and 1023 < int(value) <= 65535
elif isinstance(value, int):
return 1023 < value <= 65535
else:
return False
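Quick doctest-style checks of the two validators above (expectations are illustrative):

assert check_ip_valid('192.168.0.1')
assert not check_ip_valid('256.1.1.1')   # octet out of range
assert check_port_valid('5432')
assert not check_port_valid(80)          # well-known ports are rejected
assert not check_port_valid('abc')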
def unsafe_random_string(length):
"""Used to generate a fixed-length random
string which is not used in the sensitive scenarios."""

View File

@ -12,8 +12,7 @@
# See the Mulan PSL v2 for more details.
from typing import Optional
from dbmind.common.algorithm.basic import binary_search
from dbmind.common.algorithm.basic import how_many_lesser_elements, how_many_larger_elements
from dbmind.common.algorithm.basic import binary_search, binary_search_leftmost, binary_search_rightmost
from ..either import OptionalContainer, OptionalValue
from ..utils import cached_property
@ -140,8 +139,8 @@ class Sequence:
# ``how_many_larger_elements()`` can ensure that
# the position of the searching element always stays
# at the position of the last element not greater than it in the array.
start_position = how_many_lesser_elements(timestamps, ts_start)
end_position = how_many_larger_elements(timestamps, ts_end)
start_position = binary_search_leftmost(timestamps, ts_start)
end_position = binary_search_rightmost(timestamps, ts_end)
return end_position - start_position + 1
def to_2d_array(self):
@ -151,15 +150,15 @@ class Sequence:
def values(self):
"""The property will generate a copy."""
timestamps, values, ts_start, ts_end = self._get_entity()
return values[how_many_lesser_elements(timestamps, ts_start):
how_many_larger_elements(timestamps, ts_end) + 1]
return values[binary_search_leftmost(timestamps, ts_start):
binary_search_rightmost(timestamps, ts_end) + 1]
@cached_property
@property
def timestamps(self):
"""The property will generate a copy."""
timestamps, values, ts_start, ts_end = self._get_entity()
return timestamps[how_many_lesser_elements(timestamps, ts_start):
how_many_larger_elements(timestamps, ts_end) + 1]
return timestamps[binary_search_leftmost(timestamps, ts_start):
binary_search_rightmost(timestamps, ts_end) + 1]
@cached_property
def step(self):

View File

@ -11,15 +11,14 @@
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
import re
import os
import sys
import argparse
import json
import os
import random
import re
import sys
import time
from collections import deque
from subprocess import Popen, PIPE
SQL_TYPE = ['select ', 'delete ', 'insert ', 'update ']
SQL_AMOUNT = 0
@ -27,9 +26,10 @@ PLACEHOLDER = r'@@@'
SAMPLE_NUM = 5
IS_ALL_LATEST_SQL = False
FILEHANDLES = 500
SQL_PATTERN = [r'\((\s*(\d+(\.\d+)?\s*)[,]?)+\)', # match integer set in the IN collection
r'([^\\])\'((\')|(.*?([^\\])\'))', # match all content in single quotes
r'(([^<>]\s*=\s*)|([^<>]\s+))(\d+)(\.\d+)?'] # match single integer
SQL_PATTERN = [r'([^\\])\'((\')|(.*?([^\\])\'))', # match all content in single quotes
r'\((\s*(\-|\+)?\d+(\.\d+)?\s*)(,\s*(\-|\+)?\d+(\.\d+)?\s*)*\)',
# match integer set in the IN collection
r'(([<>=]+\s*)|(\s+))(\-|\+)?\d+(\.\d+)?'] # match single integer
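To see what the updated patterns capture, a small sketch (the sample statement is hypothetical; re is imported at module scope above, and elsewhere the script presumably substitutes these matches with PLACEHOLDER when building templates):

sample = "select * from t where id = 42 and name = 'alice' and k in (1, 2, 3)"
for pattern in SQL_PATTERN:
    matched = re.search(pattern, sample)
    print(matched.group() if matched else None)
# prints, in order: the quoted literal, the "(1, 2, 3)" list and "= 42"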
def truncate_template(templates, update_time, avg_update):
@ -104,7 +104,8 @@ def get_workload_template(templates, sqls, args):
def output_valid_sql(sql):
is_quotation_valid = sql.count("'") % 2
if re.search(r'=([\s]+)?\$', sql):
if re.search(r'=([\s]+)?\$', sql) or re.search(r'[\s]+\((([\s]+)?\$[\d]+([\s]+)?)((,([\s]+)?\$[\d]+([\s]+)?)+)?\)',
sql):
return ''
if 'from pg_' in sql.lower() or 'gs_index_advise' in sql.lower() or is_quotation_valid:
return ''
@ -169,7 +170,7 @@ def get_parsed_sql(file, filter_config, log_info_position):
SQL_AMOUNT += 1
sql_record.sqllist = []
sql = '' if len(sql.lower().strip(';').split(';', 1)) == 1 else \
sql.lower().strip(';').split(';', 1)[1]
sql.lower().strip(';').split(';', 1)[1]
if sql.lower().strip().strip(';').strip().endswith(('commit', 'rollback')) \
and threadid_position:
output_sql = output_valid_sql(sql.lower().strip().strip(';') \
@ -214,17 +215,18 @@ def get_parsed_sql(file, filter_config, log_info_position):
def get_start_position(start_time, file_path):
while start_time:
cmd = 'head -n $(cat %s | grep -m 1 -n "^%s" | awk -F : \'{print $1}\') %s | wc -c' % \
(file_path, start_time, file_path)
proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
std, err_msg = proc.communicate()
if proc.returncode == 0 and not err_msg:
return int(std)
elif len(start_time) > 13:
start_time = start_time[0: -3]
else:
break
time_pattern = re.compile(r'\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}')
start_time_stamp = int(time.mktime(time.strptime(start_time, '%Y-%m-%d %H:%M:%S')))
start_position = 0
for line in open(file_path, 'r', errors='ignore'):
match_result = time_pattern.match(line)
if match_result:
matched_time = match_result.group()
current_time_stamp = int(time.mktime(time.strptime(matched_time, '%Y-%m-%d %H:%M:%S')))
if current_time_stamp >= start_time_stamp:
return start_position
start_position += len(line)
return -1
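A sketch of the rewritten offset lookup above, driven by a throwaway log file (the log lines are hypothetical but start with the expected '%m'-style timestamp prefix):

import os
import tempfile

lines = ("2022-06-20 10:00:00 140000000000 LOG:  statement: select 1;\n"
         "2022-06-20 10:05:00 140000000000 LOG:  statement: select 2;\n")
with tempfile.NamedTemporaryFile('w', suffix='.log', delete=False) as tmp:
    tmp.write(lines)
print(get_start_position('2022-06-20 10:05:00', tmp.name) == lines.index('2022-06-20 10:05'))  # True
os.remove(tmp.name)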
@ -257,6 +259,17 @@ class threadid_info:
self.fileh.write(content)
def generate_line(file):
templine = ''
for line in file:
if line.endswith('\r'):
templine += line[:-1]
else:
templine += line
yield templine
templine = ''
# split the log into per-threadid temp files while keeping the number of open file handles below FILEHANDLES
def group_log_by_threadid(f, threadid_position):
threadid = '000000'
@ -266,14 +279,16 @@ def group_log_by_threadid(f, threadid_position):
threadid_log_files = []
try:
for line in f:
for line in generate_line(f):
if not line.startswith('\t') and threadid_position:
try:
if len(line.strip().split()) > threadid_position:
threadid = line.strip().split()[threadid_position]
except IndexError:
raise ValueError(f'wrong format for log line:{line.strip()}')
else:
print(f'wrong format for log line:{line.strip()}')
continue
if not threadid.isdigit():
raise ValueError(f'invalid int value {threadid} for %p')
print(f'wrong format for log line:{line.strip()}')
continue
if not threadid in threadid_log:
threadid_log_file = get_tempfile_name(threadid)
threadid_log_files.append(threadid_log_file)
@ -339,7 +354,7 @@ def record_sql(valid_files, args, log_info_position, output_obj):
start_position = get_start_position(args.start_time, file_path)
if start_position == -1:
continue
with open(file_path) as f:
with open(file_path, errors='ignore') as f:
f.seek(start_position, 0)
threadid_log_files = group_log_by_threadid(f, log_info_position.get('p'))
try:
@ -350,17 +365,15 @@ def record_sql(valid_files, args, log_info_position, output_obj):
for threadid_log_file in threadid_log_files:
if os.path.isfile(threadid_log_file):
os.remove(threadid_log_file)
filter_config = {'user': args.U, 'database': args.d,
'sql_amount': args.sql_amount, 'statement': args.statement}
try:
with open(merged_log_file, mode='r') as f:
with open(merged_log_file, mode='r', errors='ignore') as f:
if isinstance(output_obj, dict):
get_workload_template(output_obj, split_transaction(
get_parsed_sql(f, args.U, args.d,
args.sql_amount,
args.statement),
get_parsed_sql(f, filter_config, log_info_position)
), args)
else:
filter_config = {'user': args.U, 'database': args.d,
'sql_amount': args.sql_amount, 'statement': args.statement}
for sql in get_parsed_sql(f, filter_config, log_info_position):
output_obj.write(sql + '\n')
except Exception as ex:
@ -383,7 +396,7 @@ def extract_sql_from_log(args):
valid_files.insert(0, file)
if args.json:
try:
with open(args.f, 'r') as output_file:
with open(args.f, 'r', errors='ignore') as output_file:
templates = json.load(output_file)
except (json.JSONDecodeError, FileNotFoundError) as e:
templates = {}
@ -417,11 +430,11 @@ def main(argv):
args = arg_parser.parse_args(argv)
if args.U:
if not 'u' in args.p:
if 'u' not in args.p:
raise argparse.ArgumentTypeError(f"input parameter p '{args.p}' does not contain"
" '%u' and U is not allowed.")
if args.d:
if not 'd' in args.p:
if 'd' not in args.p:
raise argparse.ArgumentTypeError(f"input parameter p '{args.p}' does not contain"
" '%d' and d is not allowed.")
if args.start_time:
@ -430,19 +443,19 @@ def main(argv):
time.strptime(args.start_time,
'%Y-%m-%d %H:%M:%S')
)
if not 'm' in args.p:
if 'm' not in args.p:
raise argparse.ArgumentTypeError(f"input parameter p '{args.p}' does not contain"
" '%m' and start_time is not allowed.")
if args.sql_amount is not None and args.sql_amount <= 0:
raise argparse.ArgumentTypeError("sql_amount %s is an invalid positive int value" %
args.sql_amount)
if args.max_reserved_period and args.max_reserved_period <= 0:
if args.max_reserved_period is not None and args.max_reserved_period <= 0:
raise argparse.ArgumentTypeError("max_reserved_period %s is an invalid positive int value" %
args.max_reserved_period)
if args.max_template_num and args.max_template_num <= 0:
if args.max_template_num is not None and args.max_template_num <= 0:
raise argparse.ArgumentTypeError("max_template_num %s is an invalid positive int value" %
args.max_template_num)
elif args.max_template_num and args.max_template_num > 5000:
elif args.max_template_num is not None and args.max_template_num > 5000:
print('max_template_num %d above 5000 is not advised for time cost' % args.max_template_num)
if not args.max_reserved_period:
args.max_reserved_period = float('inf')

View File

@ -11,15 +11,6 @@ benefit of it for the workload.
[-W PASSWORD] [--schema SCHEMA] [--max_index_num MAX_INDEX_NUM] [--max_index_storage MAX_INDEX_STORAGE]
[--multi_iter_mode] [--multi_node] [--json] [--driver] [--show_detail]
# Extract_log
**extract_log** is a tool for extracting business data from pg_log.
## Usage
python extract_log.py [l LOG_DIRECTORY] [f OUTPUT_FILE] [-d DATABASE] [-U USERNAME] [--start_time]
[--sql_amount] [--statement] [--json]
## Dependencies
python3.x

View File

@ -49,7 +49,7 @@ class DriverExecute(ExecuteFactory):
def is_multi_node(self):
self.init_conn_handle()
try:
self.cur.execute("select count(*) from pgxc_node where node_type='C';")
self.cur.execute("select pg_catalog.count(*) from pg_catalog.pgxc_node where node_type='C';")
self.conn.commit()
return self.cur.fetchall()[0][0] > 0
finally:
@ -100,17 +100,17 @@ class DriverExecute(ExecuteFactory):
# create hypo-indexes
if self.schema:
sqls = 'SET current_schema = %s;' % self.schema
sqls += 'SET enable_hypo_index = on;SELECT hypopg_reset_index();'
sqls += 'SET enable_hypo_index = on;SELECT pg_catalog.hypopg_reset_index();'
if multi_node:
sqls += 'SET enable_fast_query_shipping = off;SET enable_stream_operator = on;'
for table in query_index_dict.keys():
for columns_tulpe in query_index_dict[table]:
if columns_tulpe != '':
content = "SELECT hypopg_create_index('CREATE INDEX ON %s(%s) %s');" % \
content = "SELECT pg_catalog.hypopg_create_index('CREATE INDEX ON %s(%s) %s');" % \
(table, columns_tulpe[0], columns_tulpe[1])
content = content.replace('""', '')
sqls += content
sqls += 'SELECT * from hypopg_display_index();'
sqls += 'SELECT * from pg_catalog.hypopg_display_index();'
result = self.execute(sqls)
if not result:
return valid_indexes
@ -122,21 +122,21 @@ class DriverExecute(ExecuteFactory):
match_flag, table_name = ExecuteFactory.match_table_name(table_name,
query_index_dict)
if not match_flag:
self.execute('SELECT hypopg_reset_index()')
self.execute('SELECT pg_catalog.hypopg_reset_index()')
return valid_indexes
hypoid_table_column[str(item[1])] = \
table_name + ':' + item[3].strip('()')
sqls = "SET explain_perf_mode = 'normal'; explain %s" % query
result = self.execute(sqls)
if not result:
self.execute('SELECT hypopg_reset_index()')
self.execute('SELECT pg_catalog.hypopg_reset_index()')
return valid_indexes
# parse the result of explain plan
for item in result:
if 'Index' in item[0] and 'Scan' in item[0] and 'btree' in item[0]:
super().get_valid_indexes(
item[0], hypoid_table_column, valid_indexes)
self.execute('SELECT hypopg_reset_index()')
self.execute('SELECT pg_catalog.hypopg_reset_index()')
return valid_indexes
@staticmethod
@ -164,7 +164,7 @@ class DriverExecute(ExecuteFactory):
return cost_total
def update_index_storage(self, index_id, index_config, hypo_index_num):
index_size_sql = 'select * from hypopg_estimate_size(%s);' % index_id
index_size_sql = 'select * from pg_catalog.hypopg_estimate_size(%s);' % index_id
res = self.execute(index_size_sql)
if res:
index_config[hypo_index_num].storage = float(
@ -181,7 +181,7 @@ class DriverExecute(ExecuteFactory):
# create hypo-indexes
self.execute('SET enable_hypo_index = on')
for index in index_config:
res = self.execute("SELECT * from hypopg_create_index('CREATE INDEX ON %s(%s) %s')" %
res = self.execute("SELECT * from pg_catalog.hypopg_create_index('CREATE INDEX ON %s(%s) %s')" %
(index.table, index.columns, index.index_type))
if self.max_index_storage and res:
self.update_index_storage(
@ -206,13 +206,14 @@ class DriverExecute(ExecuteFactory):
res, index_config, ori_indexes_name)
query_cost *= workload[ind].frequency
workload[ind].cost_list.append(query_cost)
# update positive_pos and negative_pos
if index_config and len(index_config) == 1 and query_cost < workload[ind].cost_list[0]:
index_config[0].positive_pos.append(ind)
index_config[0].update_positive_pos(ind)
total_cost += query_cost
else:
workload[ind].cost_list.append(0)
if index_config:
self.execute('SELECT hypopg_reset_index()')
self.execute('SELECT pg_catalog.hypopg_reset_index()')
return total_cost
def check_useless_index(self, history_indexes, history_invalid_indexes):
@ -221,7 +222,7 @@ class DriverExecute(ExecuteFactory):
whole_indexes = list()
redundant_indexes = list()
for schema in schemas:
table_sql = "select tablename from pg_tables where schemaname = '%s'" % schema
table_sql = "select tablename from pg_catalog.pg_tables where schemaname = '%s'" % schema
table_res = self.execute(table_sql)
if not table_res:
continue
@ -229,10 +230,10 @@ class DriverExecute(ExecuteFactory):
tables_string = ','.join(["'%s'" % table for table in tables])
# query all table index information and primary key information
sql = "set current_schema = %s; SELECT c.relname AS tablename, i.relname AS indexname, " \
"pg_get_indexdef(i.oid) AS indexdef, p.contype AS pkey from " \
"pg_index x JOIN pg_class c ON c.oid = x.indrelid JOIN " \
"pg_class i ON i.oid = x.indexrelid LEFT JOIN pg_namespace n " \
"ON n.oid = c.relnamespace LEFT JOIN pg_constraint p ON (i.oid = p.conindid " \
"pg_catalog.pg_get_indexdef(i.oid) AS indexdef, p.contype AS pkey from " \
"pg_catalog.pg_index x JOIN pg_catalog.pg_class c ON c.oid = x.indrelid JOIN " \
"pg_catalog.pg_class i ON i.oid = x.indexrelid LEFT JOIN pg_catalog.pg_namespace n " \
"ON n.oid = c.relnamespace LEFT JOIN pg_catalog.pg_constraint p ON (i.oid = p.conindid " \
"AND p.contype = 'p') WHERE (c.relkind = ANY (ARRAY['r'::\"char\", " \
"'m'::\"char\"])) AND (i.relkind = ANY (ARRAY['i'::\"char\", 'I'::\"char\"])) " \
"AND n.nspname = '%s' AND c.relname in (%s) order by c.relname;" % \

View File

@ -48,7 +48,7 @@ class GSqlExecute(ExecuteFactory):
proc = subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
(stdout, stderr) = proc.communicate()
stdout, stderr = stdout.decode(), stderr.decode()
stdout, stderr = stdout.decode(errors='ignore'), stderr.decode(errors='ignore')
if 'gsql: FATAL:' in stderr or 'failed to connect' in stderr:
raise ConnectionError("An error occurred while connecting to the database.\n"
+ "Details: " + stderr)
@ -61,9 +61,9 @@ class GSqlExecute(ExecuteFactory):
ret = subprocess.check_output(
shlex.split(cmd), stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
print(e.output.decode(), file=sys.stderr)
print(e.output.decode(errors='ignore'), file=sys.stderr)
return ret.decode()
return ret.decode(errors='ignore')
def is_multi_node(self):
cmd = BASE_CMD + " -c " + shlex.quote("select count(*) from pgxc_node where node_type='C';")
@ -71,8 +71,8 @@ class GSqlExecute(ExecuteFactory):
ret = subprocess.check_output(
shlex.split(cmd), stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
print(e.output.decode(), file=sys.stderr)
return int(ret.decode().strip().split()[2]) > 0
print(e.output.decode(errors='ignore'), file=sys.stderr)
return int(ret.decode(errors='ignore').strip().split()[2]) > 0
@staticmethod
def parse_single_advisor_result(res, table_index_dict):
@ -199,8 +199,9 @@ class GSqlExecute(ExecuteFactory):
query_cost = GSqlExecute.parse_plan_cost(line)
query_cost *= workload[select_sql_pos[i]].frequency
workload[select_sql_pos[i]].cost_list.append(query_cost)
# update positive_pos and negative_pos
if index_config and len(index_config) == 1 and query_cost < workload[select_sql_pos[i]].cost_list[0]:
index_config[0].positive_pos.append(select_sql_pos[i])
index_config[0].update_positive_pos(select_sql_pos[i])
total_cost += query_cost
found_plan = False
i += 1
@ -303,8 +304,9 @@ class GSqlExecute(ExecuteFactory):
elif re.match(r'\(\d+ rows?\)', line):
continue
elif '|' in line:
temptable, tempindex, indexdef, temppkey = [
item.strip() for item in line.split('|')]
temptable, tempindex = [item.strip() for item in line.split('|')[:2]]
indexdef = ('|'.join(line.split('|')[2:-1])).strip()
temppkey = line.split('|')[-1].strip()
if temptable and tempindex:
table, index, pkey = temptable, tempindex, temppkey
if line.strip().endswith(('+| p', '+|')):
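The updated parsing above keeps '|' characters that appear inside the index definition intact; a small sketch of that splitting rule on a hypothetical gsql output line:

line = "t1 | t1_idx | CREATE INDEX t1_idx ON t1 USING btree ((a || b)) | p"
temptable, tempindex = [item.strip() for item in line.split('|')[:2]]
indexdef = ('|'.join(line.split('|')[2:-1])).strip()
temppkey = line.split('|')[-1].strip()
print(temptable, tempindex, temppkey)   # t1 t1_idx p
print(indexdef)                         # CREATE INDEX t1_idx ON t1 USING btree ((a || b))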

View File

@ -19,11 +19,13 @@ pg_db:
sql: |-
SELECT d.datid,d.datname,numbackends,
xact_commit,xact_rollback,xact_rollback + xact_commit AS xact_total,
blks_read,blks_hit,blks_read + blks_hit AS blks_access,
blks_read,blks_hit,(blks_hit / (blks_read+blks_hit+0.001)) AS blks_access,
tup_returned,tup_fetched,tup_inserted,tup_updated,tup_deleted,tup_inserted + tup_updated + tup_deleted AS tup_modified,
conflicts,temp_files,temp_bytes,deadlocks,
blk_read_time,blk_write_time, extract(epoch from stats_reset) as stats_reset,
confl_tablespace,confl_lock,confl_snapshot,confl_bufferpin,confl_deadlock
confl_tablespace,confl_lock,confl_snapshot,confl_bufferpin,confl_deadlock,
tup_returned / (extract (epoch from (pg_catalog.now() - stats_reset))) AS read_tup_speed,
(tup_inserted + tup_updated + tup_deleted) / (extract (epoch from (pg_catalog.now() - stats_reset))) AS write_tup_speed
FROM pg_stat_database d,pg_stat_database_conflicts pdc
WHERE pdc.datname = d.datname and d.datname NOT IN ('postgres', 'template0', 'template1');
version: '>=0.0.0'
@ -53,6 +55,9 @@ pg_db:
- name: blks_hit
description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the OpenGauss buffer cache, not the operating system's file system cache)
usage: COUNTER
- name: blks_access
      description: buffer cache hit rate of the database
usage: GAUGE
- name: tup_returned
description: Number of rows returned by queries in this database
usage: COUNTER
@ -104,6 +109,12 @@ pg_db:
- name: confl_deadlock
description: Number of queries in this database that have been canceled due to deadlocks
usage: COUNTER
- name: read_tup_speed
      description: rows returned per second since the last statistics reset
usage: COUNTER
- name: write_tup_speed
      description: rows inserted, updated or deleted per second since the last statistics reset
usage: COUNTER
status: enable
ttl: -1
timeout: 1
@ -115,11 +126,11 @@ pg_meta:
query:
- name: pg_meta
sql: |-
SELECT (SELECT system_identifier FROM pg_control_system()) AS cluster_id,
current_setting('port') AS listen_port,
current_setting('wal_level') AS wal_level,
current_setting('server_version') AS version,
current_setting('server_version_num') AS ver_num,
SELECT (SELECT system_identifier FROM pg_catalog.pg_control_system()) AS cluster_id,
pg_catalog.current_setting('port') AS listen_port,
pg_catalog.current_setting('wal_level') AS wal_level,
pg_catalog.current_setting('server_version') AS version,
pg_catalog.current_setting('server_version_num') AS ver_num,
'N/A' AS primary_conninfo,
1 AS info;
version: '>=0.0.0'
@ -158,21 +169,24 @@ pg_connections:
desc: OpenGauss database connections
query:
- name: pg_connections
sql: select max_conn,used_conn,max_conn-used_conn res_for_normal from (select count(*) used_conn from pg_stat_activity) t1,(select setting::int max_conn from pg_settings where name='max_connections') t2;
version: '>=0.0.0'
sql: select t1.used_conn, t2.enqueue_sql, t3.idle_session from (select pg_catalog.count(*) used_conn from pg_stat_activity) t1,
(select pg_catalog.count(*) enqueue_sql from pg_stat_activity where enqueue is not NULL) t2,
(select pg_catalog.count(*) idle_session from pg_stat_activity where state='idle') t3;
version: '>=0.0.0'
timeout: 1
status: enable
dbRole: ""
metrics:
- name: max_conn
description: total of connections
usage: GAUGE
- name: used_conn
description: used of connections
usage: GAUGE
- name: res_for_normal
description: reserve of connections
- name: idle_session
description: idle session number
usage: GAUGE
- name: enqueue_sql
      description: number of queued (enqueued) SQL statements
usage: GAUGE
status: enable
ttl: 60
timeout: 1
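To make the shape of these exporter query blocks concrete, here is a minimal sketch that loads a trimmed pg_connections-style block with PyYAML and lists its declared metrics (PyYAML is an assumed dependency here; this is not the exporter's own loading code):

import yaml

snippet = '''
pg_connections:
  name: pg_connections
  desc: OpenGauss database connections
  query:
    - name: pg_connections
      sql: select 1 as used_conn, 0 as enqueue_sql, 0 as idle_session;
      version: '>=0.0.0'
      timeout: 1
      status: enable
  metrics:
    - name: used_conn
      description: used of connections
      usage: GAUGE
    - name: idle_session
      description: idle session number
      usage: GAUGE
  status: enable
  ttl: 60
  timeout: 1
'''
block = yaml.safe_load(snippet)['pg_connections']
print(block['query'][0]['sql'])
print([(m['name'], m['usage']) for m in block['metrics']])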
@ -183,7 +197,7 @@ pg_session_connection:
desc: OpenGauss backend activity group by state
query:
- name: pg_session_connection
sql: select client_addr,state,count(1) as count from pg_stat_activity group by client_addr,state order by 3 desc limit 20 ;
sql: select client_addr,state,pg_catalog.count(1) as count from pg_stat_activity group by client_addr,state order by 3 desc limit 20 ;
version: '>=0.0.0'
timeout: 1
status: enable
@ -204,7 +218,6 @@ pg_session_connection:
public: true
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# ┃ pg_stat_activity
# ┃ OpenGauss backend activity group by state
@ -238,15 +251,15 @@ pg_activity:
coalesce(max_conn_duration, 0) AS max_conn_duration
FROM (SELECT d.oid AS database, d.datname, a.state
FROM pg_database d,
unnest(ARRAY ['active','idle','idle in transaction','idle in transaction (aborted)','fastpath function call','disabled']) a(state)
pg_catalog.unnest(ARRAY ['active','idle','idle in transaction','idle in transaction (aborted)','fastpath function call','disabled']) a(state)
WHERE d.datname NOT IN ('template0','template1')) base
LEFT JOIN (
SELECT datname, state,
count(*) AS count,
max(extract(epoch from now() - state_change)) AS max_duration,
max(extract(epoch from now() - xact_start)) AS max_tx_duration,
max(extract(epoch from now() - backend_start)) AS max_conn_duration
FROM pg_stat_activity WHERE pid <> pg_backend_pid()
pg_catalog.count(*) AS count,
pg_catalog.max(extract(epoch from pg_catalog.now() - state_change)) AS max_duration,
pg_catalog.max(extract(epoch from pg_catalog.now() - xact_start)) AS max_tx_duration,
pg_catalog.max(extract(epoch from pg_catalog.now() - backend_start)) AS max_conn_duration
FROM pg_stat_activity WHERE pid <> pg_catalog.pg_backend_pid()
GROUP BY datname, state
) a USING (datname, state);
version: '>=1.0.0'
@ -297,8 +310,8 @@ pg_downstream:
- name: pg_downstream
sql: |
SELECT l.state, coalesce(count, 0 ) AS count
FROM unnest(ARRAY ['Streaming','Startup','Catchup', 'Backup', 'Stopping']) l(state)
LEFT JOIN (SELECT state, count(*) AS count FROM pg_stat_replication GROUP BY state)r ON l.state = r.state
FROM pg_catalog.unnest(ARRAY ['Streaming','Startup','Catchup', 'Backup', 'Stopping']) l(state)
LEFT JOIN (SELECT state, pg_catalog.count(*) AS count FROM pg_stat_replication GROUP BY state)r ON l.state = r.state
version: '>=0.0.0'
timeout: 0.5
ttl: 10
@ -371,13 +384,13 @@ pg_replication:
from
(
select pr.pid,client_addr,application_name,pr.state,pr.sync_state,
pg_xlog_location_diff (case when pg_is_in_recovery() then pg_last_xlog_receive_location() else pg_current_xlog_location() end, '0/0') as lsn,
pg_xlog_location_diff(pr.sender_sent_location,'0/0') as sent_location,
pg_xlog_location_diff(pr.receiver_write_location,'0/0') as write_location,
pg_xlog_location_diff(pr.receiver_flush_location,'0/0') as flush_location,
pg_xlog_location_diff(pr.receiver_replay_location,'0/0') as replay_location,
pg_xlog_location_diff(pr.receiver_replay_location, pg_current_xlog_location()) as replay_lag,
extract(EPOCH from now() - backend_start) as backend_uptime,pr.sync_priority
pg_xlog_location_diff (case when pg_catalog.pg_is_in_recovery() then pg_catalog.pg_last_xlog_receive_location() else pg_catalog.pg_current_xlog_location() end, '0/0') as lsn,
pg_catalog.pg_xlog_location_diff(pr.sender_sent_location,'0/0') as sent_location,
pg_catalog.pg_xlog_location_diff(pr.receiver_write_location,'0/0') as write_location,
pg_catalog.pg_xlog_location_diff(pr.receiver_flush_location,'0/0') as flush_location,
pg_catalog.pg_xlog_location_diff(pr.receiver_replay_location,'0/0') as replay_location,
pg_catalog.pg_xlog_location_diff(pr.receiver_replay_location, pg_catalog.pg_current_xlog_location()) as replay_lag,
extract(EPOCH from pg_catalog.now() - backend_start) as backend_uptime,pr.sync_priority
from
pg_stat_replication pr
);
@ -460,10 +473,10 @@ pg_replication:
# ┃ LABEL xmin replication xid
# ┃ LABEL catalog_xmin logical decode xid
# ┃ LABEL restart_lsn Xlog info
# ┃ GAUGE delay_lsn delay lsn from pg_current_xlog_location()
# ┃ GAUGE delay_lsn delay lsn from pg_catalog.pg_current_xlog_location()
# ┃ DISCARD dummy_standby Is real standby
# ┣┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈
# ┃ pg_replication_slots_delay_lsn{slot_name,plugin,slot_type,datoid,database,active,xmin,catalog_xmin,restart_lsn} GAUGE delay lsn from pg_current_xlog_location()
# ┃ pg_replication_slots_delay_lsn{slot_name,plugin,slot_type,datoid,database,active,xmin,catalog_xmin,restart_lsn} GAUGE delay lsn from pg_catalog.pg_current_xlog_location()
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
pg_slot:
name: pg_replication_slots
@ -476,8 +489,8 @@ pg_slot:
(case active when 't' then 1 else 0 end)as active,
coalesce(xmin,'_') as xmin,
dummy_standby,
pg_xlog_location_diff(CASE WHEN pg_is_in_recovery() THEN restart_lsn
ELSE pg_current_xlog_location() END , restart_lsn) AS delay_lsn
pg_catalog.pg_xlog_location_diff(CASE WHEN pg_catalog.pg_is_in_recovery() THEN restart_lsn
ELSE pg_catalog.pg_current_xlog_location() END , restart_lsn) AS delay_lsn
from pg_replication_slots;
version: '>=1.0.0'
timeout: 1
@ -507,7 +520,7 @@ pg_slot:
description: replication xid
usage: LABEL
- name: delay_lsn
description: delay lsn from pg_current_xlog_location()
description: delay lsn from pg_catalog.pg_current_xlog_location()
usage: GAUGE
- name: dummy_standby
description: Is real standby
@ -526,14 +539,14 @@ pg_slot:
# ┣┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈
# ┃ LABEL datname Name of this database
# ┃ GAUGE size_bytes Disk space used by the database
# ┃ GAUGE age database age calculated by age(datfrozenxid64)
# ┃ GAUGE age database age calculated by pg_catalog.age(datfrozenxid64)
# ┃ GAUGE is_template 1 for template db and 0 for normal db
# ┃ GAUGE allow_conn 1 allow connection and 0 does not allow
# ┃ GAUGE conn_limit connection limit, -1 for no limit
# ┃ GAUGE frozen_xid tuple with xmin below this will always be visable (until wrap around)
# ┣┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈
# ┃ pg_database_size_bytes{datname} GAUGE Disk space used by the database
# ┃ pg_database_age{datname} GAUGE database age calculated by age(datfrozenxid64)
# ┃ pg_database_age{datname} GAUGE database age calculated by pg_catalog.age(datfrozenxid64)
# ┃ pg_database_is_template{datname} GAUGE 1 for template db and 0 for normal db
# ┃ pg_database_allow_conn{datname} GAUGE 1 allow connection and 0 does not allow
# ┃ pg_database_conn_limit{datname} GAUGE connection limit, -1 for no limit
@ -546,8 +559,8 @@ pg_database:
- name: pg_database
sql: |-
SELECT datname,
pg_database_size(pg_database.datname) as size_bytes,
age(datfrozenxid64) AS age,
pg_catalog.pg_database_size(pg_database.datname) as size_bytes,
pg_catalog.age(datfrozenxid64) AS age,
datistemplate AS is_template,
datallowconn AS allow_conn,
datconnlimit AS conn_limit,
@ -562,8 +575,8 @@ pg_database:
- name: pg_database
sql: |-
SELECT datname,
pg_database_size(pg_database.datname) as size_bytes,
age(datfrozenxid64) AS age,
pg_catalog.pg_database_size(pg_database.datname) as size_bytes,
pg_catalog.age(datfrozenxid64) AS age,
datistemplate AS is_template,
datallowconn AS allow_conn,
datconnlimit AS conn_limit,
@ -583,7 +596,7 @@ pg_database:
description: Disk space used by the database
usage: GAUGE
- name: age
description: database age calculated by age(datfrozenxid64)
description: database age calculated by pg_catalog.age(datfrozenxid64)
usage: GAUGE
- name: is_template
description: 1 for template db and 0 for normal db
@ -668,8 +681,8 @@ pg_checkpoint:
oldest_xid_dbid::text::BIGINT,
oldest_active_xid::text::BIGINT,
checkpoint_time AS time,
extract(epoch from now() - checkpoint_time) AS elapse
FROM pg_control_checkpoint();
extract(epoch from pg_catalog.now() - checkpoint_time) AS elapse
FROM pg_catalog.pg_control_checkpoint();
version: '>=0.0.0'
timeout: 1
ttl: 5
@ -757,7 +770,7 @@ pg_run_times:
desc: OpenGauss database run times
query:
- name: pg_run_times
sql: select 'cluster_runtime' as run_name,(case pg_is_in_recovery() when 'f' then 1 else 0 end) as db_role,extract(epoch from(now() - pg_postmaster_start_time())) as run_time;
sql: select 'cluster_runtime' as run_name,(case pg_catalog.pg_is_in_recovery() when 'f' then 1 else 0 end) as db_role,extract(epoch from(pg_catalog.now() - pg_catalog.pg_postmaster_start_time())) as run_time;
version: '>=0.0.0'
timeout: 1
ttl: 60
@ -823,22 +836,24 @@ pg:
desc: 'primary database '
sql: |
SELECT extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp,
extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime,
extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time,
pg_xlog_location_diff(pg_current_xlog_location() , '0/0') AS lsn,
pg_xlog_location_diff(pg_current_xlog_insert_location(),'0/0') AS insert_lsn,
pg_xlog_location_diff(pg_current_xlog_location() , '0/0') AS write_lsn,
pg_xlog_location_diff(pg_current_xlog_location() , '0/0') AS flush_lsn,
extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_postmaster_start_time()) AS uptime,
extract(EPOCH FROM pg_catalog.pg_postmaster_start_time()) AS boot_time,
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location() , '0/0') AS lsn,
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_insert_location(),'0/0') AS insert_lsn,
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location() , '0/0') AS write_lsn,
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_current_xlog_location() , '0/0') AS flush_lsn,
NULL::BIGINT AS receive_lsn,
NULL::BIGINT AS replay_lsn,
extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time,
extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_conf_load_time()) AS conf_reload_time,
NULL::FLOAT AS last_replay_time,
0::FLOAT AS lag,
pg_is_in_recovery() AS is_in_recovery,
FALSE AS is_wal_replay_paused
pg_catalog.pg_is_in_recovery() AS is_in_recovery,
FALSE AS is_wal_replay_paused,
extract(epoch from pg_catalog.avg(pg_catalog.now()-backend_start)) AS avg_time
from pg_stat_activity where client_port is not null
;
version: '>=0.0.0'
timeout: 0.1
timeout: 1
ttl: 10
status: enable
dbRole: primary
@ -846,22 +861,24 @@ pg:
desc: 'standby database '
sql: |
SELECT extract(EPOCH FROM CURRENT_TIMESTAMP) AS timestamp,
extract(EPOCH FROM now() - pg_postmaster_start_time()) AS uptime,
extract(EPOCH FROM pg_postmaster_start_time()) AS boot_time,
pg_xlog_location_diff(pg_last_xlog_receive_location() , '0/0') AS lsn,
extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_postmaster_start_time()) AS uptime,
extract(EPOCH FROM pg_catalog.pg_postmaster_start_time()) AS boot_time,
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_last_xlog_receive_location() , '0/0') AS lsn,
NULL::BIGINT AS insert_lsn,
NULL::BIGINT AS write_lsn,
NULL::BIGINT AS flush_lsn,
pg_xlog_location_diff(pg_last_xlog_receive_location() , '0/0') AS receive_lsn,
pg_xlog_location_diff(pg_last_xlog_receive_location() , '0/0') AS replay_lsn,
extract(EPOCH FROM now() - pg_conf_load_time()) AS conf_reload_time,
extract(EPOCH FROM pg_last_xact_replay_timestamp()) AS last_replay_time,
pg_is_in_recovery() AS is_in_recovery
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_last_xlog_receive_location() , '0/0') AS receive_lsn,
pg_catalog.pg_xlog_location_diff(pg_catalog.pg_last_xlog_receive_location() , '0/0') AS replay_lsn,
extract(EPOCH FROM pg_catalog.now() - pg_catalog.pg_conf_load_time()) AS conf_reload_time,
extract(EPOCH FROM pg_catalog.pg_last_xact_replay_timestamp()) AS last_replay_time,
pg_catalog.pg_is_in_recovery() AS is_in_recovery,
extract(epoch from pg_catalog.avg(now()-backend_start)) AS avg_time
from pg_stat_activity where client_port is not null
;
version: '>=0.0.0'
timeout: 0.1
timeout: 1
ttl: 10
status: enable
status: disable
dbRole: standby
metrics:
- name: timestamp
@ -906,6 +923,9 @@ pg:
- name: is_wal_replay_paused
description: 1 if wal play is paused
usage: GAUGE
- name: avg_time
      description: average backend connection age in seconds (avg of now() - backend_start)
usage: GAUGE
status: enable
ttl: 60
timeout: 0.1
@ -917,13 +937,13 @@ pg_setting:
query:
- name: pg_setting
sql: |-
SELECT current_setting('max_connections') AS max_connections,
current_setting('max_prepared_transactions') AS max_prepared_transactions,
current_setting('max_replication_slots') AS max_replication_slots,
current_setting('max_wal_senders') AS max_wal_senders,
current_setting('max_locks_per_transaction') AS max_locks_per_transaction,
current_setting('block_size') AS block_size,
CASE current_setting('wal_log_hints') WHEN 'on' THEN 1 ELSE 0 END AS wal_log_hints;
SELECT pg_catalog.current_setting('max_connections') AS max_connections,
pg_catalog.current_setting('max_prepared_transactions') AS max_prepared_transactions,
pg_catalog.current_setting('max_replication_slots') AS max_replication_slots,
pg_catalog.current_setting('max_wal_senders') AS max_wal_senders,
pg_catalog.current_setting('max_locks_per_transaction') AS max_locks_per_transaction,
pg_catalog.current_setting('block_size') AS block_size,
CASE pg_catalog.current_setting('wal_log_hints') WHEN 'on' THEN 1 ELSE 0 END AS wal_log_hints;
version: '>=0.0.0'
timeout: 1
ttl: 60
@ -963,7 +983,7 @@ pg_class:
- name: pg_class
sql: |-
SELECT CURRENT_CATALOG AS datname,(select nspname from pg_namespace where oid=relnamespace) as nspname,relname,relkind,relpages,reltuples,
CASE WHEN relkind = 'i' THEN NULL ELSE age(relfrozenxid64) END AS relage,pg_relation_size(oid) AS relsize
CASE WHEN relkind = 'i' THEN NULL ELSE pg_catalog.age(relfrozenxid64) END AS relage,pg_catalog.pg_relation_size(oid) AS relsize
FROM pg_class
WHERE relkind = 'r' and relname not like 'pg_%' and relname not like 'gs_%' and nspname not in ('information_schema', 'pg_catalog')
ORDER BY relpages DESC LIMIT 32;
@ -975,7 +995,7 @@ pg_class:
- name: pg_class
sql: |-
SELECT CURRENT_CATALOG AS datname,(select nspname from pg_namespace where oid=relnamespace) as nspname,relname,relkind,relpages,reltuples,
CASE WHEN relkind = 'i' THEN NULL ELSE age(relfrozenxid64) END AS relage,pg_relation_size(oid) AS relsize
CASE WHEN relkind = 'i' THEN NULL ELSE pg_catalog.age(relfrozenxid64) END AS relage,pg_catalog.pg_relation_size(oid) AS relsize
FROM pg_class
WHERE relkind = 'r' and relname not like 'pg_%' and relname not like 'gs_%' and nspname not in ('information_schema', 'pg_catalog')
ORDER BY relpages DESC LIMIT 32;
@ -1036,9 +1056,9 @@ pg_lock:
SELECT datname, mode, coalesce(count, 0) AS count
FROM (
SELECT d.oid AS database, d.datname, l.mode
FROM pg_database d,unnest(ARRAY ['AccessShareLock','RowShareLock','RowExclusiveLock','ShareUpdateExclusiveLock','ShareLock','ShareRowExclusiveLock','ExclusiveLock','AccessExclusiveLock']) l(mode)
FROM pg_database d,pg_catalog.unnest(ARRAY ['AccessShareLock','RowShareLock','RowExclusiveLock','ShareUpdateExclusiveLock','ShareLock','ShareRowExclusiveLock','ExclusiveLock','AccessExclusiveLock']) l(mode)
WHERE d.datname NOT IN ('template0','template1')) base
LEFT JOIN (SELECT database, mode, count(1) AS count FROM pg_locks WHERE database IS NOT NULL GROUP BY database, mode) cnt
LEFT JOIN (SELECT database, mode, pg_catalog.count(1) AS count FROM pg_locks WHERE database IS NOT NULL GROUP BY database, mode) cnt
USING (database, mode);
version: '>=0.0.0'
timeout: 1
@ -1070,7 +1090,7 @@ pg_lock:
# with tl as (select usename,granted,locktag,query_start,query
# from pg_locks l,pg_stat_activity a
# where l.pid=a.pid and locktag in(select locktag from pg_locks where granted='f'))
# select ts.usename locker_user,ts.query_start locker_query_start,ts.granted locker_granted,ts.query locker_query,tt.query locked_query,tt.query_start locked_query_start,tt.granted locked_granted,tt.usename locked_user,extract(epoch from now() - tt.query_start) as locked_times
# select ts.usename locker_user,ts.query_start locker_query_start,ts.granted locker_granted,ts.query locker_query,tt.query locked_query,tt.query_start locked_query_start,tt.granted locked_granted,tt.usename locked_user,extract(epoch from pg_catalog.now() - tt.query_start) as locked_times
# from (select * from tl where granted='t') as ts,(select * from tl where granted='f') tt
# where ts.locktag=tt.locktag order by 1;
# version: '>=0.0.0'
@ -1120,7 +1140,7 @@ pg_locker:
with tl as (select usename,granted,locktag,query_start,query
from pg_locks l,pg_stat_activity a
where l.pid=a.pid and locktag in(select locktag from pg_locks where granted='f'))
select usename,query_start,granted,query,count(query) count
select usename,query_start,granted,query,pg_catalog.count(query) count
from tl where granted='t' group by usename,query_start,granted,query order by 5 desc;
version: '>=0.0.0'
timeout: 1
@ -1153,7 +1173,7 @@ pg_active_slowsql:
desc: OpenGauss active slow query
query:
- name: pg_active_slowsql
sql: select datname,usename,client_addr,pid,query_start::text,extract(epoch from (now() - query_start)) as query_runtime,xact_start::text,extract(epoch from(now() - xact_start)) as xact_runtime,state,query from pg_stat_activity where state not in('idle') and query_start is not null;
sql: select datname,usename,client_addr,pid,query_start::text,extract(epoch from (pg_catalog.now() - query_start)) as query_runtime,xact_start::text,extract(epoch from(pg_catalog.now() - xact_start)) as xact_runtime,state,query from pg_stat_activity where state not in('idle') and query_start is not null;
version: '>=0.0.0'
timeout: 1
ttl: 60
@ -1241,8 +1261,8 @@ pg_table:
n_tup_ins,n_tup_upd,n_tup_del,(n_tup_ins + n_tup_upd + n_tup_del) AS n_tup_mod,
n_tup_hot_upd,n_live_tup,n_dead_tup,
nvl(last_vacuum::text,'1970-01-01') as last_vacuum,nvl(last_autovacuum::text,'1970-01-01') as last_autovacuum,nvl(last_analyze::text,'1970-01-01') as last_analyze,nvl(last_autoanalyze::text,'1970-01-01') as last_autoanalyze,vacuum_count,autovacuum_count,analyze_count,autoanalyze_count,
extract(epoch from now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay,
extract(epoch from now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay,
extract(epoch from pg_catalog.now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay,
extract(epoch from pg_catalog.now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay,
heap_blks_read,heap_blks_hit,idx_blks_read,idx_blks_hit,
toast_blks_read,toast_blks_hit,tidx_blks_read,tidx_blks_hit
FROM pg_stat_user_tables psut,pg_statio_user_tables psio
@ -1259,8 +1279,8 @@ pg_table:
n_tup_ins,n_tup_upd,n_tup_del,(n_tup_ins + n_tup_upd + n_tup_del) AS n_tup_mod,
n_tup_hot_upd,n_live_tup,n_dead_tup,
nvl(last_vacuum::text,'1970-01-01') as last_vacuum,nvl(last_autovacuum::text,'1970-01-01') as last_autovacuum,nvl(last_analyze::text,'1970-01-01') as last_analyze,nvl(last_autoanalyze::text,'1970-01-01') as last_autoanalyze,vacuum_count,autovacuum_count,analyze_count,autoanalyze_count,
extract(epoch from now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay,
extract(epoch from now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay,
extract(epoch from pg_catalog.now() -(case when nvl(last_vacuum,'1970-01-01')>nvl(last_autovacuum,'1970-01-01') then nvl(last_vacuum,'1970-01-01') else nvl(last_autovacuum,'1970-01-01') end))::int vacuum_delay,
extract(epoch from pg_catalog.now() -(case when nvl(last_analyze,'1970-01-01')>nvl(last_autoanalyze,'1970-01-01') then nvl(last_analyze,'1970-01-01') else nvl(last_autoanalyze,'1970-01-01') end))::int analyze_delay,
heap_blks_read,heap_blks_hit,idx_blks_read,idx_blks_hit,
toast_blks_read,toast_blks_hit,tidx_blks_read,tidx_blks_hit
FROM pg_stat_user_tables psut,pg_statio_user_tables psio
@ -1386,7 +1406,7 @@ pg_index:
query:
- name: pg_index
sql: |-
SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname,
SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname, pg_get_indexdef(psui.indexrelid) AS indexdef,
idx_scan, idx_tup_read,idx_tup_fetch,idx_blks_read,idx_blks_hit
FROM pg_stat_user_indexes psui,pg_statio_user_indexes psio
WHERE psio.indexrelid = psui.indexrelid and psui.schemaname not in ('pg_catalog', 'information_schema','snapshot')
@ -1398,7 +1418,7 @@ pg_index:
status: enable
- name: pg_index
sql: |-
SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname,
SELECT CURRENT_CATALOG AS datname,psui.schemaname AS nspname,psui.relname AS tablename,psui.indexrelname AS relname, pg_get_indexdef(psui.indexrelid) AS indexdef,
idx_scan,idx_tup_read,idx_tup_fetch,idx_blks_read,idx_blks_hit
FROM pg_stat_user_indexes psui,pg_statio_user_indexes psio
WHERE psio.indexrelid = psui.indexrelid and psui.schemaname not in ('pg_catalog', 'information_schema','snapshot')
@ -1421,6 +1441,9 @@ pg_index:
- name: relname
description: index name of this relation
usage: LABEL
- name: indexdef
description: index definition of this relation
usage: LABEL
- name: idx_scan
description: index scans initiated on this index
usage: GAUGE
@ -1447,10 +1470,10 @@ pg_tables_size:
- name: pg_tables_size
sql: |-
SELECT CURRENT_CATALOG AS datname,nsp.nspname,rel.relname,
pg_total_relation_size(rel.oid) AS bytes,
pg_relation_size(rel.oid) AS relsize,
pg_indexes_size(rel.oid) AS indexsize,
pg_total_relation_size(reltoastrelid) AS toastsize
pg_catalog.pg_total_relation_size(rel.oid) AS bytes,
pg_catalog.pg_relation_size(rel.oid) AS relsize,
pg_catalog.pg_indexes_size(rel.oid) AS indexsize,
pg_catalog.pg_total_relation_size(reltoastrelid) AS toastsize
FROM pg_namespace nsp JOIN pg_class rel ON nsp.oid = rel.relnamespace
WHERE nspname NOT IN ('pg_catalog', 'information_schema','snapshot') AND rel.relkind = 'r'
order by 4 desc limit 100;
@ -1462,10 +1485,10 @@ pg_tables_size:
- name: pg_tables_size
sql: |-
SELECT CURRENT_CATALOG AS datname,nsp.nspname,rel.relname,
pg_total_relation_size(rel.oid) AS bytes,
pg_relation_size(rel.oid) AS relsize,
pg_indexes_size(rel.oid) AS indexsize,
pg_total_relation_size(reltoastrelid) AS toastsize
pg_catalog.pg_total_relation_size(rel.oid) AS bytes,
pg_catalog.pg_relation_size(rel.oid) AS relsize,
pg_catalog.pg_indexes_size(rel.oid) AS indexsize,
pg_catalog.pg_total_relation_size(reltoastrelid) AS toastsize
FROM pg_namespace nsp JOIN pg_class rel ON nsp.oid = rel.relnamespace
WHERE nspname NOT IN ('pg_catalog', 'information_schema','snapshot') AND rel.relkind = 'r'
order by 4 desc limit 100;
@ -1506,7 +1529,7 @@ pg_indexes_size:
query:
- name: pg_indexes_size
sql: |-
select schemaname schema_name,relname table_name,indexrelname index_name,pg_table_size(indexrelid) as index_size
select schemaname schema_name,relname table_name,indexrelname index_name,pg_catalog.pg_table_size(indexrelid) as index_size
from pg_stat_user_indexes
where schemaname not in('pg_catalog', 'information_schema','snapshot')
order by 4 desc limit 100;
@ -1517,7 +1540,7 @@ pg_indexes_size:
status: disable
- name: pg_indexes_size
sql: |-
select schemaname schema_name,relname table_name,indexrelname index_name,pg_table_size(indexrelid) as index_size
select schemaname schema_name,relname table_name,indexrelname index_name,pg_catalog.pg_table_size(indexrelid) as index_size
from pg_stat_user_indexes
where schemaname not in('pg_catalog', 'information_schema','snapshot')
order by 4 desc limit 100;
@ -1549,9 +1572,9 @@ pg_need_indexes:
query:
- name: pg_need_indexes
sql: |-
select schemaname||'.'||relname as tablename, pg_size_pretty(pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate
select schemaname||'.'||relname as tablename, pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate
from pg_stat_user_tables
where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90
where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_catalog.pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90
order by seq_scan desc limit 10;
version: '>=0.0.0'
timeout: 10
@ -1560,9 +1583,9 @@ pg_need_indexes:
status: enable
- name: pg_need_indexes
sql: |-
select schemaname||'.'||relname as tablename, pg_size_pretty(pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate
select schemaname||'.'||relname as tablename, pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(relid)) as table_size, seq_scan, seq_tup_read, coalesce(idx_scan,0) idx_scan, coalesce(idx_tup_fetch,0) idx_tup_fetch,coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) as rate
from pg_stat_user_tables
where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90
where schemaname not in('pg_catalog', 'information_schema','snapshot') and pg_catalog.pg_table_size(relid) > 1024*1024*1024 and coalesce((idx_scan/(case when (seq_scan+idx_scan) >0 then (seq_scan+idx_scan) else 1 end) * 100),0) < 90
order by seq_scan desc limit 10;
version: '>=0.0.0'
timeout: 10
@ -1601,7 +1624,7 @@ pg_never_used_indexes:
query:
- name: pg_never_used_indexes
sql: |-
select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_table_size(pi.indexrelid) as index_size
select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_catalog.pg_table_size(pi.indexrelid) as index_size
from pg_indexes pis
join pg_stat_user_indexes pi
on pis.schemaname = pi.schemaname and pis.tablename = pi.relname and pis.indexname = pi.indexrelname
@ -1610,8 +1633,8 @@ pg_never_used_indexes:
where pco.contype is distinct from 'p' and pco.contype is distinct from 'u'
and (idx_scan,idx_tup_read,idx_tup_fetch) = (0,0,0)
and pis.indexdef !~ ' UNIQUE INDEX '
and pis.schemaname not in('pg_catalog', 'information_schema','snapshot')
order by pg_table_size(indexrelid) desc;
and pis.schemaname not in('pg_catalog', 'information_schema','snapshot', 'dbe_pldeveloper')
order by pg_catalog.pg_table_size(indexrelid) desc;
version: '>=0.0.0'
timeout: 10
ttl: 3600
@ -1619,7 +1642,7 @@ pg_never_used_indexes:
status: enable
- name: pg_never_used_indexes
sql: |-
select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_table_size(pi.indexrelid) as index_size
select CURRENT_CATALOG as datname, pi.schemaname, pi.relname, pi.indexrelname, pg_catalog.pg_table_size(pi.indexrelid) as index_size
from pg_indexes pis
join pg_stat_user_indexes pi
on pis.schemaname = pi.schemaname and pis.tablename = pi.relname and pis.indexname = pi.indexrelname
@ -1628,8 +1651,8 @@ pg_never_used_indexes:
where pco.contype is distinct from 'p' and pco.contype is distinct from 'u'
and (idx_scan,idx_tup_read,idx_tup_fetch) = (0,0,0)
and pis.indexdef !~ ' UNIQUE INDEX '
and pis.schemaname not in('pg_catalog', 'information_schema','snapshot')
order by pg_table_size(indexrelid) desc;
and pis.schemaname not in('pg_catalog', 'information_schema','snapshot', 'dbe_pldeveloper')
order by pg_catalog.pg_table_size(indexrelid) desc;
version: '>=0.0.0'
timeout: 10
ttl: 3600
@ -1661,15 +1684,11 @@ pg_tables_expansion_rate:
query:
- name: pg_tables_expansion_rate
sql: |-
select CURRENT_CATALOG as datname, schemaname,relname,n_live_tup,n_dead_tup,round((n_dead_tup/(n_dead_tup+n_live_tup) *100),2) as dead_rate,
extract(epoch from coalesce(last_vacuum,'1970-01-01')::text) as last_vacuum,
extract(epoch from coalesce(last_autovacuum,'1970-01-01')::text) as last_autovacuum ,
extract(epoch from coalesce(last_analyze,'1970-01-01')::text) as last_analyze,
extract(epoch from coalesce(last_autoanalyze,'1970-01-01')::text) as last_autoanalyze,
vacuum_count,autovacuum_count,analyze_count,autoanalyze_count
from pg_stat_user_tables
where n_live_tup > 0
order by 5 asc;
select t1.*, t2.column_number from (select CURRENT_CATALOG as datname, schemaname,relname,n_live_tup,n_dead_tup,round(n_dead_tup/(n_live_tup+1),2) as dead_rate,
last_vacuum, last_autovacuum, last_analyze, last_autoanalyze, vacuum_count,autovacuum_count,analyze_count,autoanalyze_count
from pg_stat_user_tables where n_live_tup > 0 order by 5 asc) t1 join
(select table_schema, table_name, count(column_name) as column_number from information_schema.columns group by table_schema, table_name)
t2 on t1.schemaname=t2.table_schema and t1.relname=t2.table_name;
version: '>=0.0.0'
timeout: 1
status: enable
@ -1689,6 +1708,9 @@ pg_tables_expansion_rate:
- name: n_dead_tup
description: dead tup of table
usage: LABEL
- name: column_number
description: column number of table
      usage: LABEL
- name: dead_rate
description: Dead rate of table
usage: GAUGE
@ -1742,7 +1764,7 @@ pg_lock_sql:
(locked_act.xact_start)::text as locked_xact_start,
(locker_act.query_start)::text as locker_query_start,
(locked_act.query_start)::text as locked_query_start,
extract(epoch from now() - locked_act.query_start) as locked_times,
extract(epoch from pg_catalog.now() - locked_act.query_start) as locked_times,
locker_act.query as locker_query,
locked_act.query as locked_query
from pg_locks locked,
@ -1828,7 +1850,7 @@ og_memory_info:
desc: OpenGauss memory usage informations
query:
- name: og_memory_info
sql: select memorytype,memorymbytes from pv_total_memory_detail();
sql: select memorytype,memorymbytes from pg_catalog.pv_total_memory_detail();
version: '>=0.0.0'
timeout: 1
ttl: 60
@ -1895,14 +1917,14 @@ og_context_memory:
name: og_context_memory
desc: OpenGauss context use memory information
query:
- name: og_session_memory
- name: og_context_memory
sql: |-
select contextname,
sum(usedsize)::bigint as usedsize,
sum(totalsize)::bigint as totalsize
pg_catalog.sum(usedsize)::bigint as usedsize,
pg_catalog.sum(totalsize)::bigint as totalsize
from gs_session_memory_detail
group by contextname
order by sum(totalsize) desc limit 10;
order by pg_catalog.sum(totalsize) desc limit 10;
version: '>=0.0.0'
timeout: 30
ttl: 600
@ -1956,7 +1978,7 @@ og_cpu_load:
desc: OpenGauss cpu load
query:
- name: og_cpu_load
sql: select 'og_total_cpu' og_total_cpu,total_cpu() total_cpu;
sql: select 'og_total_cpu' og_total_cpu,pg_catalog.total_cpu() total_cpu;
version: '>=0.0.0'
timeout: 1
ttl: 10
@ -1974,3 +1996,334 @@ og_cpu_load:
timeout: 1
public: true
pg_thread_pool:
name: pg_thread_pool
desc: OpenGauss thread pool
query:
- name: pg_thread_pool
sql: select group_id, listener, worker_info, session_info from dbe_perf.GLOBAL_THREADPOOL_STATUS;
version: '>=0.0.0'
timeout: 1
ttl: 10
status: enable
dbRole: ""
metrics:
- name: group_id
description: group id
usage: LABEL
- name: listener
description: listener
usage: GAUGE
- name: worker_info
description: worker info
usage: LABEL
- name: session_info
description: session info
usage: LABEL
status: enable
ttl: 10
timeout: 1
public: true
pg_recovery_status:
name: pg_recovery_status
desc: pg recovery status
query:
- name: pg_recovery_status
sql: SELECT standby_node_name, current_sleep_time, current_rto FROM dbe_perf.global_recovery_status;
version: '>=0.0.0'
timeout: 1
ttl: 10
status: enable
dbRole: ""
metrics:
- name: standby_node_name
description: node name
usage: LABEL
- name: current_sleep_time
description: current sleep time
usage: LABEL
- name: current_rto
description: current rto
usage: GAUGE
status: enable
ttl: 10
timeout: 1
public: true
pg_stat_get_wal_senders:
name: pg_stat_get_wal_senders
desc: pg stat get wal senders
query:
- name: pg_stat_get_wal_senders
sql: SELECT pid, sender_flush_location, receiver_replay_location,pg_catalog.pg_xlog_location_diff(sender_flush_location,receiver_replay_location) as xlog_location_diff FROM pg_catalog.pg_stat_get_wal_senders();
version: '>=0.0.0'
timeout: 1
ttl: 10
status: enable
dbRole: ""
metrics:
- name: pid
description: pid
usage: GAUGE
- name: sender_flush_location
description: sender flush location
usage: LABEL
- name: receiver_replay_location
description: receiver replay location
usage: LABEL
- name: xlog_location_diff
description: LSN gap between sender flush and receiver replay locations (bytes)
usage: LABEL
status: enable
ttl: 10
timeout: 1
public: true
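For context, pg_catalog.pg_xlog_location_diff() returns the byte distance between two LSNs, so xlog_location_diff above is effectively the standby's replication lag in bytes. A purely illustrative helper for reading that number (not part of the exporter):

def format_xlog_lag(diff_bytes):
    # pg_xlog_location_diff() reports the LSN gap in bytes; show it in MiB.
    return '%.2f MiB behind' % (diff_bytes / (1024 * 1024))

print(format_xlog_lag(50331648))   # -> '48.00 MiB behind'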
statement_responsetime_percentile:
name: statement_responsetime_percentile
desc: statement responsetime percentile
query:
- name: statement_responsetime_percentile
sql: SELECT p80, p95 FROM dbe_perf.statement_responsetime_percentile;
version: '>=0.0.0'
timeout: 1
ttl: 10
status: enable
dbRole: ""
metrics:
- name: p80
description: 80th percentile SQL response time
usage: GAUGE
- name: p95
description: 95th percentile SQL response time
usage: GAUGE
status: enable
ttl: 10
timeout: 1
public: true
pg_node_info:
name: pg_node_info
desc: the information of current node
query:
- name: pg_node_info
sql: SELECT CURRENT_CATALOG AS datname, CASE WHEN pg_catalog.pg_is_in_recovery() THEN 'Y' ELSE 'N' END AS is_slave, node_name, installpath, datapath, EXTRACT(EPOCH FROM pg_catalog.now() - pg_catalog.pg_postmaster_start_time()) AS uptime, pg_catalog.version() FROM pg_catalog.pg_stat_get_env();
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: is_slave
description: whether this node is a standby (slave) node
usage: LABEL
- name: node_name
description: node name
usage: LABEL
- name: installpath
description: install path
usage: LABEL
- name: datapath
description: data path
usage: LABEL
- name: uptime
description: node uptime in seconds
usage: GAUGE
- name: version
description: database version
usage: LABEL
- name: datname
description: current connecting database
usage: LABEL
status: enable
ttl: 100
timeout: 1
public: true
pg_stat_bgwriter:
name: pg_stat_bgwriter
desc: background writer and checkpoint statistics of the current node
query:
- name: pg_stat_bgwriter
sql: |-
select checkpoint_sync_time / (checkpoints_timed + checkpoints_req) AS checkpoint_avg_sync_time,
checkpoints_req / (checkpoints_timed + checkpoints_req) AS checkpoint_proactive_triggering_ratio,
buffers_checkpoint, buffers_clean, buffers_backend, buffers_alloc from pg_stat_bgwriter;
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: checkpoint_avg_sync_time
description: average checkpoint sync time (ms)
usage: GAUGE
- name: checkpoint_proactive_triggering_ratio
description: ratio of requested checkpoints to all checkpoints
usage: GAUGE
- name: buffers_checkpoint
description: number of checkpoint write buffers
usage: LABEL
- name: buffers_clean
description: number of buffers written by the background writer process
usage: LABEL
- name: buffers_backend
description: number of buffers written directly by backends
usage: LABEL
- name: buffers_alloc
description: number of buffers allocated
usage: LABEL
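Both derived values above are plain ratios over pg_stat_bgwriter counters: checkpoint_avg_sync_time spreads the total sync time (milliseconds) over all checkpoints, and checkpoint_proactive_triggering_ratio is the share of checkpoints that were requested rather than timed. A sketch of the same arithmetic with made-up counter values (like the SQL, it fails before the first checkpoint, and SQL integer division may truncate the ratio):

def bgwriter_ratios(checkpoints_timed, checkpoints_req, checkpoint_sync_time_ms):
    total = checkpoints_timed + checkpoints_req       # raises ZeroDivisionError if 0
    avg_sync_ms = checkpoint_sync_time_ms / total     # average sync time per checkpoint
    proactive = checkpoints_req / total               # fraction triggered on demand
    return avg_sync_ms, proactive

print(bgwriter_ratios(90, 10, 1200.0))   # -> (12.0, 0.1)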
pg_statio_all_tables:
name: pg_statio_all_tables
desc: shared buffer hit rates of the current node
query:
- name: pg_statio_all_tables
sql: |-
select pg_catalog.sum(heap_blks_hit)*100/(pg_catalog.sum(heap_blks_read)+pg_catalog.sum(heap_blks_hit)+1) AS shared_buffer_heap_hit_rate,
pg_catalog.sum(toast_blks_hit)*100/(pg_catalog.sum(toast_blks_read)+pg_catalog.sum(toast_blks_hit)+1) AS shared_buffer_toast_hit_rate,
pg_catalog.sum(tidx_blks_hit)*100/(pg_catalog.sum(tidx_blks_read)+pg_catalog.sum(tidx_blks_hit)+1) AS shared_buffer_tidx_hit_rate,
pg_catalog.sum(idx_blks_hit)*100/(pg_catalog.sum(idx_blks_read)+pg_catalog.sum(idx_blks_hit)+1) AS shared_buffer_idx_hit_rate
from pg_statio_all_tables ;
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: shared_buffer_heap_hit_rate
description: shared_buffer_heap_hit_rate
usage: GAUGE
- name: shared_buffer_toast_hit_rate
description: shared_buffer_toast_hit_rate
usage: GAUGE
- name: shared_buffer_tidx_hit_rate
description: shared_buffer_tidx_hit_rate
usage: GAUGE
- name: shared_buffer_idx_hit_rate
description: shared_buffer_idx_hit_rate
usage: GAUGE
status: enable
ttl: 100
timeout: 1
public: true
pg_prepared_xacts:
name: pg_prepared_xacts
desc: number of prepared transactions on the current node
query:
- name: pg_prepared_xacts
sql: select pg_catalog.count(1) AS count from pg_prepared_xacts;
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: count
description: current_prepared_xacts_count
usage: GAUGE
status: enable
ttl: 100
timeout: 1
public: true
pg_stat_database:
name: pg_stat_database
desc: temporary file statistics of the current node
query:
- name: pg_stat_database
sql: select pg_catalog.max(temp_bytes / temp_files) / 1024 AS temp_file_size from pg_stat_database where temp_files > 0;
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: temp_file_size
description: maximum average temp file size (kB)
usage: GAUGE
status: enable
ttl: 100
timeout: 1
public: true
pg_lock_time_info:
name: pg_lock_time_info
desc: lock holding time per database
query:
- name: pg_lock_time_info
sql: SELECT
d.datname,
pg_catalog.sum(extract(epoch
FROM pg_catalog.now() - s.xact_start)) AS holding_time
FROM pg_locks AS l
INNER JOIN pg_database AS d ON l.database = d.oid
INNER JOIN pg_stat_activity AS s ON l.pid = s.pid
WHERE s.pid != pg_catalog.pg_backend_pid() group by d.datname;
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: datname
description: database name
usage: LABEL
- name: holding_time
description: database lock holding time
usage: GAUGE
status: enable
ttl: 100
timeout: 1
public: true
gs_sql_count:
name: gs_sql_count
desc: SQL statement counts by type
query:
- name: gs_sql_count
sql: |-
select sum(select_count) as select, sum(update_count) as update, sum(insert_count) as insert, sum(delete_count) as delete, sum(mergeinto_count) as mergeinto,
sum(ddl_count) as ddl, sum(dml_count) as dml, sum(dcl_count) as dcl from gs_sql_count;
version: '>=0.0.0'
timeout: 1
ttl: 100
status: enable
dbRole: ""
metrics:
- name: node_name
description: the name of node
usage: LABEL
- name: select
description: the count of select sql
usage: LABEL
- name: insert
description: the count of insert sql
usage: LABEL
- name: delete
description: the count of delete sql
usage: LABEL
- name: update
description: the count of update sql
usage: LABEL
- name: mergeinto
description: the count of merge into sql
usage: LABEL
- name: ddl
description: the count of ddl sql
usage: LABEL
- name: dml
description: the count of dml sql
usage: LABEL
- name: dcl
description: the count of dcl sql
usage: GAUGE
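Each entry in this file pairs one SQL statement with a metrics list: columns tagged usage: LABEL become label values and columns tagged usage: GAUGE become numeric samples. The real collection logic lives in the exporter elsewhere in DBMind; the sketch below only illustrates that mapping with prometheus_client (already listed in requirements.txt), and run_query() is a hypothetical stand-in for the database call:

from prometheus_client import Gauge

def export_metric(name, sql, metrics, run_query):
    # run_query(sql) is assumed to return a list of dicts, one per result row.
    labels = [m['name'] for m in metrics if m['usage'].upper() == 'LABEL']
    gauges = {m['name']: Gauge('%s_%s' % (name, m['name']), m.get('description', ''), labels)
              for m in metrics if m['usage'].upper() == 'GAUGE'}
    for row in run_query(sql):
        label_values = {k: str(row[k]) for k in labels}
        for column, gauge in gauges.items():
            target = gauge.labels(**label_values) if label_values else gauge
            target.set(float(row[column]))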

View File

@ -17,45 +17,47 @@ pg_sql_statement_history:
query:
- name: pg_sql_statement_history
sql: "
SELECT H.unique_query_id,
H.db_name AS datname,
H.schema_name AS SCHEMA,
H.query,
(extract(epoch
FROM H.start_time) * 1000)::bigint as start_time,
(extract(epoch
FROM H.finish_time) * 1000)::bigint as finish_time,
extract(epoch
FROM H.finish_time - H.start_time)* 1000 AS exc_time,
H.cpu_time,
H.data_io_time,
H.n_returned_rows,
H.n_tuples_fetched,
H.n_tuples_returned,
H.n_tuples_inserted,
H.n_tuples_updated,
H.n_tuples_deleted,
(H.n_blocks_hit / (H.n_blocks_fetched+0.01)) AS hit_rate,
(H.n_blocks_fetched / (H.n_blocks_hit+0.01)) AS fetch_rate,
H.lock_wait_count,
H.lwlock_wait_count,
S.n_calls,
S.sort_count / S.n_calls AS sort_count,
S.sort_mem_used / S.n_calls AS sort_mem_used,
S.sort_spill_count / S.n_calls AS sort_spill_count,
S.hash_count / S.n_calls AS hash_count,
S.hash_mem_used / S.n_calls AS hash_mem_used,
S.hash_spill_count / S.n_calls AS hash_spill_count
FROM dbe_perf.statement_history H inner join dbe_perf.statement S
on H.unique_query_id = S.unique_sql_id
WHERE H.query !='COMMIT'
AND H.application_name != 'gs_clean'
AND S.n_calls > 1
AND (H.start_time > now() - (1 / 24 / 60 / 60) * ({scrape_interval} / 1000)
OR (exc_time > {scrape_interval} AND H.finish_time > now() - (1 / 24 / 60 / 60) * ({scrape_interval} / 1000))
)
ORDER BY H.start_time DESC
LIMIT 50;"
SELECT H.unique_query_id,
H.db_name AS datname,
H.schema_name AS SCHEMA,
H.query,
H.query_plan,
(extract(epoch
FROM H.start_time) * 1000)::bigint as start_time,
(extract(epoch
FROM H.finish_time) * 1000)::bigint as finish_time,
extract(epoch
FROM H.finish_time - H.start_time) * 1000 AS exc_time,
H.cpu_time,
H.data_io_time,
H.n_returned_rows,
H.n_tuples_fetched,
H.n_tuples_returned,
H.n_tuples_inserted,
H.n_tuples_updated,
H.n_tuples_deleted,
(H.n_blocks_hit / (H.n_blocks_fetched+0.01)) AS hit_rate,
(H.n_blocks_fetched / (H.n_blocks_hit+0.01)) AS fetch_rate,
H.lock_wait_count,
H.lwlock_wait_count,
S.n_calls,
S.sort_count / S.n_calls AS sort_count,
S.sort_mem_used / S.n_calls AS sort_mem_used,
S.sort_spill_count / S.n_calls AS sort_spill_count,
S.hash_count / S.n_calls AS hash_count,
S.hash_mem_used / S.n_calls AS hash_mem_used,
S.hash_spill_count / S.n_calls AS hash_spill_count
FROM dbe_perf.statement_history H inner join dbe_perf.statement S
on H.unique_query_id = S.unique_sql_id
WHERE H.query !='COMMIT'
AND H.application_name != 'gs_clean'
AND S.n_calls > 1
AND (H.start_time > extract(epoch from now()) * 1000 - (1 / 24 / 60 / 60) * ({scrape_interval})
OR (exc_time > {scrape_interval}
AND H.finish_time > extract(epoch from now()) * 1000 - (1 / 24 / 60 / 60) * ({scrape_interval}))
)
ORDER BY H.start_time DESC
LIMIT 50;"
version: '>=0.0.0'
timeout: 10
status: enable
@ -75,6 +77,9 @@ pg_sql_statement_history:
- name: query
description: query text
usage: LABEL
- name: query_plan
description: query plan text
usage: LABEL
- name: unique_query_id
description: unique query id
usage: LABEL
@ -144,3 +149,49 @@ pg_sql_statement_history:
status: enable
ttl: 0
timeout: 1
pg_sql_statement_full:
name: pg_sql_statement_full
desc: Extract full SQL statements from openGauss, which is a sampling process.
query:
- name: pg_sql_statement_full
sql: "
SELECT count(1),
datname,
application_name,
query
FROM pg_stat_activity
WHERE application_name NOT IN ('Asp',
'statement flush thread',
'JobScheduler',
'WDRSnapshot',
'PercentileJob',
'gs_clean')
AND Pg_current_sessid() != sessionid
AND query ilike '%FROM%'
AND (
query_start > now() - (1 / 24 / 60 / 60) * ({scrape_interval} / 1000))
GROUP BY datname,
application_name,
query;
"
version: '>=0.0.0'
timeout: 10
status: enable
metrics:
- name: count
description: count
usage: GAUGE
- name: datname
description: Name of database
usage: LABEL
- name: application_name
description: Name of application
usage: LABEL
- name: query
description: SQL statement
usage: LABEL
status: enable
ttl: 0
timeout: 1
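The time filter in the query above leans on openGauss date arithmetic (A-compatibility style), where subtracting a bare number from now() subtracts that many days: 1/24/60/60 is one second expressed in days, and {scrape_interval}/1000 converts the interval from milliseconds to seconds. A worked example of that conversion, assuming this arithmetic model:

ONE_SECOND_IN_DAYS = 1 / 24 / 60 / 60        # ~1.1574e-05

def lookback_days(scrape_interval_ms):
    # Same expression as (1 / 24 / 60 / 60) * ({scrape_interval} / 1000) in the SQL.
    return ONE_SECOND_IN_DAYS * (scrape_interval_ms / 1000)

print(lookback_days(15000))   # 15-second scrape interval -> ~0.0001736 days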

View File

@ -476,3 +476,31 @@ gaussdb_qps_by_instance:
ttl: 60
timeout: 0.1
load_average:
name: load_average
desc: load average for database server.
query:
- name: load_average
promql: "
label_replace(
node_load1,
'instance', '$1', 'instance', '(.*):.*')
"
version: '>=0.0.0'
timeout: 0.1
ttl: 10
status: enable
dbRole: ""
metrics:
- name: from_job
label: job
description: from job
usage: LABEL
- name: from_instance
label: instance
description: from instance
usage: LABEL
status: enable
ttl: 60
timeout: 0.1
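label_replace here rewrites the instance label by matching it against the regular expression (.*):.* and keeping the first capture group, which strips the exporter port from values like '10.10.10.1:9100' so the series can be joined with database-side metrics. The equivalent capture in Python, for illustration only (the address is made up):

import re

# PromQL anchors label_replace regexes, so fullmatch() is the closest equivalent.
match = re.fullmatch(r'(.*):.*', '10.10.10.1:9100')
print(match.group(1))   # -> '10.10.10.1'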

Binary file not shown.


View File

@ -30,7 +30,11 @@ local_ssh = ExecutorFactory() \
def get_benchmark_instance(script, path, cmd, db_info):
name = script.rstrip('.py')
if script.endswith('.py'):
name = script[:-len('.py')]
else:
name = script
if not os.path.exists(os.path.join(os.path.dirname(__file__), name + '.py')):
raise ConfigureError('Incorrect configuration option benchmark_script. '
'Enter the filename of the script in the benchmark directory '
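The reason for this change: str.rstrip('.py') strips any trailing run of the characters '.', 'p' and 'y' rather than the literal extension, so benchmark script names ending in those letters were silently mangled, while the endswith()/slice pair removes exactly '.py'. Illustrative example (the filename is hypothetical):

script = 'sysbench_copy.py'
print(script.rstrip('.py'))                                         # -> 'sysbench_co'
print(script[:-len('.py')] if script.endswith('.py') else script)   # -> 'sysbench_copy'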

View File

@ -22,7 +22,7 @@ from tuner.exceptions import ExecutionError
path = ''
# Measure current total committed transactions that do not include xact_rollback.
cmd = "gsql -U {user} -W {password} -d postgres -p {port} -c " \
"\"SELECT sum(xact_commit) FROM pg_stat_database where datname = '{db}';\""
"\"SELECT pg_catalog.sum(xact_commit) FROM pg_catalog.pg_stat_database where datname = '{db}';\""
# This script captures the performance indicators in the user's periodic execution task, and measures the quality

View File

@ -61,7 +61,7 @@ class OpenGaussMetric:
# main mem: max_connections * (work_mem + temp_buffers) + shared_buffers + wal_buffers
sql = "select " \
"setting " \
"from pg_settings " \
"from pg_catalog.pg_settings " \
"where name in ('max_connections', 'work_mem', 'temp_buffers', 'shared_buffers', 'wal_buffers') " \
"order by name;"
res = self._db.exec_statement(sql)
@ -75,29 +75,29 @@ class OpenGaussMetric:
# You could define used internal state here.
# this is a demo, cache_hit_rate, we will use it while tuning shared_buffer.
cache_hit_rate_sql = "select blks_hit / (blks_read + blks_hit + 0.001) " \
"from pg_stat_database " \
"from pg_catalog.pg_stat_database " \
"where datname = '{}';".format(self._db.db_name)
return self._get_numeric_metric(cache_hit_rate_sql)
@property
def uptime(self):
return self._get_numeric_metric(
"select extract(epoch from now()-pg_postmaster_start_time()) / 60 / 60;") # unit: hour
"select extract(epoch from pg_catalog.now()-pg_catalog.pg_postmaster_start_time()) / 60 / 60;") # unit: hour
@property
def current_connections(self):
return self._get_numeric_metric(
"select count(1) from pg_stat_activity where client_port is not null;")
"select pg_catalog.count(1) from pg_catalog.pg_stat_activity where client_port is not null;")
@property
def average_connection_age(self):
return self._get_numeric_metric("select extract(epoch from avg(now()-backend_start)) as age "
"from pg_stat_activity where client_port is not null;") # unit: second
return self._get_numeric_metric("select extract(epoch from pg_catalog.avg(pg_catalog.now()-backend_start)) as age "
"from pg_catalog.pg_stat_activity where client_port is not null;") # unit: second
@property
def all_database_size(self):
return self._get_numeric_metric(
"select sum(pg_database_size(datname)) / 1024 from pg_database;") # unit: kB
"select sum(pg_catalog.pg_database_size(datname)) / 1024 from pg_catalog.pg_database;") # unit: kB
@property
def max_processes(self):
@ -109,12 +109,12 @@ class OpenGaussMetric:
@property
def current_prepared_xacts_count(self):
return self._get_numeric_metric("select count(1) from pg_prepared_xacts;")
return self._get_numeric_metric("select pg_catalog.count(1) from pg_catalog.pg_prepared_xacts;")
@property
def current_locks_count(self):
return self._get_numeric_metric(
"select count(1) from pg_locks where transactionid in (select transaction from pg_prepared_xacts)")
"select pg_catalog.count(1) from pg_catalog.pg_locks where transactionid in (select transaction from pg_catalog.pg_prepared_xacts)")
@property
def checkpoint_dirty_writing_time_window(self):
@ -123,84 +123,84 @@ class OpenGaussMetric:
@property
def checkpoint_proactive_triggering_ratio(self):
return self._get_numeric_metric(
"select checkpoints_req / (checkpoints_timed + checkpoints_req) from pg_stat_bgwriter;"
"select checkpoints_req / (checkpoints_timed + checkpoints_req) from pg_catalog.pg_stat_bgwriter;"
)
@property
def checkpoint_avg_sync_time(self):
return self._get_numeric_metric(
"select checkpoint_sync_time / (checkpoints_timed + checkpoints_req) from pg_stat_bgwriter;"
"select checkpoint_sync_time / (checkpoints_timed + checkpoints_req) from pg_catalog.pg_stat_bgwriter;"
)
@property
def shared_buffer_heap_hit_rate(self):
return self._get_numeric_metric(
"select sum(heap_blks_hit)*100/(sum(heap_blks_read)+sum(heap_blks_hit)+1) from pg_statio_all_tables ;")
"select pg_catalog.sum(heap_blks_hit)*100/(pg_catalog.sum(heap_blks_read)+pg_catalog.sum(heap_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;")
@property
def shared_buffer_toast_hit_rate(self):
return self._get_numeric_metric(
"select sum(toast_blks_hit)*100/(sum(toast_blks_read)+sum(toast_blks_hit)+1) from pg_statio_all_tables ;"
"select pg_catalog.sum(toast_blks_hit)*100/(pg_catalog.sum(toast_blks_read)+pg_catalog.sum(toast_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;"
)
@property
def shared_buffer_tidx_hit_rate(self):
return self._get_numeric_metric(
"select sum(tidx_blks_hit)*100/(sum(tidx_blks_read)+sum(tidx_blks_hit)+1) from pg_statio_all_tables ;"
"select pg_catalog.sum(tidx_blks_hit)*100/(pg_catalog.sum(tidx_blks_read)+pg_catalog.sum(tidx_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;"
)
@property
def shared_buffer_idx_hit_rate(self):
return self._get_numeric_metric(
"select sum(idx_blks_hit)*100/(sum(idx_blks_read)+sum(idx_blks_hit)+1) from pg_statio_all_tables ;"
"select pg_catalog.sum(idx_blks_hit)*100/(pg_catalog.sum(idx_blks_read)+pg_catalog.sum(idx_blks_hit)+1) from pg_catalog.pg_statio_all_tables ;"
)
@property
def temp_file_size(self):
return self._get_numeric_metric(
"select max(temp_bytes / temp_files) / 1024 from pg_stat_database where temp_files > 0;"
"select pg_catalog.max(temp_bytes / temp_files) / 1024 from pg_catalog.pg_stat_database where temp_files > 0;"
) # unit: kB
@property
def read_write_ratio(self):
return self._get_numeric_metric(
"select tup_returned / (tup_inserted + tup_updated + tup_deleted + 0.001) "
"from pg_stat_database where datname = '%s';" % self._db.db_name
"from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name
)
@property
def search_modify_ratio(self):
return self._get_numeric_metric(
"select (tup_returned + tup_inserted) / (tup_updated + tup_deleted + 0.01) "
"from pg_stat_database where datname = '%s';" % self._db.db_name
"from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name
)
@property
def fetched_returned_ratio(self):
return self._get_numeric_metric(
"select tup_fetched / (tup_returned + 0.01) "
"from pg_stat_database where datname = '%s';" % self._db.db_name
"from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name
)
@property
def rollback_commit_ratio(self):
return self._get_numeric_metric(
"select xact_rollback / (xact_commit + 0.01) "
"from pg_stat_database where datname = '%s';" % self._db.db_name
"from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name
)
@property
def read_tup_speed(self):
return self._get_numeric_metric(
"select tup_returned / (extract (epoch from (now() - stats_reset))) "
"from pg_stat_database where datname = '%s';" % self._db.db_name
"select tup_returned / (extract (epoch from (pg_catalog.now() - stats_reset))) "
"from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name
)
@property
def write_tup_speed(self):
return self._get_numeric_metric(
"select (tup_inserted + tup_updated + tup_deleted) / (extract (epoch from (now() - stats_reset))) "
"from pg_stat_database where datname = '%s';" % self._db.db_name
"select (tup_inserted + tup_updated + tup_deleted) / (extract (epoch from (pg_catalog.now() - stats_reset))) "
"from pg_catalog.pg_stat_database where datname = '%s';" % self._db.db_name
)
@cached_property
@ -244,7 +244,7 @@ class OpenGaussMetric:
@cached_property
def block_size(self):
return self._get_numeric_metric(
"select setting / 1024 from pg_settings where name = 'block_size';"
"select setting / 1024 from pg_catalog.pg_settings where name = 'block_size';"
) # unit: kB
@property
@ -350,7 +350,7 @@ class OpenGaussMetric:
@cached_property
def enable_autovacuum(self):
setting = self._db.exec_statement(
"select setting from pg_settings where name = 'autovacuum';"
"select setting from pg_catalog.pg_settings where name = 'autovacuum';"
)[0][0]
return setting == 'on'
@ -358,7 +358,7 @@ class OpenGaussMetric:
return [self.cache_hit_rate, self.load_average[0]]
def reset(self):
self._db.exec_statement("SELECT pg_stat_reset();")
self._db.exec_statement("SELECT pg_catalog.pg_stat_reset();")
def to_dict(self):
rv = dict()

View File

@ -259,7 +259,7 @@ class LocalExec(Executor):
# Have to use the `cwd` argument.
# Otherwise, we can not change the current directory.
if line.strip().startswith('cd '):
cwd = line.lstrip('cd ')
cwd = line.strip()[len('cd'):]
continue
proc = subprocess.Popen(shlex.split(line),
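The same string-method pitfall as the benchmark fix above: line.lstrip('cd ') removes any leading run of the characters 'c', 'd' and ' ', not the literal 'cd ' prefix, so a target directory whose name starts with one of those letters lost characters, while slicing off the two-character command keeps the path intact. Illustrative:

line = 'cd data/results'
print(line.lstrip('cd '))          # -> 'ata/results' (the leading 'd' of "data" is gone)
print(line.strip()[len('cd'):])    # -> ' data/results' (literal prefix removed; leading space remains)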

View File

@ -29,7 +29,7 @@ from .exceptions import OptionError
from .xtuner import procedure_main
from . import utils
__version__ = '3.0.0'
__version__ = '2.1.0'
__description__ = 'X-Tuner: a self-tuning tool integrated by openGauss.'
@ -189,8 +189,11 @@ def get_config(filepath):
break
benchmark_script = cp['Benchmark'].get('benchmark_script', '')
if benchmark_script.rstrip('.py') + '.py' not in benchmarks:
if benchmark_script.endswith('.py'):
benchmark_script = benchmark_script[:-len('.py')]
if benchmark_script + '.py' not in benchmarks:
raise OptionError(invalid_opt_msg % ('benchmark_script', benchmarks))
config['benchmark_path'] = cp['Benchmark'].get('benchmark_path', '')
config['benchmark_cmd'] = cp['Benchmark'].get('benchmark_cmd', '')
benchmark_period = cp['Benchmark'].get('benchmark_period', '0')

View File

@ -13,11 +13,10 @@
import sqlalchemy
from sqlalchemy.engine import create_engine
from sqlalchemy.exc import ProgrammingError
from .base import Base, DynamicConfig
from .schema import load_all_schema_models
from ..common.exceptions import SQLExecutionError
from ..common.exceptions import SQLExecutionError, DuplicateTableError
def create_metadatabase_schema(check_first=True):
@ -32,6 +31,8 @@ def create_metadatabase_schema(check_first=True):
checkfirst=check_first
)
except Exception as e:
if 'DuplicateTable' in str(e):
raise DuplicateTableError(e)
raise SQLExecutionError(e)
@ -46,6 +47,8 @@ def destroy_metadatabase():
session_clz.get('engine')
)
except Exception as e:
if 'DuplicateTable' in str(e):
raise DuplicateTableError(e)
raise SQLExecutionError(e)

View File

@ -28,6 +28,13 @@ def update_session_clz_from_configs():
port = global_vars.configs.get('METADATABASE', 'port')
username = global_vars.configs.get('METADATABASE', 'username')
password = global_vars.configs.get('METADATABASE', 'password')
if db_type in ('opengauss', 'postgres'):
valid_port = port is not None and port.strip() != ''
valid_host = host is not None and host.strip() != ''
if not valid_port:
raise ValueError('Invalid port for metadatabase %s: %s.' % (db_type, port))
if not valid_host:
raise ValueError('Invalid host for metadatabase %s: %s.' % (db_type, host))
dsn = create_dsn(db_type, database, host, port, username, password)
postgres_dsn = create_dsn(db_type, 'postgres', host, port, username, password)

View File

@ -10,7 +10,7 @@
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
from sqlalchemy import Column, String, Integer, BigInteger, Float, Index
from sqlalchemy import Column, String, Integer, BigInteger, Float, Index, TEXT
from .. import Base
@ -21,7 +21,7 @@ class SlowQueries(Base):
slow_query_id = Column(Integer, primary_key=True, autoincrement=True)
schema_name = Column(String(64), nullable=False)
db_name = Column(String(64), nullable=False)
query = Column(String(1024), nullable=False)
query = Column(TEXT, nullable=False)
template_id = Column(BigInteger)
start_at = Column(BigInteger, nullable=False)
duration_time = Column(Float, nullable=False)

File diff suppressed because it is too large.

View File

@ -17,4 +17,5 @@ pyyaml
prometheus-client
## X-Tuner ##
bayesian-optimization
ptable
prettytable>=2.5.0

View File

@ -15,4 +15,5 @@ pyyaml
prometheus-client
## X-Tuner ##
bayesian-optimization
ptable
prettytable>=2.5.0

View File

@ -140,26 +140,28 @@ def get_latest_metric_value(metric_name):
def save_forecast_sequence(metric_name, host, sequence):
dao.forecasting_metrics.batch_insert_forecasting_metric(
metric_name, host, sequence.values, sequence.timestamps,
metric_type=get_metric_type(metric_name),
node_id=None
)
if sequence is not None:
dao.forecasting_metrics.batch_insert_forecasting_metric(
metric_name, host, sequence.values, sequence.timestamps,
metric_type=get_metric_type(metric_name),
node_id=None
)
def save_slow_queries(slow_queries):
for slow_query in slow_queries:
dao.slow_queries.insert_slow_query(
schema_name=slow_query.schema_name,
db_name=slow_query.db_name,
query=slow_query.query,
start_at=slow_query.start_at,
duration_time=slow_query.duration_time,
hit_rate=slow_query.hit_rate, fetch_rate=slow_query.fetch_rate,
cpu_time=slow_query.cpu_time, data_io_time=slow_query.data_io_time,
root_cause=slow_query.root_causes, suggestion=slow_query.suggestions,
template_id=slow_query.template_id
)
if slow_query is not None:
dao.slow_queries.insert_slow_query(
schema_name=slow_query.schema_name,
db_name=slow_query.db_name,
query=slow_query.query,
start_at=slow_query.start_at,
duration_time=slow_query.duration_time,
hit_rate=slow_query.hit_rate, fetch_rate=slow_query.fetch_rate,
cpu_time=slow_query.cpu_time, data_io_time=slow_query.data_io_time,
root_cause=slow_query.root_causes, suggestion=slow_query.suggestions,
template_id=slow_query.template_id
)
def get_all_slow_queries(minutes):