diff --git a/src/gausskernel/dbmind/tools/xtuner/README.md b/src/gausskernel/dbmind/tools/xtuner/README.md index d6846149a..8842c2e1e 100644 --- a/src/gausskernel/dbmind/tools/xtuner/README.md +++ b/src/gausskernel/dbmind/tools/xtuner/README.md @@ -40,13 +40,32 @@ python3 main.py --help ## Dependencies +If you use the Python runtime that ships with your OS, you should also install the Python development package, for example: + + sudo yum install python3-devel + +You should install the following mathematical libraries on your OS, so that some Python libraries can import them. + +For CentOS-based OSes (e.g., Red Hat, CentOS, EulerOS): + + sudo yum install lapack lapack-devel blas blas-devel + +For Debian-based OSes (e.g., Ubuntu, Kali Linux): + + sudo apt-get install gfortran libopenblas-dev liblapack-dev + +You should install the following dependencies with pip: + + paramiko + bayesian-optimization + ptable + +If you want to use deep learning, you should also install the following libraries: tensorflow>=2.2.0 keras-rl2 keras>=2.4.0 - paramiko - bayesian-optimization - ptable + Note: Firstly, please upgrade your pip: ```python -m pip install --upgrade pip``` diff --git a/src/gausskernel/dbmind/tools/xtuner/requirements-aarch64.txt b/src/gausskernel/dbmind/tools/xtuner/requirements-aarch64.txt new file mode 100644 index 000000000..8823951df --- /dev/null +++ b/src/gausskernel/dbmind/tools/xtuner/requirements-aarch64.txt @@ -0,0 +1,6 @@ +cryptography==2.5 +paramiko==2.7.2 +numpy==1.16.5 +scipy==1.6.0 +bayesian-optimization +ptable diff --git a/src/gausskernel/dbmind/tools/xtuner/requirements-x86.txt b/src/gausskernel/dbmind/tools/xtuner/requirements-x86.txt new file mode 100644 index 000000000..7f5dbf350 --- /dev/null +++ b/src/gausskernel/dbmind/tools/xtuner/requirements-x86.txt @@ -0,0 +1,5 @@ +tensorflow >= 2.1.0 # optional +keras-rl2~=1.0.4 # optional +paramiko==2.7.2 +bayesian-optimization +ptable diff --git a/src/gausskernel/dbmind/tools/xtuner/requirements.txt 
b/src/gausskernel/dbmind/tools/xtuner/requirements.txt deleted file mode 100644 index a2d8446b2..000000000 --- a/src/gausskernel/dbmind/tools/xtuner/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tensorflow>=2.2.0 -keras-rl2 -paramiko -bayesian-optimization -ptable diff --git a/src/gausskernel/dbmind/tools/xtuner/setup.py b/src/gausskernel/dbmind/tools/xtuner/setup.py index fff12a9a9..3d2df8dfe 100644 --- a/src/gausskernel/dbmind/tools/xtuner/setup.py +++ b/src/gausskernel/dbmind/tools/xtuner/setup.py @@ -14,18 +14,34 @@ See the Mulan PSL v2 for more details. """ import os +import sys +import platform from setuptools import setup, find_packages def read_requirements(): """Parse requirements.txt.""" - filepath = os.path.join('.', 'requirements.txt') + if 'aarch64' in platform.uname().machine: + filepath = os.path.join('.', 'requirements-aarch64.txt') + else: + filepath = os.path.join('.', 'requirements-x86.txt') with open(filepath, 'r') as f: requirements = [_line.rstrip() for _line in f] requirements.reverse() return requirements +def check_version(): + version_info = sys.version_info + major, minor = version_info.major, version_info.minor + # At least, the Python version is (3, 6) + if major < 3 or minor <= 5: + return False + return True + +if not check_version(): + print("Requires Python >= 3.6") + exit(-1) # Read the package information from the main.py. 
pkginfo = dict() diff --git a/src/gausskernel/dbmind/tools/xtuner/share/xtuner.conf.template b/src/gausskernel/dbmind/tools/xtuner/share/xtuner.conf.template index 8fafb1a42..e03f03a73 100644 --- a/src/gausskernel/dbmind/tools/xtuner/share/xtuner.conf.template +++ b/src/gausskernel/dbmind/tools/xtuner/share/xtuner.conf.template @@ -15,7 +15,7 @@ logfile = log/opengauss_tuner.log output_tuning_result = tuned_knobs.json verbose = on -recorder_file = log/recorder.log +recorder_file = log/recorder.csv tune_strategy = auto # rl, gop or auto drop_cache = on # You must modify the permission of the login user in the /etc/sudoers file and grant the NOPASSWD permission to the user. used_mem_penalty_term = 1e-9 # Prevent taking up more memory. @@ -45,12 +45,26 @@ particle_nums = 3 # A larger value indicates higher accuracy but slower speed. [Benchmark] # Some examples of benchmark script implementation are provided in the benchmark directory. # Implement the benchmark script based on the specified interface. -benchmark_script = tpch -# These parameters are used to replace the path and cmd in the benchmark script. +benchmark_script = period # Built-in benchmarks: period, tpcc, tpch, tpcds, sysbench (not tested) + +# The following parameters are used to replace the path and cmd in the benchmark script. # The path and cmd variables in the benchmark script are examples. You can set the parameters # by referring to the benchmark script file in the benchmark directory. -benchmark_path = # If this parameter is blank, the default path in the benchmark script is used. -benchmark_cmd = # If this parameter is blank, the default cmd in the benchmark script is used. + +# If this parameter is blank, the default path in the benchmark script is used. +# For built-in benchmarks, only 'period' does not need it. +benchmark_path = +# If this parameter is blank, the default cmd in the benchmark script is used. +# For built-in benchmarks, only 'tpcc' needs it. 
+benchmark_cmd = + +# Optional parameter. Only for 'period'. +# By setting this parameter, users can measure the performance of a periodic task. +# The unit of this parameter is seconds. +# The default value is 60 seconds, which means that the cycle of the workload is 60 seconds. +# This value should be greater than or equal to the cycle of the workload itself; +# a smaller value can easily distort the measured result. +benchmark_period = 60 #------------------------------------------------------------------------------ # Tuning Knobs Configurations diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/algorithms/pso.py b/src/gausskernel/dbmind/tools/xtuner/tuner/algorithms/pso.py index b1d1e9ebf..4387a1e99 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/algorithms/pso.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/algorithms/pso.py @@ -16,6 +16,8 @@ See the Mulan PSL v2 for more details. import numpy as np +PATTERN = '|\t%s\t|\t%s\t|\t%s\t|' + class Particle: def __init__(self, position, velocity, best_position, fitness): self.position = position @@ -94,13 +96,16 @@ class Pso: self.best_position = particle.position def update_one_step(self): - for particle in self.particles: + for i, particle in enumerate(self.particles): self._update_velocity(particle) # update velocity self._update_position(particle) # update position + print(PATTERN % ('%s-%s' % (self.iteration_count, i), self.best_fitness, self.best_position), flush=True) self.iteration_count += 1 self.fitness_val_list.append(self.best_fitness) def minimize(self): + # Print a header + print(PATTERN % ('iter', 'best_fitness', 'best_position'), flush=True) for i in range(self.max_iteration): self.update_one_step() return self.best_fitness, self.best_position diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/__init__.py b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/__init__.py index 8ffc9c246..338a80d3b 100644 --- 
a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/__init__.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/__init__.py @@ -38,7 +38,7 @@ def get_benchmark_instance(script, path, cmd, db_info): bm = importlib.import_module('tuner.benchmark.{}'.format(name)) # Verify the validity of the benchmark script. # An exception will be thrown if benchmark instance does not have specified attributes. - if (not getattr(bm, 'path', False)) or (not getattr(bm, 'cmd', False)) or (not getattr(bm, 'run', False)): + if not getattr(bm, 'run', False): raise ConfigureError('The benchmark script %s is invalid. ' 'For details, see the example template and description document.' % script) # Check whether function run exists and whether its type matches. @@ -64,6 +64,10 @@ def get_benchmark_instance(script, path, cmd, db_info): # Wrap remote server shell as an API and pass it to benchmark instance. def wrapper(server_ssh): - return bm.run(server_ssh, local_ssh) + try: + return bm.run(server_ssh, local_ssh) + except Exception as e: + logging.warning("An error occured while running the benchmark, hence the benchmark score is 0. The error is %s.", e, exc_info=True) + return .0 return wrapper diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/period.py b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/period.py new file mode 100644 index 000000000..9e53e4875 --- /dev/null +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/period.py @@ -0,0 +1,49 @@ +""" +Copyright (c) 2020 Huawei Technologies Co.,Ltd. + +openGauss is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + + http://license.coscl.org.cn/MulanPSL2 + +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+See the Mulan PSL v2 for more details. +""" +import sys +import time +import logging + +from tuner import utils +from tuner.exceptions import ExecutionError + +path = '' +# Measure current total committed transactions that do not include xact_rollback. +cmd = "gsql -U {user} -W {password} -d postgres -p {port} -c " \ + "\"SELECT sum(xact_commit) FROM pg_stat_database where datname = '{db}';\"" + + +# This script captures the performance indicators in the user's periodic execution task, and measures the quality +# of the tuning results by measuring the range of changes in the indicators. +def run(remote_server, local_host) -> float: + wait_seconds = utils.config['benchmark_period'] + if not wait_seconds: + print("Not configured the parameter 'benchmark_period' in the configuration file.", + file=sys.stderr) + exit(-1) + + stdout, stderr = remote_server.exec_command_sync(cmd) + if len(stderr) > 0: + raise ExecutionError(stderr) + prev_txn = int(utils.to_tuples(stdout)[0][0]) + + time.sleep(wait_seconds) + stdout, stderr = remote_server.exec_command_sync(cmd) + if len(stderr) > 0: + raise ExecutionError(stderr) + current_txn = int(utils.to_tuples(stdout)[0][0]) + + # Return TPS in this period. + return (current_txn - prev_txn) / wait_seconds diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcc.py b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcc.py index 387f79e5e..47e994c5e 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcc.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcc.py @@ -13,6 +13,10 @@ MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. """ +import os +import sys +import shlex + from tuner.exceptions import ExecutionError # WARN: You need to download the benchmark-sql test tool to the system, @@ -38,9 +42,28 @@ def run(remote_server, local_host): :return: benchmark score, higher one must be better, be sure to keep in mind. 
""" # Benchmark can be deployed on a remote server or a local server. - # The process of generating the final report of the Benchmarksql-5.0 is separate from the test process. - # Therefore, the `sleep` command needs to be added to wait to prevent the process from exiting prematurely. - stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, 'rm -rf benchmarksql-error.log', cmd, 'sleep 3']) + # Here we set the terminal as a remote server. + terminal = remote_server + err_logfile = os.path.join(path, 'benchmarksql-error.log') + cmd_files = shlex.split(cmd) + if len(cmd_files) != 2: + print('Invalid configuration parameter `benchmark_cmd`. ' + 'You should check the item in the configuration file.', file=sys.stderr) + exit(-1) + # Check whether these files exist. + shell_file, conf_file = cmd_files + shell_file = os.path.join(path, shell_file) + conf_file = os.path.join(path, conf_file) + _, stderr1 = terminal.exec_command_sync('ls %s' % shell_file) + _, stderr2 = terminal.exec_command_sync('ls %s' % conf_file) + if len(stderr1) > 0 or len(stderr2) > 0: + print('You should correct the parameter `benchmark_path` that the path contains several executable SQL files ' + 'in the configuration file.') + exit(-1) + # Clean log file + terminal.exec_command_sync('rm -rf %s' % err_logfile) + # Run benchmark + stdout, stderr = terminal.exec_command_sync('cd %s; %s %s' % (path, shell_file, conf_file)) if len(stderr) > 0: raise ExecutionError(stderr) @@ -51,7 +74,7 @@ def run(remote_server, local_host): if "(NewOrders)" in st: tpmC = split_string[i + 2] break - stdout, stderr = remote_server.exec_command_sync( + stdout, stderr = terminal.exec_command_sync( "cat %s/benchmarksql-error.log" % path) nb_err = stdout.count("ERROR:") # Penalty term. return float(tpmC) - 10 * nb_err # You can modify the penalty factor. 
diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcds.py b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcds.py index 4b5c7021e..bce2d484c 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcds.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpcds.py @@ -12,25 +12,27 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. """ +import os +import sys import time # WARN: You need to import data into the database and SQL statements in the following path will be executed. # The program automatically collects the total execution duration of these SQL statements. -path = '/path/to/tpcds/queries' # modify this path -cmd = "gsql -U {user} -W {password} -d {db} -p {port} -f {file}" +path = '/path/to/tpcds/queries' # modify this path which contains benchmark SQL files. +cmd = "find %s -type f -name '*.sql' -exec gsql -U {user} -W {password} -d {db} -p {port} -f {} > /dev/null \\;" def run(remote_server, local_host): - find_file_cmd = "find . -type f -name '*.sql'" - stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, find_file_cmd]) - if len(stderr) > 0: - raise Exception(stderr) - files = stdout.strip().split('\n') time_start = time.time() - for file in files: - perform_cmd = cmd.format(file=file) - stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, perform_cmd]) - if len(stderr) > 0: - print(stderr) + # Check whether the path is valid. 
+ stdout, stderr = remote_server.exec_command_sync('ls %s' % path) + if len(stderr) > 0: + print('You should correct the parameter `benchmark_path` that the path contains several executable SQL files ' + 'in the configuration file.') + exit(-1) + + stdout, stderr = remote_server.exec_command_sync(cmd % path) + if len(stderr) > 0: + print(stderr, file=sys.stderr) cost = time.time() - time_start return - cost diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpch.py b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpch.py index c4c2a35e9..823fb2c9a 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpch.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/benchmark/tpch.py @@ -12,27 +12,29 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. """ +import os +import sys import time from tuner.exceptions import ExecutionError # WARN: You need to import data into the database and SQL statements in the following path will be executed. # The program automatically collects the total execution duration of these SQL statements. -path = '/path/to/tpch/queries' # modify this path -cmd = "gsql -U {user} -W {password} -d {db} -p {port} -f {file}" +path = '/path/to/tpch/queries' # modify this path which contains benchmark SQL files. +cmd = "find %s -type f -name '*.sql' -exec gsql -U {user} -W {password} -d {db} -p {port} -f {} > /dev/null \\;" def run(remote_server, local_host): - find_file_cmd = "find . -type f -name '*.sql'" - stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, find_file_cmd]) - if len(stderr) > 0: - raise ExecutionError(stderr) - files = stdout.strip().split('\n') time_start = time.time() - for file in files: - perform_cmd = cmd.format(file=file) - stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, perform_cmd]) - if len(stderr) > 0: - print(stderr) + # Check whether the path is valid. 
+ stdout, stderr = remote_server.exec_command_sync('ls %s' % path) + if len(stderr) > 0: + print('You should correct the parameter `benchmark_path` that the path contains several executable SQL files ' + 'in the configuration file.') + exit(-1) + + stdout, stderr = remote_server.exec_command_sync(cmd % path) + if len(stderr) > 0: + print(stderr, file=sys.stderr) cost = time.time() - time_start return - cost diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/character.py b/src/gausskernel/dbmind/tools/xtuner/tuner/character.py index 71b1ff256..770280391 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/character.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/character.py @@ -44,9 +44,8 @@ class OpenGaussMetric: def _get_numeric_metric(self, sql): result = self._db.exec_statement(sql) - if len(result) > 1: - _, value = result - return float(value) + if len(result) > 0: + return float(result[0][0]) else: return 0 @@ -66,9 +65,8 @@ class OpenGaussMetric: "where name in ('max_connections', 'work_mem', 'temp_buffers', 'shared_buffers', 'wal_buffers') " \ "order by name;" res = self._db.exec_statement(sql) - res.pop(0) - res = map(int, res) - max_conn, s_buff, t_buff, w_buff, work_mem = res + values = map(lambda x: int(x[0]), res) + max_conn, s_buff, t_buff, w_buff, work_mem = values total_mem = max_conn * (work_mem / 64 + t_buff / 128) + s_buff / 64 + w_buff / 4096 # unit: MB return total_mem * 1024 # unit: kB @@ -205,11 +203,20 @@ class OpenGaussMetric: "from pg_stat_database where datname = '%s';" % self._db.db_name ) + @cached_property + def nb_gaussdb(self): + return int(self._db.exec_command_on_host("ps -ux | grep gaussd[b] | wc -l")) + @cached_property def os_mem_total(self): mem = self._db.exec_command_on_host("free -k | awk 'NR==2{print $2}'") # unit kB return int(mem) + @cached_property + def min_free_mem(self): + kbytes = self._db.exec_command_on_host("cat /proc/sys/vm/min_free_kbytes") + return int(kbytes) # unit: kB + @cached_property def 
os_cpu_count(self): cores = self._db.exec_command_on_host("lscpu | grep 'CPU(s)' | head -1 | awk '{print $2}'") @@ -342,9 +349,9 @@ class OpenGaussMetric: @cached_property def enable_autovacuum(self): - _, setting = self._db.exec_statement( + setting = self._db.exec_statement( "select setting from pg_settings where name = 'autovacuum';" - ) + )[0][0] return setting == 'on' def get_internal_state(self): diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/db_agent.py b/src/gausskernel/dbmind/tools/xtuner/tuner/db_agent.py index 16a427be4..11c55c76d 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/db_agent.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/db_agent.py @@ -14,14 +14,14 @@ See the Mulan PSL v2 for more details. """ import logging -import re from tuner.character import OpenGaussMetric from tuner.exceptions import DBStatusError, SecurityError, ExecutionError, OptionError from tuner.executor import ExecutorFactory -from tuner.knob import RecommendedKnobs +from tuner.knob import RecommendedKnobs, Knob from tuner.utils import clip from tuner.utils import construct_dividing_line +from tuner.utils import to_tuples def check_special_character(phrase): @@ -128,9 +128,9 @@ class DB_Agent: "Check whether the database is started. ") # Get database instance pid and data_path. - _, self.data_path = self.exec_statement( + self.data_path = self.exec_statement( "SELECT datapath FROM pg_node_env;" - ) + )[0][0] except ExecutionError as e: logging.exception("An exception occurred while checking connection parameters: %s", e) raise DBStatusError("Failed to login to the database. 
" @@ -159,19 +159,24 @@ class DB_Agent: check_special_character(knob) wherein_list.append("'%s'" % knob) - sql = "SELECT name, boot_val, min_val, max_val FROM pg_settings WHERE name IN ({})".format( + sql = "SELECT name, setting, min_val, max_val FROM pg_settings WHERE name IN ({})".format( ','.join(wherein_list) ) - stdout = self.exec_statement(sql)[4:] - tuples = [[stdout[4 * i], stdout[4 * i + 1], stdout[4 * i + 2], stdout[4 * i + 3]] for i in - range(len(stdout) // 4)] - - for name, boot_val, min_val, max_val in tuples: + # If the value is missing, use the default value obtained in the system table to fill it. + tuples = self.exec_statement(sql) + for name, setting, min_val, max_val in tuples: knob = self.knobs[name] - knob.min = min_val if not knob.min else max(knob.min, min_val, key=lambda x: float(x)) - knob.max = max_val if not knob.max else min(knob.max, max_val, key=lambda x: float(x)) - knob.default = boot_val if not knob.default else clip(knob.default, knob.min, knob.max) + if knob.type != Knob.TYPE.BOOL: + min_val = float(min_val) + max_val = float(max_val) + knob.min = min_val if knob.min is None else max(knob.min, min_val) + knob.max = max_val if knob.max is None else min(knob.max, max_val) + knob.original = setting + # If user did not set default field, then make the original value as the default value. + # The default value (knob.current) is the starting point while tuning. + if knob.current is None: + knob.current = knob.to_numeric(setting) def exec_statement(self, sql, timeout=None): """ @@ -198,14 +203,7 @@ class DB_Agent: logging.error("Cannot execute SQL statement: %s. Error message: %s.", sql, stderr) raise ExecutionError("Cannot execute SQL statement: %s." % sql) - # Parse the result. 
- result = re.sub(r'[-+]{2,}', r'', stdout) # remove '----+----' - result = re.sub(r'\|', r'', result) # remove '|' - result = re.sub(r'\(\d*[\s,]*row[s]*?\)', r'', result) # remove '(n rows)' - result = re.sub(r'\n', r' ', result) - result = result.strip() - result = re.split(r'\s+', result) - return result + return to_tuples(stdout) def is_alive(self): """ @@ -260,6 +258,17 @@ class DB_Agent: nv.append(self.knobs[name].to_numeric(val)) return nv + def get_default_normalized_vector(self): + """ + In order to get the initial performance and rollback the settings while tuning is finished. + :return: the vector that normalized from original/initial knobs. + """ + nv = list() + for name in self.ordered_knob_list: + val = self.knobs[name].original + nv.append(self.knobs[name].to_numeric(val)) + return nv + def set_knob_normalized_vector(self, nv): restart = False for i, val in enumerate(nv): @@ -274,7 +283,7 @@ class DB_Agent: def get_knob_value(self, name): check_special_character(name) sql = "SELECT setting FROM pg_settings WHERE name = '{}';".format(name) - _, value = self.exec_statement(sql) + value = self.exec_statement(sql)[0][0] return value def set_knob_value(self, name, value): @@ -288,18 +297,12 @@ class DB_Agent: def reset_state(self): self.metric.reset() - def set_default_knob(self): - restart = False - - for knob in self.knobs: - self.set_knob_value(knob.name, knob.default) - restart = True if knob.restart else restart - - self.restart() - def restart(self): logging.info(construct_dividing_line("Restarting database.", padding="*")) - self.exec_statement("checkpoint;") # Prevent the database from being shut down for a long time. + try: + self.exec_statement("checkpoint;") # Prevent the database from being shut down for a long time. 
+ except ExecutionError: + logging.warning("Cannot checkpoint perhaps due to bad GUC settings.") self.exec_command_on_host("gs_ctl stop -D {data_path}".format(data_path=self.data_path), ignore_status_code=True) self.exec_command_on_host("gs_ctl start -D {data_path}".format(data_path=self.data_path), diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/db_env.py b/src/gausskernel/dbmind/tools/xtuner/tuner/db_env.py index 9cd3c2781..16ac5203a 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/db_env.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/db_env.py @@ -13,9 +13,12 @@ MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. """ +import logging + import numpy as np from tuner.env import Env, Box +from tuner.exceptions import ExecutionError class DB_Env(Env): @@ -72,22 +75,33 @@ class DB_Env(Env): if self.drop_cache: self.db.drop_cache() - obs = self._get_obs() - score = self.perf(self.bm) - used_mem = self.db.metric.used_mem - reward = score - self.mem_penalty * used_mem # Use the memory usage as a regular term. + try: + obs = self._get_obs() + score = self.perf(self.bm) + used_mem = self.db.metric.used_mem + reward = score - self.mem_penalty * used_mem # Use the memory usage as a regular term. + except ExecutionError as e: + logging.error('An error errored after changed the settings, ' + 'hence rollback to the default settings. The error is %s.', e) + self.reset() # Rollback to default setting + obs = [.0 for _ in self._get_obs()] # Pad 0 to the observation vector. + score = .0 + used_mem = self.db.metric.os_mem_total + # This value is minimal theoretically, so that regard it as a penalty term. + reward = score - self.mem_penalty * used_mem # Record each tuning process. 
- knob_dict = {k: self.db.get_knob_value(k) for k in self.db.ordered_knob_list} - self.recorder.record(reward, knob_dict) + knob_values = [self.db.knobs[name].to_string(value) for name, value in zip(self.db.ordered_knob_list, action)] + self.recorder.record(score, used_mem, reward, names=self.db.ordered_knob_list, values=knob_values) + self.recorder.prompt_message('Database metrics: %s.', obs) self.recorder.prompt_message('Benchmark score: %f, used mem: %d kB, reward: %f.', score, used_mem, reward) return obs, reward, False, {} def reset(self): + self.db.set_knob_normalized_vector(self.db.get_default_normalized_vector()) self.db.reset_state() - self.db.set_knob_normalized_vector(np.random.random(self.nb_actions)) # Maybe we can have more samples. return self._get_obs() def _get_obs(self): diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/executor.py b/src/gausskernel/dbmind/tools/xtuner/tuner/executor.py index f7cb24680..f2c5634ed 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/executor.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/executor.py @@ -276,7 +276,7 @@ class LocalExec(Executor): return [bytes2text(stdout), bytes2text(stderr)] else: # Pipeline does not support running in shell=False, so we run it with the 'bash -c' command. 
- split_cmd = ['bash', '-c', command] if command.find('|') >= 0 else shlex.split(command) + split_cmd = ['bash', '-c', command] if '|' in command or ';' in command else shlex.split(command) proc = subprocess.Popen(split_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/knob.py b/src/gausskernel/dbmind/tools/xtuner/tuner/knob.py index 6dbb7e135..4178da9cb 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/knob.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/knob.py @@ -48,10 +48,8 @@ class RecommendedKnobs: knob_list.extend(self._need_tune_knobs) knob_list.extend(self._only_report_knobs) for knob in knob_list: - if knob.type in ('int', 'float'): - knob_table.add_row([knob.name, knob.default, knob.min, knob.max, knob.restart]) - else: - knob_table.add_row([knob.name, knob.to_string(knob.default), knob.min, knob.max, knob.restart]) + row = (knob.name, knob.to_string(knob.current), knob.min, knob.max, knob.restart) + knob_table.add_row(row) print(knob_table) def dump(self, fp, dump_report_knobs=True): @@ -71,14 +69,16 @@ class RecommendedKnobs: def append_need_tune_knobs(self, *args): for knob in args: - if knob: - self._need_tune_knobs.append(knob) - self._tbl[knob.name] = knob + if knob is None: + continue + self._need_tune_knobs.append(knob) + self._tbl[knob.name] = knob def append_only_report_knobs(self, *args): for knob in args: - if knob: - self._only_report_knobs.append(knob) + if knob is None: + continue + self._only_report_knobs.append(knob) def names(self): return sorted(self._tbl.keys()) @@ -100,41 +100,46 @@ class RecommendedKnobs: class Knob: - def __init__(self, name, knob): + def __init__(self, name, **kwargs): """ Wrap a tuning knob and abstract it as a class. The second argument knob is dict type. Its fields include: - default: Int, float or bool type. + recommend: The knob value recommended by knob recommendation. + current: Normalized current value. + original: Str type. 
A setting value from `pg_settings` or user's configuration. min: Optional. Int, float type. max: Optional. Int, float type. type: String type. Constrained by Knob.TYPE. restart: Boolean type. - :param name: The name of a knob. - :param knob: The dict data-structure of a knob. + :param name: The name of the knob. + :param kwargs: A dict structure contains the knob's all fields. """ - if not isinstance(knob, dict): - raise TypeError - self.name = name - self.type = knob.get('type') - self.min = knob.get('min') - self.max = knob.get('max') - self.default = knob.get('default') - self.restart = knob.get('restart', False) + self.recommend = kwargs.get('recommend') + self.original = self.current = None + self.user_set = kwargs.get('default') + self._min = kwargs.get('min') + self._max = kwargs.get('max') + self.type = kwargs.get('type') + self.restart = kwargs.get('restart', False) + + if '' in (self.name, self.type): + raise ValueError("'name', and 'type' fields of knob are essential.") if self.type == 'bool': - self.min = 0 - self.max = 1 + self._min = 0 + self._max = 1 - self._scale = self.max - self.min + if str in (type(self._min), type(self._max)): + raise ValueError("'min', and 'max' fields of knob should not be str type.") - if self._scale < 0: - raise ValueError('Knob %s is incorrectly configured. ' - 'The max value must be greater than or equal to the min value.' % self.name) + # Refresh scale. + self._scale = None + self.fresh_scale() def to_string(self, val): - rv = val * self._scale + float(self.min) if self.type in ('int', 'float') else val + rv = self.denormalize(val) if self.type in ('int', 'float') else val if self.type == 'int': rv = str(int(round(rv))) elif self.type == 'bool': @@ -148,7 +153,7 @@ class Knob: def to_numeric(self, val): if self.type in ('float', 'int'): - rv = (float(val) - float(self.min)) / self._scale + rv = self.normalize(val) elif self.type == 'bool': rv = 0. if val == 'off' else 1. 
else: @@ -156,38 +161,63 @@ class Knob: return rv - @staticmethod - def new_instance(name, value_default, knob_type, value_min=0, value_max=1, restart=False): - if knob_type not in Knob.TYPE.ITEMS: - raise TypeError("The type of parameter 'knob_type' is incorrect.") + def fresh_scale(self): + if None in (self._min, self._max): + return - if knob_type == Knob.TYPE.INT: - value_default = int(value_default) - value_max = int(value_max) - value_min = int(value_min) - elif knob_type == Knob.TYPE.FLOAT: - value_default = float(value_default) - value_max = float(value_max) - value_min = float(value_min) - else: - if type(value_default) is not bool: - raise ValueError + self._scale = self._max - self._min + if self._scale < 0: + raise ValueError('Knob %s is incorrectly configured. ' + 'The max value must be greater than ' + 'or equal to the min value.' % self.name) + if type(self.user_set) is str: + self.current = self.to_numeric(self.user_set) + elif type(self.user_set) in (int, float): + self.user_set = str(self.user_set) + self.current = self.normalize(self.user_set) + elif self.recommend is not None: + self.current = self.normalize(self.recommend) - value_default = 1 if value_default else 0 - value_max = 1 - value_min = 0 + @property + def min(self): + return self._min - return Knob(name, - {'type': knob_type, 'restart': restart, - 'default': value_default, 'min': value_min, 'max': value_max}) + @min.setter + def min(self, val): + if val is None: + return + self._min = val + self.fresh_scale() + + @property + def max(self): + return self._max + + @max.setter + def max(self, val): + if val is None: + return + self._max = val + self.fresh_scale() + + def normalize(self, val): + return (float(val) - float(self._min)) / self._scale + + def denormalize(self, val): + return val * self._scale + float(self._min) def to_dict(self): return \ - {self.name: { - 'type': self.type, 'restart': self.restart, - 'default': self.default, - 'min': self.min, 'max': self.max - }} + 
{self.name: + { + 'type': self.type, + 'restart': self.restart, + 'default': self.user_set if self.user_set is not None else self.original, + 'recommend': self.to_string(self.current), + 'min': self._min, + 'max': self._max + } + } def __str__(self): return str(self.to_dict()) @@ -202,9 +232,10 @@ class Knob: def load_knobs_from_json_file(filename): knobs = RecommendedKnobs() - with open(filename) as fp: - for name, val in json.load(fp).items(): - val['name'] = name - knobs.append_need_tune_knobs(Knob(name=name, knob=val)) + with open(filename) as f: + for name, _dict in json.load(f).items(): + knobs.append_need_tune_knobs( + Knob(name=name, **_dict) + ) return knobs diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/main.py b/src/gausskernel/dbmind/tools/xtuner/tuner/main.py index c253a02a7..77eff02f3 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/main.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/main.py @@ -12,9 +12,13 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. """ +from __future__ import print_function import argparse -import configparser +try: + import configparser +except ImportError: + import ConfigParser as configparser import json import os import sys @@ -23,8 +27,9 @@ from getpass import getpass from tuner.exceptions import OptionError from tuner.xtuner import procedure_main +from tuner import utils -__version__ = '2.0.0' +__version__ = '2.1.0' __description__ = 'X-Tuner: a self-tuning tool integrated by openGauss.' 
@@ -47,6 +52,15 @@ def check_path_valid(path): return True +def check_version(): + version_info = sys.version_info + major, minor = version_info.major, version_info.minor + # At least, the Python version is (3, 6) + if major < 3 or minor <= 5: + return False + return True + + def build_db_info(args): if args.db_config_file: if not check_path_valid(args.db_config_file): @@ -179,6 +193,10 @@ def get_config(filepath): raise OptionError(invalid_opt_msg % ('benchmark_script', benchmarks)) config['benchmark_path'] = cp['Benchmark'].get('benchmark_path', '') config['benchmark_cmd'] = cp['Benchmark'].get('benchmark_cmd', '') + benchmark_period = cp['Benchmark'].get('benchmark_period', '0') + if not benchmark_period: + benchmark_period = '0' + config['benchmark_period'] = int(benchmark_period) # Section Knobs scenario_opts = ['auto', 'ap', 'tp', 'htap'] @@ -220,6 +238,10 @@ def get_config(filepath): def main(): + if not check_version(): + print("FATAL: You should use at least Python 3.6 or above version.") + return -1 + parser = get_argv_parser() args = parser.parse_args() mode = args.mode @@ -228,7 +250,7 @@ def main(): parser.print_usage() return -1 - config = get_config(args.tuner_config_file) + utils.config = config = get_config(args.tuner_config_file) if not config: return -1 diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/recommend.py b/src/gausskernel/dbmind/tools/xtuner/tuner/recommend.py index fef2e7852..39c8e649d 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/recommend.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/recommend.py @@ -12,6 +12,7 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. 
""" +import inspect from prettytable import PrettyTable @@ -29,55 +30,74 @@ SIZE_UNIT_MAP = {"kB": 1, "GB": 1024 * 1024} +def round4(v): + return int(v + (4 - v % 4)) + + +def instantiate_knob(name, recommend, knob_type, value_min=0, value_max=1, restart=False): + if knob_type not in Knob.TYPE.ITEMS: + raise TypeError("The type of parameter 'knob_type' is incorrect.") + + if knob_type == Knob.TYPE.INT: + recommend = int(recommend) + value_max = int(value_max) + value_min = int(value_min) + elif knob_type == Knob.TYPE.FLOAT: + recommend = float(recommend) + value_max = float(value_max) + value_min = float(value_min) + elif knob_type == Knob.TYPE.BOOL: + value_type = type(recommend) + if value_type is bool: + recommend = 1 if recommend else 0 + elif value_type is str and value_type in ('on', 'off'): + recommend = 1 if recommend == 'on' else 0 + elif value_type is int: + pass + else: + raise ValueError + + value_max = 1 + value_min = 0 + + return Knob(name, + recommend=recommend, + min=value_min, + max=value_max, + type=knob_type, + restart=restart) + + def recommend_knobs(mode, metric): advisor = OpenGaussKnobAdvisor(metric) + reporter = advisor.report knobs = RecommendedKnobs() if mode == "recommend": if metric.uptime < 1: - advisor.report.print_bad( + reporter.print_bad( "The database runs for a short period of time, and the database description may not be accumulated. " "The recommendation result may be inaccurate." ) elif metric.uptime < 12: - advisor.report.print_warn( + reporter.print_warn( "The database runs for a short period of time, and the database description may not be accumulated. " "The recommendation result may be inaccurate." 
) - knobs.append_need_tune_knobs(advisor.shared_buffers, - advisor.max_connections, - advisor.max_prepared_transactions, - advisor.work_mem, - advisor.maintenance_work_mem, - advisor.effective_cache_size, - advisor.effective_io_concurrency, - advisor.wal_buffers, - advisor.random_page_cost, - advisor.default_statistics_target) + _, recommend_list = advisor.all_properties() + knobs.append_need_tune_knobs(*recommend_list) elif mode == "tune": - knobs.append_need_tune_knobs(advisor.random_page_cost, - advisor.effective_io_concurrency, - advisor.work_mem) - knobs.append_only_report_knobs(advisor.shared_buffers, - advisor.max_connections, - advisor.max_prepared_transactions, - advisor.maintenance_work_mem, - advisor.effective_cache_size, - advisor.wal_buffers, - advisor.default_statistics_target) + tune_knobs = ["random_page_cost", "effective_io_concurrency", "work_mem"] + tune_list, recommend_list = advisor.all_properties(tune_knobs) + knobs.append_need_tune_knobs(*tune_list) + knobs.append_only_report_knobs(*recommend_list) elif mode == "train": - knobs.append_need_tune_knobs(advisor.work_mem, - advisor.shared_buffers) - knobs.append_only_report_knobs(advisor.max_connections, - advisor.max_prepared_transactions, - advisor.maintenance_work_mem, - advisor.effective_cache_size, - advisor.effective_io_concurrency, - advisor.wal_buffers, - advisor.random_page_cost, - advisor.default_statistics_target) + tune_knobs = ["shared_buffers", "work_mem"] + tune_list, recommend_list = advisor.all_properties(tune_knobs) + knobs.append_need_tune_knobs(*tune_list) + knobs.append_only_report_knobs(*recommend_list) - knobs.report = advisor.report.generate + knobs.report = reporter.generate return knobs @@ -130,7 +150,57 @@ class OpenGaussKnobAdvisor: # Append metric and workload info to ReportMsg. 
self.report.print_info(self.metric.to_dict()) + def all_properties(self, tune_knobs=None): + members = inspect.getmembers( + self, lambda x: isinstance(x, Knob)) + if tune_knobs is None: + tune_knobs = [] + + tune_list = [] + recommend_list = [] + for name, value in members: + if name in tune_knobs: + tune_list.append(value) + else: + recommend_list.append(value) + return tune_list, recommend_list + # Allocation of memory or storage I/O resources. + @cached_property + def max_process_memory(self): + # max_process_memory unit is kB. + omega = 0.9 # retention corr + omega_min = 0.7 + total_mem = self.metric.os_mem_total + min_free_mem = self.metric.min_free_mem + nb_gaussdb = self.metric.nb_gaussdb + # This is a simplified formula that developer gave. + suitable_mem = round4((total_mem - min_free_mem) * omega / nb_gaussdb) + min_mem = round4((total_mem - min_free_mem) * omega_min / nb_gaussdb) + + if min_mem <= self.metric["max_process_memory"] <= suitable_mem: + self.report.print_info("We only found %s gaussdb process(es). " + "In this case, your 'max_process_memory' setting may be just fitting." + % self.metric.nb_gaussdb) + + if self.metric["max_process_memory"] > suitable_mem: + self.report.print_warn("We only found %s gaussdb process(es). " + "In this case, your 'max_process_memory' setting may be a bit large." + % self.metric.nb_gaussdb) + + if self.metric["max_process_memory"] < min_mem: + self.report.print_bad("We only found %s gaussdb process(es). " + "In this case, your 'max_process_memory' setting is heavily small." + % self.metric.nb_gaussdb) + + # Should always return the recommendation because other recommendations depend on it. 
+ return instantiate_knob(name="max_process_memory", + recommend=suitable_mem, + knob_type=Knob.TYPE.INT, + value_max=suitable_mem, + value_min=min_mem, + restart=True) + @cached_property def max_connections(self): max_conn = self.metric["max_connections"] @@ -178,34 +248,34 @@ class OpenGaussKnobAdvisor: lower = max(20, cores * 3) recommend = clip(recommend, max(20, cores * 5), max(100, cores * 7)) - return Knob.new_instance(name="max_connections", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=True) + return instantiate_knob(name="max_connections", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=True) # Should be based on work_mem. - if self.metric.os_mem_total > 16 * SIZE_UNIT_MAP["GB"]: - remain_mem = self.metric.os_mem_total * 0.85 - self.shared_buffers.default - elif self.metric.os_mem_total > 8 * SIZE_UNIT_MAP["GB"]: - remain_mem = self.metric.os_mem_total * 0.75 - self.shared_buffers.default - elif self.metric.os_mem_total < 4 * SIZE_UNIT_MAP["GB"]: - remain_mem = self.metric.os_mem_total * 0.6 - self.shared_buffers.default + if self.max_process_memory.recommend > 16 * SIZE_UNIT_MAP["GB"]: + remain_mem = self.max_process_memory.recommend * 0.85 - self.shared_buffers.recommend + elif self.max_process_memory.recommend > 8 * SIZE_UNIT_MAP["GB"]: + remain_mem = self.max_process_memory.recommend * 0.75 - self.shared_buffers.recommend + elif self.max_process_memory.recommend < 4 * SIZE_UNIT_MAP["GB"]: + remain_mem = self.max_process_memory.recommend * 0.6 - self.shared_buffers.recommend else: - remain_mem = self.metric.os_mem_total * 0.7 - self.shared_buffers.default + remain_mem = self.max_process_memory.recommend * 0.7 - self.shared_buffers.recommend # AP and HTAP # The value of work_mem is adapted based on the value of max_connections. 
work_mem = max(self.metric["work_mem"], self.metric.temp_file_size * 4) lower = max(15, cores * 3) recommend = max(remain_mem / (work_mem + 0.01), lower) - return Knob.new_instance(name="max_connections", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=recommend * 2, - value_min=lower, - restart=True) + return instantiate_knob(name="max_connections", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=recommend * 2, + value_min=lower, + restart=True) @property def max_prepared_transactions(self): @@ -220,18 +290,18 @@ class OpenGaussKnobAdvisor: "indicating that the two-phase commit function is not used.") return - if max_pt < max_conn.default: + if max_pt < max_conn.recommend: self.report.print_bad("Most applications do not use XA prepared transactions, " "so should set the max_prepared_transactions to 0. " "If you do require prepared transactions, " "you should set this equal to max_connections to avoid blocking. " "May require increasing kernel memory parameters.") - return Knob.new_instance(name="max_prepared_transactions", - value_default=max_conn.default, - knob_type=Knob.TYPE.INT, - value_max=max_conn.max, - value_min=max_conn.min, - restart=True) + return instantiate_knob(name="max_prepared_transactions", + recommend=max_conn.recommend, + knob_type=Knob.TYPE.INT, + value_max=max_conn.max, + value_min=max_conn.min, + restart=True) @cached_property def shared_buffers(self): @@ -241,7 +311,7 @@ class OpenGaussKnobAdvisor: but because database also relies on the operating system cache, it is unlikely that an allocation of more than 40% of RAM to shared_buffers will work better than a smaller amount. 
""" - mem_total = self.metric.os_mem_total # unit: kB + mem_total = self.max_process_memory.recommend # unit: kB if mem_total < 1 * SIZE_UNIT_MAP['GB']: default = 0.15 * mem_total elif mem_total > 8 * SIZE_UNIT_MAP['GB']: @@ -250,37 +320,38 @@ class OpenGaussKnobAdvisor: default = 0.25 * mem_total # The value of this knob means the number of maximum cached blocks. - recommend = default / self.metric.block_size + recommend = round4(default / self.metric.block_size) if self.metric.is_64bit: database_blocks = self.metric.all_database_size / self.metric.block_size if database_blocks < recommend: self.report.print_warn("The total size of all databases is less than the memory size. " "Therefore, it is unnecessary to set shared_buffers to a large value.") - recommend = min(database_blocks, recommend) - upper = recommend * 1.15 - lower = min(0.15 * mem_total / self.metric.block_size, recommend) + recommend = round4(min(database_blocks, recommend)) + upper = round4(recommend * 1.15) + lower = round4(min(0.15 * mem_total / self.metric.block_size, recommend)) - return Knob.new_instance(name="shared_buffers", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=True) + return instantiate_knob(name="shared_buffers", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=True) else: - upper = min(recommend, 2 * SIZE_UNIT_MAP["GB"] / self.metric.block_size) # 32-bit OS only can use 2 GB mem. - lower = min(0.15 * mem_total / self.metric.block_size, recommend) - return Knob.new_instance(name="shared_buffers", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=True) + upper = round4( + min(recommend, 2 * SIZE_UNIT_MAP["GB"] / self.metric.block_size)) # 32-bit OS only can use 2 GB mem. 
+ lower = round4(min(0.15 * mem_total / self.metric.block_size, recommend)) + return instantiate_knob(name="shared_buffers", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=True) @property def work_mem(self): temp_file_size = self.metric.temp_file_size - max_conn = self.max_connections.default + max_conn = self.max_connections.recommend # This knob does not need to be modified. if temp_file_size < 16 * SIZE_UNIT_MAP["MB"]: @@ -296,25 +367,25 @@ class OpenGaussKnobAdvisor: return # conservative operations - recommend = (self.metric.os_mem_total - self.shared_buffers.default) / (max_conn * 2) + recommend = (self.max_process_memory.recommend - self.shared_buffers.recommend) / (max_conn * 2) upper = max(recommend, 256 * SIZE_UNIT_MAP["MB"]) lower = min(recommend, 64 * SIZE_UNIT_MAP["MB"]) - return Knob.new_instance(name="work_mem", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=False) + return instantiate_knob(name="work_mem", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=False) else: - recommend = (self.metric.os_mem_total - self.shared_buffers.default) / max_conn + recommend = (self.max_process_memory.recommend - self.shared_buffers.recommend) / max_conn upper = max(recommend, 1 * SIZE_UNIT_MAP["GB"]) lower = min(recommend, 64 * SIZE_UNIT_MAP["MB"]) - return Knob.new_instance(name="work_mem", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=False) + return instantiate_knob(name="work_mem", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=False) @property def maintenance_work_mem(self): @@ -322,23 +393,23 @@ class OpenGaussKnobAdvisor: @property def effective_cache_size(self): - upper = self.metric.os_mem_total * 0.75 - lower = self.shared_buffers.default + upper = self.max_process_memory.recommend * 0.75 + lower 
= self.shared_buffers.recommend if self.metric.workload_type == WORKLOAD_TYPE.TP: - return Knob.new_instance(name="effective_cache_size", - value_default=lower, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=False) + return instantiate_knob(name="effective_cache_size", + recommend=lower, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=False) else: - return Knob.new_instance(name="effective_cache_size", - value_default=upper, - knob_type=Knob.TYPE.INT, - value_max=upper, - value_min=lower, - restart=False) + return instantiate_knob(name="effective_cache_size", + recommend=upper, + knob_type=Knob.TYPE.INT, + value_max=upper, + value_min=lower, + restart=False) @property def effective_io_concurrency(self): @@ -346,19 +417,19 @@ class OpenGaussKnobAdvisor: if 0 <= self.metric["effective_io_concurrency"] <= 2: # No need for recommendation. return - return Knob.new_instance(name="effective_io_concurrency", - value_default=2, - knob_type=Knob.TYPE.INT, - value_max=4, - value_min=0, - restart=False) + return instantiate_knob(name="effective_io_concurrency", + recommend=2, + knob_type=Knob.TYPE.INT, + value_max=4, + value_min=0, + restart=False) else: - return Knob.new_instance(name="effective_io_concurrency", - value_default=200, - knob_type=Knob.TYPE.INT, - value_max=250, - value_min=150, - restart=False) + return instantiate_knob(name="effective_io_concurrency", + recommend=200, + knob_type=Knob.TYPE.INT, + value_max=250, + value_min=150, + restart=False) # Background writer. @property @@ -370,53 +441,55 @@ class OpenGaussKnobAdvisor: blocks_16m = 16 * SIZE_UNIT_MAP["MB"] / self.metric.block_size # Generally, this value is sufficient. A large value does not bring better performance. if wal_buffers >= blocks_16m: - if wal_buffers > self.shared_buffers.default * 1 / 32: + if wal_buffers > self.shared_buffers.recommend * 1 / 32: self.report.print_bad( - "The value of wal_buffers is too high. 
Generally, a large value does not bring better performance.") - return Knob.new_instance(name="wal_buffers", - value_default=self.shared_buffers.default * 1 / 32, - knob_type=Knob.TYPE.INT, - value_max=max(self.shared_buffers.default * 1 / 32, blocks_16m), - value_min=min(self.shared_buffers.default * 1 / 64, blocks_16m), - restart=True) + "The value of wal_buffers is too high. Generally, " + "a large value does not bring better performance.") + return instantiate_knob(name="wal_buffers", + recommend=self.shared_buffers.recommend * 1 / 32, + knob_type=Knob.TYPE.INT, + value_max=max(self.shared_buffers.recommend * 1 / 32, blocks_16m), + value_min=min(self.shared_buffers.recommend * 1 / 64, blocks_16m), + restart=True) else: self.report.print_warn( - "The value of wal_buffers is a bit high. Generally, an excessively large value does not bring " + "The value of wal_buffers is a bit high. Generally, " + "an excessively large value does not bring " "better performance. You can also set this parameter to -1. " "The database automatically performs adaptation. 
" ) - return Knob.new_instance(name="wal_buffers", - value_default=self.shared_buffers.default * 1 / 32, - knob_type=Knob.TYPE.INT, - value_max=self.shared_buffers.default * 1 / 32, - value_min=blocks_16m, - restart=True) - elif wal_buffers < self.shared_buffers.default * 1 / 64: - return Knob.new_instance(name="wal_buffers", - value_default=-1, - knob_type=Knob.TYPE.INT, - value_max=-1, - value_min=-1, - restart=True) + return instantiate_knob(name="wal_buffers", + recommend=self.shared_buffers.recommend * 1 / 32, + knob_type=Knob.TYPE.INT, + value_max=self.shared_buffers.recommend * 1 / 32, + value_min=blocks_16m, + restart=True) + elif wal_buffers < self.shared_buffers.recommend * 1 / 64: + return instantiate_knob(name="wal_buffers", + recommend=-1, + knob_type=Knob.TYPE.INT, + value_max=-1, + value_min=-1, + restart=True) # Optimizer @property def random_page_cost(self): if self.metric.is_hdd: # Currently, with the rise of storage technology, the default value of 4 is too large. - return Knob.new_instance(name="random_page_cost", - value_default=3, - knob_type=Knob.TYPE.FLOAT, - value_max=3, - value_min=2, - restart=False) + return instantiate_knob(name="random_page_cost", + recommend=3, + knob_type=Knob.TYPE.FLOAT, + value_max=3, + value_min=2, + restart=False) else: - return Knob.new_instance(name="random_page_cost", - value_default=1, - knob_type=Knob.TYPE.FLOAT, - value_max=2, - value_min=1, - restart=False) + return instantiate_knob(name="random_page_cost", + recommend=1, + knob_type=Knob.TYPE.FLOAT, + value_max=2, + value_min=1, + restart=False) @property def default_statistics_target(self): @@ -443,12 +516,12 @@ class OpenGaussKnobAdvisor: recommend = 600 else: recommend = 800 - return Knob.new_instance(name="default_statistics_target", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=1000, - value_min=100, - restart=False) + return instantiate_knob(name="default_statistics_target", + recommend=recommend, + knob_type=Knob.TYPE.INT, + 
value_max=1000, + value_min=100, + restart=False) elif workload_type == WORKLOAD_TYPE.TP: if read_write_ratio < 0.5: recommend = 10 @@ -459,16 +532,52 @@ class OpenGaussKnobAdvisor: else: recommend = 100 - return Knob.new_instance(name="default_statistics_target", - value_default=recommend, - knob_type=Knob.TYPE.INT, - value_max=150, - value_min=10, - restart=False) + return instantiate_knob(name="default_statistics_target", + recommend=recommend, + knob_type=Knob.TYPE.INT, + value_max=150, + value_min=10, + restart=False) else: - return Knob.new_instance(name="default_statistics_target", - value_default=100, - knob_type=Knob.TYPE.INT, - value_max=300, - value_min=80, - restart=False) + return instantiate_knob(name="default_statistics_target", + recommend=100, + knob_type=Knob.TYPE.INT, + value_max=300, + value_min=80, + restart=False) + + @property + def enable_nestloop(self): + if self.metric.workload_type != WORKLOAD_TYPE.TP and self.metric['enable_nestloop'] == 'on': + self.report.print_warn("Detect that your appointed workload does not seem to a TP workload, " + "hence disable enable_nestloop is better.") + return instantiate_knob(name="enable_nestloop", + recommend=0, + knob_type=Knob.TYPE.BOOL, + value_max=1, + value_min=0, + restart=False) + + @property + def enable_mergejoin(self): + if self.metric.workload_type != WORKLOAD_TYPE.TP and self.metric['enable_mergejoin'] == 'on': + self.report.print_warn("Detect that your appointed workload does not seem to a TP workload, " + "hence disable enable_mergejoin is better.") + return instantiate_knob(name="enable_mergejoin", + recommend=0, + knob_type=Knob.TYPE.BOOL, + value_max=1, + value_min=0, + restart=False) + + @property + def enable_hashjoin(self): + if self.metric['enable_hashjoin'] == 'off': + self.report.print_warn("Detect that you disabled enable_hashjoin. 
" + "We suggest that if there is no special reason, please enable enable_hashjoin.") + return instantiate_knob(name="enable_hashjoin", + recommend=1, + knob_type=Knob.TYPE.BOOL, + value_max=1, + value_min=0, + restart=False) diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/recorder.py b/src/gausskernel/dbmind/tools/xtuner/tuner/recorder.py index ed0ba00cb..e77138b5e 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/recorder.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/recorder.py @@ -13,60 +13,71 @@ MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. """ import logging +import csv + +DELIMITER = "," class Recorder: - def __init__(self, filepath, verbose=True): + def __init__(self, filepath): """ Record each tuning process and write it to a file. """ - logger = logging.getLogger('recorder') - fmt = logging.Formatter("%(asctime)s: %(message)s") + self._fd = open(filepath, 'w', newline='') + self.writer = csv.writer(self._fd, delimiter=DELIMITER, + quotechar='\\', quoting=csv.QUOTE_MINIMAL) - file_handler = logging.FileHandler(filepath, mode='w') # no appending - file_handler.setFormatter(fmt) - logger.addHandler(file_handler) - - if verbose: - stream_handler = logging.StreamHandler() - stream_handler.setFormatter(fmt) - logger.addHandler(stream_handler) - - logger.setLevel(logging.INFO) - - self.logger = logger - self.logger.info('Recorder is starting.') + # Record the information of best knobs. 
+ self.best_id = None + self.names = None + self.best_values = None self.best_reward = None - self.count = 0 + + self.current_id = 0 def prompt_message(self, msg, *args, **kwargs): - self.logger.info('[%d] ' % self.count + msg, *args, **kwargs) - logging.info('[Recorder %d]: ' + msg, self.count, *args, **kwargs) + logging.info("[Recorder %d]: " + msg, self.current_id, *args, **kwargs) - def record(self, reward, knobs): + def record(self, score, used_mem, reward, names, values): """ Record the reward value and knobs of the step, and update the maximum reward value and the corresponding knobs. + + :values: A list contains each knob value. The knob value is str type, not denormalized numeric. """ - record = (reward, knobs) - if self.best_reward is None: - self.best_reward = record - else: - self.best_reward = max(record, self.best_reward, key=lambda r: r[0]) + _names = tuple(names) + _values = tuple(values) + if self.current_id == 0: + header = ("id",) + _names + ("score", "used_mem", "reward", "best_reward", "best_id") + self.writer.writerow(header) + self.best_id = 0 + self.names = names + self.best_values = values + self.best_reward = reward - self.logger.info('[%d] Current reward is %f, knobs: %s.', self.count, reward, knobs) - self.logger.info('[%d] Best reward is %f, knobs: %s.', self.count, self.best_reward[0], self.best_reward[1]) + if reward >= self.best_reward: + self.best_id = self.current_id + self.best_values = values + self.best_reward = reward - self.count += 1 + record = (self.current_id,) + _values + (score, used_mem, reward, self.best_reward, self.best_id) + self.writer.writerow(record) + self._fd.flush() + + self.current_id += 1 def give_best(self, rk): """ Give the knobs with the maximum reward value to the parameter RecommendKnobs (RK) object. So that RK can update itself with the passed knobs. """ - reward, best_knobs = self.best_reward - self.logger.info('The tuning process is complete. 
The best reward is %f, best knobs are:\n%s.', - reward, best_knobs) + logging.info("The tuning process is finished. The best reward is %f, and best knobs (%s) are %s.", + self.best_reward, self.names, self.best_values) - for name, setting in best_knobs.items(): - rk[name].default = setting + for name, value in zip(self.names, self.best_values): + knob = rk[name] + knob.current = knob.to_numeric(value) # self.current is always a normalized numeric. + + def __del__(self): + self._fd.flush() + self._fd.close() diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/utils.py b/src/gausskernel/dbmind/tools/xtuner/tuner/utils.py index 291be0965..8cc8f87e6 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/utils.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/utils.py @@ -14,17 +14,22 @@ See the Mulan PSL v2 for more details. """ import os +import re +BLANK = " " RED_FMT = "\033[31;1m{}\033[0m" GREEN_FMT = "\033[32;1m{}\033[0m" YELLOW_FMT = "\033[33;1m{}\033[0m" WHITE_FMT = "\033[37;1m{}\033[0m" +config = None + class cached_property: """ A decorator for caching properties in classes. """ + def __init__(self, func): self.func = func @@ -65,3 +70,54 @@ def construct_dividing_line(title='', padding='-'): return padding * term_width else: return padding * side_width + ' ' + title + ' ' + padding * side_width + + +def to_tuples(text): + lines = text.splitlines() + separator_location = -1 + for i, line in enumerate(lines): + # Find separator line such as '-----+-----+------'. + if re.match(r'^\s*?[-|+]+\s*$', line): + separator_location = i + break + + if separator_location < 0: + return [] + + separator = lines[separator_location] + left = 0 + right = len(separator) + locations = list() + while left < right: + try: + location = separator.index('+', left, right) + except ValueError: + break + locations.append(location) + left = location + 1 + # Record each value start location and end location. 
+ pairs = list(zip([0] + locations, locations + [right])) + tuples = [] + row = [] + wrap_flag = False + # Continue to parse each line. + for line in lines[separator_location + 1:]: + # Prevent from parsing bottom lines. + if len(line.strip()) == 0 or re.match(r'\(\d+ rows?\)', line): + continue + # Parse a record to tuple. + if wrap_flag: + row[-1] += line[pairs[-1][0] + 1: pairs[-1][1]].strip() + else: + for start, end in pairs: + # Increase 1 to start index to go over vertical bar (|). + row.append(line[start + 1: end].strip()) + + if len(line) == right and re.match(r'.*\s*\+$', line): + wrap_flag = True + row[-1] = row[-1].strip('+').strip(BLANK) + BLANK + else: + tuples.append(tuple(row)) + row = [] + wrap_flag = False + return tuples diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.conf b/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.conf index 8fafb1a42..e03f03a73 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.conf +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.conf @@ -15,7 +15,7 @@ logfile = log/opengauss_tuner.log output_tuning_result = tuned_knobs.json verbose = on -recorder_file = log/recorder.log +recorder_file = log/recorder.csv tune_strategy = auto # rl, gop or auto drop_cache = on # You must modify the permission of the login user in the /etc/sudoers file and grant the NOPASSWD permission to the user. used_mem_penalty_term = 1e-9 # Prevent taking up more memory. @@ -45,12 +45,26 @@ particle_nums = 3 # A larger value indicates higher accuracy but slower speed. [Benchmark] # Some examples of benchmark script implementation are provided in the benchmark directory. # Implement the benchmark script based on the specified interface. -benchmark_script = tpch -# These parameters are used to replace the path and cmd in the benchmark script. 
+benchmark_script = period # Built-in benchmarks: period, tpcc, tpch, tpcds, sysbench (not tested) + +# The following parameters are used to replace the path and cmd in the benchmark script. # The path and cmd variables in the benchmark script are examples. You can set the parameters # by referring to the benchmark script file in the benchmark directory. -benchmark_path = # If this parameter is blank, the default path in the benchmark script is used. -benchmark_cmd = # If this parameter is blank, the default cmd in the benchmark script is used. + +# If this parameter is blank, the default path in the benchmark script is used. +# For built-in benchmarks, only 'period' does not need it. +benchmark_path = +# If this parameter is blank, the default cmd in the benchmark script is used. +# For built-in benchmarks, only 'tpcc' needs it. +benchmark_cmd = + +# Optional parameter. Only for 'period'. +# By setting this parameter, the user can measure the performance of a periodic task. +# The unit of this parameter is seconds. +# The default parameter is 60 seconds, which means that the cycle of the workload is 60 seconds. +# This value needs to be greater than or equal to the cycle of the workload itself; +# a smaller value can easily distort the measurement. +benchmark_period = 60 #------------------------------------------------------------------------------ # Tuning Knobs Configurations diff --git a/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.py b/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.py index ac98c5315..2f41b003e 100644 --- a/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.py +++ b/src/gausskernel/dbmind/tools/xtuner/tuner/xtuner.py @@ -15,6 +15,7 @@ See the Mulan PSL v2 for more details.
import logging import os +import signal from logging import handlers from tuner import benchmark @@ -47,6 +48,7 @@ def prompt_restart_risks(): def set_logger(filename): logger = logging.getLogger() + logger.setLevel(logging.INFO) dirname = os.path.dirname(filename) if not os.path.exists(dirname): @@ -100,6 +102,7 @@ def procedure_main(mode, db_info, config): logging.info("Configurations: %s.", config) if config['tuning_list'].strip() != '' and mode != 'recommend': + print("You have configured the tuning list, so use this list to tune.") knobs = load_knobs_from_json_file(config['tuning_list']) else: print("Start to recommend knobs. Just a moment, please.") @@ -123,16 +126,32 @@ def procedure_main(mode, db_info, config): mem_penalty=config['used_mem_penalty_term']) env.set_tuning_knobs(knobs) + print('The benchmark will start to run iteratively. ' + 'This process may take a long time. Please wait a moment.') if mode == 'train': rl_model('train', env, config) elif mode == 'tune': - if config['tune_strategy'] == 'rl': - rl_model('tune', env, config) - elif config['tune_strategy'] == 'gop': - global_search(env, config) - else: - raise ValueError('Incorrect tune strategy: %s.' % config['tune_strategy']) + # Run once the performance under the default knob configuration. + # Its id is 0, aka the first one. + original_knobs = db_agent.get_default_normalized_vector() + env.step(original_knobs) + try: + if config['tune_strategy'] == 'rl': + rl_model('tune', env, config) + elif config['tune_strategy'] == 'gop': + global_search(env, config) + else: + raise ValueError('Incorrect tune strategy: %s.' % config['tune_strategy']) + + except KeyboardInterrupt: + signal.signal(signal.SIGINT, signal.SIG_IGN) + print("Trigger an interrupt via the keyboard. " + "Continue to generate current tuning results.") + + # Rollback/reset to the original/initial knobs while the tuning process is finished. 
+ db_agent.set_knob_normalized_vector(original_knobs) + # Modify the variable `knobs` with tuned result. recorder.give_best(knobs) else: raise ValueError('Incorrect mode value: %s.' % mode) @@ -208,7 +227,7 @@ def global_search(env, config): from tuner.algorithms.pso import Pso def performance_function(v): - s, r, d, _ = env.step(v, False) + s, r, d, _ = env.step(v) return -r # Use -reward because PSO wishes to minimize. pso = Pso(