chore (xtuner): enhance xtuner's stability and add several new features and fix some bugs

Modifications:
    fix (xtuner): easy to install dependencies
    fix (xtuner): refactor exec_statement
    fix (xtuner): change recorder to csv format and run the initial knobs
    fix (xtuner): fix the bug that shared_buffers has no round
    fix (xtuner): change round4() to return a int value and apply it to max_process_memory
    fix (xtuner): remove the default field and replace it with the recommend field
    feat (xtuner): support period benchmark
    feat (xtuner): support Python version check
    feat (xtuner): support to recommend more knobs
    feat (xtuner): add a new function that can traverse all properties from Advisor
    refactor (xtuner): refactor some benchmark driver scripts
This commit is contained in:
w00464751
2021-05-08 15:56:32 +08:00
committed by wangtq
parent 4cfb4cdbc5
commit 7aa8f7aafa
23 changed files with 797 additions and 371 deletions

View File

@ -40,13 +40,32 @@ python3 main.py --help
## Dependencies
If you use the Python runtime that ships with your OS, you should also install the Python development package (SDK), for example:
sudo yum install python3-devel
You should install the following mathematical libraries to your OS, so that some Python libraries can import them.
For CentOS-based OS (e.g., Redhat, CentOS, EulerOS):
sudo yum install lapack lapack-devel blas blas-devel
For Debian-based OS (e.g., Ubuntu, KaliLinux):
sudo apt-get install gfortran libopenblas-dev liblapack-dev
You should install the following dependencies by python-pip.
paramiko
bayesian-optimization
ptable
If you want to use deep learning, you should also install the following libraries:
tensorflow>=2.2.0
keras-rl2
keras>=2.4.0
paramiko
bayesian-optimization
ptable
Note: Firstly, please upgrade your pip: ```python -m pip install --upgrade pip```

View File

@ -0,0 +1,6 @@
cryptography==2.5
paramiko==2.7.2
numpy==1.16.5
scipy==1.6.0
bayesian-optimization
ptable

View File

@ -0,0 +1,5 @@
tensorflow >= 2.1.0 # optional
keras-rl2~=1.0.4 # optional
paramiko==2.7.2
bayesian-optimization
ptable

View File

@ -1,5 +0,0 @@
tensorflow>=2.2.0
keras-rl2
paramiko
bayesian-optimization
ptable

View File

@ -14,18 +14,34 @@ See the Mulan PSL v2 for more details.
"""
import os
import sys
import platform
from setuptools import setup, find_packages
def read_requirements():
"""Parse requirements.txt."""
filepath = os.path.join('.', 'requirements.txt')
if 'aarch64' in platform.uname().machine:
filepath = os.path.join('.', 'requirements-aarch64.txt')
else:
filepath = os.path.join('.', 'requirements-x86.txt')
with open(filepath, 'r') as f:
requirements = [_line.rstrip() for _line in f]
requirements.reverse()
return requirements
def check_version():
    """
    Check whether the running interpreter is recent enough.

    :return: True if the interpreter is Python 3.6 or newer, otherwise False.
    """
    # Compare (major, minor) as a tuple. Testing the two fields independently
    # would wrongly reject a newer major version whose minor number is small.
    return tuple(sys.version_info[:2]) >= (3, 6)
if not check_version():
print("Requires Python >= 3.6")
exit(-1)
# Read the package information from the main.py.
pkginfo = dict()

View File

@ -15,7 +15,7 @@
logfile = log/opengauss_tuner.log
output_tuning_result = tuned_knobs.json
verbose = on
recorder_file = log/recorder.log
recorder_file = log/recorder.csv
tune_strategy = auto # rl, gop or auto
drop_cache = on # You must modify the permission of the login user in the /etc/sudoers file and grant the NOPASSWD permission to the user.
used_mem_penalty_term = 1e-9 # Prevent taking up more memory.
@ -45,12 +45,26 @@ particle_nums = 3 # A larger value indicates higher accuracy but slower speed.
[Benchmark]
# Some examples of benchmark script implementation are provided in the benchmark directory.
# Implement the benchmark script based on the specified interface.
benchmark_script = tpch
# These parameters are used to replace the path and cmd in the benchmark script.
benchmark_script = period # Built-in benchmarks: period, tpcc, tpch, tpcds, sysbench (not tested)
# The following parameters are used to replace the path and cmd in the benchmark script.
# The path and cmd variables in the benchmark script are examples. You can set the parameters
# by referring to the benchmark script file in the benchmark directory.
benchmark_path = # If this parameter is blank, the default path in the benchmark script is used.
benchmark_cmd = # If this parameter is blank, the default cmd in the benchmark script is used.
# If this parameter is blank, the default path in the benchmark script is used.
# For built-in benchmarks, only 'period' does not need it.
benchmark_path =
# If this parameter is blank, the default cmd in the benchmark script is used.
# For built-in benchmarks, only 'tpcc' needs it.
benchmark_cmd =
# Optional parameter. Only for 'period'.
# By specifying this parameter, users can measure the performance of a periodic task.
# The unit of this parameter is seconds.
# The default value is 60 seconds, which means that the cycle of the workload is 60 seconds.
# This value should be greater than or equal to the cycle of the workload itself;
# a smaller value can easily distort the measured result.
benchmark_period = 60
#------------------------------------------------------------------------------
# Tuning Knobs Configurations

View File

@ -16,6 +16,8 @@ See the Mulan PSL v2 for more details.
import numpy as np
PATTERN = '|\t%s\t|\t%s\t|\t%s\t|'
class Particle:
def __init__(self, position, velocity, best_position, fitness):
self.position = position
@ -94,13 +96,16 @@ class Pso:
self.best_position = particle.position
def update_one_step(self):
for particle in self.particles:
for i, particle in enumerate(self.particles):
self._update_velocity(particle) # update velocity
self._update_position(particle) # update position
print(PATTERN % ('%s-%s' % (self.iteration_count, i), self.best_fitness, self.best_position), flush=True)
self.iteration_count += 1
self.fitness_val_list.append(self.best_fitness)
def minimize(self):
    """
    Run the swarm for ``max_iteration`` steps and report the best result.

    A table header is printed first, then ``update_one_step`` is invoked
    once per iteration.

    :return: a tuple of (best_fitness, best_position).
    """
    header = PATTERN % ('iter', 'best_fitness', 'best_position')
    print(header, flush=True)

    for _ in range(self.max_iteration):
        self.update_one_step()

    result = (self.best_fitness, self.best_position)
    return result

View File

@ -38,7 +38,7 @@ def get_benchmark_instance(script, path, cmd, db_info):
bm = importlib.import_module('tuner.benchmark.{}'.format(name))
# Verify the validity of the benchmark script.
# An exception will be thrown if benchmark instance does not have specified attributes.
if (not getattr(bm, 'path', False)) or (not getattr(bm, 'cmd', False)) or (not getattr(bm, 'run', False)):
if not getattr(bm, 'run', False):
raise ConfigureError('The benchmark script %s is invalid. '
'For details, see the example template and description document.' % script)
# Check whether function run exists and whether its type matches.
@ -64,6 +64,10 @@ def get_benchmark_instance(script, path, cmd, db_info):
# Wrap remote server shell as an API and pass it to benchmark instance.
def wrapper(server_ssh):
return bm.run(server_ssh, local_ssh)
try:
return bm.run(server_ssh, local_ssh)
except Exception as e:
logging.warning("An error occured while running the benchmark, hence the benchmark score is 0. The error is %s.", e, exc_info=True)
return .0
return wrapper

View File

@ -0,0 +1,49 @@
"""
Copyright (c) 2020 Huawei Technologies Co.,Ltd.
openGauss is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import sys
import time
import logging
from tuner import utils
from tuner.exceptions import ExecutionError
path = ''
# Measure current total committed transactions that do not include xact_rollback.
cmd = "gsql -U {user} -W {password} -d postgres -p {port} -c " \
"\"SELECT sum(xact_commit) FROM pg_stat_database where datname = '{db}';\""
# This script captures the performance indicators in the user's periodic execution task, and measures the quality
# of the tuning results by measuring the range of changes in the indicators.
def _read_committed_txn(remote_server):
    """
    Execute the module-level ``cmd`` on the remote server and parse its result.

    :param remote_server: executor object offering ``exec_command_sync``.
    :return: the first field of the first returned row, converted to int.
    :raise ExecutionError: if the command wrote anything to stderr.
    """
    stdout, stderr = remote_server.exec_command_sync(cmd)
    if len(stderr) > 0:
        raise ExecutionError(stderr)
    return int(utils.to_tuples(stdout)[0][0])


def run(remote_server, local_host) -> float:
    """
    Measure the committed-transaction throughput over one benchmark period.

    The committed-transaction counter is sampled, the function sleeps for
    ``benchmark_period`` seconds, and the counter is sampled again.

    :param remote_server: executor object used to run ``cmd``.
    :param local_host: unused here; kept to match the benchmark interface.
    :return: committed transactions per second during the period.
    :raise ExecutionError: if sampling the counter reports an error.
    """
    wait_seconds = utils.config['benchmark_period']
    if not wait_seconds:
        print("Not configured the parameter 'benchmark_period' in the configuration file.",
              file=sys.stderr)
        # Use sys.exit(): the bare exit() helper is injected by the site
        # module and is not guaranteed to exist in every runtime.
        sys.exit(-1)

    prev_txn = _read_committed_txn(remote_server)
    time.sleep(wait_seconds)
    current_txn = _read_committed_txn(remote_server)

    # Return TPS in this period.
    return (current_txn - prev_txn) / wait_seconds

View File

@ -13,6 +13,10 @@ MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import os
import sys
import shlex
from tuner.exceptions import ExecutionError
# WARN: You need to download the benchmark-sql test tool to the system,
@ -38,9 +42,28 @@ def run(remote_server, local_host):
:return: benchmark score, higher one must be better, be sure to keep in mind.
"""
# Benchmark can be deployed on a remote server or a local server.
# The process of generating the final report of the Benchmarksql-5.0 is separate from the test process.
# Therefore, the `sleep` command needs to be added to wait to prevent the process from exiting prematurely.
stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, 'rm -rf benchmarksql-error.log', cmd, 'sleep 3'])
# Here we set the terminal as a remote server.
terminal = remote_server
err_logfile = os.path.join(path, 'benchmarksql-error.log')
cmd_files = shlex.split(cmd)
if len(cmd_files) != 2:
print('Invalid configuration parameter `benchmark_cmd`. '
'You should check the item in the configuration file.', file=sys.stderr)
exit(-1)
# Check whether these files exist.
shell_file, conf_file = cmd_files
shell_file = os.path.join(path, shell_file)
conf_file = os.path.join(path, conf_file)
_, stderr1 = terminal.exec_command_sync('ls %s' % shell_file)
_, stderr2 = terminal.exec_command_sync('ls %s' % conf_file)
if len(stderr1) > 0 or len(stderr2) > 0:
print('You should correct the parameter `benchmark_path` that the path contains several executable SQL files '
'in the configuration file.')
exit(-1)
# Clean log file
terminal.exec_command_sync('rm -rf %s' % err_logfile)
# Run benchmark
stdout, stderr = terminal.exec_command_sync('cd %s; %s %s' % (path, shell_file, conf_file))
if len(stderr) > 0:
raise ExecutionError(stderr)
@ -51,7 +74,7 @@ def run(remote_server, local_host):
if "(NewOrders)" in st:
tpmC = split_string[i + 2]
break
stdout, stderr = remote_server.exec_command_sync(
stdout, stderr = terminal.exec_command_sync(
"cat %s/benchmarksql-error.log" % path)
nb_err = stdout.count("ERROR:") # Penalty term.
return float(tpmC) - 10 * nb_err # You can modify the penalty factor.

View File

@ -12,25 +12,27 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import os
import sys
import time
# WARN: You need to import data into the database and SQL statements in the following path will be executed.
# The program automatically collects the total execution duration of these SQL statements.
path = '/path/to/tpcds/queries' # modify this path
cmd = "gsql -U {user} -W {password} -d {db} -p {port} -f {file}"
path = '/path/to/tpcds/queries' # modify this path which contains benchmark SQL files.
cmd = "find %s -type f -name '*.sql' -exec gsql -U {user} -W {password} -d {db} -p {port} -f {} > /dev/null \\;"
def run(remote_server, local_host):
find_file_cmd = "find . -type f -name '*.sql'"
stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, find_file_cmd])
if len(stderr) > 0:
raise Exception(stderr)
files = stdout.strip().split('\n')
time_start = time.time()
for file in files:
perform_cmd = cmd.format(file=file)
stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, perform_cmd])
if len(stderr) > 0:
print(stderr)
# Check whether the path is valid.
stdout, stderr = remote_server.exec_command_sync('ls %s' % path)
if len(stderr) > 0:
print('You should correct the parameter `benchmark_path` that the path contains several executable SQL files '
'in the configuration file.')
exit(-1)
stdout, stderr = remote_server.exec_command_sync(cmd % path)
if len(stderr) > 0:
print(stderr, file=sys.stderr)
cost = time.time() - time_start
return - cost

View File

@ -12,27 +12,29 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import os
import sys
import time
from tuner.exceptions import ExecutionError
# WARN: You need to import data into the database and SQL statements in the following path will be executed.
# The program automatically collects the total execution duration of these SQL statements.
path = '/path/to/tpch/queries' # modify this path
cmd = "gsql -U {user} -W {password} -d {db} -p {port} -f {file}"
path = '/path/to/tpch/queries' # modify this path which contains benchmark SQL files.
cmd = "find %s -type f -name '*.sql' -exec gsql -U {user} -W {password} -d {db} -p {port} -f {} > /dev/null \\;"
def run(remote_server, local_host):
find_file_cmd = "find . -type f -name '*.sql'"
stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, find_file_cmd])
if len(stderr) > 0:
raise ExecutionError(stderr)
files = stdout.strip().split('\n')
time_start = time.time()
for file in files:
perform_cmd = cmd.format(file=file)
stdout, stderr = remote_server.exec_command_sync(['cd %s' % path, perform_cmd])
if len(stderr) > 0:
print(stderr)
# Check whether the path is valid.
stdout, stderr = remote_server.exec_command_sync('ls %s' % path)
if len(stderr) > 0:
print('You should correct the parameter `benchmark_path` that the path contains several executable SQL files '
'in the configuration file.')
exit(-1)
stdout, stderr = remote_server.exec_command_sync(cmd % path)
if len(stderr) > 0:
print(stderr, file=sys.stderr)
cost = time.time() - time_start
return - cost

View File

@ -44,9 +44,8 @@ class OpenGaussMetric:
def _get_numeric_metric(self, sql):
result = self._db.exec_statement(sql)
if len(result) > 1:
_, value = result
return float(value)
if len(result) > 0:
return float(result[0][0])
else:
return 0
@ -66,9 +65,8 @@ class OpenGaussMetric:
"where name in ('max_connections', 'work_mem', 'temp_buffers', 'shared_buffers', 'wal_buffers') " \
"order by name;"
res = self._db.exec_statement(sql)
res.pop(0)
res = map(int, res)
max_conn, s_buff, t_buff, w_buff, work_mem = res
values = map(lambda x: int(x[0]), res)
max_conn, s_buff, t_buff, w_buff, work_mem = values
total_mem = max_conn * (work_mem / 64 + t_buff / 128) + s_buff / 64 + w_buff / 4096 # unit: MB
return total_mem * 1024 # unit: kB
@ -205,11 +203,20 @@ class OpenGaussMetric:
"from pg_stat_database where datname = '%s';" % self._db.db_name
)
@cached_property
def nb_gaussdb(self):
return int(self._db.exec_command_on_host("ps -ux | grep gaussd[b] | wc -l"))
@cached_property
def os_mem_total(self):
mem = self._db.exec_command_on_host("free -k | awk 'NR==2{print $2}'") # unit kB
return int(mem)
@cached_property
def min_free_mem(self):
kbytes = self._db.exec_command_on_host("cat /proc/sys/vm/min_free_kbytes")
return int(kbytes) # unit: kB
@cached_property
def os_cpu_count(self):
cores = self._db.exec_command_on_host("lscpu | grep 'CPU(s)' | head -1 | awk '{print $2}'")
@ -342,9 +349,9 @@ class OpenGaussMetric:
@cached_property
def enable_autovacuum(self):
_, setting = self._db.exec_statement(
setting = self._db.exec_statement(
"select setting from pg_settings where name = 'autovacuum';"
)
)[0][0]
return setting == 'on'
def get_internal_state(self):

View File

@ -14,14 +14,14 @@ See the Mulan PSL v2 for more details.
"""
import logging
import re
from tuner.character import OpenGaussMetric
from tuner.exceptions import DBStatusError, SecurityError, ExecutionError, OptionError
from tuner.executor import ExecutorFactory
from tuner.knob import RecommendedKnobs
from tuner.knob import RecommendedKnobs, Knob
from tuner.utils import clip
from tuner.utils import construct_dividing_line
from tuner.utils import to_tuples
def check_special_character(phrase):
@ -128,9 +128,9 @@ class DB_Agent:
"Check whether the database is started. ")
# Get database instance pid and data_path.
_, self.data_path = self.exec_statement(
self.data_path = self.exec_statement(
"SELECT datapath FROM pg_node_env;"
)
)[0][0]
except ExecutionError as e:
logging.exception("An exception occurred while checking connection parameters: %s", e)
raise DBStatusError("Failed to login to the database. "
@ -159,19 +159,24 @@ class DB_Agent:
check_special_character(knob)
wherein_list.append("'%s'" % knob)
sql = "SELECT name, boot_val, min_val, max_val FROM pg_settings WHERE name IN ({})".format(
sql = "SELECT name, setting, min_val, max_val FROM pg_settings WHERE name IN ({})".format(
','.join(wherein_list)
)
stdout = self.exec_statement(sql)[4:]
tuples = [[stdout[4 * i], stdout[4 * i + 1], stdout[4 * i + 2], stdout[4 * i + 3]] for i in
range(len(stdout) // 4)]
for name, boot_val, min_val, max_val in tuples:
# If the value is missing, use the default value obtained in the system table to fill it.
tuples = self.exec_statement(sql)
for name, setting, min_val, max_val in tuples:
knob = self.knobs[name]
knob.min = min_val if not knob.min else max(knob.min, min_val, key=lambda x: float(x))
knob.max = max_val if not knob.max else min(knob.max, max_val, key=lambda x: float(x))
knob.default = boot_val if not knob.default else clip(knob.default, knob.min, knob.max)
if knob.type != Knob.TYPE.BOOL:
min_val = float(min_val)
max_val = float(max_val)
knob.min = min_val if knob.min is None else max(knob.min, min_val)
knob.max = max_val if knob.max is None else min(knob.max, max_val)
knob.original = setting
# If user did not set default field, then make the original value as the default value.
# The default value (knob.current) is the starting point while tuning.
if knob.current is None:
knob.current = knob.to_numeric(setting)
def exec_statement(self, sql, timeout=None):
"""
@ -198,14 +203,7 @@ class DB_Agent:
logging.error("Cannot execute SQL statement: %s. Error message: %s.", sql, stderr)
raise ExecutionError("Cannot execute SQL statement: %s." % sql)
# Parse the result.
result = re.sub(r'[-+]{2,}', r'', stdout) # remove '----+----'
result = re.sub(r'\|', r'', result) # remove '|'
result = re.sub(r'\(\d*[\s,]*row[s]*?\)', r'', result) # remove '(n rows)'
result = re.sub(r'\n', r' ', result)
result = result.strip()
result = re.split(r'\s+', result)
return result
return to_tuples(stdout)
def is_alive(self):
"""
@ -260,6 +258,17 @@ class DB_Agent:
nv.append(self.knobs[name].to_numeric(val))
return nv
def get_default_normalized_vector(self):
    """
    Build the normalized vector of the original/initial knob settings.

    The vector is used to obtain the initial performance and to roll the
    settings back once tuning has finished.

    :return: a list with one entry per name in ``ordered_knob_list``, each
             being the knob's ``original`` value passed through ``to_numeric``.
    """
    ordered_knobs = (self.knobs[knob_name] for knob_name in self.ordered_knob_list)
    return [knob.to_numeric(knob.original) for knob in ordered_knobs]
def set_knob_normalized_vector(self, nv):
restart = False
for i, val in enumerate(nv):
@ -274,7 +283,7 @@ class DB_Agent:
def get_knob_value(self, name):
check_special_character(name)
sql = "SELECT setting FROM pg_settings WHERE name = '{}';".format(name)
_, value = self.exec_statement(sql)
value = self.exec_statement(sql)[0][0]
return value
def set_knob_value(self, name, value):
@ -288,18 +297,12 @@ class DB_Agent:
def reset_state(self):
self.metric.reset()
def set_default_knob(self):
restart = False
for knob in self.knobs:
self.set_knob_value(knob.name, knob.default)
restart = True if knob.restart else restart
self.restart()
def restart(self):
logging.info(construct_dividing_line("Restarting database.", padding="*"))
self.exec_statement("checkpoint;") # Prevent the database from being shut down for a long time.
try:
self.exec_statement("checkpoint;") # Prevent the database from being shut down for a long time.
except ExecutionError:
logging.warning("Cannot checkpoint perhaps due to bad GUC settings.")
self.exec_command_on_host("gs_ctl stop -D {data_path}".format(data_path=self.data_path),
ignore_status_code=True)
self.exec_command_on_host("gs_ctl start -D {data_path}".format(data_path=self.data_path),

View File

@ -13,9 +13,12 @@ MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import logging
import numpy as np
from tuner.env import Env, Box
from tuner.exceptions import ExecutionError
class DB_Env(Env):
@ -72,22 +75,33 @@ class DB_Env(Env):
if self.drop_cache:
self.db.drop_cache()
obs = self._get_obs()
score = self.perf(self.bm)
used_mem = self.db.metric.used_mem
reward = score - self.mem_penalty * used_mem # Use the memory usage as a regular term.
try:
obs = self._get_obs()
score = self.perf(self.bm)
used_mem = self.db.metric.used_mem
reward = score - self.mem_penalty * used_mem # Use the memory usage as a regular term.
except ExecutionError as e:
logging.error('An error errored after changed the settings, '
'hence rollback to the default settings. The error is %s.', e)
self.reset() # Rollback to default setting
obs = [.0 for _ in self._get_obs()] # Pad 0 to the observation vector.
score = .0
used_mem = self.db.metric.os_mem_total
# This value is minimal theoretically, so that regard it as a penalty term.
reward = score - self.mem_penalty * used_mem
# Record each tuning process.
knob_dict = {k: self.db.get_knob_value(k) for k in self.db.ordered_knob_list}
self.recorder.record(reward, knob_dict)
knob_values = [self.db.knobs[name].to_string(value) for name, value in zip(self.db.ordered_knob_list, action)]
self.recorder.record(score, used_mem, reward, names=self.db.ordered_knob_list, values=knob_values)
self.recorder.prompt_message('Database metrics: %s.', obs)
self.recorder.prompt_message('Benchmark score: %f, used mem: %d kB, reward: %f.', score, used_mem, reward)
return obs, reward, False, {}
def reset(self):
self.db.set_knob_normalized_vector(self.db.get_default_normalized_vector())
self.db.reset_state()
self.db.set_knob_normalized_vector(np.random.random(self.nb_actions)) # Maybe we can have more samples.
return self._get_obs()
def _get_obs(self):

View File

@ -276,7 +276,7 @@ class LocalExec(Executor):
return [bytes2text(stdout), bytes2text(stderr)]
else:
# Pipeline does not support running in shell=False, so we run it with the 'bash -c' command.
split_cmd = ['bash', '-c', command] if command.find('|') >= 0 else shlex.split(command)
split_cmd = ['bash', '-c', command] if '|' in command or ';' in command else shlex.split(command)
proc = subprocess.Popen(split_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,

View File

@ -48,10 +48,8 @@ class RecommendedKnobs:
knob_list.extend(self._need_tune_knobs)
knob_list.extend(self._only_report_knobs)
for knob in knob_list:
if knob.type in ('int', 'float'):
knob_table.add_row([knob.name, knob.default, knob.min, knob.max, knob.restart])
else:
knob_table.add_row([knob.name, knob.to_string(knob.default), knob.min, knob.max, knob.restart])
row = (knob.name, knob.to_string(knob.current), knob.min, knob.max, knob.restart)
knob_table.add_row(row)
print(knob_table)
def dump(self, fp, dump_report_knobs=True):
@ -71,14 +69,16 @@ class RecommendedKnobs:
def append_need_tune_knobs(self, *args):
for knob in args:
if knob:
self._need_tune_knobs.append(knob)
self._tbl[knob.name] = knob
if knob is None:
continue
self._need_tune_knobs.append(knob)
self._tbl[knob.name] = knob
def append_only_report_knobs(self, *args):
for knob in args:
if knob:
self._only_report_knobs.append(knob)
if knob is None:
continue
self._only_report_knobs.append(knob)
def names(self):
return sorted(self._tbl.keys())
@ -100,41 +100,46 @@ class RecommendedKnobs:
class Knob:
def __init__(self, name, knob):
def __init__(self, name, **kwargs):
"""
Wrap a tuning knob and abstract it as a class.
The second argument knob is dict type. Its fields include:
default: Int, float or bool type.
recommend: The knob value recommended by knob recommendation.
current: Normalized current value.
original: Str type. A setting value from `pg_settings` or user's configuration.
min: Optional. Int, float type.
max: Optional. Int, float type.
type: String type. Constrained by Knob.TYPE.
restart: Boolean type.
:param name: The name of a knob.
:param knob: The dict data-structure of a knob.
:param name: The name of the knob.
:param kwargs: A dict structure contains the knob's all fields.
"""
if not isinstance(knob, dict):
raise TypeError
self.name = name
self.type = knob.get('type')
self.min = knob.get('min')
self.max = knob.get('max')
self.default = knob.get('default')
self.restart = knob.get('restart', False)
self.recommend = kwargs.get('recommend')
self.original = self.current = None
self.user_set = kwargs.get('default')
self._min = kwargs.get('min')
self._max = kwargs.get('max')
self.type = kwargs.get('type')
self.restart = kwargs.get('restart', False)
if '' in (self.name, self.type):
raise ValueError("'name', and 'type' fields of knob are essential.")
if self.type == 'bool':
self.min = 0
self.max = 1
self._min = 0
self._max = 1
self._scale = self.max - self.min
if str in (type(self._min), type(self._max)):
raise ValueError("'min', and 'max' fields of knob should not be str type.")
if self._scale < 0:
raise ValueError('Knob %s is incorrectly configured. '
'The max value must be greater than or equal to the min value.' % self.name)
# Refresh scale.
self._scale = None
self.fresh_scale()
def to_string(self, val):
rv = val * self._scale + float(self.min) if self.type in ('int', 'float') else val
rv = self.denormalize(val) if self.type in ('int', 'float') else val
if self.type == 'int':
rv = str(int(round(rv)))
elif self.type == 'bool':
@ -148,7 +153,7 @@ class Knob:
def to_numeric(self, val):
if self.type in ('float', 'int'):
rv = (float(val) - float(self.min)) / self._scale
rv = self.normalize(val)
elif self.type == 'bool':
rv = 0. if val == 'off' else 1.
else:
@ -156,38 +161,63 @@ class Knob:
return rv
@staticmethod
def new_instance(name, value_default, knob_type, value_min=0, value_max=1, restart=False):
if knob_type not in Knob.TYPE.ITEMS:
raise TypeError("The type of parameter 'knob_type' is incorrect.")
def fresh_scale(self):
if None in (self._min, self._max):
return
if knob_type == Knob.TYPE.INT:
value_default = int(value_default)
value_max = int(value_max)
value_min = int(value_min)
elif knob_type == Knob.TYPE.FLOAT:
value_default = float(value_default)
value_max = float(value_max)
value_min = float(value_min)
else:
if type(value_default) is not bool:
raise ValueError
self._scale = self._max - self._min
if self._scale < 0:
raise ValueError('Knob %s is incorrectly configured. '
'The max value must be greater than '
'or equal to the min value.' % self.name)
if type(self.user_set) is str:
self.current = self.to_numeric(self.user_set)
elif type(self.user_set) in (int, float):
self.user_set = str(self.user_set)
self.current = self.normalize(self.user_set)
elif self.recommend is not None:
self.current = self.normalize(self.recommend)
value_default = 1 if value_default else 0
value_max = 1
value_min = 0
@property
def min(self):
return self._min
return Knob(name,
{'type': knob_type, 'restart': restart,
'default': value_default, 'min': value_min, 'max': value_max})
@min.setter
def min(self, val):
if val is None:
return
self._min = val
self.fresh_scale()
@property
def max(self):
return self._max
@max.setter
def max(self, val):
if val is None:
return
self._max = val
self.fresh_scale()
def normalize(self, val):
return (float(val) - float(self._min)) / self._scale
def denormalize(self, val):
return val * self._scale + float(self._min)
def to_dict(self):
return \
{self.name: {
'type': self.type, 'restart': self.restart,
'default': self.default,
'min': self.min, 'max': self.max
}}
{self.name:
{
'type': self.type,
'restart': self.restart,
'default': self.user_set if self.user_set is not None else self.original,
'recommend': self.to_string(self.current),
'min': self._min,
'max': self._max
}
}
def __str__(self):
return str(self.to_dict())
@ -202,9 +232,10 @@ class Knob:
def load_knobs_from_json_file(filename):
knobs = RecommendedKnobs()
with open(filename) as fp:
for name, val in json.load(fp).items():
val['name'] = name
knobs.append_need_tune_knobs(Knob(name=name, knob=val))
with open(filename) as f:
for name, _dict in json.load(f).items():
knobs.append_need_tune_knobs(
Knob(name=name, **_dict)
)
return knobs

View File

@ -12,9 +12,13 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
from __future__ import print_function
import argparse
import configparser
try:
import configparser
except ImportError:
import ConfigParser as configparser
import json
import os
import sys
@ -23,8 +27,9 @@ from getpass import getpass
from tuner.exceptions import OptionError
from tuner.xtuner import procedure_main
from tuner import utils
__version__ = '2.0.0'
__version__ = '2.1.0'
__description__ = 'X-Tuner: a self-tuning tool integrated by openGauss.'
@ -47,6 +52,15 @@ def check_path_valid(path):
return True
def check_version():
    """
    Check whether the running interpreter is recent enough.

    :return: True if the interpreter is Python 3.6 or newer, otherwise False.
    """
    # Compare (major, minor) as a tuple. Testing the two fields independently
    # would wrongly reject a newer major version whose minor number is small.
    return tuple(sys.version_info[:2]) >= (3, 6)
def build_db_info(args):
if args.db_config_file:
if not check_path_valid(args.db_config_file):
@ -179,6 +193,10 @@ def get_config(filepath):
raise OptionError(invalid_opt_msg % ('benchmark_script', benchmarks))
config['benchmark_path'] = cp['Benchmark'].get('benchmark_path', '')
config['benchmark_cmd'] = cp['Benchmark'].get('benchmark_cmd', '')
benchmark_period = cp['Benchmark'].get('benchmark_period', '0')
if not benchmark_period:
benchmark_period = '0'
config['benchmark_period'] = int(benchmark_period)
# Section Knobs
scenario_opts = ['auto', 'ap', 'tp', 'htap']
@ -220,6 +238,10 @@ def get_config(filepath):
def main():
if not check_version():
print("FATAL: You should use at least Python 3.6 or above version.")
return -1
parser = get_argv_parser()
args = parser.parse_args()
mode = args.mode
@ -228,7 +250,7 @@ def main():
parser.print_usage()
return -1
config = get_config(args.tuner_config_file)
utils.config = config = get_config(args.tuner_config_file)
if not config:
return -1

View File

@ -12,6 +12,7 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import inspect
from prettytable import PrettyTable
@ -29,55 +30,74 @@ SIZE_UNIT_MAP = {"kB": 1,
"GB": 1024 * 1024}
def round4(v):
    """
    Round a value up to the nearest multiple of 4.

    :param v: an int or float value.
    :return: an int that is a multiple of 4 and not smaller than ``v``.
    """
    remainder = v % 4
    # A value already aligned to 4 must stay unchanged; adding
    # (4 - remainder) unconditionally would push it to the next multiple.
    if remainder == 0:
        return int(v)
    return int(v + (4 - remainder))
def instantiate_knob(name, recommend, knob_type, value_min=0, value_max=1, restart=False):
    """
    Create a Knob from a recommended value, coercing the fields to the knob type.

    :param name: the name of the knob.
    :param recommend: the recommended value. For bool knobs it may be a bool,
                      the string 'on'/'off', or an int.
    :param knob_type: the knob type; must be one of Knob.TYPE.ITEMS.
    :param value_min: lower bound (forced to 0 for bool knobs).
    :param value_max: upper bound (forced to 1 for bool knobs).
    :param restart: passed through to the Knob as its ``restart`` field.
    :return: a new Knob instance.
    :raise TypeError: if ``knob_type`` is not in Knob.TYPE.ITEMS.
    :raise ValueError: if a bool knob gets an unsupported recommended value.
    """
    if knob_type not in Knob.TYPE.ITEMS:
        raise TypeError("The type of parameter 'knob_type' is incorrect.")

    if knob_type == Knob.TYPE.INT:
        recommend = int(recommend)
        value_max = int(value_max)
        value_min = int(value_min)
    elif knob_type == Knob.TYPE.FLOAT:
        recommend = float(recommend)
        value_max = float(value_max)
        value_min = float(value_min)
    elif knob_type == Knob.TYPE.BOOL:
        value_type = type(recommend)
        if value_type is bool:
            recommend = 1 if recommend else 0
        # Test the value itself against 'on'/'off'. Testing the type object
        # could never match, so string inputs always raised ValueError.
        elif value_type is str and recommend in ('on', 'off'):
            recommend = 1 if recommend == 'on' else 0
        elif value_type is int:
            pass
        else:
            raise ValueError("The recommended value %r is invalid for the bool knob '%s'."
                             % (recommend, name))

        value_max = 1
        value_min = 0

    return Knob(name,
                recommend=recommend,
                min=value_min,
                max=value_max,
                type=knob_type,
                restart=restart)
def recommend_knobs(mode, metric):
advisor = OpenGaussKnobAdvisor(metric)
reporter = advisor.report
knobs = RecommendedKnobs()
if mode == "recommend":
if metric.uptime < 1:
advisor.report.print_bad(
reporter.print_bad(
"The database runs for a short period of time, and the database description may not be accumulated. "
"The recommendation result may be inaccurate."
)
elif metric.uptime < 12:
advisor.report.print_warn(
reporter.print_warn(
"The database runs for a short period of time, and the database description may not be accumulated. "
"The recommendation result may be inaccurate."
)
knobs.append_need_tune_knobs(advisor.shared_buffers,
advisor.max_connections,
advisor.max_prepared_transactions,
advisor.work_mem,
advisor.maintenance_work_mem,
advisor.effective_cache_size,
advisor.effective_io_concurrency,
advisor.wal_buffers,
advisor.random_page_cost,
advisor.default_statistics_target)
_, recommend_list = advisor.all_properties()
knobs.append_need_tune_knobs(*recommend_list)
elif mode == "tune":
knobs.append_need_tune_knobs(advisor.random_page_cost,
advisor.effective_io_concurrency,
advisor.work_mem)
knobs.append_only_report_knobs(advisor.shared_buffers,
advisor.max_connections,
advisor.max_prepared_transactions,
advisor.maintenance_work_mem,
advisor.effective_cache_size,
advisor.wal_buffers,
advisor.default_statistics_target)
tune_knobs = ["random_page_cost", "effective_io_concurrency", "work_mem"]
tune_list, recommend_list = advisor.all_properties(tune_knobs)
knobs.append_need_tune_knobs(*tune_list)
knobs.append_only_report_knobs(*recommend_list)
elif mode == "train":
knobs.append_need_tune_knobs(advisor.work_mem,
advisor.shared_buffers)
knobs.append_only_report_knobs(advisor.max_connections,
advisor.max_prepared_transactions,
advisor.maintenance_work_mem,
advisor.effective_cache_size,
advisor.effective_io_concurrency,
advisor.wal_buffers,
advisor.random_page_cost,
advisor.default_statistics_target)
tune_knobs = ["shared_buffers", "work_mem"]
tune_list, recommend_list = advisor.all_properties(tune_knobs)
knobs.append_need_tune_knobs(*tune_list)
knobs.append_only_report_knobs(*recommend_list)
knobs.report = advisor.report.generate
knobs.report = reporter.generate
return knobs
@ -130,7 +150,57 @@ class OpenGaussKnobAdvisor:
# Append metric and workload info to ReportMsg.
self.report.print_info(self.metric.to_dict())
def all_properties(self, tune_knobs=None):
    """Collect every Knob-valued member of this advisor and split them in two.

    :param tune_knobs: optional collection of member names whose knobs go
        into the first list; when None, no member is selected for it.
    :return: a ``(tune_list, recommend_list)`` pair. Members whose name is in
        ``tune_knobs`` land in ``tune_list``; all other Knob members land in
        ``recommend_list``. Members that are not Knob instances are skipped.
    """
    knob_members = inspect.getmembers(self, lambda obj: isinstance(obj, Knob))
    selected_names = [] if tune_knobs is None else tune_knobs

    tune_list, recommend_list = [], []
    for member_name, knob in knob_members:
        target = tune_list if member_name in selected_names else recommend_list
        target.append(knob)
    return tune_list, recommend_list
# Allocation of memory or storage I/O resources.
@cached_property
def max_process_memory(self):
    """Recommend ``max_process_memory`` and report on the current setting.

    The recommendation is derived from the total OS memory minus the minimum
    free memory, scaled by a retention ratio and divided by the number of
    gaussdb processes. The current setting is compared with the resulting
    range and a message is appended to the report. A knob is always returned
    because other recommendations read its ``recommend`` value.
    """
    # max_process_memory unit is kB.
    retention = 0.9  # retention corr
    retention_floor = 0.7
    available = self.metric.os_mem_total - self.metric.min_free_mem
    process_count = self.metric.nb_gaussdb

    # This is a simplified formula that developer gave.
    suitable_mem = round4(available * retention / process_count)
    min_mem = round4(available * retention_floor / process_count)

    current = self.metric["max_process_memory"]
    # Each check is evaluated independently, exactly like three separate ifs.
    checks = (
        (min_mem <= current <= suitable_mem,
         self.report.print_info,
         "We only found %s gaussdb process(es). "
         "In this case, your 'max_process_memory' setting may be just fitting."),
        (current > suitable_mem,
         self.report.print_warn,
         "We only found %s gaussdb process(es). "
         "In this case, your 'max_process_memory' setting may be a bit large."),
        (current < min_mem,
         self.report.print_bad,
         "We only found %s gaussdb process(es). "
         "In this case, your 'max_process_memory' setting is heavily small."),
    )
    for applies, emit, template in checks:
        if applies:
            emit(template % process_count)

    # Should always return the recommendation because other recommendations depend on it.
    return instantiate_knob(name="max_process_memory",
                            recommend=suitable_mem,
                            knob_type=Knob.TYPE.INT,
                            value_max=suitable_mem,
                            value_min=min_mem,
                            restart=True)
@cached_property
def max_connections(self):
max_conn = self.metric["max_connections"]
@ -178,34 +248,34 @@ class OpenGaussKnobAdvisor:
lower = max(20, cores * 3)
recommend = clip(recommend, max(20, cores * 5), max(100, cores * 7))
return Knob.new_instance(name="max_connections",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=True)
return instantiate_knob(name="max_connections",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=True)
# Should be based on work_mem.
if self.metric.os_mem_total > 16 * SIZE_UNIT_MAP["GB"]:
remain_mem = self.metric.os_mem_total * 0.85 - self.shared_buffers.default
elif self.metric.os_mem_total > 8 * SIZE_UNIT_MAP["GB"]:
remain_mem = self.metric.os_mem_total * 0.75 - self.shared_buffers.default
elif self.metric.os_mem_total < 4 * SIZE_UNIT_MAP["GB"]:
remain_mem = self.metric.os_mem_total * 0.6 - self.shared_buffers.default
if self.max_process_memory.recommend > 16 * SIZE_UNIT_MAP["GB"]:
remain_mem = self.max_process_memory.recommend * 0.85 - self.shared_buffers.recommend
elif self.max_process_memory.recommend > 8 * SIZE_UNIT_MAP["GB"]:
remain_mem = self.max_process_memory.recommend * 0.75 - self.shared_buffers.recommend
elif self.max_process_memory.recommend < 4 * SIZE_UNIT_MAP["GB"]:
remain_mem = self.max_process_memory.recommend * 0.6 - self.shared_buffers.recommend
else:
remain_mem = self.metric.os_mem_total * 0.7 - self.shared_buffers.default
remain_mem = self.max_process_memory.recommend * 0.7 - self.shared_buffers.recommend
# AP and HTAP
# The value of work_mem is adapted based on the value of max_connections.
work_mem = max(self.metric["work_mem"], self.metric.temp_file_size * 4)
lower = max(15, cores * 3)
recommend = max(remain_mem / (work_mem + 0.01), lower)
return Knob.new_instance(name="max_connections",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=recommend * 2,
value_min=lower,
restart=True)
return instantiate_knob(name="max_connections",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=recommend * 2,
value_min=lower,
restart=True)
@property
def max_prepared_transactions(self):
@ -220,18 +290,18 @@ class OpenGaussKnobAdvisor:
"indicating that the two-phase commit function is not used.")
return
if max_pt < max_conn.default:
if max_pt < max_conn.recommend:
self.report.print_bad("Most applications do not use XA prepared transactions, "
"so should set the max_prepared_transactions to 0. "
"If you do require prepared transactions, "
"you should set this equal to max_connections to avoid blocking. "
"May require increasing kernel memory parameters.")
return Knob.new_instance(name="max_prepared_transactions",
value_default=max_conn.default,
knob_type=Knob.TYPE.INT,
value_max=max_conn.max,
value_min=max_conn.min,
restart=True)
return instantiate_knob(name="max_prepared_transactions",
recommend=max_conn.recommend,
knob_type=Knob.TYPE.INT,
value_max=max_conn.max,
value_min=max_conn.min,
restart=True)
@cached_property
def shared_buffers(self):
@ -241,7 +311,7 @@ class OpenGaussKnobAdvisor:
but because database also relies on the operating system cache,
it is unlikely that an allocation of more than 40% of RAM to shared_buffers will
work better than a smaller amount. """
mem_total = self.metric.os_mem_total # unit: kB
mem_total = self.max_process_memory.recommend # unit: kB
if mem_total < 1 * SIZE_UNIT_MAP['GB']:
default = 0.15 * mem_total
elif mem_total > 8 * SIZE_UNIT_MAP['GB']:
@ -250,37 +320,38 @@ class OpenGaussKnobAdvisor:
default = 0.25 * mem_total
# The value of this knob means the number of maximum cached blocks.
recommend = default / self.metric.block_size
recommend = round4(default / self.metric.block_size)
if self.metric.is_64bit:
database_blocks = self.metric.all_database_size / self.metric.block_size
if database_blocks < recommend:
self.report.print_warn("The total size of all databases is less than the memory size. "
"Therefore, it is unnecessary to set shared_buffers to a large value.")
recommend = min(database_blocks, recommend)
upper = recommend * 1.15
lower = min(0.15 * mem_total / self.metric.block_size, recommend)
recommend = round4(min(database_blocks, recommend))
upper = round4(recommend * 1.15)
lower = round4(min(0.15 * mem_total / self.metric.block_size, recommend))
return Knob.new_instance(name="shared_buffers",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=True)
return instantiate_knob(name="shared_buffers",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=True)
else:
upper = min(recommend, 2 * SIZE_UNIT_MAP["GB"] / self.metric.block_size) # 32-bit OS only can use 2 GB mem.
lower = min(0.15 * mem_total / self.metric.block_size, recommend)
return Knob.new_instance(name="shared_buffers",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=True)
upper = round4(
min(recommend, 2 * SIZE_UNIT_MAP["GB"] / self.metric.block_size)) # 32-bit OS only can use 2 GB mem.
lower = round4(min(0.15 * mem_total / self.metric.block_size, recommend))
return instantiate_knob(name="shared_buffers",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=True)
@property
def work_mem(self):
temp_file_size = self.metric.temp_file_size
max_conn = self.max_connections.default
max_conn = self.max_connections.recommend
# This knob does not need to be modified.
if temp_file_size < 16 * SIZE_UNIT_MAP["MB"]:
@ -296,25 +367,25 @@ class OpenGaussKnobAdvisor:
return
# conservative operations
recommend = (self.metric.os_mem_total - self.shared_buffers.default) / (max_conn * 2)
recommend = (self.max_process_memory.recommend - self.shared_buffers.recommend) / (max_conn * 2)
upper = max(recommend, 256 * SIZE_UNIT_MAP["MB"])
lower = min(recommend, 64 * SIZE_UNIT_MAP["MB"])
return Knob.new_instance(name="work_mem",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
return instantiate_knob(name="work_mem",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
else:
recommend = (self.metric.os_mem_total - self.shared_buffers.default) / max_conn
recommend = (self.max_process_memory.recommend - self.shared_buffers.recommend) / max_conn
upper = max(recommend, 1 * SIZE_UNIT_MAP["GB"])
lower = min(recommend, 64 * SIZE_UNIT_MAP["MB"])
return Knob.new_instance(name="work_mem",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
return instantiate_knob(name="work_mem",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
@property
def maintenance_work_mem(self):
@ -322,23 +393,23 @@ class OpenGaussKnobAdvisor:
@property
def effective_cache_size(self):
upper = self.metric.os_mem_total * 0.75
lower = self.shared_buffers.default
upper = self.max_process_memory.recommend * 0.75
lower = self.shared_buffers.recommend
if self.metric.workload_type == WORKLOAD_TYPE.TP:
return Knob.new_instance(name="effective_cache_size",
value_default=lower,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
return instantiate_knob(name="effective_cache_size",
recommend=lower,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
else:
return Knob.new_instance(name="effective_cache_size",
value_default=upper,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
return instantiate_knob(name="effective_cache_size",
recommend=upper,
knob_type=Knob.TYPE.INT,
value_max=upper,
value_min=lower,
restart=False)
@property
def effective_io_concurrency(self):
@ -346,19 +417,19 @@ class OpenGaussKnobAdvisor:
if 0 <= self.metric["effective_io_concurrency"] <= 2: # No need for recommendation.
return
return Knob.new_instance(name="effective_io_concurrency",
value_default=2,
knob_type=Knob.TYPE.INT,
value_max=4,
value_min=0,
restart=False)
return instantiate_knob(name="effective_io_concurrency",
recommend=2,
knob_type=Knob.TYPE.INT,
value_max=4,
value_min=0,
restart=False)
else:
return Knob.new_instance(name="effective_io_concurrency",
value_default=200,
knob_type=Knob.TYPE.INT,
value_max=250,
value_min=150,
restart=False)
return instantiate_knob(name="effective_io_concurrency",
recommend=200,
knob_type=Knob.TYPE.INT,
value_max=250,
value_min=150,
restart=False)
# Background writer.
@property
@ -370,53 +441,55 @@ class OpenGaussKnobAdvisor:
blocks_16m = 16 * SIZE_UNIT_MAP["MB"] / self.metric.block_size
# Generally, this value is sufficient. A large value does not bring better performance.
if wal_buffers >= blocks_16m:
if wal_buffers > self.shared_buffers.default * 1 / 32:
if wal_buffers > self.shared_buffers.recommend * 1 / 32:
self.report.print_bad(
"The value of wal_buffers is too high. Generally, a large value does not bring better performance.")
return Knob.new_instance(name="wal_buffers",
value_default=self.shared_buffers.default * 1 / 32,
knob_type=Knob.TYPE.INT,
value_max=max(self.shared_buffers.default * 1 / 32, blocks_16m),
value_min=min(self.shared_buffers.default * 1 / 64, blocks_16m),
restart=True)
"The value of wal_buffers is too high. Generally, "
"a large value does not bring better performance.")
return instantiate_knob(name="wal_buffers",
recommend=self.shared_buffers.recommend * 1 / 32,
knob_type=Knob.TYPE.INT,
value_max=max(self.shared_buffers.recommend * 1 / 32, blocks_16m),
value_min=min(self.shared_buffers.recommend * 1 / 64, blocks_16m),
restart=True)
else:
self.report.print_warn(
"The value of wal_buffers is a bit high. Generally, an excessively large value does not bring "
"The value of wal_buffers is a bit high. Generally, "
"an excessively large value does not bring "
"better performance. You can also set this parameter to -1. "
"The database automatically performs adaptation. "
)
return Knob.new_instance(name="wal_buffers",
value_default=self.shared_buffers.default * 1 / 32,
knob_type=Knob.TYPE.INT,
value_max=self.shared_buffers.default * 1 / 32,
value_min=blocks_16m,
restart=True)
elif wal_buffers < self.shared_buffers.default * 1 / 64:
return Knob.new_instance(name="wal_buffers",
value_default=-1,
knob_type=Knob.TYPE.INT,
value_max=-1,
value_min=-1,
restart=True)
return instantiate_knob(name="wal_buffers",
recommend=self.shared_buffers.recommend * 1 / 32,
knob_type=Knob.TYPE.INT,
value_max=self.shared_buffers.recommend * 1 / 32,
value_min=blocks_16m,
restart=True)
elif wal_buffers < self.shared_buffers.recommend * 1 / 64:
return instantiate_knob(name="wal_buffers",
recommend=-1,
knob_type=Knob.TYPE.INT,
value_max=-1,
value_min=-1,
restart=True)
# Optimizer
@property
def random_page_cost(self):
if self.metric.is_hdd:
# Currently, with the rise of storage technology, the default value of 4 is too large.
return Knob.new_instance(name="random_page_cost",
value_default=3,
knob_type=Knob.TYPE.FLOAT,
value_max=3,
value_min=2,
restart=False)
return instantiate_knob(name="random_page_cost",
recommend=3,
knob_type=Knob.TYPE.FLOAT,
value_max=3,
value_min=2,
restart=False)
else:
return Knob.new_instance(name="random_page_cost",
value_default=1,
knob_type=Knob.TYPE.FLOAT,
value_max=2,
value_min=1,
restart=False)
return instantiate_knob(name="random_page_cost",
recommend=1,
knob_type=Knob.TYPE.FLOAT,
value_max=2,
value_min=1,
restart=False)
@property
def default_statistics_target(self):
@ -443,12 +516,12 @@ class OpenGaussKnobAdvisor:
recommend = 600
else:
recommend = 800
return Knob.new_instance(name="default_statistics_target",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=1000,
value_min=100,
restart=False)
return instantiate_knob(name="default_statistics_target",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=1000,
value_min=100,
restart=False)
elif workload_type == WORKLOAD_TYPE.TP:
if read_write_ratio < 0.5:
recommend = 10
@ -459,16 +532,52 @@ class OpenGaussKnobAdvisor:
else:
recommend = 100
return Knob.new_instance(name="default_statistics_target",
value_default=recommend,
knob_type=Knob.TYPE.INT,
value_max=150,
value_min=10,
restart=False)
return instantiate_knob(name="default_statistics_target",
recommend=recommend,
knob_type=Knob.TYPE.INT,
value_max=150,
value_min=10,
restart=False)
else:
return Knob.new_instance(name="default_statistics_target",
value_default=100,
knob_type=Knob.TYPE.INT,
value_max=300,
value_min=80,
restart=False)
return instantiate_knob(name="default_statistics_target",
recommend=100,
knob_type=Knob.TYPE.INT,
value_max=300,
value_min=80,
restart=False)
# Recommendation for the boolean knob `enable_nestloop`.
# When the workload type is not TP and the current setting is 'on', a warning
# is added to the report. The knob built here is a BOOL knob bounded by 0 and 1
# with a recommended value of 0 and restart=False.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the `return` belongs to the `if` branch; if it does, this property
# evaluates to None when the condition is false -- confirm against the file.
@property
def enable_nestloop(self):
if self.metric.workload_type != WORKLOAD_TYPE.TP and self.metric['enable_nestloop'] == 'on':
self.report.print_warn("Detect that your appointed workload does not seem to a TP workload, "
"hence disable enable_nestloop is better.")
return instantiate_knob(name="enable_nestloop",
recommend=0,
knob_type=Knob.TYPE.BOOL,
value_max=1,
value_min=0,
restart=False)
# Recommendation for the boolean knob `enable_mergejoin`.
# When the workload type is not TP and the current setting is 'on', a warning
# is added to the report. The knob built here is a BOOL knob bounded by 0 and 1
# with a recommended value of 0 and restart=False.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the `return` belongs to the `if` branch; if it does, this property
# evaluates to None when the condition is false -- confirm against the file.
@property
def enable_mergejoin(self):
if self.metric.workload_type != WORKLOAD_TYPE.TP and self.metric['enable_mergejoin'] == 'on':
self.report.print_warn("Detect that your appointed workload does not seem to a TP workload, "
"hence disable enable_mergejoin is better.")
return instantiate_knob(name="enable_mergejoin",
recommend=0,
knob_type=Knob.TYPE.BOOL,
value_max=1,
value_min=0,
restart=False)
# Recommendation for the boolean knob `enable_hashjoin`.
# When the current setting is 'off', a warning suggesting to enable it is added
# to the report. The knob built here is a BOOL knob bounded by 0 and 1 with a
# recommended value of 1 and restart=False.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the `return` belongs to the `if` branch; if it does, this property
# evaluates to None when the setting is not 'off' -- confirm against the file.
@property
def enable_hashjoin(self):
if self.metric['enable_hashjoin'] == 'off':
self.report.print_warn("Detect that you disabled enable_hashjoin. "
"We suggest that if there is no special reason, please enable enable_hashjoin.")
return instantiate_knob(name="enable_hashjoin",
recommend=1,
knob_type=Knob.TYPE.BOOL,
value_max=1,
value_min=0,
restart=False)

View File

@ -13,60 +13,71 @@ MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
"""
import logging
import csv
DELIMITER = ","
class Recorder:
def __init__(self, filepath, verbose=True):
def __init__(self, filepath):
"""
Record each tuning process and write it to a file.
"""
logger = logging.getLogger('recorder')
fmt = logging.Formatter("%(asctime)s: %(message)s")
self._fd = open(filepath, 'w', newline='')
self.writer = csv.writer(self._fd, delimiter=DELIMITER,
quotechar='\\', quoting=csv.QUOTE_MINIMAL)
file_handler = logging.FileHandler(filepath, mode='w') # no appending
file_handler.setFormatter(fmt)
logger.addHandler(file_handler)
if verbose:
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(fmt)
logger.addHandler(stream_handler)
logger.setLevel(logging.INFO)
self.logger = logger
self.logger.info('Recorder is starting.')
# Record the information of best knobs.
self.best_id = None
self.names = None
self.best_values = None
self.best_reward = None
self.count = 0
self.current_id = 0
def prompt_message(self, msg, *args, **kwargs):
self.logger.info('[%d] ' % self.count + msg, *args, **kwargs)
logging.info('[Recorder %d]: ' + msg, self.count, *args, **kwargs)
logging.info("[Recorder %d]: " + msg, self.current_id, *args, **kwargs)
def record(self, reward, knobs):
def record(self, score, used_mem, reward, names, values):
"""
Record the reward value and knobs of the step,
and update the maximum reward value and the corresponding knobs.
:values: A list contains each knob value. The knob value is str type, not denormalized numeric.
"""
record = (reward, knobs)
if self.best_reward is None:
self.best_reward = record
else:
self.best_reward = max(record, self.best_reward, key=lambda r: r[0])
_names = tuple(names)
_values = tuple(values)
if self.current_id == 0:
header = ("id",) + _names + ("score", "used_mem", "reward", "best_reward", "best_id")
self.writer.writerow(header)
self.best_id = 0
self.names = names
self.best_values = values
self.best_reward = reward
self.logger.info('[%d] Current reward is %f, knobs: %s.', self.count, reward, knobs)
self.logger.info('[%d] Best reward is %f, knobs: %s.', self.count, self.best_reward[0], self.best_reward[1])
if reward >= self.best_reward:
self.best_id = self.current_id
self.best_values = values
self.best_reward = reward
self.count += 1
record = (self.current_id,) + _values + (score, used_mem, reward, self.best_reward, self.best_id)
self.writer.writerow(record)
self._fd.flush()
self.current_id += 1
def give_best(self, rk):
"""
Give the knobs with the maximum reward value to the parameter RecommendKnobs (RK) object.
So that RK can update itself with the passed knobs.
"""
reward, best_knobs = self.best_reward
self.logger.info('The tuning process is complete. The best reward is %f, best knobs are:\n%s.',
reward, best_knobs)
logging.info("The tuning process is finished. The best reward is %f, and best knobs (%s) are %s.",
self.best_reward, self.names, self.best_values)
for name, setting in best_knobs.items():
rk[name].default = setting
for name, value in zip(self.names, self.best_values):
knob = rk[name]
knob.current = knob.to_numeric(value) # self.current is always a normalized numeric.
def __del__(self):
self._fd.flush()
self._fd.close()

View File

@ -14,17 +14,22 @@ See the Mulan PSL v2 for more details.
"""
import os
import re
BLANK = " "
RED_FMT = "\033[31;1m{}\033[0m"
GREEN_FMT = "\033[32;1m{}\033[0m"
YELLOW_FMT = "\033[33;1m{}\033[0m"
WHITE_FMT = "\033[37;1m{}\033[0m"
config = None
class cached_property:
"""
A decorator for caching properties in classes.
"""
def __init__(self, func):
self.func = func
@ -65,3 +70,54 @@ def construct_dividing_line(title='', padding='-'):
return padding * term_width
else:
return padding * side_width + ' ' + title + ' ' + padding * side_width
def to_tuples(text):
    """Parse a text table into a list of tuples of stripped cell strings.

    The table must contain a separator line made of '-', '|' and '+'
    characters (such as '-----+-----+------'); the positions of its '+'
    characters define the column boundaries. Lines above the separator are
    ignored. Below it, blank lines and '(N rows)' footers are skipped. A line
    that is exactly as wide as the separator and ends with '+' is treated as
    wrapped: the following line's last column is appended to the last cell.

    :param text: the raw table text.
    :return: a list with one tuple per record, or [] if no separator is found.
    """
    all_lines = text.splitlines()
    # Find separator line such as '-----+-----+------'.
    sep_index = next(
        (idx for idx, candidate in enumerate(all_lines)
         if re.match(r'^\s*?[-|+]+\s*$', candidate)),
        -1,
    )
    if sep_index < 0:
        return []

    separator = all_lines[sep_index]
    width = len(separator)
    plus_positions = [pos for pos, char in enumerate(separator) if char == '+']
    # Record each value start location and end location.
    spans = list(zip([0] + plus_positions, plus_positions + [width]))
    last_begin, last_stop = spans[-1]

    records = []
    cells = []
    continuing = False
    # Continue to parse each line.
    for line in all_lines[sep_index + 1:]:
        # Prevent from parsing bottom lines.
        if not line.strip() or re.match(r'\(\d+ rows?\)', line):
            continue

        if continuing:
            # Continuation of a wrapped record: extend the last cell only.
            cells[-1] += line[last_begin + 1: last_stop].strip()
        else:
            # Increase 1 to start index to go over vertical bar (|).
            cells.extend(line[begin + 1: stop].strip() for begin, stop in spans)

        if len(line) == width and re.match(r'.*\s*\+$', line):
            continuing = True
            cells[-1] = cells[-1].strip('+').strip(BLANK) + BLANK
        else:
            records.append(tuple(cells))
            cells = []
            continuing = False
    return records

View File

@ -15,7 +15,7 @@
logfile = log/opengauss_tuner.log
output_tuning_result = tuned_knobs.json
verbose = on
recorder_file = log/recorder.log
recorder_file = log/recorder.csv
tune_strategy = auto # rl, gop or auto
drop_cache = on # You must modify the permission of the login user in the /etc/sudoers file and grant the NOPASSWD permission to the user.
used_mem_penalty_term = 1e-9 # Prevent taking up more memory.
@ -45,12 +45,26 @@ particle_nums = 3 # A larger value indicates higher accuracy but slower speed.
[Benchmark]
# Some examples of benchmark script implementation are provided in the benchmark directory.
# Implement the benchmark script based on the specified interface.
benchmark_script = tpch
# These parameters are used to replace the path and cmd in the benchmark script.
benchmark_script = period # Built-in benchmarks: period, tpcc, tpch, tpcds, sysbench (not tested)
# The following parameters are used to replace the path and cmd in the benchmark script.
# The path and cmd variables in the benchmark script are examples. You can set the parameters
# by referring to the benchmark script file in the benchmark directory.
benchmark_path = # If this parameter is blank, the default path in the benchmark script is used.
benchmark_cmd = # If this parameter is blank, the default cmd in the benchmark script is used.
# If this parameter is blank, the default path in the benchmark script is used.
# For built-in benchmarks, only 'period' does not need it.
benchmark_path =
# If this parameter is blank, the default cmd in the benchmark script is used.
# For built-in benchmarks, only 'tpcc' needs it.
benchmark_cmd =
# Optional parameter. Only for 'period'.
# By specifying this parameter, the user can measure the performance of a periodic task.
# The unit of this parameter is seconds.
# The default value is 60 seconds, which means that the cycle of the workload is 60 seconds.
# This value should be greater than or equal to the cycle of the workload itself;
# a smaller value can easily distort the measurement.
benchmark_period = 60
#------------------------------------------------------------------------------
# Tuning Knobs Configurations

View File

@ -15,6 +15,7 @@ See the Mulan PSL v2 for more details.
import logging
import os
import signal
from logging import handlers
from tuner import benchmark
@ -47,6 +48,7 @@ def prompt_restart_risks():
def set_logger(filename):
logger = logging.getLogger()
logger.setLevel(logging.INFO)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
@ -100,6 +102,7 @@ def procedure_main(mode, db_info, config):
logging.info("Configurations: %s.", config)
if config['tuning_list'].strip() != '' and mode != 'recommend':
print("You have configured the tuning list, so use this list to tune.")
knobs = load_knobs_from_json_file(config['tuning_list'])
else:
print("Start to recommend knobs. Just a moment, please.")
@ -123,16 +126,32 @@ def procedure_main(mode, db_info, config):
mem_penalty=config['used_mem_penalty_term'])
env.set_tuning_knobs(knobs)
print('The benchmark will start to run iteratively. '
'This process may take a long time. Please wait a moment.')
if mode == 'train':
rl_model('train', env, config)
elif mode == 'tune':
if config['tune_strategy'] == 'rl':
rl_model('tune', env, config)
elif config['tune_strategy'] == 'gop':
global_search(env, config)
else:
raise ValueError('Incorrect tune strategy: %s.' % config['tune_strategy'])
# Run once the performance under the default knob configuration.
# Its id is 0, aka the first one.
original_knobs = db_agent.get_default_normalized_vector()
env.step(original_knobs)
try:
if config['tune_strategy'] == 'rl':
rl_model('tune', env, config)
elif config['tune_strategy'] == 'gop':
global_search(env, config)
else:
raise ValueError('Incorrect tune strategy: %s.' % config['tune_strategy'])
except KeyboardInterrupt:
signal.signal(signal.SIGINT, signal.SIG_IGN)
print("Trigger an interrupt via the keyboard. "
"Continue to generate current tuning results.")
# Rollback/reset to the original/initial knobs while the tuning process is finished.
db_agent.set_knob_normalized_vector(original_knobs)
# Modify the variable `knobs` with tuned result.
recorder.give_best(knobs)
else:
raise ValueError('Incorrect mode value: %s.' % mode)
@ -208,7 +227,7 @@ def global_search(env, config):
from tuner.algorithms.pso import Pso
def performance_function(v):
s, r, d, _ = env.step(v, False)
s, r, d, _ = env.step(v)
return -r # Use -reward because PSO wishes to minimize.
pso = Pso(