MXS-2271 Refactor disk space checking

The functions are now in MonitorServer. Disk space can only be checked
during specific ticks. If a server misses a tick (e.g. is down) it will
be checked after disk_space_check_interval has passed.
This commit is contained in:
Esa Korhonen 2019-03-13 13:05:50 +02:00
parent cc3891a43c
commit 3ac1b40b63
5 changed files with 96 additions and 68 deletions

View File

@ -22,6 +22,7 @@
#include <mutex>
#include <openssl/sha.h>
#include <maxbase/semaphore.hh>
#include <maxbase/stopwatch.hh>
#include <maxbase/worker.hh>
#include <maxbase/iterator.hh>
#include <maxscale/config.hh>
@ -181,7 +182,7 @@ public:
static const int BEING_DRAINED_OFF = 3;
static const int BEING_DRAINED_ON = 4;
MonitorServer(SERVER* server);
MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits);
~MonitorServer();
@ -239,15 +240,38 @@ public:
void log_state_change();
SERVER* server = nullptr; /**< The server being monitored */
MYSQL* con = nullptr; /**< The MySQL connection */
bool log_version_err = true;
int mon_err_count = 0;
uint64_t mon_prev_status = -1; /**< Status before starting the current monitor loop */
uint64_t pending_status = 0; /**< Status during current monitor loop */
int64_t disk_space_checked = 0; /**< When was the disk space checked the last time */
int status_request = NO_CHANGE; /**< Is admin requesting Maintenance=ON/OFF on the
* server? */
/**
* Is it ok to update the disk space status of this server? Only checks that valid disk space
* limits are known for the server and that the check has not failed before. The disk space check
* interval is not considered here; it should be checked by the monitor.
*
* @return True, if the disk space should be checked, false otherwise.
*/
bool can_update_disk_space_status() const;
/**
* @brief Update the disk space status of this server.
*
* After the call, the bit @c SERVER_DISK_SPACE_EXHAUSTED will be set on
* @c pending_status of this server if the disk space is exhausted
* or cleared if it is not.
*/
void update_disk_space_status();
SERVER* server = nullptr; /**< The server being monitored */
MYSQL* con = nullptr; /**< The MySQL connection */
bool log_version_err = true;
int mon_err_count = 0;
uint64_t mon_prev_status = -1; /**< Status before starting the current monitor loop */
uint64_t pending_status = 0; /**< Status during current monitor loop */
int status_request = NO_CHANGE; /**< Is admin requesting Maintenance=ON/OFF on the
* server? */
private:
const SERVER::DiskSpaceLimits& monitor_limits; /**< Monitor-level disk-space limits */
bool ok_to_check_disk_space {true}; /**< Set to false if check fails */
};
/**
@ -472,6 +496,14 @@ protected:
std::string child_nodes(MonitorServer* parent);
/**
* Checks if it's time to check disk space. If true is returned, the internal timer is reset
* so that the next true is only returned once disk_space_check_interval has again passed.
*
* @return True if disk space should be checked
*/
bool check_disk_space_this_tick();
/**
* Contains monitor base class settings. Since monitors are stopped before a setting change,
* the items cannot be modified while a monitor is running. No locking required.
@ -488,10 +520,9 @@ protected:
time_t journal_max_age {0}; /**< Maximum age of journal file */
SERVER::DiskSpaceLimits disk_space_limits; /**< Disk space thresholds */
/**
* How often should a disk space check be made at most, in milliseconds. Negative values imply
* disabling. */
int64_t disk_space_check_interval = -1;
// How often should a disk space check be made at most. Negative values imply disabling.
maxbase::Duration disk_space_check_interval {-1};
MonitorServer::ConnectionSettings conn_settings;
};
@ -552,6 +583,8 @@ private:
FILE* open_data_file(Monitor* monitor, char* path);
int get_data_file_path(char* path) const;
mxb::StopWatch m_disk_space_checked; /**< When was disk space checked the last time */
};
/**
@ -656,24 +689,6 @@ protected:
return atomic_load_int32(&m_shutdown) != 0;
}
/**
* @brief Should the disk space status be updated.
*
* @param pMonitored_server The monitored server in question.
*
* @return True, if the disk space should be checked, false otherwise.
*/
bool should_update_disk_space_status(const MonitorServer* pMonitored_server) const;
/**
* @brief Update the disk space status of a server.
*
* After the call, the bit @c SERVER_DISK_SPACE_EXHAUSTED will be set on
* @c pMonitored_server->pending_status if the disk space is exhausted
* or cleared if it is not.
*/
void update_disk_space_status(MonitorServer* pMonitored_server);
/**
* @brief Configure the monitor.
*

View File

@ -898,19 +898,27 @@ void Monitor::stop()
bool Monitor::configure(const MXS_CONFIG_PARAMETER* params)
{
m_settings.conn_settings.read_timeout = params->get_integer(CN_BACKEND_READ_TIMEOUT);
m_settings.conn_settings.write_timeout = params->get_integer(CN_BACKEND_WRITE_TIMEOUT);
m_settings.conn_settings.connect_timeout = params->get_integer(CN_BACKEND_CONNECT_TIMEOUT);
m_settings.conn_settings.connect_attempts = params->get_integer(CN_BACKEND_CONNECT_ATTEMPTS);
m_settings.interval = params->get_integer(CN_MONITOR_INTERVAL);
m_settings.journal_max_age = params->get_integer(CN_JOURNAL_MAX_AGE);
m_settings.script_timeout = params->get_integer(CN_SCRIPT_TIMEOUT);
m_settings.script = params->get_string(CN_SCRIPT);
m_settings.events = params->get_enum(CN_EVENTS, mxs_monitor_event_enum_values);
m_settings.disk_space_check_interval = params->get_integer(CN_DISK_SPACE_CHECK_INTERVAL);
m_settings.conn_settings.read_timeout = params->get_integer(CN_BACKEND_READ_TIMEOUT);
m_settings.conn_settings.write_timeout = params->get_integer(CN_BACKEND_WRITE_TIMEOUT);
m_settings.conn_settings.connect_timeout = params->get_integer(CN_BACKEND_CONNECT_TIMEOUT);
m_settings.conn_settings.connect_attempts = params->get_integer(CN_BACKEND_CONNECT_ATTEMPTS);
m_settings.conn_settings.username = params->get_string(CN_USER);
m_settings.conn_settings.password = params->get_string(CN_PASSWORD);
// Disk check interval is given in ms, duration is constructed from seconds.
auto dsc_interval = params->get_integer(CN_DISK_SPACE_CHECK_INTERVAL);
// 0 implies disabling -> save negative value to interval.
m_settings.disk_space_check_interval = (dsc_interval > 0) ?
mxb::Duration(static_cast<double>(dsc_interval) / 1000) : mxb::Duration(-1);
// The monitor serverlist has already been checked to be valid. Empty value is ok too.
// First, remove all servers.
while (!m_servers.empty())
@ -981,7 +989,7 @@ Monitor::~Monitor()
// Wraps the given server in a newly allocated MonitorServer and appends it to m_servers, then
// calls server_added(). Asserts that the monitor is not running.
void Monitor::add_server(SERVER* server)
{
mxb_assert(state() != MONITOR_STATE_RUNNING);
auto new_server = new MonitorServer(server);
// The monitor-level disk space limits are handed to the MonitorServer as well.
auto new_server = new MonitorServer(server, m_settings.disk_space_limits);
m_servers.push_back(new_server);
server_added(server);
}
@ -2282,6 +2290,21 @@ void Monitor::populate_services()
}
}
/**
 * Decide whether disk space should be checked during the current tick.
 *
 * True is returned only when checking is enabled (the configured interval is positive) and more
 * time than that interval has elapsed on the disk space stopwatch. In that case the stopwatch is
 * restarted before returning.
 *
 * @return True if disk space should be checked this tick
 */
bool Monitor::check_disk_space_this_tick()
{
    const auto interval = m_settings.disk_space_check_interval;

    // A non-positive interval means disk space checking is disabled.
    if (!(interval.secs() > 0))
    {
        return false;
    }

    // Not enough time has passed since the timer was last restarted.
    if (!(m_disk_space_checked.split() > interval))
    {
        return false;
    }

    // Restart the timer regardless of whether the upcoming checks succeed. This way, disk space
    // is always checked during the same tick for all servers.
    m_disk_space_checked.restart();
    return true;
}
MonitorWorker::MonitorWorker(const string& name, const string& module)
: Monitor(name, module)
, m_master(NULL)
@ -2393,22 +2416,9 @@ int64_t MonitorWorker::get_time_ms()
return t.tv_sec * 1000 + (t.tv_nsec / 1000000);
}
// Tells whether a disk space check may be performed on a server.
bool MonitorWorker::should_update_disk_space_status(const MonitorServer* pMs) const
bool MonitorServer::can_update_disk_space_status() const
{
bool should_check = false;
// Requires a positive check interval, a server for which checking has not been disabled, and
// limits defined either in the monitor settings or on the server.
if ((m_settings.disk_space_check_interval > 0)
&& (pMs->disk_space_checked != -1) // -1 means disabled
&& (!m_settings.disk_space_limits.empty() || pMs->server->have_disk_space_limits()))
{
int64_t now = get_time_ms();
// Only check if more than the configured interval has passed since the last recorded check.
if (now - pMs->disk_space_checked > m_settings.disk_space_check_interval)
{
should_check = true;
}
}
return should_check;
// Checking is allowed only while ok_to_check_disk_space is still set and limits are defined
// either at the monitor level or on the server itself. The check interval is not considered here.
return ok_to_check_disk_space && (!monitor_limits.empty() || server->have_disk_space_limits());
}
namespace
@ -2439,8 +2449,9 @@ bool check_disk_space_exhausted(MonitorServer* pMs,
}
}
void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
void MonitorServer::update_disk_space_status()
{
auto pMs = this; // TODO: Clean
std::map<std::string, disk::SizesAndName> info;
int rv = disk::get_info_by_path(pMs->con, &info);
@ -2451,7 +2462,7 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
auto dst = pMs->server->get_disk_space_limits();
if (dst.empty())
{
dst = m_settings.disk_space_limits;
dst = monitor_limits;
}
bool disk_space_exhausted = false;
@ -2512,8 +2523,6 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
{
pMs->pending_status &= ~SERVER_DISK_SPACE_EXHAUSTED;
}
pMs->disk_space_checked = get_time_ms();
}
else
{
@ -2522,7 +2531,7 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
if (mysql_errno(pMs->con) == ER_UNKNOWN_TABLE)
{
// Disable disk space checking for this server.
pMs->disk_space_checked = -1;
pMs->ok_to_check_disk_space = false;
MXS_ERROR("Disk space cannot be checked for %s at %s, because either the "
"version (%s) is too old, or the DISKS information schema plugin "
@ -2575,6 +2584,8 @@ void MonitorWorkerSimple::tick()
{
pre_tick();
const bool should_update_disk_space = check_disk_space_this_tick();
for (MonitorServer* pMs : m_servers)
{
if (!pMs->server->is_in_maint())
@ -2589,9 +2600,9 @@ void MonitorWorkerSimple::tick()
pMs->clear_pending_status(SERVER_AUTH_ERROR);
pMs->set_pending_status(SERVER_RUNNING);
if (should_update_disk_space_status(pMs))
if (should_update_disk_space && pMs->can_update_disk_space_status())
{
update_disk_space_status(pMs);
pMs->update_disk_space_status();
}
update_server_status(pMs);
@ -2742,9 +2753,9 @@ bool MonitorWorker::immediate_tick_required() const
}
}
// Constructs a MonitorServer for the given server.
MonitorServer::MonitorServer(SERVER* server)
// monitor_limits is stored in a reference member, not copied: the object passed in must stay
// alive for as long as this MonitorServer is used.
MonitorServer::MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits)
: server(server)
, disk_space_checked(maxscale::MonitorWorker::get_time_ms()) // Pretend disk space was just checked.
, monitor_limits(monitor_limits)
{
}

View File

@ -334,7 +334,7 @@ json_t* MariaDBMonitor::to_json() const
*
* @param server The server to update
*/
void MariaDBMonitor::update_server(MariaDBServer* server)
void MariaDBMonitor::update_server(MariaDBServer* server, bool time_to_update_disk_space)
{
MonitorServer* mon_srv = server->m_server_base;
mxs_connect_result_t conn_status = mon_srv->ping_or_connect(m_settings.conn_settings);
@ -361,9 +361,9 @@ void MariaDBMonitor::update_server(MariaDBServer* server)
// If permissions are ok, continue.
if (!server->has_status(SERVER_AUTH_ERROR))
{
if (should_update_disk_space_status(mon_srv))
if (time_to_update_disk_space && mon_srv->can_update_disk_space_status())
{
update_disk_space_status(mon_srv);
mon_srv->update_disk_space_status();
}
// Query MariaDBServer specific data
@ -433,10 +433,12 @@ void MariaDBMonitor::tick()
mon_srv->mon_prev_status = status;
}
bool should_update_disk_space = check_disk_space_this_tick();
// Query all servers for their status.
for (MariaDBServer* server : m_servers)
{
update_server(server);
update_server(server, should_update_disk_space);
if (server->m_topology_changed)
{
m_cluster_topology_changed = true;

View File

@ -255,7 +255,7 @@ private:
MariaDBServer* get_server(SERVER* server);
// Cluster discovery and status assignment methods, top levels
void update_server(MariaDBServer* server);
void update_server(MariaDBServer* server, bool time_to_update_disk_space);
void update_topology();
void build_replication_graph();
void assign_new_master(MariaDBServer* new_master);

View File

@ -159,7 +159,7 @@ void MariaDBMonitor::Test::init_servers(int count)
{
// Server contents mostly undefined
auto base_server = Server::create_test_server();
MonitorServer* mon_server = new MonitorServer(base_server);
MonitorServer* mon_server = new MonitorServer(base_server, m_monitor->m_settings.disk_space_limits);
MariaDBServer* mariadb_server = new MariaDBServer(mon_server, i - 1, m_use_hostnames, true);
if (m_use_hostnames)