MXS-2271 Refactor disk space checking
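The disk space checking functions (can_update_disk_space_status and update_disk_space_status) are now members of MonitorServer. Disk space is only checked during specific monitor ticks, which occur once disk_space_check_interval has passed since the previous check tick. If a server misses such a tick (e.g. because it is down), it is checked at the next one, i.e. after disk_space_check_interval has passed again.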
This commit is contained in:
@ -22,6 +22,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
#include <maxbase/semaphore.hh>
|
#include <maxbase/semaphore.hh>
|
||||||
|
#include <maxbase/stopwatch.hh>
|
||||||
#include <maxbase/worker.hh>
|
#include <maxbase/worker.hh>
|
||||||
#include <maxbase/iterator.hh>
|
#include <maxbase/iterator.hh>
|
||||||
#include <maxscale/config.hh>
|
#include <maxscale/config.hh>
|
||||||
@ -181,7 +182,7 @@ public:
|
|||||||
static const int BEING_DRAINED_OFF = 3;
|
static const int BEING_DRAINED_OFF = 3;
|
||||||
static const int BEING_DRAINED_ON = 4;
|
static const int BEING_DRAINED_ON = 4;
|
||||||
|
|
||||||
MonitorServer(SERVER* server);
|
MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits);
|
||||||
|
|
||||||
~MonitorServer();
|
~MonitorServer();
|
||||||
|
|
||||||
@ -239,15 +240,38 @@ public:
|
|||||||
|
|
||||||
void log_state_change();
|
void log_state_change();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this server ok to update disk space status. Only checks if the server knows of valid disk space
|
||||||
|
* limits settings and that the check has not failed before. Disk space check interval should be
|
||||||
|
* checked by the monitor.
|
||||||
|
*
|
||||||
|
* @return True, if the disk space should be checked, false otherwise.
|
||||||
|
*/
|
||||||
|
bool can_update_disk_space_status() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Update the disk space status of a server.
|
||||||
|
*
|
||||||
|
* After the call, the bit @c SERVER_DISK_SPACE_EXHAUSTED will be set on
|
||||||
|
* @c pMonitored_server->pending_status if the disk space is exhausted
|
||||||
|
* or cleared if it is not.
|
||||||
|
*/
|
||||||
|
void update_disk_space_status();
|
||||||
|
|
||||||
SERVER* server = nullptr; /**< The server being monitored */
|
SERVER* server = nullptr; /**< The server being monitored */
|
||||||
MYSQL* con = nullptr; /**< The MySQL connection */
|
MYSQL* con = nullptr; /**< The MySQL connection */
|
||||||
bool log_version_err = true;
|
bool log_version_err = true;
|
||||||
int mon_err_count = 0;
|
int mon_err_count = 0;
|
||||||
|
|
||||||
uint64_t mon_prev_status = -1; /**< Status before starting the current monitor loop */
|
uint64_t mon_prev_status = -1; /**< Status before starting the current monitor loop */
|
||||||
uint64_t pending_status = 0; /**< Status during current monitor loop */
|
uint64_t pending_status = 0; /**< Status during current monitor loop */
|
||||||
int64_t disk_space_checked = 0; /**< When was the disk space checked the last time */
|
|
||||||
int status_request = NO_CHANGE; /**< Is admin requesting Maintenance=ON/OFF on the
|
int status_request = NO_CHANGE; /**< Is admin requesting Maintenance=ON/OFF on the
|
||||||
* server? */
|
* server? */
|
||||||
|
private:
|
||||||
|
const SERVER::DiskSpaceLimits& monitor_limits; /**< Monitor-level disk-space limits */
|
||||||
|
|
||||||
|
bool ok_to_check_disk_space {true}; /**< Set to false if check fails */
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -472,6 +496,14 @@ protected:
|
|||||||
|
|
||||||
std::string child_nodes(MonitorServer* parent);
|
std::string child_nodes(MonitorServer* parent);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if it's time to check disk space. If true is returned, the internal timer is reset
|
||||||
|
* so that the next true is only returned once disk_space_check_interval has again passed.
|
||||||
|
*
|
||||||
|
* @return True if disk space should be checked
|
||||||
|
*/
|
||||||
|
bool check_disk_space_this_tick();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains monitor base class settings. Since monitors are stopped before a setting change,
|
* Contains monitor base class settings. Since monitors are stopped before a setting change,
|
||||||
* the items cannot be modified while a monitor is running. No locking required.
|
* the items cannot be modified while a monitor is running. No locking required.
|
||||||
@ -488,10 +520,9 @@ protected:
|
|||||||
time_t journal_max_age {0}; /**< Maximum age of journal file */
|
time_t journal_max_age {0}; /**< Maximum age of journal file */
|
||||||
|
|
||||||
SERVER::DiskSpaceLimits disk_space_limits; /**< Disk space thresholds */
|
SERVER::DiskSpaceLimits disk_space_limits; /**< Disk space thresholds */
|
||||||
/**
|
|
||||||
* How often should a disk space check be made at most, in milliseconds. Negative values imply
|
// How often should a disk space check be made at most. Negative values imply disabling.
|
||||||
* disabling. */
|
maxbase::Duration disk_space_check_interval {-1};
|
||||||
int64_t disk_space_check_interval = -1;
|
|
||||||
|
|
||||||
MonitorServer::ConnectionSettings conn_settings;
|
MonitorServer::ConnectionSettings conn_settings;
|
||||||
};
|
};
|
||||||
@ -552,6 +583,8 @@ private:
|
|||||||
|
|
||||||
FILE* open_data_file(Monitor* monitor, char* path);
|
FILE* open_data_file(Monitor* monitor, char* path);
|
||||||
int get_data_file_path(char* path) const;
|
int get_data_file_path(char* path) const;
|
||||||
|
|
||||||
|
mxb::StopWatch m_disk_space_checked; /**< When was disk space checked the last time */
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -656,24 +689,6 @@ protected:
|
|||||||
return atomic_load_int32(&m_shutdown) != 0;
|
return atomic_load_int32(&m_shutdown) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Should the disk space status be updated.
|
|
||||||
*
|
|
||||||
* @param pMonitored_server The monitored server in question.
|
|
||||||
*
|
|
||||||
* @return True, if the disk space should be checked, false otherwise.
|
|
||||||
*/
|
|
||||||
bool should_update_disk_space_status(const MonitorServer* pMonitored_server) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Update the disk space status of a server.
|
|
||||||
*
|
|
||||||
* After the call, the bit @c SERVER_DISK_SPACE_EXHAUSTED will be set on
|
|
||||||
* @c pMonitored_server->pending_status if the disk space is exhausted
|
|
||||||
* or cleared if it is not.
|
|
||||||
*/
|
|
||||||
void update_disk_space_status(MonitorServer* pMonitored_server);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Configure the monitor.
|
* @brief Configure the monitor.
|
||||||
*
|
*
|
||||||
|
@ -898,19 +898,27 @@ void Monitor::stop()
|
|||||||
|
|
||||||
bool Monitor::configure(const MXS_CONFIG_PARAMETER* params)
|
bool Monitor::configure(const MXS_CONFIG_PARAMETER* params)
|
||||||
{
|
{
|
||||||
m_settings.conn_settings.read_timeout = params->get_integer(CN_BACKEND_READ_TIMEOUT);
|
|
||||||
m_settings.conn_settings.write_timeout = params->get_integer(CN_BACKEND_WRITE_TIMEOUT);
|
|
||||||
m_settings.conn_settings.connect_timeout = params->get_integer(CN_BACKEND_CONNECT_TIMEOUT);
|
|
||||||
m_settings.conn_settings.connect_attempts = params->get_integer(CN_BACKEND_CONNECT_ATTEMPTS);
|
|
||||||
m_settings.interval = params->get_integer(CN_MONITOR_INTERVAL);
|
m_settings.interval = params->get_integer(CN_MONITOR_INTERVAL);
|
||||||
m_settings.journal_max_age = params->get_integer(CN_JOURNAL_MAX_AGE);
|
m_settings.journal_max_age = params->get_integer(CN_JOURNAL_MAX_AGE);
|
||||||
m_settings.script_timeout = params->get_integer(CN_SCRIPT_TIMEOUT);
|
m_settings.script_timeout = params->get_integer(CN_SCRIPT_TIMEOUT);
|
||||||
m_settings.script = params->get_string(CN_SCRIPT);
|
m_settings.script = params->get_string(CN_SCRIPT);
|
||||||
m_settings.events = params->get_enum(CN_EVENTS, mxs_monitor_event_enum_values);
|
m_settings.events = params->get_enum(CN_EVENTS, mxs_monitor_event_enum_values);
|
||||||
m_settings.disk_space_check_interval = params->get_integer(CN_DISK_SPACE_CHECK_INTERVAL);
|
|
||||||
|
m_settings.conn_settings.read_timeout = params->get_integer(CN_BACKEND_READ_TIMEOUT);
|
||||||
|
m_settings.conn_settings.write_timeout = params->get_integer(CN_BACKEND_WRITE_TIMEOUT);
|
||||||
|
m_settings.conn_settings.connect_timeout = params->get_integer(CN_BACKEND_CONNECT_TIMEOUT);
|
||||||
|
m_settings.conn_settings.connect_attempts = params->get_integer(CN_BACKEND_CONNECT_ATTEMPTS);
|
||||||
m_settings.conn_settings.username = params->get_string(CN_USER);
|
m_settings.conn_settings.username = params->get_string(CN_USER);
|
||||||
m_settings.conn_settings.password = params->get_string(CN_PASSWORD);
|
m_settings.conn_settings.password = params->get_string(CN_PASSWORD);
|
||||||
|
|
||||||
|
// Disk check interval is given in ms, duration is constructed from seconds.
|
||||||
|
auto dsc_interval = params->get_integer(CN_DISK_SPACE_CHECK_INTERVAL);
|
||||||
|
// 0 implies disabling -> save negative value to interval.
|
||||||
|
m_settings.disk_space_check_interval = (dsc_interval > 0) ?
|
||||||
|
mxb::Duration(static_cast<double>(dsc_interval) / 1000) : mxb::Duration(-1);
|
||||||
|
|
||||||
// The monitor serverlist has already been checked to be valid. Empty value is ok too.
|
// The monitor serverlist has already been checked to be valid. Empty value is ok too.
|
||||||
// First, remove all servers.
|
// First, remove all servers.
|
||||||
while (!m_servers.empty())
|
while (!m_servers.empty())
|
||||||
@ -981,7 +989,7 @@ Monitor::~Monitor()
|
|||||||
void Monitor::add_server(SERVER* server)
|
void Monitor::add_server(SERVER* server)
|
||||||
{
|
{
|
||||||
mxb_assert(state() != MONITOR_STATE_RUNNING);
|
mxb_assert(state() != MONITOR_STATE_RUNNING);
|
||||||
auto new_server = new MonitorServer(server);
|
auto new_server = new MonitorServer(server, m_settings.disk_space_limits);
|
||||||
m_servers.push_back(new_server);
|
m_servers.push_back(new_server);
|
||||||
server_added(server);
|
server_added(server);
|
||||||
}
|
}
|
||||||
@ -2282,6 +2290,21 @@ void Monitor::populate_services()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Monitor::check_disk_space_this_tick()
|
||||||
|
{
|
||||||
|
bool should_update_disk_space = false;
|
||||||
|
auto check_interval = m_settings.disk_space_check_interval;
|
||||||
|
|
||||||
|
if ((check_interval.secs() > 0) && m_disk_space_checked.split() > check_interval)
|
||||||
|
{
|
||||||
|
should_update_disk_space = true;
|
||||||
|
// Whether or not disk space check succeeds, reset the timer. This way, disk space is always
|
||||||
|
// checked during the same tick for all servers.
|
||||||
|
m_disk_space_checked.restart();
|
||||||
|
}
|
||||||
|
return should_update_disk_space;
|
||||||
|
}
|
||||||
|
|
||||||
MonitorWorker::MonitorWorker(const string& name, const string& module)
|
MonitorWorker::MonitorWorker(const string& name, const string& module)
|
||||||
: Monitor(name, module)
|
: Monitor(name, module)
|
||||||
, m_master(NULL)
|
, m_master(NULL)
|
||||||
@ -2393,22 +2416,9 @@ int64_t MonitorWorker::get_time_ms()
|
|||||||
return t.tv_sec * 1000 + (t.tv_nsec / 1000000);
|
return t.tv_sec * 1000 + (t.tv_nsec / 1000000);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MonitorWorker::should_update_disk_space_status(const MonitorServer* pMs) const
|
bool MonitorServer::can_update_disk_space_status() const
|
||||||
{
|
{
|
||||||
bool should_check = false;
|
return ok_to_check_disk_space && (!monitor_limits.empty() || server->have_disk_space_limits());
|
||||||
|
|
||||||
if ((m_settings.disk_space_check_interval > 0)
|
|
||||||
&& (pMs->disk_space_checked != -1) // -1 means disabled
|
|
||||||
&& (!m_settings.disk_space_limits.empty() || pMs->server->have_disk_space_limits()))
|
|
||||||
{
|
|
||||||
int64_t now = get_time_ms();
|
|
||||||
if (now - pMs->disk_space_checked > m_settings.disk_space_check_interval)
|
|
||||||
{
|
|
||||||
should_check = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return should_check;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -2439,8 +2449,9 @@ bool check_disk_space_exhausted(MonitorServer* pMs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
void MonitorServer::update_disk_space_status()
|
||||||
{
|
{
|
||||||
|
auto pMs = this; // TODO: Clean
|
||||||
std::map<std::string, disk::SizesAndName> info;
|
std::map<std::string, disk::SizesAndName> info;
|
||||||
|
|
||||||
int rv = disk::get_info_by_path(pMs->con, &info);
|
int rv = disk::get_info_by_path(pMs->con, &info);
|
||||||
@ -2451,7 +2462,7 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
|||||||
auto dst = pMs->server->get_disk_space_limits();
|
auto dst = pMs->server->get_disk_space_limits();
|
||||||
if (dst.empty())
|
if (dst.empty())
|
||||||
{
|
{
|
||||||
dst = m_settings.disk_space_limits;
|
dst = monitor_limits;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool disk_space_exhausted = false;
|
bool disk_space_exhausted = false;
|
||||||
@ -2512,8 +2523,6 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
|||||||
{
|
{
|
||||||
pMs->pending_status &= ~SERVER_DISK_SPACE_EXHAUSTED;
|
pMs->pending_status &= ~SERVER_DISK_SPACE_EXHAUSTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
pMs->disk_space_checked = get_time_ms();
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2522,7 +2531,7 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
|||||||
if (mysql_errno(pMs->con) == ER_UNKNOWN_TABLE)
|
if (mysql_errno(pMs->con) == ER_UNKNOWN_TABLE)
|
||||||
{
|
{
|
||||||
// Disable disk space checking for this server.
|
// Disable disk space checking for this server.
|
||||||
pMs->disk_space_checked = -1;
|
pMs->ok_to_check_disk_space = false;
|
||||||
|
|
||||||
MXS_ERROR("Disk space cannot be checked for %s at %s, because either the "
|
MXS_ERROR("Disk space cannot be checked for %s at %s, because either the "
|
||||||
"version (%s) is too old, or the DISKS information schema plugin "
|
"version (%s) is too old, or the DISKS information schema plugin "
|
||||||
@ -2575,6 +2584,8 @@ void MonitorWorkerSimple::tick()
|
|||||||
{
|
{
|
||||||
pre_tick();
|
pre_tick();
|
||||||
|
|
||||||
|
const bool should_update_disk_space = check_disk_space_this_tick();
|
||||||
|
|
||||||
for (MonitorServer* pMs : m_servers)
|
for (MonitorServer* pMs : m_servers)
|
||||||
{
|
{
|
||||||
if (!pMs->server->is_in_maint())
|
if (!pMs->server->is_in_maint())
|
||||||
@ -2589,9 +2600,9 @@ void MonitorWorkerSimple::tick()
|
|||||||
pMs->clear_pending_status(SERVER_AUTH_ERROR);
|
pMs->clear_pending_status(SERVER_AUTH_ERROR);
|
||||||
pMs->set_pending_status(SERVER_RUNNING);
|
pMs->set_pending_status(SERVER_RUNNING);
|
||||||
|
|
||||||
if (should_update_disk_space_status(pMs))
|
if (should_update_disk_space && pMs->can_update_disk_space_status())
|
||||||
{
|
{
|
||||||
update_disk_space_status(pMs);
|
pMs->update_disk_space_status();
|
||||||
}
|
}
|
||||||
|
|
||||||
update_server_status(pMs);
|
update_server_status(pMs);
|
||||||
@ -2742,9 +2753,9 @@ bool MonitorWorker::immediate_tick_required() const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MonitorServer::MonitorServer(SERVER* server)
|
MonitorServer::MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits)
|
||||||
: server(server)
|
: server(server)
|
||||||
, disk_space_checked(maxscale::MonitorWorker::get_time_ms()) // Pretend disk space was just checked.
|
, monitor_limits(monitor_limits)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -334,7 +334,7 @@ json_t* MariaDBMonitor::to_json() const
|
|||||||
*
|
*
|
||||||
* @param server The server to update
|
* @param server The server to update
|
||||||
*/
|
*/
|
||||||
void MariaDBMonitor::update_server(MariaDBServer* server)
|
void MariaDBMonitor::update_server(MariaDBServer* server, bool time_to_update_disk_space)
|
||||||
{
|
{
|
||||||
MonitorServer* mon_srv = server->m_server_base;
|
MonitorServer* mon_srv = server->m_server_base;
|
||||||
mxs_connect_result_t conn_status = mon_srv->ping_or_connect(m_settings.conn_settings);
|
mxs_connect_result_t conn_status = mon_srv->ping_or_connect(m_settings.conn_settings);
|
||||||
@ -361,9 +361,9 @@ void MariaDBMonitor::update_server(MariaDBServer* server)
|
|||||||
// If permissions are ok, continue.
|
// If permissions are ok, continue.
|
||||||
if (!server->has_status(SERVER_AUTH_ERROR))
|
if (!server->has_status(SERVER_AUTH_ERROR))
|
||||||
{
|
{
|
||||||
if (should_update_disk_space_status(mon_srv))
|
if (time_to_update_disk_space && mon_srv->can_update_disk_space_status())
|
||||||
{
|
{
|
||||||
update_disk_space_status(mon_srv);
|
mon_srv->update_disk_space_status();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Query MariaDBServer specific data
|
// Query MariaDBServer specific data
|
||||||
@ -433,10 +433,12 @@ void MariaDBMonitor::tick()
|
|||||||
mon_srv->mon_prev_status = status;
|
mon_srv->mon_prev_status = status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool should_update_disk_space = check_disk_space_this_tick();
|
||||||
|
|
||||||
// Query all servers for their status.
|
// Query all servers for their status.
|
||||||
for (MariaDBServer* server : m_servers)
|
for (MariaDBServer* server : m_servers)
|
||||||
{
|
{
|
||||||
update_server(server);
|
update_server(server, should_update_disk_space);
|
||||||
if (server->m_topology_changed)
|
if (server->m_topology_changed)
|
||||||
{
|
{
|
||||||
m_cluster_topology_changed = true;
|
m_cluster_topology_changed = true;
|
||||||
|
@ -255,7 +255,7 @@ private:
|
|||||||
MariaDBServer* get_server(SERVER* server);
|
MariaDBServer* get_server(SERVER* server);
|
||||||
|
|
||||||
// Cluster discovery and status assignment methods, top levels
|
// Cluster discovery and status assignment methods, top levels
|
||||||
void update_server(MariaDBServer* server);
|
void update_server(MariaDBServer* server, bool time_to_update_disk_space);
|
||||||
void update_topology();
|
void update_topology();
|
||||||
void build_replication_graph();
|
void build_replication_graph();
|
||||||
void assign_new_master(MariaDBServer* new_master);
|
void assign_new_master(MariaDBServer* new_master);
|
||||||
|
@ -159,7 +159,7 @@ void MariaDBMonitor::Test::init_servers(int count)
|
|||||||
{
|
{
|
||||||
// Server contents mostly undefined
|
// Server contents mostly undefined
|
||||||
auto base_server = Server::create_test_server();
|
auto base_server = Server::create_test_server();
|
||||||
MonitorServer* mon_server = new MonitorServer(base_server);
|
MonitorServer* mon_server = new MonitorServer(base_server, m_monitor->m_settings.disk_space_limits);
|
||||||
MariaDBServer* mariadb_server = new MariaDBServer(mon_server, i - 1, m_use_hostnames, true);
|
MariaDBServer* mariadb_server = new MariaDBServer(mon_server, i - 1, m_use_hostnames, true);
|
||||||
|
|
||||||
if (m_use_hostnames)
|
if (m_use_hostnames)
|
||||||
|
Reference in New Issue
Block a user