MXS-2271 Refactor disk space checking
The functions are now in MonitorServer. Disk space can only be checked during specific ticks. If a server misses a tick (e.g. is down) it will be checked after disk_space_check_interval has passed.
This commit is contained in:
parent
cc3891a43c
commit
3ac1b40b63
@ -22,6 +22,7 @@
|
||||
#include <mutex>
|
||||
#include <openssl/sha.h>
|
||||
#include <maxbase/semaphore.hh>
|
||||
#include <maxbase/stopwatch.hh>
|
||||
#include <maxbase/worker.hh>
|
||||
#include <maxbase/iterator.hh>
|
||||
#include <maxscale/config.hh>
|
||||
@ -181,7 +182,7 @@ public:
|
||||
static const int BEING_DRAINED_OFF = 3;
|
||||
static const int BEING_DRAINED_ON = 4;
|
||||
|
||||
MonitorServer(SERVER* server);
|
||||
MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits);
|
||||
|
||||
~MonitorServer();
|
||||
|
||||
@ -239,15 +240,38 @@ public:
|
||||
|
||||
void log_state_change();
|
||||
|
||||
SERVER* server = nullptr; /**< The server being monitored */
|
||||
MYSQL* con = nullptr; /**< The MySQL connection */
|
||||
bool log_version_err = true;
|
||||
int mon_err_count = 0;
|
||||
uint64_t mon_prev_status = -1; /**< Status before starting the current monitor loop */
|
||||
uint64_t pending_status = 0; /**< Status during current monitor loop */
|
||||
int64_t disk_space_checked = 0; /**< When was the disk space checked the last time */
|
||||
int status_request = NO_CHANGE; /**< Is admin requesting Maintenance=ON/OFF on the
|
||||
* server? */
|
||||
/**
|
||||
* Is this server ok to update disk space status. Only checks if the server knows of valid disk space
|
||||
* limits settings and that the check has not failed before. Disk space check interval should be
|
||||
* checked by the monitor.
|
||||
*
|
||||
* @return True, if the disk space should be checked, false otherwise.
|
||||
*/
|
||||
bool can_update_disk_space_status() const;
|
||||
|
||||
/**
|
||||
* @brief Update the disk space status of a server.
|
||||
*
|
||||
* After the call, the bit @c SERVER_DISK_SPACE_EXHAUSTED will be set on
|
||||
* @c pMonitored_server->pending_status if the disk space is exhausted
|
||||
* or cleared if it is not.
|
||||
*/
|
||||
void update_disk_space_status();
|
||||
|
||||
SERVER* server = nullptr; /**< The server being monitored */
|
||||
MYSQL* con = nullptr; /**< The MySQL connection */
|
||||
bool log_version_err = true;
|
||||
int mon_err_count = 0;
|
||||
|
||||
uint64_t mon_prev_status = -1; /**< Status before starting the current monitor loop */
|
||||
uint64_t pending_status = 0; /**< Status during current monitor loop */
|
||||
|
||||
int status_request = NO_CHANGE; /**< Is admin requesting Maintenance=ON/OFF on the
|
||||
* server? */
|
||||
private:
|
||||
const SERVER::DiskSpaceLimits& monitor_limits; /**< Monitor-level disk-space limits */
|
||||
|
||||
bool ok_to_check_disk_space {true}; /**< Set to false if check fails */
|
||||
};
|
||||
|
||||
/**
|
||||
@ -472,6 +496,14 @@ protected:
|
||||
|
||||
std::string child_nodes(MonitorServer* parent);
|
||||
|
||||
/**
|
||||
* Checks if it's time to check disk space. If true is returned, the internal timer is reset
|
||||
* so that the next true is only returned once disk_space_check_interval has again passed.
|
||||
*
|
||||
* @return True if disk space should be checked
|
||||
*/
|
||||
bool check_disk_space_this_tick();
|
||||
|
||||
/**
|
||||
* Contains monitor base class settings. Since monitors are stopped before a setting change,
|
||||
* the items cannot be modified while a monitor is running. No locking required.
|
||||
@ -488,10 +520,9 @@ protected:
|
||||
time_t journal_max_age {0}; /**< Maximum age of journal file */
|
||||
|
||||
SERVER::DiskSpaceLimits disk_space_limits; /**< Disk space thresholds */
|
||||
/**
|
||||
* How often should a disk space check be made at most, in milliseconds. Negative values imply
|
||||
* disabling. */
|
||||
int64_t disk_space_check_interval = -1;
|
||||
|
||||
// How often should a disk space check be made at most. Negative values imply disabling.
|
||||
maxbase::Duration disk_space_check_interval {-1};
|
||||
|
||||
MonitorServer::ConnectionSettings conn_settings;
|
||||
};
|
||||
@ -552,6 +583,8 @@ private:
|
||||
|
||||
FILE* open_data_file(Monitor* monitor, char* path);
|
||||
int get_data_file_path(char* path) const;
|
||||
|
||||
mxb::StopWatch m_disk_space_checked; /**< When was disk space checked the last time */
|
||||
};
|
||||
|
||||
/**
|
||||
@ -656,24 +689,6 @@ protected:
|
||||
return atomic_load_int32(&m_shutdown) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Should the disk space status be updated.
|
||||
*
|
||||
* @param pMonitored_server The monitored server in question.
|
||||
*
|
||||
* @return True, if the disk space should be checked, false otherwise.
|
||||
*/
|
||||
bool should_update_disk_space_status(const MonitorServer* pMonitored_server) const;
|
||||
|
||||
/**
|
||||
* @brief Update the disk space status of a server.
|
||||
*
|
||||
* After the call, the bit @c SERVER_DISK_SPACE_EXHAUSTED will be set on
|
||||
* @c pMonitored_server->pending_status if the disk space is exhausted
|
||||
* or cleared if it is not.
|
||||
*/
|
||||
void update_disk_space_status(MonitorServer* pMonitored_server);
|
||||
|
||||
/**
|
||||
* @brief Configure the monitor.
|
||||
*
|
||||
|
@ -898,19 +898,27 @@ void Monitor::stop()
|
||||
|
||||
bool Monitor::configure(const MXS_CONFIG_PARAMETER* params)
|
||||
{
|
||||
m_settings.conn_settings.read_timeout = params->get_integer(CN_BACKEND_READ_TIMEOUT);
|
||||
m_settings.conn_settings.write_timeout = params->get_integer(CN_BACKEND_WRITE_TIMEOUT);
|
||||
m_settings.conn_settings.connect_timeout = params->get_integer(CN_BACKEND_CONNECT_TIMEOUT);
|
||||
m_settings.conn_settings.connect_attempts = params->get_integer(CN_BACKEND_CONNECT_ATTEMPTS);
|
||||
|
||||
|
||||
m_settings.interval = params->get_integer(CN_MONITOR_INTERVAL);
|
||||
m_settings.journal_max_age = params->get_integer(CN_JOURNAL_MAX_AGE);
|
||||
m_settings.script_timeout = params->get_integer(CN_SCRIPT_TIMEOUT);
|
||||
m_settings.script = params->get_string(CN_SCRIPT);
|
||||
m_settings.events = params->get_enum(CN_EVENTS, mxs_monitor_event_enum_values);
|
||||
m_settings.disk_space_check_interval = params->get_integer(CN_DISK_SPACE_CHECK_INTERVAL);
|
||||
|
||||
m_settings.conn_settings.read_timeout = params->get_integer(CN_BACKEND_READ_TIMEOUT);
|
||||
m_settings.conn_settings.write_timeout = params->get_integer(CN_BACKEND_WRITE_TIMEOUT);
|
||||
m_settings.conn_settings.connect_timeout = params->get_integer(CN_BACKEND_CONNECT_TIMEOUT);
|
||||
m_settings.conn_settings.connect_attempts = params->get_integer(CN_BACKEND_CONNECT_ATTEMPTS);
|
||||
m_settings.conn_settings.username = params->get_string(CN_USER);
|
||||
m_settings.conn_settings.password = params->get_string(CN_PASSWORD);
|
||||
|
||||
// Disk check interval is given in ms, duration is constructed from seconds.
|
||||
auto dsc_interval = params->get_integer(CN_DISK_SPACE_CHECK_INTERVAL);
|
||||
// 0 implies disabling -> save negative value to interval.
|
||||
m_settings.disk_space_check_interval = (dsc_interval > 0) ?
|
||||
mxb::Duration(static_cast<double>(dsc_interval) / 1000) : mxb::Duration(-1);
|
||||
|
||||
// The monitor serverlist has already been checked to be valid. Empty value is ok too.
|
||||
// First, remove all servers.
|
||||
while (!m_servers.empty())
|
||||
@ -981,7 +989,7 @@ Monitor::~Monitor()
|
||||
void Monitor::add_server(SERVER* server)
|
||||
{
|
||||
mxb_assert(state() != MONITOR_STATE_RUNNING);
|
||||
auto new_server = new MonitorServer(server);
|
||||
auto new_server = new MonitorServer(server, m_settings.disk_space_limits);
|
||||
m_servers.push_back(new_server);
|
||||
server_added(server);
|
||||
}
|
||||
@ -2282,6 +2290,21 @@ void Monitor::populate_services()
|
||||
}
|
||||
}
|
||||
|
||||
bool Monitor::check_disk_space_this_tick()
|
||||
{
|
||||
bool should_update_disk_space = false;
|
||||
auto check_interval = m_settings.disk_space_check_interval;
|
||||
|
||||
if ((check_interval.secs() > 0) && m_disk_space_checked.split() > check_interval)
|
||||
{
|
||||
should_update_disk_space = true;
|
||||
// Whether or not disk space check succeeds, reset the timer. This way, disk space is always
|
||||
// checked during the same tick for all servers.
|
||||
m_disk_space_checked.restart();
|
||||
}
|
||||
return should_update_disk_space;
|
||||
}
|
||||
|
||||
MonitorWorker::MonitorWorker(const string& name, const string& module)
|
||||
: Monitor(name, module)
|
||||
, m_master(NULL)
|
||||
@ -2393,22 +2416,9 @@ int64_t MonitorWorker::get_time_ms()
|
||||
return t.tv_sec * 1000 + (t.tv_nsec / 1000000);
|
||||
}
|
||||
|
||||
bool MonitorWorker::should_update_disk_space_status(const MonitorServer* pMs) const
|
||||
bool MonitorServer::can_update_disk_space_status() const
|
||||
{
|
||||
bool should_check = false;
|
||||
|
||||
if ((m_settings.disk_space_check_interval > 0)
|
||||
&& (pMs->disk_space_checked != -1) // -1 means disabled
|
||||
&& (!m_settings.disk_space_limits.empty() || pMs->server->have_disk_space_limits()))
|
||||
{
|
||||
int64_t now = get_time_ms();
|
||||
if (now - pMs->disk_space_checked > m_settings.disk_space_check_interval)
|
||||
{
|
||||
should_check = true;
|
||||
}
|
||||
}
|
||||
|
||||
return should_check;
|
||||
return ok_to_check_disk_space && (!monitor_limits.empty() || server->have_disk_space_limits());
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -2439,8 +2449,9 @@ bool check_disk_space_exhausted(MonitorServer* pMs,
|
||||
}
|
||||
}
|
||||
|
||||
void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
||||
void MonitorServer::update_disk_space_status()
|
||||
{
|
||||
auto pMs = this; // TODO: Clean
|
||||
std::map<std::string, disk::SizesAndName> info;
|
||||
|
||||
int rv = disk::get_info_by_path(pMs->con, &info);
|
||||
@ -2451,7 +2462,7 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
||||
auto dst = pMs->server->get_disk_space_limits();
|
||||
if (dst.empty())
|
||||
{
|
||||
dst = m_settings.disk_space_limits;
|
||||
dst = monitor_limits;
|
||||
}
|
||||
|
||||
bool disk_space_exhausted = false;
|
||||
@ -2512,8 +2523,6 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
||||
{
|
||||
pMs->pending_status &= ~SERVER_DISK_SPACE_EXHAUSTED;
|
||||
}
|
||||
|
||||
pMs->disk_space_checked = get_time_ms();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2522,7 +2531,7 @@ void MonitorWorker::update_disk_space_status(MonitorServer* pMs)
|
||||
if (mysql_errno(pMs->con) == ER_UNKNOWN_TABLE)
|
||||
{
|
||||
// Disable disk space checking for this server.
|
||||
pMs->disk_space_checked = -1;
|
||||
pMs->ok_to_check_disk_space = false;
|
||||
|
||||
MXS_ERROR("Disk space cannot be checked for %s at %s, because either the "
|
||||
"version (%s) is too old, or the DISKS information schema plugin "
|
||||
@ -2575,6 +2584,8 @@ void MonitorWorkerSimple::tick()
|
||||
{
|
||||
pre_tick();
|
||||
|
||||
const bool should_update_disk_space = check_disk_space_this_tick();
|
||||
|
||||
for (MonitorServer* pMs : m_servers)
|
||||
{
|
||||
if (!pMs->server->is_in_maint())
|
||||
@ -2589,9 +2600,9 @@ void MonitorWorkerSimple::tick()
|
||||
pMs->clear_pending_status(SERVER_AUTH_ERROR);
|
||||
pMs->set_pending_status(SERVER_RUNNING);
|
||||
|
||||
if (should_update_disk_space_status(pMs))
|
||||
if (should_update_disk_space && pMs->can_update_disk_space_status())
|
||||
{
|
||||
update_disk_space_status(pMs);
|
||||
pMs->update_disk_space_status();
|
||||
}
|
||||
|
||||
update_server_status(pMs);
|
||||
@ -2742,9 +2753,9 @@ bool MonitorWorker::immediate_tick_required() const
|
||||
}
|
||||
}
|
||||
|
||||
MonitorServer::MonitorServer(SERVER* server)
|
||||
MonitorServer::MonitorServer(SERVER* server, const SERVER::DiskSpaceLimits& monitor_limits)
|
||||
: server(server)
|
||||
, disk_space_checked(maxscale::MonitorWorker::get_time_ms()) // Pretend disk space was just checked.
|
||||
, monitor_limits(monitor_limits)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -334,7 +334,7 @@ json_t* MariaDBMonitor::to_json() const
|
||||
*
|
||||
* @param server The server to update
|
||||
*/
|
||||
void MariaDBMonitor::update_server(MariaDBServer* server)
|
||||
void MariaDBMonitor::update_server(MariaDBServer* server, bool time_to_update_disk_space)
|
||||
{
|
||||
MonitorServer* mon_srv = server->m_server_base;
|
||||
mxs_connect_result_t conn_status = mon_srv->ping_or_connect(m_settings.conn_settings);
|
||||
@ -361,9 +361,9 @@ void MariaDBMonitor::update_server(MariaDBServer* server)
|
||||
// If permissions are ok, continue.
|
||||
if (!server->has_status(SERVER_AUTH_ERROR))
|
||||
{
|
||||
if (should_update_disk_space_status(mon_srv))
|
||||
if (time_to_update_disk_space && mon_srv->can_update_disk_space_status())
|
||||
{
|
||||
update_disk_space_status(mon_srv);
|
||||
mon_srv->update_disk_space_status();
|
||||
}
|
||||
|
||||
// Query MariaDBServer specific data
|
||||
@ -433,10 +433,12 @@ void MariaDBMonitor::tick()
|
||||
mon_srv->mon_prev_status = status;
|
||||
}
|
||||
|
||||
bool should_update_disk_space = check_disk_space_this_tick();
|
||||
|
||||
// Query all servers for their status.
|
||||
for (MariaDBServer* server : m_servers)
|
||||
{
|
||||
update_server(server);
|
||||
update_server(server, should_update_disk_space);
|
||||
if (server->m_topology_changed)
|
||||
{
|
||||
m_cluster_topology_changed = true;
|
||||
|
@ -255,7 +255,7 @@ private:
|
||||
MariaDBServer* get_server(SERVER* server);
|
||||
|
||||
// Cluster discovery and status assignment methods, top levels
|
||||
void update_server(MariaDBServer* server);
|
||||
void update_server(MariaDBServer* server, bool time_to_update_disk_space);
|
||||
void update_topology();
|
||||
void build_replication_graph();
|
||||
void assign_new_master(MariaDBServer* new_master);
|
||||
|
@ -159,7 +159,7 @@ void MariaDBMonitor::Test::init_servers(int count)
|
||||
{
|
||||
// Server contents mostly undefined
|
||||
auto base_server = Server::create_test_server();
|
||||
MonitorServer* mon_server = new MonitorServer(base_server);
|
||||
MonitorServer* mon_server = new MonitorServer(base_server, m_monitor->m_settings.disk_space_limits);
|
||||
MariaDBServer* mariadb_server = new MariaDBServer(mon_server, i - 1, m_use_hostnames, true);
|
||||
|
||||
if (m_use_hostnames)
|
||||
|
Loading…
x
Reference in New Issue
Block a user