MXS-1905 Set slaves with low disk space to maintenance
Also, servers in maintenance are updated just as other servers.
This commit is contained in:
parent
8d7cb27884
commit
fd31c9cced
@ -262,6 +262,14 @@ Note that once the server has been put in maintenance mode, the disk space
|
||||
situation will no longer be monitored and the server will thus not automatically
|
||||
be taken out of maintenance mode even if disk space becomes available again.
|
||||
|
||||
### `maintenance_on_low_disk_space`
|
||||
|
||||
This feature is enabled by default. If a running server that is not the master
|
||||
or a relay master is out of disk space (as defined by the general monitor
|
||||
setting `disk_space_threshold`) the server is set to maintenance mode. Such
|
||||
servers are not used for router sessions and are ignored when performing a
|
||||
failover or other cluster modification operation.
|
||||
|
||||
## Failover, switchover and auto-rejoin
|
||||
|
||||
Starting with MaxScale 2.2.1, MariaDB Monitor supports replication cluster
|
||||
|
@ -1610,3 +1610,16 @@ void MariaDBMonitor::enforce_read_only_on_slaves()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Put servers that have run out of disk space into maintenance.
 *
 * Goes through every server in m_servers and sets the SERVER_MAINT status on those that have the
 * SERVER_DISK_SPACE_EXHAUSTED status, are running, and are neither the master nor a relay server.
 * Called from tick() when the maintenance_on_low_disk_space setting is enabled.
 */
void MariaDBMonitor::set_low_disk_slaves_maintenance()
|
||||
{
|
||||
// Only set pure slave and standalone servers to maintenance.
|
||||
for (MariaDBServer* server : m_servers)
|
||||
{
|
||||
if (server->has_status(SERVER_DISK_SPACE_EXHAUSTED) && server->is_running() &&
|
||||
!server->is_master() && !server->is_relay_server())
|
||||
{
|
||||
// This function only ever sets the maintenance bit; it never clears it.
server->set_status(SERVER_MAINT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion";
|
||||
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
|
||||
static const char CN_SWITCHOVER_ON_LOW_DISK_SPACE[] = "switchover_on_low_disk_space";
|
||||
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
|
||||
|
||||
static const char CN_MAINTENANCE_ON_LOW_DISK_SPACE[] = "maintenance_on_low_disk_space";
|
||||
// Parameters for master failure verification and timeout
|
||||
static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure";
|
||||
static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout";
|
||||
@ -198,6 +198,7 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params)
|
||||
m_promote_sql_file = config_get_string(params, CN_PROMOTION_SQL_FILE);
|
||||
m_demote_sql_file = config_get_string(params, CN_DEMOTION_SQL_FILE);
|
||||
m_switchover_on_low_disk_space = config_get_bool(params, CN_SWITCHOVER_ON_LOW_DISK_SPACE);
|
||||
m_maintenance_on_low_disk_space = config_get_bool(params, CN_MAINTENANCE_ON_LOW_DISK_SPACE);
|
||||
|
||||
m_excluded_servers.clear();
|
||||
MXS_MONITORED_SERVER** excluded_array = NULL;
|
||||
@ -296,65 +297,61 @@ json_t* MariaDBMonitor::diagnostics_json() const
|
||||
void MariaDBMonitor::update_server(MariaDBServer& server)
|
||||
{
|
||||
MXS_MONITORED_SERVER* mon_srv = server.m_server_base;
|
||||
/* Monitor server if not in maintenance. */
|
||||
bool in_maintenance = server.is_in_maintenance();
|
||||
if (!in_maintenance)
|
||||
{
|
||||
mxs_connect_result_t conn_status = mon_ping_or_connect_to_db(m_monitor, mon_srv);
|
||||
MYSQL* conn = mon_srv->con; // mon_ping_or_connect_to_db() may have reallocated the MYSQL struct.
|
||||
mxs_connect_result_t conn_status = mon_ping_or_connect_to_db(m_monitor, mon_srv);
|
||||
MYSQL* conn = mon_srv->con; // mon_ping_or_connect_to_db() may have reallocated the MYSQL struct.
|
||||
|
||||
if (mon_connection_is_ok(conn_status))
|
||||
if (mon_connection_is_ok(conn_status))
|
||||
{
|
||||
server.set_status(SERVER_RUNNING);
|
||||
if (conn_status == MONITOR_CONN_NEWCONN_OK)
|
||||
{
|
||||
server.set_status(SERVER_RUNNING);
|
||||
if (conn_status == MONITOR_CONN_NEWCONN_OK)
|
||||
// Is a new connection or a reconnection. Check server version.
|
||||
server.update_server_version();
|
||||
}
|
||||
|
||||
if (server.m_version != MariaDBServer::version::UNKNOWN)
|
||||
{
|
||||
// Check permissions if permissions failed last time or if this is a new connection.
|
||||
if (server.had_status(SERVER_AUTH_ERROR) || conn_status == MONITOR_CONN_NEWCONN_OK)
|
||||
{
|
||||
// Is a new connection or a reconnection. Check server version.
|
||||
server.update_server_version();
|
||||
server.check_permissions();
|
||||
}
|
||||
|
||||
if (server.m_version != MariaDBServer::version::UNKNOWN)
|
||||
// If permissions are ok, continue.
|
||||
if (!server.has_status(SERVER_AUTH_ERROR))
|
||||
{
|
||||
// Check permissions if permissions failed last time or if this is a new connection.
|
||||
if (server.had_status(SERVER_AUTH_ERROR) || conn_status == MONITOR_CONN_NEWCONN_OK)
|
||||
if (should_update_disk_space_status(mon_srv))
|
||||
{
|
||||
server.check_permissions();
|
||||
update_disk_space_status(mon_srv);
|
||||
}
|
||||
|
||||
// If permissions are ok, continue.
|
||||
if (!server.has_status(SERVER_AUTH_ERROR))
|
||||
{
|
||||
if (should_update_disk_space_status(mon_srv))
|
||||
{
|
||||
update_disk_space_status(mon_srv);
|
||||
}
|
||||
|
||||
// Query MariaDBServer specific data
|
||||
server.monitor_server();
|
||||
}
|
||||
// Query MariaDBServer specific data
|
||||
server.monitor_server();
|
||||
}
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The current server is not running. Clear all but the stale master bit as it is used to detect
|
||||
* masters that went down but came up. */
|
||||
server.clear_status(~SERVER_WAS_MASTER);
|
||||
auto conn_errno = mysql_errno(conn);
|
||||
if (conn_errno == ER_ACCESS_DENIED_ERROR || conn_errno == ER_ACCESS_DENIED_NO_PASSWORD_ERROR)
|
||||
{
|
||||
/* The current server is not running. Clear all but the stale master bit as it is used to detect
|
||||
* masters that went down but came up. */
|
||||
server.clear_status(~SERVER_WAS_MASTER);
|
||||
auto conn_errno = mysql_errno(conn);
|
||||
if (conn_errno == ER_ACCESS_DENIED_ERROR || conn_errno == ER_ACCESS_DENIED_NO_PASSWORD_ERROR)
|
||||
{
|
||||
server.set_status(SERVER_AUTH_ERROR);
|
||||
}
|
||||
server.set_status(SERVER_AUTH_ERROR);
|
||||
}
|
||||
|
||||
/* Log connect failure only once, that is, if server was RUNNING or MAINTENANCE during last
|
||||
* iteration. */
|
||||
if (mon_srv->mon_prev_status & (SERVER_RUNNING | SERVER_MAINT))
|
||||
{
|
||||
mon_log_connect_error(mon_srv, conn_status);
|
||||
}
|
||||
/* Log connect failure only once, that is, if server was RUNNING or MAINTENANCE during last
|
||||
* iteration. */
|
||||
if (mon_srv->mon_prev_status & (SERVER_RUNNING | SERVER_MAINT))
|
||||
{
|
||||
mon_log_connect_error(mon_srv, conn_status);
|
||||
}
|
||||
}
|
||||
|
||||
/** Increase or reset the error count of the server. */
|
||||
bool is_running = server.is_running();
|
||||
bool in_maintenance = server.is_in_maintenance();
|
||||
mon_srv->mon_err_count = (is_running || in_maintenance) ? 0 : mon_srv->mon_err_count + 1;
|
||||
}
|
||||
|
||||
@ -526,6 +523,11 @@ void MariaDBMonitor::tick()
|
||||
measure_replication_lag();
|
||||
}
|
||||
|
||||
if (m_maintenance_on_low_disk_space)
|
||||
{
|
||||
set_low_disk_slaves_maintenance();
|
||||
}
|
||||
|
||||
// Update shared status. The next functions read the shared status. TODO: change the following
|
||||
// functions to read "pending_status" instead.
|
||||
for (auto mon_srv = m_monitor->monitored_servers; mon_srv; mon_srv = mon_srv->next)
|
||||
@ -1304,6 +1306,7 @@ extern "C" MXS_MODULE* MXS_CREATE_MODULE()
|
||||
{CN_PROMOTION_SQL_FILE, MXS_MODULE_PARAM_PATH},
|
||||
{CN_DEMOTION_SQL_FILE, MXS_MODULE_PARAM_PATH},
|
||||
{CN_SWITCHOVER_ON_LOW_DISK_SPACE, MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{CN_MAINTENANCE_ON_LOW_DISK_SPACE, MXS_MODULE_PARAM_BOOL, "true"},
|
||||
{MXS_END_MODULE_PARAMS}
|
||||
}
|
||||
};
|
||||
|
@ -168,6 +168,8 @@ private:
|
||||
std::string m_demote_sql_file; /**< File with sql commands which are ran to a server being demoted. */
|
||||
bool m_enforce_read_only_slaves; /**< Should the monitor set read-only=1 on any slave servers. */
|
||||
bool m_switchover_on_low_disk_space; /**< Should the monitor do a switchover on low disk space. */
|
||||
bool m_maintenance_on_low_disk_space; /**< Set slave and unreplicating servers with low disk space to
|
||||
* maintenance. */
|
||||
|
||||
// Other settings
|
||||
std::string m_script; /**< Script to call when state changes occur on servers */
|
||||
@ -215,6 +217,7 @@ private:
|
||||
bool master_is_valid(std::string* reason_out);
|
||||
bool cycle_has_master_server(ServerArray& cycle_servers);
|
||||
void update_master_cycle_info();
|
||||
void set_low_disk_slaves_maintenance();
|
||||
|
||||
// Switchover methods
|
||||
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
||||
|
Loading…
x
Reference in New Issue
Block a user