MXS-1703 Rearrange functions and methods

Lots of cleanup, but mostly distributing functions/methods to correct files.
This commit is contained in:
Esa Korhonen
2018-03-15 15:54:59 +02:00
parent 3331eb9eb6
commit 4a6fc6b1c8
6 changed files with 895 additions and 913 deletions

View File

@ -15,6 +15,7 @@
#include <inttypes.h>
#include <sstream>
#include <maxscale/hk_heartbeat.h>
#include <maxscale/mysql_utils.h>
bool MariaDBMonitor::manual_switchover(MXS_MONITORED_SERVER* new_master, MXS_MONITORED_SERVER* current_master, json_t** error_out)
@ -1286,4 +1287,383 @@ bool MariaDBMonitor::is_candidate_better(const MySqlServerInfo* current_best_inf
}
}
return is_better;
}
/**
 * Check that the given server is a master and it's the only master.
 *
 * The server list is scanned until both facts are known (is the suggested server a master, is there
 * another master), so the reported error does not depend on the order of the servers in the list.
 *
 * @param suggested_curr_master The server to check, given by user.
 * @param error_out On output, error object if function failed.
 * @return True if current master seems ok. False, if there is some error with the
 * specified current master.
 */
bool MariaDBMonitor::switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
                                              json_t** error_out) const
{
    bool server_is_master = false;
    MXS_MONITORED_SERVER* extra_master = NULL; // A master server which is not the suggested one

    // Stopping at the first extra master (as was done earlier) could leave 'server_is_master' unset
    // when the extra master precedes the suggested server, producing a misleading "not the current
    // master" error. Stop only once both questions have been answered.
    for (MXS_MONITORED_SERVER* mon_serv = m_monitor_base->monitored_servers;
         mon_serv != NULL && !(server_is_master && extra_master != NULL);
         mon_serv = mon_serv->next)
    {
        if (SERVER_IS_MASTER(mon_serv->server))
        {
            if (mon_serv == suggested_curr_master)
            {
                server_is_master = true;
            }
            else if (extra_master == NULL)
            {
                // Only the first extra master found is reported.
                extra_master = mon_serv;
            }
        }
    }

    if (!server_is_master)
    {
        PRINT_MXS_JSON_ERROR(error_out, "Server '%s' is not the current master or it's in maintenance.",
                             suggested_curr_master->server->unique_name);
    }
    else if (extra_master)
    {
        PRINT_MXS_JSON_ERROR(error_out, "Cluster has an additional master server '%s'.",
                             extra_master->server->unique_name);
    }
    return server_is_master && !extra_master;
}
/**
* Check whether specified new master is acceptable.
*
* @param monitored_server The server to check against.
* @param error On output, error object if function failed.
*
* @return True, if suggested new master is a viable promotion candidate.
*/
bool MariaDBMonitor::switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error)
{
SERVER* server = monitored_server->server;
const char* name = server->unique_name;
bool is_master = SERVER_IS_MASTER(server);
bool is_slave = SERVER_IS_SLAVE(server);
if (is_master)
{
const char IS_MASTER[] = "Specified new master '%s' is already the current master.";
PRINT_MXS_JSON_ERROR(error, IS_MASTER, name);
}
else if (!is_slave)
{
const char NOT_SLAVE[] = "Specified new master '%s' is not a slave.";
PRINT_MXS_JSON_ERROR(error, NOT_SLAVE, name);
}
return !is_master && is_slave;
}
/**
 * Check that preconditions for a failover are met.
 *
 * Failover is refused if any server has both the master and running status bits set, if any slave
 * is not using gtid-replication, or if no gtid-replicating slave was found at all.
 *
 * @param error_out JSON error out
 * @return True if failover may proceed
 */
bool MariaDBMonitor::failover_check(json_t** error_out)
{
    const uint64_t master_and_running = (SERVER_MASTER | SERVER_RUNNING);
    int gtid_slaves = 0;
    bool problem_found = false;

    // Every server is inspected so that all problems get reported, not just the first one.
    for (MXS_MONITORED_SERVER* srv = m_monitor_base->monitored_servers; srv != NULL; srv = srv->next)
    {
        const uint64_t status = srv->server->status;
        if ((status & master_and_running) == master_and_running)
        {
            string running_msg = "Master server '";
            running_msg += srv->server->unique_name;
            running_msg += "' is running";
            if (status & SERVER_MAINT)
            {
                running_msg += ", although in maintenance mode";
            }
            running_msg += ".";
            PRINT_MXS_JSON_ERROR(error_out, "%s", running_msg.c_str());
            problem_found = true;
        }
        else if (SERVER_IS_SLAVE(srv->server))
        {
            // uses_gtid() reports the error itself when the slave is not using gtid.
            if (uses_gtid(srv, error_out))
            {
                gtid_slaves++;
            }
            else
            {
                problem_found = true;
            }
        }
    }

    if (problem_found)
    {
        PRINT_MXS_JSON_ERROR(error_out, "Failover not allowed due to errors.");
        return false;
    }

    if (gtid_slaves == 0)
    {
        PRINT_MXS_JSON_ERROR(error_out, "No running slaves, cannot failover.");
        return false;
    }

    return true;
}
/**
 * Check if server has binary log enabled. Optionally print warnings if the binary log,
 * gtid_strict_mode or log_slave_updates is off.
 *
 * @param server Server to check, used for its name in the warnings
 * @param server_info Server info containing the replication settings
 * @param print_warnings WARNINGS_ON to print warnings, any other value to stay silent
 * @return True if log_bin is on
 */
bool check_replication_settings(const MXS_MONITORED_SERVER* server, MySqlServerInfo* server_info,
                                print_repl_warnings_t print_warnings)
{
    const char* servername = server->server->unique_name;
    const bool warn = (print_warnings == WARNINGS_ON);

    if (!server_info->rpl_settings.log_bin)
    {
        // Without a binary log the remaining settings are not examined.
        if (warn)
        {
            const char NO_BINLOG[] =
                "Slave '%s' has binary log disabled and is not a valid promotion candidate.";
            MXS_WARNING(NO_BINLOG, servername);
        }
        return false;
    }

    if (warn)
    {
        if (!server_info->rpl_settings.gtid_strict_mode)
        {
            const char NO_STRICT[] =
                "Slave '%s' has gtid_strict_mode disabled. Enabling this setting is recommended. "
                "For more information, see https://mariadb.com/kb/en/library/gtid/#gtid_strict_mode";
            MXS_WARNING(NO_STRICT, servername);
        }

        if (!server_info->rpl_settings.log_slave_updates)
        {
            const char NO_SLAVE_UPDATES[] =
                "Slave '%s' has log_slave_updates disabled. It is a valid candidate but replication "
                "will break for lagging slaves if '%s' is promoted.";
            MXS_WARNING(NO_SLAVE_UPDATES, servername, servername);
        }
    }

    return true;
}
/**
 * Checks if slave can replicate from master. Only considers gtid:s and only detects obvious errors. The
 * non-detected errors will mostly be detected once the slave tries to start replicating.
 *
 * The check fails immediately if update_gtids() fails for the slave.
 *
 * @param slave Slave server candidate
 * @param slave_info Slave info
 * @param master_info Master info
 * @return True if slave can replicate from master
 */
bool MariaDBMonitor::can_replicate_from(MXS_MONITORED_SERVER* slave,
                                        MySqlServerInfo* slave_info, MySqlServerInfo* master_info)
{
    if (!update_gtids(slave, slave_info))
    {
        return false;
    }

    const Gtid& slave_pos = slave_info->gtid_current_pos;
    const Gtid& master_binlog_pos = master_info->gtid_binlog_pos;

    // These conditions are necessary but not sufficient. If the servers have diverging histories,
    // the redirection will seem to succeed but the slave IO thread will stop in error.
    const bool positions_known = (slave_pos.server_id != SERVER_ID_UNKNOWN) &&
                                 (master_binlog_pos.server_id != SERVER_ID_UNKNOWN);
    const bool same_domain = (slave_pos.domain == master_binlog_pos.domain);
    // NOTE(review): the sequence is compared against the master's gtid_current_pos while the other
    // checks read gtid_binlog_pos — confirm this is intended.
    const bool slave_not_ahead = (slave_pos.sequence <= master_info->gtid_current_pos.sequence);

    return positions_known && same_domain && slave_not_ahead;
}
/**
* @brief Process possible failover event
*
* If a master failure has occurred and MaxScale is configured with failover functionality, this function
* executes failover to select and promote a new master server. This function should be called immediately
* after @c mon_process_state_changes.
*
* Nothing is done (and true is returned) when MaxScale is passive or when the current master still has
* master status. If more than one server has a new master_down event, an alert is logged and the
* function returns false without attempting failover.
*
* @param cluster_modified_out Set to true if modifying cluster. Must point to false when called.
* @return True on success or if no failover was attempted, false on error
*/
bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out)
{
ss_dassert(*cluster_modified_out == false);
// A passive MaxScale does nothing here, and neither does one whose master still has master status.
if (config_get_global_options()->passive ||
(master && SERVER_IS_MASTER(master->server)))
{
return true;
}
bool rval = true;
MXS_MONITORED_SERVER* failed_master = NULL;
// Look for a server whose latest, still unprocessed event is a master_down event.
for (MXS_MONITORED_SERVER *ptr = m_monitor_base->monitored_servers; ptr; ptr = ptr->next)
{
if (ptr->new_event && ptr->server->last_event == MASTER_DOWN_EVENT)
{
if (failed_master)
{
// A failed master was already picked on an earlier iteration: give up.
MXS_ALERT("Multiple failed master servers detected: "
"'%s' is the first master to fail but server "
"'%s' has also triggered a master_down event.",
failed_master->server->unique_name,
ptr->server->unique_name);
return false;
}
if (ptr->server->active_event)
{
// MaxScale was active when the event took place
failed_master = ptr;
}
else if (m_monitor_base->master_has_failed)
{
/**
* If a master_down event was triggered when this MaxScale was
* passive, we need to execute the failover script again if no new
* masters have appeared.
*/
// Elapsed time since the event was triggered, compared against the failover timeout.
// Presumably both values are in housekeeper heartbeat units — TODO confirm.
int64_t timeout = SEC_TO_HB(m_failover_timeout);
int64_t t = hkheartbeat - ptr->server->triggered_at;
if (t > timeout)
{
MXS_WARNING("Failover of server '%s' did not take place within "
"%u seconds, failover needs to be re-triggered",
ptr->server->unique_name, m_failover_timeout);
failed_master = ptr;
}
}
}
}
if (failed_master)
{
// On the first failed monitor pass only warn; failover starts once the error count
// reaches m_failcount.
if (m_failcount > 1 && failed_master->mon_err_count == 1)
{
MXS_WARNING("Master has failed. If master status does not change in %d monitor passes, failover "
"begins.", m_failcount - 1);
}
else if (failed_master->mon_err_count >= m_failcount)
{
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
failed_master->server->unique_name);
// Clear the event flag before attempting failover; presumably so that the same event is not
// processed again — verify against the code setting new_event.
failed_master->new_event = false;
// NULL is passed as the JSON error output to both calls.
rval = failover_check(NULL) && do_failover(NULL);
if (rval)
{
*cluster_modified_out = true;
}
}
}
return rval;
}
/**
 * Print a redirect error to logs. If err_out exists, generate a combined error message by querying all
 * the given servers for connection errors and append these errors to err_out.
 *
 * @param first_server If not NULL, this is the first server to query.
 * @param servers Other servers to query for errors.
 * @param err_out If not null, the error output object.
 */
void print_redirect_errors(MXS_MONITORED_SERVER* first_server, const ServerVector& servers,
                           json_t** err_out)
{
    // Individual server errors have already been printed to the log, so only the summary is logged.
    const char MSG[] = "Could not redirect any slaves to the new master.";
    MXS_ERROR(MSG);

    if (err_out == NULL)
    {
        return;
    }

    // For JSON, gather the errors again: the optional first server, then the rest in order.
    ServerVector queried_servers;
    if (first_server != NULL)
    {
        queried_servers.push_back(first_server);
    }
    queried_servers.insert(queried_servers.end(), servers.begin(), servers.end());

    string connection_errors = get_connection_errors(queried_servers);
    *err_out = mxs_json_error_append(*err_out, "%s Errors: %s.", MSG, connection_errors.c_str());
}
/**
 * Check whether a slave server is using gtid replication. The decision is based on the server id of
 * the gtid_io_pos in the server's slave status: an unknown server id means gtid is not in use.
 *
 * @param mon_server The slave server to check
 * @param error_out JSON error out, receives a message if the server is not using gtid
 * @return True if the server is using gtid replication
 */
bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out)
{
    const MySqlServerInfo* server_info = get_server_info(mon_server);
    const bool gtid_in_use = (server_info->slave_status.gtid_io_pos.server_id != SERVER_ID_UNKNOWN);

    if (!gtid_in_use)
    {
        string not_gtid_msg = "Slave server ";
        not_gtid_msg += mon_server->server->unique_name;
        not_gtid_msg += " is not using gtid replication.";
        PRINT_MXS_JSON_ERROR(error_out, "%s", not_gtid_msg.c_str());
    }

    return gtid_in_use;
}
bool MariaDBMonitor::failover_not_possible()
{
bool rval = false;
for (MXS_MONITORED_SERVER* s = m_monitor_base->monitored_servers; s; s = s->next)
{
MySqlServerInfo* info = get_server_info(s);
if (info->n_slaves_configured > 1)
{
MXS_ERROR("Server '%s' is configured to replicate from multiple "
"masters, failover is not possible.", s->server->unique_name);
rval = true;
}
}
return rval;
}
/**
* Check if a slave is receiving events from master.
*
* @return True, if a slave has an event more recent than master_failure_timeout.
*/
bool MariaDBMonitor::slave_receiving_events()
{
ss_dassert(master);
bool received_event = false;
int64_t master_id = master->server->node_id;
for (MXS_MONITORED_SERVER* server = m_monitor_base->monitored_servers; server; server = server->next)
{
MySqlServerInfo* info = get_server_info(server);
if (info->slave_configured &&
info->slave_status.slave_io_running &&
info->slave_status.master_server_id == master_id &&
difftime(time(NULL), info->latest_event) < m_master_failure_timeout)
{
/**
* The slave is still connected to the correct master and has received events. This means that
* while MaxScale can't connect to the master, it's probably still alive.
*/
received_event = true;
break;
}
}
return received_event;
}