MXS-1703 Rearrange functions and methods

Lots of cleanup, but mostly distributing functions/methods to correct files.
This commit is contained in:
Esa Korhonen 2018-03-15 15:54:59 +02:00
parent 3331eb9eb6
commit 4a6fc6b1c8
6 changed files with 895 additions and 913 deletions

View File

@ -917,4 +917,72 @@ void MariaDBMonitor::monitor_mysql_db(MXS_MONITORED_SERVER* database, MySqlServe
/** Store master_id of current node. For MySQL 5.1 it will be set at a later point. */
database->server->master_id = serv_info->slave_status.master_server_id;
}
}
/**
* Query gtid_current_pos and gtid_binlog_pos and save the values to the server info object.
* Only the cluster master domain is parsed.
*
* @param mon Cluster monitor
* @param database The server to query
* @param info Server info structure for saving result TODO: remove
* @return True if successful
*/
bool MariaDBMonitor::update_gtids(MXS_MONITORED_SERVER *database, MySqlServerInfo* info)
{
StringVector row;
const char query[] = "SELECT @@gtid_current_pos, @@gtid_binlog_pos;";
const int ind_current_pos = 0;
const int ind_binlog_pos = 1;
int64_t domain = m_master_gtid_domain;
ss_dassert(domain >= 0);
bool rval = false;
if (query_one_row(database, query, 2, &row))
{
info->gtid_current_pos = (row[ind_current_pos] != "") ?
Gtid(row[ind_current_pos].c_str(), domain) : Gtid();
info->gtid_binlog_pos = (row[ind_binlog_pos] != "") ?
Gtid(row[ind_binlog_pos].c_str(), domain) : Gtid();
rval = true;
}
return rval;
}
/**
* Update replication settings and gtid:s of the slave server.
*
* @param server Slave to update
* @return Slave server info. NULL on error, or if server is not a slave.
*/
MySqlServerInfo* MariaDBMonitor::update_slave_info(MXS_MONITORED_SERVER* server)
{
MySqlServerInfo* info = get_server_info(server);
if (info->slave_status.slave_sql_running &&
update_replication_settings(server, info) &&
update_gtids(server, info) &&
do_show_slave_status(info, server))
{
return info;
}
return NULL;
}
/**
* Query a few miscellaneous replication settings.
*
* @param database The slave server to query
* @param info Where to save results
* @return True on success
*/
bool MariaDBMonitor::update_replication_settings(MXS_MONITORED_SERVER *database, MySqlServerInfo* info)
{
StringVector row;
bool ok = query_one_row(database, "SELECT @@gtid_strict_mode, @@log_bin, @@log_slave_updates;", 3, &row);
if (ok)
{
info->rpl_settings.gtid_strict_mode = (row[0] == "1");
info->rpl_settings.log_bin = (row[1] == "1");
info->rpl_settings.log_slave_updates = (row[2] == "1");
}
return ok;
}

View File

@ -15,6 +15,7 @@
#include <inttypes.h>
#include <sstream>
#include <maxscale/hk_heartbeat.h>
#include <maxscale/mysql_utils.h>
bool MariaDBMonitor::manual_switchover(MXS_MONITORED_SERVER* new_master, MXS_MONITORED_SERVER* current_master, json_t** error_out)
@ -1286,4 +1287,383 @@ bool MariaDBMonitor::is_candidate_better(const MySqlServerInfo* current_best_inf
}
}
return is_better;
}
/**
* Check that the given server is a master and it's the only master.
*
* @param suggested_curr_master The server to check, given by user.
* @param error_out On output, error object if function failed.
* @return True if current master seems ok. False, if there is some error with the
* specified current master.
*/
bool MariaDBMonitor::switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
json_t** error_out) const
{
bool server_is_master = false;
MXS_MONITORED_SERVER* extra_master = NULL; // A master server which is not the suggested one
for (MXS_MONITORED_SERVER* mon_serv = m_monitor_base->monitored_servers;
mon_serv != NULL && extra_master == NULL;
mon_serv = mon_serv->next)
{
if (SERVER_IS_MASTER(mon_serv->server))
{
if (mon_serv == suggested_curr_master)
{
server_is_master = true;
}
else
{
extra_master = mon_serv;
}
}
}
if (!server_is_master)
{
PRINT_MXS_JSON_ERROR(error_out, "Server '%s' is not the current master or it's in maintenance.",
suggested_curr_master->server->unique_name);
}
else if (extra_master)
{
PRINT_MXS_JSON_ERROR(error_out, "Cluster has an additional master server '%s'.",
extra_master->server->unique_name);
}
return server_is_master && !extra_master;
}
/**
* Check whether specified new master is acceptable.
*
* @param monitored_server The server to check against.
* @param error On output, error object if function failed.
*
* @return True, if suggested new master is a viable promotion candidate.
*/
bool MariaDBMonitor::switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error)
{
SERVER* server = monitored_server->server;
const char* name = server->unique_name;
bool is_master = SERVER_IS_MASTER(server);
bool is_slave = SERVER_IS_SLAVE(server);
if (is_master)
{
const char IS_MASTER[] = "Specified new master '%s' is already the current master.";
PRINT_MXS_JSON_ERROR(error, IS_MASTER, name);
}
else if (!is_slave)
{
const char NOT_SLAVE[] = "Specified new master '%s' is not a slave.";
PRINT_MXS_JSON_ERROR(error, NOT_SLAVE, name);
}
return !is_master && is_slave;
}
/**
* Check that preconditions for a failover are met.
*
* @param error_out JSON error out
* @return True if failover may proceed
*/
bool MariaDBMonitor::failover_check(json_t** error_out)
{
// Check that there is no running master and that there is at least one running server in the cluster.
// Also, all slaves must be using gtid-replication.
int slaves = 0;
bool error = false;
for (MXS_MONITORED_SERVER* mon_server = m_monitor_base->monitored_servers;
mon_server != NULL;
mon_server = mon_server->next)
{
uint64_t status_bits = mon_server->server->status;
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
if ((status_bits & master_up) == master_up)
{
string master_up_msg = string("Master server '") + mon_server->server->unique_name +
"' is running";
if (status_bits & SERVER_MAINT)
{
master_up_msg += ", although in maintenance mode";
}
master_up_msg += ".";
PRINT_MXS_JSON_ERROR(error_out, "%s", master_up_msg.c_str());
error = true;
}
else if (SERVER_IS_SLAVE(mon_server->server))
{
if (uses_gtid(mon_server, error_out))
{
slaves++;
}
else
{
error = true;
}
}
}
if (error)
{
PRINT_MXS_JSON_ERROR(error_out, "Failover not allowed due to errors.");
}
else if (slaves == 0)
{
PRINT_MXS_JSON_ERROR(error_out, "No running slaves, cannot failover.");
}
return !error && slaves > 0;
}
/**
* Check if server has binary log enabled. Print warnings if gtid_strict_mode or log_slave_updates is off.
*
* @param server Server to check
* @param server_info Server info
* @param print_on Print warnings or not
* @return True if log_bin is on
*/
bool check_replication_settings(const MXS_MONITORED_SERVER* server, MySqlServerInfo* server_info,
print_repl_warnings_t print_warnings)
{
bool rval = true;
const char* servername = server->server->unique_name;
if (server_info->rpl_settings.log_bin == false)
{
if (print_warnings == WARNINGS_ON)
{
const char NO_BINLOG[] =
"Slave '%s' has binary log disabled and is not a valid promotion candidate.";
MXS_WARNING(NO_BINLOG, servername);
}
rval = false;
}
else if (print_warnings == WARNINGS_ON)
{
if (server_info->rpl_settings.gtid_strict_mode == false)
{
const char NO_STRICT[] =
"Slave '%s' has gtid_strict_mode disabled. Enabling this setting is recommended. "
"For more information, see https://mariadb.com/kb/en/library/gtid/#gtid_strict_mode";
MXS_WARNING(NO_STRICT, servername);
}
if (server_info->rpl_settings.log_slave_updates == false)
{
const char NO_SLAVE_UPDATES[] =
"Slave '%s' has log_slave_updates disabled. It is a valid candidate but replication "
"will break for lagging slaves if '%s' is promoted.";
MXS_WARNING(NO_SLAVE_UPDATES, servername, servername);
}
}
return rval;
}
/**
* Checks if slave can replicate from master. Only considers gtid:s and only detects obvious errors. The
* non-detected errors will mostly be detected once the slave tries to start replicating.
*
* @param slave Slave server candidate
* @param slave_info Slave info
* @param master_info Master info
* @return True if slave can replicate from master
*/
bool MariaDBMonitor::can_replicate_from(MXS_MONITORED_SERVER* slave,
MySqlServerInfo* slave_info, MySqlServerInfo* master_info)
{
bool rval = false;
if (update_gtids(slave, slave_info))
{
Gtid slave_gtid = slave_info->gtid_current_pos;
Gtid master_gtid = master_info->gtid_binlog_pos;
// The following are not sufficient requirements for replication to work, they only cover the basics.
// If the servers have diverging histories, the redirection will seem to succeed but the slave IO
// thread will stop in error.
if (slave_gtid.server_id != SERVER_ID_UNKNOWN && master_gtid.server_id != SERVER_ID_UNKNOWN &&
slave_gtid.domain == master_gtid.domain &&
slave_gtid.sequence <= master_info->gtid_current_pos.sequence)
{
rval = true;
}
}
return rval;
}
/**
* @brief Process possible failover event
*
* If a master failure has occurred and MaxScale is configured with failover functionality, this fuction
* executes failover to select and promote a new master server. This function should be called immediately
* after @c mon_process_state_changes.
*
* @param cluster_modified_out Set to true if modifying cluster
* @return True on success, false on error
*/
bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out)
{
ss_dassert(*cluster_modified_out == false);
if (config_get_global_options()->passive ||
(master && SERVER_IS_MASTER(master->server)))
{
return true;
}
bool rval = true;
MXS_MONITORED_SERVER* failed_master = NULL;
for (MXS_MONITORED_SERVER *ptr = m_monitor_base->monitored_servers; ptr; ptr = ptr->next)
{
if (ptr->new_event && ptr->server->last_event == MASTER_DOWN_EVENT)
{
if (failed_master)
{
MXS_ALERT("Multiple failed master servers detected: "
"'%s' is the first master to fail but server "
"'%s' has also triggered a master_down event.",
failed_master->server->unique_name,
ptr->server->unique_name);
return false;
}
if (ptr->server->active_event)
{
// MaxScale was active when the event took place
failed_master = ptr;
}
else if (m_monitor_base->master_has_failed)
{
/**
* If a master_down event was triggered when this MaxScale was
* passive, we need to execute the failover script again if no new
* masters have appeared.
*/
int64_t timeout = SEC_TO_HB(m_failover_timeout);
int64_t t = hkheartbeat - ptr->server->triggered_at;
if (t > timeout)
{
MXS_WARNING("Failover of server '%s' did not take place within "
"%u seconds, failover needs to be re-triggered",
ptr->server->unique_name, m_failover_timeout);
failed_master = ptr;
}
}
}
}
if (failed_master)
{
if (m_failcount > 1 && failed_master->mon_err_count == 1)
{
MXS_WARNING("Master has failed. If master status does not change in %d monitor passes, failover "
"begins.", m_failcount - 1);
}
else if (failed_master->mon_err_count >= m_failcount)
{
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
failed_master->server->unique_name);
failed_master->new_event = false;
rval = failover_check(NULL) && do_failover(NULL);
if (rval)
{
*cluster_modified_out = true;
}
}
}
return rval;
}
/**
* Print a redirect error to logs. If err_out exists, generate a combined error message by querying all
* the server parameters for connection errors and append these errors to err_out.
*
* @param demotion_target If not NULL, this is the first server to query.
* @param redirectable_slaves Other servers to query for errors.
* @param err_out If not null, the error output object.
*/
void print_redirect_errors(MXS_MONITORED_SERVER* first_server, const ServerVector& servers,
json_t** err_out)
{
// Individual server errors have already been printed to the log.
// For JSON, gather the errors again.
const char MSG[] = "Could not redirect any slaves to the new master.";
MXS_ERROR(MSG);
if (err_out)
{
ServerVector failed_slaves;
if (first_server)
{
failed_slaves.push_back(first_server);
}
failed_slaves.insert(failed_slaves.end(),
servers.begin(), servers.end());
string combined_error = get_connection_errors(failed_slaves);
*err_out = mxs_json_error_append(*err_out,
"%s Errors: %s.", MSG, combined_error.c_str());
}
}
bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out)
{
bool rval = false;
const MySqlServerInfo* info = get_server_info(mon_server);
if (info->slave_status.gtid_io_pos.server_id == SERVER_ID_UNKNOWN)
{
string slave_not_gtid_msg = string("Slave server ") + mon_server->server->unique_name +
" is not using gtid replication.";
PRINT_MXS_JSON_ERROR(error_out, "%s", slave_not_gtid_msg.c_str());
}
else
{
rval = true;
}
return rval;
}
bool MariaDBMonitor::failover_not_possible()
{
bool rval = false;
for (MXS_MONITORED_SERVER* s = m_monitor_base->monitored_servers; s; s = s->next)
{
MySqlServerInfo* info = get_server_info(s);
if (info->n_slaves_configured > 1)
{
MXS_ERROR("Server '%s' is configured to replicate from multiple "
"masters, failover is not possible.", s->server->unique_name);
rval = true;
}
}
return rval;
}
/**
* Check if a slave is receiving events from master.
*
* @return True, if a slave has an event more recent than master_failure_timeout.
*/
bool MariaDBMonitor::slave_receiving_events()
{
ss_dassert(master);
bool received_event = false;
int64_t master_id = master->server->node_id;
for (MXS_MONITORED_SERVER* server = m_monitor_base->monitored_servers; server; server = server->next)
{
MySqlServerInfo* info = get_server_info(server);
if (info->slave_configured &&
info->slave_status.slave_io_running &&
info->slave_status.master_server_id == master_id &&
difftime(time(NULL), info->latest_event) < m_master_failure_timeout)
{
/**
* The slave is still connected to the correct master and has received events. This means that
* while MaxScale can't connect to the master, it's probably still alive.
*/
received_event = true;
break;
}
}
return received_event;
}

File diff suppressed because it is too large Load Diff

View File

@ -24,25 +24,14 @@
#include "utilities.hh"
/** Utility macro for printing both MXS_ERROR and json error */
#define PRINT_MXS_JSON_ERROR(err_out, format, ...)\
do {\
MXS_ERROR(format, ##__VA_ARGS__);\
if (err_out)\
{\
*err_out = mxs_json_error_append(*err_out, format, ##__VA_ARGS__);\
}\
} while (false)
using std::string;
extern const int PORT_UNKNOWN;
extern const int64_t SERVER_ID_UNKNOWN;
extern const char * const CN_AUTO_FAILOVER;
class MariaDBMonitor;
typedef std::tr1::unordered_map<const MXS_MONITORED_SERVER*, MySqlServerInfo> ServerInfoMap;
typedef std::vector<MXS_MONITORED_SERVER*> ServerVector;
typedef std::vector<string> StringVector;
enum print_repl_warnings_t
{
@ -58,14 +47,11 @@ enum slave_down_setting_t
// TODO: Most of following should be class methods
void print_redirect_errors(MXS_MONITORED_SERVER* first_server, const ServerVector& servers, json_t** err_out);
string generate_master_gtid_wait_cmd(const Gtid& gtid, double timeout);
bool query_one_row(MXS_MONITORED_SERVER *database, const char* query, unsigned int expected_cols,
StringVector* output);
bool check_replication_settings(const MXS_MONITORED_SERVER* server, MySqlServerInfo* server_info,
print_repl_warnings_t print_warnings = WARNINGS_ON);
MXS_MONITORED_SERVER* getServerByNodeId(MXS_MONITORED_SERVER *, long);
MXS_MONITORED_SERVER* getSlaveOfNodeId(MXS_MONITORED_SERVER *, long, slave_down_setting_t);
int64_t scan_server_id(const char* id_string);
void find_graph_cycles(MariaDBMonitor *handle, MXS_MONITORED_SERVER *database, int nservers);
// MariaDB Monitor instance data
@ -163,42 +149,9 @@ public:
*/
const MySqlServerInfo* get_server_info(const MXS_MONITORED_SERVER* db) const;
/**
* Check that the given server is a master and it's the only master.
*
* @param suggested_curr_master The server to check, given by user.
* @param error_out On output, error object if function failed.
* @return True if current master seems ok. False, if there is some error with the
* specified current master.
*/
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
json_t** error_out) const;
/**
* Check whether specified new master is acceptable.
*
* @param monitored_server The server to check against.
* @param error On output, error object if function failed.
*
* @return True, if suggested new master is a viable promotion candidate.
*/
bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error);
/**
* Checks if slave can replicate from master. Only considers gtid:s and only detects obvious errors. The
* non-detected errors will mostly be detected once the slave tries to start replicating.
*
* @param slave Slave server candidate
* @param slave_info Slave info
* @param master_info Master info
* @return True if slave can replicate from master
*/
bool can_replicate_from(MXS_MONITORED_SERVER* slave, MySqlServerInfo* slave_info,
MySqlServerInfo* master_info);
int status; /**< Monitor status. TODO: This should be in MXS_MONITOR */
MXS_MONITORED_SERVER *master; /**< Master server for MySQL Master/Slave replication */
bool detectStaleMaster; /**< Monitor flag for MySQL replication Stale Master detection */
bool detectStaleMaster; /**< Monitor flag for MySQL replication Stale Master detection */
private:
MXS_MONITOR* m_monitor_base; /**< Generic monitor object */
@ -291,4 +244,9 @@ private:
bool failover_check(json_t** error_out);
bool update_gtids(MXS_MONITORED_SERVER *database, MySqlServerInfo* info);
void disable_setting(const char* setting);
bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error);
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
json_t** error_out) const;
bool can_replicate_from(MXS_MONITORED_SERVER* slave, MySqlServerInfo* slave_info,
MySqlServerInfo* master_info);
};

View File

@ -14,12 +14,15 @@
#include "utilities.hh"
#include <inttypes.h>
#include <limits>
#include <stdio.h>
#include <string>
#include <sstream>
#include <maxscale/debug.h>
#include <maxscale/mysql_utils.h>
#include "mariadbmon.hh"
/** Server id default value */
const int64_t SERVER_ID_UNKNOWN = -1;
Gtid::Gtid()
: domain(0)
@ -110,3 +113,99 @@ int64_t MySqlServerInfo::relay_log_events()
}
return -1;
}
int64_t scan_server_id(const char* id_string)
{
int64_t server_id = SERVER_ID_UNKNOWN;
ss_debug(int rv = ) sscanf(id_string, "%" PRId64, &server_id);
ss_dassert(rv == 1);
// Server id can be 0, which was even the default value until 10.2.1.
// KB is a bit hazy on this, but apparently when replicating, the server id should not be 0. Not sure,
// so MaxScale allows this.
#if defined(SS_DEBUG)
const int64_t SERVER_ID_MIN = std::numeric_limits<uint32_t>::min();
const int64_t SERVER_ID_MAX = std::numeric_limits<uint32_t>::max();
#endif
ss_dassert(server_id >= SERVER_ID_MIN && server_id <= SERVER_ID_MAX);
return server_id;
}
string generate_master_gtid_wait_cmd(const Gtid& gtid, double timeout)
{
std::stringstream query_ss;
query_ss << "SELECT MASTER_GTID_WAIT(\"" << gtid.to_string() << "\", " << timeout << ");";
return query_ss.str();
}
bool query_one_row(MXS_MONITORED_SERVER *database, const char* query, unsigned int expected_cols,
StringVector* output)
{
bool rval = false;
MYSQL_RES *result;
if (mxs_mysql_query(database->con, query) == 0 && (result = mysql_store_result(database->con)) != NULL)
{
unsigned int columns = mysql_field_count(database->con);
if (columns != expected_cols)
{
mysql_free_result(result);
MXS_ERROR("Unexpected result for '%s'. Expected %d columns, got %d. Server version: %s",
query, expected_cols, columns, database->server->version_string);
}
else
{
MYSQL_ROW row = mysql_fetch_row(result);
if (row)
{
for (unsigned int i = 0; i < columns; i++)
{
output->push_back((row[i] != NULL) ? row[i] : "");
}
rval = true;
}
else
{
MXS_ERROR("Query '%s' returned no rows.", query);
}
mysql_free_result(result);
}
}
else
{
mon_report_query_error(database);
}
return rval;
}
string get_connection_errors(const ServerVector& servers)
{
// Get errors from all connections, form a string.
std::stringstream ss;
for (ServerVector::const_iterator iter = servers.begin(); iter != servers.end(); iter++)
{
const char* error = mysql_error((*iter)->con);
ss_dassert(*error); // Every connection should have an error.
ss << (*iter)->server->unique_name << ": '" << error << "'";
if (iter + 1 != servers.end())
{
ss << ", ";
}
}
return ss.str();
}
string monitored_servers_to_string(const ServerVector& array)
{
string rval;
size_t array_size = array.size();
if (array_size > 0)
{
const char* separator = "";
for (size_t i = 0; i < array_size; i++)
{
rval += separator;
rval += array[i]->server->unique_name;
separator = ",";
}
}
return rval;
}

View File

@ -15,6 +15,19 @@
#include <maxscale/cppdefs.hh>
#include <string>
#include <vector>
#include <maxscale/monitor.h>
/** Utility macro for printing both MXS_ERROR and json error */
#define PRINT_MXS_JSON_ERROR(err_out, format, ...)\
do {\
MXS_ERROR(format, ##__VA_ARGS__);\
if (err_out)\
{\
*err_out = mxs_json_error_append(*err_out, format, ##__VA_ARGS__);\
}\
} while (false)
using std::string;
@ -25,6 +38,11 @@ enum mysql_server_version
MYSQL_SERVER_VERSION_51
};
extern const int64_t SERVER_ID_UNKNOWN;
typedef std::vector<string> StringVector;
typedef std::vector<MXS_MONITORED_SERVER*> ServerVector;
class Gtid
{
public:
@ -125,3 +143,47 @@ public:
*/
int64_t relay_log_events();
};
/**
* Scan a server id from a string.
*
* @param id_string
* @return Server id, or -1 if scanning fails
*/
int64_t scan_server_id(const char* id_string);
/**
* Generates a MASTER_GTID_WAIT()- query.
* @param gtid What gtid to wait for
* @param timeout Maximum wait time in seconds
* @return The query
*/
string generate_master_gtid_wait_cmd(const Gtid& gtid, double timeout);
/**
* Query one row of results, save strings to array. Any additional rows are ignored.
*
* @param database The database to query.
* @param query The query to execute.
* @param expected_cols How many columns the result should have.
* @param output The output array to populate.
* @return True on success.
*/
bool query_one_row(MXS_MONITORED_SERVER *database, const char* query, unsigned int expected_cols,
StringVector* output);
/**
* Get MariaDB connection error strings from all the given servers, form one string.
*
* @param slaves Servers with errors
* @return Concatenated string.
*/
string get_connection_errors(const ServerVector& servers);
/**
* Generates a list of server names separated by ', '
*
* @param array The servers
* @return Server names
*/
string monitored_servers_to_string(const ServerVector& array);