MXS-1703 Manual switchover, failover, rejoin to class methods
This allows privatising several public methods. Also, cleaned up monitor start and stop a bit.
This commit is contained in:
@ -893,6 +893,12 @@ static void read_server_variables(MXS_MONITORED_SERVER* database, MySqlServerInf
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Monitor a database with given server info.
|
||||
*
|
||||
* @param database Database to monitor
|
||||
* @param serv_info Server info for database
|
||||
*/
|
||||
void MariaDBMonitor::monitor_mysql_db(MXS_MONITORED_SERVER* database, MySqlServerInfo *serv_info)
|
||||
{
|
||||
/** Clear old states */
|
||||
|
||||
@ -17,6 +17,175 @@
|
||||
#include <sstream>
|
||||
#include <maxscale/mysql_utils.h>
|
||||
|
||||
bool MariaDBMonitor::manual_switchover(MXS_MONITORED_SERVER* new_master, MXS_MONITORED_SERVER* current_master, json_t** error_out)
|
||||
{
|
||||
bool stopped = stop();
|
||||
if (stopped)
|
||||
{
|
||||
MXS_NOTICE("Stopped the monitor %s for the duration of switchover.", m_monitor_base->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Monitor %s already stopped, switchover can proceed.", m_monitor_base->name);
|
||||
}
|
||||
|
||||
bool rval = false;
|
||||
bool current_ok = switchover_check_current(current_master, error_out);
|
||||
bool new_ok = switchover_check_new(new_master, error_out);
|
||||
// Check that all slaves are using gtid-replication
|
||||
bool gtid_ok = true;
|
||||
for (auto mon_serv = m_monitor_base->monitored_servers; mon_serv != NULL; mon_serv = mon_serv->next)
|
||||
{
|
||||
if (SERVER_IS_SLAVE(mon_serv->server))
|
||||
{
|
||||
if (!uses_gtid(mon_serv, error_out))
|
||||
{
|
||||
gtid_ok = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (current_ok && new_ok && gtid_ok)
|
||||
{
|
||||
bool switched = do_switchover(current_master, new_master, error_out);
|
||||
|
||||
const char* curr_master_name = current_master->server->unique_name;
|
||||
const char* new_master_name = new_master->server->unique_name;
|
||||
|
||||
if (switched)
|
||||
{
|
||||
MXS_NOTICE("Switchover %s -> %s performed.", curr_master_name, new_master_name);
|
||||
rval = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
string format = "Switchover %s -> %s failed";
|
||||
bool failover_setting = config_get_bool(m_monitor_base->parameters, CN_AUTO_FAILOVER);
|
||||
if (failover_setting)
|
||||
{
|
||||
disable_setting(CN_AUTO_FAILOVER);
|
||||
format += ", automatic failover has been disabled.";
|
||||
}
|
||||
format += ".";
|
||||
PRINT_MXS_JSON_ERROR(error_out, format.c_str(), curr_master_name, new_master_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
{
|
||||
MariaDBMonitor::start(m_monitor_base, m_monitor_base->parameters);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
bool MariaDBMonitor::manual_failover(json_t** output)
|
||||
{
|
||||
bool stopped = stop();
|
||||
if (stopped)
|
||||
{
|
||||
MXS_NOTICE("Stopped monitor %s for the duration of failover.", m_monitor_base->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Monitor %s already stopped, failover can proceed.", m_monitor_base->name);
|
||||
}
|
||||
|
||||
bool rv = true;
|
||||
rv = failover_check(output);
|
||||
if (rv)
|
||||
{
|
||||
rv = do_failover(output);
|
||||
if (rv)
|
||||
{
|
||||
MXS_NOTICE("Failover performed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Failover failed.");
|
||||
}
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
{
|
||||
MariaDBMonitor::start(m_monitor_base, m_monitor_base->parameters);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||
{
|
||||
bool stopped = stop();
|
||||
if (stopped)
|
||||
{
|
||||
MXS_NOTICE("Stopped monitor %s for the duration of rejoin.", m_monitor_base->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Monitor %s already stopped, rejoin can proceed.", m_monitor_base->name);
|
||||
}
|
||||
|
||||
bool rval = false;
|
||||
if (cluster_can_be_joined())
|
||||
{
|
||||
const char* rejoin_serv_name = rejoin_server->unique_name;
|
||||
MXS_MONITORED_SERVER* mon_server = mon_get_monitored_server(m_monitor_base, rejoin_server);
|
||||
if (mon_server)
|
||||
{
|
||||
const char* master_name = master->server->unique_name;
|
||||
MySqlServerInfo* master_info = get_server_info(master);
|
||||
MySqlServerInfo* server_info = get_server_info(mon_server);
|
||||
|
||||
if (server_is_rejoin_suspect(mon_server, master_info, output))
|
||||
{
|
||||
if (update_gtids(master, master_info))
|
||||
{
|
||||
if (can_replicate_from(mon_server, server_info, master_info))
|
||||
{
|
||||
ServerVector joinable_server;
|
||||
joinable_server.push_back(mon_server);
|
||||
if (do_rejoin(joinable_server) == 1)
|
||||
{
|
||||
rval = true;
|
||||
MXS_NOTICE("Rejoin performed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Server '%s' cannot replicate from cluster master '%s' "
|
||||
"or it could not be queried.", rejoin_serv_name, master_name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Cluster master '%s' gtid info could not be updated.",
|
||||
master_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.",
|
||||
rejoin_serv_name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const char BAD_CLUSTER[] = "The server cluster of monitor '%s' is not in a state valid for joining. "
|
||||
"Either it has no master or its gtid domain is unknown.";
|
||||
PRINT_MXS_JSON_ERROR(output, BAD_CLUSTER, m_monitor_base->name);
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
{
|
||||
MariaDBMonitor::start(m_monitor_base, m_monitor_base->parameters);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a CHANGE MASTER TO-query.
|
||||
*
|
||||
@ -148,6 +317,13 @@ bool MariaDBMonitor::redirect_one_slave(MXS_MONITORED_SERVER* slave, const char*
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* (Re)join given servers to the cluster. The servers in the array are assumed to be joinable.
|
||||
* Usually the list is created by get_joinable_servers().
|
||||
*
|
||||
* @param joinable_servers Which servers to rejoin
|
||||
* @return The number of servers successfully rejoined
|
||||
*/
|
||||
uint32_t MariaDBMonitor::do_rejoin(const ServerVector& joinable_servers)
|
||||
{
|
||||
SERVER* master_server = master->server;
|
||||
@ -186,6 +362,11 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerVector& joinable_servers)
|
||||
return servers_joined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the cluster is a valid rejoin target.
|
||||
*
|
||||
* @return True if master and gtid domain are known
|
||||
*/
|
||||
bool MariaDBMonitor::cluster_can_be_joined()
|
||||
{
|
||||
return (master != NULL && SERVER_IS_MASTER(master->server) && m_master_gtid_domain >= 0);
|
||||
@ -267,6 +448,15 @@ bool MariaDBMonitor::join_cluster(MXS_MONITORED_SERVER* server, const char* chan
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a server is a possible rejoin candidate. A true result from this function is not yet sufficient
|
||||
* criteria and another call to can_replicate_from() should be made.
|
||||
*
|
||||
* @param server Server to check
|
||||
* @param master_info Master server info
|
||||
* @param output Error output. If NULL, no error is printed to log.
|
||||
* @return True, if server is a rejoin suspect.
|
||||
*/
|
||||
bool MariaDBMonitor::server_is_rejoin_suspect(MXS_MONITORED_SERVER* rejoin_server,
|
||||
MySqlServerInfo* master_info, json_t** output)
|
||||
{
|
||||
@ -306,6 +496,14 @@ bool MariaDBMonitor::server_is_rejoin_suspect(MXS_MONITORED_SERVER* rejoin_serve
|
||||
return is_suspect;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs switchover for a simple topology (1 master, N slaves, no intermediate masters). If an
|
||||
* intermediate step fails, the cluster may be left without a master.
|
||||
*
|
||||
* @param err_out json object for error printing. Can be NULL.
|
||||
* @return True if successful. If false, the cluster can be in various situations depending on which step
|
||||
* failed. In practice, manual intervention is usually required on failure.
|
||||
*/
|
||||
bool MariaDBMonitor::do_switchover(MXS_MONITORED_SERVER* current_master, MXS_MONITORED_SERVER* new_master,
|
||||
json_t** err_out)
|
||||
{
|
||||
@ -453,6 +651,12 @@ bool MariaDBMonitor::do_switchover(MXS_MONITORED_SERVER* current_master, MXS_MON
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||
*
|
||||
* @param err_out Json output
|
||||
* @return True if successful
|
||||
*/
|
||||
bool MariaDBMonitor::do_failover(json_t** err_out)
|
||||
{
|
||||
// Topology has already been tested to be simple.
|
||||
|
||||
@ -42,7 +42,6 @@ class MariaDBMonitor;
|
||||
static void monitorMain(void *);
|
||||
static void *startMonitor(MXS_MONITOR *, const MXS_CONFIG_PARAMETER*);
|
||||
static void stopMonitor(MXS_MONITOR *);
|
||||
static bool stop_monitor(MXS_MONITOR *);
|
||||
static void diagnostics(DCB *, const MXS_MONITOR *);
|
||||
static json_t* diagnostics_json(const MXS_MONITOR *);
|
||||
static bool isMySQLEvent(mxs_monitor_event_t event);
|
||||
@ -52,7 +51,7 @@ static string get_connection_errors(const ServerVector& servers);
|
||||
|
||||
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
|
||||
|
||||
static const char CN_AUTO_FAILOVER[] = "auto_failover";
|
||||
const char * const CN_AUTO_FAILOVER = "auto_failover";
|
||||
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
|
||||
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
|
||||
static const char CN_AUTO_REJOIN[] = "auto_rejoin";
|
||||
@ -229,79 +228,6 @@ bool MariaDBMonitor::failover_check(json_t** error_out)
|
||||
return !error && slaves > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle switchover
|
||||
*
|
||||
* @mon The monitor.
|
||||
* @new_master The specified new master.
|
||||
* @current_master The specified current master.
|
||||
* @output Pointer where to place output object.
|
||||
*
|
||||
* @return True, if switchover was performed, false otherwise.
|
||||
*/
|
||||
bool mysql_switchover(MXS_MONITOR* mon, MXS_MONITORED_SERVER* new_master, MXS_MONITORED_SERVER* current_master, json_t** error_out)
|
||||
{
|
||||
bool stopped = stop_monitor(mon);
|
||||
if (stopped)
|
||||
{
|
||||
MXS_NOTICE("Stopped the monitor %s for the duration of switchover.", mon->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Monitor %s already stopped, switchover can proceed.", mon->name);
|
||||
}
|
||||
|
||||
bool rval = false;
|
||||
MariaDBMonitor* handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
|
||||
bool current_ok = handle->switchover_check_current(current_master, error_out);
|
||||
bool new_ok = handle->switchover_check_new(new_master, error_out);
|
||||
// Check that all slaves are using gtid-replication
|
||||
bool gtid_ok = true;
|
||||
for (MXS_MONITORED_SERVER* mon_serv = mon->monitored_servers; mon_serv != NULL; mon_serv = mon_serv->next)
|
||||
{
|
||||
if (SERVER_IS_SLAVE(mon_serv->server))
|
||||
{
|
||||
if (!handle->uses_gtid(mon_serv, error_out))
|
||||
{
|
||||
gtid_ok = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (current_ok && new_ok && gtid_ok)
|
||||
{
|
||||
bool switched = handle->do_switchover(current_master, new_master, error_out);
|
||||
|
||||
const char* curr_master_name = current_master->server->unique_name;
|
||||
const char* new_master_name = new_master->server->unique_name;
|
||||
|
||||
if (switched)
|
||||
{
|
||||
MXS_NOTICE("Switchover %s -> %s performed.", curr_master_name, new_master_name);
|
||||
rval = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
string format = "Switchover %s -> %s failed";
|
||||
bool failover = config_get_bool(mon->parameters, CN_AUTO_FAILOVER);
|
||||
if (failover)
|
||||
{
|
||||
handle->disable_setting(CN_AUTO_FAILOVER);
|
||||
format += ", failover has been disabled.";
|
||||
}
|
||||
format += ".";
|
||||
PRINT_MXS_JSON_ERROR(error_out, format.c_str(), curr_master_name, new_master_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
{
|
||||
startMonitor(mon, mon->parameters);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Command handler for 'switchover'
|
||||
*
|
||||
@ -317,103 +243,61 @@ bool mysql_handle_switchover(const MODULECMD_ARG* args, json_t** error_out)
|
||||
ss_dassert(MODULECMD_GET_TYPE(&args->argv[1].type) == MODULECMD_ARG_SERVER);
|
||||
ss_dassert((args->argc == 2) || (MODULECMD_GET_TYPE(&args->argv[2].type) == MODULECMD_ARG_SERVER));
|
||||
|
||||
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||
SERVER* new_master = args->argv[1].value.server;
|
||||
SERVER* current_master = (args->argc == 3) ? args->argv[2].value.server : NULL;
|
||||
bool error = false;
|
||||
|
||||
const char NO_SERVER[] = "Server '%s' is not a member of monitor '%s'.";
|
||||
MXS_MONITORED_SERVER* mon_new_master = mon_get_monitored_server(mon, new_master);
|
||||
if (mon_new_master == NULL)
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, new_master->unique_name, mon->name);
|
||||
error = true;
|
||||
}
|
||||
|
||||
MXS_MONITORED_SERVER* mon_curr_master = NULL;
|
||||
if (current_master)
|
||||
{
|
||||
mon_curr_master = mon_get_monitored_server(mon, current_master);
|
||||
if (mon_curr_master == NULL)
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, current_master->unique_name, mon->name);
|
||||
error = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Autoselect current master
|
||||
MariaDBMonitor* handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
if (handle->master)
|
||||
{
|
||||
mon_curr_master = handle->master;
|
||||
}
|
||||
else
|
||||
{
|
||||
const char NO_MASTER[] = "Monitor '%s' has no master server.";
|
||||
PRINT_MXS_JSON_ERROR(error_out, NO_MASTER, mon->name);
|
||||
error = true;
|
||||
}
|
||||
}
|
||||
if (error)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool rval = false;
|
||||
if (!config_get_global_options()->passive)
|
||||
if (config_get_global_options()->passive)
|
||||
{
|
||||
rval = mysql_switchover(mon, mon_new_master, mon_curr_master, error_out);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char MSG[] = "Switchover attempted but not performed, as MaxScale is in passive mode.";
|
||||
const char MSG[] = "Switchover requested but not performed, as MaxScale is in passive mode.";
|
||||
PRINT_MXS_JSON_ERROR(error_out, MSG);
|
||||
}
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform user-activated failover
|
||||
*
|
||||
* @param mon Cluster monitor
|
||||
* @param output Json error output
|
||||
* @return True on success
|
||||
*/
|
||||
bool mysql_failover(MXS_MONITOR* mon, json_t** output)
|
||||
{
|
||||
bool stopped = stop_monitor(mon);
|
||||
if (stopped)
|
||||
{
|
||||
MXS_NOTICE("Stopped monitor %s for the duration of failover.", mon->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Monitor %s already stopped, failover can proceed.", mon->name);
|
||||
}
|
||||
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||
auto handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
SERVER* new_master = args->argv[1].value.server;
|
||||
SERVER* current_master = (args->argc == 3) ? args->argv[2].value.server : NULL;
|
||||
bool error = false;
|
||||
const char NO_SERVER[] = "Server '%s' is not a member of monitor '%s'.";
|
||||
|
||||
bool rv = true;
|
||||
MariaDBMonitor *handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
rv = handle->failover_check(output);
|
||||
if (rv)
|
||||
{
|
||||
rv = handle->do_failover(output);
|
||||
if (rv)
|
||||
// Check given new master.
|
||||
MXS_MONITORED_SERVER* mon_new_master = mon_get_monitored_server(mon, new_master);
|
||||
if (mon_new_master == NULL)
|
||||
{
|
||||
MXS_NOTICE("Failover performed.");
|
||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, new_master->unique_name, mon->name);
|
||||
error = true;
|
||||
}
|
||||
|
||||
// Check given old master or autoselect.
|
||||
MXS_MONITORED_SERVER* mon_curr_master = NULL;
|
||||
if (current_master)
|
||||
{
|
||||
mon_curr_master = mon_get_monitored_server(mon, current_master);
|
||||
if (mon_curr_master == NULL)
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, current_master->unique_name, mon->name);
|
||||
error = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Failover failed.");
|
||||
// Autoselect current master
|
||||
if (handle->master)
|
||||
{
|
||||
mon_curr_master = handle->master;
|
||||
}
|
||||
else
|
||||
{
|
||||
const char NO_MASTER[] = "Monitor '%s' has no master server.";
|
||||
PRINT_MXS_JSON_ERROR(error_out, NO_MASTER, mon->name);
|
||||
error = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!error)
|
||||
{
|
||||
rval = handle->manual_switchover(mon_new_master, mon_curr_master, error_out);
|
||||
}
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
{
|
||||
startMonitor(mon, mon->parameters);
|
||||
}
|
||||
return rv;
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -427,105 +311,21 @@ bool mysql_handle_failover(const MODULECMD_ARG* args, json_t** output)
|
||||
{
|
||||
ss_dassert(args->argc == 1);
|
||||
ss_dassert(MODULECMD_GET_TYPE(&args->argv[0].type) == MODULECMD_ARG_MONITOR);
|
||||
|
||||
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||
|
||||
bool rv = false;
|
||||
if (!config_get_global_options()->passive)
|
||||
|
||||
if (config_get_global_options()->passive)
|
||||
{
|
||||
rv = mysql_failover(mon, output);
|
||||
PRINT_MXS_JSON_ERROR(output, "Failover requested but not performed, as MaxScale is in passive mode.");
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Failover attempted but not performed, as MaxScale is in passive mode.");
|
||||
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||
auto handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
rv = handle->manual_failover(output);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform user-activated rejoin
|
||||
*
|
||||
* @param mon Cluster monitor
|
||||
* @param rejoin_server Server to join
|
||||
* @param output Json error output
|
||||
* @return True on success
|
||||
*/
|
||||
bool mysql_rejoin(MXS_MONITOR* mon, SERVER* rejoin_server, json_t** output)
|
||||
{
|
||||
bool stopped = stop_monitor(mon);
|
||||
if (stopped)
|
||||
{
|
||||
MXS_NOTICE("Stopped monitor %s for the duration of rejoin.", mon->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_NOTICE("Monitor %s already stopped, rejoin can proceed.", mon->name);
|
||||
}
|
||||
|
||||
bool rval = false;
|
||||
MariaDBMonitor *handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
if (handle->cluster_can_be_joined())
|
||||
{
|
||||
const char* rejoin_serv_name = rejoin_server->unique_name;
|
||||
MXS_MONITORED_SERVER* mon_server = mon_get_monitored_server(mon, rejoin_server);
|
||||
if (mon_server)
|
||||
{
|
||||
MXS_MONITORED_SERVER* master = handle->master;
|
||||
const char* master_name = master->server->unique_name;
|
||||
MySqlServerInfo* master_info = handle->get_server_info(master);
|
||||
MySqlServerInfo* server_info = handle->get_server_info(mon_server);
|
||||
|
||||
if (handle->server_is_rejoin_suspect(mon_server, master_info, output))
|
||||
{
|
||||
if (handle->update_gtids(master, master_info))
|
||||
{
|
||||
if (handle->can_replicate_from(mon_server, server_info, master_info))
|
||||
{
|
||||
ServerVector joinable_server;
|
||||
joinable_server.push_back(mon_server);
|
||||
if (handle->do_rejoin(joinable_server) == 1)
|
||||
{
|
||||
rval = true;
|
||||
MXS_NOTICE("Rejoin performed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Server '%s' cannot replicate from cluster master '%s' "
|
||||
"or it could not be queried.", rejoin_serv_name, master_name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Cluster master '%s' gtid info could not be updated.",
|
||||
master_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.",
|
||||
rejoin_serv_name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const char BAD_CLUSTER[] = "The server cluster of monitor '%s' is not in a state valid for joining. "
|
||||
"Either it has no master or its gtid domain is unknown.";
|
||||
PRINT_MXS_JSON_ERROR(output, BAD_CLUSTER, mon->name);
|
||||
}
|
||||
|
||||
if (stopped)
|
||||
{
|
||||
startMonitor(mon, mon->parameters);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Command handler for 'rejoin'
|
||||
*
|
||||
@ -539,17 +339,17 @@ bool mysql_handle_rejoin(const MODULECMD_ARG* args, json_t** output)
|
||||
ss_dassert(MODULECMD_GET_TYPE(&args->argv[0].type) == MODULECMD_ARG_MONITOR);
|
||||
ss_dassert(MODULECMD_GET_TYPE(&args->argv[1].type) == MODULECMD_ARG_SERVER);
|
||||
|
||||
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||
SERVER* server = args->argv[1].value.server;
|
||||
|
||||
bool rv = false;
|
||||
if (!config_get_global_options()->passive)
|
||||
if (config_get_global_options()->passive)
|
||||
{
|
||||
rv = mysql_rejoin(mon, server, output);
|
||||
PRINT_MXS_JSON_ERROR(output, "Rejoin requested but not performed, as MaxScale is in passive mode.");
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but not performed, as MaxScale is in passive mode.");
|
||||
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||
SERVER* server = args->argv[1].value.server;
|
||||
auto handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
rv = handle->manual_rejoin(server, output);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
@ -719,7 +519,7 @@ bool MariaDBMonitor::set_replication_credentials(const MXS_CONFIG_PARAMETER* par
|
||||
return rval;
|
||||
}
|
||||
|
||||
MariaDBMonitor* MariaDBMonitor::start_monitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
MariaDBMonitor* MariaDBMonitor::start(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
{
|
||||
bool error = false;
|
||||
MariaDBMonitor *handle = static_cast<MariaDBMonitor*>(monitor->handle);
|
||||
@ -820,13 +620,7 @@ bool MariaDBMonitor::load_config_params(const MXS_CONFIG_PARAMETER* params)
|
||||
*/
|
||||
static void* startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
{
|
||||
return MariaDBMonitor::start_monitor(monitor, params);
|
||||
}
|
||||
|
||||
void MariaDBMonitor::stop_monitor()
|
||||
{
|
||||
m_shutdown = 1;
|
||||
thread_wait(m_thread);
|
||||
return MariaDBMonitor::start(monitor, params);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -836,33 +630,26 @@ void MariaDBMonitor::stop_monitor()
|
||||
*/
|
||||
static void stopMonitor(MXS_MONITOR *mon)
|
||||
{
|
||||
MariaDBMonitor *handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
handle->stop_monitor();
|
||||
auto handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
handle->stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop a running monitor
|
||||
* Stop the monitor.
|
||||
*
|
||||
* @param mon The monitor that should be stopped.
|
||||
*
|
||||
* @return True, if the monitor had to be stopped.
|
||||
* False, if the monitor already was stopped.
|
||||
* @return True, if the monitor had to be stopped. False, if the monitor already was stopped.
|
||||
*/
|
||||
static bool stop_monitor(MXS_MONITOR* mon)
|
||||
bool MariaDBMonitor::stop()
|
||||
{
|
||||
// There should be no race here as long as admin operations are performed
|
||||
// with the single admin lock locked.
|
||||
|
||||
bool actually_stopped = false;
|
||||
|
||||
MariaDBMonitor *handle = static_cast<MariaDBMonitor*>(mon->handle);
|
||||
|
||||
if (handle->status == MXS_MONITOR_RUNNING)
|
||||
if (status == MXS_MONITOR_RUNNING)
|
||||
{
|
||||
stopMonitor(mon);
|
||||
m_shutdown = 1;
|
||||
thread_wait(m_thread);
|
||||
actually_stopped = true;
|
||||
}
|
||||
|
||||
return actually_stopped;
|
||||
}
|
||||
|
||||
@ -2139,6 +1926,16 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Process possible failover event
|
||||
*
|
||||
* If a master failure has occurred and MaxScale is configured with failover functionality, this fuction
|
||||
* executes failover to select and promote a new master server. This function should be called immediately
|
||||
* after @c mon_process_state_changes.
|
||||
*
|
||||
* @param cluster_modified_out Set to true if modifying cluster
|
||||
* @return True on success, false on error
|
||||
*/
|
||||
bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out)
|
||||
{
|
||||
ss_dassert(*cluster_modified_out == false);
|
||||
|
||||
@ -36,6 +36,8 @@
|
||||
|
||||
extern const int PORT_UNKNOWN;
|
||||
extern const int64_t SERVER_ID_UNKNOWN;
|
||||
extern const char * const CN_AUTO_FAILOVER;
|
||||
|
||||
class MariaDBMonitor;
|
||||
|
||||
typedef std::tr1::unordered_map<const MXS_MONITORED_SERVER*, MySqlServerInfo> ServerInfoMap;
|
||||
@ -102,90 +104,40 @@ public:
|
||||
* @param params Configuration parameters
|
||||
* @return A pointer to MariaDBMonitor specific data.
|
||||
*/
|
||||
static MariaDBMonitor* start_monitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params);
|
||||
static MariaDBMonitor* start(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params);
|
||||
|
||||
/**
|
||||
* Stop the monitor. Waits until monitor has stopped.
|
||||
*/
|
||||
void stop_monitor();
|
||||
bool stop();
|
||||
|
||||
/**
|
||||
* Monitor a database with given server info.
|
||||
* Handle switchover
|
||||
*
|
||||
* @param mon
|
||||
* @param database Database to monitor
|
||||
* @param serv_info Server info for database
|
||||
* @new_master The specified new master
|
||||
* @current_master The specified current master
|
||||
* @output Pointer where to place output object
|
||||
*
|
||||
* @return True, if switchover was performed, false otherwise.
|
||||
*/
|
||||
void monitor_mysql_db(MXS_MONITORED_SERVER* database, MySqlServerInfo *serv_info);
|
||||
bool manual_switchover(MXS_MONITORED_SERVER* new_master, MXS_MONITORED_SERVER* current_master, json_t** error_out);
|
||||
|
||||
/**
|
||||
* Performs switchover for a simple topology (1 master, N slaves, no intermediate masters). If an
|
||||
* intermediate step fails, the cluster may be left without a master.
|
||||
* Perform user-activated failover.
|
||||
*
|
||||
* @param err_out json object for error printing. Can be NULL.
|
||||
* @return True if successful. If false, the cluster can be in various situations depending on which step
|
||||
* failed. In practice, manual intervention is usually required on failure.
|
||||
* @param output Json error output
|
||||
* @return True on success
|
||||
*/
|
||||
bool do_switchover(MXS_MONITORED_SERVER* current_master, MXS_MONITORED_SERVER* new_master,
|
||||
json_t** err_out);
|
||||
bool manual_failover(json_t** output);
|
||||
|
||||
/**
|
||||
* @brief Process possible failover event
|
||||
* Perform user-activated rejoin
|
||||
*
|
||||
* If a master failure has occurred and MaxScale is configured with failover functionality, this fuction
|
||||
* executes failover to select and promote a new master server. This function should be called immediately
|
||||
* after @c mon_process_state_changes.
|
||||
*
|
||||
* @param cluster_modified_out Set to true if modifying cluster
|
||||
* @return True on success, false on error
|
||||
* @param rejoin_server Server to join
|
||||
* @param output Json error output
|
||||
* @return True on success
|
||||
*/
|
||||
bool mon_process_failover(bool* cluster_modified_out);
|
||||
|
||||
/**
|
||||
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||
*
|
||||
* @param mon Server cluster monitor
|
||||
* @param err_out Json output
|
||||
* @return True if successful
|
||||
*/
|
||||
bool do_failover(json_t** err_out);
|
||||
|
||||
/**
|
||||
* Checks if a server is a possible rejoin candidate. A true result from this function is not yet sufficient
|
||||
* criteria and another call to can_replicate_from() should be made.
|
||||
*
|
||||
* @param server Server to check
|
||||
* @param master_info Master server info
|
||||
* @param output Error output. If NULL, no error is printed to log.
|
||||
* @return True, if server is a rejoin suspect.
|
||||
*/
|
||||
bool server_is_rejoin_suspect(MXS_MONITORED_SERVER* server, MySqlServerInfo* master_info,
|
||||
json_t** output);
|
||||
|
||||
/**
|
||||
* (Re)join given servers to the cluster. The servers in the array are assumed to be joinable.
|
||||
* Usually the list is created by get_joinable_servers().
|
||||
*
|
||||
* @param joinable_servers Which servers to rejoin
|
||||
* @return The number of servers successfully rejoined
|
||||
*/
|
||||
uint32_t do_rejoin(const ServerVector& joinable_servers);
|
||||
|
||||
/**
|
||||
* Check if the cluster is a valid rejoin target.
|
||||
*
|
||||
* @return True if master and gtid domain are known
|
||||
*/
|
||||
bool cluster_can_be_joined();
|
||||
|
||||
/**
|
||||
* Check that preconditions for a failover are met.
|
||||
*
|
||||
* @param mon Cluster monitor
|
||||
* @param error_out JSON error out
|
||||
* @return True if failover may proceed
|
||||
*/
|
||||
bool failover_check(json_t** error_out);
|
||||
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
||||
|
||||
/**
|
||||
* Check if server is using gtid replication.
|
||||
@ -327,8 +279,16 @@ private:
|
||||
void set_slave_heartbeat(MXS_MONITORED_SERVER *);
|
||||
MXS_MONITORED_SERVER* build_mysql51_replication_tree();
|
||||
MXS_MONITORED_SERVER* get_replication_tree(int num_servers);
|
||||
public:
|
||||
// Following methods should be private, change it once refactoring is done.
|
||||
void monitor_mysql_db(MXS_MONITORED_SERVER* database, MySqlServerInfo *serv_info);
|
||||
bool do_switchover(MXS_MONITORED_SERVER* current_master, MXS_MONITORED_SERVER* new_master,
|
||||
json_t** err_out);
|
||||
bool do_failover(json_t** err_out);
|
||||
uint32_t do_rejoin(const ServerVector& joinable_servers);
|
||||
bool mon_process_failover(bool* cluster_modified_out);
|
||||
bool server_is_rejoin_suspect(MXS_MONITORED_SERVER* server, MySqlServerInfo* master_info,
|
||||
json_t** output);
|
||||
bool cluster_can_be_joined();
|
||||
bool failover_check(json_t** error_out);
|
||||
bool update_gtids(MXS_MONITORED_SERVER *database, MySqlServerInfo* info);
|
||||
void disable_setting(const char* setting);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user