MXS-1490-1492: First version of failover script
Works in ideal situations and can be tested. It does not yet take the relay log into account, and it only checks that the commands were accepted by the backend servers. Work in progress.
This commit is contained in:
@ -19,6 +19,8 @@
|
|||||||
|
|
||||||
#include "../mysqlmon.h"
|
#include "../mysqlmon.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
#include <maxscale/alloc.h>
|
#include <maxscale/alloc.h>
|
||||||
#include <maxscale/dcb.h>
|
#include <maxscale/dcb.h>
|
||||||
#include <maxscale/debug.h>
|
#include <maxscale/debug.h>
|
||||||
@ -66,7 +68,7 @@ static int add_slave_to_master(long *, int, long);
|
|||||||
static bool isMySQLEvent(mxs_monitor_event_t event);
|
static bool isMySQLEvent(mxs_monitor_event_t event);
|
||||||
void check_maxscale_schema_replication(MXS_MONITOR *monitor);
|
void check_maxscale_schema_replication(MXS_MONITOR *monitor);
|
||||||
static bool mon_process_failover(MYSQL_MONITOR* monitor, const char* failover_script, uint32_t failover_timeout);
|
static bool mon_process_failover(MYSQL_MONITOR* monitor, const char* failover_script, uint32_t failover_timeout);
|
||||||
static bool do_failover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* failed_master);
|
static bool do_failover(MYSQL_MONITOR* mon);
|
||||||
static bool report_version_err = true;
|
static bool report_version_err = true;
|
||||||
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
|
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
|
||||||
|
|
||||||
@ -86,6 +88,7 @@ static const char CN_REPLICATION_PASSWORD[] = "replication_password";
|
|||||||
/** Default switchover timeout */
|
/** Default switchover timeout */
|
||||||
#define DEFAULT_SWITCHOVER_TIMEOUT "90"
|
#define DEFAULT_SWITCHOVER_TIMEOUT "90"
|
||||||
|
|
||||||
|
typedef std::vector<MXS_MONITORED_SERVER*> ServerVector;
|
||||||
// TODO: Specify the real default failover script.
|
// TODO: Specify the real default failover script.
|
||||||
static const char DEFAULT_FAILOVER_SCRIPT[] =
|
static const char DEFAULT_FAILOVER_SCRIPT[] =
|
||||||
"/usr/bin/echo INITIATOR=$INITIATOR "
|
"/usr/bin/echo INITIATOR=$INITIATOR "
|
||||||
@ -2844,15 +2847,165 @@ bool mon_process_failover(MYSQL_MONITOR* monitor, const char* failover_script, u
|
|||||||
|
|
||||||
if (failed_master)
|
if (failed_master)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Performing failover of server '%s'", failed_master->server->unique_name);
|
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.", failed_master->server->unique_name);
|
||||||
rval = do_failover(monitor, failed_master);
|
rval = do_failover(monitor);
|
||||||
}
|
}
|
||||||
|
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool do_failover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* failed_master)
|
/**
|
||||||
|
* Selects a new master. Also adds slaves which should be redirected to an array.
|
||||||
|
*
|
||||||
|
* @param mon The monitor
|
||||||
|
* @param out_slaves Vector for storing slave servers, can be NULL
|
||||||
|
* @return The found master, or NULL if not found
|
||||||
|
*/
|
||||||
|
MXS_MONITORED_SERVER* failover_select_new_master(MYSQL_MONITOR* mon, ServerVector* out_slaves)
|
||||||
{
|
{
|
||||||
// Implement here a simple failover script
|
// Select a new master candidate. Currently does not properly wait for relay logs to clear. Requires that
|
||||||
return false;
|
// "detect_stale_slave" is on.
|
||||||
|
MXS_MONITORED_SERVER* new_master = NULL;
|
||||||
|
MYSQL_SERVER_INFO* new_master_info = NULL;
|
||||||
|
int master_vector_index = -1;
|
||||||
|
for (MXS_MONITORED_SERVER *mon_server = mon->monitor->monitored_servers; mon_server; mon_server = mon_server->next)
|
||||||
|
{
|
||||||
|
MYSQL_SERVER_INFO* cand_info = get_server_info(mon, mon_server);
|
||||||
|
if (cand_info->slave_sql) // Assumed to be a valid slave.
|
||||||
|
{
|
||||||
|
if (out_slaves)
|
||||||
|
{
|
||||||
|
out_slaves->push_back(mon_server);
|
||||||
|
}
|
||||||
|
bool set_master = false;
|
||||||
|
// Accept any candidate at this point.
|
||||||
|
if (new_master == NULL)
|
||||||
|
{
|
||||||
|
set_master = true;
|
||||||
|
}
|
||||||
|
// TODO: Add more checks here, this may give wrong result if filenames are different
|
||||||
|
else if (cand_info->binlog_pos > new_master_info->binlog_pos)
|
||||||
|
{
|
||||||
|
set_master = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (set_master)
|
||||||
|
{
|
||||||
|
new_master = mon_server;
|
||||||
|
new_master_info = cand_info;
|
||||||
|
if (out_slaves)
|
||||||
|
{
|
||||||
|
master_vector_index = out_slaves->size() - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_master && out_slaves)
|
||||||
|
{
|
||||||
|
// Remove the selected master from the vector.
|
||||||
|
ServerVector::iterator remove_this = out_slaves->begin();
|
||||||
|
remove_this += master_vector_index;
|
||||||
|
out_slaves->erase(remove_this);
|
||||||
|
}
|
||||||
|
return new_master;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepares a server for the replication master role.
|
||||||
|
*
|
||||||
|
* @param mon The monitor
|
||||||
|
* @param new_master The new master server
|
||||||
|
* @return True if successful
|
||||||
|
*/
|
||||||
|
bool failover_promote_new_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master)
|
||||||
|
{
|
||||||
|
MXS_NOTICE("Failover: Promoting server '%s' to master.", new_master->server->unique_name);
|
||||||
|
if (mxs_mysql_query(new_master->con, "STOP SLAVE;") == 0 &&
|
||||||
|
mxs_mysql_query(new_master->con, "RESET SLAVE ALL;") == 0 &&
|
||||||
|
mxs_mysql_query(new_master->con, "SET GLOBAL read_only=0;") == 0)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MXS_WARNING("Failover: Promotion failed: '%s'.", mysql_error(new_master->con));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Redirects slaves to replicate from another master server.
|
||||||
|
*
|
||||||
|
* @param mon The monitor
|
||||||
|
* @param slaves An array of slaves
|
||||||
|
* @param new_master The replication master
|
||||||
|
* @return True, if even one slave was redirected, or if there was none to redirect
|
||||||
|
*/
|
||||||
|
bool failover_redirect_slaves(MYSQL_MONITOR* mon, ServerVector& slaves, MXS_MONITORED_SERVER* new_master)
|
||||||
|
{
|
||||||
|
MXS_NOTICE("Failover: Redirecting slaves to new master.");
|
||||||
|
std::stringstream change_cmd_temp;
|
||||||
|
change_cmd_temp << "CHANGE MASTER TO MASTER_HOST = '" << new_master->server->name << "', ";
|
||||||
|
change_cmd_temp << "MASTER_PORT = " << new_master->server->port << ", ";
|
||||||
|
change_cmd_temp << "MASTER_USE_GTID = slave_pos, ";
|
||||||
|
change_cmd_temp << "MASTER_USER = '" << mon->replication_user << "', ";
|
||||||
|
const char MASTER_PW[] = "MASTER_PASSWORD = '";
|
||||||
|
const char END[] = "';";
|
||||||
|
#if defined(SS_DEBUG)
|
||||||
|
std::stringstream change_cmd_nopw;
|
||||||
|
change_cmd_nopw << change_cmd_temp.str();
|
||||||
|
change_cmd_nopw << MASTER_PW << "******" << END;;
|
||||||
|
MXS_DEBUG("Failover: Change master command is '%s'.", change_cmd_nopw.str().c_str());
|
||||||
|
#endif
|
||||||
|
change_cmd_temp << MASTER_PW << mon->replication_password << END;
|
||||||
|
std::string change_cmd = change_cmd_temp.str();
|
||||||
|
int fails = 0;
|
||||||
|
int successes = 0;
|
||||||
|
for (ServerVector::const_iterator iter = slaves.begin(); iter != slaves.end(); iter++)
|
||||||
|
{
|
||||||
|
MXS_MONITORED_SERVER* mon_server = *iter;
|
||||||
|
if (mxs_mysql_query(mon_server->con, "STOP SLAVE;") == 0 &&
|
||||||
|
mxs_mysql_query(mon_server->con, change_cmd.c_str()) == 0 &&
|
||||||
|
mxs_mysql_query(mon_server->con, "START SLAVE;") == 0)
|
||||||
|
{
|
||||||
|
successes++;
|
||||||
|
MXS_NOTICE("Failover: Slave '%s' redirected to new master.", mon_server->server->unique_name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fails++;
|
||||||
|
MXS_ERROR("Failover: Slave '%s' redirection failed: '%s'.", mon_server->server->unique_name,
|
||||||
|
mysql_error(mon_server->con));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (successes + fails) == 0 || successes > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||||
|
*
|
||||||
|
* @param mon Server cluster monitor
|
||||||
|
* @return True if successful
|
||||||
|
*/
|
||||||
|
bool do_failover(MYSQL_MONITOR* mon)
|
||||||
|
{
|
||||||
|
// Topology has already been tested to be simple.
|
||||||
|
// Step 1: Select new master. Also populate a vector with all slaves not the selected master.
|
||||||
|
ServerVector redirect_slaves;
|
||||||
|
MXS_MONITORED_SERVER* new_master = failover_select_new_master(mon, &redirect_slaves);
|
||||||
|
if (new_master == NULL)
|
||||||
|
{
|
||||||
|
MXS_ERROR("Failover: No suitable promotion candidates found, cancelling.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool rval = false;
|
||||||
|
// Step 2: Stop and reset slave, set read-only to 0.
|
||||||
|
if (failover_promote_new_master(mon, new_master))
|
||||||
|
{
|
||||||
|
// Step 3: Redirect slaves.
|
||||||
|
rval = failover_redirect_slaves(mon, redirect_slaves, new_master);
|
||||||
|
}
|
||||||
|
return rval;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user