MXS-1490: Wait for a slave to clear relay logs before promotion

When selecting the new master server, Gtid_IO_Pos is checked to
select the slave with the latest event in relay log. If there is a
tie, the slave that has processed most events wins.

It's possible that the winning slave has unprocessed events. In
this case, failover waits for the slave to complete processing the
log. The maximum wait is defined in monitor parameter
"failover_timeout", defaulting to 90 seconds. If time runs out
failover ends in failure.

The Gtid struct was separated to its own definition to handle gtid:s
easier.
This commit is contained in:
Esa Korhonen
2017-10-30 18:37:38 +02:00
parent daaf8f5c53
commit 0f2c1ff7d6

View File

@ -35,15 +35,15 @@
#include "../../../core/maxscale/monitor.h" #include "../../../core/maxscale/monitor.h"
/** Column positions for SHOW SLAVE STATUS */ /** Column positions for SHOW SLAVE STATUS */
#define MYSQL55_STATUS_BINLOG_POS 5 #define MYSQL55_STATUS_MASTER_LOG_POS 5
#define MYSQL55_STATUS_BINLOG_NAME 6 #define MYSQL55_STATUS_MASTER_LOG_FILE 6
#define MYSQL55_STATUS_IO_RUNNING 10 #define MYSQL55_STATUS_IO_RUNNING 10
#define MYSQL55_STATUS_SQL_RUNNING 11 #define MYSQL55_STATUS_SQL_RUNNING 11
#define MYSQL55_STATUS_MASTER_ID 39 #define MYSQL55_STATUS_MASTER_ID 39
/** Column positions for SHOW SLAVE STATUS */ /** Column positions for SHOW SLAVE STATUS */
#define MARIA10_STATUS_BINLOG_NAME 7 #define MARIA10_STATUS_MASTER_LOG_FILE 7
#define MARIA10_STATUS_BINLOG_POS 8 #define MARIA10_STATUS_MASTER_LOG_POS 8
#define MARIA10_STATUS_IO_RUNNING 12 #define MARIA10_STATUS_IO_RUNNING 12
#define MARIA10_STATUS_SQL_RUNNING 13 #define MARIA10_STATUS_SQL_RUNNING 13
#define MARIA10_STATUS_MASTER_ID 41 #define MARIA10_STATUS_MASTER_ID 41
@ -572,20 +572,45 @@ MXS_MODULE* MXS_CREATE_MODULE()
} }
class Gtid
{
public:
uint32_t domain;
uint32_t server_id;
uint64_t sequence;
Gtid()
: domain(0),
server_id(0),
sequence(0)
{}
Gtid(const char* str)
{
if (sscanf(str, "%" PRIu32 "-%" PRIu32 "-%" PRIu64, &domain, &server_id, &sequence) != 3)
{
domain = 0;
server_id = 0;
sequence = 0;
}
}
};
// Contains data returned by one row of SHOW ALL SLAVES STATUS // Contains data returned by one row of SHOW ALL SLAVES STATUS
class SlaveStatusInfo class SlaveStatusInfo
{ {
public: public:
int master_id; /**< Master server id */ int master_server_id; /**< The master's server_id value. */
bool slave_io; /**< If Slave IO thread is running */ bool slave_io_running; /**< Whether the slave I/O thread is running and connected. */
bool slave_sql; /**< If Slave SQL thread is running */ bool slave_sql_running; /**< Whether or not the SQL thread is running. */
uint64_t binlog_pos; /**< Binlog position */ string master_log_file; /**< Name of the master binary log file that the I/O thread is currently reading from. */
string binlog_name; /**< Binlog name */ uint64_t read_master_log_pos; /**< Position up to which the I/O thread has read in the current master
* binary log file. */
Gtid gtid_io_pos; /**< Gtid I/O position of the slave thread. */
SlaveStatusInfo() SlaveStatusInfo()
: master_id(0), : master_server_id(0),
slave_io(false), slave_io_running(false),
slave_sql(false), slave_sql_running(false),
binlog_pos(0) read_master_log_pos(0)
{} {}
}; };
@ -607,12 +632,7 @@ public:
int slave_heartbeats; /**< Number of received heartbeats*/ int slave_heartbeats; /**< Number of received heartbeats*/
double heartbeat_period; /**< The time interval between heartbeats */ double heartbeat_period; /**< The time interval between heartbeats */
time_t latest_event; /**< Time when latest event was received from the master */ time_t latest_event; /**< Time when latest event was received from the master */
struct Gtid gtid_slave_pos; /**< Gtid of latest replicated event */
{
uint32_t domain;
uint32_t server_id;
uint64_t sequence;
} slave_gtid;
SlaveStatusInfo slave_status; /**< Data returned from SHOW SLAVE STATUS */ SlaveStatusInfo slave_status; /**< Data returned from SHOW SLAVE STATUS */
MySqlServerInfo() MySqlServerInfo()
@ -625,9 +645,18 @@ public:
n_slaves_running(0), n_slaves_running(0),
slave_heartbeats(0), slave_heartbeats(0),
heartbeat_period(0), heartbeat_period(0),
latest_event(0), latest_event(0)
slave_gtid({0, 0, 0})
{} {}
/**
* Calculate how many events are left in relay log.
*
* @return Number of events in relay log according to latest queried info.
*/
int64_t relay_log_events()
{
return slave_status.gtid_io_pos.sequence - gtid_slave_pos.sequence;
}
}; };
void* info_copy_func(const void *val) void* info_copy_func(const void *val)
@ -892,11 +921,11 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
dcb_printf(dcb, "Server ID: %d\n", serv_info->server_id); dcb_printf(dcb, "Server ID: %d\n", serv_info->server_id);
dcb_printf(dcb, "Read only: %s\n", serv_info->read_only ? "ON" : "OFF"); dcb_printf(dcb, "Read only: %s\n", serv_info->read_only ? "ON" : "OFF");
dcb_printf(dcb, "Slave configured: %s\n", serv_info->slave_configured ? "YES" : "NO"); dcb_printf(dcb, "Slave configured: %s\n", serv_info->slave_configured ? "YES" : "NO");
dcb_printf(dcb, "Slave IO running: %s\n", serv_info->slave_status.slave_io ? "YES" : "NO"); dcb_printf(dcb, "Slave IO running: %s\n", serv_info->slave_status.slave_io_running ? "YES" : "NO");
dcb_printf(dcb, "Slave SQL running: %s\n", serv_info->slave_status.slave_sql ? "YES" : "NO"); dcb_printf(dcb, "Slave SQL running: %s\n", serv_info->slave_status.slave_sql_running ? "YES" : "NO");
dcb_printf(dcb, "Master ID: %d\n", serv_info->slave_status.master_id); dcb_printf(dcb, "Master ID: %d\n", serv_info->slave_status.master_server_id);
dcb_printf(dcb, "Master binlog file: %s\n", serv_info->slave_status.binlog_name.c_str()); dcb_printf(dcb, "Master binlog file: %s\n", serv_info->slave_status.master_log_file.c_str());
dcb_printf(dcb, "Master binlog position: %lu\n", serv_info->slave_status.binlog_pos); dcb_printf(dcb, "Master binlog position: %lu\n", serv_info->slave_status.read_master_log_pos);
if (handle->multimaster) if (handle->multimaster)
{ {
@ -951,15 +980,15 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
MySqlServerInfo *serv_info = get_server_info(handle, db); MySqlServerInfo *serv_info = get_server_info(handle, db);
json_object_set_new(srv, "name", json_string(db->server->unique_name)); json_object_set_new(srv, "name", json_string(db->server->unique_name));
json_object_set_new(srv, "server_id", json_integer(serv_info->server_id)); json_object_set_new(srv, "server_id", json_integer(serv_info->server_id));
json_object_set_new(srv, "master_id", json_integer(serv_info->slave_status.master_id)); json_object_set_new(srv, "master_id", json_integer(serv_info->slave_status.master_server_id));
json_object_set_new(srv, "read_only", json_boolean(serv_info->read_only)); json_object_set_new(srv, "read_only", json_boolean(serv_info->read_only));
json_object_set_new(srv, "slave_configured", json_boolean(serv_info->slave_configured)); json_object_set_new(srv, "slave_configured", json_boolean(serv_info->slave_configured));
json_object_set_new(srv, "slave_io_running", json_boolean(serv_info->slave_status.slave_io)); json_object_set_new(srv, "slave_io_running", json_boolean(serv_info->slave_status.slave_io_running));
json_object_set_new(srv, "slave_sql_running", json_boolean(serv_info->slave_status.slave_sql)); json_object_set_new(srv, "slave_sql_running", json_boolean(serv_info->slave_status.slave_sql_running));
json_object_set_new(srv, "master_binlog_file", json_string(serv_info->slave_status.binlog_name.c_str())); json_object_set_new(srv, "master_binlog_file", json_string(serv_info->slave_status.master_log_file.c_str()));
json_object_set_new(srv, "master_binlog_position", json_integer(serv_info->slave_status.binlog_pos)); json_object_set_new(srv, "master_binlog_position", json_integer(serv_info->slave_status.read_master_log_pos));
if (handle->multimaster) if (handle->multimaster)
{ {
@ -998,43 +1027,37 @@ static enum mysql_server_version get_server_version(MXS_MONITORED_SERVER* db)
return MYSQL_SERVER_VERSION_51; return MYSQL_SERVER_VERSION_51;
} }
static void extract_slave_gtid(MySqlServerInfo* info, const char* str)
{
sscanf(str, "%" PRIu32 "-%" PRIu32 "-%" PRIu64, &info->slave_gtid.domain,
&info->slave_gtid.server_id, &info->slave_gtid.sequence);
}
static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVER* database, static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVER* database,
enum mysql_server_version server_version) enum mysql_server_version server_version)
{ {
bool rval = true; bool rval = true;
unsigned int columns; unsigned int columns;
int i_io_thread, i_sql_thread, i_binlog_pos, i_master_id, i_binlog_name; int i_slave_io_running, i_slave_sql_running, i_read_master_log_pos, i_master_server_id, i_master_log_file;
const char *query; const char *query;
if (server_version == MYSQL_SERVER_VERSION_100) if (server_version == MYSQL_SERVER_VERSION_100)
{ {
columns = 42; columns = 42;
query = "SHOW ALL SLAVES STATUS"; query = "SHOW ALL SLAVES STATUS";
i_io_thread = MARIA10_STATUS_IO_RUNNING; i_slave_io_running = MARIA10_STATUS_IO_RUNNING;
i_sql_thread = MARIA10_STATUS_SQL_RUNNING; i_slave_sql_running = MARIA10_STATUS_SQL_RUNNING;
i_binlog_name = MARIA10_STATUS_BINLOG_NAME; i_master_log_file = MARIA10_STATUS_MASTER_LOG_FILE;
i_binlog_pos = MARIA10_STATUS_BINLOG_POS; i_read_master_log_pos = MARIA10_STATUS_MASTER_LOG_POS;
i_master_id = MARIA10_STATUS_MASTER_ID; i_master_server_id = MARIA10_STATUS_MASTER_ID;
} }
else else
{ {
columns = server_version == MYSQL_SERVER_VERSION_55 ? 40 : 38; columns = server_version == MYSQL_SERVER_VERSION_55 ? 40 : 38;
query = "SHOW SLAVE STATUS"; query = "SHOW SLAVE STATUS";
i_io_thread = MYSQL55_STATUS_IO_RUNNING; i_slave_io_running = MYSQL55_STATUS_IO_RUNNING;
i_sql_thread = MYSQL55_STATUS_SQL_RUNNING; i_slave_sql_running = MYSQL55_STATUS_SQL_RUNNING;
i_binlog_name = MYSQL55_STATUS_BINLOG_NAME; i_master_log_file = MYSQL55_STATUS_MASTER_LOG_FILE;
i_binlog_pos = MYSQL55_STATUS_BINLOG_POS; i_read_master_log_pos = MYSQL55_STATUS_MASTER_LOG_POS;
i_master_id = MYSQL55_STATUS_MASTER_ID; i_master_server_id = MYSQL55_STATUS_MASTER_ID;
} }
MYSQL_RES* result; MYSQL_RES* result;
int master_id = -1; int master_server_id = -1;
int nconfigured = 0; int nconfigured = 0;
int nrunning = 0; int nrunning = 0;
@ -1058,25 +1081,25 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
do do
{ {
/* get Slave_IO_Running and Slave_SQL_Running values*/ /* get Slave_IO_Running and Slave_SQL_Running values*/
serv_info->slave_status.slave_io = strncmp(row[i_io_thread], "Yes", 3) == 0; serv_info->slave_status.slave_io_running = strncmp(row[i_slave_io_running], "Yes", 3) == 0;
serv_info->slave_status.slave_sql = strncmp(row[i_sql_thread], "Yes", 3) == 0; serv_info->slave_status.slave_sql_running = strncmp(row[i_slave_sql_running], "Yes", 3) == 0;
if (serv_info->slave_status.slave_io && serv_info->slave_status.slave_sql) if (serv_info->slave_status.slave_io_running && serv_info->slave_status.slave_sql_running)
{ {
if (nrunning == 0) if (nrunning == 0)
{ {
/** Only check binlog name for the first running slave */ /** Only check binlog name for the first running slave */
uint64_t binlog_pos = atol(row[i_binlog_pos]); uint64_t read_master_log_pos = atol(row[i_read_master_log_pos]);
char* binlog_name = row[i_binlog_name]; char* master_log_file = row[i_master_log_file];
if (serv_info->slave_status.binlog_name != binlog_name || if (serv_info->slave_status.master_log_file != master_log_file ||
binlog_pos != serv_info->slave_status.binlog_pos) read_master_log_pos != serv_info->slave_status.read_master_log_pos)
{ {
// IO thread is reading events from the master // IO thread is reading events from the master
serv_info->latest_event = time(NULL); serv_info->latest_event = time(NULL);
} }
serv_info->slave_status.binlog_name = binlog_name; serv_info->slave_status.master_log_file = master_log_file;
serv_info->slave_status.binlog_pos = binlog_pos; serv_info->slave_status.read_master_log_pos = read_master_log_pos;
} }
nrunning++; nrunning++;
@ -1087,13 +1110,13 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
* root master server. * root master server.
* Please note, there could be no slaves at all if Slave_SQL_Running == 'No' * Please note, there could be no slaves at all if Slave_SQL_Running == 'No'
*/ */
if (serv_info->slave_status.slave_io && server_version != MYSQL_SERVER_VERSION_51) if (serv_info->slave_status.slave_io_running && server_version != MYSQL_SERVER_VERSION_51)
{ {
/* Get Master_Server_Id */ /* Get Master_Server_Id */
master_id = atoi(row[i_master_id]); master_server_id = atoi(row[i_master_server_id]);
if (master_id == 0) if (master_server_id == 0)
{ {
master_id = -1; master_server_id = -1;
} }
} }
@ -1101,7 +1124,8 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
{ {
const char* beats = mxs_mysql_get_value(result, row, "Slave_received_heartbeats"); const char* beats = mxs_mysql_get_value(result, row, "Slave_received_heartbeats");
const char* period = mxs_mysql_get_value(result, row, "Slave_heartbeat_period"); const char* period = mxs_mysql_get_value(result, row, "Slave_heartbeat_period");
const char* gtid = mxs_mysql_get_value(result, row, "Gtid_Slave_Pos"); const char* gtid_slave_pos = mxs_mysql_get_value(result, row, "Gtid_Slave_Pos");
const char* gtid_io_pos = mxs_mysql_get_value(result, row, "Gtid_IO_Pos");
ss_dassert(beats && period); ss_dassert(beats && period);
int heartbeats = atoi(beats); int heartbeats = atoi(beats);
@ -1112,9 +1136,13 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
serv_info->heartbeat_period = atof(period); serv_info->heartbeat_period = atof(period);
} }
if (gtid) if (gtid_slave_pos)
{ {
extract_slave_gtid(serv_info, gtid); serv_info->gtid_slave_pos = Gtid(gtid_slave_pos);
}
if (gtid_io_pos)
{
serv_info->slave_status.gtid_io_pos = Gtid(gtid_io_pos);
} }
} }
@ -1127,15 +1155,12 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
{ {
/** Query returned no rows, replication is not configured */ /** Query returned no rows, replication is not configured */
serv_info->slave_configured = false; serv_info->slave_configured = false;
serv_info->slave_status.slave_io = false;
serv_info->slave_status.slave_sql = false;
serv_info->slave_status.binlog_pos = 0;
serv_info->slave_status.binlog_name = "";
serv_info->slave_heartbeats = 0; serv_info->slave_heartbeats = 0;
serv_info->slave_gtid = {}; serv_info->gtid_slave_pos = Gtid();
serv_info->slave_status = SlaveStatusInfo();
} }
serv_info->slave_status.master_id = master_id; serv_info->slave_status.master_server_id = master_server_id;
mysql_free_result(result); mysql_free_result(result);
} }
else else
@ -1177,7 +1202,7 @@ static bool master_still_alive(MYSQL_MONITOR* handle)
{ {
MySqlServerInfo* info = get_server_info(handle, s); MySqlServerInfo* info = get_server_info(handle, s);
if (info->slave_configured && info->slave_status.master_id == handle->master->server->node_id && if (info->slave_configured && info->slave_status.master_server_id == handle->master->server->node_id &&
difftime(time(NULL), info->latest_event) < handle->master_failure_timeout) difftime(time(NULL), info->latest_event) < handle->master_failure_timeout)
{ {
/** /**
@ -1211,7 +1236,7 @@ static inline void monitor_mysql_db(MXS_MONITORED_SERVER* database, MySqlServerI
} }
/** Store master_id of current node. For MySQL 5.1 it will be set at a later point. */ /** Store master_id of current node. For MySQL 5.1 it will be set at a later point. */
database->server->master_id = serv_info->slave_status.master_id; database->server->master_id = serv_info->slave_status.master_server_id;
} }
} }
@ -1630,12 +1655,12 @@ void find_graph_cycles(MYSQL_MONITOR *handle, MXS_MONITORED_SERVER *database, in
/** Build the graph */ /** Build the graph */
for (int i = 0; i < nservers; i++) for (int i = 0; i < nservers; i++)
{ {
if (graph[i].info->slave_status.master_id > 0) if (graph[i].info->slave_status.master_server_id > 0)
{ {
/** Found a connected node */ /** Found a connected node */
for (int k = 0; k < nservers; k++) for (int k = 0; k < nservers; k++)
{ {
if (graph[k].info->server_id == graph[i].info->slave_status.master_id) if (graph[k].info->server_id == graph[i].info->slave_status.master_server_id)
{ {
graph[i].parent = &graph[k]; graph[i].parent = &graph[k];
break; break;
@ -2967,33 +2992,27 @@ bool mon_process_failover(MYSQL_MONITOR* monitor, const char* failover_script, u
*/ */
MXS_MONITORED_SERVER* failover_select_new_master(MYSQL_MONITOR* mon, ServerVector* out_slaves) MXS_MONITORED_SERVER* failover_select_new_master(MYSQL_MONITOR* mon, ServerVector* out_slaves)
{ {
// Select a new master candidate. Currently does not properly wait for relay logs to clear. Requires that /* Select a new master candidate. Selects the one with the latest event in relay log. If multiple slaves have same
// "detect_stale_slave" is on. * number of events, select the one with most processed events. */
MXS_MONITORED_SERVER* new_master = NULL; MXS_MONITORED_SERVER* new_master = NULL;
MySqlServerInfo* new_master_info = NULL; MySqlServerInfo* new_master_info = NULL;
int master_vector_index = -1; int master_vector_index = -1;
for (MXS_MONITORED_SERVER *mon_server = mon->monitor->monitored_servers; mon_server; mon_server = mon_server->next) for (MXS_MONITORED_SERVER *mon_server = mon->monitor->monitored_servers; mon_server; mon_server = mon_server->next)
{ {
MySqlServerInfo* cand_info = get_server_info(mon, mon_server); MySqlServerInfo* cand_info = get_server_info(mon, mon_server);
if (cand_info->slave_status.slave_sql) // Assumed to be a valid slave. if (cand_info->slave_status.slave_sql_running) // Assumed to be a valid slave.
{ {
if (out_slaves) if (out_slaves)
{ {
out_slaves->push_back(mon_server); out_slaves->push_back(mon_server);
} }
bool set_master = false; // If no candidate yet, accept any.
// Accept any candidate at this point. if (new_master == NULL ||
if (new_master == NULL) // Otherwise accept a slave with a later event in relay log.
{ cand_info->slave_status.gtid_io_pos.sequence > new_master_info->slave_status.gtid_io_pos.sequence ||
set_master = true; // If io sequences are identical, the slave with more events processed wins.
} (cand_info->slave_status.gtid_io_pos.sequence == new_master_info->slave_status.gtid_io_pos.sequence &&
// TODO: Add more checks here, this may give wrong result if filenames are different cand_info->gtid_slave_pos.sequence > new_master_info->gtid_slave_pos.sequence))
else if (cand_info->slave_status.binlog_pos > new_master_info->slave_status.binlog_pos)
{
set_master = true;
}
if (set_master)
{ {
new_master = mon_server; new_master = mon_server;
new_master_info = cand_info; new_master_info = cand_info;
@ -3015,6 +3034,42 @@ MXS_MONITORED_SERVER* failover_select_new_master(MYSQL_MONITOR* mon, ServerVecto
return new_master; return new_master;
} }
/**
* Waits until the new master has processed all its relay log, or time is up.
*
* @param mon The monitor
* @param new_master The new master
* @return True if relay log was processed within time limit, or false if time ran out or an error occurred.
*/
bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master)
{
MySqlServerInfo* master_info = get_server_info(mon, new_master);
time_t begin = time(NULL);
bool query_ok = true;
while (master_info->relay_log_events() > 0 && query_ok && difftime(time(NULL), begin) < mon->failover_timeout)
{
MXS_NOTICE("Failover: Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
new_master->server->unique_name, master_info->relay_log_events());
thread_millisleep(1000); // Sleep for a while before querying server again.
// Todo: check server version before entering failover.
query_ok = do_show_slave_status(master_info, new_master, MYSQL_SERVER_VERSION_100);
}
bool rval = false;
if (master_info->relay_log_events() == 0)
{
rval = true;
}
else
{
MXS_ALERT("Failover: %s while waiting for server '%s' to process relay log.",
query_ok ? "Timeout" : "Status query error",
new_master->server->unique_name);
rval = false;
}
return rval;
}
/** /**
* Prepares a server for the replication master role. * Prepares a server for the replication master role.
* *
@ -3100,16 +3155,17 @@ bool do_failover(MYSQL_MONITOR* mon)
MXS_MONITORED_SERVER* new_master = failover_select_new_master(mon, &redirect_slaves); MXS_MONITORED_SERVER* new_master = failover_select_new_master(mon, &redirect_slaves);
if (new_master == NULL) if (new_master == NULL)
{ {
MXS_ERROR("Failover: No suitable promotion candidates found, cancelling."); MXS_ERROR("Failover: No suitable promotion candidate found, cancelling.");
return false; return false;
} }
// Step 2: Wait until relay log consumed.
bool rval = false; if (failover_wait_relay_log(mon, new_master) &&
// Step 2: Stop and reset slave, set read-only to 0. // Step 3: Stop and reset slave, set read-only to 0.
if (failover_promote_new_master(mon, new_master)) failover_promote_new_master(mon, new_master) &&
// Step 4: Redirect slaves.
failover_redirect_slaves(mon, redirect_slaves, new_master))
{ {
// Step 3: Redirect slaves. return true;
rval = failover_redirect_slaves(mon, redirect_slaves, new_master);
} }
return rval; return false;
} }