MXS-1490: Query gtid_slave_pos only during failover

The value of the global gtid_slave_pos is only needed during
failover, so querying it every monitor loop is unnecessary. The
value is now only requested when deciding on a new master server
or when waiting for the selected promotion target to clear its
relay logs.

Also, when waiting for the logs to clear, gtid_io_pos must stay
constant or failover is cancelled. Io_pos advancing indicates that
the server is still receiving events from the old master.
This commit is contained in:
Esa Korhonen
2017-11-06 12:11:29 +02:00
parent f44020496a
commit 84e95cee96

View File

@ -79,7 +79,7 @@ static bool mon_process_failover(MYSQL_MONITOR* monitor,
const char* failover_script, const char* failover_script,
uint32_t failover_timeout); uint32_t failover_timeout);
static bool do_failover(MYSQL_MONITOR* mon); static bool do_failover(MYSQL_MONITOR* mon);
static void update_gtid_slave_pos(MXS_MONITORED_SERVER *database, int64_t domain, MySqlServerInfo* info); static bool update_gtid_slave_pos(MXS_MONITORED_SERVER *database, int64_t domain, MySqlServerInfo* info);
static bool update_replication_settings(MXS_MONITORED_SERVER *database, MySqlServerInfo* info); static bool update_replication_settings(MXS_MONITORED_SERVER *database, MySqlServerInfo* info);
static bool report_version_err = true; static bool report_version_err = true;
@ -623,6 +623,10 @@ public:
ss_dassert(found); ss_dassert(found);
} }
} }
bool operator == (const Gtid& rhs) const
{
return domain == rhs.domain && server_id == rhs.server_id && sequence == rhs.sequence;
}
private: private:
void parse_triplet(const char* str) void parse_triplet(const char* str)
{ {
@ -1192,17 +1196,8 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
serv_info->heartbeat_period = atof(period); serv_info->heartbeat_period = atof(period);
} }
if (serv_info->slave_status.slave_sql_running && gtid_io_pos) serv_info->slave_status.gtid_io_pos = (serv_info->slave_status.slave_sql_running &&
{ gtid_io_pos) ? Gtid(gtid_io_pos) : Gtid();
Gtid io_pos = Gtid(gtid_io_pos);
serv_info->slave_status.gtid_io_pos = io_pos;
update_gtid_slave_pos(database, io_pos.domain, serv_info);
}
else
{
serv_info->slave_status.gtid_io_pos = Gtid();
serv_info->gtid_slave_pos = Gtid();
}
} }
nconfigured++; nconfigured++;
@ -1215,7 +1210,6 @@ static bool do_show_slave_status(MySqlServerInfo* serv_info, MXS_MONITORED_SERVE
/** Query returned no rows, replication is not configured */ /** Query returned no rows, replication is not configured */
serv_info->slave_configured = false; serv_info->slave_configured = false;
serv_info->slave_heartbeats = 0; serv_info->slave_heartbeats = 0;
serv_info->gtid_slave_pos = Gtid();
serv_info->slave_status = SlaveStatusInfo(); serv_info->slave_status = SlaveStatusInfo();
} }
@ -3065,7 +3059,9 @@ MXS_MONITORED_SERVER* failover_select_new_master(MYSQL_MONITOR* mon, ServerVecto
mon_server = mon_server->next) mon_server = mon_server->next)
{ {
MySqlServerInfo* cand_info = get_server_info(mon, mon_server); MySqlServerInfo* cand_info = get_server_info(mon, mon_server);
if (cand_info->slave_status.slave_sql_running && update_replication_settings(mon_server, cand_info)) if (cand_info->slave_status.slave_sql_running &&
update_replication_settings(mon_server, cand_info) &&
update_gtid_slave_pos(mon_server, cand_info->slave_status.gtid_io_pos.domain, cand_info))
{ {
if (out_slaves) if (out_slaves)
{ {
@ -3149,15 +3145,20 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
MySqlServerInfo* master_info = get_server_info(mon, new_master); MySqlServerInfo* master_info = get_server_info(mon, new_master);
time_t begin = time(NULL); time_t begin = time(NULL);
bool query_ok = true; bool query_ok = true;
bool io_pos_changed = false;
while (master_info->relay_log_events() > 0 && while (master_info->relay_log_events() > 0 &&
query_ok && query_ok &&
!io_pos_changed &&
difftime(time(NULL), begin) < mon->failover_timeout) difftime(time(NULL), begin) < mon->failover_timeout)
{ {
MXS_NOTICE("Failover: Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.", MXS_NOTICE("Failover: Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
new_master->server->unique_name, master_info->relay_log_events()); new_master->server->unique_name, master_info->relay_log_events());
thread_millisleep(1000); // Sleep for a while before querying server again. thread_millisleep(1000); // Sleep for a while before querying server again.
// Todo: check server version before entering failover. // Todo: check server version before entering failover.
query_ok = do_show_slave_status(master_info, new_master, MYSQL_SERVER_VERSION_100); Gtid old_gtid_io_pos = master_info->slave_status.gtid_io_pos;
query_ok = do_show_slave_status(master_info, new_master, MYSQL_SERVER_VERSION_100) &&
update_gtid_slave_pos(new_master, old_gtid_io_pos.domain, master_info);
io_pos_changed = (old_gtid_io_pos == master_info->slave_status.gtid_io_pos);
} }
bool rval = false; bool rval = false;
@ -3167,9 +3168,17 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
} }
else else
{ {
MXS_ALERT("Failover: %s while waiting for server '%s' to process relay log.", const char* reason = "Timeout";
query_ok ? "Timeout" : "Status query error", if (!query_ok)
new_master->server->unique_name); {
reason = "Query error";
}
else if (io_pos_changed)
{
reason = "Old master sent new event(s)";
}
MXS_ERROR("Failover: %s while waiting for server '%s' to process relay log. Cancelling failover.",
reason, new_master->server->unique_name);
rval = false; rval = false;
} }
return rval; return rval;
@ -3349,12 +3358,15 @@ static bool update_replication_settings(MXS_MONITORED_SERVER *database, MySqlSer
* @param database The server to query. * @param database The server to query.
* @param domain Which gtid domain should be saved. * @param domain Which gtid domain should be saved.
* @param info Server info structure for saving result. * @param info Server info structure for saving result.
* @return True if successful
*/ */
static void update_gtid_slave_pos(MXS_MONITORED_SERVER *database, int64_t domain, MySqlServerInfo* info) static bool update_gtid_slave_pos(MXS_MONITORED_SERVER *database, int64_t domain, MySqlServerInfo* info)
{ {
StringVector row; StringVector row;
if (query_one_row(database, "SELECT @@gtid_slave_pos;", 1, &row)) bool rval = query_one_row(database, "SELECT @@gtid_slave_pos;", 1, &row);
if (rval)
{ {
info->gtid_slave_pos = Gtid(row.front().c_str(), domain); info->gtid_slave_pos = Gtid(row.front().c_str(), domain);
} }
return rval;
} }