Relay log clear supports multiple slave connections

Failover now waits for the relay log of the slave connection that replicates from the given master, instead of always using the first slave connection.
This commit is contained in:
Esa Korhonen
2018-08-28 13:54:53 +03:00
parent 85d8a85cde
commit c39177bc8d
5 changed files with 57 additions and 48 deletions

View File

@ -73,15 +73,19 @@ void NodeData::reset_indexes()
in_stack = false;
}
int64_t MariaDBServer::relay_log_events()
int64_t MariaDBServer::relay_log_events(const MariaDBServer* master)
{
/* The events_ahead-call below ignores domains where current_pos is ahead of io_pos. This situation is
* rare but is possible (I guess?) if the server is replicating a domain from multiple masters
* and decides to process events from one relay log before getting new events to the other. In
* any case, such events are obsolete and the server can be considered to have processed such logs. */
// TODO: Fix for multisource repl
return !m_slave_status.empty() ? GtidList::events_ahead(m_slave_status[0].gtid_io_pos, m_gtid_current_pos,
GtidList::MISSING_DOMAIN_LHS_ADD) : 0;
int64_t rval = -1;
const SlaveStatus* sstatus = slave_connection_status(master);
if (sstatus)
{
rval = sstatus->gtid_io_pos.events_ahead(m_gtid_current_pos, GtidList::MISSING_DOMAIN_IGNORE);
}
return rval;
}
std::unique_ptr<QueryResult> MariaDBServer::execute_query(const string& query, std::string* errmsg_out)
@ -397,7 +401,7 @@ bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t*
if (update_gtids())
{
const GtidList& compare_to = use_binlog_pos ? m_gtid_binlog_pos : m_gtid_current_pos;
if (GtidList::events_ahead(target, compare_to, GtidList::MISSING_DOMAIN_IGNORE) == 0)
if (target.events_ahead(compare_to, GtidList::MISSING_DOMAIN_IGNORE) == 0)
{
gtid_reached = true;
}
@ -656,31 +660,37 @@ bool MariaDBServer::join_cluster(const string& change_cmd)
return success;
}
bool MariaDBServer::failover_wait_relay_log(int seconds_remaining, json_t** err_out)
bool MariaDBServer::failover_wait_relay_log(const MariaDBServer* master, int seconds_remaining,
json_t** err_out)
{
time_t begin = time(NULL);
bool query_ok = true;
bool io_pos_stable = true;
while (relay_log_events() > 0 &&
query_ok &&
io_pos_stable &&
difftime(time(NULL), begin) < seconds_remaining)
int64_t events = relay_log_events(master);
while (events > 0 && query_ok && io_pos_stable && difftime(time(NULL), begin) < seconds_remaining)
{
MXS_INFO("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
name(), relay_log_events());
const SlaveStatus* sstatus = slave_connection_status(master);
mxb_assert(sstatus);
GtidList old_gtid_io_pos = sstatus->gtid_io_pos;
// Sleep for a while before querying server again.
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
// TODO: check server version before entering failover.
// TODO: fix for multisource
GtidList old_gtid_io_pos = m_slave_status[0].gtid_io_pos;
MXS_NOTICE("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
name(), events);
std::this_thread::sleep_for(std::chrono::seconds(1));
// Update the GTIDs first so that Gtid_IO_Pos is the more recently fetched value.
// The order does not matter here, but it is followed as a general rule.
query_ok = update_gtids() && do_show_slave_status();
io_pos_stable = (old_gtid_io_pos == m_slave_status[0].gtid_io_pos);
if (query_ok)
{
const SlaveStatus* new_sstatus = slave_connection_status(master);
io_pos_stable = new_sstatus ? (old_gtid_io_pos == new_sstatus->gtid_io_pos) : false;
events = relay_log_events(master);
}
}
bool rval = false;
if (relay_log_events() == 0)
if (events == 0 && query_ok && io_pos_stable)
{
rval = true;
}
@ -695,10 +705,9 @@ bool MariaDBServer::failover_wait_relay_log(int seconds_remaining, json_t** err_
{
reason = "Old master sent new event(s)";
}
else if (relay_log_events() < 0) // TODO: This is currently impossible
else if (events < 0)
{
reason = "Invalid Gtid(s) (current_pos: " + m_gtid_current_pos.to_string() +
", io_pos: " + m_slave_status[0].gtid_io_pos.to_string() + ")";
reason = string_printf("Slave connection to '%s' was removed", master->name());
}
PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. "
"Cancelling failover.", reason.c_str(), name());