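Relay log clearing supports multiple slave connections
Failover now waits for the relay log of the correct slave connection (the one to the given master) to clear, instead of always using the first slave connection.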
This commit is contained in:
@ -73,15 +73,19 @@ void NodeData::reset_indexes()
|
||||
in_stack = false;
|
||||
}
|
||||
|
||||
int64_t MariaDBServer::relay_log_events()
|
||||
int64_t MariaDBServer::relay_log_events(const MariaDBServer* master)
|
||||
{
|
||||
/* The events_ahead-call below ignores domains where current_pos is ahead of io_pos. This situation is
|
||||
* rare but is possible (I guess?) if the server is replicating a domain from multiple masters
|
||||
* and decides to process events from one relay log before getting new events to the other. In
|
||||
* any case, such events are obsolete and the server can be considered to have processed such logs. */
|
||||
// TODO: Fix for multisource repl
|
||||
return !m_slave_status.empty() ? GtidList::events_ahead(m_slave_status[0].gtid_io_pos, m_gtid_current_pos,
|
||||
GtidList::MISSING_DOMAIN_LHS_ADD) : 0;
|
||||
int64_t rval = -1;
|
||||
const SlaveStatus* sstatus = slave_connection_status(master);
|
||||
if (sstatus)
|
||||
{
|
||||
rval = sstatus->gtid_io_pos.events_ahead(m_gtid_current_pos, GtidList::MISSING_DOMAIN_IGNORE);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
std::unique_ptr<QueryResult> MariaDBServer::execute_query(const string& query, std::string* errmsg_out)
|
||||
@ -397,7 +401,7 @@ bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t*
|
||||
if (update_gtids())
|
||||
{
|
||||
const GtidList& compare_to = use_binlog_pos ? m_gtid_binlog_pos : m_gtid_current_pos;
|
||||
if (GtidList::events_ahead(target, compare_to, GtidList::MISSING_DOMAIN_IGNORE) == 0)
|
||||
if (target.events_ahead(compare_to, GtidList::MISSING_DOMAIN_IGNORE) == 0)
|
||||
{
|
||||
gtid_reached = true;
|
||||
}
|
||||
@ -656,31 +660,37 @@ bool MariaDBServer::join_cluster(const string& change_cmd)
|
||||
return success;
|
||||
}
|
||||
|
||||
bool MariaDBServer::failover_wait_relay_log(int seconds_remaining, json_t** err_out)
|
||||
bool MariaDBServer::failover_wait_relay_log(const MariaDBServer* master, int seconds_remaining,
|
||||
json_t** err_out)
|
||||
{
|
||||
time_t begin = time(NULL);
|
||||
bool query_ok = true;
|
||||
bool io_pos_stable = true;
|
||||
while (relay_log_events() > 0 &&
|
||||
query_ok &&
|
||||
io_pos_stable &&
|
||||
difftime(time(NULL), begin) < seconds_remaining)
|
||||
int64_t events = relay_log_events(master);
|
||||
while (events > 0 && query_ok && io_pos_stable && difftime(time(NULL), begin) < seconds_remaining)
|
||||
{
|
||||
MXS_INFO("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
|
||||
name(), relay_log_events());
|
||||
const SlaveStatus* sstatus = slave_connection_status(master);
|
||||
mxb_assert(sstatus);
|
||||
GtidList old_gtid_io_pos = sstatus->gtid_io_pos;
|
||||
|
||||
// Sleep for a while before querying server again.
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
// TODO: check server version before entering failover.
|
||||
// TODO: fix for multisource
|
||||
GtidList old_gtid_io_pos = m_slave_status[0].gtid_io_pos;
|
||||
MXS_NOTICE("Relay log of server '%s' not yet empty, waiting to clear %" PRId64 " events.",
|
||||
name(), events);
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
|
||||
// Update gtid:s first to make sure Gtid_IO_Pos is the more recent value.
|
||||
// It doesn't matter here, but is a general rule.
|
||||
query_ok = update_gtids() && do_show_slave_status();
|
||||
io_pos_stable = (old_gtid_io_pos == m_slave_status[0].gtid_io_pos);
|
||||
if (query_ok)
|
||||
{
|
||||
const SlaveStatus* new_sstatus = slave_connection_status(master);
|
||||
io_pos_stable = new_sstatus ? (old_gtid_io_pos == new_sstatus->gtid_io_pos) : false;
|
||||
events = relay_log_events(master);
|
||||
}
|
||||
}
|
||||
|
||||
bool rval = false;
|
||||
if (relay_log_events() == 0)
|
||||
if (events == 0 && query_ok && io_pos_stable)
|
||||
{
|
||||
rval = true;
|
||||
}
|
||||
@ -695,10 +705,9 @@ bool MariaDBServer::failover_wait_relay_log(int seconds_remaining, json_t** err_
|
||||
{
|
||||
reason = "Old master sent new event(s)";
|
||||
}
|
||||
else if (relay_log_events() < 0) // TODO: This is currently impossible
|
||||
else if (events < 0)
|
||||
{
|
||||
reason = "Invalid Gtid(s) (current_pos: " + m_gtid_current_pos.to_string() +
|
||||
", io_pos: " + m_slave_status[0].gtid_io_pos.to_string() + ")";
|
||||
reason = string_printf("Slave connection to '%s' was removed", master->name());
|
||||
}
|
||||
PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. "
|
||||
"Cancelling failover.", reason.c_str(), name());
|
||||
|
||||
Reference in New Issue
Block a user