MXS-1744 Take the new Gtid class into use

Also cleaned up mariadbserver a bit.
This commit is contained in:
Esa Korhonen
2018-03-29 14:07:51 +03:00
parent 36bea39b63
commit e43678bed9
8 changed files with 115 additions and 97 deletions

View File

@ -658,7 +658,7 @@ void MariaDBMonitor::monitor_database(MariaDBServer* serv_info)
/* If gtid domain exists and server is 10.0, update gtid:s */
if (m_master_gtid_domain >= 0 && serv_info->version == MYSQL_SERVER_VERSION_100)
{
serv_info->update_gtids(m_master_gtid_domain);
serv_info->update_gtids();
}
/* Check for MariaDB 10.x.x and get status for multi-master replication */
if (serv_info->version == MYSQL_SERVER_VERSION_100 || serv_info->version == MYSQL_SERVER_VERSION_55)
@ -693,7 +693,7 @@ void MariaDBMonitor::monitor_mysql_db(MariaDBServer* serv_info)
monitor_clear_pending_status(database, SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER |
SERVER_SLAVE_OF_EXTERNAL_MASTER);
if (serv_info->do_show_slave_status(m_master_gtid_domain))
if (serv_info->do_show_slave_status())
{
/* If all configured slaves are running set this node as slave */
if (serv_info->slave_configured && serv_info->n_slaves_running > 0 &&
@ -718,8 +718,8 @@ MariaDBServer* MariaDBMonitor::update_slave_info(MXS_MONITORED_SERVER* server)
MariaDBServer* info = get_server_info(server);
if (info->slave_status.slave_sql_running &&
info->update_replication_settings() &&
info->update_gtids(m_master_gtid_domain) &&
info->do_show_slave_status(m_master_gtid_domain))
info->update_gtids() &&
info->do_show_slave_status())
{
return info;
}

View File

@ -156,7 +156,7 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
if (server_is_rejoin_suspect(mon_server, master_info, output))
{
if (master_info->update_gtids(m_master_gtid_domain))
if (master_info->update_gtids())
{
if (can_replicate_from(mon_server, server_info, master_info))
{
@ -433,7 +433,7 @@ bool MariaDBMonitor::get_joinable_servers(ServerVector* output)
bool comm_ok = true;
if (!suspects.empty())
{
if (master_info->update_gtids(m_master_gtid_domain))
if (master_info->update_gtids())
{
for (size_t i = 0; i < suspects.size(); i++)
{
@ -811,11 +811,10 @@ bool MariaDBMonitor::failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, i
new_master->server->unique_name, master_info->relay_log_events());
thread_millisleep(1000); // Sleep for a while before querying server again.
// Todo: check server version before entering failover.
GtidTriplet old_gtid_io_pos = master_info->slave_status.gtid_io_pos;
Gtid old_gtid_io_pos = master_info->slave_status.gtid_io_pos;
// Update gtid:s first to make sure Gtid_IO_Pos is the more recent value.
// It doesn't matter here, but is a general rule.
query_ok = master_info->update_gtids(m_master_gtid_domain) &&
master_info->do_show_slave_status(m_master_gtid_domain);
query_ok = master_info->update_gtids() && master_info->do_show_slave_status();
io_pos_stable = (old_gtid_io_pos == master_info->slave_status.gtid_io_pos);
}
@ -903,7 +902,7 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast
if (!query_error)
{
query = "";
if (info->update_gtids(m_master_gtid_domain))
if (info->update_gtids())
{
success = true;
}
@ -962,7 +961,7 @@ bool MariaDBMonitor::switchover_demote_master(MXS_MONITORED_SERVER* current_mast
* @param err_out json object for error printing. Can be NULL.
* @return True, if target gtid was reached within allotted time for all servers
*/
bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves, const GtidTriplet& gtid,
bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves, const Gtid& gtid,
int total_timeout, int read_timeout, json_t** err_out)
{
bool success = true;
@ -1003,7 +1002,7 @@ bool MariaDBMonitor::switchover_wait_slaves_catchup(const ServerVector& slaves,
* @param err_out json object for error printing. Can be NULL.
* @return True, if target gtid was reached within allotted time
*/
bool MariaDBMonitor::switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const GtidTriplet& gtid,
bool MariaDBMonitor::switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const Gtid& gtid,
int total_timeout, int read_timeout,
json_t** err_out)
{
@ -1071,12 +1070,12 @@ bool MariaDBMonitor::wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master
MariaDBServer* new_master_info = get_server_info(new_master);
if (mxs_mysql_query(new_master->con, "FLUSH TABLES;") == 0 &&
new_master_info->update_gtids(m_master_gtid_domain))
new_master_info->update_gtids())
{
int query_fails = 0;
int repl_fails = 0;
int successes = 0;
const GtidTriplet target = new_master_info->gtid_current_pos;
const Gtid& target = new_master_info->gtid_current_pos;
ServerVector wait_list = slaves; // Check all the servers in the list
bool first_round = true;
bool time_is_up = false;
@ -1094,8 +1093,8 @@ bool MariaDBMonitor::wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master
{
MXS_MONITORED_SERVER* slave = wait_list[i];
MariaDBServer* slave_info = get_server_info(slave);
if (slave_info->update_gtids(m_master_gtid_domain) &&
slave_info->do_show_slave_status(m_master_gtid_domain))
if (slave_info->update_gtids() &&
slave_info->do_show_slave_status())
{
if (!slave_info->slave_status.last_error.empty())
{
@ -1106,7 +1105,8 @@ bool MariaDBMonitor::wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master
wait_list.erase(wait_list.begin() + i);
repl_fails++;
}
else if (slave_info->gtid_current_pos.sequence >= target.sequence)
else if (Gtid::events_ahead(target, slave_info->gtid_current_pos,
Gtid::MISSING_DOMAIN_IGNORE) == 0)
{
// This slave has reached the same gtid as master, remove from list
wait_list.erase(wait_list.begin() + i);
@ -1245,7 +1245,8 @@ MXS_MONITORED_SERVER* MariaDBMonitor::select_new_master(ServerVector* slaves_out
else if (cand_info->check_replication_settings())
{
// If no new master yet, accept any valid candidate. Otherwise check.
if (current_best == NULL || is_candidate_better(current_best_info, cand_info))
if (current_best == NULL ||
is_candidate_better(current_best_info, cand_info, m_master_gtid_domain))
{
// The server has been selected for promotion, for now.
current_best = cand;
@ -1278,7 +1279,7 @@ MXS_MONITORED_SERVER* MariaDBMonitor::select_new_master(ServerVector* slaves_out
MXS_WARNING(EXCLUDED_ONLY_CAND, excluded_name);
break;
}
else if (is_candidate_better(current_best_info, excluded_info))
else if (is_candidate_better(current_best_info, excluded_info, m_master_gtid_domain))
{
// Print a warning if this server is actually a better candidate than the previous
// best.
@ -1322,15 +1323,17 @@ bool MariaDBMonitor::server_is_excluded(const MXS_MONITORED_SERVER* server)
*
* @param current_best_info Server info of current best choice
* @param candidate_info Server info of new candidate
* @param gtid_domain Which domain to compare
* @return True if candidate is better
*/
bool MariaDBMonitor::is_candidate_better(const MariaDBServer* current_best_info,
const MariaDBServer* candidate_info)
const MariaDBServer* candidate_info, uint32_t gtid_domain)
{
uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.sequence;
uint64_t cand_processed = candidate_info->gtid_current_pos.sequence;
uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.sequence;
uint64_t curr_processed = current_best_info->gtid_current_pos.sequence;
uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.get_triplet(gtid_domain).sequence;
uint64_t cand_processed = candidate_info->gtid_current_pos.get_triplet(gtid_domain).sequence;
uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.get_triplet(gtid_domain).sequence;
uint64_t curr_processed = current_best_info->gtid_current_pos.get_triplet(gtid_domain).sequence;
bool cand_updates = candidate_info->rpl_settings.log_slave_updates;
bool curr_updates = current_best_info->rpl_settings.log_slave_updates;
bool is_better = false;
@ -1496,19 +1499,9 @@ bool MariaDBMonitor::can_replicate_from(MXS_MONITORED_SERVER* slave,
MariaDBServer* slave_info, MariaDBServer* master_info)
{
bool rval = false;
if (slave_info->update_gtids(m_master_gtid_domain))
if (slave_info->update_gtids())
{
GtidTriplet slave_gtid = slave_info->gtid_current_pos;
GtidTriplet master_gtid = master_info->gtid_binlog_pos;
// The following are not sufficient requirements for replication to work, they only cover the basics.
// If the servers have diverging histories, the redirection will seem to succeed but the slave IO
// thread will stop in error.
if (slave_gtid.server_id != SERVER_ID_UNKNOWN && master_gtid.server_id != SERVER_ID_UNKNOWN &&
slave_gtid.domain == master_gtid.domain &&
slave_gtid.sequence <= master_info->gtid_current_pos.sequence)
{
rval = true;
}
rval = slave_info->gtid_current_pos.can_replicate_from(master_info->gtid_binlog_pos);
}
return rval;
}
@ -1630,7 +1623,7 @@ bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_
{
bool rval = false;
const MariaDBServer* info = get_server_info(mon_server);
if (info->slave_status.gtid_io_pos.server_id == SERVER_ID_UNKNOWN)
if (info->slave_status.gtid_io_pos.empty())
{
string slave_not_gtid_msg = string("Slave server ") + mon_server->server->unique_name +
" is not using gtid replication.";

View File

@ -269,17 +269,17 @@ void MariaDBMonitor::diagnostics(DCB *dcb) const
dcb_printf(dcb, "Master binlog position: %lu\n",
serv_info->slave_status.read_master_log_pos);
}
if (serv_info->gtid_current_pos.server_id != SERVER_ID_UNKNOWN)
if (!serv_info->gtid_current_pos.empty())
{
dcb_printf(dcb, "Gtid current position: %s\n",
serv_info->gtid_current_pos.to_string().c_str());
}
if (serv_info->gtid_binlog_pos.server_id != SERVER_ID_UNKNOWN)
if (!serv_info->gtid_binlog_pos.empty())
{
// Report the binlog position itself; gtid_current_pos is already printed by the branch above.
dcb_printf(dcb, "Gtid binlog position: %s\n",
serv_info->gtid_binlog_pos.to_string().c_str());
}
if (serv_info->slave_status.gtid_io_pos.server_id != SERVER_ID_UNKNOWN)
if (!serv_info->slave_status.gtid_io_pos.empty())
{
dcb_printf(dcb, "Gtid slave IO position: %s\n",
serv_info->slave_status.gtid_io_pos.to_string().c_str());

View File

@ -197,9 +197,9 @@ private:
bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out);
bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info,
json_t** err_out);
bool switchover_wait_slaves_catchup(const ServerVector& slaves, const GtidTriplet& gtid, int total_timeout,
bool switchover_wait_slaves_catchup(const ServerVector& slaves, const Gtid& gtid, int total_timeout,
int read_timeout, json_t** err_out);
bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const GtidTriplet& gtid,
bool switchover_wait_slave_catchup(MXS_MONITORED_SERVER* slave, const Gtid& gtid,
int total_timeout, int read_timeout, json_t** err_out);
bool wait_cluster_stabilization(MXS_MONITORED_SERVER* new_master, const ServerVector& slaves,
int seconds_remaining);
@ -207,7 +207,8 @@ private:
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out);
MXS_MONITORED_SERVER* select_new_master(ServerVector* slaves_out, json_t** err_out);
bool server_is_excluded(const MXS_MONITORED_SERVER* server);
bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info);
bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info,
uint32_t gtid_domain);
MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server);
void init_server_info();
bool slave_receiving_events();

View File

@ -44,14 +44,11 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server)
int64_t MariaDBServer::relay_log_events()
{
// Calculates how many events are left in the relay log, based on the latest queried gtid values.
if (slave_status.gtid_io_pos.server_id != SERVER_ID_UNKNOWN &&
gtid_current_pos.server_id != SERVER_ID_UNKNOWN &&
slave_status.gtid_io_pos.domain == gtid_current_pos.domain &&
slave_status.gtid_io_pos.sequence >= gtid_current_pos.sequence)
{
return slave_status.gtid_io_pos.sequence - gtid_current_pos.sequence;
}
return -1;
/* The events_ahead() call below ignores domains where gtid_current_pos is ahead of gtid_io_pos.
 * That situation is rare, but is presumably possible if the server replicates a domain from
 * multiple masters and processes events from one relay log before receiving new events into
 * the other (NOTE(review): unconfirmed). In any case such events are obsolete, and the server
 * can be considered to have already processed those logs. */
return Gtid::events_ahead(slave_status.gtid_io_pos, gtid_current_pos, Gtid::MISSING_DOMAIN_LHS_ADD);
}
std::auto_ptr<QueryResult> MariaDBServer::execute_query(const string& query)
@ -70,7 +67,7 @@ std::auto_ptr<QueryResult> MariaDBServer::execute_query(const string& query)
return rval;
}
bool MariaDBServer::do_show_slave_status(int64_t gtid_domain)
bool MariaDBServer::do_show_slave_status()
{
/** Column positions for SHOW SLAVE STATUS */
const size_t MYSQL55_STATUS_MASTER_LOG_POS = 5;
@ -79,7 +76,6 @@ bool MariaDBServer::do_show_slave_status(int64_t gtid_domain)
const size_t MYSQL55_STATUS_SQL_RUNNING = 11;
const size_t MYSQL55_STATUS_MASTER_ID = 39;
bool rval = true;
unsigned int columns;
int i_slave_io_running, i_slave_sql_running, i_read_master_log_pos, i_master_server_id, i_master_log_file;
int i_last_io_errno, i_last_io_error, i_last_sql_error, i_slave_rec_hbs, i_slave_hb_period;
@ -199,16 +195,15 @@ bool MariaDBServer::do_show_slave_status(int64_t gtid_domain)
heartbeat_period = result->get_uint(i_slave_hb_period);
}
string using_gtid = result->get_string(i_using_gtid);
if (gtid_domain >= 0 && (using_gtid == "Current_Pos" || using_gtid == "Slave_Pos"))
string gtid_io_pos = result->get_string(i_gtid_io_pos);
if (!gtid_io_pos.empty() &&
(using_gtid == "Current_Pos" || using_gtid == "Slave_Pos"))
{
string gtid_io_pos = result->get_string(i_gtid_io_pos);
slave_status.gtid_io_pos = !gtid_io_pos.empty() ?
GtidTriplet(gtid_io_pos.c_str(), gtid_domain) :
GtidTriplet();
slave_status.gtid_io_pos = Gtid::from_string(gtid_io_pos);
}
else
{
slave_status.gtid_io_pos = GtidTriplet();
slave_status.gtid_io_pos = Gtid();
}
}
}
@ -228,23 +223,44 @@ bool MariaDBServer::do_show_slave_status(int64_t gtid_domain)
slave_status.master_server_id = master_server_id;
n_slaves_configured = nconfigured;
n_slaves_running = nrunning;
return rval;
return true;
}
bool MariaDBServer::update_gtids(int64_t gtid_domain)
bool MariaDBServer::update_gtids()
{
// Queries @@gtid_current_pos and @@gtid_binlog_pos from the server and stores the parsed
// values in gtid_current_pos and gtid_binlog_pos.
ss_dassert(gtid_domain >= 0);
static const string query = "SELECT @@gtid_current_pos, @@gtid_binlog_pos;";
const int ind_current_pos = 0;
const int ind_binlog_pos = 1;
// Column indexes in the result row of the query above.
const int i_current_pos = 0;
const int i_binlog_pos = 1;
bool rval = false;
auto result = execute_query(query);
if (result.get() != NULL && result->next_row())
{
gtid_current_pos = result->get_gtid(ind_current_pos, gtid_domain);
gtid_binlog_pos = result->get_gtid(ind_binlog_pos, gtid_domain);
rval = true;
auto current_str = result->get_string(i_current_pos);
auto binlog_str = result->get_string(i_binlog_pos);
bool current_ok = false;
bool binlog_ok = false;
// An empty string resets the stored value to an empty Gtid and leaves the matching
// *_ok flag false.
if (current_str.empty())
{
gtid_current_pos = Gtid();
}
else
{
gtid_current_pos = Gtid::from_string(current_str);
current_ok = !gtid_current_pos.empty();
}
if (binlog_str.empty())
{
gtid_binlog_pos = Gtid();
}
else
{
gtid_binlog_pos = Gtid::from_string(binlog_str);
binlog_ok = !gtid_binlog_pos.empty();
}
// Success requires both values to be non-empty after parsing.
// NOTE(review): a server returning an empty @@gtid_binlog_pos is reported as a failure
// here even though the query itself succeeded -- confirm that callers expect this.
rval = (current_ok && binlog_ok);
}
return rval;
}
@ -276,23 +292,23 @@ void MariaDBServer::read_server_variables()
columns = 3;
}
int ind_id = 0;
int ind_ro = 1;
int ind_domain = 2;
int i_id = 0;
int i_ro = 1;
int i_domain = 2;
auto result = execute_query(query);
if (result.get() != NULL && result->next_row())
{
int64_t server_id_parsed = result->get_uint(ind_id);
int64_t server_id_parsed = result->get_uint(i_id);
if (server_id_parsed < 0)
{
server_id_parsed = SERVER_ID_UNKNOWN;
}
database->server->node_id = server_id_parsed;
server_id = server_id_parsed;
read_only = result->get_bool(ind_ro);
read_only = result->get_bool(i_ro);
if (columns == 3)
{
gtid_domain_id = result->get_uint(ind_domain);
gtid_domain_id = result->get_uint(i_domain);
}
}
}

View File

@ -47,8 +47,7 @@ public:
* reading from. */
uint64_t read_master_log_pos; /**< Position up to which the I/O thread has read in the current master
* binary log file. */
GtidTriplet gtid_io_pos; /**< Gtid I/O position of the slave thread. Only shows the triplet with
* the current master domain. */
Gtid gtid_io_pos; /**< Gtid I/O position of the slave thread. */
std::string last_error; /**< Last IO or SQL error encountered. */
SlaveStatusInfo();
@ -92,21 +91,17 @@ public:
time_t latest_event; /**< Time when latest event was received from the master */
int64_t gtid_domain_id; /**< The value of gtid_domain_id, the domain which is used for
* new non-replicated events. */
GtidTriplet gtid_current_pos; /**< Gtid of latest event. Only shows the triplet
* with the current master domain. */
GtidTriplet gtid_binlog_pos; /**< Gtid of latest event written to binlog. Only shows
* the triplet with the current master domain. */
Gtid gtid_current_pos; /**< Gtid of latest event. */
Gtid gtid_binlog_pos; /**< Gtid of latest event written to binlog. */
SlaveStatusInfo slave_status; /**< Data returned from SHOW SLAVE STATUS */
ReplicationSettings rpl_settings; /**< Miscellaneous replication related settings */
MariaDBServer(MXS_MONITORED_SERVER* monitored_server);
/**
* Calculate how many events are left in the relay log. If gtid_current_pos is ahead of Gtid_IO_Pos,
* or a server_id is unknown, an error value is returned.
* Calculate how many events are left in the relay log.
*
* @return Number of events in relay log according to latest queried info. A negative value signifies
* an error in the gtid-values.
* @return Number of events in relay log according to latest queried info.
*/
int64_t relay_log_events();
@ -126,16 +121,14 @@ public:
* @param gtid_domain Which gtid domain should be parsed.
* @return True on success
*/
bool do_show_slave_status(int64_t gtid_domain);
bool do_show_slave_status();
/**
* Query gtid_current_pos and gtid_binlog_pos and save the values to the server.
* Only the given domain is parsed.
*
* @param gtid_domain Which gtid domain should be parsed
* @return True if successful
*/
bool update_gtids(int64_t gtid_domain);
bool update_gtids();
/**
* Query a few miscellaneous replication settings.

View File

@ -446,9 +446,23 @@ void GtidTriplet::parse_triplet(const char* str)
ss_dassert(rv == 3);
}
string GtidTriplet::generate_master_gtid_wait_cmd(double timeout) const
string Gtid::generate_master_gtid_wait_cmd(double timeout) const
{
// Builds the query text: SELECT MASTER_GTID_WAIT("<to_string() of this gtid>", <timeout>);
std::stringstream query_ss;
query_ss << "SELECT MASTER_GTID_WAIT(\"" << to_string() << "\", " << timeout << ");";
return query_ss.str();
}
GtidTriplet Gtid::get_triplet(uint32_t domain) const
{
// Only the domain of the probe matters to the comparator; the other fields are placeholders.
const GtidTriplet probe(domain, -1, 0);
auto pos = std::lower_bound(m_triplets.begin(), m_triplets.end(), probe,
GtidTriplet::compare_domains);
// lower_bound yields the first element not ordered before the probe, so the domain still
// has to be checked for an exact match.
const bool match = (pos != m_triplets.end()) && (pos->domain == domain);
// No match: hand back a default-constructed triplet.
return match ? *pos : GtidTriplet();
}

View File

@ -109,14 +109,6 @@ public:
std::string to_string() const;
/**
* Generate a MASTER_GTID_WAIT()-query to this gtid.
*
* @param timeout Maximum wait time in seconds
* @return The query
*/
std::string generate_master_gtid_wait_cmd(double timeout) const;
/**
* Comparator, used when sorting by domain id.
*
@ -205,11 +197,20 @@ public:
static uint64_t events_ahead(const Gtid& lhs, const Gtid& rhs,
substraction_mode_t domain_substraction_mode);
/**
* Generate a MASTER_GTID_WAIT()-query to this gtid.
*
* @param timeout Maximum wait time in seconds
* @return The query
*/
std::string generate_master_gtid_wait_cmd(double timeout) const;
/**
 * Return the triplet of the given domain.
 *
 * @param domain Which gtid domain to look up
 * @return The triplet with the matching domain, or a default-constructed triplet if this gtid
 * does not contain the domain
 */
GtidTriplet get_triplet(uint32_t domain) const;
private:
std::vector<GtidTriplet> m_triplets;
};
/**
* Helper class for simplifying working with resultsets. Used in MariaDBServer.
*/