MXS-2012 Write replication lag to SERVER

Allows routers to read the value.
This commit is contained in:
Esa Korhonen
2018-08-20 15:25:01 +03:00
parent 44a57dbefd
commit 03cefcc4ac
7 changed files with 39 additions and 19 deletions

View File

@ -406,6 +406,7 @@ MariaDBServer* MariaDBMonitor::find_master_inside_cycle(ServerArray& cycle_membe
/**
* Assign replication role status bits to the servers in the cluster. Starts from the cluster master server.
* Also updates replication lag.
*/
void MariaDBMonitor::assign_server_roles()
{
@ -416,6 +417,7 @@ void MariaDBMonitor::assign_server_roles()
for (auto server : m_servers)
{
server->clear_status(remove_bits);
server->m_replication_lag = MXS_RLAG_UNDEFINED;
}
// Check the master node, label it as the [Master] if
@ -425,6 +427,8 @@ void MariaDBMonitor::assign_server_roles()
{
if (m_master->is_running())
{
// Master gets replication lag 0 even if it's replicating from an external server.
m_master->m_replication_lag = 0;
if (m_master->is_read_only())
{
// Special case: read_only is ON on a running master but there is no alternative master.
@ -464,7 +468,7 @@ void MariaDBMonitor::assign_server_roles()
/**
* Check if the servers replicating from the given node qualify for [Slave] and mark them. Continue the
* search to any found slaves.
* search to any found slaves. Also updates replication lag.
*
* @param start_node The root master node where the search begins. The node itself is not marked [Slave].
*/
@ -557,6 +561,16 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* start_node)
if (slave->is_running())
{
slave->set_status(SERVER_SLAVE);
// Write the replication lag for this slave. It may have multiple slave connections,
// in which case take the smallest value. This only counts the slave connections
// leading to the master or a relay.
int curr_rlag = slave->m_replication_lag;
int new_rlag = sstatus->seconds_behind_master;
if (new_rlag != MXS_RLAG_UNDEFINED &&
(curr_rlag == MXS_RLAG_UNDEFINED || new_rlag < curr_rlag))
{
slave->m_replication_lag = new_rlag;
}
}
}
}

View File

@ -468,9 +468,11 @@ void MariaDBMonitor::tick()
// Update shared status. The next functions read the shared status. TODO: change the following
// functions to read "pending_status" instead.
for (auto mon_srv = m_monitor->monitored_servers; mon_srv; mon_srv = mon_srv->next)
for (auto server : m_servers)
{
mon_srv->server->status = mon_srv->pending_status;
SERVER* srv = server->m_server_base->server;
srv->rlag = server->m_replication_lag;
srv->status = server->m_server_base->pending_status;
}
log_master_changes();

View File

@ -46,6 +46,7 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_
, m_latest_event(time(NULL))
, m_gtid_domain_id(GTID_DOMAIN_UNKNOWN)
, m_topology_changed(true)
, m_replication_lag(MXS_RLAG_UNDEFINED)
, m_print_update_errormsg(true)
{
ss_dassert(monitored_server);
@ -188,7 +189,11 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
SlaveStatus::slave_io_from_string(result->get_string(i_slave_io_running));
sstatus_row.slave_sql_running = (result->get_string(i_slave_sql_running) == "Yes");
sstatus_row.master_server_id = result->get_uint(i_master_server_id);
sstatus_row.seconds_behind_master = result->get_uint(i_seconds_behind_master);
auto rlag = result->get_uint(i_seconds_behind_master);
// If slave connection is stopped, the value given by the backend is null -> -1.
sstatus_row.seconds_behind_master = (rlag < 0) ? MXS_RLAG_UNDEFINED :
(rlag > INT_MAX) ? INT_MAX : rlag;
if (sstatus_row.slave_io_running == SlaveStatus::SLAVE_IO_YES && sstatus_row.slave_sql_running)
{
@ -1175,7 +1180,7 @@ string SlaveStatus::to_string() const
slave_sql_running ? "Yes" : "No");
string rval = string_printf(
" Host: %22s, IO/SQL running: %7s, Master ID: %4" PRId64 ", Gtid_IO_Pos: %s, R.Lag: %" PRId64,
" Host: %22s, IO/SQL running: %7s, Master ID: %4" PRId64 ", Gtid_IO_Pos: %s, R.Lag: %d",
host_port.c_str(), running_states.c_str(), master_server_id,
gtid_io_pos.to_string().c_str(), seconds_behind_master);
return rval;

View File

@ -53,7 +53,8 @@ public:
bool slave_sql_running = false; /* Slave SQL thread running state, true if "Yes" */
GtidList gtid_io_pos; /* Gtid I/O position of the slave thread. */
std::string last_error; /* Last IO or SQL error encountered. */
int64_t seconds_behind_master = 0; /* How much behind the slave is. */
int seconds_behind_master = MXS_RLAG_UNDEFINED; /* How much behind the slave is. */
std::string to_string() const;
static slave_io_running_t slave_io_from_string(const std::string& str);
static std::string slave_io_to_string(slave_io_running_t slave_io);
@ -151,6 +152,8 @@ public:
bool m_topology_changed; /**< Has anything that could affect replication topology changed
* this iteration? Causes: server id, slave connections,
* read-only. */
int m_replication_lag; /**< Replication lag of the server. Used during calculation so
* that the actual SERVER struct is only written to once. */
NodeData m_node; /**< Replication topology data */
SlaveStatusArray m_slave_status; /**< Data returned from SHOW SLAVE STATUS */
ReplicationSettings m_rpl_settings; /**< Miscellaneous replication related settings. These are not