MXS-2012 Write replication lag to SERVER

Allows routers to read the value.
This commit is contained in:
Esa Korhonen 2018-08-20 15:25:01 +03:00
parent 44a57dbefd
commit 03cefcc4ac
7 changed files with 39 additions and 19 deletions

View File

@ -49,6 +49,10 @@ const int MAINTENANCE_NO_CHANGE = 0;
const int MAINTENANCE_ON = 100;
const int MAINTENANCE_FLAG_NOCHECK = 0;
const int MAINTENANCE_FLAG_CHECK = -1;
// Default replication lag value
const int MXS_RLAG_UNDEFINED = -1;
/**
* The server parameters used for weighting routing decisions
*/
@ -158,12 +162,6 @@ typedef struct server
MxsDiskSpaceThreshold* disk_space_threshold; /**< Disk space thresholds */
} SERVER;
enum
{
MAX_RLAG_NOT_AVAILABLE = -1,
MAX_RLAG_UNDEFINED = -2
};
/**
* Status bits in the SERVER->status member, which describes the general state of a server. Although the
* individual bits are independent, not all combinations make sense or are used. The bitfield is 64bits wide.

View File

@ -169,7 +169,7 @@ SERVER* server_alloc(const char *name, MXS_CONFIG_PARAMETER* params)
server->version = 0;
server->server_type = SERVER_TYPE_MARIADB;
server->node_id = -1;
server->rlag = MAX_RLAG_UNDEFINED;
server->rlag = MXS_RLAG_UNDEFINED;
server->node_ts = 0;
server->master_id = -1;
server->master_err_is_logged = false;

View File

@ -406,6 +406,7 @@ MariaDBServer* MariaDBMonitor::find_master_inside_cycle(ServerArray& cycle_membe
/**
* Assign replication role status bits to the servers in the cluster. Starts from the cluster master server.
* Also updates replication lag.
*/
void MariaDBMonitor::assign_server_roles()
{
@ -416,6 +417,7 @@ void MariaDBMonitor::assign_server_roles()
for (auto server : m_servers)
{
server->clear_status(remove_bits);
server->m_replication_lag = MXS_RLAG_UNDEFINED;
}
// Check the master node, label it as the [Master] if
@ -425,6 +427,8 @@ void MariaDBMonitor::assign_server_roles()
{
if (m_master->is_running())
{
// Master gets replication lag 0 even if it's replicating from an external server.
m_master->m_replication_lag = 0;
if (m_master->is_read_only())
{
// Special case: read_only is ON on a running master but there is no alternative master.
@ -464,7 +468,7 @@ void MariaDBMonitor::assign_server_roles()
/**
* Check if the servers replicating from the given node qualify for [Slave] and mark them. Continue the
* search to any found slaves.
* search to any found slaves. Also updates replication lag.
*
* @param start_node The root master node where the search begins. The node itself is not marked [Slave].
*/
@ -557,6 +561,16 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* start_node)
if (slave->is_running())
{
slave->set_status(SERVER_SLAVE);
// Write the replication lag for this slave. It may have multiple slave connections,
// in which case take the smallest value. This only counts the slave connections
// leading to the master or a relay.
int curr_rlag = slave->m_replication_lag;
int new_rlag = sstatus->seconds_behind_master;
if (new_rlag != MXS_RLAG_UNDEFINED &&
(curr_rlag == MXS_RLAG_UNDEFINED || new_rlag < curr_rlag))
{
slave->m_replication_lag = new_rlag;
}
}
}
}

View File

@ -468,9 +468,11 @@ void MariaDBMonitor::tick()
// Update shared status. The next functions read the shared status. TODO: change the following
// functions to read "pending_status" instead.
for (auto mon_srv = m_monitor->monitored_servers; mon_srv; mon_srv = mon_srv->next)
for (auto server : m_servers)
{
mon_srv->server->status = mon_srv->pending_status;
SERVER* srv = server->m_server_base->server;
srv->rlag = server->m_replication_lag;
srv->status = server->m_server_base->pending_status;
}
log_master_changes();

View File

@ -46,6 +46,7 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_
, m_latest_event(time(NULL))
, m_gtid_domain_id(GTID_DOMAIN_UNKNOWN)
, m_topology_changed(true)
, m_replication_lag(MXS_RLAG_UNDEFINED)
, m_print_update_errormsg(true)
{
ss_dassert(monitored_server);
@ -188,7 +189,11 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
SlaveStatus::slave_io_from_string(result->get_string(i_slave_io_running));
sstatus_row.slave_sql_running = (result->get_string(i_slave_sql_running) == "Yes");
sstatus_row.master_server_id = result->get_uint(i_master_server_id);
sstatus_row.seconds_behind_master = result->get_uint(i_seconds_behind_master);
auto rlag = result->get_uint(i_seconds_behind_master);
// If slave connection is stopped, the value given by the backend is null -> -1.
sstatus_row.seconds_behind_master = (rlag < 0) ? MXS_RLAG_UNDEFINED :
(rlag > INT_MAX) ? INT_MAX : rlag;
if (sstatus_row.slave_io_running == SlaveStatus::SLAVE_IO_YES && sstatus_row.slave_sql_running)
{
@ -1175,7 +1180,7 @@ string SlaveStatus::to_string() const
slave_sql_running ? "Yes" : "No");
string rval = string_printf(
" Host: %22s, IO/SQL running: %7s, Master ID: %4" PRId64 ", Gtid_IO_Pos: %s, R.Lag: %" PRId64,
" Host: %22s, IO/SQL running: %7s, Master ID: %4" PRId64 ", Gtid_IO_Pos: %s, R.Lag: %d",
host_port.c_str(), running_states.c_str(), master_server_id,
gtid_io_pos.to_string().c_str(), seconds_behind_master);
return rval;

View File

@ -53,7 +53,8 @@ public:
bool slave_sql_running = false; /* Slave SQL thread running state, true if "Yes" */
GtidList gtid_io_pos; /* Gtid I/O position of the slave thread. */
std::string last_error; /* Last IO or SQL error encountered. */
int64_t seconds_behind_master = 0; /* How much behind the slave is. */
int seconds_behind_master = MXS_RLAG_UNDEFINED; /* How much behind the slave is. */
std::string to_string() const;
static slave_io_running_t slave_io_from_string(const std::string& str);
static std::string slave_io_to_string(slave_io_running_t slave_io);
@ -151,6 +152,8 @@ public:
bool m_topology_changed; /**< Has anything that could affect replication topology changed
* this iteration? Causes: server id, slave connections,
* read-only. */
int m_replication_lag; /**< Replication lag of the server. Used during calculation so
* that the actual SERVER struct is only written to once. */
NodeData m_node; /**< Replication topology data */
SlaveStatusArray m_slave_status; /**< Data returned from SHOW SLAVE STATUS */
ReplicationSettings m_rpl_settings; /**< Miscellaneous replication related settings. These are not

View File

@ -552,9 +552,7 @@ bool RWSplitSession::route_session_write(GWBUF *querybuf, uint8_t command, uint3
*/
static inline bool rpl_lag_is_ok(SRWBackend& backend, int max_rlag)
{
return max_rlag == MAX_RLAG_UNDEFINED ||
(backend->server()->rlag != MAX_RLAG_NOT_AVAILABLE &&
backend->server()->rlag <= max_rlag);
// Accepts the backend when max_rlag is MXS_RLAG_UNDEFINED, or when the server's recorded
// rlag is at most max_rlag.
// NOTE(review): MXS_RLAG_UNDEFINED is -1, so a server whose rlag is still undefined also
// satisfies `rlag <= max_rlag` for any non-negative limit. Unlike the three-line return
// above, this form has no explicit "lag not available" rejection. Confirm that treating
// an unknown lag as acceptable is intended.
return max_rlag == MXS_RLAG_UNDEFINED || backend->server()->rlag <= max_rlag;
}
SRWBackend RWSplitSession::get_hinted_backend(char *name)
@ -735,7 +733,7 @@ int RWSplitSession::get_max_replication_lag()
SRWBackend RWSplitSession::handle_hinted_target(GWBUF *querybuf, route_target_t route_target)
{
char *named_server = NULL;
int rlag_max = MAX_RLAG_UNDEFINED;
int rlag_max = MXS_RLAG_UNDEFINED;
HINT* hint = querybuf->hint;
@ -766,7 +764,7 @@ SRWBackend RWSplitSession::handle_hinted_target(GWBUF *querybuf, route_target_t
hint = hint->next;
} /*< while */
if (rlag_max == MAX_RLAG_UNDEFINED) /*< no rlag max hint, use config */
if (rlag_max == MXS_RLAG_UNDEFINED) /*< no rlag max hint, use config */
{
rlag_max = get_max_replication_lag();
}
@ -953,7 +951,7 @@ bool RWSplitSession::should_migrate_trx(SRWBackend& target)
*/
bool RWSplitSession::handle_master_is_target(SRWBackend* dest)
{
SRWBackend target = get_target_backend(BE_MASTER, NULL, MAX_RLAG_UNDEFINED);
SRWBackend target = get_target_backend(BE_MASTER, NULL, MXS_RLAG_UNDEFINED);
bool succp = true;
if (should_replace_master(target))