// Patch summary (descriptive preamble only; the patch text below is unchanged).
//
// Purpose: the MariaDB monitor computes a per-server replication lag and publishes it to
// SERVER::rlag; the two old sentinel values are replaced by a single one.
//
// include/maxscale/server.h
//   - Adds `const int MXS_RLAG_UNDEFINED = -1`.
//   - Removes the anonymous enum holding MAX_RLAG_NOT_AVAILABLE (-1) and MAX_RLAG_UNDEFINED (-2).
// server/core/server.cc
//   - server_alloc() initialises server->rlag to MXS_RLAG_UNDEFINED.
// server/modules/monitor/mariadbmon/cluster_discovery.cc
//   - assign_server_roles() resets m_replication_lag of every server to MXS_RLAG_UNDEFINED and
//     sets it to 0 for a running master.
//   - assign_slave_and_relay_master() stores, for a running slave, the smallest defined
//     seconds_behind_master among the slave connections it visits.
// server/modules/monitor/mariadbmon/mariadbmon.cc
//   - tick() now iterates m_servers and copies both m_replication_lag and pending_status into
//     the SERVER struct.
// server/modules/monitor/mariadbmon/mariadbserver.cc / mariadbserver.hh
//   - SlaveStatus::seconds_behind_master becomes `int`, defaulting to MXS_RLAG_UNDEFINED.
//   - do_show_slave_status() maps a negative reading to MXS_RLAG_UNDEFINED and clamps values
//     above INT_MAX to INT_MAX.
//   - SlaveStatus::to_string() prints the lag with %d instead of PRId64.
//   - MariaDBServer gains the member m_replication_lag, initialised to MXS_RLAG_UNDEFINED.
// server/modules/routing/readwritesplit/rwsplit_route_stmt.cc
//   - MAX_RLAG_UNDEFINED is replaced by MXS_RLAG_UNDEFINED.
//   - rpl_lag_is_ok() is reduced to `max_rlag == MXS_RLAG_UNDEFINED || rlag <= max_rlag`.
//
// NOTE(review): `rlag` is declared `auto` from result->get_uint(...); the `rlag < 0` test is only
// meaningful if get_uint() returns a signed type -- confirm against its declaration.
// NOTE(review): rpl_lag_is_ok() no longer rejects an unavailable lag explicitly. With
// MXS_RLAG_UNDEFINED == -1, a server whose lag is unknown satisfies `rlag <= max_rlag` for any
// non-negative limit -- confirm this is intended.
// NOTE(review): the hunks below appear with their line breaks collapsed; the original line
// structure presumably has to be restored before the patch can be applied -- verify.
diff --git a/include/maxscale/server.h b/include/maxscale/server.h index 2be3bf4c5..ddc8b18a9 100644 --- a/include/maxscale/server.h +++ b/include/maxscale/server.h @@ -49,6 +49,10 @@ const int MAINTENANCE_NO_CHANGE = 0; const int MAINTENANCE_ON = 100; const int MAINTENANCE_FLAG_NOCHECK = 0; const int MAINTENANCE_FLAG_CHECK = -1; + +// Default replication lag value +const int MXS_RLAG_UNDEFINED = -1; + /** * The server parameters used for weighting routing decissions */ @@ -158,12 +162,6 @@ typedef struct server MxsDiskSpaceThreshold* disk_space_threshold; /**< Disk space thresholds */ } SERVER; -enum -{ - MAX_RLAG_NOT_AVAILABLE = -1, - MAX_RLAG_UNDEFINED = -2 -}; - /** * Status bits in the SERVER->status member, which describes the general state of a server. Although the * individual bits are independent, not all combinations make sense or are used. The bitfield is 64bits wide. diff --git a/server/core/server.cc b/server/core/server.cc index a823a6760..53cf2c256 100644 --- a/server/core/server.cc +++ b/server/core/server.cc @@ -169,7 +169,7 @@ SERVER* server_alloc(const char *name, MXS_CONFIG_PARAMETER* params) server->version = 0; server->server_type = SERVER_TYPE_MARIADB; server->node_id = -1; - server->rlag = MAX_RLAG_UNDEFINED; + server->rlag = MXS_RLAG_UNDEFINED; server->node_ts = 0; server->master_id = -1; server->master_err_is_logged = false; diff --git a/server/modules/monitor/mariadbmon/cluster_discovery.cc b/server/modules/monitor/mariadbmon/cluster_discovery.cc index f8b0d601d..af58b232c 100644 --- a/server/modules/monitor/mariadbmon/cluster_discovery.cc +++ b/server/modules/monitor/mariadbmon/cluster_discovery.cc @@ -406,6 +406,7 @@ MariaDBServer* MariaDBMonitor::find_master_inside_cycle(ServerArray& cycle_membe /** * Assign replication role status bits to the servers in the cluster. Starts from the cluster master server. + * Also updates replication lag. 
*/ void MariaDBMonitor::assign_server_roles() { @@ -416,6 +417,7 @@ void MariaDBMonitor::assign_server_roles() for (auto server : m_servers) { server->clear_status(remove_bits); + server->m_replication_lag = MXS_RLAG_UNDEFINED; } // Check the the master node, label it as the [Master] if @@ -425,6 +427,8 @@ void MariaDBMonitor::assign_server_roles() { if (m_master->is_running()) { + // Master gets replication lag 0 even if it's replicating from an external server. + m_master->m_replication_lag = 0; if (m_master->is_read_only()) { // Special case: read_only is ON on a running master but there is no alternative master. @@ -464,7 +468,7 @@ void MariaDBMonitor::assign_server_roles() /** * Check if the servers replicating from the given node qualify for [Slave] and mark them. Continue the - * search to any found slaves. + * search to any found slaves. Also updates replication lag. * * @param start_node The root master node where the search begins. The node itself is not marked [Slave]. */ @@ -557,6 +561,16 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* start_node) if (slave->is_running()) { slave->set_status(SERVER_SLAVE); + // Write the replication lag for this slave. It may have multiple slave connections, + // in which case take the smallest value. This only counts the slave connections + // leading to the master or a relay. + int curr_rlag = slave->m_replication_lag; + int new_rlag = sstatus->seconds_behind_master; + if (new_rlag != MXS_RLAG_UNDEFINED && + (curr_rlag == MXS_RLAG_UNDEFINED || new_rlag < curr_rlag)) + { + slave->m_replication_lag = new_rlag; + } } } } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index f2e15ff76..6c6175534 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -468,9 +468,11 @@ void MariaDBMonitor::tick() // Update shared status. The next functions read the shared status. 
TODO: change the following // functions to read "pending_status" instead. - for (auto mon_srv = m_monitor->monitored_servers; mon_srv; mon_srv = mon_srv->next) + for (auto server : m_servers) { - mon_srv->server->status = mon_srv->pending_status; + SERVER* srv = server->m_server_base->server; + srv->rlag = server->m_replication_lag; + srv->status = server->m_server_base->pending_status; } log_master_changes(); diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index e16ee218d..56928d714 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -46,6 +46,7 @@ MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_ , m_latest_event(time(NULL)) , m_gtid_domain_id(GTID_DOMAIN_UNKNOWN) , m_topology_changed(true) + , m_replication_lag(MXS_RLAG_UNDEFINED) , m_print_update_errormsg(true) { ss_dassert(monitored_server); @@ -188,7 +189,11 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out) SlaveStatus::slave_io_from_string(result->get_string(i_slave_io_running)); sstatus_row.slave_sql_running = (result->get_string(i_slave_sql_running) == "Yes"); sstatus_row.master_server_id = result->get_uint(i_master_server_id); - sstatus_row.seconds_behind_master = result->get_uint(i_seconds_behind_master); + + auto rlag = result->get_uint(i_seconds_behind_master); + // If slave connection is stopped, the value given by the backend is null -> -1. + sstatus_row.seconds_behind_master = (rlag < 0) ? MXS_RLAG_UNDEFINED : + (rlag > INT_MAX) ? INT_MAX : rlag; if (sstatus_row.slave_io_running == SlaveStatus::SLAVE_IO_YES && sstatus_row.slave_sql_running) { @@ -1175,7 +1180,7 @@ string SlaveStatus::to_string() const slave_sql_running ? 
"Yes" : "No"); string rval = string_printf( - " Host: %22s, IO/SQL running: %7s, Master ID: %4" PRId64 ", Gtid_IO_Pos: %s, R.Lag: %" PRId64, + " Host: %22s, IO/SQL running: %7s, Master ID: %4" PRId64 ", Gtid_IO_Pos: %s, R.Lag: %d", host_port.c_str(), running_states.c_str(), master_server_id, gtid_io_pos.to_string().c_str(), seconds_behind_master); return rval; diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index bfbb18147..11ed9d3e4 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -53,7 +53,8 @@ public: bool slave_sql_running = false; /* Slave SQL thread running state, true if "Yes" */ GtidList gtid_io_pos; /* Gtid I/O position of the slave thread. */ std::string last_error; /* Last IO or SQL error encountered. */ - int64_t seconds_behind_master = 0; /* How much behind the slave is. */ + int seconds_behind_master = MXS_RLAG_UNDEFINED; /* How much behind the slave is. */ + std::string to_string() const; static slave_io_running_t slave_io_from_string(const std::string& str); static std::string slave_io_to_string(slave_io_running_t slave_io); @@ -151,6 +152,8 @@ public: bool m_topology_changed; /**< Has anything that could affect replication topology changed * this iteration? Causes: server id, slave connections, * read-only. */ + int m_replication_lag; /**< Replication lag of the server. Used during calculation so + * that the actual SERVER struct is only written to once. */ NodeData m_node; /**< Replication topology data */ SlaveStatusArray m_slave_status; /**< Data returned from SHOW SLAVE STATUS */ ReplicationSettings m_rpl_settings; /**< Miscellaneous replication related settings. 
These are not diff --git a/server/modules/routing/readwritesplit/rwsplit_route_stmt.cc b/server/modules/routing/readwritesplit/rwsplit_route_stmt.cc index 4227c073b..66de6a6b5 100644 --- a/server/modules/routing/readwritesplit/rwsplit_route_stmt.cc +++ b/server/modules/routing/readwritesplit/rwsplit_route_stmt.cc @@ -552,9 +552,7 @@ bool RWSplitSession::route_session_write(GWBUF *querybuf, uint8_t command, uint3 */ static inline bool rpl_lag_is_ok(SRWBackend& backend, int max_rlag) { - return max_rlag == MAX_RLAG_UNDEFINED || - (backend->server()->rlag != MAX_RLAG_NOT_AVAILABLE && - backend->server()->rlag <= max_rlag); + return max_rlag == MXS_RLAG_UNDEFINED || backend->server()->rlag <= max_rlag; } SRWBackend RWSplitSession::get_hinted_backend(char *name) @@ -735,7 +733,7 @@ int RWSplitSession::get_max_replication_lag() SRWBackend RWSplitSession::handle_hinted_target(GWBUF *querybuf, route_target_t route_target) { char *named_server = NULL; - int rlag_max = MAX_RLAG_UNDEFINED; + int rlag_max = MXS_RLAG_UNDEFINED; HINT* hint = querybuf->hint; @@ -766,7 +764,7 @@ SRWBackend RWSplitSession::handle_hinted_target(GWBUF *querybuf, route_target_t hint = hint->next; } /*< while */ - if (rlag_max == MAX_RLAG_UNDEFINED) /*< no rlag max hint, use config */ + if (rlag_max == MXS_RLAG_UNDEFINED) /*< no rlag max hint, use config */ { rlag_max = get_max_replication_lag(); } @@ -953,7 +951,7 @@ bool RWSplitSession::should_migrate_trx(SRWBackend& target) */ bool RWSplitSession::handle_master_is_target(SRWBackend* dest) { - SRWBackend target = get_target_backend(BE_MASTER, NULL, MAX_RLAG_UNDEFINED); + SRWBackend target = get_target_backend(BE_MASTER, NULL, MXS_RLAG_UNDEFINED); bool succp = true; if (should_replace_master(target))