MXS-1703 Reorganize cluster manipulation methods
Just moving code around.
This commit is contained in:
@ -950,3 +950,299 @@ static bool check_replicate_wild_ignore_table(MXS_MONITORED_SERVER* database)
|
|||||||
}
|
}
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check whether standalone master conditions have been met
|
||||||
|
*
|
||||||
|
* This function checks whether all the conditions to use a standalone master are met. For this to happen,
|
||||||
|
* only one server must be available and other servers must have passed the configured tolerance level of
|
||||||
|
* failures.
|
||||||
|
*
|
||||||
|
* @param db Monitor servers
|
||||||
|
*
|
||||||
|
* @return True if standalone master should be used
|
||||||
|
*/
|
||||||
|
bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db)
|
||||||
|
{
|
||||||
|
int candidates = 0;
|
||||||
|
|
||||||
|
while (db)
|
||||||
|
{
|
||||||
|
if (SERVER_IS_RUNNING(db->server))
|
||||||
|
{
|
||||||
|
candidates++;
|
||||||
|
MariaDBServer *server_info = get_server_info(db);
|
||||||
|
|
||||||
|
if (server_info->read_only || server_info->slave_configured || candidates > 1)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (db->mon_err_count < m_failcount)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
db = db->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return candidates == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Use standalone master
|
||||||
|
*
|
||||||
|
* This function assigns the last remaining server the master status and sets all other servers into
|
||||||
|
* maintenance mode. By setting the servers into maintenance mode, we prevent any possible conflicts when
|
||||||
|
* the failed servers come back up.
|
||||||
|
*
|
||||||
|
* @param db Monitor servers
|
||||||
|
*/
|
||||||
|
bool MariaDBMonitor::set_standalone_master(MXS_MONITORED_SERVER *db)
|
||||||
|
{
|
||||||
|
bool rval = false;
|
||||||
|
|
||||||
|
while (db)
|
||||||
|
{
|
||||||
|
if (SERVER_IS_RUNNING(db->server))
|
||||||
|
{
|
||||||
|
if (!SERVER_IS_MASTER(db->server) && m_warn_set_standalone_master)
|
||||||
|
{
|
||||||
|
MXS_WARNING("Setting standalone master, server '%s' is now the master.%s",
|
||||||
|
db->server->unique_name,
|
||||||
|
m_allow_cluster_recovery ?
|
||||||
|
"" : " All other servers are set into maintenance mode.");
|
||||||
|
m_warn_set_standalone_master = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
server_clear_set_status(db->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS);
|
||||||
|
monitor_set_pending_status(db, SERVER_MASTER | SERVER_STALE_STATUS);
|
||||||
|
monitor_clear_pending_status(db, SERVER_SLAVE);
|
||||||
|
m_master = db;
|
||||||
|
rval = true;
|
||||||
|
}
|
||||||
|
else if (!m_allow_cluster_recovery)
|
||||||
|
{
|
||||||
|
server_set_status_nolock(db->server, SERVER_MAINT);
|
||||||
|
monitor_set_pending_status(db, SERVER_MAINT);
|
||||||
|
}
|
||||||
|
db = db->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Monitor a server. Should be moved to the server class later on.
|
||||||
|
*
|
||||||
|
* @param server The server
|
||||||
|
*/
|
||||||
|
void MariaDBMonitor::monitor_one_server(MariaDBServer& server)
|
||||||
|
{
|
||||||
|
MXS_MONITORED_SERVER* ptr = server.server_base;
|
||||||
|
|
||||||
|
ptr->mon_prev_status = ptr->server->status;
|
||||||
|
/* copy server status into monitor pending_status */
|
||||||
|
ptr->pending_status = ptr->server->status;
|
||||||
|
|
||||||
|
/* monitor current node */
|
||||||
|
monitor_database(get_server_info(ptr));
|
||||||
|
|
||||||
|
/* reset the slave list of current node */
|
||||||
|
memset(&ptr->server->slaves, 0, sizeof(ptr->server->slaves));
|
||||||
|
|
||||||
|
if (mon_status_changed(ptr))
|
||||||
|
{
|
||||||
|
if (SRV_MASTER_STATUS(ptr->mon_prev_status))
|
||||||
|
{
|
||||||
|
/** Master failed, can't recover */
|
||||||
|
MXS_NOTICE("Server [%s]:%d lost the master status.",
|
||||||
|
ptr->server->name,
|
||||||
|
ptr->server->port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mon_status_changed(ptr))
|
||||||
|
{
|
||||||
|
#if defined(SS_DEBUG)
|
||||||
|
MXS_INFO("Backend server [%s]:%d state : %s",
|
||||||
|
ptr->server->name,
|
||||||
|
ptr->server->port,
|
||||||
|
STRSRVSTATUS(ptr->server));
|
||||||
|
#else
|
||||||
|
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
||||||
|
ptr->server->name,
|
||||||
|
ptr->server->port,
|
||||||
|
STRSRVSTATUS(ptr->server));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if (SERVER_IS_DOWN(ptr->server))
|
||||||
|
{
|
||||||
|
/** Increase this server'e error count */
|
||||||
|
ptr->mon_err_count += 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/** Reset this server's error count */
|
||||||
|
ptr->mon_err_count = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute replication tree, find root master.
|
||||||
|
*
|
||||||
|
* @return Found master server or NULL
|
||||||
|
*/
|
||||||
|
MariaDBServer* MariaDBMonitor::find_root_master()
|
||||||
|
{
|
||||||
|
MXS_MONITORED_SERVER* found_root_master = NULL;
|
||||||
|
const int num_servers = m_servers.size();
|
||||||
|
/* if only one server is configured, that's is Master */
|
||||||
|
if (num_servers == 1)
|
||||||
|
{
|
||||||
|
auto mon_server = m_servers[0].server_base;
|
||||||
|
if (SERVER_IS_RUNNING(mon_server->server))
|
||||||
|
{
|
||||||
|
mon_server->server->depth = 0;
|
||||||
|
/* status cleanup */
|
||||||
|
monitor_clear_pending_status(mon_server, SERVER_SLAVE);
|
||||||
|
/* master status set */
|
||||||
|
monitor_set_pending_status(mon_server, SERVER_MASTER);
|
||||||
|
|
||||||
|
mon_server->server->depth = 0;
|
||||||
|
m_master = mon_server;
|
||||||
|
found_root_master = mon_server;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Compute the replication tree */
|
||||||
|
if (m_mysql51_replication)
|
||||||
|
{
|
||||||
|
found_root_master = build_mysql51_replication_tree();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
found_root_master = get_replication_tree();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_detect_multimaster && num_servers > 0)
|
||||||
|
{
|
||||||
|
/** Find all the master server cycles in the cluster graph. If
|
||||||
|
multiple masters are found, the servers with the read_only
|
||||||
|
variable set to ON will be assigned the slave status. */
|
||||||
|
find_graph_cycles();
|
||||||
|
}
|
||||||
|
|
||||||
|
return found_root_master ? get_server_info(found_root_master) : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test if server is a relay master and assign status if yes.
|
||||||
|
*
|
||||||
|
* @param candidate The server to assign
|
||||||
|
*/
|
||||||
|
void MariaDBMonitor::assign_relay_master(MariaDBServer& candidate)
|
||||||
|
{
|
||||||
|
MXS_MONITORED_SERVER* ptr = candidate.server_base;
|
||||||
|
if (ptr->server->node_id > 0 && ptr->server->master_id > 0 &&
|
||||||
|
getSlaveOfNodeId(ptr->server->node_id, REJECT_DOWN) &&
|
||||||
|
getServerByNodeId(ptr->server->master_id) &&
|
||||||
|
(!m_detect_multimaster || candidate.group == 0))
|
||||||
|
{
|
||||||
|
/** This server is both a slave and a master i.e. a relay master */
|
||||||
|
monitor_set_pending_status(ptr, SERVER_RELAY_MASTER);
|
||||||
|
monitor_clear_pending_status(ptr, SERVER_MASTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update serve states of a single server
|
||||||
|
*
|
||||||
|
* @param db_server Server to update
|
||||||
|
* @param root_master_server The current best master
|
||||||
|
*/
|
||||||
|
void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server)
|
||||||
|
{
|
||||||
|
MXS_MONITORED_SERVER* ptr = db_server.server_base;
|
||||||
|
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL;
|
||||||
|
if (!SERVER_IN_MAINT(ptr->server))
|
||||||
|
{
|
||||||
|
MariaDBServer *serv_info = get_server_info(ptr);
|
||||||
|
|
||||||
|
/** If "detect_stale_master" option is On, let's use the previous master.
|
||||||
|
*
|
||||||
|
* Multi-master mode detects the stale masters in find_graph_cycles().
|
||||||
|
*
|
||||||
|
* TODO: If a stale master goes down and comes back up, it loses
|
||||||
|
* the master status. An adequate solution would be to promote
|
||||||
|
* the stale master as a real master if it is the last running server.
|
||||||
|
*/
|
||||||
|
if (m_detect_stale_master && root_master && !m_detect_multimaster &&
|
||||||
|
(strcmp(ptr->server->name, root_master->server->name) == 0 &&
|
||||||
|
ptr->server->port == root_master->server->port) &&
|
||||||
|
(ptr->server->status & SERVER_MASTER) &&
|
||||||
|
!(ptr->pending_status & SERVER_MASTER) &&
|
||||||
|
!serv_info->read_only)
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* In this case server->status will not be updated from pending_status
|
||||||
|
* Set the STALE bit for this server in server struct
|
||||||
|
*/
|
||||||
|
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER);
|
||||||
|
monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER);
|
||||||
|
|
||||||
|
/** Log the message only if the master server didn't have
|
||||||
|
* the stale master bit set */
|
||||||
|
if ((ptr->mon_prev_status & SERVER_STALE_STATUS) == 0)
|
||||||
|
{
|
||||||
|
MXS_WARNING("All slave servers under the current master "
|
||||||
|
"server have been lost. Assigning Stale Master"
|
||||||
|
" status to the old master server '%s' (%s:%i).",
|
||||||
|
ptr->server->unique_name, ptr->server->name,
|
||||||
|
ptr->server->port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_detect_stale_slave)
|
||||||
|
{
|
||||||
|
unsigned int bits = SERVER_SLAVE | SERVER_RUNNING;
|
||||||
|
|
||||||
|
if ((ptr->mon_prev_status & bits) == bits &&
|
||||||
|
root_master && SERVER_IS_MASTER(root_master->server))
|
||||||
|
{
|
||||||
|
/** Slave with a running master, assign stale slave candidacy */
|
||||||
|
if ((ptr->pending_status & bits) == bits)
|
||||||
|
{
|
||||||
|
monitor_set_pending_status(ptr, SERVER_STALE_SLAVE);
|
||||||
|
}
|
||||||
|
/** Server lost slave when a master is available, remove
|
||||||
|
* stale slave candidacy */
|
||||||
|
else if ((ptr->pending_status & bits) == SERVER_RUNNING)
|
||||||
|
{
|
||||||
|
monitor_clear_pending_status(ptr, SERVER_STALE_SLAVE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/** If this server was a stale slave candidate, assign
|
||||||
|
* slave status to it */
|
||||||
|
else if (ptr->mon_prev_status & SERVER_STALE_SLAVE &&
|
||||||
|
ptr->pending_status & SERVER_RUNNING &&
|
||||||
|
// Master is down
|
||||||
|
(!root_master || !SERVER_IS_MASTER(root_master->server) ||
|
||||||
|
// Master just came up
|
||||||
|
(SERVER_IS_MASTER(root_master->server) &&
|
||||||
|
(root_master->mon_prev_status & SERVER_MASTER) == 0)))
|
||||||
|
{
|
||||||
|
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
||||||
|
}
|
||||||
|
else if (root_master == NULL && serv_info->slave_configured)
|
||||||
|
{
|
||||||
|
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr->server->status = ptr->pending_status;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -1531,6 +1531,14 @@ bool MariaDBMonitor::mon_process_failover(bool* cluster_modified_out)
|
|||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if server is using gtid replication.
|
||||||
|
*
|
||||||
|
* @param mon_server Server to check
|
||||||
|
* @param error_out Error output
|
||||||
|
* @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does
|
||||||
|
* not have a gtid_IO_Pos.
|
||||||
|
*/
|
||||||
bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out)
|
bool MariaDBMonitor::uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out)
|
||||||
{
|
{
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
|
|||||||
@ -86,12 +86,22 @@ void MariaDBMonitor::init_server_info()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get monitor-specific server info for the monitored server.
|
||||||
|
*
|
||||||
|
* @param handle
|
||||||
|
* @param db Server to get info for. Must be a valid server or function crashes.
|
||||||
|
* @return The server info.
|
||||||
|
*/
|
||||||
MariaDBServer* MariaDBMonitor::get_server_info(MXS_MONITORED_SERVER* db)
|
MariaDBServer* MariaDBMonitor::get_server_info(MXS_MONITORED_SERVER* db)
|
||||||
{
|
{
|
||||||
ss_dassert(m_server_info.count(db) == 1); // Should always exist in the map
|
ss_dassert(m_server_info.count(db) == 1); // Should always exist in the map
|
||||||
return m_server_info[db];
|
return m_server_info[db];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constant version of get_server_info().
|
||||||
|
*/
|
||||||
const MariaDBServer* MariaDBMonitor::get_server_info(const MXS_MONITORED_SERVER* db) const
|
const MariaDBServer* MariaDBMonitor::get_server_info(const MXS_MONITORED_SERVER* db) const
|
||||||
{
|
{
|
||||||
return const_cast<MariaDBMonitor*>(this)->get_server_info(const_cast<MXS_MONITORED_SERVER*>(db));
|
return const_cast<MariaDBMonitor*>(this)->get_server_info(const_cast<MXS_MONITORED_SERVER*>(db));
|
||||||
@ -359,87 +369,6 @@ json_t* MariaDBMonitor::diagnostics_json() const
|
|||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Check whether standalone master conditions have been met
|
|
||||||
*
|
|
||||||
* This function checks whether all the conditions to use a standalone master are met. For this to happen,
|
|
||||||
* only one server must be available and other servers must have passed the configured tolerance level of
|
|
||||||
* failures.
|
|
||||||
*
|
|
||||||
* @param db Monitor servers
|
|
||||||
*
|
|
||||||
* @return True if standalone master should be used
|
|
||||||
*/
|
|
||||||
bool MariaDBMonitor::standalone_master_required(MXS_MONITORED_SERVER *db)
|
|
||||||
{
|
|
||||||
int candidates = 0;
|
|
||||||
|
|
||||||
while (db)
|
|
||||||
{
|
|
||||||
if (SERVER_IS_RUNNING(db->server))
|
|
||||||
{
|
|
||||||
candidates++;
|
|
||||||
MariaDBServer *server_info = get_server_info(db);
|
|
||||||
|
|
||||||
if (server_info->read_only || server_info->slave_configured || candidates > 1)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (db->mon_err_count < m_failcount)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
db = db->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
return candidates == 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Use standalone master
|
|
||||||
*
|
|
||||||
* This function assigns the last remaining server the master status and sets all other servers into
|
|
||||||
* maintenance mode. By setting the servers into maintenance mode, we prevent any possible conflicts when
|
|
||||||
* the failed servers come back up.
|
|
||||||
*
|
|
||||||
* @param db Monitor servers
|
|
||||||
*/
|
|
||||||
bool MariaDBMonitor::set_standalone_master(MXS_MONITORED_SERVER *db)
|
|
||||||
{
|
|
||||||
bool rval = false;
|
|
||||||
|
|
||||||
while (db)
|
|
||||||
{
|
|
||||||
if (SERVER_IS_RUNNING(db->server))
|
|
||||||
{
|
|
||||||
if (!SERVER_IS_MASTER(db->server) && m_warn_set_standalone_master)
|
|
||||||
{
|
|
||||||
MXS_WARNING("Setting standalone master, server '%s' is now the master.%s",
|
|
||||||
db->server->unique_name,
|
|
||||||
m_allow_cluster_recovery ?
|
|
||||||
"" : " All other servers are set into maintenance mode.");
|
|
||||||
m_warn_set_standalone_master = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
server_clear_set_status(db->server, SERVER_SLAVE, SERVER_MASTER | SERVER_STALE_STATUS);
|
|
||||||
monitor_set_pending_status(db, SERVER_MASTER | SERVER_STALE_STATUS);
|
|
||||||
monitor_clear_pending_status(db, SERVER_SLAVE);
|
|
||||||
m_master = db;
|
|
||||||
rval = true;
|
|
||||||
}
|
|
||||||
else if (!m_allow_cluster_recovery)
|
|
||||||
{
|
|
||||||
server_set_status_nolock(db->server, SERVER_MAINT);
|
|
||||||
monitor_set_pending_status(db, SERVER_MAINT);
|
|
||||||
}
|
|
||||||
db = db->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
return rval;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MariaDBMonitor::main_loop()
|
void MariaDBMonitor::main_loop()
|
||||||
{
|
{
|
||||||
m_status = MXS_MONITOR_RUNNING;
|
m_status = MXS_MONITOR_RUNNING;
|
||||||
@ -608,113 +537,6 @@ void MariaDBMonitor::main_loop()
|
|||||||
m_status = MXS_MONITOR_STOPPED;
|
m_status = MXS_MONITOR_STOPPED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Monitor a server. Should be moved to the server class later on.
|
|
||||||
*
|
|
||||||
* @param server The server
|
|
||||||
*/
|
|
||||||
void MariaDBMonitor::monitor_one_server(MariaDBServer& server)
|
|
||||||
{
|
|
||||||
MXS_MONITORED_SERVER* ptr = server.server_base;
|
|
||||||
|
|
||||||
ptr->mon_prev_status = ptr->server->status;
|
|
||||||
/* copy server status into monitor pending_status */
|
|
||||||
ptr->pending_status = ptr->server->status;
|
|
||||||
|
|
||||||
/* monitor current node */
|
|
||||||
monitor_database(get_server_info(ptr));
|
|
||||||
|
|
||||||
/* reset the slave list of current node */
|
|
||||||
memset(&ptr->server->slaves, 0, sizeof(ptr->server->slaves));
|
|
||||||
|
|
||||||
if (mon_status_changed(ptr))
|
|
||||||
{
|
|
||||||
if (SRV_MASTER_STATUS(ptr->mon_prev_status))
|
|
||||||
{
|
|
||||||
/** Master failed, can't recover */
|
|
||||||
MXS_NOTICE("Server [%s]:%d lost the master status.",
|
|
||||||
ptr->server->name,
|
|
||||||
ptr->server->port);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mon_status_changed(ptr))
|
|
||||||
{
|
|
||||||
#if defined(SS_DEBUG)
|
|
||||||
MXS_INFO("Backend server [%s]:%d state : %s",
|
|
||||||
ptr->server->name,
|
|
||||||
ptr->server->port,
|
|
||||||
STRSRVSTATUS(ptr->server));
|
|
||||||
#else
|
|
||||||
MXS_DEBUG("Backend server [%s]:%d state : %s",
|
|
||||||
ptr->server->name,
|
|
||||||
ptr->server->port,
|
|
||||||
STRSRVSTATUS(ptr->server));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
if (SERVER_IS_DOWN(ptr->server))
|
|
||||||
{
|
|
||||||
/** Increase this server'e error count */
|
|
||||||
ptr->mon_err_count += 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/** Reset this server's error count */
|
|
||||||
ptr->mon_err_count = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute replication tree, find root master.
|
|
||||||
*
|
|
||||||
* @return Found master server or NULL
|
|
||||||
*/
|
|
||||||
MariaDBServer* MariaDBMonitor::find_root_master()
|
|
||||||
{
|
|
||||||
MXS_MONITORED_SERVER* found_root_master = NULL;
|
|
||||||
const int num_servers = m_servers.size();
|
|
||||||
/* if only one server is configured, that's is Master */
|
|
||||||
if (num_servers == 1)
|
|
||||||
{
|
|
||||||
auto mon_server = m_servers[0].server_base;
|
|
||||||
if (SERVER_IS_RUNNING(mon_server->server))
|
|
||||||
{
|
|
||||||
mon_server->server->depth = 0;
|
|
||||||
/* status cleanup */
|
|
||||||
monitor_clear_pending_status(mon_server, SERVER_SLAVE);
|
|
||||||
/* master status set */
|
|
||||||
monitor_set_pending_status(mon_server, SERVER_MASTER);
|
|
||||||
|
|
||||||
mon_server->server->depth = 0;
|
|
||||||
m_master = mon_server;
|
|
||||||
found_root_master = mon_server;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Compute the replication tree */
|
|
||||||
if (m_mysql51_replication)
|
|
||||||
{
|
|
||||||
found_root_master = build_mysql51_replication_tree();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
found_root_master = get_replication_tree();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_detect_multimaster && num_servers > 0)
|
|
||||||
{
|
|
||||||
/** Find all the master server cycles in the cluster graph. If
|
|
||||||
multiple masters are found, the servers with the read_only
|
|
||||||
variable set to ON will be assigned the slave status. */
|
|
||||||
find_graph_cycles();
|
|
||||||
}
|
|
||||||
|
|
||||||
return found_root_master ? get_server_info(found_root_master) : NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MariaDBMonitor::update_gtid_domain()
|
void MariaDBMonitor::update_gtid_domain()
|
||||||
{
|
{
|
||||||
MariaDBServer* master_info = get_server_info(m_master);
|
MariaDBServer* master_info = get_server_info(m_master);
|
||||||
@ -763,108 +585,6 @@ void MariaDBMonitor::update_external_master()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO: Move to MariaDBServer.
|
|
||||||
*
|
|
||||||
* @param serv_info
|
|
||||||
*/
|
|
||||||
void MariaDBMonitor::assign_relay_master(MariaDBServer& serv_info)
|
|
||||||
{
|
|
||||||
MXS_MONITORED_SERVER* ptr = serv_info.server_base;
|
|
||||||
if (ptr->server->node_id > 0 && ptr->server->master_id > 0 &&
|
|
||||||
getSlaveOfNodeId(ptr->server->node_id, REJECT_DOWN) &&
|
|
||||||
getServerByNodeId(ptr->server->master_id) &&
|
|
||||||
(!m_detect_multimaster || serv_info.group == 0))
|
|
||||||
{
|
|
||||||
/** This server is both a slave and a master i.e. a relay master */
|
|
||||||
monitor_set_pending_status(ptr, SERVER_RELAY_MASTER);
|
|
||||||
monitor_clear_pending_status(ptr, SERVER_MASTER);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MariaDBMonitor::update_server_states(MariaDBServer& db_server, MariaDBServer* root_master_server)
|
|
||||||
{
|
|
||||||
MXS_MONITORED_SERVER* ptr = db_server.server_base;
|
|
||||||
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL;
|
|
||||||
if (!SERVER_IN_MAINT(ptr->server))
|
|
||||||
{
|
|
||||||
MariaDBServer *serv_info = get_server_info(ptr);
|
|
||||||
|
|
||||||
/** If "detect_stale_master" option is On, let's use the previous master.
|
|
||||||
*
|
|
||||||
* Multi-master mode detects the stale masters in find_graph_cycles().
|
|
||||||
*
|
|
||||||
* TODO: If a stale master goes down and comes back up, it loses
|
|
||||||
* the master status. An adequate solution would be to promote
|
|
||||||
* the stale master as a real master if it is the last running server.
|
|
||||||
*/
|
|
||||||
if (m_detect_stale_master && root_master && !m_detect_multimaster &&
|
|
||||||
(strcmp(ptr->server->name, root_master->server->name) == 0 &&
|
|
||||||
ptr->server->port == root_master->server->port) &&
|
|
||||||
(ptr->server->status & SERVER_MASTER) &&
|
|
||||||
!(ptr->pending_status & SERVER_MASTER) &&
|
|
||||||
!serv_info->read_only)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* In this case server->status will not be updated from pending_status
|
|
||||||
* Set the STALE bit for this server in server struct
|
|
||||||
*/
|
|
||||||
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER);
|
|
||||||
monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER);
|
|
||||||
|
|
||||||
/** Log the message only if the master server didn't have
|
|
||||||
* the stale master bit set */
|
|
||||||
if ((ptr->mon_prev_status & SERVER_STALE_STATUS) == 0)
|
|
||||||
{
|
|
||||||
MXS_WARNING("All slave servers under the current master "
|
|
||||||
"server have been lost. Assigning Stale Master"
|
|
||||||
" status to the old master server '%s' (%s:%i).",
|
|
||||||
ptr->server->unique_name, ptr->server->name,
|
|
||||||
ptr->server->port);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_detect_stale_slave)
|
|
||||||
{
|
|
||||||
unsigned int bits = SERVER_SLAVE | SERVER_RUNNING;
|
|
||||||
|
|
||||||
if ((ptr->mon_prev_status & bits) == bits &&
|
|
||||||
root_master && SERVER_IS_MASTER(root_master->server))
|
|
||||||
{
|
|
||||||
/** Slave with a running master, assign stale slave candidacy */
|
|
||||||
if ((ptr->pending_status & bits) == bits)
|
|
||||||
{
|
|
||||||
monitor_set_pending_status(ptr, SERVER_STALE_SLAVE);
|
|
||||||
}
|
|
||||||
/** Server lost slave when a master is available, remove
|
|
||||||
* stale slave candidacy */
|
|
||||||
else if ((ptr->pending_status & bits) == SERVER_RUNNING)
|
|
||||||
{
|
|
||||||
monitor_clear_pending_status(ptr, SERVER_STALE_SLAVE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/** If this server was a stale slave candidate, assign
|
|
||||||
* slave status to it */
|
|
||||||
else if (ptr->mon_prev_status & SERVER_STALE_SLAVE &&
|
|
||||||
ptr->pending_status & SERVER_RUNNING &&
|
|
||||||
// Master is down
|
|
||||||
(!root_master || !SERVER_IS_MASTER(root_master->server) ||
|
|
||||||
// Master just came up
|
|
||||||
(SERVER_IS_MASTER(root_master->server) &&
|
|
||||||
(root_master->mon_prev_status & SERVER_MASTER) == 0)))
|
|
||||||
{
|
|
||||||
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
|
||||||
}
|
|
||||||
else if (root_master == NULL && serv_info->slave_configured)
|
|
||||||
{
|
|
||||||
monitor_set_pending_status(ptr, SERVER_SLAVE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr->server->status = ptr->pending_status;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master_server)
|
void MariaDBMonitor::measure_replication_lag(MariaDBServer* root_master_server)
|
||||||
{
|
{
|
||||||
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL;
|
MXS_MONITORED_SERVER* root_master = root_master_server ? root_master_server->server_base : NULL;
|
||||||
|
|||||||
@ -108,30 +108,6 @@ public:
|
|||||||
*/
|
*/
|
||||||
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if server is using gtid replication.
|
|
||||||
*
|
|
||||||
* @param mon_server Server to check
|
|
||||||
* @param error_out Error output
|
|
||||||
* @return True if using gtid-replication. False if not, or if server is not a slave or otherwise does
|
|
||||||
* not have a gtid_IO_Pos.
|
|
||||||
*/
|
|
||||||
bool uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get monitor-specific server info for the monitored server.
|
|
||||||
*
|
|
||||||
* @param handle
|
|
||||||
* @param db Server to get info for. Must be a valid server or function crashes.
|
|
||||||
* @return The server info.
|
|
||||||
*/
|
|
||||||
MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constant version of get_server_info().
|
|
||||||
*/
|
|
||||||
const MariaDBServer* get_server_info(const MXS_MONITORED_SERVER* db) const;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MXS_MONITOR* m_monitor_base; /**< Generic monitor object */
|
MXS_MONITOR* m_monitor_base; /**< Generic monitor object */
|
||||||
THREAD m_thread; /**< Monitor thread */
|
THREAD m_thread; /**< Monitor thread */
|
||||||
@ -182,72 +158,86 @@ private:
|
|||||||
REJECT_DOWN
|
REJECT_DOWN
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Base methods
|
||||||
MariaDBMonitor(MXS_MONITOR* monitor_base);
|
MariaDBMonitor(MXS_MONITOR* monitor_base);
|
||||||
~MariaDBMonitor();
|
~MariaDBMonitor();
|
||||||
bool load_config_params(const MXS_CONFIG_PARAMETER* params);
|
|
||||||
bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out);
|
|
||||||
bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info,
|
|
||||||
json_t** err_out);
|
|
||||||
bool switchover_wait_slaves_catchup(const ServerRefArray& slaves, const GtidList& gtid, int total_timeout,
|
|
||||||
int read_timeout, json_t** err_out);
|
|
||||||
bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerRefArray& slaves,
|
|
||||||
int seconds_remaining);
|
|
||||||
bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out);
|
|
||||||
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out);
|
|
||||||
MariaDBServer* select_new_master(ServerRefArray* slaves_out, json_t** err_out);
|
|
||||||
bool server_is_excluded(const MXS_MONITORED_SERVER* server);
|
|
||||||
bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info,
|
|
||||||
uint32_t gtid_domain);
|
|
||||||
MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server);
|
|
||||||
void init_server_info();
|
void init_server_info();
|
||||||
bool slave_receiving_events();
|
bool load_config_params(const MXS_CONFIG_PARAMETER* params);
|
||||||
void monitor_database(MariaDBServer* param_db);
|
|
||||||
bool standalone_master_required(MXS_MONITORED_SERVER *db);
|
|
||||||
bool set_standalone_master(MXS_MONITORED_SERVER *db);
|
|
||||||
bool failover_not_possible();
|
|
||||||
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
|
|
||||||
int redirect_slaves(MariaDBServer* new_master, const ServerRefArray& slaves,
|
|
||||||
ServerRefArray* redirected_slaves);
|
|
||||||
bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params);
|
bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params);
|
||||||
bool start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out);
|
MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db);
|
||||||
bool switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master);
|
const MariaDBServer* get_server_info(const MXS_MONITORED_SERVER* db) const;
|
||||||
bool redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd);
|
|
||||||
bool get_joinable_servers(ServerRefArray* output);
|
// Cluster discovery and status assignment methods
|
||||||
bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd);
|
void monitor_one_server(MariaDBServer& server);
|
||||||
|
void monitor_database(MariaDBServer* param_db);
|
||||||
|
void monitor_mysql_db(MariaDBServer *serv_info);
|
||||||
|
MariaDBServer* find_root_master();
|
||||||
|
MXS_MONITORED_SERVER* get_replication_tree();
|
||||||
|
MXS_MONITORED_SERVER* build_mysql51_replication_tree();
|
||||||
|
void find_graph_cycles();
|
||||||
|
void update_server_states(MariaDBServer& db_server, MariaDBServer* root_master);
|
||||||
|
bool standalone_master_required(MXS_MONITORED_SERVER* db);
|
||||||
|
bool set_standalone_master(MXS_MONITORED_SERVER* db);
|
||||||
|
void assign_relay_master(MariaDBServer& serv_info);
|
||||||
|
void log_master_changes(MariaDBServer* root_master, int* log_no_master);
|
||||||
|
void update_gtid_domain();
|
||||||
|
void update_external_master();
|
||||||
void set_master_heartbeat(MXS_MONITORED_SERVER *);
|
void set_master_heartbeat(MXS_MONITORED_SERVER *);
|
||||||
void set_slave_heartbeat(MXS_MONITORED_SERVER *);
|
void set_slave_heartbeat(MXS_MONITORED_SERVER *);
|
||||||
MXS_MONITORED_SERVER* build_mysql51_replication_tree();
|
void measure_replication_lag(MariaDBServer* root_master);
|
||||||
MXS_MONITORED_SERVER* get_replication_tree();
|
void check_maxscale_schema_replication();
|
||||||
void monitor_mysql_db(MariaDBServer *serv_info);
|
MXS_MONITORED_SERVER* getServerByNodeId(long);
|
||||||
bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);
|
MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t);
|
||||||
bool do_failover(json_t** err_out);
|
|
||||||
uint32_t do_rejoin(const ServerRefArray& joinable_servers);
|
// Switchover methods
|
||||||
bool mon_process_failover(bool* cluster_modified_out);
|
|
||||||
bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output);
|
|
||||||
bool cluster_can_be_joined();
|
|
||||||
bool failover_check(json_t** error_out);
|
|
||||||
void disable_setting(const char* setting);
|
|
||||||
bool switchover_check(SERVER* new_master, SERVER* current_master,
|
bool switchover_check(SERVER* new_master, SERVER* current_master,
|
||||||
MariaDBServer** new_master_out, MariaDBServer** current_master_out,
|
MariaDBServer** new_master_out, MariaDBServer** current_master_out,
|
||||||
json_t** error_out);
|
json_t** error_out);
|
||||||
bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error);
|
bool switchover_check_new(const MXS_MONITORED_SERVER* monitored_server, json_t** error);
|
||||||
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
|
bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
|
||||||
json_t** error_out) const;
|
json_t** error_out) const;
|
||||||
bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master);
|
bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);
|
||||||
void monitor_one_server(MariaDBServer& server);
|
bool switchover_check_preferred_master(MXS_MONITORED_SERVER* preferred, json_t** err_out);
|
||||||
MariaDBServer* find_root_master();
|
bool switchover_demote_master(MXS_MONITORED_SERVER* current_master, MariaDBServer* info,
|
||||||
void update_gtid_domain();
|
json_t** err_out);
|
||||||
void update_external_master();
|
bool switchover_wait_slaves_catchup(const ServerRefArray& slaves, const GtidList& gtid, int total_timeout,
|
||||||
void assign_relay_master(MariaDBServer& serv_info);
|
int read_timeout, json_t** err_out);
|
||||||
void update_server_states(MariaDBServer& db_server, MariaDBServer* root_master);
|
bool switchover_start_slave(MXS_MONITORED_SERVER* old_master, SERVER* new_master);
|
||||||
void log_master_changes(MariaDBServer* root_master, int* log_no_master);
|
|
||||||
|
// Failover methods
|
||||||
void handle_auto_failover(bool* failover_performed);
|
void handle_auto_failover(bool* failover_performed);
|
||||||
void measure_replication_lag(MariaDBServer* root_master);
|
bool failover_not_possible();
|
||||||
|
bool slave_receiving_events();
|
||||||
|
bool mon_process_failover(bool* cluster_modified_out);
|
||||||
|
bool failover_check(json_t** error_out);
|
||||||
|
bool do_failover(json_t** err_out);
|
||||||
|
bool failover_wait_relay_log(MXS_MONITORED_SERVER* new_master, int seconds_remaining, json_t** err_out);
|
||||||
|
|
||||||
|
// Rejoin methods
|
||||||
|
bool cluster_can_be_joined();
|
||||||
void handle_auto_rejoin();
|
void handle_auto_rejoin();
|
||||||
void find_graph_cycles();
|
bool get_joinable_servers(ServerRefArray* output);
|
||||||
void check_maxscale_schema_replication();
|
bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, MariaDBServer* master, json_t** output);
|
||||||
MXS_MONITORED_SERVER* getServerByNodeId(long);
|
bool can_replicate_from(MariaDBServer* slave_cand, MariaDBServer* master);
|
||||||
MXS_MONITORED_SERVER* getSlaveOfNodeId(long, slave_down_setting_t);
|
uint32_t do_rejoin(const ServerRefArray& joinable_servers);
|
||||||
|
bool join_cluster(MXS_MONITORED_SERVER* server, const char* change_cmd);
|
||||||
|
|
||||||
|
// Methods common to failover/switchover/rejoin
|
||||||
|
bool uses_gtid(MXS_MONITORED_SERVER* mon_server, json_t** error_out);
|
||||||
|
MariaDBServer* select_new_master(ServerRefArray* slaves_out, json_t** err_out);
|
||||||
|
MariaDBServer* update_slave_info(MXS_MONITORED_SERVER* server);
|
||||||
|
bool server_is_excluded(const MXS_MONITORED_SERVER* server);
|
||||||
|
bool is_candidate_better(const MariaDBServer* current_best_info, const MariaDBServer* candidate_info,
|
||||||
|
uint32_t gtid_domain);
|
||||||
|
bool promote_new_master(MXS_MONITORED_SERVER* new_master, json_t** err_out);
|
||||||
|
int redirect_slaves(MariaDBServer* new_master, const ServerRefArray& slaves,
|
||||||
|
ServerRefArray* redirected_slaves);
|
||||||
|
bool redirect_one_slave(MXS_MONITORED_SERVER* slave, const char* change_cmd);
|
||||||
|
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
|
||||||
|
bool start_external_replication(MXS_MONITORED_SERVER* new_master, json_t** err_out);
|
||||||
|
bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerRefArray& slaves,
|
||||||
|
int seconds_remaining);
|
||||||
|
void disable_setting(const char* setting);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user