MXS-1703 Move monitor main loop + other entrypoint contents to class methods
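The pattern running through the whole diff: the C-style module entry points (diagnostics, diagnostics_json, the monitor thread function) keep their signatures but shrink to thin wrappers, while their bodies become MariaDBMonitor methods that reach monitor state through `this` instead of a `handle` pointer. Below is a minimal, self-contained sketch of that shape; the class name, the `interval` member and the `*_entry` wrappers are illustrative stand-ins, not the actual MaxScale API.

#include <cstdio>

// Sketch only: "MariaDBMonitorSketch" and its members are hypothetical stand-ins.
class MariaDBMonitorSketch
{
public:
    // Body that used to live in a free diagnostics()-style function:
    // members are read directly instead of through handle->member.
    void diagnostics(std::FILE* out) const
    {
        std::fprintf(out, "Monitor interval: %d ms\n", interval);
    }

    // Body that used to live in the static thread entry point.
    void main_loop()
    {
        // monitoring rounds would run here
    }

private:
    int interval = 1000;   // hypothetical setting, for illustration only
};

// The module-facing entry points stay as thin wrappers: cast the opaque handle
// back to the class and delegate, so the C-style API seen by the core is unchanged.
static void diagnostics_entry(std::FILE* out, const void* handle)
{
    static_cast<const MariaDBMonitorSketch*>(handle)->diagnostics(out);
}

static void monitor_thread_entry(void* arg)
{
    static_cast<MariaDBMonitorSketch*>(arg)->main_loop();
}

int main()
{
    MariaDBMonitorSketch monitor;
    diagnostics_entry(stdout, &monitor);   // prints the diagnostic line
    monitor_thread_entry(&monitor);        // would run the monitor loop
    return 0;
}

Keeping the wrappers means the module loader still sees plain functions, while the monitor logic itself can use members and other C++ facilities inside the class.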
@@ -935,35 +935,28 @@ static string monitored_servers_to_string(const ServerVector& array)
     }
     return rval;
 }
-/**
- * Daignostic interface
- *
- * @param dcb DCB to print diagnostics
- * @param arg The monitor handle
- */
-static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
-{
-    const MariaDBMonitor *handle = (const MariaDBMonitor *)mon->handle;
 
-    dcb_printf(dcb, "Automatic failover: %s\n", handle->auto_failover ? "Enabled" : "Disabled");
-    dcb_printf(dcb, "Failcount: %d\n", handle->failcount);
-    dcb_printf(dcb, "Failover timeout: %u\n", handle->failover_timeout);
-    dcb_printf(dcb, "Switchover timeout: %u\n", handle->switchover_timeout);
-    dcb_printf(dcb, "Automatic rejoin: %s\n", handle->auto_rejoin ? "Enabled" : "Disabled");
-    dcb_printf(dcb, "MaxScale monitor ID: %lu\n", handle->id);
-    dcb_printf(dcb, "Detect replication lag: %s\n", (handle->replicationHeartbeat) ? "Enabled" : "Disabled");
-    dcb_printf(dcb, "Detect stale master: %s\n", (handle->detectStaleMaster == 1) ?
+void MariaDBMonitor::diagnostics(DCB *dcb) const
+{
+    dcb_printf(dcb, "Automatic failover: %s\n", auto_failover ? "Enabled" : "Disabled");
+    dcb_printf(dcb, "Failcount: %d\n", failcount);
+    dcb_printf(dcb, "Failover timeout: %u\n", failover_timeout);
+    dcb_printf(dcb, "Switchover timeout: %u\n", switchover_timeout);
+    dcb_printf(dcb, "Automatic rejoin: %s\n", auto_rejoin ? "Enabled" : "Disabled");
+    dcb_printf(dcb, "MaxScale monitor ID: %lu\n", id);
+    dcb_printf(dcb, "Detect replication lag: %s\n", (replicationHeartbeat) ? "Enabled" : "Disabled");
+    dcb_printf(dcb, "Detect stale master: %s\n", (detectStaleMaster == 1) ?
                "Enabled" : "Disabled");
-    if (handle->excluded_servers.size() > 0)
+    if (excluded_servers.size() > 0)
     {
         dcb_printf(dcb, "Non-promotable servers (failover): ");
-        dcb_printf(dcb, "%s\n", monitored_servers_to_string(handle->excluded_servers).c_str());
+        dcb_printf(dcb, "%s\n", monitored_servers_to_string(excluded_servers).c_str());
     }
 
     dcb_printf(dcb, "\nServer information:\n-------------------\n\n");
-    for (MXS_MONITORED_SERVER *db = mon->monitored_servers; db; db = db->next)
+    for (MXS_MONITORED_SERVER *db = monitor->monitored_servers; db; db = db->next)
     {
-        const MySqlServerInfo* serv_info = get_server_info(handle, db);
+        const MySqlServerInfo* serv_info = get_server_info(this, db);
         dcb_printf(dcb, "Server: %s\n", db->server->unique_name);
         dcb_printf(dcb, "Server ID: %" PRId64 "\n", serv_info->server_id);
         dcb_printf(dcb, "Read only: %s\n", serv_info->read_only ? "YES" : "NO");
@@ -991,7 +984,7 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
             dcb_printf(dcb, "Gtid slave IO position: %s\n",
                        serv_info->slave_status.gtid_io_pos.to_string().c_str());
         }
-        if (handle->multimaster)
+        if (multimaster)
        {
             dcb_printf(dcb, "Master group: %d\n", serv_info->group);
         }
@@ -1001,46 +994,51 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
 }
 
 /**
- * Diagnostic interface
+ * Daignostic interface
  *
+ * @param dcb DCB to print diagnostics
  * @param arg The monitor handle
  */
-static json_t* diagnostics_json(const MXS_MONITOR *mon)
+static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
+{
+    const MariaDBMonitor* handle = static_cast<const MariaDBMonitor*>(mon->handle);
+    handle->diagnostics(dcb);
+}
+
+json_t* MariaDBMonitor::diagnostics_json() const
 {
     json_t* rval = json_object();
+    json_object_set_new(rval, "monitor_id", json_integer(id));
+    json_object_set_new(rval, "detect_stale_master", json_boolean(detectStaleMaster));
+    json_object_set_new(rval, "detect_stale_slave", json_boolean(detectStaleSlave));
+    json_object_set_new(rval, "detect_replication_lag", json_boolean(replicationHeartbeat));
+    json_object_set_new(rval, "multimaster", json_boolean(multimaster));
+    json_object_set_new(rval, "detect_standalone_master", json_boolean(detect_standalone_master));
+    json_object_set_new(rval, CN_FAILCOUNT, json_integer(failcount));
+    json_object_set_new(rval, "allow_cluster_recovery", json_boolean(allow_cluster_recovery));
+    json_object_set_new(rval, "mysql51_replication", json_boolean(mysql51_replication));
+    json_object_set_new(rval, CN_AUTO_FAILOVER, json_boolean(auto_failover));
+    json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(failover_timeout));
+    json_object_set_new(rval, CN_SWITCHOVER_TIMEOUT, json_integer(switchover_timeout));
+    json_object_set_new(rval, CN_AUTO_REJOIN, json_boolean(auto_rejoin));
 
-    const MariaDBMonitor *handle = (const MariaDBMonitor *)mon->handle;
-    json_object_set_new(rval, "monitor_id", json_integer(handle->id));
-    json_object_set_new(rval, "detect_stale_master", json_boolean(handle->detectStaleMaster));
-    json_object_set_new(rval, "detect_stale_slave", json_boolean(handle->detectStaleSlave));
-    json_object_set_new(rval, "detect_replication_lag", json_boolean(handle->replicationHeartbeat));
-    json_object_set_new(rval, "multimaster", json_boolean(handle->multimaster));
-    json_object_set_new(rval, "detect_standalone_master", json_boolean(handle->detect_standalone_master));
-    json_object_set_new(rval, CN_FAILCOUNT, json_integer(handle->failcount));
-    json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery));
-    json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication));
-    json_object_set_new(rval, CN_AUTO_FAILOVER, json_boolean(handle->auto_failover));
-    json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout));
-    json_object_set_new(rval, CN_SWITCHOVER_TIMEOUT, json_integer(handle->switchover_timeout));
-    json_object_set_new(rval, CN_AUTO_REJOIN, json_boolean(handle->auto_rejoin));
-
-    if (!handle->script.empty())
+    if (!script.empty())
     {
-        json_object_set_new(rval, "script", json_string(handle->script.c_str()));
+        json_object_set_new(rval, "script", json_string(script.c_str()));
     }
-    if (handle->excluded_servers.size() > 0)
+    if (excluded_servers.size() > 0)
     {
-        string list = monitored_servers_to_string(handle->excluded_servers);
+        string list = monitored_servers_to_string(excluded_servers);
         json_object_set_new(rval, CN_NO_PROMOTE_SERVERS, json_string(list.c_str()));
     }
-    if (mon->monitored_servers)
+    if (monitor->monitored_servers)
     {
         json_t* arr = json_array();
 
-        for (MXS_MONITORED_SERVER *db = mon->monitored_servers; db; db = db->next)
+        for (MXS_MONITORED_SERVER *db = monitor->monitored_servers; db; db = db->next)
         {
             json_t* srv = json_object();
-            const MySqlServerInfo* serv_info = get_server_info(handle, db);
+            const MySqlServerInfo* serv_info = get_server_info(this, db);
             json_object_set_new(srv, "name", json_string(db->server->unique_name));
             json_object_set_new(srv, "server_id", json_integer(serv_info->server_id));
             json_object_set_new(srv, "master_id", json_integer(serv_info->slave_status.master_server_id));
@@ -1062,7 +1060,7 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
                                 json_string(serv_info->gtid_binlog_pos.to_string().c_str()));
             json_object_set_new(srv, "gtid_io_pos",
                                 json_string(serv_info->slave_status.gtid_io_pos.to_string().c_str()));
-            if (handle->multimaster)
+            if (multimaster)
             {
                 json_object_set_new(srv, "master_group", json_integer(serv_info->group));
             }
@@ -1076,6 +1074,17 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
     return rval;
 }
 
+/**
+ * Diagnostic interface
+ *
+ * @param arg The monitor handle
+ */
+static json_t* diagnostics_json(const MXS_MONITOR *mon)
+{
+    const MariaDBMonitor *handle = (const MariaDBMonitor *)mon->handle;
+    return handle->diagnostics_json();
+}
+
 static enum mysql_server_version get_server_version(MXS_MONITORED_SERVER* db)
 {
     unsigned long server_version = mysql_get_server_version(db->con);
@@ -1845,16 +1854,8 @@ bool failover_not_possible(MariaDBMonitor* handle)
     return rval;
 }
 
-/**
- * The entry point for the monitoring module thread
- *
- * @param arg The handle of the monitor
- */
-static void
-monitorMain(void *arg)
+void MariaDBMonitor::main_loop()
 {
-    MariaDBMonitor *handle = (MariaDBMonitor *) arg;
-    MXS_MONITOR* mon = handle->monitor;
     MXS_MONITORED_SERVER *ptr;
     bool replication_heartbeat;
     bool detect_stale_master;
@@ -1864,33 +1865,33 @@ monitorMain(void *arg)
     int log_no_master = 1;
     bool heartbeat_checked = false;
 
-    replication_heartbeat = handle->replicationHeartbeat;
-    detect_stale_master = handle->detectStaleMaster;
+    replication_heartbeat = replicationHeartbeat;
+    detect_stale_master = detectStaleMaster;
 
     if (mysql_thread_init())
     {
         MXS_ERROR("mysql_thread_init failed in monitor module. Exiting.");
-        handle->status = MXS_MONITOR_STOPPED;
+        status = MXS_MONITOR_STOPPED;
         return;
     }
 
-    load_server_journal(mon, &handle->master);
+    load_server_journal(monitor, &master);
 
     while (1)
     {
-        if (handle->shutdown)
+        if (shutdown)
         {
-            handle->status = MXS_MONITOR_STOPPING;
+            status = MXS_MONITOR_STOPPING;
             mysql_thread_end();
-            handle->status = MXS_MONITOR_STOPPED;
+            status = MXS_MONITOR_STOPPED;
             return;
         }
         /** Wait base interval */
         thread_millisleep(MXS_MON_BASE_INTERVAL_MS);
 
-        if (handle->replicationHeartbeat && !heartbeat_checked)
+        if (replicationHeartbeat && !heartbeat_checked)
         {
-            check_maxscale_schema_replication(mon);
+            check_maxscale_schema_replication(monitor);
             heartbeat_checked = true;
         }
 
@@ -1901,8 +1902,8 @@ monitorMain(void *arg)
          * round.
          */
        if (nrounds != 0 &&
-            (((nrounds * MXS_MON_BASE_INTERVAL_MS) % mon->interval) >=
-             MXS_MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
+            (((nrounds * MXS_MON_BASE_INTERVAL_MS) % monitor->interval) >=
+             MXS_MON_BASE_INTERVAL_MS) && (!monitor->server_pending_changes))
         {
             nrounds += 1;
             continue;
@@ -1911,11 +1912,11 @@ monitorMain(void *arg)
         /* reset num_servers */
         num_servers = 0;
 
-        lock_monitor_servers(mon);
-        servers_status_pending_to_current(mon);
+        lock_monitor_servers(monitor);
+        servers_status_pending_to_current(monitor);
 
         /* start from the first server in the list */
-        ptr = mon->monitored_servers;
+        ptr = monitor->monitored_servers;
 
         while (ptr)
         {
@@ -1925,7 +1926,7 @@ monitorMain(void *arg)
             ptr->pending_status = ptr->server->status;
 
             /* monitor current node */
-            monitorDatabase(mon, ptr);
+            monitorDatabase(monitor, ptr);
 
             /* reset the slave list of current node */
             memset(&ptr->server->slaves, 0, sizeof(ptr->server->slaves));
@@ -1972,7 +1973,7 @@ monitorMain(void *arg)
             ptr = ptr->next;
         }
 
-        ptr = mon->monitored_servers;
+        ptr = monitor->monitored_servers;
         /* if only one server is configured, that's is Master */
         if (num_servers == 1)
         {
@@ -1986,52 +1987,52 @@ monitorMain(void *arg)
                 monitor_set_pending_status(ptr, SERVER_MASTER);
 
                 ptr->server->depth = 0;
-                handle->master = ptr;
+                master = ptr;
                 root_master = ptr;
             }
         }
         else
         {
             /* Compute the replication tree */
-            if (handle->mysql51_replication)
+            if (mysql51_replication)
             {
-                root_master = build_mysql51_replication_tree(mon);
+                root_master = build_mysql51_replication_tree(monitor);
             }
             else
             {
-                root_master = get_replication_tree(mon, num_servers);
+                root_master = get_replication_tree(monitor, num_servers);
             }
         }
 
-        if (handle->multimaster && num_servers > 0)
+        if (multimaster && num_servers > 0)
         {
             /** Find all the master server cycles in the cluster graph. If
                 multiple masters are found, the servers with the read_only
                 variable set to ON will be assigned the slave status. */
-            find_graph_cycles(handle, mon->monitored_servers, num_servers);
+            find_graph_cycles(this, monitor->monitored_servers, num_servers);
         }
 
-        if (handle->master != NULL && SERVER_IS_MASTER(handle->master->server))
+        if (master != NULL && SERVER_IS_MASTER(master->server))
         {
-            MySqlServerInfo* master_info = get_server_info(handle, handle->master);
+            MySqlServerInfo* master_info = get_server_info(this, master);
             // Update cluster gtid domain
             int64_t domain = master_info->gtid_domain_id;
-            if (handle->master_gtid_domain >= 0 && domain != handle->master_gtid_domain)
+            if (master_gtid_domain >= 0 && domain != master_gtid_domain)
             {
                 MXS_NOTICE("Gtid domain id of master has changed: %" PRId64 " -> %" PRId64 ".",
-                           handle->master_gtid_domain, domain);
+                           master_gtid_domain, domain);
             }
-            handle->master_gtid_domain = domain;
+            master_gtid_domain = domain;
 
             // Update cluster external master
-            if (SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(handle->master->server))
+            if (SERVER_IS_SLAVE_OF_EXTERNAL_MASTER(master->server))
             {
-                if (master_info->slave_status.master_host != handle->external_master_host ||
-                    master_info->slave_status.master_port != handle->external_master_port)
+                if (master_info->slave_status.master_host != external_master_host ||
+                    master_info->slave_status.master_port != external_master_port)
                 {
                     const string new_ext_host = master_info->slave_status.master_host;
                     const int new_ext_port = master_info->slave_status.master_port;
-                    if (handle->external_master_port == PORT_UNKNOWN)
+                    if (external_master_port == PORT_UNKNOWN)
                     {
                         MXS_NOTICE("Cluster master server is replicating from an external master: %s:%d",
                                    new_ext_host.c_str(), new_ext_port);
@@ -2039,34 +2040,34 @@ monitorMain(void *arg)
                     else
                     {
                         MXS_NOTICE("The external master of the cluster has changed: %s:%d -> %s:%d.",
-                                   handle->external_master_host.c_str(), handle->external_master_port,
+                                   external_master_host.c_str(), external_master_port,
                                    new_ext_host.c_str(), new_ext_port);
                     }
-                    handle->external_master_host = new_ext_host;
-                    handle->external_master_port = new_ext_port;
+                    external_master_host = new_ext_host;
+                    external_master_port = new_ext_port;
                 }
             }
             else
             {
-                if (handle->external_master_port != PORT_UNKNOWN)
+                if (external_master_port != PORT_UNKNOWN)
                 {
                     MXS_NOTICE("Cluster lost the external master.");
                 }
-                handle->external_master_host.clear();
-                handle->external_master_port = PORT_UNKNOWN;
+                external_master_host.clear();
+                external_master_port = PORT_UNKNOWN;
             }
         }
 
-        ptr = mon->monitored_servers;
+        ptr = monitor->monitored_servers;
         while (ptr)
         {
-            MySqlServerInfo *serv_info = get_server_info(handle, ptr);
+            MySqlServerInfo *serv_info = get_server_info(this, ptr);
             ss_dassert(serv_info);
 
             if (ptr->server->node_id > 0 && ptr->server->master_id > 0 &&
-                getSlaveOfNodeId(mon->monitored_servers, ptr->server->node_id, REJECT_DOWN) &&
-                getServerByNodeId(mon->monitored_servers, ptr->server->master_id) &&
-                (!handle->multimaster || serv_info->group == 0))
+                getSlaveOfNodeId(monitor->monitored_servers, ptr->server->node_id, REJECT_DOWN) &&
+                getServerByNodeId(monitor->monitored_servers, ptr->server->master_id) &&
+                (!multimaster || serv_info->group == 0))
             {
                 /** This server is both a slave and a master i.e. a relay master */
                 monitor_set_pending_status(ptr, SERVER_RELAY_MASTER);
@@ -2084,12 +2085,12 @@ monitorMain(void *arg)
 
         /* Update server status from monitor pending status on that server*/
 
-        ptr = mon->monitored_servers;
+        ptr = monitor->monitored_servers;
         while (ptr)
         {
             if (!SERVER_IN_MAINT(ptr->server))
             {
-                MySqlServerInfo *serv_info = get_server_info(handle, ptr);
+                MySqlServerInfo *serv_info = get_server_info(this, ptr);
 
                 /** If "detect_stale_master" option is On, let's use the previous master.
                  *
@@ -2099,7 +2100,7 @@ monitorMain(void *arg)
                  * the master status. An adequate solution would be to promote
                  * the stale master as a real master if it is the last running server.
                  */
-                if (detect_stale_master && root_master && !handle->multimaster &&
+                if (detect_stale_master && root_master && !multimaster &&
                     (strcmp(ptr->server->name, root_master->server->name) == 0 &&
                      ptr->server->port == root_master->server->port) &&
                     (ptr->server->status & SERVER_MASTER) &&
@@ -2125,7 +2126,7 @@ monitorMain(void *arg)
                     }
                 }
 
-                if (handle->detectStaleSlave)
+                if (detectStaleSlave)
                 {
                     unsigned int bits = SERVER_SLAVE | SERVER_RUNNING;
 
@@ -2169,20 +2170,20 @@ monitorMain(void *arg)
 
         /** Now that all servers have their status correctly set, we can check
             if we need to use standalone master. */
-        if (handle->detect_standalone_master)
+        if (detect_standalone_master)
         {
-            if (standalone_master_required(handle, mon->monitored_servers))
+            if (standalone_master_required(this, monitor->monitored_servers))
             {
                 // Other servers have died, set last remaining server as master
-                if (set_standalone_master(handle, mon->monitored_servers))
+                if (set_standalone_master(this, monitor->monitored_servers))
                 {
                     // Update the root_master to point to the standalone master
-                    root_master = handle->master;
+                    root_master = master;
                 }
             }
            else
             {
-                handle->warn_set_standalone_master = true;
+                warn_set_standalone_master = true;
             }
         }
 
@@ -2197,14 +2198,14 @@ monitorMain(void *arg)
              * This allows parts of a multi-tiered replication setup to be used
             * in MaxScale.
             */
-            if (handle->ignore_external_masters)
+            if (ignore_external_masters)
            {
                 monitor_clear_pending_status(root_master, SERVER_SLAVE_OF_EXTERNAL_MASTER);
                 server_clear_status_nolock(root_master->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
             }
         }
 
-        ss_dassert(root_master == NULL || handle->master == root_master);
+        ss_dassert(root_master == NULL || master == root_master);
         ss_dassert(!root_master ||
                    ((root_master->server->status & (SERVER_SLAVE | SERVER_MASTER))
                     != (SERVER_SLAVE | SERVER_MASTER)));
@@ -2213,35 +2214,35 @@ monitorMain(void *arg)
          * After updating the status of all servers, check if monitor events
          * need to be launched.
          */
-        mon_process_state_changes(mon, handle->script.c_str(), handle->events);
+        mon_process_state_changes(monitor, script.c_str(), events);
         bool failover_performed = false; // Has an automatic failover been performed this loop?
 
-        if (handle->auto_failover)
+        if (auto_failover)
         {
             const char RE_ENABLE_FMT[] = "%s To re-enable failover, manually set '%s' to 'true' for monitor "
                                          "'%s' via MaxAdmin or the REST API, or restart MaxScale.";
-            if (failover_not_possible(handle))
+            if (failover_not_possible(this))
             {
                 const char PROBLEMS[] = "Failover is not possible due to one or more problems in the "
                                         "replication configuration, disabling automatic failover. Failover "
                                         "should only be enabled after the replication configuration has been "
                                         "fixed.";
-                MXS_ERROR(RE_ENABLE_FMT, PROBLEMS, CN_AUTO_FAILOVER, mon->name);
-                handle->auto_failover = false;
-                handle->disable_setting(CN_AUTO_FAILOVER);
+                MXS_ERROR(RE_ENABLE_FMT, PROBLEMS, CN_AUTO_FAILOVER, monitor->name);
+                auto_failover = false;
+                disable_setting(CN_AUTO_FAILOVER);
             }
             // If master seems to be down, check if slaves are receiving events.
-            else if (handle->verify_master_failure && handle->master &&
-                     SERVER_IS_DOWN(handle->master->server) && slave_receiving_events(handle))
+            else if (verify_master_failure && master &&
+                     SERVER_IS_DOWN(master->server) && slave_receiving_events(this))
             {
                 MXS_INFO("Master failure not yet confirmed by slaves, delaying failover.");
             }
-            else if (!handle->mon_process_failover(&failover_performed))
+            else if (!mon_process_failover(&failover_performed))
             {
                 const char FAILED[] = "Failed to perform failover, disabling automatic failover.";
-                MXS_ERROR(RE_ENABLE_FMT, FAILED, CN_AUTO_FAILOVER, mon->name);
-                handle->auto_failover = false;
-                handle->disable_setting(CN_AUTO_FAILOVER);
+                MXS_ERROR(RE_ENABLE_FMT, FAILED, CN_AUTO_FAILOVER, monitor->name);
+                auto_failover = false;
+                disable_setting(CN_AUTO_FAILOVER);
             }
         }
 
@@ -2283,12 +2284,12 @@ monitorMain(void *arg)
             (SERVER_IS_MASTER(root_master->server) ||
              SERVER_IS_RELAY_SERVER(root_master->server)))
         {
-            handle->set_master_heartbeat(root_master);
-            ptr = mon->monitored_servers;
+            set_master_heartbeat(root_master);
+            ptr = monitor->monitored_servers;
 
             while (ptr)
             {
-                MySqlServerInfo *serv_info = get_server_info(handle, ptr);
+                MySqlServerInfo *serv_info = get_server_info(this, ptr);
 
                 if ((!SERVER_IN_MAINT(ptr->server)) && SERVER_IS_RUNNING(ptr->server))
                 {
@@ -2297,7 +2298,7 @@ monitorMain(void *arg)
                          SERVER_IS_RELAY_SERVER(ptr->server)) &&
                         !serv_info->binlog_relay) // No select lag for Binlog Server
                     {
-                        handle->set_slave_heartbeat(ptr);
+                        set_slave_heartbeat(ptr);
                     }
                 }
                 ptr = ptr->next;
@@ -2306,14 +2307,14 @@ monitorMain(void *arg)
 
         // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
         // has been performed, as server states have not been updated yet. It will happen next iteration.
-        if (!config_get_global_options()->passive && handle->auto_rejoin &&
-            !failover_performed && handle->cluster_can_be_joined())
+        if (!config_get_global_options()->passive && auto_rejoin &&
+            !failover_performed && cluster_can_be_joined())
         {
             // Check if any servers should be autojoined to the cluster
             ServerVector joinable_servers;
-            if (handle->get_joinable_servers(&joinable_servers))
+            if (get_joinable_servers(&joinable_servers))
             {
-                uint32_t joins = handle->do_rejoin(joinable_servers);
+                uint32_t joins = do_rejoin(joinable_servers);
                 if (joins > 0)
                 {
                     MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins);
@@ -2322,25 +2323,36 @@ monitorMain(void *arg)
                 {
                     MXS_ERROR("A cluster join operation failed, disabling automatic rejoining. "
                               "To re-enable, manually set '%s' to 'true' for monitor '%s' via MaxAdmin or "
-                              "the REST API.", CN_AUTO_REJOIN, mon->name);
-                    handle->auto_rejoin = false;
-                    handle->disable_setting(CN_AUTO_REJOIN);
+                              "the REST API.", CN_AUTO_REJOIN, monitor->name);
+                    auto_rejoin = false;
+                    disable_setting(CN_AUTO_REJOIN);
                 }
             }
             else
             {
                 MXS_ERROR("Query error to master '%s' prevented a possible rejoin operation.",
-                          handle->master->server->unique_name);
+                          master->server->unique_name);
             }
         }
 
-        mon_hangup_failed_servers(mon);
-        servers_status_current_to_pending(mon);
-        store_server_journal(mon, handle->master);
-        release_monitor_servers(mon);
+        mon_hangup_failed_servers(monitor);
+        servers_status_current_to_pending(monitor);
+        store_server_journal(monitor, master);
+        release_monitor_servers(monitor);
     } /*< while (1) */
 }
 
+/**
+ * The entry point for the monitoring module thread
+ *
+ * @param arg The handle of the monitor. Must be the object returned by startMonitor.
+ */
+static void monitorMain(void *arg)
+{
+    MariaDBMonitor* handle = static_cast<MariaDBMonitor*>(arg);
+    handle->main_loop();
+}
+
 /**
  * Fetch a node by node_id
  *
@@ -73,6 +73,25 @@ private:
 public:
     // TODO: Once done refactoring, see which of these can be moved to private.
 
+    /**
+     * Print diagnostics.
+     *
+     * @param dcb DCB to print to
+     */
+    void diagnostics(DCB *dcb) const;
+
+    /**
+     * Print diagnostics to json object.
+     *
+     * @return Diagnostics messages
+     */
+    json_t* diagnostics_json() const;
+
+    /**
+     * Runs the main monitor loop. Called from the static monitorMain()-function.
+     */
+    void main_loop();
+
     /**
      * Start the monitor instance and return the instance data, creating it if starting for the first time.
      * This function creates a thread to execute the monitoring.