Server status changes now happen under a lock

MXS-873: To prevent monitors and MaxAdmin from interfering with each other,
changes to the server status flags now happen under a lock. To avoid
interfering with monitor logic, the monitors now acquire locks on all
of their servers at the start of the monitor loop and release them
before sleeping.
This commit is contained in:
ekorh475
2016-12-07 14:52:46 +02:00
parent 162ae04d20
commit 259e944b3d
10 changed files with 135 additions and 65 deletions

View File

@ -79,7 +79,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database)
if (!SERVER_IN_MAINT(database->server))
{
SERVER temp_server = {.status = database->server->status};
server_clear_status(&temp_server, SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_AUTH_ERROR);
server_clear_status_nolock(&temp_server, SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_AUTH_ERROR);
database->mon_prev_status = database->server->status;
/** Try to connect to or ping the database */
@ -87,7 +87,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database)
if (rval == MONITOR_CONN_OK)
{
server_set_status(&temp_server, SERVER_RUNNING);
server_set_status_nolock(&temp_server, SERVER_RUNNING);
MYSQL_RES *result;
/** Connection is OK, query for replica status */
@ -106,7 +106,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database)
status = SERVER_MASTER;
}
server_set_status(&temp_server, status);
server_set_status_nolock(&temp_server, status);
mysql_free_result(result);
}
else
@ -122,7 +122,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database)
/** Failed to connect to the database */
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status(&temp_server, SERVER_AUTH_ERROR);
server_set_status_nolock(&temp_server, SERVER_AUTH_ERROR);
}
if (mon_status_changed(database) && mon_print_fail_status(database))
@ -187,6 +187,7 @@ monitorMain(void *arg)
while (!handle->shutdown)
{
lock_monitor_servers(monitor);
for (MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next)
{
update_server_status(monitor, ptr);
@ -221,6 +222,7 @@ monitorMain(void *arg)
}
}
release_monitor_servers(monitor);
/** Sleep until the next monitoring interval */
int ms = 0;
while (ms < monitor->interval && !handle->shutdown)

View File

@ -285,20 +285,20 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
database->mon_prev_status = database->server->status;
server_transfer_status(&temp_server, database->server);
server_clear_status(&temp_server, SERVER_RUNNING);
server_clear_status_nolock(&temp_server, SERVER_RUNNING);
/* Also clear Joined */
server_clear_status(&temp_server, SERVER_JOINED);
server_clear_status_nolock(&temp_server, SERVER_JOINED);
connect_result_t rval = mon_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK)
{
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status(&temp_server, SERVER_AUTH_ERROR);
server_set_status_nolock(&temp_server, SERVER_AUTH_ERROR);
}
else
{
server_clear_status(&temp_server, SERVER_AUTH_ERROR);
server_clear_status_nolock(&temp_server, SERVER_AUTH_ERROR);
}
database->server->node_id = -1;
@ -314,7 +314,7 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
}
/* If we get this far then we have a working connection */
server_set_status(&temp_server, SERVER_RUNNING);
server_set_status_nolock(&temp_server, SERVER_RUNNING);
/* get server version string */
server_string = (char *) mysql_get_server_info(database->con);
@ -406,11 +406,11 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
mysql_free_result(result);
}
server_set_status(&temp_server, SERVER_JOINED);
server_set_status_nolock(&temp_server, SERVER_JOINED);
}
else
{
server_clear_status(&temp_server, SERVER_JOINED);
server_clear_status_nolock(&temp_server, SERVER_JOINED);
}
/* clear bits for non member nodes */
@ -419,11 +419,11 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
database->server->depth = -1;
/* clear M/S status */
server_clear_status(&temp_server, SERVER_SLAVE);
server_clear_status(&temp_server, SERVER_MASTER);
server_clear_status_nolock(&temp_server, SERVER_SLAVE);
server_clear_status_nolock(&temp_server, SERVER_MASTER);
/* clear master sticky status */
server_clear_status(&temp_server, SERVER_MASTER_STICKINESS);
server_clear_status_nolock(&temp_server, SERVER_MASTER_STICKINESS);
}
server_transfer_status(database->server, &temp_server);
@ -488,8 +488,9 @@ monitorMain(void *arg)
/* reset cluster members counter */
is_cluster = 0;
ptr = mon->databases;
lock_monitor_servers(mon);
ptr = mon->databases;
while (ptr)
{
ptr->mon_prev_status = ptr->server->status;
@ -615,6 +616,7 @@ monitorMain(void *arg)
}
mon_hangup_failed_servers(mon);
release_monitor_servers(mon);
}
}

View File

@ -252,20 +252,20 @@ monitorDatabase(MONITOR* mon, MONITOR_SERVERS *database)
{
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status(database->server, SERVER_AUTH_ERROR);
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_set_pending_status(database, SERVER_AUTH_ERROR);
}
server_clear_status(database->server, SERVER_RUNNING);
server_clear_status_nolock(database->server, SERVER_RUNNING);
monitor_clear_pending_status(database, SERVER_RUNNING);
/* Also clear M/S state in both server and monitor server pending struct */
server_clear_status(database->server, SERVER_SLAVE);
server_clear_status(database->server, SERVER_MASTER);
server_clear_status_nolock(database->server, SERVER_SLAVE);
server_clear_status_nolock(database->server, SERVER_MASTER);
monitor_clear_pending_status(database, SERVER_SLAVE);
monitor_clear_pending_status(database, SERVER_MASTER);
/* Clean addition status too */
server_clear_status(database->server, SERVER_STALE_STATUS);
server_clear_status_nolock(database->server, SERVER_STALE_STATUS);
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
if (mon_status_changed(database) && mon_print_fail_status(database))
@ -276,12 +276,12 @@ monitorDatabase(MONITOR* mon, MONITOR_SERVERS *database)
}
else
{
server_clear_status(database->server, SERVER_AUTH_ERROR);
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_clear_pending_status(database, SERVER_AUTH_ERROR);
}
/* Store current status in both server and monitor server pending struct */
server_set_status(database->server, SERVER_RUNNING);
server_set_status_nolock(database->server, SERVER_RUNNING);
monitor_set_pending_status(database, SERVER_RUNNING);
/* get server version from current server */
@ -558,6 +558,7 @@ monitorMain(void *arg)
}
nrounds += 1;
lock_monitor_servers(mon);
/* start from the first server in the list */
ptr = mon->databases;
@ -612,7 +613,7 @@ monitorMain(void *arg)
"use it again even if it could be a stale master, you have "
"been warned!", ptr->server->name, ptr->server->port);
/* Set the STALE bit for this server in server struct */
server_set_status(ptr->server, SERVER_STALE_STATUS);
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS);
}
else
{
@ -642,6 +643,7 @@ monitorMain(void *arg)
}
mon_hangup_failed_servers(mon);
release_monitor_servers(mon);
}
}

View File

@ -685,7 +685,7 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
connect_result_t rval;
if ((rval = mon_connect_to_db(mon, database)) == MONITOR_CONN_OK)
{
server_clear_status(database->server, SERVER_AUTH_ERROR);
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_clear_pending_status(database, SERVER_AUTH_ERROR);
}
else
@ -697,24 +697,24 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
*/
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status(database->server, SERVER_AUTH_ERROR);
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_set_pending_status(database, SERVER_AUTH_ERROR);
}
server_clear_status(database->server, SERVER_RUNNING);
server_clear_status_nolock(database->server, SERVER_RUNNING);
monitor_clear_pending_status(database, SERVER_RUNNING);
/* Also clear M/S state in both server and monitor server pending struct */
server_clear_status(database->server, SERVER_SLAVE);
server_clear_status(database->server, SERVER_MASTER);
server_clear_status(database->server, SERVER_RELAY_MASTER);
server_clear_status_nolock(database->server, SERVER_SLAVE);
server_clear_status_nolock(database->server, SERVER_MASTER);
server_clear_status_nolock(database->server, SERVER_RELAY_MASTER);
monitor_clear_pending_status(database, SERVER_SLAVE);
monitor_clear_pending_status(database, SERVER_MASTER);
monitor_clear_pending_status(database, SERVER_RELAY_MASTER);
/* Clean addition status too */
server_clear_status(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status(database->server, SERVER_STALE_STATUS);
server_clear_status(database->server, SERVER_STALE_SLAVE);
server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status_nolock(database->server, SERVER_STALE_STATUS);
server_clear_status_nolock(database->server, SERVER_STALE_SLAVE);
monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER);
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
monitor_clear_pending_status(database, SERVER_STALE_SLAVE);
@ -729,7 +729,7 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
}
}
/* Store current status in both server and monitor server pending struct */
server_set_status(database->server, SERVER_RUNNING);
server_set_status_nolock(database->server, SERVER_RUNNING);
monitor_set_pending_status(database, SERVER_RUNNING);
/* get server version from current server */
@ -828,7 +828,7 @@ struct graph_node
* https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
*/
static void visit_node(struct graph_node *node, struct graph_node **stack,
int *stacksize, int *index, int *cycle)
int *stacksize, int *index, int *cycle)
{
/** Assign an index to this node */
node->lowest_index = node->index = *index;
@ -1087,7 +1087,7 @@ void do_failover(MYSQL_MONITOR *handle, MONITOR_SERVERS *db)
}
else
{
server_set_status(db->server, SERVER_MAINT);
server_set_status_nolock(db->server, SERVER_MAINT);
monitor_set_pending_status(db, SERVER_MAINT);
}
db = db->next;
@ -1161,6 +1161,7 @@ monitorMain(void *arg)
/* reset num_servers */
num_servers = 0;
lock_monitor_servers(mon);
/* start from the first server in the list */
ptr = mon->databases;
@ -1302,7 +1303,7 @@ monitorMain(void *arg)
* In this case server->status will not be updated from pending_status
* Set the STALE bit for this server in server struct
*/
server_set_status(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER);
server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER);
ptr->pending_status |= SERVER_STALE_STATUS | SERVER_MASTER;
/** Log the message only if the master server didn't have
@ -1451,6 +1452,7 @@ monitorMain(void *arg)
}
mon_hangup_failed_servers(mon);
release_monitor_servers(mon);
} /*< while (1) */
}
@ -1841,7 +1843,7 @@ static MONITOR_SERVERS *get_replication_tree(MONITOR *mon, int num_servers)
current->node_id);
master->server->depth = current->depth - 1;
if(handle->master && master->server->depth < handle->master->server->depth)
if (handle->master && master->server->depth < handle->master->server->depth)
{
/** A master with a lower depth was found, remove
the master status from the previous master. */

View File

@ -234,11 +234,11 @@ monitorDatabase(MONITOR_SERVERS *database, char *defaultUser, char *defaultPassw
connect_result_t rval = mon_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK)
{
server_clear_status(database->server, SERVER_RUNNING);
server_clear_status_nolock(database->server, SERVER_RUNNING);
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status(database->server, SERVER_AUTH_ERROR);
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
}
database->server->node_id = -1;
@ -250,9 +250,9 @@ monitorDatabase(MONITOR_SERVERS *database, char *defaultUser, char *defaultPassw
return;
}
server_clear_status(database->server, SERVER_AUTH_ERROR);
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
/* If we get this far then we have a working connection */
server_set_status(database->server, SERVER_RUNNING);
server_set_status_nolock(database->server, SERVER_RUNNING);
/* get server version string */
server_string = (char *) mysql_get_server_info(database->con);
@ -313,12 +313,12 @@ monitorDatabase(MONITOR_SERVERS *database, char *defaultUser, char *defaultPassw
if (isjoined)
{
server_set_status(database->server, SERVER_NDB);
server_set_status_nolock(database->server, SERVER_NDB);
database->server->depth = 0;
}
else
{
server_clear_status(database->server, SERVER_NDB);
server_clear_status_nolock(database->server, SERVER_NDB);
database->server->depth = -1;
}
}
@ -373,8 +373,9 @@ monitorMain(void *arg)
continue;
}
nrounds += 1;
ptr = mon->databases;
lock_monitor_servers(mon);
ptr = mon->databases;
while (ptr)
{
ptr->mon_prev_status = ptr->server->status;
@ -414,6 +415,7 @@ monitorMain(void *arg)
}
mon_hangup_failed_servers(mon);
release_monitor_servers(mon);
}
}