Have server status updates applied during monitor loop
Previously, server status changes made through MaxAdmin were applied immediately, as long as the server lock could be acquired. The monitor, however, would not act on the change until its next pass, which could be several seconds away. Usually this was fine, but in some situations we want the monitor to run immediately after the change (MXS-740 and Galera). This patch changes the setting and clearing of status bits to a delayed mode: changes are first written to a `status_pending` variable, and they are applied to the actual status only when the monitor next runs. To reduce the delay, the monitor now has a flag that is checked during its sleep (between short 0.1 s naps); if the flag is set, the sleep is cut short. If a server is not monitored, the status bits are set directly. There is a small possibility of a race condition: if a monitor is stopped or destroyed before a pending change is applied, the change is lost.
This commit is contained in:
@ -105,6 +105,7 @@ monitor_alloc(char *name, char *module)
|
||||
mon->interval = MONITOR_INTERVAL;
|
||||
mon->parameters = NULL;
|
||||
mon->created_online = false;
|
||||
mon->server_pending_changes = false;
|
||||
spinlock_init(&mon->lock);
|
||||
spinlock_acquire(&monLock);
|
||||
mon->next = allMonitors;
|
||||
@ -1248,9 +1249,9 @@ void mon_log_state_change(MONITOR_SERVERS *ptr)
|
||||
MXS_FREE(next);
|
||||
}
|
||||
|
||||
bool monitor_server_in_use(const SERVER *server)
|
||||
MONITOR* monitor_server_in_use(const SERVER *server)
|
||||
{
|
||||
bool rval = false;
|
||||
MONITOR *rval = NULL;
|
||||
|
||||
spinlock_acquire(&monLock);
|
||||
|
||||
@ -1262,7 +1263,7 @@ bool monitor_server_in_use(const SERVER *server)
|
||||
{
|
||||
if (db->server == server)
|
||||
{
|
||||
rval = true;
|
||||
rval = mon;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1439,7 +1440,7 @@ void mon_hangup_failed_servers(MONITOR *monitor)
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Acquire locks on all servers monitored my this monitor. There should
|
||||
* Acquire locks on all servers monitored by this monitor. There should
|
||||
* only be max 1 monitor per server.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
@ -1452,6 +1453,11 @@ void lock_monitor_servers(MONITOR *monitor)
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Release locks on all servers monitored by this monitor. There should
|
||||
* only be max 1 monitor per server.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
void release_monitor_servers(MONITOR *monitor)
|
||||
{
|
||||
MONITOR_SERVERS *ptr = monitor->databases;
|
||||
@ -1460,4 +1466,35 @@ void release_monitor_servers(MONITOR *monitor)
|
||||
spinlock_release(&ptr->server->lock);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Sets the current status of all servers monitored by this monitor to
|
||||
* the pending status. This should only be called at the beginning of
|
||||
* a monitor loop, after the servers are locked.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
void servers_status_pending_to_current(MONITOR *monitor)
|
||||
{
|
||||
MONITOR_SERVERS *ptr = monitor->databases;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->server->status = ptr->server->status_pending;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
monitor->server_pending_changes = false;
|
||||
}
|
||||
/**
|
||||
* Sets the pending status of all servers monitored by this monitor to
|
||||
* the current status. This should only be called at the end of
|
||||
* a monitor loop, before the servers are released.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
void servers_status_current_to_pending(MONITOR *monitor)
|
||||
{
|
||||
MONITOR_SERVERS *ptr = monitor->databases;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->server->status_pending = ptr->server->status;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
@ -121,6 +121,7 @@ SERVER* server_alloc(const char *name, const char *address, unsigned short port,
|
||||
server->auth_options = my_auth_options;
|
||||
server->port = port;
|
||||
server->status = SERVER_RUNNING;
|
||||
server->status_pending = SERVER_RUNNING;
|
||||
server->node_id = -1;
|
||||
server->rlag = -2;
|
||||
server->master_id = -1;
|
||||
@ -1264,28 +1265,58 @@ SERVER* server_find_destroyed(const char *name, const char *protocol,
|
||||
/**
|
||||
* Set a status bit in the server under a lock. This ensures synchronization
|
||||
* with the server monitor thread. Calling this inside the monitor will likely
|
||||
* cause a deadlock.
|
||||
* cause a deadlock. If the server is monitored, only set the pending bit.
|
||||
*
|
||||
* @param server The server to update
|
||||
* @param bit The bit to set for the server
|
||||
*/
|
||||
void server_set_status(SERVER *server, int bit)
|
||||
{
|
||||
/* First check if the server is monitored. This isn't done under a lock
|
||||
* but the race condition cannot cause significant harm. Monitors are never
|
||||
* freed so the pointer stays valid.
|
||||
*/
|
||||
MONITOR *mon = monitor_server_in_use(server);
|
||||
spinlock_acquire(&server->lock);
|
||||
server_set_status_nolock(server, bit);
|
||||
if (mon)
|
||||
{
|
||||
/* Set a pending status bit. It will be activated on the next monitor
|
||||
* loop. Also set a flag so the next loop happens sooner.
|
||||
*/
|
||||
server->status_pending |= bit;
|
||||
mon->server_pending_changes = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set the bit directly */
|
||||
server_set_status_nolock(server, bit);
|
||||
}
|
||||
spinlock_release(&server->lock);
|
||||
}
|
||||
/**
|
||||
* Clear a status bit in the server under a lock. This ensures synchronization
|
||||
* with the server monitor thread. Calling this inside the monitor will likely
|
||||
* cause a deadlock.
|
||||
* cause a deadlock. If the server is monitored, only clear the pending bit.
|
||||
*
|
||||
* @param server The server to update
|
||||
* @param bit The bit to clear for the server
|
||||
*/
|
||||
void server_clear_status(SERVER *server, int bit)
|
||||
{
|
||||
MONITOR *mon = monitor_server_in_use(server);
|
||||
spinlock_acquire(&server->lock);
|
||||
server_clear_status_nolock(server, bit);
|
||||
if (mon)
|
||||
{
|
||||
/* Clear a pending status bit. It will be activated on the next monitor
|
||||
* loop. Also set a flag so the next loop happens sooner.
|
||||
*/
|
||||
server->status_pending &= ~bit;
|
||||
mon->server_pending_changes = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Clear bit directly */
|
||||
server_clear_status_nolock(server, bit);
|
||||
}
|
||||
spinlock_release(&server->lock);
|
||||
}
|
||||
|
@ -188,6 +188,8 @@ monitorMain(void *arg)
|
||||
while (!handle->shutdown)
|
||||
{
|
||||
lock_monitor_servers(monitor);
|
||||
servers_status_pending_to_current(monitor);
|
||||
|
||||
for (MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next)
|
||||
{
|
||||
update_server_status(monitor, ptr);
|
||||
@ -221,12 +223,18 @@ monitorMain(void *arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
servers_status_current_to_pending(monitor);
|
||||
release_monitor_servers(monitor);
|
||||
|
||||
/** Sleep until the next monitoring interval */
|
||||
int ms = 0;
|
||||
while (ms < monitor->interval && !handle->shutdown)
|
||||
{
|
||||
if (monitor->server_pending_changes)
|
||||
{
|
||||
// Admin has changed something, skip sleep
|
||||
break;
|
||||
}
|
||||
thread_millisleep(MON_BASE_INTERVAL_MS);
|
||||
ms += MON_BASE_INTERVAL_MS;
|
||||
}
|
||||
|
@ -482,7 +482,9 @@ monitorMain(void *arg)
|
||||
* interval, then skip monitoring checks. Excluding the first
|
||||
* round.
|
||||
*/
|
||||
if (nrounds != 0 && ((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= MON_BASE_INTERVAL_MS)
|
||||
if (nrounds != 0 &&
|
||||
(((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
|
||||
{
|
||||
nrounds += 1;
|
||||
continue;
|
||||
@ -494,6 +496,7 @@ monitorMain(void *arg)
|
||||
is_cluster = 0;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
ptr = mon->databases;
|
||||
while (ptr)
|
||||
@ -619,6 +622,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
}
|
||||
}
|
||||
|
@ -550,8 +550,8 @@ monitorMain(void *arg)
|
||||
* round.
|
||||
*/
|
||||
if (nrounds != 0 &&
|
||||
((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS)
|
||||
(((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
|
||||
{
|
||||
nrounds += 1;
|
||||
continue;
|
||||
@ -559,6 +559,8 @@ monitorMain(void *arg)
|
||||
nrounds += 1;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
/* start from the first server in the list */
|
||||
ptr = mon->databases;
|
||||
|
||||
@ -643,6 +645,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
}
|
||||
}
|
||||
|
@ -1151,8 +1151,8 @@ monitorMain(void *arg)
|
||||
* round.
|
||||
*/
|
||||
if (nrounds != 0 &&
|
||||
((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS)
|
||||
(((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
|
||||
{
|
||||
nrounds += 1;
|
||||
continue;
|
||||
@ -1162,6 +1162,8 @@ monitorMain(void *arg)
|
||||
num_servers = 0;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
/* start from the first server in the list */
|
||||
ptr = mon->databases;
|
||||
|
||||
@ -1452,6 +1454,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
} /*< while (1) */
|
||||
}
|
||||
|
@ -375,6 +375,8 @@ monitorMain(void *arg)
|
||||
nrounds += 1;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
ptr = mon->databases;
|
||||
while (ptr)
|
||||
{
|
||||
@ -415,6 +417,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user