Have server status updates applied during monitor loop
Previously, server status changes from MaxAdmin were applied immediately, as long as the server lock could be acquired. However, it could then take several seconds before the next monitor pass was executed. Usually this was fine, but in some situations we want the monitor to run immediately after the change (MXS-740 and Galera). This patch changes the logic of setting and clearing status bits to a delayed mode: changes are first applied to a "status_pending" variable, and the setting is applied only once the monitor runs. To reduce the delay, the monitor now has a flag which is checked during sleep (between short 0.1s naps). If the flag is set, the sleep is cut short. If a server is not monitored, the status bits are set directly. There is a small possibility of a race condition: if a monitor is stopped or destroyed before the pending change is applied, the change is forgotten.
This commit is contained in:
@ -200,6 +200,9 @@ struct monitor
|
||||
void *handle; /**< Handle returned from startMonitor */
|
||||
size_t interval; /**< The monitor interval */
|
||||
bool created_online; /**< Whether this monitor was created at runtime */
|
||||
volatile bool server_pending_changes;
|
||||
/**< Are there any pending changes to a server?
|
||||
* If yes, the next monitor loop starts early. */
|
||||
struct monitor *next; /**< Next monitor in the linked list */
|
||||
};
|
||||
|
||||
@ -237,7 +240,8 @@ void mon_log_connect_error(MONITOR_SERVERS* database, connect_result_t rval);
|
||||
void mon_log_state_change(MONITOR_SERVERS *ptr);
|
||||
void lock_monitor_servers(MONITOR *monitor);
|
||||
void release_monitor_servers(MONITOR *monitor);
|
||||
|
||||
void servers_status_pending_to_current(MONITOR *monitor);
|
||||
void servers_status_current_to_pending(MONITOR *monitor);
|
||||
/**
|
||||
* @brief Hangup connections to failed servers
|
||||
*
|
||||
@ -274,10 +278,10 @@ bool monitor_serialize_servers(const MONITOR *monitor);
|
||||
bool monitor_serialize(const MONITOR *monitor);
|
||||
|
||||
/**
|
||||
* Check if a monitor uses @c servers
|
||||
* Check if a server is being monitored and return the monitor.
|
||||
* @param server Server that is queried
|
||||
* @return True if server is used by at least one monitor
|
||||
* @return The monitor watching this server, or NULL if not monitored
|
||||
*/
|
||||
bool monitor_server_in_use(const SERVER *server);
|
||||
MONITOR* monitor_server_in_use(const SERVER *server);
|
||||
|
||||
MXS_END_DECLS
|
||||
|
@ -96,6 +96,7 @@ typedef struct server
|
||||
char *auth_options; /**< Authenticator options */
|
||||
SSL_LISTENER *server_ssl; /**< SSL data structure for server, if any */
|
||||
unsigned int status; /**< Status flag bitmap for the server */
|
||||
unsigned int status_pending; /**< Pending status flag bitmap for the server */
|
||||
char monuser[MAX_SERVER_MONUSER_LEN]; /**< User name to use to monitor the db */
|
||||
char monpw[MAX_SERVER_MONPW_LEN]; /**< Password to use to monitor the db */
|
||||
SERVER_STATS stats; /**< The server statistics */
|
||||
|
@ -105,6 +105,7 @@ monitor_alloc(char *name, char *module)
|
||||
mon->interval = MONITOR_INTERVAL;
|
||||
mon->parameters = NULL;
|
||||
mon->created_online = false;
|
||||
mon->server_pending_changes = false;
|
||||
spinlock_init(&mon->lock);
|
||||
spinlock_acquire(&monLock);
|
||||
mon->next = allMonitors;
|
||||
@ -1248,9 +1249,9 @@ void mon_log_state_change(MONITOR_SERVERS *ptr)
|
||||
MXS_FREE(next);
|
||||
}
|
||||
|
||||
bool monitor_server_in_use(const SERVER *server)
|
||||
MONITOR* monitor_server_in_use(const SERVER *server)
|
||||
{
|
||||
bool rval = false;
|
||||
MONITOR *rval = NULL;
|
||||
|
||||
spinlock_acquire(&monLock);
|
||||
|
||||
@ -1262,7 +1263,7 @@ bool monitor_server_in_use(const SERVER *server)
|
||||
{
|
||||
if (db->server == server)
|
||||
{
|
||||
rval = true;
|
||||
rval = mon;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1439,7 +1440,7 @@ void mon_hangup_failed_servers(MONITOR *monitor)
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Acquire locks on all servers monitored by this monitor. There should
|
||||
* only be max 1 monitor per server.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
@ -1452,6 +1453,11 @@ void lock_monitor_servers(MONITOR *monitor)
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Release locks on all servers monitored by this monitor. There should
|
||||
* only be max 1 monitor per server.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
void release_monitor_servers(MONITOR *monitor)
|
||||
{
|
||||
MONITOR_SERVERS *ptr = monitor->databases;
|
||||
@ -1461,3 +1467,34 @@ void release_monitor_servers(MONITOR *monitor)
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Sets the current status of all servers monitored by this monitor to
|
||||
* the pending status. This should only be called at the beginning of
|
||||
* a monitor loop, after the servers are locked.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
void servers_status_pending_to_current(MONITOR *monitor)
|
||||
{
|
||||
MONITOR_SERVERS *ptr = monitor->databases;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->server->status = ptr->server->status_pending;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
monitor->server_pending_changes = false;
|
||||
}
|
||||
/**
|
||||
* Sets the pending status of all servers monitored by this monitor to
|
||||
* the current status. This should only be called at the end of
|
||||
* a monitor loop, before the servers are released.
|
||||
* @param monitor The target monitor
|
||||
*/
|
||||
void servers_status_current_to_pending(MONITOR *monitor)
|
||||
{
|
||||
MONITOR_SERVERS *ptr = monitor->databases;
|
||||
while (ptr)
|
||||
{
|
||||
ptr->server->status_pending = ptr->server->status;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
@ -121,6 +121,7 @@ SERVER* server_alloc(const char *name, const char *address, unsigned short port,
|
||||
server->auth_options = my_auth_options;
|
||||
server->port = port;
|
||||
server->status = SERVER_RUNNING;
|
||||
server->status_pending = SERVER_RUNNING;
|
||||
server->node_id = -1;
|
||||
server->rlag = -2;
|
||||
server->master_id = -1;
|
||||
@ -1264,28 +1265,58 @@ SERVER* server_find_destroyed(const char *name, const char *protocol,
|
||||
/**
|
||||
* Set a status bit in the server under a lock. This ensures synchronization
|
||||
* with the server monitor thread. Calling this inside the monitor will likely
|
||||
* cause a deadlock.
|
||||
* cause a deadlock. If the server is monitored, only set the pending bit.
|
||||
*
|
||||
* @param server The server to update
|
||||
* @param bit The bit to set for the server
|
||||
*/
|
||||
void server_set_status(SERVER *server, int bit)
|
||||
{
|
||||
/* First check if the server is monitored. This isn't done under a lock
|
||||
* but the race condition cannot cause significant harm. Monitors are never
|
||||
* freed so the pointer stays valid.
|
||||
*/
|
||||
MONITOR *mon = monitor_server_in_use(server);
|
||||
spinlock_acquire(&server->lock);
|
||||
server_set_status_nolock(server, bit);
|
||||
if (mon)
|
||||
{
|
||||
/* Set a pending status bit. It will be activated on the next monitor
|
||||
* loop. Also set a flag so the next loop happens sooner.
|
||||
*/
|
||||
server->status_pending |= bit;
|
||||
mon->server_pending_changes = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set the bit directly */
|
||||
server_set_status_nolock(server, bit);
|
||||
}
|
||||
spinlock_release(&server->lock);
|
||||
}
|
||||
/**
|
||||
* Clear a status bit in the server under a lock. This ensures synchronization
|
||||
* with the server monitor thread. Calling this inside the monitor will likely
|
||||
* cause a deadlock.
|
||||
* cause a deadlock. If the server is monitored, only clear the pending bit.
|
||||
*
|
||||
* @param server The server to update
|
||||
* @param bit The bit to clear for the server
|
||||
*/
|
||||
void server_clear_status(SERVER *server, int bit)
|
||||
{
|
||||
MONITOR *mon = monitor_server_in_use(server);
|
||||
spinlock_acquire(&server->lock);
|
||||
server_clear_status_nolock(server, bit);
|
||||
if (mon)
|
||||
{
|
||||
/* Clear a pending status bit. It will be activated on the next monitor
|
||||
* loop. Also set a flag so the next loop happens sooner.
|
||||
*/
|
||||
server->status_pending &= ~bit;
|
||||
mon->server_pending_changes = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Clear bit directly */
|
||||
server_clear_status_nolock(server, bit);
|
||||
}
|
||||
spinlock_release(&server->lock);
|
||||
}
|
||||
|
@ -188,6 +188,8 @@ monitorMain(void *arg)
|
||||
while (!handle->shutdown)
|
||||
{
|
||||
lock_monitor_servers(monitor);
|
||||
servers_status_pending_to_current(monitor);
|
||||
|
||||
for (MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next)
|
||||
{
|
||||
update_server_status(monitor, ptr);
|
||||
@ -221,12 +223,18 @@ monitorMain(void *arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
servers_status_current_to_pending(monitor);
|
||||
release_monitor_servers(monitor);
|
||||
|
||||
/** Sleep until the next monitoring interval */
|
||||
int ms = 0;
|
||||
while (ms < monitor->interval && !handle->shutdown)
|
||||
{
|
||||
if (monitor->server_pending_changes)
|
||||
{
|
||||
// Admin has changed something, skip sleep
|
||||
break;
|
||||
}
|
||||
thread_millisleep(MON_BASE_INTERVAL_MS);
|
||||
ms += MON_BASE_INTERVAL_MS;
|
||||
}
|
||||
|
@ -482,7 +482,9 @@ monitorMain(void *arg)
|
||||
* interval, then skip monitoring checks. Excluding the first
|
||||
* round.
|
||||
*/
|
||||
if (nrounds != 0 && ((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= MON_BASE_INTERVAL_MS)
|
||||
if (nrounds != 0 &&
|
||||
(((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
|
||||
{
|
||||
nrounds += 1;
|
||||
continue;
|
||||
@ -494,6 +496,7 @@ monitorMain(void *arg)
|
||||
is_cluster = 0;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
ptr = mon->databases;
|
||||
while (ptr)
|
||||
@ -619,6 +622,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
}
|
||||
}
|
||||
|
@ -550,8 +550,8 @@ monitorMain(void *arg)
|
||||
* round.
|
||||
*/
|
||||
if (nrounds != 0 &&
|
||||
((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS)
|
||||
(((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
|
||||
{
|
||||
nrounds += 1;
|
||||
continue;
|
||||
@ -559,6 +559,8 @@ monitorMain(void *arg)
|
||||
nrounds += 1;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
/* start from the first server in the list */
|
||||
ptr = mon->databases;
|
||||
|
||||
@ -643,6 +645,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
}
|
||||
}
|
||||
|
@ -1151,8 +1151,8 @@ monitorMain(void *arg)
|
||||
* round.
|
||||
*/
|
||||
if (nrounds != 0 &&
|
||||
((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS)
|
||||
(((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >=
|
||||
MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes))
|
||||
{
|
||||
nrounds += 1;
|
||||
continue;
|
||||
@ -1162,6 +1162,8 @@ monitorMain(void *arg)
|
||||
num_servers = 0;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
/* start from the first server in the list */
|
||||
ptr = mon->databases;
|
||||
|
||||
@ -1452,6 +1454,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
} /*< while (1) */
|
||||
}
|
||||
|
@ -375,6 +375,8 @@ monitorMain(void *arg)
|
||||
nrounds += 1;
|
||||
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
ptr = mon->databases;
|
||||
while (ptr)
|
||||
{
|
||||
@ -415,6 +417,7 @@ monitorMain(void *arg)
|
||||
}
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
release_monitor_servers(mon);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user