From 7e9db7ed0c0311dc3cf93ff85870e5de59680364 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Wed, 14 Dec 2016 17:50:17 +0200 Subject: [PATCH] Have server status updates applied during monitor loop Previously, server status changes from MaxAdmin would be set immediately as long as the server lock could be acquired. This meant that it might take several seconds until the next monitor pass is executed. Usually, this was fine but in some situations we would want the monitor to run immediately after the change (MXS-740 and Galera). This patch changes the logic of setting and clearing status bits to a delayed mode: changes are first applied to a "status_pending"-variable, and only once the monitor runs will the setting be applied. To reduce the delay, the monitor now has a flag which is checked during sleep (between short 0.1s naps). If set, the sleep is cut short. If a server is not monitored, the status bits are set directly. There is a small possibility of a race condition: If a monitor is stopped or destroyed before the pending change is applied, the change is forgotten. --- include/maxscale/monitor.h | 12 +++-- include/maxscale/server.h | 1 + server/core/monitor.c | 45 +++++++++++++++++-- server/core/server.c | 39 ++++++++++++++-- server/modules/monitor/auroramon/auroramon.c | 10 ++++- server/modules/monitor/galeramon/galeramon.c | 6 ++- server/modules/monitor/mmmon/mmmon.c | 7 ++- server/modules/monitor/mysqlmon/mysql_mon.c | 7 ++- .../monitor/ndbclustermon/ndbclustermon.c | 3 ++ 9 files changed, 112 insertions(+), 18 deletions(-) diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index 6cf5bb225..4b3a63092 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -200,6 +200,9 @@ struct monitor void *handle; /**< Handle returned from startMonitor */ size_t interval; /**< The monitor interval */ bool created_online; /**< Whether this monitor was created at runtime */ + volatile bool server_pending_changes; + /**< Are there any pending changes to a server? + * If yes, the next monitor loop starts early. */ struct monitor *next; /**< Next monitor in the linked list */ }; @@ -237,7 +240,8 @@ void mon_log_connect_error(MONITOR_SERVERS* database, connect_result_t rval); void mon_log_state_change(MONITOR_SERVERS *ptr); void lock_monitor_servers(MONITOR *monitor); void release_monitor_servers(MONITOR *monitor); - +void servers_status_pending_to_current(MONITOR *monitor); +void servers_status_current_to_pending(MONITOR *monitor); /** * @brief Hangup connections to failed servers * @@ -274,10 +278,10 @@ bool monitor_serialize_servers(const MONITOR *monitor); bool monitor_serialize(const MONITOR *monitor); /** - * Check if a monitor uses @c servers + * Check if a server is being monitored and return the monitor. * @param server Server that is queried - * @return True if server is used by at least one monitor + * @return The monitor watching this server, or NULL if not monitored */ -bool monitor_server_in_use(const SERVER *server); +MONITOR* monitor_server_in_use(const SERVER *server); MXS_END_DECLS diff --git a/include/maxscale/server.h b/include/maxscale/server.h index 7f04f9c31..1117cd167 100644 --- a/include/maxscale/server.h +++ b/include/maxscale/server.h @@ -96,6 +96,7 @@ typedef struct server char *auth_options; /**< Authenticator options */ SSL_LISTENER *server_ssl; /**< SSL data structure for server, if any */ unsigned int status; /**< Status flag bitmap for the server */ + unsigned int status_pending; /**< Pending status flag bitmap for the server */ char monuser[MAX_SERVER_MONUSER_LEN]; /**< User name to use to monitor the db */ char monpw[MAX_SERVER_MONPW_LEN]; /**< Password to use to monitor the db */ SERVER_STATS stats; /**< The server statistics */ diff --git a/server/core/monitor.c b/server/core/monitor.c index f75f8fe23..9dbf38461 100644 --- a/server/core/monitor.c +++ b/server/core/monitor.c @@ -105,6 +105,7 @@ monitor_alloc(char *name, char *module) mon->interval = MONITOR_INTERVAL; mon->parameters = NULL; mon->created_online = false; + mon->server_pending_changes = false; spinlock_init(&mon->lock); spinlock_acquire(&monLock); mon->next = allMonitors; @@ -1248,9 +1249,9 @@ void mon_log_state_change(MONITOR_SERVERS *ptr) MXS_FREE(next); } -bool monitor_server_in_use(const SERVER *server) +MONITOR* monitor_server_in_use(const SERVER *server) { - bool rval = false; + MONITOR *rval = NULL; spinlock_acquire(&monLock); @@ -1262,7 +1263,7 @@ bool monitor_server_in_use(const SERVER *server) { if (db->server == server) { - rval = true; + rval = mon; } } @@ -1439,7 +1440,7 @@ void mon_hangup_failed_servers(MONITOR *monitor) } } /** - * Acquire locks on all servers monitored my this monitor. There should + * Acquire locks on all servers monitored by this monitor. There should * only be max 1 monitor per server. * @param monitor The target monitor */ @@ -1452,6 +1453,11 @@ void lock_monitor_servers(MONITOR *monitor) ptr = ptr->next; } } +/** + * Release locks on all servers monitored by this monitor. There should + * only be max 1 monitor per server. + * @param monitor The target monitor + */ void release_monitor_servers(MONITOR *monitor) { MONITOR_SERVERS *ptr = monitor->databases; @@ -1460,4 +1466,35 @@ void release_monitor_servers(MONITOR *monitor) spinlock_release(&ptr->server->lock); ptr = ptr->next; } +} +/** + * Sets the current status of all servers monitored by this monitor to + * the pending status. This should only be called at the beginning of + * a monitor loop, after the servers are locked. + * @param monitor The target monitor + */ +void servers_status_pending_to_current(MONITOR *monitor) +{ + MONITOR_SERVERS *ptr = monitor->databases; + while (ptr) + { + ptr->server->status = ptr->server->status_pending; + ptr = ptr->next; + } + monitor->server_pending_changes = false; +} +/** + * Sets the pending status of all servers monitored by this monitor to + * the current status. This should only be called at the end of + * a monitor loop, before the servers are released. + * @param monitor The target monitor + */ +void servers_status_current_to_pending(MONITOR *monitor) +{ + MONITOR_SERVERS *ptr = monitor->databases; + while (ptr) + { + ptr->server->status_pending = ptr->server->status; + ptr = ptr->next; + } } \ No newline at end of file diff --git a/server/core/server.c b/server/core/server.c index f9a57ff15..4ea174223 100644 --- a/server/core/server.c +++ b/server/core/server.c @@ -121,6 +121,7 @@ SERVER* server_alloc(const char *name, const char *address, unsigned short port, server->auth_options = my_auth_options; server->port = port; server->status = SERVER_RUNNING; + server->status_pending = SERVER_RUNNING; server->node_id = -1; server->rlag = -2; server->master_id = -1; @@ -1264,28 +1265,58 @@ SERVER* server_find_destroyed(const char *name, const char *protocol, /** * Set a status bit in the server under a lock. This ensures synchronization * with the server monitor thread. Calling this inside the monitor will likely - * cause a deadlock. + * cause a deadlock. If the server is monitored, only set the pending bit. * * @param server The server to update * @param bit The bit to set for the server */ void server_set_status(SERVER *server, int bit) { + /* First check if the server is monitored. This isn't done under a lock + * but the race condition cannot cause significant harm. Monitors are never + * freed so the pointer stays valid. + */ + MONITOR *mon = monitor_server_in_use(server); spinlock_acquire(&server->lock); - server_set_status_nolock(server, bit); + if (mon) + { + /* Set a pending status bit. It will be activated on the next monitor + * loop. Also set a flag so the next loop happens sooner. + */ + server->status_pending |= bit; + mon->server_pending_changes = true; + } + else + { + /* Set the bit directly */ + server_set_status_nolock(server, bit); + } spinlock_release(&server->lock); } /** * Clear a status bit in the server under a lock. This ensures synchronization * with the server monitor thread. Calling this inside the monitor will likely - * cause a deadlock. + * cause a deadlock. If the server is monitored, only clear the pending bit. * * @param server The server to update * @param bit The bit to clear for the server */ void server_clear_status(SERVER *server, int bit) { + MONITOR *mon = monitor_server_in_use(server); spinlock_acquire(&server->lock); - server_clear_status_nolock(server, bit); + if (mon) + { + /* Clear a pending status bit. It will be activated on the next monitor + * loop. Also set a flag so the next loop happens sooner. + */ + server->status_pending &= ~bit; + mon->server_pending_changes = true; + } + else + { + /* Clear bit directly */ + server_clear_status_nolock(server, bit); + } spinlock_release(&server->lock); } diff --git a/server/modules/monitor/auroramon/auroramon.c b/server/modules/monitor/auroramon/auroramon.c index c3c0835ec..5e34b9d6a 100644 --- a/server/modules/monitor/auroramon/auroramon.c +++ b/server/modules/monitor/auroramon/auroramon.c @@ -188,6 +188,8 @@ monitorMain(void *arg) while (!handle->shutdown) { lock_monitor_servers(monitor); + servers_status_pending_to_current(monitor); + for (MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next) { update_server_status(monitor, ptr); @@ -221,12 +223,18 @@ monitorMain(void *arg) } } } - + servers_status_current_to_pending(monitor); release_monitor_servers(monitor); + /** Sleep until the next monitoring interval */ int ms = 0; while (ms < monitor->interval && !handle->shutdown) { + if (monitor->server_pending_changes) + { + // Admin has changed something, skip sleep + break; + } thread_millisleep(MON_BASE_INTERVAL_MS); ms += MON_BASE_INTERVAL_MS; } diff --git a/server/modules/monitor/galeramon/galeramon.c b/server/modules/monitor/galeramon/galeramon.c index 169161de1..21237f64e 100644 --- a/server/modules/monitor/galeramon/galeramon.c +++ b/server/modules/monitor/galeramon/galeramon.c @@ -482,7 +482,9 @@ monitorMain(void *arg) * interval, then skip monitoring checks. Excluding the first * round. */ - if (nrounds != 0 && ((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= MON_BASE_INTERVAL_MS) + if (nrounds != 0 && + (((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= + MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes)) { nrounds += 1; continue; @@ -494,6 +496,7 @@ monitorMain(void *arg) is_cluster = 0; lock_monitor_servers(mon); + servers_status_pending_to_current(mon); ptr = mon->databases; while (ptr) @@ -619,6 +622,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + servers_status_current_to_pending(mon); release_monitor_servers(mon); } } diff --git a/server/modules/monitor/mmmon/mmmon.c b/server/modules/monitor/mmmon/mmmon.c index 1989429a7..8d22a1e13 100644 --- a/server/modules/monitor/mmmon/mmmon.c +++ b/server/modules/monitor/mmmon/mmmon.c @@ -550,8 +550,8 @@ monitorMain(void *arg) * round. */ if (nrounds != 0 && - ((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= - MON_BASE_INTERVAL_MS) + (((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= + MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes)) { nrounds += 1; continue; @@ -559,6 +559,8 @@ monitorMain(void *arg) nrounds += 1; lock_monitor_servers(mon); + servers_status_pending_to_current(mon); + /* start from the first server in the list */ ptr = mon->databases; @@ -643,6 +645,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + servers_status_current_to_pending(mon); release_monitor_servers(mon); } } diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index ac1df9edd..12eff2341 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -1151,8 +1151,8 @@ monitorMain(void *arg) * round. */ if (nrounds != 0 && - ((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= - MON_BASE_INTERVAL_MS) + (((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= + MON_BASE_INTERVAL_MS) && (!mon->server_pending_changes)) { nrounds += 1; continue; @@ -1162,6 +1162,8 @@ monitorMain(void *arg) num_servers = 0; lock_monitor_servers(mon); + servers_status_pending_to_current(mon); + /* start from the first server in the list */ ptr = mon->databases; @@ -1452,6 +1454,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + servers_status_current_to_pending(mon); release_monitor_servers(mon); } /*< while (1) */ } diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.c b/server/modules/monitor/ndbclustermon/ndbclustermon.c index a1cef4274..499dbbb28 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.c @@ -375,6 +375,8 @@ monitorMain(void *arg) nrounds += 1; lock_monitor_servers(mon); + servers_status_pending_to_current(mon); + ptr = mon->databases; while (ptr) { @@ -415,6 +417,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + servers_status_current_to_pending(mon); release_monitor_servers(mon); } }