From b434c945636d61ca317ac9019816e3a3e7bcb387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Sun, 23 Apr 2017 02:49:35 +0300 Subject: [PATCH] Prevent monitor deadlocks with repeated restarts If a monitor is started and stopped before the external monitoring thread has had time to start, a deadlock will occur. The first thing that the monitoring threads do is read the monitor handle from the monitor object. This handle is given as the return value of startMonitor and it is stored in the monitor object. As this can still be NULL when the monitor thread starts, the threads use locks to prevent this. The correct way to prevent this is to pass the handle as the thread parameter so that no locks are required. --- server/modules/monitor/auroramon/auroramon.c | 8 +++++--- server/modules/monitor/galeramon/galeramon.c | 17 ++++++++--------- server/modules/monitor/galeramon/galeramon.h | 2 +- server/modules/monitor/mmmon/mmmon.c | 14 +++++++------- server/modules/monitor/mmmon/mmmon.h | 2 +- server/modules/monitor/mysqlmon.h | 2 +- server/modules/monitor/mysqlmon/mysql_mon.c | 15 ++++++++------- .../monitor/ndbclustermon/ndbclustermon.c | 15 +++++++-------- 8 files changed, 38 insertions(+), 37 deletions(-) diff --git a/server/modules/monitor/auroramon/auroramon.c b/server/modules/monitor/auroramon/auroramon.c index 5f5d01150..34f1c867b 100644 --- a/server/modules/monitor/auroramon/auroramon.c +++ b/server/modules/monitor/auroramon/auroramon.c @@ -30,6 +30,7 @@ typedef struct aurora_monitor THREAD thread; /**< Monitor thread */ char* script; /**< Launchable script */ uint64_t events; /**< Enabled monitor events */ + MXS_MONITOR* monitor; } AURORA_MONITOR; /** @@ -111,8 +112,8 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database) static void monitorMain(void *arg) { - MXS_MONITOR *monitor = (MXS_MONITOR*)arg; - AURORA_MONITOR *handle = monitor->handle; + AURORA_MONITOR *handle = (AURORA_MONITOR*)arg; + MXS_MONITOR *monitor = handle->monitor; if (mysql_thread_init()) { @@ -201,6 +202,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) } handle->shutdown = false; + handle->monitor = mon; if (!check_monitor_permissions(mon, "SELECT @@aurora_server_id, server_id FROM " "information_schema.replica_host_status " @@ -215,7 +217,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) handle->script = config_copy_string(params, "script"); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); - if (thread_start(&handle->thread, monitorMain, mon) == NULL) + if (thread_start(&handle->thread, monitorMain, handle) == NULL) { MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name); auroramon_free(handle); diff --git a/server/modules/monitor/galeramon/galeramon.c b/server/modules/monitor/galeramon/galeramon.c index fec4483dc..9ed720010 100644 --- a/server/modules/monitor/galeramon/galeramon.c +++ b/server/modules/monitor/galeramon/galeramon.c @@ -168,8 +168,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) handle->galera_nodes_info = nodes_info; handle->cluster_info.c_size = 0; handle->cluster_info.c_uuid = NULL; - - spinlock_init(&handle->lock); + handle->monitor = mon; } handle->disableMasterFailback = config_get_bool(params, "disable_master_failback"); @@ -195,9 +194,13 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) return NULL; } - if (thread_start(&handle->thread, monitorMain, mon) == NULL) + if (thread_start(&handle->thread, monitorMain, handle) == NULL) { MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name); + hashtable_free(handle->galera_nodes_info); + MXS_FREE(handle->script); + MXS_FREE(handle); + return NULL; } return handle; @@ -490,19 +493,15 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) static void monitorMain(void *arg) { - MXS_MONITOR* mon = (MXS_MONITOR*) arg; - GALERA_MONITOR *handle; + GALERA_MONITOR *handle = (GALERA_MONITOR*)arg; + MXS_MONITOR* mon = handle->monitor; MXS_MONITOR_SERVERS *ptr; size_t nrounds = 0; MXS_MONITOR_SERVERS *candidate_master = NULL; int master_stickiness; int is_cluster = 0; int log_no_members = 1; - mxs_monitor_event_t evtype; - spinlock_acquire(&mon->lock); - handle = (GALERA_MONITOR *) mon->handle; - spinlock_release(&mon->lock); master_stickiness = handle->disableMasterFailback; if (mysql_thread_init()) { diff --git a/server/modules/monitor/galeramon/galeramon.h b/server/modules/monitor/galeramon/galeramon.h index e9c7ff7c6..d7f305085 100644 --- a/server/modules/monitor/galeramon/galeramon.h +++ b/server/modules/monitor/galeramon/galeramon.h @@ -78,7 +78,6 @@ typedef struct galera_cluster_info */ typedef struct { - SPINLOCK lock; /**< The monitor spinlock */ THREAD thread; /**< Monitor thread */ int shutdown; /**< Flag to shutdown the monitor thread */ int status; /**< Monitor status */ @@ -96,6 +95,7 @@ typedef struct * ordered list of nodes */ HASHTABLE *galera_nodes_info; /**< Contains Galera Cluster variables of all nodes */ GALERA_CLUSTER_INFO cluster_info; /**< Contains Galera cluster info */ + MXS_MONITOR* monitor; } GALERA_MONITOR; MXS_END_DECLS diff --git a/server/modules/monitor/mmmon/mmmon.c b/server/modules/monitor/mmmon/mmmon.c index 500d65fcb..f3a18145b 100644 --- a/server/modules/monitor/mmmon/mmmon.c +++ b/server/modules/monitor/mmmon/mmmon.c @@ -128,7 +128,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) handle->shutdown = 0; handle->id = MXS_MONITOR_DEFAULT_ID; handle->master = NULL; - spinlock_init(&handle->lock); + handle->monitor = mon; } handle->detectStaleMaster = config_get_bool(params, "detect_stale_master"); @@ -143,9 +143,12 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) return NULL; } - if (thread_start(&handle->thread, monitorMain, mon) == NULL) + if (thread_start(&handle->thread, monitorMain, handle) == NULL) { MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name); + MXS_FREE(handle->script); + MXS_FREE(handle); + return NULL; } return handle; @@ -482,16 +485,13 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) static void monitorMain(void *arg) { - MXS_MONITOR* mon = (MXS_MONITOR*) arg; - MM_MONITOR *handle; + MM_MONITOR *handle = (MM_MONITOR *)arg; + MXS_MONITOR* mon = handle->monitor; MXS_MONITOR_SERVERS *ptr; int detect_stale_master = false; MXS_MONITOR_SERVERS *root_master = NULL; size_t nrounds = 0; - spinlock_acquire(&mon->lock); - handle = (MM_MONITOR *) mon->handle; - spinlock_release(&mon->lock); detect_stale_master = handle->detectStaleMaster; if (mysql_thread_init()) diff --git a/server/modules/monitor/mmmon/mmmon.h b/server/modules/monitor/mmmon/mmmon.h index 726f8ffd6..5af8ac671 100644 --- a/server/modules/monitor/mmmon/mmmon.h +++ b/server/modules/monitor/mmmon/mmmon.h @@ -40,7 +40,6 @@ MXS_BEGIN_DECLS */ typedef struct { - SPINLOCK lock; /**< The monitor spinlock */ THREAD thread; /**< Monitor thread */ int shutdown; /**< Flag to shutdown the monitor thread */ int status; /**< Monitor status */ @@ -49,6 +48,7 @@ typedef struct MXS_MONITOR_SERVERS *master; /**< Master server for Master/Slave replication */ char* script; /*< Script to call when state changes occur on servers */ uint64_t events; /*< enabled events */ + MXS_MONITOR* monitor; } MM_MONITOR; MXS_END_DECLS diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 6e013b62f..73f214e2a 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -57,7 +57,6 @@ MXS_BEGIN_DECLS */ typedef struct { - SPINLOCK lock; /**< The monitor spinlock */ THREAD thread; /**< Monitor thread */ int shutdown; /**< Flag to shutdown the monitor thread */ int status; /**< Monitor status */ @@ -81,6 +80,7 @@ typedef struct bool warn_failover; /**< Log a warning when failover happens */ bool load_journal; /**< Whether journal file should be loaded */ time_t journal_max_age; /**< Maximum age of journal file */ + MXS_MONITOR* monitor; } MYSQL_MONITOR; /** diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index 00c241ace..84ba2f40f 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -246,7 +246,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->id = config_get_global_options()->id; handle->warn_failover = true; handle->load_journal = true; - spinlock_init(&handle->lock); + handle->monitor = monitor; } /** This should always be reset to NULL */ @@ -290,9 +290,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) MXS_FREE(handle); handle = NULL; } - else if (thread_start(&handle->thread, monitorMain, monitor) == NULL) + else if (thread_start(&handle->thread, monitorMain, handle) == NULL) { MXS_ERROR("Failed to start monitor thread for monitor '%s'.", monitor->name); + hashtable_free(handle->server_info); + MXS_FREE(handle->script); + MXS_FREE(handle); + handle = NULL; } return handle; @@ -1090,8 +1094,8 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db) static void monitorMain(void *arg) { - MXS_MONITOR* mon = (MXS_MONITOR*) arg; - MYSQL_MONITOR *handle; + MYSQL_MONITOR *handle = (MYSQL_MONITOR *) arg; + MXS_MONITOR* mon = handle->monitor; MXS_MONITOR_SERVERS *ptr; int replication_heartbeat; bool detect_stale_master; @@ -1101,9 +1105,6 @@ monitorMain(void *arg) int log_no_master = 1; bool heartbeat_checked = false; - spinlock_acquire(&mon->lock); - handle = (MYSQL_MONITOR *) mon->handle; - spinlock_release(&mon->lock); replication_heartbeat = handle->replicationHeartbeat; detect_stale_master = handle->detectStaleMaster; diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.c b/server/modules/monitor/ndbclustermon/ndbclustermon.c index 72bf5f1cb..26b36677d 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.c @@ -129,7 +129,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) handle->shutdown = 0; handle->id = MXS_MONITOR_DEFAULT_ID; handle->master = NULL; - spinlock_init(&handle->lock); + handle->monitor = mon; } handle->script = config_copy_string(params, "script"); @@ -144,9 +144,12 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params) return NULL; } - if (thread_start(&handle->thread, monitorMain, mon) == NULL) + if (thread_start(&handle->thread, monitorMain, handle) == NULL) { MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name); + MXS_FREE(handle->script); + MXS_FREE(handle); + return NULL; } return handle; @@ -307,15 +310,11 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP static void monitorMain(void *arg) { - MXS_MONITOR* mon = arg; - MYSQL_MONITOR *handle; + MYSQL_MONITOR *handle = (MYSQL_MONITOR*)arg; + MXS_MONITOR* mon = handle->monitor; MXS_MONITOR_SERVERS *ptr; size_t nrounds = 0; - spinlock_acquire(&mon->lock); - handle = (MYSQL_MONITOR *) mon->handle; - spinlock_release(&mon->lock); - if (mysql_thread_init()) { MXS_ERROR("Fatal : mysql_thread_init failed in monitor module. Exiting.");