Prevent monitor deadlocks with repeated restarts

If a monitor is started and stopped before the external monitoring thread
has had time to start, a deadlock will occur.

The first thing that the monitoring threads do is read the monitor handle
from the monitor object. This handle is given as the return value of
startMonitor and is then stored in the monitor object. As the stored handle
can still be NULL when the monitor thread starts, the threads acquire the
monitor's lock before reading it to avoid seeing a NULL handle.

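The correct way to avoid reading a NULL handle is to pass the handle itself
as the thread parameter so that no locks are required. Each handle now
stores a pointer to its monitor object so that the thread can still reach
the monitor through the handle.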
This commit is contained in:
Markus Mäkelä 2017-04-23 02:49:35 +03:00
parent a4e361b5e5
commit b434c94563
8 changed files with 38 additions and 37 deletions

View File

@ -30,6 +30,7 @@ typedef struct aurora_monitor
THREAD thread; /**< Monitor thread */
char* script; /**< Launchable script */
uint64_t events; /**< Enabled monitor events */
MXS_MONITOR* monitor;
} AURORA_MONITOR;
/**
@ -111,8 +112,8 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database)
static void
monitorMain(void *arg)
{
MXS_MONITOR *monitor = (MXS_MONITOR*)arg;
AURORA_MONITOR *handle = monitor->handle;
AURORA_MONITOR *handle = (AURORA_MONITOR*)arg;
MXS_MONITOR *monitor = handle->monitor;
if (mysql_thread_init())
{
@ -201,6 +202,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
}
handle->shutdown = false;
handle->monitor = mon;
if (!check_monitor_permissions(mon, "SELECT @@aurora_server_id, server_id FROM "
"information_schema.replica_host_status "
@ -215,7 +217,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
handle->script = config_copy_string(params, "script");
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
{
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
auroramon_free(handle);

View File

@ -168,8 +168,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
handle->galera_nodes_info = nodes_info;
handle->cluster_info.c_size = 0;
handle->cluster_info.c_uuid = NULL;
spinlock_init(&handle->lock);
handle->monitor = mon;
}
handle->disableMasterFailback = config_get_bool(params, "disable_master_failback");
@ -195,9 +194,13 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
return NULL;
}
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
{
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
hashtable_free(handle->galera_nodes_info);
MXS_FREE(handle->script);
MXS_FREE(handle);
return NULL;
}
return handle;
@ -490,19 +493,15 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
static void
monitorMain(void *arg)
{
MXS_MONITOR* mon = (MXS_MONITOR*) arg;
GALERA_MONITOR *handle;
GALERA_MONITOR *handle = (GALERA_MONITOR*)arg;
MXS_MONITOR* mon = handle->monitor;
MXS_MONITOR_SERVERS *ptr;
size_t nrounds = 0;
MXS_MONITOR_SERVERS *candidate_master = NULL;
int master_stickiness;
int is_cluster = 0;
int log_no_members = 1;
mxs_monitor_event_t evtype;
spinlock_acquire(&mon->lock);
handle = (GALERA_MONITOR *) mon->handle;
spinlock_release(&mon->lock);
master_stickiness = handle->disableMasterFailback;
if (mysql_thread_init())
{

View File

@ -78,7 +78,6 @@ typedef struct galera_cluster_info
*/
typedef struct
{
SPINLOCK lock; /**< The monitor spinlock */
THREAD thread; /**< Monitor thread */
int shutdown; /**< Flag to shutdown the monitor thread */
int status; /**< Monitor status */
@ -96,6 +95,7 @@ typedef struct
* ordered list of nodes */
HASHTABLE *galera_nodes_info; /**< Contains Galera Cluster variables of all nodes */
GALERA_CLUSTER_INFO cluster_info; /**< Contains Galera cluster info */
MXS_MONITOR* monitor;
} GALERA_MONITOR;
MXS_END_DECLS

View File

@ -128,7 +128,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
handle->shutdown = 0;
handle->id = MXS_MONITOR_DEFAULT_ID;
handle->master = NULL;
spinlock_init(&handle->lock);
handle->monitor = mon;
}
handle->detectStaleMaster = config_get_bool(params, "detect_stale_master");
@ -143,9 +143,12 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
return NULL;
}
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
{
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
MXS_FREE(handle->script);
MXS_FREE(handle);
return NULL;
}
return handle;
@ -482,16 +485,13 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
static void
monitorMain(void *arg)
{
MXS_MONITOR* mon = (MXS_MONITOR*) arg;
MM_MONITOR *handle;
MM_MONITOR *handle = (MM_MONITOR *)arg;
MXS_MONITOR* mon = handle->monitor;
MXS_MONITOR_SERVERS *ptr;
int detect_stale_master = false;
MXS_MONITOR_SERVERS *root_master = NULL;
size_t nrounds = 0;
spinlock_acquire(&mon->lock);
handle = (MM_MONITOR *) mon->handle;
spinlock_release(&mon->lock);
detect_stale_master = handle->detectStaleMaster;
if (mysql_thread_init())

View File

@ -40,7 +40,6 @@ MXS_BEGIN_DECLS
*/
typedef struct
{
SPINLOCK lock; /**< The monitor spinlock */
THREAD thread; /**< Monitor thread */
int shutdown; /**< Flag to shutdown the monitor thread */
int status; /**< Monitor status */
@ -49,6 +48,7 @@ typedef struct
MXS_MONITOR_SERVERS *master; /**< Master server for Master/Slave replication */
char* script; /*< Script to call when state changes occur on servers */
uint64_t events; /*< enabled events */
MXS_MONITOR* monitor;
} MM_MONITOR;
MXS_END_DECLS

View File

@ -57,7 +57,6 @@ MXS_BEGIN_DECLS
*/
typedef struct
{
SPINLOCK lock; /**< The monitor spinlock */
THREAD thread; /**< Monitor thread */
int shutdown; /**< Flag to shutdown the monitor thread */
int status; /**< Monitor status */
@ -81,6 +80,7 @@ typedef struct
bool warn_failover; /**< Log a warning when failover happens */
bool load_journal; /**< Whether journal file should be loaded */
time_t journal_max_age; /**< Maximum age of journal file */
MXS_MONITOR* monitor;
} MYSQL_MONITOR;
/**

View File

@ -246,7 +246,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
handle->id = config_get_global_options()->id;
handle->warn_failover = true;
handle->load_journal = true;
spinlock_init(&handle->lock);
handle->monitor = monitor;
}
/** This should always be reset to NULL */
@ -290,9 +290,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
MXS_FREE(handle);
handle = NULL;
}
else if (thread_start(&handle->thread, monitorMain, monitor) == NULL)
else if (thread_start(&handle->thread, monitorMain, handle) == NULL)
{
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", monitor->name);
hashtable_free(handle->server_info);
MXS_FREE(handle->script);
MXS_FREE(handle);
handle = NULL;
}
return handle;
@ -1090,8 +1094,8 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db)
static void
monitorMain(void *arg)
{
MXS_MONITOR* mon = (MXS_MONITOR*) arg;
MYSQL_MONITOR *handle;
MYSQL_MONITOR *handle = (MYSQL_MONITOR *) arg;
MXS_MONITOR* mon = handle->monitor;
MXS_MONITOR_SERVERS *ptr;
int replication_heartbeat;
bool detect_stale_master;
@ -1101,9 +1105,6 @@ monitorMain(void *arg)
int log_no_master = 1;
bool heartbeat_checked = false;
spinlock_acquire(&mon->lock);
handle = (MYSQL_MONITOR *) mon->handle;
spinlock_release(&mon->lock);
replication_heartbeat = handle->replicationHeartbeat;
detect_stale_master = handle->detectStaleMaster;

View File

@ -129,7 +129,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
handle->shutdown = 0;
handle->id = MXS_MONITOR_DEFAULT_ID;
handle->master = NULL;
spinlock_init(&handle->lock);
handle->monitor = mon;
}
handle->script = config_copy_string(params, "script");
@ -144,9 +144,12 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
return NULL;
}
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
{
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
MXS_FREE(handle->script);
MXS_FREE(handle);
return NULL;
}
return handle;
@ -307,15 +310,11 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP
static void
monitorMain(void *arg)
{
MXS_MONITOR* mon = arg;
MYSQL_MONITOR *handle;
MYSQL_MONITOR *handle = (MYSQL_MONITOR*)arg;
MXS_MONITOR* mon = handle->monitor;
MXS_MONITOR_SERVERS *ptr;
size_t nrounds = 0;
spinlock_acquire(&mon->lock);
handle = (MYSQL_MONITOR *) mon->handle;
spinlock_release(&mon->lock);
if (mysql_thread_init())
{
MXS_ERROR("Fatal : mysql_thread_init failed in monitor module. Exiting.");