Prevent monitor deadlocks with repeated restarts
If a monitor is started and stopped before the external monitoring thread has had time to start, a deadlock will occur. The first thing that the monitoring threads do is read the monitor handle from the monitor object. This handle is given as the return value of startMonitor and it is stored in the monitor object. As the stored handle can still be NULL when the monitor thread starts, the threads previously acquired the monitor lock before reading it. The correct way to avoid reading a NULL handle is to pass the handle as the thread parameter so that no locks are required.
This commit is contained in:
parent
a4e361b5e5
commit
b434c94563
@ -30,6 +30,7 @@ typedef struct aurora_monitor
|
||||
THREAD thread; /**< Monitor thread */
|
||||
char* script; /**< Launchable script */
|
||||
uint64_t events; /**< Enabled monitor events */
|
||||
MXS_MONITOR* monitor;
|
||||
} AURORA_MONITOR;
|
||||
|
||||
/**
|
||||
@ -111,8 +112,8 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database)
|
||||
static void
|
||||
monitorMain(void *arg)
|
||||
{
|
||||
MXS_MONITOR *monitor = (MXS_MONITOR*)arg;
|
||||
AURORA_MONITOR *handle = monitor->handle;
|
||||
AURORA_MONITOR *handle = (AURORA_MONITOR*)arg;
|
||||
MXS_MONITOR *monitor = handle->monitor;
|
||||
|
||||
if (mysql_thread_init())
|
||||
{
|
||||
@ -201,6 +202,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
}
|
||||
|
||||
handle->shutdown = false;
|
||||
handle->monitor = mon;
|
||||
|
||||
if (!check_monitor_permissions(mon, "SELECT @@aurora_server_id, server_id FROM "
|
||||
"information_schema.replica_host_status "
|
||||
@ -215,7 +217,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
handle->script = config_copy_string(params, "script");
|
||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||
|
||||
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
|
||||
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
|
||||
{
|
||||
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
|
||||
auroramon_free(handle);
|
||||
|
@ -168,8 +168,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
handle->galera_nodes_info = nodes_info;
|
||||
handle->cluster_info.c_size = 0;
|
||||
handle->cluster_info.c_uuid = NULL;
|
||||
|
||||
spinlock_init(&handle->lock);
|
||||
handle->monitor = mon;
|
||||
}
|
||||
|
||||
handle->disableMasterFailback = config_get_bool(params, "disable_master_failback");
|
||||
@ -195,9 +194,13 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
|
||||
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
|
||||
{
|
||||
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
|
||||
hashtable_free(handle->galera_nodes_info);
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
@ -490,19 +493,15 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
|
||||
static void
|
||||
monitorMain(void *arg)
|
||||
{
|
||||
MXS_MONITOR* mon = (MXS_MONITOR*) arg;
|
||||
GALERA_MONITOR *handle;
|
||||
GALERA_MONITOR *handle = (GALERA_MONITOR*)arg;
|
||||
MXS_MONITOR* mon = handle->monitor;
|
||||
MXS_MONITOR_SERVERS *ptr;
|
||||
size_t nrounds = 0;
|
||||
MXS_MONITOR_SERVERS *candidate_master = NULL;
|
||||
int master_stickiness;
|
||||
int is_cluster = 0;
|
||||
int log_no_members = 1;
|
||||
mxs_monitor_event_t evtype;
|
||||
|
||||
spinlock_acquire(&mon->lock);
|
||||
handle = (GALERA_MONITOR *) mon->handle;
|
||||
spinlock_release(&mon->lock);
|
||||
master_stickiness = handle->disableMasterFailback;
|
||||
if (mysql_thread_init())
|
||||
{
|
||||
|
@ -78,7 +78,6 @@ typedef struct galera_cluster_info
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
SPINLOCK lock; /**< The monitor spinlock */
|
||||
THREAD thread; /**< Monitor thread */
|
||||
int shutdown; /**< Flag to shutdown the monitor thread */
|
||||
int status; /**< Monitor status */
|
||||
@ -96,6 +95,7 @@ typedef struct
|
||||
* ordered list of nodes */
|
||||
HASHTABLE *galera_nodes_info; /**< Contains Galera Cluster variables of all nodes */
|
||||
GALERA_CLUSTER_INFO cluster_info; /**< Contains Galera cluster info */
|
||||
MXS_MONITOR* monitor;
|
||||
} GALERA_MONITOR;
|
||||
|
||||
MXS_END_DECLS
|
||||
|
@ -128,7 +128,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
handle->shutdown = 0;
|
||||
handle->id = MXS_MONITOR_DEFAULT_ID;
|
||||
handle->master = NULL;
|
||||
spinlock_init(&handle->lock);
|
||||
handle->monitor = mon;
|
||||
}
|
||||
|
||||
handle->detectStaleMaster = config_get_bool(params, "detect_stale_master");
|
||||
@ -143,9 +143,12 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
|
||||
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
|
||||
{
|
||||
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
@ -482,16 +485,13 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
|
||||
static void
|
||||
monitorMain(void *arg)
|
||||
{
|
||||
MXS_MONITOR* mon = (MXS_MONITOR*) arg;
|
||||
MM_MONITOR *handle;
|
||||
MM_MONITOR *handle = (MM_MONITOR *)arg;
|
||||
MXS_MONITOR* mon = handle->monitor;
|
||||
MXS_MONITOR_SERVERS *ptr;
|
||||
int detect_stale_master = false;
|
||||
MXS_MONITOR_SERVERS *root_master = NULL;
|
||||
size_t nrounds = 0;
|
||||
|
||||
spinlock_acquire(&mon->lock);
|
||||
handle = (MM_MONITOR *) mon->handle;
|
||||
spinlock_release(&mon->lock);
|
||||
detect_stale_master = handle->detectStaleMaster;
|
||||
|
||||
if (mysql_thread_init())
|
||||
|
@ -40,7 +40,6 @@ MXS_BEGIN_DECLS
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
SPINLOCK lock; /**< The monitor spinlock */
|
||||
THREAD thread; /**< Monitor thread */
|
||||
int shutdown; /**< Flag to shutdown the monitor thread */
|
||||
int status; /**< Monitor status */
|
||||
@ -49,6 +48,7 @@ typedef struct
|
||||
MXS_MONITOR_SERVERS *master; /**< Master server for Master/Slave replication */
|
||||
char* script; /*< Script to call when state changes occur on servers */
|
||||
uint64_t events; /*< enabled events */
|
||||
MXS_MONITOR* monitor;
|
||||
} MM_MONITOR;
|
||||
|
||||
MXS_END_DECLS
|
||||
|
@ -57,7 +57,6 @@ MXS_BEGIN_DECLS
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
SPINLOCK lock; /**< The monitor spinlock */
|
||||
THREAD thread; /**< Monitor thread */
|
||||
int shutdown; /**< Flag to shutdown the monitor thread */
|
||||
int status; /**< Monitor status */
|
||||
@ -81,6 +80,7 @@ typedef struct
|
||||
bool warn_failover; /**< Log a warning when failover happens */
|
||||
bool load_journal; /**< Whether journal file should be loaded */
|
||||
time_t journal_max_age; /**< Maximum age of journal file */
|
||||
MXS_MONITOR* monitor;
|
||||
} MYSQL_MONITOR;
|
||||
|
||||
/**
|
||||
|
@ -246,7 +246,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
handle->id = config_get_global_options()->id;
|
||||
handle->warn_failover = true;
|
||||
handle->load_journal = true;
|
||||
spinlock_init(&handle->lock);
|
||||
handle->monitor = monitor;
|
||||
}
|
||||
|
||||
/** This should always be reset to NULL */
|
||||
@ -290,9 +290,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
MXS_FREE(handle);
|
||||
handle = NULL;
|
||||
}
|
||||
else if (thread_start(&handle->thread, monitorMain, monitor) == NULL)
|
||||
else if (thread_start(&handle->thread, monitorMain, handle) == NULL)
|
||||
{
|
||||
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", monitor->name);
|
||||
hashtable_free(handle->server_info);
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle);
|
||||
handle = NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
@ -1090,8 +1094,8 @@ void do_failover(MYSQL_MONITOR *handle, MXS_MONITOR_SERVERS *db)
|
||||
static void
|
||||
monitorMain(void *arg)
|
||||
{
|
||||
MXS_MONITOR* mon = (MXS_MONITOR*) arg;
|
||||
MYSQL_MONITOR *handle;
|
||||
MYSQL_MONITOR *handle = (MYSQL_MONITOR *) arg;
|
||||
MXS_MONITOR* mon = handle->monitor;
|
||||
MXS_MONITOR_SERVERS *ptr;
|
||||
int replication_heartbeat;
|
||||
bool detect_stale_master;
|
||||
@ -1101,9 +1105,6 @@ monitorMain(void *arg)
|
||||
int log_no_master = 1;
|
||||
bool heartbeat_checked = false;
|
||||
|
||||
spinlock_acquire(&mon->lock);
|
||||
handle = (MYSQL_MONITOR *) mon->handle;
|
||||
spinlock_release(&mon->lock);
|
||||
replication_heartbeat = handle->replicationHeartbeat;
|
||||
detect_stale_master = handle->detectStaleMaster;
|
||||
|
||||
|
@ -129,7 +129,7 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
handle->shutdown = 0;
|
||||
handle->id = MXS_MONITOR_DEFAULT_ID;
|
||||
handle->master = NULL;
|
||||
spinlock_init(&handle->lock);
|
||||
handle->monitor = mon;
|
||||
}
|
||||
|
||||
handle->script = config_copy_string(params, "script");
|
||||
@ -144,9 +144,12 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (thread_start(&handle->thread, monitorMain, mon) == NULL)
|
||||
if (thread_start(&handle->thread, monitorMain, handle) == NULL)
|
||||
{
|
||||
MXS_ERROR("Failed to start monitor thread for monitor '%s'.", mon->name);
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
@ -307,15 +310,11 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP
|
||||
static void
|
||||
monitorMain(void *arg)
|
||||
{
|
||||
MXS_MONITOR* mon = arg;
|
||||
MYSQL_MONITOR *handle;
|
||||
MYSQL_MONITOR *handle = (MYSQL_MONITOR*)arg;
|
||||
MXS_MONITOR* mon = handle->monitor;
|
||||
MXS_MONITOR_SERVERS *ptr;
|
||||
size_t nrounds = 0;
|
||||
|
||||
spinlock_acquire(&mon->lock);
|
||||
handle = (MYSQL_MONITOR *) mon->handle;
|
||||
spinlock_release(&mon->lock);
|
||||
|
||||
if (mysql_thread_init())
|
||||
{
|
||||
MXS_ERROR("Fatal : mysql_thread_init failed in monitor module. Exiting.");
|
||||
|
Loading…
x
Reference in New Issue
Block a user