Moved common monitor functionality to the MONITOR type

Common variables, like databases, timeouts and interval, and functionality was moved to the MONITOR type.
This reduces the redundant functionality of the monitor API's functions like registerServer and setInterval.
This commit is contained in:
Markus Makela
2015-05-09 03:54:54 +03:00
parent da6d597987
commit 5a3ed0de9b
9 changed files with 323 additions and 936 deletions

View File

@ -84,20 +84,15 @@ MODULE_INFO info = {
static void *startMonitor(void *,void*);
static void stopMonitor(void *);
static void registerServer(void *, SERVER *);
static void unregisterServer(void *, SERVER *);
static void defaultUser(void *, char *, char *);
static void diagnostics(DCB *, void *);
static void setInterval(void *, size_t);
static void defaultId(void *, unsigned long);
static void setNetworkTimeout(void *, int, int);
static bool mon_status_changed(MONITOR_SERVERS* mon_srv);
static bool mon_print_fail_status(MONITOR_SERVERS* mon_srv);
static MONITOR_SERVERS *getServerByNodeId(MONITOR_SERVERS *, long);
static MONITOR_SERVERS *getSlaveOfNodeId(MONITOR_SERVERS *, long);
static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *, int);
static MONITOR_SERVERS *get_replication_tree(MONITOR *, int);
static void set_master_heartbeat(MYSQL_MONITOR *, MONITOR_SERVERS *);
static void set_slave_heartbeat(MYSQL_MONITOR *, MONITOR_SERVERS *);
static void set_slave_heartbeat(MONITOR *, MONITOR_SERVERS *);
static int add_slave_to_master(long *, int, long);
static void monitor_set_pending_status(MONITOR_SERVERS *, int);
static void monitor_clear_pending_status(MONITOR_SERVERS *, int);
@ -105,12 +100,7 @@ static void monitor_clear_pending_status(MONITOR_SERVERS *, int);
static MONITOR_OBJECT MyObject = {
startMonitor,
stopMonitor,
registerServer,
unregisterServer,
defaultUser,
diagnostics,
setInterval,
setNetworkTimeout
diagnostics
};
/**
@ -157,55 +147,50 @@ GetModuleObject()
* This function creates a thread to execute the actual monitoring.
*
* @param arg The current handle - NULL if first start
* @param opt Configuration parameters
* @return A handle to use when interacting with the monitor
*/
static void *
startMonitor(void *arg, void* opt)
{
MYSQL_MONITOR *handle;
CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
if (arg)
{
handle = arg; /* Must be a restart */
handle->shutdown = 0;
}
else
{
if ((handle = (MYSQL_MONITOR *)malloc(sizeof(MYSQL_MONITOR))) == NULL)
return NULL;
handle->databases = NULL;
handle->shutdown = 0;
handle->defaultUser = NULL;
handle->defaultPasswd = NULL;
handle->id = config_get_gateway_id();
handle->interval = MONITOR_INTERVAL;
handle->replicationHeartbeat = 0;
handle->detectStaleMaster = 0;
handle->master = NULL;
handle->connect_timeout=DEFAULT_CONNECT_TIMEOUT;
handle->read_timeout=DEFAULT_READ_TIMEOUT;
handle->write_timeout=DEFAULT_WRITE_TIMEOUT;
handle->master_down_script = NULL;
spinlock_init(&handle->lock);
}
MONITOR* monitor = (MONITOR*)arg;
MYSQL_MONITOR *handle = (MYSQL_MONITOR*)monitor->handle;
CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
if (arg)
{
handle = arg; /* Must be a restart */
handle->shutdown = 0;
}
else
{
if ((handle = (MYSQL_MONITOR *)malloc(sizeof(MYSQL_MONITOR))) == NULL)
return NULL;
handle->shutdown = 0;
handle->id = config_get_gateway_id();
handle->replicationHeartbeat = 0;
handle->detectStaleMaster = 0;
handle->master = NULL;
handle->master_down_script = NULL;
spinlock_init(&handle->lock);
}
while(params)
while(params)
{
if(!strcmp(params->name,"detect_stale_master"))
handle->detectStaleMaster = config_truth_value(params->value);
else if(!strcmp(params->name,"detect_replication_lag"))
handle->replicationHeartbeat = config_truth_value(params->value);
else if(!strcmp(params->name,"master_down_script"))
{
if(!strcmp(params->name,"detect_stale_master"))
handle->detectStaleMaster = config_truth_value(params->value);
else if(!strcmp(params->name,"detect_replication_lag"))
handle->replicationHeartbeat = config_truth_value(params->value);
else if(!strcmp(params->name,"master_down_script"))
{
if(handle->master_down_script)
externcmd_free(handle->master_down_script);
handle->master_down_script = externcmd_allocate(params->value);
}
params = params->next;
if(handle->master_down_script)
externcmd_free(handle->master_down_script);
handle->master_down_script = externcmd_allocate(params->value);
}
params = params->next;
}
handle->tid = (THREAD)thread_start(monitorMain, handle);
return handle;
handle->tid = (THREAD)thread_start(monitorMain, handle);
return handle;
}
/**
@ -216,107 +201,11 @@ CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
static void
stopMonitor(void *arg)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
MONITOR* mon = (MONITOR*)arg;
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)mon->handle;
handle->shutdown = 1;
thread_wait((void *)handle->tid);
}
/**
* Register a server that must be added to the monitored servers for
* a monitoring module.
*
* @param arg A handle on the running monitor module
* @param server The server to add
*/
static void
registerServer(void *arg, SERVER *server)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
MONITOR_SERVERS *ptr, *db;
if ((db = (MONITOR_SERVERS *)malloc(sizeof(MONITOR_SERVERS))) == NULL)
return;
db->server = server;
db->con = NULL;
db->next = NULL;
db->mon_err_count = 0;
db->mon_prev_status = 0;
/* pending status is updated by get_replication_tree */
db->pending_status = 0;
spinlock_acquire(&handle->lock);
if (handle->databases == NULL)
handle->databases = db;
else
{
ptr = handle->databases;
while (ptr->next != NULL)
ptr = ptr->next;
ptr->next = db;
}
spinlock_release(&handle->lock);
}
/**
* Remove a server from those being monitored by a monitoring module
*
* @param arg A handle on the running monitor module
* @param server The server to remove
*/
static void
unregisterServer(void *arg, SERVER *server)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
MONITOR_SERVERS *ptr, *lptr;
spinlock_acquire(&handle->lock);
if (handle->databases == NULL)
{
spinlock_release(&handle->lock);
return;
}
if (handle->databases->server == server)
{
ptr = handle->databases;
handle->databases = handle->databases->next;
free(ptr);
}
else
{
ptr = handle->databases;
while (ptr->next != NULL && ptr->next->server != server)
ptr = ptr->next;
if (ptr->next)
{
lptr = ptr->next;
ptr->next = ptr->next->next;
free(lptr);
}
}
spinlock_release(&handle->lock);
}
/**
* Set the default username and password to use to monitor if the server does not
* override this.
*
* @param arg The handle allocated by startMonitor
* @param uname The default user name
* @param passwd The default password
*/
static void
defaultUser(void *arg, char *uname, char *passwd)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
if (handle->defaultUser)
free(handle->defaultUser);
if (handle->defaultPasswd)
free(handle->defaultPasswd);
handle->defaultUser = strdup(uname);
handle->defaultPasswd = strdup(passwd);
handle->shutdown = 1;
thread_wait((void *)handle->tid);
}
/**
@ -327,45 +216,46 @@ MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
*/
static void diagnostics(DCB *dcb, void *arg)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
MONITOR_SERVERS *db;
char *sep;
MONITOR* mon = (MONITOR*)arg;
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)mon->handle;
MONITOR_SERVERS *db;
char *sep;
switch (handle->status)
{
case MONITOR_RUNNING:
dcb_printf(dcb, "\tMonitor running\n");
break;
case MONITOR_STOPPING:
dcb_printf(dcb, "\tMonitor stopping\n");
break;
case MONITOR_STOPPED:
dcb_printf(dcb, "\tMonitor stopped\n");
break;
}
switch (handle->status)
{
case MONITOR_RUNNING:
dcb_printf(dcb, "\tMonitor running\n");
break;
case MONITOR_STOPPING:
dcb_printf(dcb, "\tMonitor stopping\n");
break;
case MONITOR_STOPPED:
dcb_printf(dcb, "\tMonitor stopped\n");
break;
}
dcb_printf(dcb,"\tSampling interval:\t%lu milliseconds\n", handle->interval);
dcb_printf(dcb,"\tMaxScale MonitorId:\t%lu\n", handle->id);
dcb_printf(dcb,"\tReplication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled");
dcb_printf(dcb,"\tDetect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled");
dcb_printf(dcb,"\tConnect Timeout:\t%i seconds\n", handle->connect_timeout);
dcb_printf(dcb,"\tRead Timeout:\t\t%i seconds\n", handle->read_timeout);
dcb_printf(dcb,"\tWrite Timeout:\t\t%i seconds\n", handle->write_timeout);
dcb_printf(dcb, "\tMonitored servers: ");
db = handle->databases;
sep = "";
while (db)
{
dcb_printf(dcb,
"%s%s:%d",
sep,
db->server->name,
db->server->port);
sep = ", ";
db = db->next;
}
dcb_printf(dcb, "\n");
dcb_printf(dcb,"\tSampling interval:\t%lu milliseconds\n", mon->interval);
dcb_printf(dcb,"\tMaxScale MonitorId:\t%lu\n", handle->id);
dcb_printf(dcb,"\tReplication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled");
dcb_printf(dcb,"\tDetect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled");
dcb_printf(dcb,"\tConnect Timeout:\t%i seconds\n", mon->connect_timeout);
dcb_printf(dcb,"\tRead Timeout:\t\t%i seconds\n", mon->read_timeout);
dcb_printf(dcb,"\tWrite Timeout:\t\t%i seconds\n", mon->write_timeout);
dcb_printf(dcb, "\tMonitored servers: ");
db = mon->databases;
sep = "";
while (db)
{
dcb_printf(dcb,
"%s%s:%d",
sep,
db->server->name,
db->server->port);
sep = ", ";
db = db->next;
}
dcb_printf(dcb, "\n");
}
/**
@ -375,14 +265,14 @@ char *sep;
* @param database The database to probe
*/
static void
monitorDatabase(MYSQL_MONITOR *handle, MONITOR_SERVERS *database)
monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
{
MYSQL_MONITOR* handle = mon->handle;
MYSQL_ROW row;
MYSQL_RES *result;
int num_fields;
int isslave = 0;
char *uname = handle->defaultUser;
char *passwd = handle->defaultPasswd;
char *uname = mon->user;
char *passwd = mon->password;
unsigned long int server_version = 0;
char *server_string;
@ -405,16 +295,15 @@ char *server_string;
if (database->con == NULL || mysql_ping(database->con) != 0)
{
char *dpwd = decryptPassword(passwd);
int rc;
int connect_timeout = handle->connect_timeout;
int read_timeout = handle->read_timeout;
int write_timeout = handle->write_timeout;
int connect_timeout = mon->connect_timeout;
int read_timeout = mon->read_timeout;
int write_timeout = mon->write_timeout;
database->con = mysql_init(NULL);
rc = mysql_options(database->con, MYSQL_OPT_CONNECT_TIMEOUT, (void *)&connect_timeout);
rc = mysql_options(database->con, MYSQL_OPT_READ_TIMEOUT, (void *)&read_timeout);
rc = mysql_options(database->con, MYSQL_OPT_WRITE_TIMEOUT, (void *)&write_timeout);
mysql_options(database->con, MYSQL_OPT_CONNECT_TIMEOUT, (void *)&connect_timeout);
mysql_options(database->con, MYSQL_OPT_READ_TIMEOUT, (void *)&read_timeout);
mysql_options(database->con, MYSQL_OPT_WRITE_TIMEOUT, (void *)&write_timeout);
if (mysql_real_connect(database->con,
database->server->name,
@ -493,7 +382,6 @@ char *server_string;
&& (result = mysql_store_result(database->con)) != NULL)
{
long server_id = -1;
num_fields = mysql_num_fields(result);
while ((row = mysql_fetch_row(result)))
{
server_id = strtol(row[0], NULL, 10);
@ -519,7 +407,6 @@ char *server_string;
{
int i = 0;
long master_id = -1;
num_fields = mysql_num_fields(result);
while ((row = mysql_fetch_row(result)))
{
/* get Slave_IO_Running and Slave_SQL_Running values*/
@ -558,7 +445,6 @@ char *server_string;
&& (result = mysql_store_result(database->con)) != NULL)
{
long master_id = -1;
num_fields = mysql_num_fields(result);
while ((row = mysql_fetch_row(result)))
{
/* get Slave_IO_Running and Slave_SQL_Running values*/
@ -615,7 +501,8 @@ char *server_string;
static void
monitorMain(void *arg)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
MONITOR* mon = (MONITOR*) arg;
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)mon->handle;
MONITOR_SERVERS *ptr;
int replication_heartbeat = handle->replicationHeartbeat;
int detect_stale_master = handle->detectStaleMaster;
@ -652,7 +539,7 @@ int log_no_master = 1;
* round.
*/
if (nrounds != 0 &&
((nrounds*MON_BASE_INTERVAL_MS)%handle->interval) >=
((nrounds*MON_BASE_INTERVAL_MS)%mon->interval) >=
MON_BASE_INTERVAL_MS)
{
nrounds += 1;
@ -663,7 +550,7 @@ int log_no_master = 1;
num_servers = 0;
/* start from the first server in the list */
ptr = handle->databases;
ptr = mon->databases;
while (ptr)
{
@ -671,7 +558,7 @@ int log_no_master = 1;
ptr->pending_status = ptr->server->status;
/* monitor current node */
monitorDatabase(handle, ptr);
monitorDatabase(mon, ptr);
/* reset the slave list of current node */
if (ptr->server->slaves) {
@ -753,7 +640,7 @@ int log_no_master = 1;
ptr = ptr->next;
}
ptr = handle->databases;
ptr = mon->databases;
/* if only one server is configured, that's is Master */
if (num_servers == 1) {
if (SERVER_IS_RUNNING(ptr->server)) {
@ -770,12 +657,12 @@ int log_no_master = 1;
}
} else {
/* Compute the replication tree */
root_master = get_replication_tree(handle, num_servers);
root_master = get_replication_tree(mon, num_servers);
}
/* Update server status from monitor pending status on that server*/
ptr = handle->databases;
ptr = mon->databases;
while (ptr)
{
if (! SERVER_IN_MAINT(ptr->server)) {
@ -852,7 +739,7 @@ int log_no_master = 1;
SERVER_IS_RELAY_SERVER(root_master->server)))
{
set_master_heartbeat(handle, root_master);
ptr = handle->databases;
ptr = mon->databases;
while (ptr) {
if( (! SERVER_IN_MAINT(ptr->server)) && SERVER_IS_RUNNING(ptr->server))
@ -861,7 +748,7 @@ int log_no_master = 1;
(SERVER_IS_SLAVE(ptr->server) ||
SERVER_IS_RELAY_SERVER(ptr->server)))
{
set_slave_heartbeat(handle, ptr);
set_slave_heartbeat(mon, ptr);
}
}
ptr = ptr->next;
@ -883,19 +770,6 @@ MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
memcpy(&handle->id, &id, sizeof(unsigned long));
}
/**
* Set the monitor sampling interval.
*
* @param arg The handle allocated by startMonitor
* @param interval The interval to set in monitor struct, in milliseconds
*/
static void
setInterval(void *arg, size_t interval)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
memcpy(&handle->interval, &interval, sizeof(unsigned long));
}
/**
* Enable/Disable the MySQL Replication hearbeat, detecting slave lag behind master.
*
@ -957,9 +831,6 @@ static bool mon_print_fail_status(
{
bool succp;
int errcount = mon_srv->mon_err_count;
uint8_t modval;
modval = 1<<(MIN(errcount/10, 7));
if (SERVER_IS_DOWN(mon_srv->server) && errcount == 0)
{
@ -1133,13 +1004,13 @@ static void set_master_heartbeat(MYSQL_MONITOR *handle, MONITOR_SERVERS *databas
* @param handle The monitor handle
* @param database The number database server
*/
static void set_slave_heartbeat(MYSQL_MONITOR *handle, MONITOR_SERVERS *database) {
static void set_slave_heartbeat(MONITOR* mon, MONITOR_SERVERS *database) {
MYSQL_MONITOR *handle = (MYSQL_MONITOR*)mon->handle;
unsigned long id = handle->id;
time_t heartbeat;
char select_heartbeat_query[256] = "";
MYSQL_ROW row;
MYSQL_RES *result;
int num_fields;
if (handle->master == NULL) {
LOGIF(LE, (skygw_log_write_flush(
@ -1159,7 +1030,6 @@ static void set_slave_heartbeat(MYSQL_MONITOR *handle, MONITOR_SERVERS *database
if (handle->master !=NULL && (mysql_query(database->con, select_heartbeat_query) == 0
&& (result = mysql_store_result(database->con)) != NULL)) {
int rows_found = 0;
num_fields = mysql_num_fields(result);
while ((row = mysql_fetch_row(result))) {
int rlag = -1;
@ -1184,7 +1054,7 @@ static void set_slave_heartbeat(MYSQL_MONITOR *handle, MONITOR_SERVERS *database
if (rlag >= 0) {
/* store rlag only if greater than monitor sampling interval */
database->server->rlag = (rlag > (handle->interval / 1000)) ? rlag : 0;
database->server->rlag = (rlag > (mon->interval / 1000)) ? rlag : 0;
} else {
database->server->rlag = -1;
}
@ -1238,7 +1108,8 @@ static void set_slave_heartbeat(MYSQL_MONITOR *handle, MONITOR_SERVERS *database
* @return The server at root level with SERVER_MASTER bit
*/
static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_servers) {
static MONITOR_SERVERS *get_replication_tree(MONITOR *mon, int num_servers) {
MYSQL_MONITOR* handle = (MYSQL_MONITOR*)mon->handle;
MONITOR_SERVERS *ptr;
MONITOR_SERVERS *backend;
SERVER *current;
@ -1246,7 +1117,7 @@ static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_serv
long node_id;
int root_level;
ptr = handle->databases;
ptr = mon->databases;
root_level = num_servers;
while (ptr)
@ -1265,7 +1136,7 @@ static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_serv
node_id = current->master_id;
if (node_id < 1) {
MONITOR_SERVERS *find_slave;
find_slave = getSlaveOfNodeId(handle->databases, current->node_id);
find_slave = getSlaveOfNodeId(mon->databases, current->node_id);
if (find_slave == NULL) {
current->depth = -1;
@ -1285,7 +1156,7 @@ static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_serv
root_level = current->depth;
handle->master = ptr;
}
backend = getServerByNodeId(handle->databases, node_id);
backend = getServerByNodeId(mon->databases, node_id);
if (backend) {
node_id = backend->server->master_id;
@ -1301,7 +1172,7 @@ static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_serv
MONITOR_SERVERS *master;
current->depth = depth;
master = getServerByNodeId(handle->databases, current->master_id);
master = getServerByNodeId(mon->databases, current->master_id);
if (master && master->server && master->server->node_id > 0) {
add_slave_to_master(master->server->slaves, MONITOR_MAX_NUM_SLAVES, current->node_id);
master->server->depth = current->depth -1;
@ -1383,65 +1254,3 @@ monitor_clear_pending_status(MONITOR_SERVERS *ptr, int bit)
{
ptr->pending_status &= ~bit;
}
/**
* Set the default id to use in the monitor.
*
* @param arg The handle allocated by startMonitor
* @param type The connect timeout type
* @param value The timeout value to set
*/
static void
setNetworkTimeout(void *arg, int type, int value)
{
MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
int max_timeout = (int)(handle->interval/1000);
int new_timeout = max_timeout -1;
if (new_timeout <= 0)
new_timeout = DEFAULT_CONNECT_TIMEOUT;
switch(type) {
case MONITOR_CONNECT_TIMEOUT:
if (value < max_timeout) {
memcpy(&handle->connect_timeout, &value, sizeof(int));
} else {
memcpy(&handle->connect_timeout, &new_timeout, sizeof(int));
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"warning : Monitor Connect Timeout %i is greater than monitor interval ~%i seconds"
", lowering to %i seconds", value, max_timeout, new_timeout)));
}
break;
case MONITOR_READ_TIMEOUT:
if (value < max_timeout) {
memcpy(&handle->read_timeout, &value, sizeof(int));
} else {
memcpy(&handle->read_timeout, &new_timeout, sizeof(int));
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"warning : Monitor Read Timeout %i is greater than monitor interval ~%i seconds"
", lowering to %i seconds", value, max_timeout, new_timeout)));
}
break;
case MONITOR_WRITE_TIMEOUT:
if (value < max_timeout) {
memcpy(&handle->write_timeout, &value, sizeof(int));
} else {
memcpy(&handle->write_timeout, &new_timeout, sizeof(int));
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"warning : Monitor Write Timeout %i is greater than monitor interval ~%i seconds"
", lowering to %i seconds", value, max_timeout, new_timeout)));
}
break;
default:
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : Monitor setNetworkTimeout received an unsupported action type %i", type)));
break;
}
}