Added full implementation of MXS-121 to all monitors.

This commit is contained in:
Markus Makela
2015-05-09 17:37:34 +03:00
parent 0c15812340
commit 9845923f81
6 changed files with 334 additions and 84 deletions

View File

@ -148,7 +148,7 @@ CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
handle->availableWhenDonor = config_truth_value(params->value); handle->availableWhenDonor = config_truth_value(params->value);
else if(!strcmp(params->name,"disable_master_role_setting")) else if(!strcmp(params->name,"disable_master_role_setting"))
handle->disableMasterRoleSetting = config_truth_value(params->value); handle->disableMasterRoleSetting = config_truth_value(params->value);
else if(!strcmp(params->name,"master_down_script")) else if(!strcmp(params->name,"script"))
{ {
if(handle->script) if(handle->script)
free(handle->script); free(handle->script);
@ -157,7 +157,7 @@ CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
params = params->next; params = params->next;
} }
handle->tid = (THREAD)thread_start(monitorMain, handle); handle->tid = (THREAD)thread_start(monitorMain, mon);
return handle; return handle;
} }
@ -547,6 +547,26 @@ int log_no_members = 1;
log_no_members = 1; log_no_members = 1;
} }
} }
ptr = mon->databases;
while(ptr)
{
/** Execute monitor script if a server state has changed */
if(mon_status_changed(ptr) && mon_get_event_type(ptr) != UNDEFINED_MONITOR_EVENT)
{
skygw_log_write(LOGFILE_TRACE,"Server changed state: %s[%s:%u]: %s",
ptr->server->unique_name,
ptr->server->name,ptr->server->port,
mon_get_event_name(ptr));
if(handle->script)
{
monitor_launch_script(mon,ptr,handle->script);
}
}
ptr = ptr->next;
}
} }
} }

View File

@ -109,33 +109,44 @@ static void *
startMonitor(void *arg,void* opt) startMonitor(void *arg,void* opt)
{ {
MONITOR* mon = (MONITOR*)arg; MONITOR* mon = (MONITOR*)arg;
MYSQL_MONITOR *handle = mon->handle; MYSQL_MONITOR *handle = mon->handle;
CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt; CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
if (handle) if (handle)
{ {
handle->shutdown = 0; handle->shutdown = 0;
} }
else else
{ {
if ((handle = (MYSQL_MONITOR *)malloc(sizeof(MYSQL_MONITOR))) == NULL) if ((handle = (MYSQL_MONITOR *)malloc(sizeof(MYSQL_MONITOR))) == NULL)
return NULL; return NULL;
handle->shutdown = 0; handle->shutdown = 0;
handle->id = MONITOR_DEFAULT_ID; handle->id = MONITOR_DEFAULT_ID;
handle->replicationHeartbeat = 0; handle->replicationHeartbeat = 0;
handle->detectStaleMaster = 0; handle->detectStaleMaster = 0;
handle->master = NULL; handle->master = NULL;
spinlock_init(&handle->lock); spinlock_init(&handle->lock);
} }
while(params) while(params)
{
if(!strcmp(params->name,"detect_stale_master"))
{ {
if(!strcmp(params->name,"detect_stale_master")) handle->detectStaleMaster = config_truth_value(params->value);
handle->detectStaleMaster = config_truth_value(params->value);
params = params->next;
} }
else if(!strcmp(params->name,"script"))
{
if(handle->script)
{
free(handle->script);
}
handle->tid = (THREAD)thread_start(monitorMain, mon); handle->script = strdup(params->value);
return handle; }
params = params->next;
}
handle->tid = (THREAD)thread_start(monitorMain, mon);
return handle;
} }
/** /**
@ -567,6 +578,25 @@ size_t nrounds = 0;
} }
ptr = ptr->next; ptr = ptr->next;
} }
ptr = mon->databases;
while(ptr)
{
/** Execute monitor script if a server state has changed */
if(mon_status_changed(ptr) && mon_get_event_type(ptr) != UNDEFINED_MONITOR_EVENT)
{
skygw_log_write(LOGFILE_TRACE,"Server changed state: %s[%s:%u]: %s",
ptr->server->unique_name,
ptr->server->name,ptr->server->port,
mon_get_event_name(ptr));
if(handle->script)
{
monitor_launch_script(mon,ptr,handle->script);
}
}
ptr = ptr->next;
}
} }
} }

View File

@ -40,61 +40,187 @@ void monitor_clear_pending_status(MONITOR_SERVERS *ptr, int bit)
ptr->pending_status &= ~bit; ptr->pending_status &= ~bit;
} }
char* mon_get_event_type(MONITOR_SERVERS* node)
monitor_event_t mon_get_event_type(MONITOR_SERVERS* node)
{ {
unsigned int prev = node->mon_prev_status; unsigned int prev = node->mon_prev_status;
if((prev & (SERVER_MASTER|SERVER_RUNNING)) == (SERVER_MASTER|SERVER_RUNNING) && if((prev & (SERVER_MASTER|SERVER_RUNNING)) == (SERVER_MASTER|SERVER_RUNNING) &&
SERVER_IS_DOWN(node->server)) SERVER_IS_DOWN(node->server))
{ {
return "master_down"; return MASTER_DOWN_EVENT;
} }
if((prev & (SERVER_RUNNING)) == 0 && if((prev & (SERVER_RUNNING)) == 0 &&
SERVER_IS_RUNNING(node->server) && SERVER_IS_MASTER(node->server)) SERVER_IS_RUNNING(node->server) && SERVER_IS_MASTER(node->server))
{ {
return "master_up"; return MASTER_UP_EVENT;
} }
if((prev & (SERVER_SLAVE|SERVER_RUNNING)) == (SERVER_SLAVE|SERVER_RUNNING) && if((prev & (SERVER_SLAVE|SERVER_RUNNING)) == (SERVER_SLAVE|SERVER_RUNNING) &&
SERVER_IS_DOWN(node->server)) SERVER_IS_DOWN(node->server))
{ {
return "slave_down"; return SLAVE_DOWN_EVENT;
} }
if((prev & (SERVER_RUNNING)) == 0 && if((prev & (SERVER_RUNNING)) == 0 &&
SERVER_IS_RUNNING(node->server) && SERVER_IS_SLAVE(node->server)) SERVER_IS_RUNNING(node->server) && SERVER_IS_SLAVE(node->server))
{ {
return "slave_up"; return SLAVE_UP_EVENT;
} }
/** Galera specific events */
if((prev & (SERVER_JOINED|SERVER_RUNNING)) == (SERVER_JOINED|SERVER_RUNNING) &&
SERVER_IS_DOWN(node->server))
{
return SYNCED_DOWN_EVENT;
}
if((prev & (SERVER_RUNNING)) == 0 &&
SERVER_IS_RUNNING(node->server) && SERVER_IS_JOINED(node->server))
{
return SYNCED_UP_EVENT;
}
/** NDB events*/
if((prev & (SERVER_NDB|SERVER_RUNNING)) == (SERVER_NDB|SERVER_RUNNING) &&
SERVER_IS_DOWN(node->server))
{
return NDB_DOWN_EVENT;
}
if((prev & (SERVER_RUNNING)) == 0 &&
SERVER_IS_RUNNING(node->server) && SERVER_IS_NDB(node->server))
{
return NDB_UP_EVENT;
}
if((prev & (SERVER_RUNNING)) == SERVER_RUNNING && if((prev & (SERVER_RUNNING)) == SERVER_RUNNING &&
SERVER_IS_RUNNING(node->server) && SERVER_IS_MASTER(node->server)) SERVER_IS_RUNNING(node->server) && SERVER_IS_MASTER(node->server))
{ {
return "new_master"; return NEW_MASTER_EVENT;
} }
if((prev & (SERVER_RUNNING)) == SERVER_RUNNING && if((prev & (SERVER_RUNNING)) == SERVER_RUNNING &&
SERVER_IS_RUNNING(node->server) && SERVER_IS_SLAVE(node->server)) SERVER_IS_RUNNING(node->server) && SERVER_IS_SLAVE(node->server))
{ {
return "new_slave"; return NEW_SLAVE_EVENT;
} }
/** Status loss events */
if((prev & (SERVER_RUNNING|SERVER_MASTER)) == (SERVER_RUNNING|SERVER_MASTER) && if((prev & (SERVER_RUNNING|SERVER_MASTER)) == (SERVER_RUNNING|SERVER_MASTER) &&
SERVER_IS_RUNNING(node->server) && !SERVER_IS_MASTER(node->server)) SERVER_IS_RUNNING(node->server) && !SERVER_IS_MASTER(node->server))
{ {
return "lost_master"; return LOST_MASTER_EVENT;
} }
if((prev & (SERVER_RUNNING|SERVER_SLAVE)) == (SERVER_RUNNING|SERVER_SLAVE) && if((prev & (SERVER_RUNNING|SERVER_SLAVE)) == (SERVER_RUNNING|SERVER_SLAVE) &&
SERVER_IS_RUNNING(node->server) && !SERVER_IS_SLAVE(node->server)) SERVER_IS_RUNNING(node->server) && !SERVER_IS_SLAVE(node->server))
{ {
return "lost_slave"; return LOST_SLAVE_EVENT;
} }
if((prev & (SERVER_RUNNING|SERVER_JOINED)) == (SERVER_RUNNING|SERVER_JOINED) &&
SERVER_IS_RUNNING(node->server) && !SERVER_IS_JOINED(node->server))
{
return LOST_SYNCED_EVENT;
}
if((prev & (SERVER_RUNNING|SERVER_NDB)) == (SERVER_RUNNING|SERVER_NDB) &&
SERVER_IS_RUNNING(node->server) && !SERVER_IS_NDB(node->server))
{
return LOST_NDB_EVENT;
}
/** Generic server failure */
if((prev & SERVER_RUNNING) == 0 && if((prev & SERVER_RUNNING) == 0 &&
SERVER_IS_RUNNING(node->server)) SERVER_IS_RUNNING(node->server))
{ {
return "server_up"; return SERVER_UP_EVENT;
} }
if((prev & SERVER_RUNNING) == SERVER_RUNNING && if((prev & SERVER_RUNNING) == SERVER_RUNNING &&
SERVER_IS_DOWN(node->server)) SERVER_IS_DOWN(node->server))
{ {
return "server_down"; return SERVER_DOWN_EVENT;
} }
return "unknown";
/** Something else, most likely a state that does not matter.
* For example SERVER_DOWN -> SERVER_MASTER|SERVER_DOWN still results in a
* server state equal to not running.*/
return UNDEFINED_MONITOR_EVENT;
}
/**
 * Return a printable name for the state-change event of a monitored server.
 *
 * The event is obtained by calling mon_get_event_type() on the node and is
 * then mapped to a fixed string such as "master_down" or "new_slave".
 * UNDEFINED_MONITOR_EVENT maps to "undefined"; any value without its own
 * case label maps to "MONITOR_EVENT_FAILURE".
 *
 * @param node The monitored server to inspect
 * @return A string literal naming the event; it must not be modified or freed
 */
char* mon_get_event_name(MONITOR_SERVERS* node)
{
switch(mon_get_event_type(node))
{
case UNDEFINED_MONITOR_EVENT:
return "undefined";
case MASTER_DOWN_EVENT:
return "master_down";
case MASTER_UP_EVENT:
return "master_up";
case SLAVE_DOWN_EVENT:
return "slave_down";
case SLAVE_UP_EVENT:
return "slave_up";
case SERVER_DOWN_EVENT:
return "server_down";
case SERVER_UP_EVENT:
return "server_up";
case SYNCED_DOWN_EVENT:
return "synced_down";
case SYNCED_UP_EVENT:
return "synced_up";
case DONOR_DOWN_EVENT:
return "donor_down";
case DONOR_UP_EVENT:
return "donor_up";
case NDB_DOWN_EVENT:
return "ndb_down";
case NDB_UP_EVENT:
return "ndb_up";
case LOST_MASTER_EVENT:
return "lost_master";
case LOST_SLAVE_EVENT:
return "lost_slave";
case LOST_SYNCED_EVENT:
return "lost_synced";
case LOST_DONOR_EVENT:
return "lost_donor";
case LOST_NDB_EVENT:
return "lost_ndb";
case NEW_MASTER_EVENT:
return "new_master";
case NEW_SLAVE_EVENT:
return "new_slave";
case NEW_SYNCED_EVENT:
return "new_synced";
case NEW_DONOR_EVENT:
return "new_donor";
case NEW_NDB_EVENT:
return "new_ndb";
/* Fallback for any value that has no case label above */
default:
return "MONITOR_EVENT_FAILURE";
}
} }
void mon_append_node_names(MONITOR_SERVERS* start,char* str, int len) void mon_append_node_names(MONITOR_SERVERS* start,char* str, int len)
@ -162,3 +288,27 @@ bool mon_print_fail_status(
} }
return succp; return succp;
} }
/**
 * Launch the monitor script for a server whose state has changed.
 *
 * The command line has the form
 *   <script> --event=<event name> --node=<unique server name> --nodelist=
 * after which mon_append_node_names() is called on the same buffer with the
 * monitor's server list. The command is then run through the external
 * command interface and any failure is written to the error log.
 *
 * @param mon    Monitor whose server list is handed to mon_append_node_names()
 * @param ptr    The monitored server whose state changed
 * @param script The script configured for the monitor
 */
void monitor_launch_script(MONITOR* mon,MONITOR_SERVERS* ptr, char* script)
{
    char argstr[PATH_MAX + MON_ARG_MAX + 1];
    EXTERNCMD* cmd;
    /* Resolve the name once so the command line and the log messages agree */
    char* event_name = mon_get_event_name(ptr);

    snprintf(argstr, sizeof(argstr),
             "%s --event=%s --node=%s --nodelist=",
             script,
             event_name,
             ptr->server->unique_name);
    mon_append_node_names(mon->databases,argstr,PATH_MAX + MON_ARG_MAX + 1);

    cmd = externcmd_allocate(argstr);
    /* NOTE(review): assumes externcmd_allocate() signals failure by
     * returning NULL - confirm against its implementation. */
    if(cmd == NULL)
    {
        skygw_log_write(LOGFILE_ERROR,
                        "Error: Failed to initialize script "
                        "'%s' for server state change event %s.",
                        script,event_name);
        return;
    }

    if(externcmd_execute(cmd))
    {
        /* The event NAME is logged here: mon_get_event_type() returns an
         * enum, which must not be passed to a %s conversion. */
        skygw_log_write(LOGFILE_ERROR,
                        "Error: Failed to execute script "
                        "'%s' on server state change event %s.",
                        script,event_name);
    }
    externcmd_free(cmd);
}

View File

@ -21,6 +21,8 @@
#include <server.h> #include <server.h>
#include <mysql.h> #include <mysql.h>
#include <monitor.h> #include <monitor.h>
#include <log_manager.h>
#include <mon_exec.h>
/** /**
* @file monitor_common.h - The generic monitor structures all monitors use * @file monitor_common.h - The generic monitor structures all monitors use
* *
@ -31,10 +33,42 @@
* @endverbatim * @endverbatim
*/ */
#define MON_ARG_MAX 8192
/**
 * Events raised when a monitored server moves from one state to another.
 *
 * Enumerator values are assigned implicitly in declaration order, starting
 * from zero for UNDEFINED_MONITOR_EVENT.
 */
typedef enum
{
    /* No recognised state change */
    UNDEFINED_MONITOR_EVENT = 0,

    /* A server went down or came up */
    MASTER_DOWN_EVENT,
    MASTER_UP_EVENT,
    SLAVE_DOWN_EVENT,
    SLAVE_UP_EVENT,
    SERVER_DOWN_EVENT,
    SERVER_UP_EVENT,
    SYNCED_DOWN_EVENT,
    SYNCED_UP_EVENT,
    DONOR_DOWN_EVENT,
    DONOR_UP_EVENT,
    NDB_DOWN_EVENT,
    NDB_UP_EVENT,

    /* A running server lost a status */
    LOST_MASTER_EVENT,
    LOST_SLAVE_EVENT,
    LOST_SYNCED_EVENT,
    LOST_DONOR_EVENT,
    LOST_NDB_EVENT,

    /* A running server gained a status */
    NEW_MASTER_EVENT,
    NEW_SLAVE_EVENT,
    NEW_SYNCED_EVENT,
    NEW_DONOR_EVENT,
    NEW_NDB_EVENT
} monitor_event_t;
void mon_append_node_names(MONITOR_SERVERS* start,char* str, int len); void mon_append_node_names(MONITOR_SERVERS* start,char* str, int len);
char* mon_get_event_type(MONITOR_SERVERS* node); monitor_event_t mon_get_event_type(MONITOR_SERVERS* node);
char* mon_get_event_name(MONITOR_SERVERS* node);
void monitor_clear_pending_status(MONITOR_SERVERS *ptr, int bit); void monitor_clear_pending_status(MONITOR_SERVERS *ptr, int bit);
void monitor_set_pending_status(MONITOR_SERVERS *ptr, int bit); void monitor_set_pending_status(MONITOR_SERVERS *ptr, int bit);
bool mon_status_changed(MONITOR_SERVERS* mon_srv); bool mon_status_changed(MONITOR_SERVERS* mon_srv);
bool mon_print_fail_status(MONITOR_SERVERS* mon_srv); bool mon_print_fail_status(MONITOR_SERVERS* mon_srv);
void monitor_launch_script(MONITOR* mon,MONITOR_SERVERS* ptr, char* script);
#endif #endif

View File

@ -54,9 +54,6 @@
#include <mysqlmon.h> #include <mysqlmon.h>
#define MON_ARG_MAX 8192
/** Defined in log_manager.cc */ /** Defined in log_manager.cc */
extern int lm_enabled_logfiles_bitmask; extern int lm_enabled_logfiles_bitmask;
extern size_t log_ses_count[]; extern size_t log_ses_count[];
@ -674,39 +671,31 @@ int log_no_master = 1;
} }
} else { } else {
ptr->server->status = ptr->pending_status; ptr->server->status = ptr->pending_status;
if(mon_status_changed(ptr))
{
/** Execute monitor script */
if(handle->script && strcmp(mon_get_event_type(ptr),"unknown") != 0)
{
char argstr[PATH_MAX + MON_ARG_MAX + 1];
snprintf(argstr,PATH_MAX + MON_ARG_MAX,
"%s --event=%s --node=%s --nodelist=",
handle->script,
mon_get_event_type(ptr),
ptr->server->unique_name);
mon_append_node_names(mon->databases,argstr,PATH_MAX + MON_ARG_MAX + 1);
EXTERNCMD* cmd = externcmd_allocate(argstr);
if(externcmd_execute(cmd))
{
skygw_log_write(LOGFILE_ERROR,
"Error: Failed to execute script "
"'%s' on server state change.",
handle->script);
}
externcmd_free(cmd);
skygw_log_write(LOGFILE_TRACE,"monitor_state_change: %s: %s",
ptr->server->unique_name,mon_get_event_type(ptr));
}
}
} }
} }
ptr = ptr->next; ptr = ptr->next;
} }
/* log master detection failure od first master becomes available after failure */ ptr = mon->databases;
while(ptr)
{
/** Execute monitor script if a server state has changed */
if(mon_status_changed(ptr) && mon_get_event_type(ptr) != UNDEFINED_MONITOR_EVENT)
{
skygw_log_write(LOGFILE_TRACE,"Server changed state: %s[%s:%u]: %s",
ptr->server->unique_name,
ptr->server->name,ptr->server->port,
mon_get_event_name(ptr));
if(handle->script)
{
monitor_launch_script(mon,ptr,handle->script);
}
}
ptr = ptr->next;
}
/* Log a master detection failure, or the first master becoming available after a failure */
if (root_master && if (root_master &&
mon_status_changed(root_master) && mon_status_changed(root_master) &&
!(root_master->server->status & SERVER_STALE_STATUS)) !(root_master->server->status & SERVER_STALE_STATUS))

View File

@ -107,23 +107,31 @@ static void *
startMonitor(void *arg,void* opt) startMonitor(void *arg,void* opt)
{ {
MONITOR* mon = (MONITOR*)arg; MONITOR* mon = (MONITOR*)arg;
MYSQL_MONITOR *handle = mon->handle; MYSQL_MONITOR *handle = mon->handle;
CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt; CONFIG_PARAMETER* params = (CONFIG_PARAMETER*)opt;
if (handle != NULL) if (handle != NULL)
{
handle->shutdown = 0;
}
else
{
if ((handle = (MYSQL_MONITOR *)malloc(sizeof(MYSQL_MONITOR))) == NULL)
return NULL;
handle->shutdown = 0;
handle->id = MONITOR_DEFAULT_ID;
spinlock_init(&handle->lock);
}
while(params)
{
if(!strcmp(params->name,"script"))
{ {
handle->shutdown = 0; if(handle->script)
free(handle->script);
handle->script = strdup(params->value);
} }
else }
{ handle->tid = (THREAD)thread_start(monitorMain, mon);
if ((handle = (MYSQL_MONITOR *)malloc(sizeof(MYSQL_MONITOR))) == NULL) return handle;
return NULL;
handle->shutdown = 0;
handle->id = MONITOR_DEFAULT_ID;
spinlock_init(&handle->lock);
}
handle->tid = (THREAD)thread_start(monitorMain, mon);
return handle;
} }
/** /**
@ -372,5 +380,24 @@ size_t nrounds = 0;
ptr = ptr->next; ptr = ptr->next;
} }
ptr = mon->databases;
while(ptr)
{
/** Execute monitor script if a server state has changed */
if(mon_status_changed(ptr) && mon_get_event_type(ptr) != UNDEFINED_MONITOR_EVENT)
{
skygw_log_write(LOGFILE_TRACE,"Server changed state: %s[%s:%u]: %s",
ptr->server->unique_name,
ptr->server->name,ptr->server->port,
mon_get_event_name(ptr));
if(handle->script)
{
monitor_launch_script(mon,ptr,handle->script);
}
}
ptr = ptr->next;
}
} }
} }