Changes to Galera monitoring logic and tidying up following code review.

This commit is contained in:
counterpoint
2015-11-24 10:08:07 +00:00
parent 68a7f4b472
commit 1af89298b9
6 changed files with 139 additions and 93 deletions

View File

@ -45,6 +45,20 @@
#include <externcmd.h>
#include <mysql/mysqld_error.h>
/*
 * Define monitor_event_definitions, the table of monitor event names.
 *
 * def_monitor_event.h consists of ADDITEM( event_type, event_name ) entries.
 * ADDITEM is redefined here so that each entry expands to a struct
 * initializer holding the event name as a string literal; the event_type
 * argument is not used by this expansion. The header is then included
 * inside the array initializer, so the table gets one element per entry,
 * in the order the entries appear in the header.
 *
 * ADDITEM is undefined before being defined (in case an earlier definition
 * is still in effect) and undefined again afterwards so that this
 * definition does not leak into the rest of the file.
 */
#undef ADDITEM
#define ADDITEM( _event_type, _event_name ) { #_event_name }
const monitor_def_t monitor_event_definitions[MAX_MONITOR_EVENT] =
{
#include "def_monitor_event.h"
};
#undef ADDITEM
static MONITOR *allMonitors = NULL;
static SPINLOCK monLock = SPINLOCK_INIT;
@ -350,7 +364,7 @@ monitorSetInterval (MONITOR *mon, unsigned long interval)
*/
void
monitorSetNetworkTimeout(MONITOR *mon, int type, int value) {
int max_timeout = (int)(mon->interval/1000);
int new_timeout = max_timeout -1;
@ -579,7 +593,7 @@ monitor_clear_pending_status(MONITOR_SERVERS *ptr, int bit)
/*
* Determine a monitor event, defined by the difference between the old
* status of a server and the new status.
*
*
* @param node The monitor server data for a particular server
* @result monitor_event_t A monitor event (enum)
*/
@ -594,18 +608,18 @@ mon_get_event_type(MONITOR_SERVERS* node)
UNSUPPORTED_EVENT
} general_event_type;
general_event_type event_type = UNSUPPORTED_EVENT;
unsigned int prev = node->mon_prev_status
unsigned int prev = node->mon_prev_status
& (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_JOINED|SERVER_NDB);
unsigned int present = node->server->status
unsigned int present = node->server->status
& (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_JOINED|SERVER_NDB);
if (prev == present)
{
/* No change in the bits we're interested in */
return UNDEFINED_MONITOR_EVENT;
}
if ((prev & SERVER_RUNNING) == 0)
{
/* The server was not running previously */
@ -638,7 +652,7 @@ mon_get_event_type(MONITOR_SERVERS* node)
}
}
}
switch (event_type)
{
case UP_EVENT:
@ -648,10 +662,10 @@ mon_get_event_type(MONITOR_SERVERS* node)
(present & SERVER_NDB) ? NDB_UP_EVENT :
SERVER_UP_EVENT;
case DOWN_EVENT:
return (present & SERVER_MASTER) ? MASTER_DOWN_EVENT :
(present & SERVER_SLAVE) ? SLAVE_DOWN_EVENT :
(present & SERVER_JOINED) ? SYNCED_DOWN_EVENT :
(present & SERVER_NDB) ? NDB_DOWN_EVENT :
return (prev & SERVER_MASTER) ? MASTER_DOWN_EVENT :
(prev & SERVER_SLAVE) ? SLAVE_DOWN_EVENT :
(prev & SERVER_JOINED) ? SYNCED_DOWN_EVENT :
(prev & SERVER_NDB) ? NDB_DOWN_EVENT :
SERVER_DOWN_EVENT;
case LOSS_EVENT:
return (prev & SERVER_MASTER) ? LOST_MASTER_EVENT :
@ -681,15 +695,15 @@ mon_get_event_name(MONITOR_SERVERS* node)
/*
* Given the text version of a monitor event, determine the event (enum)
*
*
* @param event_name String containing the event name
* @result monitor_event_t Monitor event corresponding to name
*/
monitor_event_t
mon_name_to_event (char *event_name)
mon_name_to_event (const char *event_name)
{
monitor_event_t event;
for (event = 0; event < MAX_MONITOR_EVENT; event++)
{
if (0 == strcasecmp(monitor_event_definitions[event].name, event_name))
@ -702,7 +716,7 @@ mon_name_to_event (char *event_name)
/**
* Create a list of running servers
*
*
* @param servers Monitored servers
* @param dest Destination where the string is appended, must be null terminated
* @param len Length of @c dest
@ -712,7 +726,7 @@ mon_append_node_names(MONITOR_SERVERS* servers, char* dest, int len)
{
char *separator = "";
char arr[MAX_SERVER_NAME_LEN + 32]; // Some extra space for port
while (servers && strlen(dest) < (len - strlen(separator)))
{
if (SERVER_IS_RUNNING(servers->server))
@ -736,7 +750,7 @@ bool
mon_status_changed(MONITOR_SERVERS* mon_srv)
{
/* Previous status is -1 if not yet set */
return (mon_srv->mon_prev_status != -1
return (mon_srv->mon_prev_status != -1
&& mon_srv->mon_prev_status != mon_srv->server->status);
}
@ -797,15 +811,16 @@ monitor_launch_script(MONITOR* mon, MONITOR_SERVERS* ptr, char* script)
* found.
*/
int
mon_parse_event_string(bool* events, size_t count, char* string)
mon_parse_event_string(bool* events, size_t count, char* given_string)
{
char *tok, *saved;
char *tok, *saved, *string = strdup(given_string);
monitor_event_t event;
tok = strtok_r(string, ",| ", &saved);
if (tok == NULL)
{
free(string);
return -1;
}
@ -815,12 +830,17 @@ mon_parse_event_string(bool* events, size_t count, char* string)
if (event == UNDEFINED_MONITOR_EVENT)
{
MXS_ERROR("Invalid event name %s", tok);
free(string);
return -1;
}
events[event] = true;
tok = strtok_r(NULL, ",| ", &saved);
if (event < count)
{
events[event] = true;
tok = strtok_r(NULL, ",| ", &saved);
}
}
free(string);
return 0;
}

View File

@ -30,7 +30,7 @@
* 28/05/14 Massimiliano Pinto Addition of rlagd and node_ts fields
* 20/06/14 Massimiliano Pinto Addition of master_id, depth, slaves fields
* 26/06/14 Mark Riddoch Addition of server parameters
* 30/08/14 Massimiliano Pinto Addition of new service status description
* 30/08/14 Massimiliano Pinto Addition of new service status description
* 30/10/14 Massimiliano Pinto Addition of SERVER_MASTER_STICKINESS description
* 01/06/15 Massimiliano Pinto Addition of server_update_address/port
* 19/06/15 Martin Brampton Extra code for persistent connections
@ -149,20 +149,20 @@ DCB *
server_get_persistent(SERVER *server, char *user, const char *protocol)
{
DCB *dcb, *previous = NULL;
if (server->persistent
&& dcb_persistent_clean_count(server->persistent, false)
if (server->persistent
&& dcb_persistent_clean_count(server->persistent, false)
&& server->persistent
&& (server->status & SERVER_RUNNING))
{
spinlock_acquire(&server->persistlock);
dcb = server->persistent;
while (dcb) {
if (dcb->user
&& dcb->protoname
if (dcb->user
&& dcb->protoname
&& !dcb-> dcb_errhandle_called
&& !(dcb->flags & DCBF_HUNG)
&& 0 == strcmp(dcb->user, user)
&& 0 == strcmp(dcb->user, user)
&& 0 == strcmp(dcb->protoname, protocol))
{
if (NULL == previous)
@ -562,7 +562,7 @@ spin_reporter(void *dcb, char *desc, int value)
* @param pdcb DCB to print results to
* @param server SERVER for which DCBs are to be printed
*/
void
void
dprintPersistentDCBs(DCB *pdcb, SERVER *server)
{
DCB *dcb;
@ -666,14 +666,36 @@ void
server_set_status(SERVER *server, int bit)
{
server->status |= bit;
/** clear error logged flag before the next failure */
if (SERVER_IS_MASTER(server))
if (SERVER_IS_MASTER(server))
{
server->master_err_is_logged = false;
}
}
/**
 * Rewrite a group of status bits on a server in a single step.
 *
 * Every bit named in specified_bits is cleared from the server status and
 * the bits in bits_to_set are then switched on. Status bits outside
 * specified_bits are preserved. The status word is only written when the
 * bits currently held within specified_bits differ from bits_to_set.
 *
 * When the call turns on SERVER_MASTER for a server that does not have it
 * yet, the master_err_is_logged flag is reset so that the next failure is
 * logged again.
 *
 * @param server          The server to update
 * @param specified_bits  Mask of the status bits managed by this call
 * @param bits_to_set     The bits that are to be set after the call
 */
void
server_clear_set_status(SERVER *server, int specified_bits, int bits_to_set)
{
    /* Server is about to gain the master role: re-arm error logging */
    if ((server->status & SERVER_MASTER) == 0 && (bits_to_set & SERVER_MASTER) != 0)
    {
        server->master_err_is_logged = false;
    }

    /* Nothing to write if the managed bits already hold the wanted value */
    if ((server->status & specified_bits) == bits_to_set)
    {
        return;
    }

    server->status = (server->status & ~specified_bits) | bits_to_set;
}
/**
* Clear a status bit in the server
*

View File

@ -1,14 +1,29 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
* This file is distributed as part of the MariaDB Corporation MaxScale. It is free
* software: you can redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software Foundation,
* version 2.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Copyright MariaDB Corporation Ab 2013-2014
*/
/*
* File: def_event.h
* Author: mbrampton
/**
* @file def_monitor_event.h
*
* Created on 18 November 2015, 15:21
* @verbatim
* Revision History
*
* Date Who Description
* 01-06-2013 Martin Brampton Initial implementation
*/
ADDITEM( UNDEFINED_MONITOR_EVENT, undefined ),

View File

@ -51,18 +51,18 @@
/**
* The "Module Object" for a monitor module.
*
* The monitor modules are designed to monitor the backend databases that the gateway
* The monitor modules are designed to monitor the backend databases that the gateway
* connects to and provide information regarding the status of the databases that
* is used in the routing decisions.
*
* startMonitor is called to start the monitoring process, it is called on the main
* thread of the gateway and is responsible for creating a thread for the monitor
* itself to run on. This should use the entry points defined in the thread.h
* itself to run on. This should use the entry points defined in the thread.h
* header file rather than make direct calls to the operating system threading libraries.
* The return from startMonitor is a void * handle that will be passed to all other monitor
* API calls.
*
* stopMonitor is responsible for shutting down and destroying a monitor, it is called
* stopMonitor is responsible for shutting down and destroying a monitor, it is called
* with the void * handle that was returned by startMonitor.
*
* registerServer is called to register a server that must be monitored with a running
@ -91,7 +91,7 @@ typedef struct {
/**
* Monitor state bit mask values
*/
typedef enum
typedef enum
{
MONITOR_STATE_ALLOC = 0x00,
MONITOR_STATE_RUNNING = 0x01,
@ -153,13 +153,7 @@ typedef struct monitor_def_s
char name[30];
} monitor_def_t;
#undef ADDITEM
#define ADDITEM( _event_type, _event_name ) { #_event_name }
static const monitor_def_t monitor_event_definitions[MAX_MONITOR_EVENT] =
{
#include "def_monitor_event.h"
};
#undef ADDITEM
extern const monitor_def_t monitor_event_definitions[];
/**
* The linked list of servers that are being monitored by the monitor module.
@ -217,7 +211,7 @@ extern void monitorSetNetworkTimeout(MONITOR *, int, int);
extern RESULTSET *monitorGetList();
bool check_monitor_permissions(MONITOR* monitor);
monitor_event_t mon_name_to_event(char* tok);
monitor_event_t mon_name_to_event(const char* tok);
void mon_append_node_names(MONITOR_SERVERS* start, char* str, int len);
monitor_event_t mon_get_event_type(MONITOR_SERVERS* node);
const char* mon_get_event_name(MONITOR_SERVERS* node);

View File

@ -150,12 +150,12 @@ typedef struct server {
(SERVER_RUNNING|SERVER_MASTER))
/**
* Is the server valid candidate for root master. The server must be running,
* Is the server valid candidate for root master. The server must be running,
* marked as master and not have maintenance bit set.
*/
#define SERVER_IS_ROOT_MASTER(server) \
(((server)->status & (SERVER_RUNNING|SERVER_MASTER|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_MASTER))
/**
* Is the server a slave? The server must be both running and marked as a slave
* in order for the macro to return true
@ -164,7 +164,7 @@ typedef struct server {
(((server)->status & (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_SLAVE))
/**
* Is the server joined Galera node? The server must be running and joined.
* Is the server joined Galera node? The server must be running and joined.
*/
#define SERVER_IS_JOINED(server) \
(((server)->status & (SERVER_RUNNING|SERVER_JOINED|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_JOINED))
@ -176,7 +176,7 @@ typedef struct server {
(((server)->status & (SERVER_RUNNING|SERVER_NDB|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_NDB))
/**
* Is the server in maintenance mode.
* Is the server in maintenance mode.
*/
#define SERVER_IN_MAINT(server) ((server)->status & SERVER_MAINT)
@ -200,6 +200,7 @@ extern void dprintServer(DCB *, SERVER *);
extern void dprintPersistentDCBs(DCB *, SERVER *);
extern void dListServers(DCB *);
extern char *server_status(SERVER *);
extern void server_clear_set_status(SERVER *server, int specified_bits, int bits_to_set);
extern void server_set_status(SERVER *, int);
extern void server_clear_status(SERVER *, int);
extern void server_transfer_status(SERVER *dest_server, SERVER *source_server);

View File

@ -24,7 +24,7 @@
*
* Date Who Description
* 22/07/13 Mark Riddoch Initial implementation
* 21/05/14 Massimiliano Pinto Monitor sets a master server
* 21/05/14 Massimiliano Pinto Monitor sets a master server
* that has the lowest value of wsrep_local_index
* 23/05/14 Massimiliano Pinto Added 1 configuration option (setInterval).
* Interval is printed in diagnostics.
@ -267,7 +267,9 @@ diagnostics(DCB *dcb, void *arg)
}
/**
* Monitor an individual server
* Monitor an individual server. Does not deal with the setting of master or
* slave bits, except for clearing them when a server is not joined to the
* cluster.
*
* @param handle The MySQL Monitor object
* @param database The database to probe
@ -292,12 +294,8 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
server_transfer_status(&temp_server, database->server);
server_clear_status(&temp_server, SERVER_RUNNING);
/* Also clear Joined, M/S and Stickiness bits */
/* Also clear Joined */
server_clear_status(&temp_server, SERVER_JOINED);
server_clear_status(&temp_server, SERVER_SLAVE);
server_clear_status(&temp_server, SERVER_MASTER);
server_clear_status(&temp_server, SERVER_MASTER_STICKINESS);
connect_result_t rval = mon_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK)
@ -412,7 +410,20 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database)
{
server_clear_status(&temp_server, SERVER_JOINED);
}
/* clear bits for non member nodes */
if (!SERVER_IN_MAINT(database->server) && (!SERVER_IS_JOINED(&temp_server)))
{
database->server->depth = -1;
/* clear M/S status */
server_clear_status(&temp_server, SERVER_SLAVE);
server_clear_status(&temp_server, SERVER_MASTER);
/* clear master sticky status */
server_clear_status(&temp_server, SERVER_MASTER_STICKINESS);
}
server_transfer_status(database->server, &temp_server);
}
@ -456,9 +467,9 @@ monitorMain(void *arg)
}
/** Wait base interval */
thread_millisleep(MON_BASE_INTERVAL_MS);
/**
* Calculate how far away the monitor interval is from its full
* cycle and if monitor interval time further than the base
/**
* Calculate how far away the monitor interval is from its full
* cycle and if monitor interval time further than the base
* interval, then skip monitoring checks. Excluding the first
* round.
*/
@ -482,19 +493,6 @@ monitorMain(void *arg)
monitorDatabase(mon, ptr);
/* clear bits for non member nodes */
if (!SERVER_IN_MAINT(ptr->server) && (!SERVER_IS_JOINED(ptr->server)))
{
ptr->server->depth = -1;
/* clear M/S status */
server_clear_status(ptr->server, SERVER_SLAVE);
server_clear_status(ptr->server, SERVER_MASTER);
/* clear master sticky status */
server_clear_status(ptr->server, SERVER_MASTER_STICKINESS);
}
/* Log server status change */
if (mon_status_changed(ptr))
{
@ -560,28 +558,24 @@ monitorMain(void *arg)
{
if (ptr != handle->master)
{
/* set the Slave role */
server_set_status(ptr->server, SERVER_SLAVE);
server_clear_status(ptr->server, SERVER_MASTER);
/* clear master stickiness */
server_clear_status(ptr->server, SERVER_MASTER_STICKINESS);
/* set the Slave role and clear master stickiness */
server_clear_set_status(ptr->server, (SERVER_SLAVE|SERVER_MASTER|SERVER_MASTER_STICKINESS), SERVER_SLAVE);
}
else
{
/* set the Master role */
server_set_status(handle->master->server, SERVER_MASTER);
server_clear_status(handle->master->server, SERVER_SLAVE);
if (candidate_master && handle->master->server->node_id != candidate_master->server->node_id)
{
/* set master stickiness */
server_set_status(handle->master->server, SERVER_MASTER_STICKINESS);
/* set master role and master stickiness */
server_clear_set_status(ptr->server,
(SERVER_SLAVE|SERVER_MASTER|SERVER_MASTER_STICKINESS),
(SERVER_MASTER|SERVER_MASTER_STICKINESS));
}
else
{
/* clear master stickiness */
server_clear_status(ptr->server, SERVER_MASTER_STICKINESS);
/* set master role and clear master stickiness */
server_clear_set_status(ptr->server,
(SERVER_SLAVE|SERVER_MASTER|SERVER_MASTER_STICKINESS),
SERVER_MASTER);
}
}
}