MXS-109: membership based on cluster_size
Monitored nodes may report different cluster UUIDs: select only the nodes belonging to the UUID with the most joined nodes. When the UUIDs differ and the number of joined nodes is less than (n_nodes / 2) + 1, do not consider any node part of the cluster.
This commit is contained in:
@ -57,6 +57,12 @@ bool isGaleraEvent(mxs_monitor_event_t event);
|
|||||||
static void update_sst_donor_nodes(MXS_MONITOR*, int);
|
static void update_sst_donor_nodes(MXS_MONITOR*, int);
|
||||||
static int compare_node_index(const void*, const void*);
|
static int compare_node_index(const void*, const void*);
|
||||||
static int compare_node_priority(const void*, const void*);
|
static int compare_node_priority(const void*, const void*);
|
||||||
|
static void reset_cluster_info(GALERA_MONITOR *);
|
||||||
|
static GALERA_NODE_INFO *nodeval_dup(const GALERA_NODE_INFO *);
|
||||||
|
static void nodeval_free(GALERA_NODE_INFO *);
|
||||||
|
static void set_galera_cluster(MXS_MONITOR *);
|
||||||
|
static bool detect_cluster_size(const GALERA_MONITOR *, const int, const char *, const int);
|
||||||
|
static void set_cluster_members(MXS_MONITOR *);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The module entry point routine. It is this routine that
|
* The module entry point routine. It is this routine that
|
||||||
@ -127,6 +133,7 @@ static void *
|
|||||||
startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
||||||
{
|
{
|
||||||
GALERA_MONITOR *handle = mon->handle;
|
GALERA_MONITOR *handle = mon->handle;
|
||||||
|
GALERA_CLUSTER_INFO *cluster_info;
|
||||||
if (handle != NULL)
|
if (handle != NULL)
|
||||||
{
|
{
|
||||||
handle->shutdown = 0;
|
handle->shutdown = 0;
|
||||||
@ -134,13 +141,35 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((handle = (GALERA_MONITOR *) MXS_MALLOC(sizeof(GALERA_MONITOR))) == NULL)
|
handle = (GALERA_MONITOR *) MXS_MALLOC(sizeof(GALERA_MONITOR));
|
||||||
|
cluster_info = MXS_MALLOC(sizeof(GALERA_CLUSTER_INFO));
|
||||||
|
HASHTABLE *nodes_info = hashtable_alloc(MAX_NUM_SLAVES, hashtable_item_strhash, hashtable_item_strcmp);
|
||||||
|
|
||||||
|
if (!handle || !nodes_info || !cluster_info)
|
||||||
{
|
{
|
||||||
|
hashtable_free(nodes_info);
|
||||||
|
MXS_FREE(cluster_info);
|
||||||
|
MXS_FREE(handle);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set copy / free routines for hashtable */
|
||||||
|
hashtable_memory_fns(nodes_info,
|
||||||
|
hashtable_item_strdup,
|
||||||
|
(HASHCOPYFN)nodeval_dup,
|
||||||
|
hashtable_item_free,
|
||||||
|
(HASHFREEFN)nodeval_free);
|
||||||
|
|
||||||
handle->shutdown = 0;
|
handle->shutdown = 0;
|
||||||
handle->id = MXS_MONITOR_DEFAULT_ID;
|
handle->id = MXS_MONITOR_DEFAULT_ID;
|
||||||
handle->master = NULL;
|
handle->master = NULL;
|
||||||
|
|
||||||
|
/* Initialise cluster nodes hash and Cluster info */
|
||||||
|
handle->galera_nodes_info = nodes_info;
|
||||||
|
cluster_info->c_size = 0;
|
||||||
|
cluster_info->c_uuid = NULL;
|
||||||
|
handle->cluster_info = cluster_info;
|
||||||
|
|
||||||
spinlock_init(&handle->lock);
|
spinlock_init(&handle->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,11 +182,17 @@ startMonitor(MXS_MONITOR *mon, const MXS_CONFIG_PARAMETER *params)
|
|||||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||||
handle->set_donor_nodes = config_get_bool(params, "set_donor_nodes");
|
handle->set_donor_nodes = config_get_bool(params, "set_donor_nodes");
|
||||||
|
|
||||||
|
/* Reset all data in the hashtable */
|
||||||
|
reset_cluster_info(handle);
|
||||||
|
|
||||||
|
|
||||||
/** SHOW STATUS doesn't require any special permissions */
|
/** SHOW STATUS doesn't require any special permissions */
|
||||||
if (!check_monitor_permissions(mon, "SHOW STATUS LIKE 'wsrep_local_state'"))
|
if (!check_monitor_permissions(mon, "SHOW STATUS LIKE 'wsrep_local_state'"))
|
||||||
{
|
{
|
||||||
MXS_ERROR("Failed to start monitor. See earlier errors for more information.");
|
MXS_ERROR("Failed to start monitor. See earlier errors for more information.");
|
||||||
|
hashtable_free(handle->galera_nodes_info);
|
||||||
MXS_FREE(handle->script);
|
MXS_FREE(handle->script);
|
||||||
|
MXS_FREE(handle->cluster_info);
|
||||||
MXS_FREE(handle);
|
MXS_FREE(handle);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -200,6 +235,15 @@ diagnostics(DCB *dcb, const MXS_MONITOR *mon)
|
|||||||
dcb_printf(dcb, "Master Role Setting Disabled:\t%s\n",
|
dcb_printf(dcb, "Master Role Setting Disabled:\t%s\n",
|
||||||
handle->disableMasterRoleSetting ? "on" : "off");
|
handle->disableMasterRoleSetting ? "on" : "off");
|
||||||
dcb_printf(dcb, "Set wsrep_sst_donor node list:\t%s\n", (handle->set_donor_nodes == 1) ? "on" : "off");
|
dcb_printf(dcb, "Set wsrep_sst_donor node list:\t%s\n", (handle->set_donor_nodes == 1) ? "on" : "off");
|
||||||
|
if (handle->cluster_info->c_uuid)
|
||||||
|
{
|
||||||
|
dcb_printf(dcb, "Galera Cluster UUID:\t%s\n", handle->cluster_info->c_uuid);
|
||||||
|
dcb_printf(dcb, "Galera Cluster size:\t%d\n", handle->cluster_info->c_size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
dcb_printf(dcb, "Galera Cluster NOT set:\tno member nodes\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -218,7 +262,6 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
|
|||||||
MYSQL_RES *result, *result2;
|
MYSQL_RES *result, *result2;
|
||||||
int isjoined = 0;
|
int isjoined = 0;
|
||||||
char *server_string;
|
char *server_string;
|
||||||
SERVER temp_server;
|
|
||||||
|
|
||||||
/* Don't even probe server flagged as in maintenance */
|
/* Don't even probe server flagged as in maintenance */
|
||||||
if (SERVER_IN_MAINT(database->server))
|
if (SERVER_IN_MAINT(database->server))
|
||||||
@ -229,26 +272,21 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
|
|||||||
/** Store previous status */
|
/** Store previous status */
|
||||||
database->mon_prev_status = database->server->status;
|
database->mon_prev_status = database->server->status;
|
||||||
|
|
||||||
server_transfer_status(&temp_server, database->server);
|
|
||||||
server_clear_status_nolock(&temp_server, SERVER_RUNNING);
|
|
||||||
/* Also clear Joined */
|
|
||||||
server_clear_status_nolock(&temp_server, SERVER_JOINED);
|
|
||||||
|
|
||||||
mxs_connect_result_t rval = mon_connect_to_db(mon, database);
|
mxs_connect_result_t rval = mon_connect_to_db(mon, database);
|
||||||
if (rval != MONITOR_CONN_OK)
|
if (rval != MONITOR_CONN_OK)
|
||||||
{
|
{
|
||||||
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
|
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
|
||||||
{
|
{
|
||||||
server_set_status_nolock(&temp_server, SERVER_AUTH_ERROR);
|
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
server_clear_status_nolock(&temp_server, SERVER_AUTH_ERROR);
|
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
database->server->node_id = -1;
|
database->server->node_id = -1;
|
||||||
|
|
||||||
server_transfer_status(database->server, &temp_server);
|
server_clear_status_nolock(database->server, SERVER_RUNNING);
|
||||||
|
|
||||||
if (mon_status_changed(database) && mon_print_fail_status(database))
|
if (mon_status_changed(database) && mon_print_fail_status(database))
|
||||||
{
|
{
|
||||||
@ -259,7 +297,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* If we get this far then we have a working connection */
|
/* If we get this far then we have a working connection */
|
||||||
server_set_status_nolock(&temp_server, SERVER_RUNNING);
|
server_set_status_nolock(database->server, SERVER_RUNNING);
|
||||||
|
|
||||||
/* get server version string */
|
/* get server version string */
|
||||||
server_string = (char *) mysql_get_server_info(database->con);
|
server_string = (char *) mysql_get_server_info(database->con);
|
||||||
@ -269,75 +307,38 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Check if the the Galera FSM shows this node is joined to the cluster */
|
/* Check if the the Galera FSM shows this node is joined to the cluster */
|
||||||
if (mysql_query(database->con, "SHOW STATUS LIKE 'wsrep_local_state'") == 0
|
char *cluster_member = "SHOW STATUS WHERE Variable_name IN"
|
||||||
|
" ('wsrep_cluster_state_uuid',"
|
||||||
|
" 'wsrep_cluster_size',"
|
||||||
|
" 'wsrep_local_index',"
|
||||||
|
" 'wsrep_local_state')";
|
||||||
|
|
||||||
|
if (mysql_query(database->con, cluster_member) == 0
|
||||||
&& (result = mysql_store_result(database->con)) != NULL)
|
&& (result = mysql_store_result(database->con)) != NULL)
|
||||||
{
|
{
|
||||||
if (mysql_field_count(database->con) < 2)
|
if (mysql_field_count(database->con) < 2)
|
||||||
{
|
{
|
||||||
mysql_free_result(result);
|
mysql_free_result(result);
|
||||||
MXS_ERROR("Unexpected result for \"SHOW STATUS LIKE 'wsrep_local_state'\". "
|
MXS_ERROR("Unexpected result for \"%s\". "
|
||||||
"Expected 2 columns. MySQL Version: %s", server_string);
|
"Expected 2 columns. MySQL Version: %s",
|
||||||
|
cluster_member, server_string);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
GALERA_NODE_INFO info = {};
|
||||||
while ((row = mysql_fetch_row(result)))
|
while ((row = mysql_fetch_row(result)))
|
||||||
{
|
{
|
||||||
if (strcmp(row[1], "4") == 0)
|
if (strcmp(row[0], "wsrep_cluster_size") == 0)
|
||||||
{
|
{
|
||||||
isjoined = 1;
|
info.cluster_size = atoi(row[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check if the node is a donor and is using xtrabackup, in this case it can stay alive */
|
if (strcmp(row[0], "wsrep_local_index") == 0)
|
||||||
else if (strcmp(row[1], "2") == 0 && handle->availableWhenDonor == 1)
|
|
||||||
{
|
|
||||||
if (mysql_query(database->con, "SHOW VARIABLES LIKE 'wsrep_sst_method'") == 0
|
|
||||||
&& (result2 = mysql_store_result(database->con)) != NULL)
|
|
||||||
{
|
|
||||||
if (mysql_field_count(database->con) < 2)
|
|
||||||
{
|
|
||||||
mysql_free_result(result);
|
|
||||||
mysql_free_result(result2);
|
|
||||||
MXS_ERROR("Unexpected result for \"SHOW VARIABLES LIKE "
|
|
||||||
"'wsrep_sst_method'\". Expected 2 columns."
|
|
||||||
" MySQL Version: %s", server_string);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
while ((row = mysql_fetch_row(result2)))
|
|
||||||
{
|
|
||||||
if (strncmp(row[1], "xtrabackup", 10) == 0)
|
|
||||||
{
|
|
||||||
isjoined = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mysql_free_result(result2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mysql_free_result(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isjoined)
|
|
||||||
{
|
|
||||||
/* Check the the Galera node index in the cluster */
|
|
||||||
if (mysql_query(database->con, "SHOW STATUS LIKE 'wsrep_local_index'") == 0
|
|
||||||
&& (result = mysql_store_result(database->con)) != NULL)
|
|
||||||
{
|
|
||||||
if (mysql_field_count(database->con) < 2)
|
|
||||||
{
|
|
||||||
mysql_free_result(result);
|
|
||||||
MXS_ERROR("Unexpected result for \"SHOW STATUS LIKE 'wsrep_local_index'\". "
|
|
||||||
"Expected 2 columns. MySQL Version: %s", server_string);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((row = mysql_fetch_row(result)))
|
|
||||||
{
|
{
|
||||||
char* endchar;
|
char* endchar;
|
||||||
long local_index = strtol(row[1], &endchar, 10);
|
long local_index = strtol(row[1], &endchar, 10);
|
||||||
if (*endchar != '\0' ||
|
if (*endchar != '\0' ||
|
||||||
(errno == ERANGE && (local_index == LONG_MAX || local_index == LONG_MIN)))
|
(errno == ERANGE && (local_index == LONG_MAX || local_index == LONG_MIN)))
|
||||||
{
|
{
|
||||||
/** TODO: Create a mechanism to log warnings on a per server basis */
|
|
||||||
if (warn_erange_on_local_index)
|
if (warn_erange_on_local_index)
|
||||||
{
|
{
|
||||||
MXS_WARNING("Invalid 'wsrep_local_index' on server '%s': %s",
|
MXS_WARNING("Invalid 'wsrep_local_index' on server '%s': %s",
|
||||||
@ -345,33 +346,110 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
|
|||||||
warn_erange_on_local_index = false;
|
warn_erange_on_local_index = false;
|
||||||
}
|
}
|
||||||
local_index = -1;
|
local_index = -1;
|
||||||
|
/* Force joined = 0 */
|
||||||
|
info.joined = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
info.local_index = local_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(row[0], "wsrep_local_state") == 0)
|
||||||
|
{
|
||||||
|
if (strcmp(row[1], "4") == 0)
|
||||||
|
{
|
||||||
|
info.joined = 1;
|
||||||
|
}
|
||||||
|
/* Check if the node is a donor and is using xtrabackup, in this case it can stay alive */
|
||||||
|
else if (strcmp(row[1], "2") == 0 && handle->availableWhenDonor == 1)
|
||||||
|
{
|
||||||
|
if (mysql_query(database->con, "SHOW VARIABLES LIKE 'wsrep_sst_method'") == 0
|
||||||
|
&& (result2 = mysql_store_result(database->con)) != NULL)
|
||||||
|
{
|
||||||
|
if (mysql_field_count(database->con) < 2)
|
||||||
|
{
|
||||||
|
mysql_free_result(result);
|
||||||
|
mysql_free_result(result2);
|
||||||
|
MXS_ERROR("Unexpected result for \"SHOW VARIABLES LIKE "
|
||||||
|
"'wsrep_sst_method'\". Expected 2 columns."
|
||||||
|
" MySQL Version: %s", server_string);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
while ((row = mysql_fetch_row(result2)))
|
||||||
|
{
|
||||||
|
if (strncmp(row[1], "xtrabackup", 10) == 0)
|
||||||
|
{
|
||||||
|
info.joined = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mysql_free_result(result2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Force joined = 0 */
|
||||||
|
info.joined = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
info.local_state = atoi(row[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We can check:
|
||||||
|
* wsrep_local_state == 0
|
||||||
|
* wsrep_cluster_size == 0
|
||||||
|
* wsrep_cluster_state_uuid == ""
|
||||||
|
*/
|
||||||
|
if (strcmp(row[0], "wsrep_cluster_state_uuid") == 0)
|
||||||
|
{
|
||||||
|
if (row[1] == NULL || !strlen(row[1]))
|
||||||
|
{
|
||||||
|
MXS_DEBUG("Node %s is not running Galera Cluster",
|
||||||
|
database->server->unique_name);
|
||||||
|
info.cluster_uuid = NULL;
|
||||||
|
info.joined = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
info.cluster_uuid = MXS_STRDUP(row[1]);
|
||||||
}
|
}
|
||||||
database->server->node_id = local_index;
|
|
||||||
}
|
}
|
||||||
mysql_free_result(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
server_set_status_nolock(&temp_server, SERVER_JOINED);
|
database->server->node_id = info.joined ? info.local_index : -1;
|
||||||
|
|
||||||
|
/* Add server pointer */
|
||||||
|
info.node = database->server;
|
||||||
|
|
||||||
|
/* Galera Cluster vars fetch */
|
||||||
|
HASHTABLE *table = handle->galera_nodes_info;
|
||||||
|
GALERA_NODE_INFO *node = hashtable_fetch(table, database->server->unique_name);
|
||||||
|
if (node)
|
||||||
|
{
|
||||||
|
MXS_DEBUG("Node %s is present in galera_nodes_info, updtating info",
|
||||||
|
database->server->unique_name);
|
||||||
|
|
||||||
|
MXS_FREE(node->cluster_uuid);
|
||||||
|
/* Update node data */
|
||||||
|
memcpy(node, &info, sizeof(GALERA_NODE_INFO));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (hashtable_add(table, database->server->unique_name, &info))
|
||||||
|
{
|
||||||
|
MXS_DEBUG("Added %s to galera_nodes_info",
|
||||||
|
database->server->unique_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MXS_DEBUG("Server %s: local_state %d, local_index %d, UUID %s, size %d, possible member %d",
|
||||||
|
database->server->unique_name,
|
||||||
|
info.local_state,
|
||||||
|
info.local_index,
|
||||||
|
info.cluster_uuid ? info.cluster_uuid : "_none_",
|
||||||
|
info.cluster_size,
|
||||||
|
info.joined);
|
||||||
|
|
||||||
|
mysql_free_result(result);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
server_clear_status_nolock(&temp_server, SERVER_JOINED);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* clear bits for non member nodes */
|
|
||||||
if (!SERVER_IN_MAINT(database->server) && (!SERVER_IS_JOINED(&temp_server)))
|
|
||||||
{
|
|
||||||
database->server->depth = -1;
|
|
||||||
|
|
||||||
/* clear M/S status */
|
|
||||||
server_clear_status_nolock(&temp_server, SERVER_SLAVE);
|
|
||||||
server_clear_status_nolock(&temp_server, SERVER_MASTER);
|
|
||||||
|
|
||||||
/* clear master sticky status */
|
|
||||||
server_clear_status_nolock(&temp_server, SERVER_MASTER_STICKINESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
server_transfer_status(database->server, &temp_server);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -391,6 +469,7 @@ monitorMain(void *arg)
|
|||||||
int is_cluster = 0;
|
int is_cluster = 0;
|
||||||
int log_no_members = 1;
|
int log_no_members = 1;
|
||||||
mxs_monitor_event_t evtype;
|
mxs_monitor_event_t evtype;
|
||||||
|
int log_uuid_change = 1;
|
||||||
|
|
||||||
spinlock_acquire(&mon->lock);
|
spinlock_acquire(&mon->lock);
|
||||||
handle = (GALERA_MONITOR *) mon->handle;
|
handle = (GALERA_MONITOR *) mon->handle;
|
||||||
@ -469,6 +548,12 @@ monitorMain(void *arg)
|
|||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Try to set a Galera cluster based on
|
||||||
|
* UUID and cluster_size each node reports:
|
||||||
|
* no multiple clusters UUID are allowed.
|
||||||
|
*/
|
||||||
|
set_galera_cluster(mon);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Let's select a master server:
|
* Let's select a master server:
|
||||||
* it could be the candidate master following MXS_MIN(node_id) rule or
|
* it could be the candidate master following MXS_MIN(node_id) rule or
|
||||||
@ -532,7 +617,6 @@ monitorMain(void *arg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* After updating the status of all servers, check if monitor events
|
* After updating the status of all servers, check if monitor events
|
||||||
* need to be launched.
|
* need to be launched.
|
||||||
@ -919,3 +1003,326 @@ static int compare_node_priority (const void *a, const void *b)
|
|||||||
// The order is DESC: b -a
|
// The order is DESC: b -a
|
||||||
return pri_val_b - pri_val_a;
|
return pri_val_b - pri_val_a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When monitor starts all entries in hashable are deleted
|
||||||
|
*
|
||||||
|
* @param handle The Galera specific data
|
||||||
|
*/
|
||||||
|
static void reset_cluster_info(GALERA_MONITOR *handle)
|
||||||
|
{
|
||||||
|
int n_nodes = 0;
|
||||||
|
HASHITERATOR *iterator;
|
||||||
|
HASHTABLE *table = handle->galera_nodes_info;
|
||||||
|
void *key;
|
||||||
|
|
||||||
|
/* Delete all entries in the hashtable */
|
||||||
|
while ((iterator = hashtable_iterator(table)))
|
||||||
|
{
|
||||||
|
key = hashtable_next(iterator);
|
||||||
|
if (!key)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hashtable_iterator_free(iterator);
|
||||||
|
hashtable_delete(table, key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy routine for hashtable values
|
||||||
|
*
|
||||||
|
* @param in The nut data
|
||||||
|
* @return The copied data or NULL
|
||||||
|
*/
|
||||||
|
static GALERA_NODE_INFO *nodeval_dup(const GALERA_NODE_INFO *in)
|
||||||
|
{
|
||||||
|
if (in == NULL ||
|
||||||
|
in->cluster_size == 0 ||
|
||||||
|
in->cluster_uuid == NULL ||
|
||||||
|
in->node == NULL)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
GALERA_NODE_INFO *rval = (GALERA_NODE_INFO *) MXS_CALLOC(1, sizeof(GALERA_NODE_INFO));
|
||||||
|
char* uuid = MXS_STRDUP(in->cluster_uuid);
|
||||||
|
|
||||||
|
if (!uuid || !rval)
|
||||||
|
{
|
||||||
|
MXS_FREE(rval);
|
||||||
|
MXS_FREE(uuid);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
rval->cluster_uuid = uuid;
|
||||||
|
rval->cluster_size = in->cluster_size;
|
||||||
|
rval->local_index = in->local_index;
|
||||||
|
rval->local_state = in->local_state;
|
||||||
|
rval->node = in->node;
|
||||||
|
rval->joined = in->joined;
|
||||||
|
|
||||||
|
return (void *) rval;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free routine for hashtable values
|
||||||
|
*
|
||||||
|
* @param in The data to be freed
|
||||||
|
*/
|
||||||
|
static void nodeval_free(GALERA_NODE_INFO *in)
|
||||||
|
{
|
||||||
|
if (in)
|
||||||
|
{
|
||||||
|
MXS_FREE(in->cluster_uuid);
|
||||||
|
MXS_FREE(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect possible cluster_uuid and cluster_size
|
||||||
|
* in monitored nodes.
|
||||||
|
* Set the cluster memebership in nodes
|
||||||
|
* if a cluster can be set.
|
||||||
|
*
|
||||||
|
* @param mon The Monitor Instance
|
||||||
|
*/
|
||||||
|
static void set_galera_cluster(MXS_MONITOR *mon)
|
||||||
|
{
|
||||||
|
GALERA_MONITOR *handle = mon->handle;
|
||||||
|
int ret = false;
|
||||||
|
int n_nodes = 0;
|
||||||
|
HASHITERATOR *iterator;
|
||||||
|
HASHTABLE *table = handle->galera_nodes_info;
|
||||||
|
char *key;
|
||||||
|
GALERA_NODE_INFO *value;
|
||||||
|
int cluster_size = 0;
|
||||||
|
char *cluster_uuid = NULL;
|
||||||
|
|
||||||
|
/* Fetch all entries in the hashtable */
|
||||||
|
if ((iterator = hashtable_iterator(table)) != NULL)
|
||||||
|
{
|
||||||
|
/* Get the Key */
|
||||||
|
while ((key = hashtable_next(iterator)) != NULL)
|
||||||
|
{
|
||||||
|
/* fetch the Value for the Key */
|
||||||
|
value = hashtable_fetch(table, key);
|
||||||
|
if (value)
|
||||||
|
{
|
||||||
|
if (!SERVER_IN_MAINT(value->node) &&
|
||||||
|
SERVER_IS_RUNNING(value->node) &&
|
||||||
|
value->joined)
|
||||||
|
{
|
||||||
|
/* This server can be part of a cluster */
|
||||||
|
n_nodes++;
|
||||||
|
|
||||||
|
/* Set cluster_uuid for nodes that report
|
||||||
|
* highest value of cluster_size
|
||||||
|
*/
|
||||||
|
if (value->cluster_size > cluster_size)
|
||||||
|
{
|
||||||
|
cluster_size = value->cluster_size;
|
||||||
|
cluster_uuid = value->cluster_uuid;
|
||||||
|
}
|
||||||
|
|
||||||
|
MXS_DEBUG("Candidate cluster member %s: UUID %s, joined nodes %d",
|
||||||
|
value->node->unique_name,
|
||||||
|
value->cluster_uuid,
|
||||||
|
value->cluster_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hashtable_iterator_free(iterator);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect if a possible cluster can
|
||||||
|
* be set with n_nodes and cluster_size
|
||||||
|
*
|
||||||
|
* Special cases for n_nodes = 0 or 1.
|
||||||
|
* If cluster_size > 1 there is rule
|
||||||
|
*/
|
||||||
|
ret = detect_cluster_size(handle,
|
||||||
|
n_nodes,
|
||||||
|
cluster_uuid,
|
||||||
|
cluster_size);
|
||||||
|
/**
|
||||||
|
* Free && set the new cluster_uuid:
|
||||||
|
* Handling the special case n_nodes == 1
|
||||||
|
*/
|
||||||
|
if (ret || (!ret && n_nodes != 1))
|
||||||
|
{
|
||||||
|
/* Set the new cluster_uuid */
|
||||||
|
MXS_FREE(handle->cluster_info->c_uuid);
|
||||||
|
handle->cluster_info->c_uuid = ret ? MXS_STRDUP(cluster_uuid) : NULL;
|
||||||
|
handle->cluster_info->c_size = cluster_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the JOINED status in cluster members only, if any.
|
||||||
|
*/
|
||||||
|
set_cluster_members(mon);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the SERVER_JOINED in member nodes only
|
||||||
|
*
|
||||||
|
* Status bits SERVER_JOINED, SERVER_SLAVE, SERVER_MASTER
|
||||||
|
* and SERVER_MASTER_STICKINESS are removed
|
||||||
|
* in non member nodes.
|
||||||
|
*
|
||||||
|
* @param mon The Monitor Instance
|
||||||
|
*/
|
||||||
|
static void set_cluster_members(MXS_MONITOR *mon)
|
||||||
|
{
|
||||||
|
GALERA_MONITOR *handle = mon->handle;
|
||||||
|
GALERA_NODE_INFO *value;
|
||||||
|
MXS_MONITOR_SERVERS *ptr;
|
||||||
|
char *c_uuid = handle->cluster_info->c_uuid;
|
||||||
|
int c_size = handle->cluster_info->c_size;
|
||||||
|
|
||||||
|
ptr = mon->databases;
|
||||||
|
while (ptr)
|
||||||
|
{
|
||||||
|
/* Fetch cluster info for this server, if any */
|
||||||
|
value = hashtable_fetch(handle->galera_nodes_info, ptr->server->unique_name);
|
||||||
|
|
||||||
|
if (value && handle->cluster_info->c_uuid)
|
||||||
|
{
|
||||||
|
/* Check whether this server is a candidate member */
|
||||||
|
if (!SERVER_IN_MAINT(ptr->server) &&
|
||||||
|
SERVER_IS_RUNNING(ptr->server) &&
|
||||||
|
value->joined &&
|
||||||
|
strcmp(value->cluster_uuid, c_uuid) == 0 &&
|
||||||
|
value->cluster_size == c_size)
|
||||||
|
{
|
||||||
|
/* Server is member of current cluster */
|
||||||
|
server_set_status_nolock(ptr->server, SERVER_JOINED);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* This server is not part of current cluster */
|
||||||
|
server_clear_status_nolock(ptr->server, SERVER_JOINED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* This server is not member of any cluster */
|
||||||
|
server_clear_status_nolock(ptr->server, SERVER_JOINED);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clear bits for non member nodes */
|
||||||
|
if (!SERVER_IN_MAINT(ptr->server) && (!SERVER_IS_JOINED(ptr->server)))
|
||||||
|
{
|
||||||
|
ptr->server->depth = -1;
|
||||||
|
ptr->server->node_id = -1;
|
||||||
|
|
||||||
|
/* clear M/S status */
|
||||||
|
server_clear_status_nolock(ptr->server, SERVER_SLAVE);
|
||||||
|
server_clear_status_nolock(ptr->server, SERVER_MASTER);
|
||||||
|
|
||||||
|
/* clear master sticky status */
|
||||||
|
server_clear_status_nolock(ptr->server, SERVER_MASTER_STICKINESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = ptr->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect whether a Galer cluster can be set.
|
||||||
|
*
|
||||||
|
* @param handle The Galera specific data
|
||||||
|
* @param n_nodes Nodes configured for this monitor
|
||||||
|
* @param cluster_uuid Possible cluster_uuid in nodes
|
||||||
|
* @param cluster_size Possible cluster_size in nodes
|
||||||
|
* @return True is a cluster can be set
|
||||||
|
*/
|
||||||
|
static bool detect_cluster_size(const GALERA_MONITOR *handle,
|
||||||
|
const int n_nodes,
|
||||||
|
const char *candidate_uuid,
|
||||||
|
const int candidate_size)
|
||||||
|
{
|
||||||
|
bool ret = false;
|
||||||
|
char *c_uuid = handle->cluster_info->c_uuid;
|
||||||
|
int c_size = handle->cluster_info->c_size;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decide whether we have a cluster
|
||||||
|
*/
|
||||||
|
if (n_nodes == 0)
|
||||||
|
{
|
||||||
|
/* Log change if a previous UUID was set */
|
||||||
|
if (c_uuid != NULL)
|
||||||
|
{
|
||||||
|
MXS_INFO("No nodes found to be part of a Galera cluster right now: aborting");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (n_nodes == 1)
|
||||||
|
{
|
||||||
|
char *msg = "Galera cluster with 1 node only";
|
||||||
|
|
||||||
|
/* If 1 node only:
|
||||||
|
* ifc_uuid is not set, return value will be true.
|
||||||
|
* if c_uuid is equal to candidate_uuid, return value will be true.
|
||||||
|
*/
|
||||||
|
if (c_uuid == NULL ||
|
||||||
|
(c_uuid && strcmp(c_uuid, candidate_uuid) == 0))
|
||||||
|
{
|
||||||
|
ret = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Log change if no previous UUID was set */
|
||||||
|
if (c_uuid == NULL)
|
||||||
|
{
|
||||||
|
if (ret)
|
||||||
|
{
|
||||||
|
MXS_INFO("%s has UUID %s: continue", msg, candidate_uuid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (strcmp(c_uuid, candidate_uuid) && c_size != 1)
|
||||||
|
{
|
||||||
|
/* This error should be ogged once */
|
||||||
|
MXS_ERROR("%s and its UUID %s is different from previous set one %s: aborting",
|
||||||
|
msg,
|
||||||
|
candidate_uuid,
|
||||||
|
c_uuid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int min_cluster_size = ((int)(n_nodes / 2 ) + 1);
|
||||||
|
|
||||||
|
/* Return true if there are enough members */
|
||||||
|
if (candidate_size >= min_cluster_size)
|
||||||
|
{
|
||||||
|
ret = true;
|
||||||
|
/* Log the successful change once */
|
||||||
|
if (c_uuid == NULL ||
|
||||||
|
(c_uuid && strcmp(c_uuid, candidate_uuid)))
|
||||||
|
{
|
||||||
|
MXS_INFO("Galera cluster UUID is now %s with %d members of %d nodes",
|
||||||
|
candidate_uuid, candidate_size, n_nodes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (!ret && c_uuid)
|
||||||
|
{
|
||||||
|
/* This error is being logged at every monitor cycle */
|
||||||
|
MXS_ERROR("Galera cluster cannot be set with %d members of %d:"
|
||||||
|
" not enough nodes (%d at least)",
|
||||||
|
candidate_size, n_nodes, min_cluster_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
@ -39,9 +39,26 @@
|
|||||||
#include <maxscale/dcb.h>
|
#include <maxscale/dcb.h>
|
||||||
#include <maxscale/modinfo.h>
|
#include <maxscale/modinfo.h>
|
||||||
#include <maxscale/config.h>
|
#include <maxscale/config.h>
|
||||||
|
#include <maxscale/hashtable.h>
|
||||||
|
|
||||||
MXS_BEGIN_DECLS
|
MXS_BEGIN_DECLS
|
||||||
|
|
||||||
|
typedef struct galera_node_info
|
||||||
|
{
|
||||||
|
int joined;
|
||||||
|
int local_index;
|
||||||
|
int local_state;
|
||||||
|
int cluster_size;
|
||||||
|
char *cluster_uuid;
|
||||||
|
const SERVER *node;
|
||||||
|
} GALERA_NODE_INFO;
|
||||||
|
|
||||||
|
/**
 * The Galera cluster currently selected by the monitor
 */
typedef struct galera_cluster_info
{
    int   c_size;   /**< Cluster size reported by the selected nodes */
    char *c_uuid;   /**< Cluster UUID, NULL when no cluster is set */
} GALERA_CLUSTER_INFO;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The handle for an instance of a Galera Monitor module
|
* The handle for an instance of a Galera Monitor module
|
||||||
*/
|
*/
|
||||||
@ -63,6 +80,8 @@ typedef struct
|
|||||||
uint64_t events; /*< enabled events */
|
uint64_t events; /*< enabled events */
|
||||||
bool set_donor_nodes; /**< set the wrep_sst_donor variable with an
|
bool set_donor_nodes; /**< set the wrep_sst_donor variable with an
|
||||||
* ordered list of nodes */
|
* ordered list of nodes */
|
||||||
|
HASHTABLE *galera_nodes_info; /**< Contains Galera Cluster variables of all nodes */
|
||||||
|
GALERA_CLUSTER_INFO *cluster_info; /**< Contains Galera cluster info */
|
||||||
} GALERA_MONITOR;
|
} GALERA_MONITOR;
|
||||||
|
|
||||||
MXS_END_DECLS
|
MXS_END_DECLS
|
||||||
|
Reference in New Issue
Block a user