Merge branch 'develop' of https://github.com/mariadb-corporation/MaxScale into develop
This commit is contained in:
commit
a38fe08a58
@ -77,6 +77,8 @@ static MONITOR_SERVERS *get_candidate_master(MONITOR_SERVERS *);
|
||||
static MONITOR_SERVERS *set_cluster_master(MONITOR_SERVERS *, MONITOR_SERVERS *, int);
|
||||
static void disableMasterFailback(void *, int);
|
||||
static void setNetworkTimeout(void *arg, int type, int value);
|
||||
static bool mon_status_changed(MONITOR_SERVERS* mon_srv);
|
||||
static bool mon_print_fail_status(MONITOR_SERVERS* mon_srv);
|
||||
|
||||
static MONITOR_OBJECT MyObject = {
|
||||
startMonitor,
|
||||
@ -348,6 +350,9 @@ char *server_string;
|
||||
if (SERVER_IN_MAINT(database->server))
|
||||
return;
|
||||
|
||||
/** Store previous status */
|
||||
database->mon_prev_status = database->server->status;
|
||||
|
||||
if (database->con == NULL || mysql_ping(database->con) != 0)
|
||||
{
|
||||
char *dpwd = decryptPassword(passwd);
|
||||
@ -365,13 +370,7 @@ char *server_string;
|
||||
if (mysql_real_connect(database->con, database->server->name,
|
||||
uname, dpwd, NULL, database->server->port, NULL, 0) == NULL)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error : Monitor was unable to connect to "
|
||||
"server %s:%d : \"%s\"",
|
||||
database->server->name,
|
||||
database->server->port,
|
||||
mysql_error(database->con))));
|
||||
free(dpwd);
|
||||
|
||||
server_clear_status(database->server, SERVER_RUNNING);
|
||||
|
||||
@ -385,8 +384,20 @@ char *server_string;
|
||||
{
|
||||
server_set_status(database->server, SERVER_AUTH_ERROR);
|
||||
}
|
||||
|
||||
database->server->node_id = -1;
|
||||
free(dpwd);
|
||||
|
||||
if (mon_status_changed(database) && mon_print_fail_status(database))
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error : Monitor was unable to connect to "
|
||||
"server %s:%d : \"%s\"",
|
||||
database->server->name,
|
||||
database->server->port,
|
||||
mysql_error(database->con))));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
else
|
||||
@ -461,6 +472,8 @@ MONITOR_SERVERS *ptr;
|
||||
size_t nrounds = 0;
|
||||
MONITOR_SERVERS *candidate_master = NULL;
|
||||
int master_stickiness = handle->disableMasterFailback;
|
||||
int is_cluster=0;
|
||||
int log_no_members = 1;
|
||||
|
||||
if (mysql_thread_init())
|
||||
{
|
||||
@ -501,8 +514,6 @@ int master_stickiness = handle->disableMasterFailback;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
unsigned int prev_status = ptr->server->status;
|
||||
|
||||
monitorDatabase(handle, ptr);
|
||||
|
||||
/* clear bits for non member nodes */
|
||||
@ -518,8 +529,7 @@ int master_stickiness = handle->disableMasterFailback;
|
||||
}
|
||||
|
||||
/* Log server status change */
|
||||
if (ptr->server->status != prev_status ||
|
||||
SERVER_IS_DOWN(ptr->server))
|
||||
if (mon_status_changed(ptr))
|
||||
{
|
||||
LOGIF(LD, (skygw_log_write_flush(
|
||||
LOGFILE_DEBUG,
|
||||
@ -529,6 +539,17 @@ int master_stickiness = handle->disableMasterFailback;
|
||||
STRSRVSTATUS(ptr->server))));
|
||||
}
|
||||
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/** Reset this server's error count */
|
||||
ptr->mon_err_count = 0;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
@ -574,8 +595,24 @@ int master_stickiness = handle->disableMasterFailback;
|
||||
}
|
||||
}
|
||||
|
||||
is_cluster++;
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if (is_cluster == 0 && log_no_members) {
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error: there are no cluster members")));
|
||||
log_no_members = 0;
|
||||
} else {
|
||||
if (is_cluster > 0 && log_no_members == 0) {
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Info: found %i cluster members", is_cluster)));
|
||||
log_no_members = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -678,6 +715,13 @@ MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
|
||||
memcpy(&handle->disableMasterFailback, &disable, sizeof(int));
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default id to use in the monitor.
|
||||
*
|
||||
* @param arg The handle allocated by startMonitor
|
||||
* @param type The connect timeout type
|
||||
* @param value The timeout value to set
|
||||
*/
|
||||
static void
|
||||
setNetworkTimeout(void *arg, int type, int value)
|
||||
{
|
||||
@ -731,3 +775,51 @@ int new_timeout = max_timeout -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if current monitored server status has changed
|
||||
*
|
||||
* @param mon_srv The monitored server
|
||||
* @return true if status has changed or false
|
||||
*/
|
||||
static bool mon_status_changed(
|
||||
MONITOR_SERVERS* mon_srv)
|
||||
{
|
||||
bool succp;
|
||||
|
||||
if (mon_srv->mon_prev_status != mon_srv->server->status)
|
||||
{
|
||||
succp = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
succp = false;
|
||||
}
|
||||
return succp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if current monitored server has a loggable failure status
|
||||
*
|
||||
* @param mon_srv The monitored server
|
||||
* @return true if failed status can be logged or false
|
||||
*/
|
||||
static bool mon_print_fail_status(
|
||||
MONITOR_SERVERS* mon_srv)
|
||||
{
|
||||
bool succp;
|
||||
int errcount = mon_srv->mon_err_count;
|
||||
uint8_t modval;
|
||||
|
||||
modval = 1<<(MIN(errcount/10, 7));
|
||||
|
||||
if (SERVER_IS_DOWN(mon_srv->server) && errcount == 0)
|
||||
{
|
||||
succp = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
succp = false;
|
||||
}
|
||||
return succp;
|
||||
}
|
||||
|
@ -386,7 +386,7 @@ char *server_string;
|
||||
if (SERVER_IN_MAINT(database->server))
|
||||
return;
|
||||
|
||||
/** Store prevous status */
|
||||
/** Store previous status */
|
||||
database->mon_prev_status = database->server->status;
|
||||
|
||||
if (database->con == NULL || mysql_ping(database->con) != 0)
|
||||
@ -414,17 +414,6 @@ char *server_string;
|
||||
{
|
||||
free(dpwd);
|
||||
|
||||
if (mon_print_fail_status(database))
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error : Monitor was unable to connect to "
|
||||
"server %s:%d : \"%s\"",
|
||||
database->server->name,
|
||||
database->server->port,
|
||||
mysql_error(database->con))));
|
||||
}
|
||||
|
||||
/* The current server is not running
|
||||
*
|
||||
* Store server NOT running in server and monitor server pending struct
|
||||
@ -450,6 +439,18 @@ char *server_string;
|
||||
monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER);
|
||||
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
|
||||
|
||||
/* Log connect failure only once */
|
||||
if (mon_status_changed(database) && mon_print_fail_status(database))
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error : Monitor was unable to connect to "
|
||||
"server %s:%d : \"%s\"",
|
||||
database->server->name,
|
||||
database->server->port,
|
||||
mysql_error(database->con))));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
else
|
||||
@ -608,6 +609,7 @@ int detect_stale_master = handle->detectStaleMaster;
|
||||
int num_servers=0;
|
||||
MONITOR_SERVERS *root_master = NULL;
|
||||
size_t nrounds = 0;
|
||||
int log_no_master = 1;
|
||||
|
||||
if (mysql_thread_init())
|
||||
{
|
||||
@ -672,21 +674,21 @@ size_t nrounds = 0;
|
||||
dcb_call_foreach(DCB_REASON_NOT_RESPONDING);
|
||||
}
|
||||
|
||||
if (mon_status_changed(ptr) ||
|
||||
mon_print_fail_status(ptr))
|
||||
if (mon_status_changed(ptr))
|
||||
{
|
||||
LOGIF(LD, (skygw_log_write_flush(
|
||||
LOGFILE_DEBUG,
|
||||
"Backend server %s:%d state : %s",
|
||||
ptr->server->name,
|
||||
ptr->server->port,
|
||||
STRSRVSTATUS(ptr->server))));
|
||||
}
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
STRSRVSTATUS(ptr->server))));
|
||||
}
|
||||
|
||||
if (SERVER_IS_DOWN(ptr->server))
|
||||
{
|
||||
/** Increase this server'e error count */
|
||||
ptr->mon_err_count += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/** Reset this server's error count */
|
||||
@ -724,11 +726,21 @@ size_t nrounds = 0;
|
||||
if (! SERVER_IN_MAINT(ptr->server)) {
|
||||
/* If "detect_stale_master" option is On, let's use the previus master */
|
||||
if (detect_stale_master && root_master && (!strcmp(ptr->server->name, root_master->server->name) && ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) && !(ptr->pending_status & SERVER_MASTER)) {
|
||||
/* in this case server->status will not be updated from pending_status */
|
||||
LOGIF(LM, (skygw_log_write_flush(
|
||||
LOGFILE_MESSAGE, "[mysql_mon]: root server [%s:%i] is no longer Master, let's use it again even if it could be a stale master, you have been warned!", ptr->server->name, ptr->server->port)));
|
||||
/* Set the STALE bit for this server in server struct */
|
||||
/**
|
||||
* In this case server->status will not be updated from pending_statu
|
||||
* Set the STALE bit for this server in server struct
|
||||
*/
|
||||
server_set_status(ptr->server, SERVER_STALE_STATUS);
|
||||
|
||||
/* log it once */
|
||||
if (mon_status_changed(ptr)) {
|
||||
LOGIF(LM, (skygw_log_write_flush(
|
||||
LOGFILE_MESSAGE, "[mysql_mon]: root server [%s:%i] is no longer Master,"
|
||||
" let's use it again even if it could be a stale master,"
|
||||
" you have been warned!",
|
||||
ptr->server->name,
|
||||
ptr->server->port)));
|
||||
}
|
||||
} else {
|
||||
ptr->server->status = ptr->pending_status;
|
||||
}
|
||||
@ -737,6 +749,33 @@ size_t nrounds = 0;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
/* log master detection failure od first master becomes available after failure */
|
||||
if (root_master && mon_status_changed(root_master) && !(root_master->server->status & SERVER_STALE_STATUS)) {
|
||||
if (root_master->pending_status & (SERVER_MASTER)) {
|
||||
if (!(root_master->mon_prev_status & SERVER_STALE_STATUS)) {
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Info: A Master Server is now available: %s:%i",
|
||||
root_master->server->name,
|
||||
root_master->server->port)));
|
||||
}
|
||||
} else {
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error: No Master can be determined. Last known was %s:%i",
|
||||
root_master->server->name,
|
||||
root_master->server->port)));
|
||||
}
|
||||
log_no_master = 1;
|
||||
} else {
|
||||
if (!root_master && log_no_master) {
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Error: No Master can be determined")));
|
||||
log_no_master = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do now the heartbeat replication set/get for MySQL Replication Consistency */
|
||||
if (replication_heartbeat && root_master && (SERVER_IS_MASTER(root_master->server) || SERVER_IS_RELAY_SERVER(root_master->server))) {
|
||||
set_master_heartbeat(handle, root_master);
|
||||
@ -808,6 +847,12 @@ MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg;
|
||||
memcpy(&handle->detectStaleMaster, &enable, sizeof(int));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if current monitored server status has changed
|
||||
*
|
||||
* @param mon_srv The monitored server
|
||||
* @return true if status has changed or false
|
||||
*/
|
||||
static bool mon_status_changed(
|
||||
MONITOR_SERVERS* mon_srv)
|
||||
{
|
||||
@ -824,6 +869,12 @@ static bool mon_status_changed(
|
||||
return succp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if current monitored server has a loggable failure status
|
||||
*
|
||||
* @param mon_srv The monitored server
|
||||
* @return true if failed status can be logged or false
|
||||
*/
|
||||
static bool mon_print_fail_status(
|
||||
MONITOR_SERVERS* mon_srv)
|
||||
{
|
||||
@ -833,7 +884,7 @@ static bool mon_print_fail_status(
|
||||
|
||||
modval = 1<<(MIN(errcount/10, 7));
|
||||
|
||||
if (SERVER_IS_DOWN(mon_srv->server) && errcount%modval == 0)
|
||||
if (SERVER_IS_DOWN(mon_srv->server) && errcount == 0)
|
||||
{
|
||||
succp = true;
|
||||
}
|
||||
@ -1178,6 +1229,7 @@ static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_serv
|
||||
add_slave_to_master(master->server->slaves, MONITOR_MAX_NUM_SLAVES, current->node_id);
|
||||
master->server->depth = current->depth -1;
|
||||
monitor_set_pending_status(master, SERVER_MASTER);
|
||||
handle->master = master;
|
||||
} else {
|
||||
if (current->master_id > 0) {
|
||||
/* this server is slave of another server not in MaxScale configuration
|
||||
|
Loading…
x
Reference in New Issue
Block a user