Add config parameter for excluding servers from failover
"servers_no_promotion" is a comma-separated list of servers which cannot be chosen when selecting a new master during failover (auto or manual), or when automatically selecting a new master for switchover (currently disabled). The servers in the list are redirected normally and can be promoted by switchover when manually selecting a new master.
This commit is contained in:
parent
6f6c11e6a3
commit
1cf3de4a74
@ -382,6 +382,19 @@ void load_server_journal(MXS_MONITOR *monitor, MXS_MONITORED_SERVER **master);
|
||||
* @param search_server Server to search for
|
||||
* @return Found monitored server or NULL if not found
|
||||
*/
|
||||
MXS_MONITORED_SERVER* mon_get_monitored_server(MXS_MONITOR* mon, SERVER* search_server);
|
||||
MXS_MONITORED_SERVER* mon_get_monitored_server(const MXS_MONITOR* mon, SERVER* search_server);
|
||||
|
||||
/**
|
||||
* Get an array of monitored servers. All the servers defined in the config setting must be monitored by
|
||||
* the given monitor.
|
||||
*
|
||||
* @param params Config parameters
|
||||
* @param key Setting name
|
||||
* @param mon Monitor which should monitor the servers
|
||||
* @param monitored_array_out Where to save output. The caller should free the array, but not the elements.
|
||||
* @return Output array size if successful, negative value otherwise
|
||||
*/
|
||||
int mon_config_get_servers(const MXS_CONFIG_PARAMETER* params, const char* key, const MXS_MONITOR* mon,
|
||||
MXS_MONITORED_SERVER*** monitored_array_out);
|
||||
|
||||
MXS_END_DECLS
|
||||
|
@ -3643,6 +3643,7 @@ void fix_serverlist(char* value)
|
||||
dest += sep;
|
||||
dest += start;
|
||||
sep = ",";
|
||||
start = strtok_r(NULL, ",", &end);
|
||||
}
|
||||
|
||||
/** The value will always be smaller than the original one or of equal size */
|
||||
|
@ -2428,7 +2428,7 @@ static bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age)
|
||||
return is_stale;
|
||||
}
|
||||
|
||||
MXS_MONITORED_SERVER* mon_get_monitored_server(MXS_MONITOR* mon, SERVER* search_server)
|
||||
MXS_MONITORED_SERVER* mon_get_monitored_server(const MXS_MONITOR* mon, SERVER* search_server)
|
||||
{
|
||||
ss_dassert(mon && search_server);
|
||||
for (MXS_MONITORED_SERVER* iter = mon->monitored_servers; iter != NULL; iter = iter->next)
|
||||
@ -2439,4 +2439,45 @@ MXS_MONITORED_SERVER* mon_get_monitored_server(MXS_MONITOR* mon, SERVER* search_
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int mon_config_get_servers(const MXS_CONFIG_PARAMETER* params, const char* key, const MXS_MONITOR* mon,
|
||||
MXS_MONITORED_SERVER*** monitored_servers_out)
|
||||
{
|
||||
ss_dassert(*monitored_servers_out == NULL);
|
||||
SERVER** servers = NULL;
|
||||
int servers_size = config_get_server_list(params, key, &servers);
|
||||
int rval = -1;
|
||||
// All servers in the array must be monitored by the given monitor.
|
||||
if (servers_size > 0)
|
||||
{
|
||||
MXS_MONITORED_SERVER** monitored_array =
|
||||
(MXS_MONITORED_SERVER**)MXS_CALLOC(servers_size, sizeof(MXS_MONITORED_SERVER*));
|
||||
bool error = false;
|
||||
for (int i = 0; i < servers_size && !error; i++)
|
||||
{
|
||||
MXS_MONITORED_SERVER* mon_serv = mon_get_monitored_server(mon, servers[i]);
|
||||
if (mon_serv != NULL)
|
||||
{
|
||||
monitored_array[i] = mon_serv;
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_ERROR("Server '%s' is not monitored by monitor '%s'.", servers[i]->unique_name, mon->name);
|
||||
error = true;
|
||||
}
|
||||
}
|
||||
MXS_FREE(servers);
|
||||
|
||||
if (error)
|
||||
{
|
||||
MXS_FREE(monitored_array);
|
||||
}
|
||||
else
|
||||
{
|
||||
*monitored_servers_out = monitored_array;
|
||||
rval = servers_size;
|
||||
}
|
||||
}
|
||||
return rval;
|
||||
}
|
@ -85,6 +85,12 @@ enum slave_down_setting_t
|
||||
REJECT_DOWN
|
||||
};
|
||||
|
||||
/**
 * Selects whether check_replication_settings() logs warnings about the replication
 * settings it inspects.
 */
enum print_repl_warnings_t
{
    WARNINGS_ON  = 0, /**< Log warnings */
    WARNINGS_OFF = 1  /**< Stay silent */
};
|
||||
|
||||
static void monitorMain(void *);
|
||||
static void *startMonitor(MXS_MONITOR *, const MXS_CONFIG_PARAMETER*);
|
||||
static void stopMonitor(MXS_MONITOR *);
|
||||
@ -132,6 +138,7 @@ static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
|
||||
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
|
||||
static const char CN_AUTO_REJOIN[] = "auto_rejoin";
|
||||
static const char CN_FAILCOUNT[] = "failcount";
|
||||
static const char CN_NO_PROMOTE_SERVERS[] = "servers_no_promotion";
|
||||
|
||||
// Parameters for master failure verification and timeout
|
||||
static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure";
|
||||
@ -893,6 +900,7 @@ extern "C"
|
||||
{CN_VERIFY_MASTER_FAILURE, MXS_MODULE_PARAM_BOOL, "true"},
|
||||
{CN_MASTER_FAILURE_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_MASTER_FAILURE_TIMEOUT},
|
||||
{CN_AUTO_REJOIN, MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{CN_NO_PROMOTE_SERVERS, MXS_MODULE_PARAM_SERVERLIST},
|
||||
{MXS_END_MODULE_PARAMS}
|
||||
}
|
||||
};
|
||||
@ -985,7 +993,24 @@ static bool set_replication_credentials(MYSQL_MONITOR *handle, const MXS_CONFIG_
|
||||
return rval;
|
||||
}
|
||||
|
||||
/*lint +e14 */
|
||||
/**
|
||||
* Is the server in the excluded list
|
||||
*
|
||||
* @param handle Cluster monitor
|
||||
* @param server Server to test
|
||||
* @return True if server is in the excluded-list of the monitor.
|
||||
*/
|
||||
static bool server_is_excluded(const MYSQL_MONITOR *handle, const MXS_MONITORED_SERVER* server)
|
||||
{
|
||||
for (int i = 0; i < handle->n_excluded; i++)
|
||||
{
|
||||
if (handle->excluded_servers[i] == server)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the instance of the monitor, returning a handle on the monitor.
|
||||
@ -999,14 +1024,17 @@ static bool set_replication_credentials(MYSQL_MONITOR *handle, const MXS_CONFIG_
|
||||
static void *
|
||||
startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
{
|
||||
bool error = false;
|
||||
MYSQL_MONITOR *handle = (MYSQL_MONITOR*) monitor->handle;
|
||||
|
||||
if (handle)
|
||||
{
|
||||
handle->shutdown = 0;
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle->replication_user);
|
||||
MXS_FREE(handle->replication_password);
|
||||
MXS_FREE(handle->excluded_servers);
|
||||
handle->excluded_servers = NULL;
|
||||
handle->n_excluded = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1052,7 +1080,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
handle->master_failure_timeout = config_get_integer(params, CN_MASTER_FAILURE_TIMEOUT);
|
||||
handle->auto_rejoin = config_get_bool(params, CN_AUTO_REJOIN);
|
||||
|
||||
bool error = false;
|
||||
handle->excluded_servers = NULL;
|
||||
handle->n_excluded = mon_config_get_servers(params, CN_NO_PROMOTE_SERVERS, monitor,
|
||||
&handle->excluded_servers);
|
||||
if (handle->n_excluded < 0)
|
||||
{
|
||||
error = true;
|
||||
}
|
||||
|
||||
if (!set_replication_credentials(handle, params))
|
||||
{
|
||||
@ -1075,6 +1109,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
{
|
||||
hashtable_free(handle->server_info);
|
||||
MXS_FREE(handle->script);
|
||||
MXS_FREE(handle->excluded_servers);
|
||||
MXS_FREE(handle);
|
||||
handle = NULL;
|
||||
}
|
||||
@ -3274,20 +3309,25 @@ static MySqlServerInfo* update_slave_info(MYSQL_MONITOR* mon, MXS_MONITORED_SERV
|
||||
*
|
||||
* @param server Server to check
|
||||
* @param server_info Server info
|
||||
* @param print_warnings Whether to log warnings about the replication settings
|
||||
* @return True if log_bin is on
|
||||
*/
|
||||
static bool check_replication_settings(const MXS_MONITORED_SERVER* server, MySqlServerInfo* server_info)
|
||||
static bool check_replication_settings(const MXS_MONITORED_SERVER* server, MySqlServerInfo* server_info,
|
||||
print_repl_warnings_t print_warnings = WARNINGS_ON)
|
||||
{
|
||||
bool rval = true;
|
||||
const char* servername = server->server->unique_name;
|
||||
if (server_info->rpl_settings.log_bin == false)
|
||||
{
|
||||
const char NO_BINLOG[] =
|
||||
"Slave '%s' has binary log disabled and is not a valid promotion candidate.";
|
||||
MXS_WARNING(NO_BINLOG, servername);
|
||||
if (print_warnings == WARNINGS_ON)
|
||||
{
|
||||
const char NO_BINLOG[] =
|
||||
"Slave '%s' has binary log disabled and is not a valid promotion candidate.";
|
||||
MXS_WARNING(NO_BINLOG, servername);
|
||||
}
|
||||
rval = false;
|
||||
}
|
||||
else
|
||||
else if (print_warnings == WARNINGS_ON)
|
||||
{
|
||||
if (server_info->rpl_settings.gtid_strict_mode == false)
|
||||
{
|
||||
@ -3346,86 +3386,132 @@ bool switchover_check_preferred_master(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER*
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the candidate a better choice for master than the previous best?
|
||||
*
|
||||
* @param current_best_info Server info of current best choice
|
||||
* @param candidate_info Server info of new candidate
|
||||
* @return True if candidate is better
|
||||
*/
|
||||
bool is_candidate_better(const MySqlServerInfo* current_best_info, const MySqlServerInfo* candidate_info)
|
||||
{
|
||||
uint64_t cand_io = candidate_info->slave_status.gtid_io_pos.sequence;
|
||||
uint64_t cand_processed = candidate_info->gtid_current_pos.sequence;
|
||||
uint64_t curr_io = current_best_info->slave_status.gtid_io_pos.sequence;
|
||||
uint64_t curr_processed = current_best_info->gtid_current_pos.sequence;
|
||||
bool cand_updates = candidate_info->rpl_settings.log_slave_updates;
|
||||
bool curr_updates = current_best_info->rpl_settings.log_slave_updates;
|
||||
bool is_better = false;
|
||||
// Accept a slave with a later event in relay log.
|
||||
if (cand_io > curr_io)
|
||||
{
|
||||
is_better = true;
|
||||
}
|
||||
// If io sequences are identical, the slave with more events processed wins.
|
||||
else if (cand_io == curr_io)
|
||||
{
|
||||
if (cand_processed > curr_processed)
|
||||
{
|
||||
is_better = true;
|
||||
}
|
||||
// Finally, if binlog positions are identical, prefer a slave with log_slave_updates.
|
||||
else if (cand_processed == curr_processed && cand_updates && !curr_updates)
|
||||
{
|
||||
is_better = true;
|
||||
}
|
||||
}
|
||||
return is_better;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select a new master. Also add slaves which should be redirected to an array.
|
||||
*
|
||||
* @param mon The monitor
|
||||
* @param out_slaves Vector for storing slave servers, can be NULL
|
||||
* @param slaves_out Vector for storing slave servers. Must be non-NULL and empty.
|
||||
* @param err_out json object for error printing. Can be NULL.
|
||||
* @return The found master, or NULL if not found
|
||||
*/
|
||||
MXS_MONITORED_SERVER* select_new_master(MYSQL_MONITOR* mon,
|
||||
ServerVector* slaves_out,
|
||||
json_t** err_out)
|
||||
MXS_MONITORED_SERVER* select_new_master(MYSQL_MONITOR* mon, ServerVector* slaves_out, json_t** err_out)
|
||||
{
|
||||
ss_dassert(slaves_out && slaves_out->size() == 0);
|
||||
/* Select a new master candidate. Selects the one with the latest event in relay log.
|
||||
* If multiple slaves have same number of events, select the one with most processed events. */
|
||||
MXS_MONITORED_SERVER* new_master = NULL;
|
||||
MySqlServerInfo* new_master_info = NULL;
|
||||
MXS_MONITORED_SERVER* current_best = NULL;
|
||||
MySqlServerInfo* current_best_info = NULL;
|
||||
// Servers that cannot be selected because of exclusion, but seem otherwise ok.
|
||||
ServerVector valid_but_excluded;
|
||||
// Index of the current best candidate in slaves_out
|
||||
int master_vector_index = -1;
|
||||
|
||||
for (MXS_MONITORED_SERVER *cand = mon->monitor->monitored_servers; cand; cand = cand->next)
|
||||
{
|
||||
// If a server cannot be connected to, it won't be considered for promotion or redirected.
|
||||
// Do not worry about the exclusion list yet, querying the excluded servers is ok.
|
||||
MySqlServerInfo* cand_info = update_slave_info(mon, cand);
|
||||
if (cand_info)
|
||||
{
|
||||
if (slaves_out)
|
||||
slaves_out->push_back(cand);
|
||||
// Check that server is not in the exclusion list while still being a valid choice.
|
||||
if (server_is_excluded(mon, cand) && check_replication_settings(cand, cand_info, WARNINGS_OFF))
|
||||
{
|
||||
slaves_out->push_back(cand);
|
||||
valid_but_excluded.push_back(cand);
|
||||
const char CANNOT_SELECT[] = "Promotion candidate '%s' is excluded from new "
|
||||
"master selection.";
|
||||
MXS_INFO(CANNOT_SELECT, cand->server->unique_name);
|
||||
}
|
||||
if (check_replication_settings(cand, cand_info))
|
||||
else if (check_replication_settings(cand, cand_info))
|
||||
{
|
||||
bool select_this = false;
|
||||
// If no candidate yet, accept any slave. Slaves have already been checked to use gtid.
|
||||
if (new_master == NULL)
|
||||
// If no new master yet, accept any valid candidate. Otherwise check.
|
||||
if (current_best == NULL || is_candidate_better(current_best_info, cand_info))
|
||||
{
|
||||
select_this = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t cand_io = cand_info->slave_status.gtid_io_pos.sequence;
|
||||
uint64_t cand_processed = cand_info->gtid_current_pos.sequence;
|
||||
uint64_t master_io = new_master_info->slave_status.gtid_io_pos.sequence;
|
||||
uint64_t master_processed = new_master_info->gtid_current_pos.sequence;
|
||||
bool cand_updates = cand_info->rpl_settings.log_slave_updates;
|
||||
bool master_updates = new_master_info->rpl_settings.log_slave_updates;
|
||||
// Otherwise accept a slave with a later event in relay log.
|
||||
if (cand_io > master_io ||
|
||||
// If io sequences are identical, the slave with more events processed wins.
|
||||
(cand_io == master_io && (cand_processed > master_processed ||
|
||||
// Finally, if binlog positions are identical,
|
||||
// prefer a slave with log_slave_updates.
|
||||
(cand_processed == master_processed &&
|
||||
cand_updates && !master_updates))))
|
||||
{
|
||||
select_this = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (select_this)
|
||||
{
|
||||
new_master = cand;
|
||||
new_master_info = cand_info;
|
||||
if (slaves_out)
|
||||
{
|
||||
master_vector_index = slaves_out->size() - 1;
|
||||
}
|
||||
// The server has been selected for promotion, for now.
|
||||
current_best = cand;
|
||||
current_best_info = cand_info;
|
||||
master_vector_index = slaves_out->size() - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (new_master && slaves_out)
|
||||
if (current_best)
|
||||
{
|
||||
// Remove the selected master from the vector.
|
||||
ServerVector::iterator remove_this = slaves_out->begin();
|
||||
remove_this += master_vector_index;
|
||||
slaves_out->erase(remove_this);
|
||||
}
|
||||
if (new_master == NULL)
|
||||
|
||||
// Check if any of the excluded servers would be better than the best candidate.
|
||||
for (ServerVector::const_iterator iter = valid_but_excluded.begin();
|
||||
iter != valid_but_excluded.end();
|
||||
iter++)
|
||||
{
|
||||
MySqlServerInfo* excluded_info = get_server_info(mon, *iter);
|
||||
const char* excluded_name = (*iter)->server->unique_name;
|
||||
if (current_best == NULL)
|
||||
{
|
||||
const char EXCLUDED_ONLY_CAND[] = "Server '%s' is a viable choice for new master, "
|
||||
"but cannot be selected as it's excluded.";
|
||||
MXS_WARNING(EXCLUDED_ONLY_CAND, excluded_name);
|
||||
break;
|
||||
}
|
||||
else if (is_candidate_better(current_best_info, excluded_info))
|
||||
{
|
||||
// Print a warning if this server is actually a better candidate than the previous
|
||||
// best.
|
||||
const char EXCLUDED_CAND[] = "Server '%s' is superior to current "
|
||||
"best candidate '%s', but cannot be selected as it's excluded. This may lead to "
|
||||
"loss of data if '%s' is ahead of other servers.";
|
||||
MXS_WARNING(EXCLUDED_CAND, excluded_name, current_best->server->unique_name, excluded_name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (current_best == NULL)
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(err_out, "No suitable promotion candidate found.");
|
||||
}
|
||||
return new_master;
|
||||
return current_best;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -73,6 +73,9 @@ typedef struct
|
||||
int64_t master_gtid_domain; /**< Gtid domain currently used by the master */
|
||||
bool auto_rejoin; /**< Attempt to start slave replication on standalone servers or servers
|
||||
replicating from the wrong master. */
|
||||
int n_excluded; /**< Number of excluded servers */
|
||||
MXS_MONITORED_SERVER** excluded_servers; /**< Servers excluded from master promotion during failover (auto or manual). */
|
||||
|
||||
MXS_MONITOR* monitor;
|
||||
} MYSQL_MONITOR;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user