Cleanup failover/switchover
Replication settings warnings are printed again. Some parameter names were changed to be more consistent within the monitor.
This commit is contained in:
parent
f93578c089
commit
61bb172033
@ -53,7 +53,7 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio
|
||||
error_out);
|
||||
if (ok_to_switch)
|
||||
{
|
||||
switchover_done = do_switchover(demotion_target, promotion_target, error_out);
|
||||
switchover_done = switchover_perform(promotion_target, demotion_target, error_out);
|
||||
if (switchover_done)
|
||||
{
|
||||
MXS_NOTICE("Switchover '%s' -> '%s' performed.",
|
||||
@ -89,7 +89,7 @@ bool MariaDBMonitor::manual_failover(json_t** output)
|
||||
bool ok_to_failover = failover_prepare(Log::ON, &promotion_target, &demotion_target, output);
|
||||
if (ok_to_failover)
|
||||
{
|
||||
failover_done = do_failover(promotion_target, demotion_target, output);
|
||||
failover_done = failover_perform(promotion_target, demotion_target, output);
|
||||
if (failover_done)
|
||||
{
|
||||
MXS_NOTICE("Failover '%s' -> '%s' performed.",
|
||||
@ -467,15 +467,15 @@ bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t
|
||||
* intermediate step fails, the cluster may be left without a master and manual intervention is
|
||||
* required to fix things.
|
||||
*
|
||||
* @param demotion_target Server to demote
|
||||
* @param promotion_target Server to promote
|
||||
* @param err_out json object for error printing. Can be NULL.
|
||||
* @param demotion_target Server to demote
|
||||
* @param error_out Error output. Can be NULL.
|
||||
* @return True if successful. If false, replication may be broken.
|
||||
*/
|
||||
bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer* promotion_target,
|
||||
json_t** err_out)
|
||||
bool MariaDBMonitor::switchover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||
json_t** error_out)
|
||||
{
|
||||
mxb_assert(demotion_target && promotion_target);
|
||||
mxb_assert(promotion_target && demotion_target);
|
||||
|
||||
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
||||
// completed.
|
||||
@ -489,7 +489,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
|
||||
bool rval = false;
|
||||
// Step 2: Set read-only to on, flush logs, update master gtid:s
|
||||
if (switchover_demote_master(demotion_target, err_out))
|
||||
if (switchover_demote_master(demotion_target, error_out))
|
||||
{
|
||||
m_cluster_modified = true;
|
||||
bool catchup_and_promote_success = false;
|
||||
@ -500,7 +500,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
ServerArray catchup_slaves = redirectable_slaves;
|
||||
catchup_slaves.push_back(promotion_target);
|
||||
if (switchover_wait_slaves_catchup(catchup_slaves, demotion_target->m_gtid_binlog_pos,
|
||||
seconds_remaining, err_out))
|
||||
seconds_remaining, error_out))
|
||||
{
|
||||
time_t step3_time = time(NULL);
|
||||
int seconds_step3 = difftime(step3_time, step2_time);
|
||||
@ -508,7 +508,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
seconds_remaining -= seconds_step3;
|
||||
|
||||
// Step 4: On new master STOP and RESET SLAVE, set read-only to off.
|
||||
if (promote_new_master(promotion_target, err_out))
|
||||
if (promote_new_master(promotion_target, error_out))
|
||||
{
|
||||
catchup_and_promote_success = true;
|
||||
m_next_master = promotion_target;
|
||||
@ -549,7 +549,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
}
|
||||
else
|
||||
{
|
||||
print_redirect_errors(demotion_target, redirectable_slaves, err_out);
|
||||
print_redirect_errors(demotion_target, redirectable_slaves, error_out);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -560,11 +560,11 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
const char QUERY_UNDO[] = "SET GLOBAL read_only=0;";
|
||||
if (mxs_mysql_query(demotion_target->m_server_base->con, QUERY_UNDO) == 0)
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(err_out, "read_only disabled on server %s.", demotion_target->name());
|
||||
PRINT_MXS_JSON_ERROR(error_out, "read_only disabled on server %s.", demotion_target->name());
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(err_out, "Could not disable read_only on server %s: '%s'.",
|
||||
PRINT_MXS_JSON_ERROR(error_out, "Could not disable read_only on server %s: '%s'.",
|
||||
demotion_target->name(),
|
||||
mysql_error(demotion_target->m_server_base->con));
|
||||
}
|
||||
@ -572,7 +572,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
// Try to reactivate external replication if any.
|
||||
if (m_external_master_port != PORT_UNKNOWN)
|
||||
{
|
||||
start_external_replication(promotion_target, err_out);
|
||||
start_external_replication(promotion_target, error_out);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -582,14 +582,16 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
||||
/**
|
||||
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||
*
|
||||
* @param demotion_target Server to demote
|
||||
* @param promotion_target Server to promote
|
||||
* @param err_out Error output
|
||||
* @param demotion_target Server to demote
|
||||
* @param error_out Error output. Can be NULL.
|
||||
* @return True if successful
|
||||
*/
|
||||
bool MariaDBMonitor::do_failover(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||
json_t** error_out)
|
||||
bool MariaDBMonitor::failover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||
json_t** error_out)
|
||||
{
|
||||
mxb_assert(promotion_target && demotion_target);
|
||||
|
||||
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
||||
// completed.
|
||||
int seconds_remaining = m_failover_timeout;
|
||||
@ -946,6 +948,17 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select a promotion target for failover/switchover. Looks at the slaves of 'demotion_target' and selects
|
||||
* the server with the most up-to-date event or, if events are equal, the one with the best settings and
|
||||
* status.
|
||||
*
|
||||
* @param demotion_target The former master server/relay
|
||||
* @param op Switchover or failover
|
||||
* @param log_mode Print log or operate silently
|
||||
* @param error_out Error output
|
||||
* @return The selected promotion target or NULL if no valid candidates
|
||||
*/
|
||||
MariaDBServer* MariaDBMonitor::select_promotion_target(MariaDBServer* demotion_target,
|
||||
ClusterOperation op, Log log_mode,
|
||||
json_t** error_out)
|
||||
@ -993,6 +1006,11 @@ MariaDBServer* MariaDBMonitor::select_promotion_target(MariaDBServer* demotion_t
|
||||
else
|
||||
{
|
||||
candidates.push_back(cand);
|
||||
// Print some warnings about the candidate server.
|
||||
if (log_mode == Log::ON)
|
||||
{
|
||||
cand->warn_replication_settings();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1297,7 +1315,7 @@ void MariaDBMonitor::handle_auto_failover()
|
||||
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
||||
failed_master->name());
|
||||
failed_master->m_server_base->new_event = false;
|
||||
if (!do_failover(promotion_target, demotion_target, NULL))
|
||||
if (!failover_perform(promotion_target, demotion_target, NULL))
|
||||
{
|
||||
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover);
|
||||
}
|
||||
@ -1611,7 +1629,7 @@ void MariaDBMonitor::handle_low_disk_space_master()
|
||||
if (ok_to_switch)
|
||||
{
|
||||
m_warn_switchover_precond = true;
|
||||
bool switched = do_switchover(demotion_target, promotion_target, NULL);
|
||||
bool switched = switchover_perform(promotion_target, demotion_target, NULL);
|
||||
if (switched)
|
||||
{
|
||||
MXS_NOTICE("Switchover %s -> %s performed.",
|
||||
|
@ -213,26 +213,26 @@ private:
|
||||
void assign_new_master(MariaDBServer* new_master);
|
||||
|
||||
// Switchover methods
|
||||
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
||||
bool switchover_prepare(SERVER* new_master, SERVER* current_master, Log log_mode,
|
||||
MariaDBServer** new_master_out, MariaDBServer** current_master_out,
|
||||
MariaDBServer** promotion_target_out, MariaDBServer** demotion_target_out,
|
||||
json_t** error_out);
|
||||
bool do_switchover(MariaDBServer* demotion_target, MariaDBServer* promotion_target, json_t** error_out);
|
||||
bool switchover_demote_master(MariaDBServer* current_master,
|
||||
json_t** err_out);
|
||||
bool switchover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target, json_t** error_out);
|
||||
bool switchover_demote_master(MariaDBServer* current_master, json_t** err_out);
|
||||
bool switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, int total_timeout,
|
||||
json_t** err_out);
|
||||
bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
|
||||
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
||||
void handle_low_disk_space_master();
|
||||
|
||||
// Failover methods
|
||||
bool manual_failover(json_t** output);
|
||||
void handle_auto_failover();
|
||||
bool cluster_supports_failover(std::string* reasons_out);
|
||||
bool slave_receiving_events();
|
||||
bool failover_prepare(Log log_mode, MariaDBServer** promotion_target_out,
|
||||
MariaDBServer** demotion_target_out, json_t** error_out);
|
||||
bool do_failover(MariaDBServer* promotion_target, MariaDBServer* demotion_target, json_t** err_out);
|
||||
bool failover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||
json_t** error_out);
|
||||
bool cluster_supports_failover(std::string* reasons_out);
|
||||
bool slave_receiving_events();
|
||||
bool manual_failover(json_t** output);
|
||||
void handle_auto_failover();
|
||||
|
||||
// Rejoin methods
|
||||
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
||||
|
@ -353,38 +353,23 @@ bool MariaDBServer::read_server_variables(string* errmsg_out)
|
||||
return rval;
|
||||
}
|
||||
|
||||
bool MariaDBServer::check_replication_settings(print_repl_warnings_t print_warnings) const
|
||||
void MariaDBServer::warn_replication_settings() const
|
||||
{
|
||||
bool rval = true;
|
||||
const char* servername = name();
|
||||
if (m_rpl_settings.log_bin == false)
|
||||
if (m_rpl_settings.gtid_strict_mode == false)
|
||||
{
|
||||
if (print_warnings == WARNINGS_ON)
|
||||
{
|
||||
const char NO_BINLOG[] =
|
||||
"Slave '%s' has binary log disabled and is not a valid promotion candidate.";
|
||||
MXS_WARNING(NO_BINLOG, servername);
|
||||
}
|
||||
rval = false;
|
||||
const char NO_STRICT[] =
|
||||
"Slave '%s' has gtid_strict_mode disabled. Enabling this setting is recommended. "
|
||||
"For more information, see https://mariadb.com/kb/en/library/gtid/#gtid_strict_mode";
|
||||
MXS_WARNING(NO_STRICT, servername);
|
||||
}
|
||||
else if (print_warnings == WARNINGS_ON)
|
||||
if (m_rpl_settings.log_slave_updates == false)
|
||||
{
|
||||
if (m_rpl_settings.gtid_strict_mode == false)
|
||||
{
|
||||
const char NO_STRICT[] =
|
||||
"Slave '%s' has gtid_strict_mode disabled. Enabling this setting is recommended. "
|
||||
"For more information, see https://mariadb.com/kb/en/library/gtid/#gtid_strict_mode";
|
||||
MXS_WARNING(NO_STRICT, servername);
|
||||
}
|
||||
if (m_rpl_settings.log_slave_updates == false)
|
||||
{
|
||||
const char NO_SLAVE_UPDATES[] =
|
||||
"Slave '%s' has log_slave_updates disabled. It is a valid candidate but replication "
|
||||
"will break for lagging slaves if '%s' is promoted.";
|
||||
MXS_WARNING(NO_SLAVE_UPDATES, servername, servername);
|
||||
}
|
||||
const char NO_SLAVE_UPDATES[] =
|
||||
"Slave '%s' has log_slave_updates disabled. It is a valid candidate but replication "
|
||||
"will break for lagging slaves if '%s' is promoted.";
|
||||
MXS_WARNING(NO_SLAVE_UPDATES, servername, servername);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t** err_out)
|
||||
@ -1006,7 +991,7 @@ bool MariaDBServer::can_be_demoted_switchover(string* reason_out)
|
||||
}
|
||||
else if (!update_replication_settings(&query_error))
|
||||
{
|
||||
reason = string_printf("it could not be queried: '%s'.", query_error.c_str());
|
||||
reason = string_printf("it could not be queried: %s", query_error.c_str());
|
||||
}
|
||||
else if (!binlog_on())
|
||||
{
|
||||
@ -1084,7 +1069,7 @@ bool MariaDBServer::can_be_promoted(ClusterOperation op,
|
||||
}
|
||||
else if (!update_replication_settings(&query_error))
|
||||
{
|
||||
reason = string_printf("it could not be queried: '%s'.", query_error.c_str());
|
||||
reason = string_printf("it could not be queried: %s", query_error.c_str());
|
||||
}
|
||||
else if (!binlog_on())
|
||||
{
|
||||
|
@ -17,12 +17,6 @@
|
||||
#include <maxscale/monitor.h>
|
||||
#include "gtid.hh"
|
||||
|
||||
enum print_repl_warnings_t
|
||||
{
|
||||
WARNINGS_ON,
|
||||
WARNINGS_OFF
|
||||
};
|
||||
|
||||
class QueryResult;
|
||||
class MariaDBServer;
|
||||
// Server pointer array
|
||||
@ -230,12 +224,10 @@ public:
|
||||
bool read_server_variables(std::string* errmsg_out = NULL);
|
||||
|
||||
/**
|
||||
* Check if server has binary log enabled. Print warnings if gtid_strict_mode or log_slave_updates is off.
|
||||
*
|
||||
* @param print_on Print warnings or not
|
||||
* @return True if log_bin is on
|
||||
* Print warnings if gtid_strict_mode or log_slave_updates is off. Does not query the server,
|
||||
* so 'update_replication_settings' should have been called recently to update the values.
|
||||
*/
|
||||
bool check_replication_settings(print_repl_warnings_t print_warnings = WARNINGS_ON) const;
|
||||
void warn_replication_settings() const;
|
||||
|
||||
/**
|
||||
* Wait until server catches up to the target gtid. Only considers gtid domains common to this server
|
||||
|
Loading…
x
Reference in New Issue
Block a user