Cleanup failover/switchover
Replication settings warnings are printed once more. Changed some parameter names to be more consistent within the monitor.
This commit is contained in:
@ -53,7 +53,7 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio
|
|||||||
error_out);
|
error_out);
|
||||||
if (ok_to_switch)
|
if (ok_to_switch)
|
||||||
{
|
{
|
||||||
switchover_done = do_switchover(demotion_target, promotion_target, error_out);
|
switchover_done = switchover_perform(promotion_target, demotion_target, error_out);
|
||||||
if (switchover_done)
|
if (switchover_done)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Switchover '%s' -> '%s' performed.",
|
MXS_NOTICE("Switchover '%s' -> '%s' performed.",
|
||||||
@ -89,7 +89,7 @@ bool MariaDBMonitor::manual_failover(json_t** output)
|
|||||||
bool ok_to_failover = failover_prepare(Log::ON, &promotion_target, &demotion_target, output);
|
bool ok_to_failover = failover_prepare(Log::ON, &promotion_target, &demotion_target, output);
|
||||||
if (ok_to_failover)
|
if (ok_to_failover)
|
||||||
{
|
{
|
||||||
failover_done = do_failover(promotion_target, demotion_target, output);
|
failover_done = failover_perform(promotion_target, demotion_target, output);
|
||||||
if (failover_done)
|
if (failover_done)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Failover '%s' -> '%s' performed.",
|
MXS_NOTICE("Failover '%s' -> '%s' performed.",
|
||||||
@ -467,15 +467,15 @@ bool MariaDBMonitor::server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t
|
|||||||
* intermediate step fails, the cluster may be left without a master and manual intervention is
|
* intermediate step fails, the cluster may be left without a master and manual intervention is
|
||||||
* required to fix things.
|
* required to fix things.
|
||||||
*
|
*
|
||||||
* @param demotion_target Server to demote
|
|
||||||
* @param promotion_target Server to promote
|
* @param promotion_target Server to promote
|
||||||
* @param err_out json object for error printing. Can be NULL.
|
* @param demotion_target Server to demote
|
||||||
|
* @param error_out Error output. Can be NULL.
|
||||||
* @return True if successful. If false, replication may be broken.
|
* @return True if successful. If false, replication may be broken.
|
||||||
*/
|
*/
|
||||||
bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer* promotion_target,
|
bool MariaDBMonitor::switchover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||||
json_t** err_out)
|
json_t** error_out)
|
||||||
{
|
{
|
||||||
mxb_assert(demotion_target && promotion_target);
|
mxb_assert(promotion_target && demotion_target);
|
||||||
|
|
||||||
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
||||||
// completed.
|
// completed.
|
||||||
@ -489,7 +489,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
|
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 2: Set read-only to on, flush logs, update master gtid:s
|
// Step 2: Set read-only to on, flush logs, update master gtid:s
|
||||||
if (switchover_demote_master(demotion_target, err_out))
|
if (switchover_demote_master(demotion_target, error_out))
|
||||||
{
|
{
|
||||||
m_cluster_modified = true;
|
m_cluster_modified = true;
|
||||||
bool catchup_and_promote_success = false;
|
bool catchup_and_promote_success = false;
|
||||||
@ -500,7 +500,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
ServerArray catchup_slaves = redirectable_slaves;
|
ServerArray catchup_slaves = redirectable_slaves;
|
||||||
catchup_slaves.push_back(promotion_target);
|
catchup_slaves.push_back(promotion_target);
|
||||||
if (switchover_wait_slaves_catchup(catchup_slaves, demotion_target->m_gtid_binlog_pos,
|
if (switchover_wait_slaves_catchup(catchup_slaves, demotion_target->m_gtid_binlog_pos,
|
||||||
seconds_remaining, err_out))
|
seconds_remaining, error_out))
|
||||||
{
|
{
|
||||||
time_t step3_time = time(NULL);
|
time_t step3_time = time(NULL);
|
||||||
int seconds_step3 = difftime(step3_time, step2_time);
|
int seconds_step3 = difftime(step3_time, step2_time);
|
||||||
@ -508,7 +508,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
seconds_remaining -= seconds_step3;
|
seconds_remaining -= seconds_step3;
|
||||||
|
|
||||||
// Step 4: On new master STOP and RESET SLAVE, set read-only to off.
|
// Step 4: On new master STOP and RESET SLAVE, set read-only to off.
|
||||||
if (promote_new_master(promotion_target, err_out))
|
if (promote_new_master(promotion_target, error_out))
|
||||||
{
|
{
|
||||||
catchup_and_promote_success = true;
|
catchup_and_promote_success = true;
|
||||||
m_next_master = promotion_target;
|
m_next_master = promotion_target;
|
||||||
@ -549,7 +549,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
print_redirect_errors(demotion_target, redirectable_slaves, err_out);
|
print_redirect_errors(demotion_target, redirectable_slaves, error_out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -560,11 +560,11 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
const char QUERY_UNDO[] = "SET GLOBAL read_only=0;";
|
const char QUERY_UNDO[] = "SET GLOBAL read_only=0;";
|
||||||
if (mxs_mysql_query(demotion_target->m_server_base->con, QUERY_UNDO) == 0)
|
if (mxs_mysql_query(demotion_target->m_server_base->con, QUERY_UNDO) == 0)
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(err_out, "read_only disabled on server %s.", demotion_target->name());
|
PRINT_MXS_JSON_ERROR(error_out, "read_only disabled on server %s.", demotion_target->name());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(err_out, "Could not disable read_only on server %s: '%s'.",
|
PRINT_MXS_JSON_ERROR(error_out, "Could not disable read_only on server %s: '%s'.",
|
||||||
demotion_target->name(),
|
demotion_target->name(),
|
||||||
mysql_error(demotion_target->m_server_base->con));
|
mysql_error(demotion_target->m_server_base->con));
|
||||||
}
|
}
|
||||||
@ -572,7 +572,7 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
// Try to reactivate external replication if any.
|
// Try to reactivate external replication if any.
|
||||||
if (m_external_master_port != PORT_UNKNOWN)
|
if (m_external_master_port != PORT_UNKNOWN)
|
||||||
{
|
{
|
||||||
start_external_replication(promotion_target, err_out);
|
start_external_replication(promotion_target, error_out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -582,14 +582,16 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
/**
|
/**
|
||||||
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||||
*
|
*
|
||||||
* @param demotion_target Server to demote
|
|
||||||
* @param promotion_target Server to promote
|
* @param promotion_target Server to promote
|
||||||
* @param err_out Error output
|
* @param demotion_target Server to demote
|
||||||
|
* @param error_out Error output. Can be NULL.
|
||||||
* @return True if successful
|
* @return True if successful
|
||||||
*/
|
*/
|
||||||
bool MariaDBMonitor::do_failover(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
bool MariaDBMonitor::failover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||||
json_t** error_out)
|
json_t** error_out)
|
||||||
{
|
{
|
||||||
|
mxb_assert(promotion_target && demotion_target);
|
||||||
|
|
||||||
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
||||||
// completed.
|
// completed.
|
||||||
int seconds_remaining = m_failover_timeout;
|
int seconds_remaining = m_failover_timeout;
|
||||||
@ -946,6 +948,17 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Select a promotion target for failover/switchover. Looks at the slaves of 'demotion_target' and selects
|
||||||
|
* the server with the most up-do-date event or, if events are equal, the one with the best settings and
|
||||||
|
* status.
|
||||||
|
*
|
||||||
|
* @param demotion_target The former master server/relay
|
||||||
|
* @param op Switchover or failover
|
||||||
|
* @param log_mode Print log or operate silently
|
||||||
|
* @param error_out Error output
|
||||||
|
* @return The selected promotion target or NULL if no valid candidates
|
||||||
|
*/
|
||||||
MariaDBServer* MariaDBMonitor::select_promotion_target(MariaDBServer* demotion_target,
|
MariaDBServer* MariaDBMonitor::select_promotion_target(MariaDBServer* demotion_target,
|
||||||
ClusterOperation op, Log log_mode,
|
ClusterOperation op, Log log_mode,
|
||||||
json_t** error_out)
|
json_t** error_out)
|
||||||
@ -993,6 +1006,11 @@ MariaDBServer* MariaDBMonitor::select_promotion_target(MariaDBServer* demotion_t
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
candidates.push_back(cand);
|
candidates.push_back(cand);
|
||||||
|
// Print some warnings about the candidate server.
|
||||||
|
if (log_mode == Log::ON)
|
||||||
|
{
|
||||||
|
cand->warn_replication_settings();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1297,7 +1315,7 @@ void MariaDBMonitor::handle_auto_failover()
|
|||||||
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
||||||
failed_master->name());
|
failed_master->name());
|
||||||
failed_master->m_server_base->new_event = false;
|
failed_master->m_server_base->new_event = false;
|
||||||
if (!do_failover(promotion_target, demotion_target, NULL))
|
if (!failover_perform(promotion_target, demotion_target, NULL))
|
||||||
{
|
{
|
||||||
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover);
|
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover);
|
||||||
}
|
}
|
||||||
@ -1611,7 +1629,7 @@ void MariaDBMonitor::handle_low_disk_space_master()
|
|||||||
if (ok_to_switch)
|
if (ok_to_switch)
|
||||||
{
|
{
|
||||||
m_warn_switchover_precond = true;
|
m_warn_switchover_precond = true;
|
||||||
bool switched = do_switchover(demotion_target, promotion_target, NULL);
|
bool switched = switchover_perform(promotion_target, demotion_target, NULL);
|
||||||
if (switched)
|
if (switched)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Switchover %s -> %s performed.",
|
MXS_NOTICE("Switchover %s -> %s performed.",
|
||||||
|
@ -213,26 +213,26 @@ private:
|
|||||||
void assign_new_master(MariaDBServer* new_master);
|
void assign_new_master(MariaDBServer* new_master);
|
||||||
|
|
||||||
// Switchover methods
|
// Switchover methods
|
||||||
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
|
||||||
bool switchover_prepare(SERVER* new_master, SERVER* current_master, Log log_mode,
|
bool switchover_prepare(SERVER* new_master, SERVER* current_master, Log log_mode,
|
||||||
MariaDBServer** new_master_out, MariaDBServer** current_master_out,
|
MariaDBServer** promotion_target_out, MariaDBServer** demotion_target_out,
|
||||||
json_t** error_out);
|
json_t** error_out);
|
||||||
bool do_switchover(MariaDBServer* demotion_target, MariaDBServer* promotion_target, json_t** error_out);
|
bool switchover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target, json_t** error_out);
|
||||||
bool switchover_demote_master(MariaDBServer* current_master,
|
bool switchover_demote_master(MariaDBServer* current_master, json_t** err_out);
|
||||||
json_t** err_out);
|
|
||||||
bool switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, int total_timeout,
|
bool switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, int total_timeout,
|
||||||
json_t** err_out);
|
json_t** err_out);
|
||||||
bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
|
bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
|
||||||
|
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
||||||
void handle_low_disk_space_master();
|
void handle_low_disk_space_master();
|
||||||
|
|
||||||
// Failover methods
|
// Failover methods
|
||||||
bool manual_failover(json_t** output);
|
|
||||||
void handle_auto_failover();
|
|
||||||
bool cluster_supports_failover(std::string* reasons_out);
|
|
||||||
bool slave_receiving_events();
|
|
||||||
bool failover_prepare(Log log_mode, MariaDBServer** promotion_target_out,
|
bool failover_prepare(Log log_mode, MariaDBServer** promotion_target_out,
|
||||||
MariaDBServer** demotion_target_out, json_t** error_out);
|
MariaDBServer** demotion_target_out, json_t** error_out);
|
||||||
bool do_failover(MariaDBServer* promotion_target, MariaDBServer* demotion_target, json_t** err_out);
|
bool failover_perform(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||||
|
json_t** error_out);
|
||||||
|
bool cluster_supports_failover(std::string* reasons_out);
|
||||||
|
bool slave_receiving_events();
|
||||||
|
bool manual_failover(json_t** output);
|
||||||
|
void handle_auto_failover();
|
||||||
|
|
||||||
// Rejoin methods
|
// Rejoin methods
|
||||||
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
||||||
|
@ -353,38 +353,23 @@ bool MariaDBServer::read_server_variables(string* errmsg_out)
|
|||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::check_replication_settings(print_repl_warnings_t print_warnings) const
|
void MariaDBServer::warn_replication_settings() const
|
||||||
{
|
{
|
||||||
bool rval = true;
|
|
||||||
const char* servername = name();
|
const char* servername = name();
|
||||||
if (m_rpl_settings.log_bin == false)
|
if (m_rpl_settings.gtid_strict_mode == false)
|
||||||
{
|
{
|
||||||
if (print_warnings == WARNINGS_ON)
|
const char NO_STRICT[] =
|
||||||
{
|
"Slave '%s' has gtid_strict_mode disabled. Enabling this setting is recommended. "
|
||||||
const char NO_BINLOG[] =
|
"For more information, see https://mariadb.com/kb/en/library/gtid/#gtid_strict_mode";
|
||||||
"Slave '%s' has binary log disabled and is not a valid promotion candidate.";
|
MXS_WARNING(NO_STRICT, servername);
|
||||||
MXS_WARNING(NO_BINLOG, servername);
|
|
||||||
}
|
|
||||||
rval = false;
|
|
||||||
}
|
}
|
||||||
else if (print_warnings == WARNINGS_ON)
|
if (m_rpl_settings.log_slave_updates == false)
|
||||||
{
|
{
|
||||||
if (m_rpl_settings.gtid_strict_mode == false)
|
const char NO_SLAVE_UPDATES[] =
|
||||||
{
|
"Slave '%s' has log_slave_updates disabled. It is a valid candidate but replication "
|
||||||
const char NO_STRICT[] =
|
"will break for lagging slaves if '%s' is promoted.";
|
||||||
"Slave '%s' has gtid_strict_mode disabled. Enabling this setting is recommended. "
|
MXS_WARNING(NO_SLAVE_UPDATES, servername, servername);
|
||||||
"For more information, see https://mariadb.com/kb/en/library/gtid/#gtid_strict_mode";
|
|
||||||
MXS_WARNING(NO_STRICT, servername);
|
|
||||||
}
|
|
||||||
if (m_rpl_settings.log_slave_updates == false)
|
|
||||||
{
|
|
||||||
const char NO_SLAVE_UPDATES[] =
|
|
||||||
"Slave '%s' has log_slave_updates disabled. It is a valid candidate but replication "
|
|
||||||
"will break for lagging slaves if '%s' is promoted.";
|
|
||||||
MXS_WARNING(NO_SLAVE_UPDATES, servername, servername);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return rval;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t** err_out)
|
bool MariaDBServer::wait_until_gtid(const GtidList& target, int timeout, json_t** err_out)
|
||||||
@ -1006,7 +991,7 @@ bool MariaDBServer::can_be_demoted_switchover(string* reason_out)
|
|||||||
}
|
}
|
||||||
else if (!update_replication_settings(&query_error))
|
else if (!update_replication_settings(&query_error))
|
||||||
{
|
{
|
||||||
reason = string_printf("it could not be queried: '%s'.", query_error.c_str());
|
reason = string_printf("it could not be queried: %s", query_error.c_str());
|
||||||
}
|
}
|
||||||
else if (!binlog_on())
|
else if (!binlog_on())
|
||||||
{
|
{
|
||||||
@ -1084,7 +1069,7 @@ bool MariaDBServer::can_be_promoted(ClusterOperation op,
|
|||||||
}
|
}
|
||||||
else if (!update_replication_settings(&query_error))
|
else if (!update_replication_settings(&query_error))
|
||||||
{
|
{
|
||||||
reason = string_printf("it could not be queried: '%s'.", query_error.c_str());
|
reason = string_printf("it could not be queried: %s", query_error.c_str());
|
||||||
}
|
}
|
||||||
else if (!binlog_on())
|
else if (!binlog_on())
|
||||||
{
|
{
|
||||||
|
@ -17,12 +17,6 @@
|
|||||||
#include <maxscale/monitor.h>
|
#include <maxscale/monitor.h>
|
||||||
#include "gtid.hh"
|
#include "gtid.hh"
|
||||||
|
|
||||||
enum print_repl_warnings_t
|
|
||||||
{
|
|
||||||
WARNINGS_ON,
|
|
||||||
WARNINGS_OFF
|
|
||||||
};
|
|
||||||
|
|
||||||
class QueryResult;
|
class QueryResult;
|
||||||
class MariaDBServer;
|
class MariaDBServer;
|
||||||
// Server pointer array
|
// Server pointer array
|
||||||
@ -230,12 +224,10 @@ public:
|
|||||||
bool read_server_variables(std::string* errmsg_out = NULL);
|
bool read_server_variables(std::string* errmsg_out = NULL);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if server has binary log enabled. Print warnings if gtid_strict_mode or log_slave_updates is off.
|
* Print warnings if gtid_strict_mode or log_slave_updates is off. Does not query the server,
|
||||||
*
|
* so 'update_replication_settings' should have been called recently to update the values.
|
||||||
* @param print_on Print warnings or not
|
|
||||||
* @return True if log_bin is on
|
|
||||||
*/
|
*/
|
||||||
bool check_replication_settings(print_repl_warnings_t print_warnings = WARNINGS_ON) const;
|
void warn_replication_settings() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wait until server catches up to the target gtid. Only considers gtid domains common to this server
|
* Wait until server catches up to the target gtid. Only considers gtid domains common to this server
|
||||||
|
Reference in New Issue
Block a user