Merge branch '2.3' into develop

This commit is contained in:
Esa Korhonen
2019-04-16 16:55:12 +03:00
3 changed files with 65 additions and 70 deletions

View File

@ -27,13 +27,11 @@ using maxbase::string_printf;
using maxbase::StopWatch; using maxbase::StopWatch;
using maxbase::Duration; using maxbase::Duration;
static const char RE_ENABLE_FMT[] = "To re-enable automatic %s, manually set '%s' to 'true' "
"for monitor '%s' via MaxAdmin or the REST API, or restart MaxScale.";
const char NO_SERVER[] = "Server '%s' is not monitored by '%s'."; const char NO_SERVER[] = "Server '%s' is not monitored by '%s'.";
const char FAILOVER_OK[] = "Failover '%s' -> '%s' performed."; const char FAILOVER_OK[] = "Failover '%s' -> '%s' performed.";
const char FAILOVER_FAIL[] = "Failover '%s' -> '%s' failed."; const char FAILOVER_FAIL[] = "Failover '%s' -> '%s' failed.";
const char SWITCHOVER_OK[] = "Switchover '%s' -> '%s' performed."; const char SWITCHOVER_OK[] = "Switchover '%s' -> '%s' performed.";
const char SWITCHOVER_FAIL[] = "Switchover %s -> %s failed"; const char SWITCHOVER_FAIL[] = "Switchover %s -> %s failed.";
/** /**
* Run a manual switchover, promoting a new master server and demoting the existing master. * Run a manual switchover, promoting a new master server and demoting the existing master.
@ -64,14 +62,8 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio
{ {
string msg = string_printf(SWITCHOVER_FAIL, string msg = string_printf(SWITCHOVER_FAIL,
op->demotion.target->name(), op->promotion.target->name()); op->demotion.target->name(), op->promotion.target->name());
bool failover_setting = parameters.get_bool(CN_AUTO_FAILOVER);
if (failover_setting)
{
disable_setting(CN_AUTO_FAILOVER);
msg += ", automatic failover has been disabled";
}
msg += ".";
PRINT_MXS_JSON_ERROR(error_out, "%s", msg.c_str()); PRINT_MXS_JSON_ERROR(error_out, "%s", msg.c_str());
delay_auto_cluster_ops();
} }
} }
else else
@ -625,6 +617,7 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
SERVER* master_server = m_master->m_server_base->server; SERVER* master_server = m_master->m_server_base->server;
const char* master_name = master_server->name(); const char* master_name = master_server->name();
uint32_t servers_joined = 0; uint32_t servers_joined = 0;
bool rejoin_error = false;
if (!joinable_servers.empty()) if (!joinable_servers.empty())
{ {
for (MariaDBServer* joinable : joinable_servers) for (MariaDBServer* joinable : joinable_servers)
@ -656,7 +649,8 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
else else
{ {
PRINT_MXS_JSON_ERROR(output, PRINT_MXS_JSON_ERROR(output,
"Failed to prepare (demote) standalone server '%s' for rejoin.", name); "Failed to prepare (demote) standalone server '%s' for rejoin.",
name);
} }
} }
else else
@ -675,8 +669,17 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
servers_joined++; servers_joined++;
m_cluster_modified = true; m_cluster_modified = true;
} }
else
{
rejoin_error = true;
} }
} }
}
if (rejoin_error)
{
delay_auto_cluster_ops();
}
return servers_joined; return servers_joined;
} }
@ -1474,7 +1477,7 @@ void MariaDBMonitor::handle_auto_failover()
else else
{ {
MXS_ERROR(FAILOVER_FAIL, op->demotion_target->name(), op->promotion.target->name()); MXS_ERROR(FAILOVER_FAIL, op->demotion_target->name(), op->promotion.target->name());
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover); delay_auto_cluster_ops();
} }
} }
else else
@ -1536,25 +1539,10 @@ void MariaDBMonitor::check_cluster_operations_support()
{ {
const char PROBLEMS[] = const char PROBLEMS[] =
"The backend cluster does not support failover/switchover due to the following reason(s):\n" "The backend cluster does not support failover/switchover due to the following reason(s):\n"
"%s\n" "%s\n";
"Automatic failover/switchover has been disabled. They should only be enabled " string msg = string_printf(PROBLEMS, all_reasons.c_str());
"after the above issues have been resolved."; MXS_ERROR("%s", msg.c_str());
string p1 = string_printf(PROBLEMS, all_reasons.c_str()); delay_auto_cluster_ops();
string p2 = string_printf(RE_ENABLE_FMT, "failover", CN_AUTO_FAILOVER, name());
string p3 = string_printf(RE_ENABLE_FMT, "switchover", CN_SWITCHOVER_ON_LOW_DISK_SPACE, name());
string total_msg = p1 + " " + p2 + " " + p3;
MXS_ERROR("%s", total_msg.c_str());
if (m_auto_failover)
{
m_auto_failover = false;
disable_setting(CN_AUTO_FAILOVER);
}
if (m_switchover_on_low_disk_space)
{
m_switchover_on_low_disk_space = false;
disable_setting(CN_SWITCHOVER_ON_LOW_DISK_SPACE);
}
} }
} }
@ -1719,6 +1707,7 @@ MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demotion_se
void MariaDBMonitor::enforce_read_only_on_slaves() void MariaDBMonitor::enforce_read_only_on_slaves()
{ {
const char QUERY[] = "SET GLOBAL read_only=1;"; const char QUERY[] = "SET GLOBAL read_only=1;";
bool error = false;
for (MariaDBServer* server : m_servers) for (MariaDBServer* server : m_servers)
{ {
if (server->is_slave() && !server->is_read_only() if (server->is_slave() && !server->is_read_only()
@ -1732,9 +1721,15 @@ void MariaDBMonitor::enforce_read_only_on_slaves()
else else
{ {
MXS_ERROR("Setting read_only on '%s' failed: '%s'.", server->name(), mysql_error(conn)); MXS_ERROR("Setting read_only on '%s' failed: '%s'.", server->name(), mysql_error(conn));
error = true;
} }
} }
} }
if (error)
{
delay_auto_cluster_ops();
}
} }
void MariaDBMonitor::handle_low_disk_space_master() void MariaDBMonitor::handle_low_disk_space_master()
@ -1762,8 +1757,7 @@ void MariaDBMonitor::handle_low_disk_space_master()
else else
{ {
MXS_ERROR(SWITCHOVER_FAIL, op->demotion.target->name(), op->promotion.target->name()); MXS_ERROR(SWITCHOVER_FAIL, op->demotion.target->name(), op->promotion.target->name());
report_and_disable("switchover", CN_SWITCHOVER_ON_LOW_DISK_SPACE, delay_auto_cluster_ops();
&m_switchover_on_low_disk_space);
} }
} }
else else
@ -1798,19 +1792,6 @@ void MariaDBMonitor::handle_auto_rejoin()
// get_joinable_servers prints an error if master is unresponsive // get_joinable_servers prints an error if master is unresponsive
} }
void MariaDBMonitor::report_and_disable(const string& operation, const string& setting_name,
bool* setting_var)
{
string p1 = string_printf("Automatic %s failed, disabling automatic %s.",
operation.c_str(),
operation.c_str());
string p2 = string_printf(RE_ENABLE_FMT, operation.c_str(), setting_name.c_str(), name());
string error_msg = p1 + " " + p2;
MXS_ERROR("%s", error_msg.c_str());
*setting_var = false;
disable_setting(setting_name.c_str());
}
/** /**
* Check that the slaves to demotion target are using gtid replication and that the gtid domain of the * Check that the slaves to demotion target are using gtid replication and that the gtid domain of the
* cluster is defined. Only the slave connections to the demotion target are checked. * cluster is defined. Only the slave connections to the demotion target are checked.
@ -1878,6 +1859,24 @@ ServerArray MariaDBMonitor::get_redirectables(const MariaDBServer* old_master,
return redirectable_slaves; return redirectable_slaves;
} }
/**
 * Suspend automatic cluster operations for a while by arming the disable timer.
 * can_perform_cluster_ops() returns false for as long as the timer is above zero.
 */
void MariaDBMonitor::delay_auto_cluster_ops()
{
    // The notice is printed only if at least one automatic operation is switched on.
    const bool any_auto_op_enabled = m_auto_failover || m_auto_rejoin || m_enforce_read_only_slaves
        || m_switchover_on_low_disk_space;
    if (any_auto_op_enabled)
    {
        const char DISABLING_AUTO_OPS[] = "Disabling automatic cluster operations for %i monitor ticks.";
        MXS_NOTICE(DISABLING_AUTO_OPS, m_failcount);
    }

    // The timer is armed regardless of the settings above. One extra tick is added since
    // the beginning of the next tick already decrements the timer by one.
    cluster_operation_disable_timer = m_failcount + 1;
}
/**
 * Check whether automatic cluster operations may be run right now.
 *
 * @return True if MaxScale is not passive, the disable timer is not running and the cluster
 * has not been modified during this tick
 */
bool MariaDBMonitor::can_perform_cluster_ops()
{
    if (config_get_global_options()->passive)
    {
        return false;
    }
    if (cluster_operation_disable_timer > 0)
    {
        // Operations are temporarily disabled, the timer has not yet run out.
        return false;
    }
    return !m_cluster_modified;
}
MariaDBMonitor::SwitchoverParams::SwitchoverParams(const ServerOperation& promotion, MariaDBMonitor::SwitchoverParams::SwitchoverParams(const ServerOperation& promotion,
const ServerOperation& demotion, const ServerOperation& demotion,
const GeneralOpData& general) const GeneralOpData& general)

View File

@ -27,9 +27,7 @@
#include <maxscale/mysql_utils.hh> #include <maxscale/mysql_utils.hh>
#include <maxscale/routingworker.hh> #include <maxscale/routingworker.hh>
#include <maxscale/secrets.h> #include <maxscale/secrets.h>
#include <maxscale/utils.h> #include <maxscale/utils.hh>
// TODO: For monitor_add_parameters
#include "../../../core/internal/monitor.hh"
using std::string; using std::string;
using maxbase::string_printf; using maxbase::string_printf;
@ -387,6 +385,12 @@ void MariaDBMonitor::tick()
mon_srv->mon_prev_status = status; mon_srv->mon_prev_status = status;
} }
if (cluster_operation_disable_timer > 0)
{
cluster_operation_disable_timer--;
}
// Query all servers for their status.
bool should_update_disk_space = check_disk_space_this_tick(); bool should_update_disk_space = check_disk_space_this_tick();
const auto& conn_settings = m_settings.conn_settings; const auto& conn_settings = m_settings.conn_settings;
@ -422,7 +426,7 @@ void MariaDBMonitor::tick()
update_topology(); update_topology();
m_cluster_topology_changed = false; m_cluster_topology_changed = false;
// If cluster operations are enabled, check topology support and disable if needed. // If cluster operations are enabled, check topology support and disable if needed.
if (m_auto_failover || m_switchover_on_low_disk_space) if (m_auto_failover || m_switchover_on_low_disk_space || m_auto_rejoin)
{ {
check_cluster_operations_support(); check_cluster_operations_support();
} }
@ -495,16 +499,16 @@ void MariaDBMonitor::process_state_changes()
} }
} }
if (!config_get_global_options()->passive) if (can_perform_cluster_ops())
{ {
if (m_auto_failover && !m_cluster_modified) if (m_auto_failover)
{ {
handle_auto_failover(); handle_auto_failover();
} }
// Do not auto-join servers on this monitor loop if a failover (or any other cluster modification) // Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
// has been performed, as server states have not been updated yet. It will happen next iteration. // has been performed, as server states have not been updated yet. It will happen next iteration.
if (m_auto_rejoin && !m_cluster_modified && cluster_can_be_joined()) if (m_auto_rejoin && cluster_can_be_joined() && can_perform_cluster_ops())
{ {
// Check if any servers should be autojoined to the cluster and try to join them. // Check if any servers should be autojoined to the cluster and try to join them.
handle_auto_rejoin(); handle_auto_rejoin();
@ -513,13 +517,13 @@ void MariaDBMonitor::process_state_changes()
/* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not /* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
* perform this if cluster has been modified this loop since it may not be clear which server * perform this if cluster has been modified this loop since it may not be clear which server
* should be a slave. */ * should be a slave. */
if (m_enforce_read_only_slaves && !m_cluster_modified) if (m_enforce_read_only_slaves && can_perform_cluster_ops())
{ {
enforce_read_only_on_slaves(); enforce_read_only_on_slaves();
} }
/* Check if the master server is on low disk space and act on it. */ /* Check if the master server is on low disk space and act on it. */
if (m_switchover_on_low_disk_space && !m_cluster_modified) if (m_switchover_on_low_disk_space && can_perform_cluster_ops())
{ {
handle_low_disk_space_master(); handle_low_disk_space_master();
} }
@ -648,17 +652,6 @@ void MariaDBMonitor::assign_new_master(MariaDBServer* new_master)
m_warn_have_better_master = true; m_warn_have_better_master = true;
} }
/**
 * Set a monitor config parameter to "false". The effect persists over stopMonitor/startMonitor but not
 * MaxScale restart. Only use on boolean config settings.
 *
 * @param setting Name of the setting to disable
 */
void MariaDBMonitor::disable_setting(const std::string& setting)
{
// Overwrites the stored parameter value with the literal string "false".
parameters.set(setting, "false");
}
/** /**
* Check sql text file parameters. A parameter should either be empty or a valid file which can be opened. * Check sql text file parameters. A parameter should either be empty or a valid file which can be opened.
* *

View File

@ -178,6 +178,10 @@ private:
* Causes a topology rebuild on the current tick. */ * Causes a topology rebuild on the current tick. */
bool m_cluster_modified = false; /* Has a cluster operation been performed this loop? Prevents bool m_cluster_modified = false; /* Has a cluster operation been performed this loop? Prevents
* other operations during this tick. */ * other operations during this tick. */
/* Counter for temporary automatic cluster operation disabling. */
int cluster_operation_disable_timer = 0;
CycleMap m_cycles; /* Map from cycle number to cycle member servers */ CycleMap m_cycles; /* Map from cycle number to cycle member servers */
CycleInfo m_master_cycle_status; /* Info about master server cycle from previous round */ CycleInfo m_master_cycle_status; /* Info about master server cycle from previous round */
@ -296,6 +300,9 @@ private:
bool switchover_perform(SwitchoverParams& operation); bool switchover_perform(SwitchoverParams& operation);
bool failover_perform(FailoverParams& op); bool failover_perform(FailoverParams& op);
void delay_auto_cluster_ops();
bool can_perform_cluster_ops();
// Methods used by failover/switchover/rejoin // Methods used by failover/switchover/rejoin
MariaDBServer* select_promotion_target(MariaDBServer* current_master, OperationType op, MariaDBServer* select_promotion_target(MariaDBServer* current_master, OperationType op,
Log log_mode, json_t** error_out); Log log_mode, json_t** error_out);
@ -319,8 +326,6 @@ private:
std::string generate_change_master_cmd(const std::string& master_host, int master_port); std::string generate_change_master_cmd(const std::string& master_host, int master_port);
void wait_cluster_stabilization(GeneralOpData& op, const ServerArray& slaves, void wait_cluster_stabilization(GeneralOpData& op, const ServerArray& slaves,
const MariaDBServer* new_master); const MariaDBServer* new_master);
void report_and_disable(const std::string& operation, const std::string& setting_name,
bool* setting_var);
// Rejoin methods // Rejoin methods
bool cluster_can_be_joined(); bool cluster_can_be_joined();
@ -328,8 +333,6 @@ private:
bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output); bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output);
uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output); uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output);
// Other methods
void disable_setting(const std::string& setting);
bool check_sql_files(); bool check_sql_files();
void enforce_read_only_on_slaves(); void enforce_read_only_on_slaves();
void log_master_changes(); void log_master_changes();