Merge branch '2.2' into 2.3

This commit is contained in:
Esa Korhonen 2019-04-16 16:34:57 +03:00
commit 2ca9337da1
3 changed files with 63 additions and 78 deletions

View File

@ -27,13 +27,11 @@ using maxscale::string_printf;
using maxbase::StopWatch;
using maxbase::Duration;
static const char RE_ENABLE_FMT[] = "To re-enable automatic %s, manually set '%s' to 'true' "
"for monitor '%s' via MaxAdmin or the REST API, or restart MaxScale.";
const char NO_SERVER[] = "Server '%s' is not monitored by '%s'.";
const char FAILOVER_OK[] = "Failover '%s' -> '%s' performed.";
const char FAILOVER_FAIL[] = "Failover '%s' -> '%s' failed.";
const char SWITCHOVER_OK[] = "Switchover '%s' -> '%s' performed.";
const char SWITCHOVER_FAIL[] = "Switchover %s -> %s failed";
const char SWITCHOVER_FAIL[] = "Switchover %s -> %s failed.";
/**
* Run a manual switchover, promoting a new master server and demoting the existing master.
@ -64,14 +62,8 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio
{
string msg = string_printf(SWITCHOVER_FAIL,
op->demotion.target->name(), op->promotion.target->name());
bool failover_setting = config_get_bool(m_monitor->parameters, CN_AUTO_FAILOVER);
if (failover_setting)
{
disable_setting(CN_AUTO_FAILOVER);
msg += ", automatic failover has been disabled";
}
msg += ".";
PRINT_MXS_JSON_ERROR(error_out, "%s", msg.c_str());
delay_auto_cluster_ops();
}
}
else
@ -625,6 +617,7 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
SERVER* master_server = m_master->m_server_base->server;
const char* master_name = master_server->name;
uint32_t servers_joined = 0;
bool rejoin_error = false;
if (!joinable_servers.empty())
{
for (MariaDBServer* joinable : joinable_servers)
@ -656,7 +649,8 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
else
{
PRINT_MXS_JSON_ERROR(output,
"Failed to prepare (demote) standalone server '%s' for rejoin.", name);
"Failed to prepare (demote) standalone server '%s' for rejoin.",
name);
}
}
else
@ -675,8 +669,17 @@ uint32_t MariaDBMonitor::do_rejoin(const ServerArray& joinable_servers, json_t**
servers_joined++;
m_cluster_modified = true;
}
else
{
rejoin_error = true;
}
}
}
if (rejoin_error)
{
delay_auto_cluster_ops();
}
return servers_joined;
}
@ -1474,7 +1477,7 @@ void MariaDBMonitor::handle_auto_failover()
else
{
MXS_ERROR(FAILOVER_FAIL, op->demotion_target->name(), op->promotion.target->name());
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover);
delay_auto_cluster_ops();
}
}
else
@ -1535,26 +1538,10 @@ void MariaDBMonitor::check_cluster_operations_support()
{
const char PROBLEMS[] =
"The backend cluster does not support failover/switchover due to the following reason(s):\n"
"%s\n"
"Automatic failover/switchover has been disabled. They should only be enabled "
"after the above issues have been resolved.";
string p1 = string_printf(PROBLEMS, all_reasons.c_str());
string p2 = string_printf(RE_ENABLE_FMT, "failover", CN_AUTO_FAILOVER, m_monitor->name);
string p3 = string_printf(RE_ENABLE_FMT, "switchover", CN_SWITCHOVER_ON_LOW_DISK_SPACE,
m_monitor->name);
string total_msg = p1 + " " + p2 + " " + p3;
MXS_ERROR("%s", total_msg.c_str());
if (m_auto_failover)
{
m_auto_failover = false;
disable_setting(CN_AUTO_FAILOVER);
}
if (m_switchover_on_low_disk_space)
{
m_switchover_on_low_disk_space = false;
disable_setting(CN_SWITCHOVER_ON_LOW_DISK_SPACE);
}
"%s\n";
string msg = string_printf(PROBLEMS, all_reasons.c_str());
MXS_ERROR("%s", msg.c_str());
delay_auto_cluster_ops();
}
}
@ -1719,6 +1706,7 @@ MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demotion_se
void MariaDBMonitor::enforce_read_only_on_slaves()
{
const char QUERY[] = "SET GLOBAL read_only=1;";
bool error = false;
for (MariaDBServer* server : m_servers)
{
if (server->is_slave() && !server->is_read_only()
@ -1732,9 +1720,15 @@ void MariaDBMonitor::enforce_read_only_on_slaves()
else
{
MXS_ERROR("Setting read_only on '%s' failed: '%s'.", server->name(), mysql_error(conn));
error = true;
}
}
}
if (error)
{
delay_auto_cluster_ops();
}
}
void MariaDBMonitor::handle_low_disk_space_master()
@ -1762,8 +1756,7 @@ void MariaDBMonitor::handle_low_disk_space_master()
else
{
MXS_ERROR(SWITCHOVER_FAIL, op->demotion.target->name(), op->promotion.target->name());
report_and_disable("switchover", CN_SWITCHOVER_ON_LOW_DISK_SPACE,
&m_switchover_on_low_disk_space);
delay_auto_cluster_ops();
}
}
else
@ -1798,19 +1791,6 @@ void MariaDBMonitor::handle_auto_rejoin()
// get_joinable_servers prints an error if master is unresponsive
}
void MariaDBMonitor::report_and_disable(const string& operation, const string& setting_name,
bool* setting_var)
{
string p1 = string_printf("Automatic %s failed, disabling automatic %s.",
operation.c_str(),
operation.c_str());
string p2 = string_printf(RE_ENABLE_FMT, operation.c_str(), setting_name.c_str(), m_monitor->name);
string error_msg = p1 + " " + p2;
MXS_ERROR("%s", error_msg.c_str());
*setting_var = false;
disable_setting(setting_name.c_str());
}
/**
* Check that the slaves to demotion target are using gtid replication and that the gtid domain of the
* cluster is defined. Only the slave connections to the demotion target are checked.
@ -1878,6 +1858,24 @@ ServerArray MariaDBMonitor::get_redirectables(const MariaDBServer* old_master,
return redirectable_slaves;
}
/**
 * Suspend automatic cluster operations for m_failcount monitor ticks by arming
 * cluster_operation_disable_timer. A notice is logged only if at least one automatic
 * operation (failover, rejoin, read-only enforcement, low-disk-space switchover) is enabled.
 */
void MariaDBMonitor::delay_auto_cluster_ops()
{
    const bool any_auto_op_enabled = m_auto_failover
        || m_auto_rejoin
        || m_enforce_read_only_slaves
        || m_switchover_on_low_disk_space;

    if (any_auto_op_enabled)
    {
        MXS_NOTICE("Disabling automatic cluster operations for %i monitor ticks.", m_failcount);
    }

    // The timer is set regardless of the settings above. One extra tick is added because
    // the start of the next tick subtracts 1 from the timer.
    cluster_operation_disable_timer = m_failcount + 1;
}
/**
 * Check whether automatic cluster operations may be run right now.
 *
 * @return True if MaxScale is not passive, the cluster operation disable timer has run out
 * and no cluster modification has been performed during this loop.
 */
bool MariaDBMonitor::can_perform_cluster_ops()
{
    if (config_get_global_options()->passive)
    {
        return false;
    }

    if (cluster_operation_disable_timer > 0)
    {
        // Operations are still temporarily disabled.
        return false;
    }

    return !m_cluster_modified;
}
MariaDBMonitor::SwitchoverParams::SwitchoverParams(const ServerOperation& promotion,
const ServerOperation& demotion,
const GeneralOpData& general)

View File

@ -26,8 +26,6 @@
#include <maxscale/routingworker.h>
#include <maxscale/secrets.h>
#include <maxscale/utils.hh>
// TODO: For monitor_add_parameters
#include "../../../core/internal/monitor.h"
using std::string;
using maxscale::string_printf;
@ -440,6 +438,11 @@ void MariaDBMonitor::tick()
mon_srv->mon_prev_status = status;
}
if (cluster_operation_disable_timer > 0)
{
cluster_operation_disable_timer--;
}
// Query all servers for their status.
for (MariaDBServer* server : m_servers)
{
@ -457,7 +460,7 @@ void MariaDBMonitor::tick()
update_topology();
m_cluster_topology_changed = false;
// If cluster operations are enabled, check topology support and disable if needed.
if (m_auto_failover || m_switchover_on_low_disk_space)
if (m_auto_failover || m_switchover_on_low_disk_space || m_auto_rejoin)
{
check_cluster_operations_support();
}
@ -530,16 +533,16 @@ void MariaDBMonitor::process_state_changes()
}
}
if (!config_get_global_options()->passive)
if (can_perform_cluster_ops())
{
if (m_auto_failover && !m_cluster_modified)
if (m_auto_failover)
{
handle_auto_failover();
}
// Do not auto-join servers on this monitor loop if a failover (or any other cluster modification)
// has been performed, as server states have not been updated yet. It will happen next iteration.
if (m_auto_rejoin && !m_cluster_modified && cluster_can_be_joined())
if (m_auto_rejoin && cluster_can_be_joined() && can_perform_cluster_ops())
{
// Check if any servers should be autojoined to the cluster and try to join them.
handle_auto_rejoin();
@ -548,13 +551,13 @@ void MariaDBMonitor::process_state_changes()
/* Check if any slave servers have read-only off and turn it on if user so wishes. Again, do not
* perform this if cluster has been modified this loop since it may not be clear which server
* should be a slave. */
if (m_enforce_read_only_slaves && !m_cluster_modified)
if (m_enforce_read_only_slaves && can_perform_cluster_ops())
{
enforce_read_only_on_slaves();
}
/* Check if the master server is on low disk space and act on it. */
if (m_switchover_on_low_disk_space && !m_cluster_modified)
if (m_switchover_on_low_disk_space && can_perform_cluster_ops())
{
handle_low_disk_space_master();
}
@ -683,25 +686,6 @@ void MariaDBMonitor::assign_new_master(MariaDBServer* new_master)
m_warn_have_better_master = true;
}
/**
* Set a monitor config parameter to "false". The effect persists over stopMonitor/startMonitor but not
* MaxScale restart. Only use on boolean config settings.
*
* @param setting_name Setting to disable
*/
void MariaDBMonitor::disable_setting(const std::string& setting)
{
Worker* worker = static_cast<Worker*>(mxs_rworker_get(MXS_RWORKER_MAIN));
worker->execute([=]() {
MXS_CONFIG_PARAMETER p = {};
p.name = const_cast<char*>(setting.c_str());
p.value = const_cast<char*>("false");
monitor_add_parameters(m_monitor, &p);
},
EXECUTE_AUTO);
}
/**
* Check sql text file parameters. A parameter should either be empty or a valid file which can be opened.
*

View File

@ -177,6 +177,10 @@ private:
* Causes a topology rebuild on the current tick. */
bool m_cluster_modified = false; /* Has a cluster operation been performed this loop? Prevents
* other operations during this tick. */
/* Counter for temporary automatic cluster operation disabling. */
int cluster_operation_disable_timer = 0;
CycleMap m_cycles; /* Map from cycle number to cycle member servers */
CycleInfo m_master_cycle_status; /* Info about master server cycle from previous round */
@ -296,6 +300,9 @@ private:
bool switchover_perform(SwitchoverParams& operation);
bool failover_perform(FailoverParams& op);
void delay_auto_cluster_ops();
bool can_perform_cluster_ops();
// Methods used by failover/switchover/rejoin
MariaDBServer* select_promotion_target(MariaDBServer* current_master, OperationType op,
Log log_mode, json_t** error_out);
@ -319,8 +326,6 @@ private:
std::string generate_change_master_cmd(const std::string& master_host, int master_port);
void wait_cluster_stabilization(GeneralOpData& op, const ServerArray& slaves,
const MariaDBServer* new_master);
void report_and_disable(const std::string& operation, const std::string& setting_name,
bool* setting_var);
// Rejoin methods
bool cluster_can_be_joined();
@ -328,8 +333,6 @@ private:
bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output);
uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output);
// Other methods
void disable_setting(const std::string& setting);
bool check_sql_files();
void enforce_read_only_on_slaves();
void log_master_changes();