Merge branch '2.3' into develop
This commit is contained in:
@ -16,7 +16,7 @@ then
|
||||
build-essential libssl-dev ncurses-dev bison flex \
|
||||
perl libtool libpcre3-dev tcl tcl-dev uuid \
|
||||
uuid-dev libsqlite3-dev liblzma-dev libpam0g-dev pkg-config \
|
||||
libedit-dev
|
||||
libedit-dev libsystemd-dev
|
||||
|
||||
## separate libgnutls installation process for Ubuntu Trusty
|
||||
cat /etc/*release | grep -E "Trusty|wheezy"
|
||||
@ -71,6 +71,9 @@ else
|
||||
# Attempt to install libasan, it'll only work on CentOS 7
|
||||
sudo yum install -y --nogpgcheck libasan
|
||||
|
||||
# Attempt to install systemd-devel, doesn't work on CentOS 6
|
||||
sudo yum install -y systemd-devel
|
||||
|
||||
grep "release 6" /etc/redhat-release
|
||||
if [ $? == 0 ]
|
||||
then
|
||||
|
||||
@ -141,6 +141,9 @@ switchover and rejoin-specific parameters are listed in their own
|
||||
|
||||
Deprecated and unused as of MaxScale 2.3. Can be defined but is ignored.
|
||||
|
||||
Is effectively always on. The monitor uses the "Seconds_Behind_Master"-field of
|
||||
"SHOW SLAVE STATUS" to get the replication lag.
|
||||
|
||||
### `detect_stale_master`
|
||||
|
||||
Allow previous master to be available even in case of stopped or misconfigured
|
||||
@ -365,12 +368,17 @@ operations.
|
||||
### Manual activation
|
||||
|
||||
Cluster operations can be activated manually through the REST API, MaxCtrl or
|
||||
MaxAdmin. The commands are only performed when MaxScale is in active mode. All
|
||||
commands require the monitor instance name as the first parameter. Failover
|
||||
MaxAdmin. The commands are only performed when MaxScale is in active mode. The
|
||||
commands generally match their automatic versions. The exception is _rejoin_, in
|
||||
which the manual command allows rejoining even when the joining server has empty
|
||||
gtid:s. This rule allows the user to force a rejoin on a server without binary
|
||||
logs.
|
||||
|
||||
All commands require the monitor instance name as the first parameter. Failover
|
||||
selects the new master server automatically and does not require additional
|
||||
parameters. Rejoin requires the name of the joining server as second parameter.
|
||||
Replication reset accepts the name of the new master server as second
|
||||
parameter. If not given, the current master is selected.
|
||||
Replication reset accepts the name of the new master server as second parameter.
|
||||
If not given, the current master is selected.
|
||||
|
||||
Switchover takes one to three parameters. If only the monitor name is given,
|
||||
switchover will autoselect both the slave to promote and the current master as
|
||||
@ -398,8 +406,8 @@ to demote (OldMasterServ). For rejoin, the server to join (OldMasterServ) is
|
||||
required. Replication reset requires the server to promote (NewMasterServ).
|
||||
|
||||
It is safe to perform manual operations even with automatic failover, switchover
|
||||
or rejoin enabled since the automatic operations cannot happen simultaneously
|
||||
with the manual one.
|
||||
or rejoin enabled since automatic operations cannot happen simultaneously
|
||||
with manual ones.
|
||||
|
||||
If a switchover or failover fails, automatic failover is disabled to prevent
|
||||
master changes to a possibly malfunctioning cluster. Automatic failover can be
|
||||
|
||||
@ -115,16 +115,42 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||
if (mon_slave_cand)
|
||||
{
|
||||
MariaDBServer* slave_cand = get_server_info(mon_slave_cand);
|
||||
|
||||
if (server_is_rejoin_suspect(slave_cand, output))
|
||||
{
|
||||
if (m_master->update_gtids())
|
||||
string gtid_update_error;
|
||||
if (m_master->update_gtids(&gtid_update_error))
|
||||
{
|
||||
// The manual version of rejoin does not need to be as careful as the automatic one.
|
||||
// The rules are mostly the same, the only difference is that a server with empty gtid:s
|
||||
// can be rejoined manually.
|
||||
// TODO: Add the warning to JSON output.
|
||||
string no_rejoin_reason;
|
||||
if (slave_cand->can_replicate_from(m_master, &no_rejoin_reason))
|
||||
bool safe_rejoin = slave_cand->can_replicate_from(m_master, &no_rejoin_reason);
|
||||
bool empty_gtid = slave_cand->m_gtid_current_pos.empty();
|
||||
bool rejoin_allowed = false;
|
||||
if (safe_rejoin)
|
||||
{
|
||||
ServerArray joinable_server;
|
||||
joinable_server.push_back(slave_cand);
|
||||
rejoin_allowed = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (empty_gtid)
|
||||
{
|
||||
rejoin_allowed = true;
|
||||
MXB_WARNING("gtid_curren_pos of %s is empty. Manual rejoin is unsafe "
|
||||
"but allowed.", rejoin_serv_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "%s cannot replicate from master server %s: %s",
|
||||
rejoin_serv_name, m_master->name(),
|
||||
no_rejoin_reason.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (rejoin_allowed)
|
||||
{
|
||||
ServerArray joinable_server = {slave_cand};
|
||||
if (do_rejoin(joinable_server, output) == 1)
|
||||
{
|
||||
rval = true;
|
||||
@ -135,28 +161,18 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"Server '%s' cannot replicate from cluster master '%s': "
|
||||
"%s.",
|
||||
rejoin_serv_name,
|
||||
m_master->name(),
|
||||
no_rejoin_reason.c_str());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"Cluster master '%s' gtid info could not be updated.",
|
||||
m_master->name());
|
||||
"The GTIDs of master server %s could not be updated: %s",
|
||||
m_master->name(), gtid_update_error.c_str());
|
||||
}
|
||||
} // server_is_rejoin_suspect has added any error messages to the output, no need to print here
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"The given server '%s' is not monitored by this monitor.",
|
||||
PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.",
|
||||
rejoin_serv_name);
|
||||
}
|
||||
}
|
||||
@ -687,7 +703,8 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
|
||||
bool comm_ok = true;
|
||||
if (!suspects.empty())
|
||||
{
|
||||
if (m_master->update_gtids())
|
||||
string gtid_update_error;
|
||||
if (m_master->update_gtids(&gtid_update_error))
|
||||
{
|
||||
for (size_t i = 0; i < suspects.size(); i++)
|
||||
{
|
||||
@ -710,6 +727,8 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_ERROR("The GTIDs of master server %s could not be updated while attempting an automatic "
|
||||
"rejoin: %s", m_master->name(), gtid_update_error.c_str());
|
||||
comm_ok = false;
|
||||
}
|
||||
}
|
||||
@ -1760,10 +1779,7 @@ void MariaDBMonitor::handle_auto_rejoin()
|
||||
MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_ERROR("Query error to master '%s' prevented a possible rejoin operation.", m_master->name());
|
||||
}
|
||||
// get_joinable_servers prints an error if master is unresponsive
|
||||
}
|
||||
|
||||
void MariaDBMonitor::report_and_disable(const string& operation, const string& setting_name,
|
||||
@ -1864,4 +1880,4 @@ MariaDBMonitor::FailoverParams::FailoverParams(const ServerOperation& promotion,
|
||||
, demotion_target(demotion_target)
|
||||
, general(general)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
@ -693,35 +693,32 @@ json_t* MariaDBServer::to_json() const
|
||||
return result;
|
||||
}
|
||||
|
||||
bool MariaDBServer::can_replicate_from(MariaDBServer* master, string* error_out)
|
||||
bool MariaDBServer::can_replicate_from(MariaDBServer* master, string* reason_out)
|
||||
{
|
||||
bool rval = false;
|
||||
if (update_gtids())
|
||||
mxb_assert(reason_out);
|
||||
mxb_assert(is_usable()); // The server must be running.
|
||||
|
||||
bool can_replicate = false;
|
||||
if (m_gtid_current_pos.empty())
|
||||
{
|
||||
if (m_gtid_current_pos.empty())
|
||||
{
|
||||
*error_out = string("'") + name() + "' does not have a valid 'gtid_current_pos'.";
|
||||
}
|
||||
else if (master->m_gtid_binlog_pos.empty())
|
||||
{
|
||||
*error_out = string("'") + master->name() + "' does not have a valid 'gtid_binlog_pos'.";
|
||||
}
|
||||
else
|
||||
{
|
||||
rval = m_gtid_current_pos.can_replicate_from(master->m_gtid_binlog_pos);
|
||||
if (!rval)
|
||||
{
|
||||
*error_out = string("gtid_current_pos of '") + name() + "' ("
|
||||
+ m_gtid_current_pos.to_string() + ") is incompatible with gtid_binlog_pos of '"
|
||||
+ master->name() + "' (" + master->m_gtid_binlog_pos.to_string() + ").";
|
||||
}
|
||||
}
|
||||
*reason_out = string_printf("%s does not have a valid gtid_current_pos.", name());
|
||||
}
|
||||
else if (master->m_gtid_binlog_pos.empty())
|
||||
{
|
||||
*reason_out = string_printf("%s does not have a valid gtid_binlog_pos.", master->name());
|
||||
}
|
||||
else
|
||||
{
|
||||
*error_out = string("Server '") + name() + "' could not be queried.";
|
||||
can_replicate = m_gtid_current_pos.can_replicate_from(master->m_gtid_binlog_pos);
|
||||
if (!can_replicate)
|
||||
{
|
||||
*reason_out = string_printf("gtid_current_pos of %s (%s) is incompatible with "
|
||||
"gtid_binlog_pos of %s (%s).",
|
||||
name(), m_gtid_current_pos.to_string().c_str(),
|
||||
master->name(), master->m_gtid_binlog_pos.to_string().c_str());
|
||||
}
|
||||
}
|
||||
return rval;
|
||||
return can_replicate;
|
||||
}
|
||||
|
||||
bool MariaDBServer::redirect_one_slave(const string& change_cmd)
|
||||
|
||||
@ -261,14 +261,16 @@ public:
|
||||
const SlaveStatus* slave_connection_status_host_port(const MariaDBServer* target) const;
|
||||
|
||||
/**
|
||||
* Checks if this server can replicate from master. Only considers gtid:s and only detects obvious errors.
|
||||
* The non-detected errors will mostly be detected once the slave tries to start replicating.
|
||||
* Checks if this server can replicate from master. Only considers gtid:s and only detects obvious
|
||||
* errors. The non-detected errors will mostly be detected once the slave tries to start replicating.
|
||||
* Before calling this, update the gtid:s of the master so that the gtid:s of the master are more
|
||||
* recent than those of this server.
|
||||
*
|
||||
* @param master_info Master server
|
||||
* @param error_out Details the reason for a negative result
|
||||
* @param reason_out Details the reason for a negative result
|
||||
* @return True if slave can replicate from master
|
||||
*/
|
||||
bool can_replicate_from(MariaDBServer* master, std::string* error_out);
|
||||
bool can_replicate_from(MariaDBServer* master, std::string* reason_out);
|
||||
|
||||
/**
|
||||
* Redirect one slave server to another master
|
||||
|
||||
Reference in New Issue
Block a user