Merge branch '2.3' into develop
This commit is contained in:
@ -16,7 +16,7 @@ then
|
|||||||
build-essential libssl-dev ncurses-dev bison flex \
|
build-essential libssl-dev ncurses-dev bison flex \
|
||||||
perl libtool libpcre3-dev tcl tcl-dev uuid \
|
perl libtool libpcre3-dev tcl tcl-dev uuid \
|
||||||
uuid-dev libsqlite3-dev liblzma-dev libpam0g-dev pkg-config \
|
uuid-dev libsqlite3-dev liblzma-dev libpam0g-dev pkg-config \
|
||||||
libedit-dev
|
libedit-dev libsystemd-dev
|
||||||
|
|
||||||
## separate libgnutls installation process for Ubuntu Trusty
|
## separate libgnutls installation process for Ubuntu Trusty
|
||||||
cat /etc/*release | grep -E "Trusty|wheezy"
|
cat /etc/*release | grep -E "Trusty|wheezy"
|
||||||
@ -71,6 +71,9 @@ else
|
|||||||
# Attempt to install libasan, it'll only work on CentOS 7
|
# Attempt to install libasan, it'll only work on CentOS 7
|
||||||
sudo yum install -y --nogpgcheck libasan
|
sudo yum install -y --nogpgcheck libasan
|
||||||
|
|
||||||
|
# Attempt to install systemd-devel, doesn't work on CentOS 6
|
||||||
|
sudo yum install -y systemd-devel
|
||||||
|
|
||||||
grep "release 6" /etc/redhat-release
|
grep "release 6" /etc/redhat-release
|
||||||
if [ $? == 0 ]
|
if [ $? == 0 ]
|
||||||
then
|
then
|
||||||
|
|||||||
@ -141,6 +141,9 @@ switchover and rejoin-specific parameters are listed in their own
|
|||||||
|
|
||||||
Deprecated and unused as of MaxScale 2.3. Can be defined but is ignored.
|
Deprecated and unused as of MaxScale 2.3. Can be defined but is ignored.
|
||||||
|
|
||||||
|
Is effectively always on. The monitor uses the "Seconds_Behind_Master"-field of
|
||||||
|
"SHOW SLAVE STATUS" to get the replication lag.
|
||||||
|
|
||||||
### `detect_stale_master`
|
### `detect_stale_master`
|
||||||
|
|
||||||
Allow previous master to be available even in case of stopped or misconfigured
|
Allow previous master to be available even in case of stopped or misconfigured
|
||||||
@ -365,12 +368,17 @@ operations.
|
|||||||
### Manual activation
|
### Manual activation
|
||||||
|
|
||||||
Cluster operations can be activated manually through the REST API, MaxCtrl or
|
Cluster operations can be activated manually through the REST API, MaxCtrl or
|
||||||
MaxAdmin. The commands are only performed when MaxScale is in active mode. All
|
MaxAdmin. The commands are only performed when MaxScale is in active mode. The
|
||||||
commands require the monitor instance name as the first parameter. Failover
|
commands generally match their automatic versions. The exception is _rejoin_, in
|
||||||
|
which the manual command allows rejoining even when the joining server has empty
|
||||||
|
gtid:s. This rule allows the user to force a rejoin on a server without binary
|
||||||
|
logs.
|
||||||
|
|
||||||
|
All commands require the monitor instance name as the first parameter. Failover
|
||||||
selects the new master server automatically and does not require additional
|
selects the new master server automatically and does not require additional
|
||||||
parameters. Rejoin requires the name of the joining server as second parameter.
|
parameters. Rejoin requires the name of the joining server as second parameter.
|
||||||
Replication reset accepts the name of the new master server as second
|
Replication reset accepts the name of the new master server as second parameter.
|
||||||
parameter. If not given, the current master is selected.
|
If not given, the current master is selected.
|
||||||
|
|
||||||
Switchover takes one to three parameters. If only the monitor name is given,
|
Switchover takes one to three parameters. If only the monitor name is given,
|
||||||
switchover will autoselect both the slave to promote and the current master as
|
switchover will autoselect both the slave to promote and the current master as
|
||||||
@ -398,8 +406,8 @@ to demote (OldMasterServ). For rejoin, the server to join (OldMasterServ) is
|
|||||||
required. Replication reset requires the server to promote (NewMasterServ).
|
required. Replication reset requires the server to promote (NewMasterServ).
|
||||||
|
|
||||||
It is safe to perform manual operations even with automatic failover, switchover
|
It is safe to perform manual operations even with automatic failover, switchover
|
||||||
or rejoin enabled since the automatic operations cannot happen simultaneously
|
or rejoin enabled since automatic operations cannot happen simultaneously
|
||||||
with the manual one.
|
with manual ones.
|
||||||
|
|
||||||
If a switchover or failover fails, automatic failover is disabled to prevent
|
If a switchover or failover fails, automatic failover is disabled to prevent
|
||||||
master changes to a possibly malfunctioning cluster. Automatic failover can be
|
master changes to a possibly malfunctioning cluster. Automatic failover can be
|
||||||
|
|||||||
@ -115,16 +115,42 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
|||||||
if (mon_slave_cand)
|
if (mon_slave_cand)
|
||||||
{
|
{
|
||||||
MariaDBServer* slave_cand = get_server_info(mon_slave_cand);
|
MariaDBServer* slave_cand = get_server_info(mon_slave_cand);
|
||||||
|
|
||||||
if (server_is_rejoin_suspect(slave_cand, output))
|
if (server_is_rejoin_suspect(slave_cand, output))
|
||||||
{
|
{
|
||||||
if (m_master->update_gtids())
|
string gtid_update_error;
|
||||||
|
if (m_master->update_gtids(&gtid_update_error))
|
||||||
{
|
{
|
||||||
|
// The manual version of rejoin does not need to be as careful as the automatic one.
|
||||||
|
// The rules are mostly the same, the only difference is that a server with empty gtid:s
|
||||||
|
// can be rejoined manually.
|
||||||
|
// TODO: Add the warning to JSON output.
|
||||||
string no_rejoin_reason;
|
string no_rejoin_reason;
|
||||||
if (slave_cand->can_replicate_from(m_master, &no_rejoin_reason))
|
bool safe_rejoin = slave_cand->can_replicate_from(m_master, &no_rejoin_reason);
|
||||||
|
bool empty_gtid = slave_cand->m_gtid_current_pos.empty();
|
||||||
|
bool rejoin_allowed = false;
|
||||||
|
if (safe_rejoin)
|
||||||
{
|
{
|
||||||
ServerArray joinable_server;
|
rejoin_allowed = true;
|
||||||
joinable_server.push_back(slave_cand);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (empty_gtid)
|
||||||
|
{
|
||||||
|
rejoin_allowed = true;
|
||||||
|
MXB_WARNING("gtid_curren_pos of %s is empty. Manual rejoin is unsafe "
|
||||||
|
"but allowed.", rejoin_serv_name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(output, "%s cannot replicate from master server %s: %s",
|
||||||
|
rejoin_serv_name, m_master->name(),
|
||||||
|
no_rejoin_reason.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rejoin_allowed)
|
||||||
|
{
|
||||||
|
ServerArray joinable_server = {slave_cand};
|
||||||
if (do_rejoin(joinable_server, output) == 1)
|
if (do_rejoin(joinable_server, output) == 1)
|
||||||
{
|
{
|
||||||
rval = true;
|
rval = true;
|
||||||
@ -135,28 +161,18 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
|||||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
PRINT_MXS_JSON_ERROR(output,
|
|
||||||
"Server '%s' cannot replicate from cluster master '%s': "
|
|
||||||
"%s.",
|
|
||||||
rejoin_serv_name,
|
|
||||||
m_master->name(),
|
|
||||||
no_rejoin_reason.c_str());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(output,
|
PRINT_MXS_JSON_ERROR(output,
|
||||||
"Cluster master '%s' gtid info could not be updated.",
|
"The GTIDs of master server %s could not be updated: %s",
|
||||||
m_master->name());
|
m_master->name(), gtid_update_error.c_str());
|
||||||
}
|
}
|
||||||
} // server_is_rejoin_suspect has added any error messages to the output, no need to print here
|
} // server_is_rejoin_suspect has added any error messages to the output, no need to print here
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(output,
|
PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.",
|
||||||
"The given server '%s' is not monitored by this monitor.",
|
|
||||||
rejoin_serv_name);
|
rejoin_serv_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -687,7 +703,8 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
|
|||||||
bool comm_ok = true;
|
bool comm_ok = true;
|
||||||
if (!suspects.empty())
|
if (!suspects.empty())
|
||||||
{
|
{
|
||||||
if (m_master->update_gtids())
|
string gtid_update_error;
|
||||||
|
if (m_master->update_gtids(&gtid_update_error))
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < suspects.size(); i++)
|
for (size_t i = 0; i < suspects.size(); i++)
|
||||||
{
|
{
|
||||||
@ -710,6 +727,8 @@ bool MariaDBMonitor::get_joinable_servers(ServerArray* output)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
MXS_ERROR("The GTIDs of master server %s could not be updated while attempting an automatic "
|
||||||
|
"rejoin: %s", m_master->name(), gtid_update_error.c_str());
|
||||||
comm_ok = false;
|
comm_ok = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1760,10 +1779,7 @@ void MariaDBMonitor::handle_auto_rejoin()
|
|||||||
MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins);
|
MXS_NOTICE("%d server(s) redirected or rejoined the cluster.", joins);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
// get_joinable_servers prints an error if master is unresponsive
|
||||||
{
|
|
||||||
MXS_ERROR("Query error to master '%s' prevented a possible rejoin operation.", m_master->name());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MariaDBMonitor::report_and_disable(const string& operation, const string& setting_name,
|
void MariaDBMonitor::report_and_disable(const string& operation, const string& setting_name,
|
||||||
|
|||||||
@ -693,35 +693,32 @@ json_t* MariaDBServer::to_json() const
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::can_replicate_from(MariaDBServer* master, string* error_out)
|
bool MariaDBServer::can_replicate_from(MariaDBServer* master, string* reason_out)
|
||||||
{
|
|
||||||
bool rval = false;
|
|
||||||
if (update_gtids())
|
|
||||||
{
|
{
|
||||||
|
mxb_assert(reason_out);
|
||||||
|
mxb_assert(is_usable()); // The server must be running.
|
||||||
|
|
||||||
|
bool can_replicate = false;
|
||||||
if (m_gtid_current_pos.empty())
|
if (m_gtid_current_pos.empty())
|
||||||
{
|
{
|
||||||
*error_out = string("'") + name() + "' does not have a valid 'gtid_current_pos'.";
|
*reason_out = string_printf("%s does not have a valid gtid_current_pos.", name());
|
||||||
}
|
}
|
||||||
else if (master->m_gtid_binlog_pos.empty())
|
else if (master->m_gtid_binlog_pos.empty())
|
||||||
{
|
{
|
||||||
*error_out = string("'") + master->name() + "' does not have a valid 'gtid_binlog_pos'.";
|
*reason_out = string_printf("%s does not have a valid gtid_binlog_pos.", master->name());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
rval = m_gtid_current_pos.can_replicate_from(master->m_gtid_binlog_pos);
|
can_replicate = m_gtid_current_pos.can_replicate_from(master->m_gtid_binlog_pos);
|
||||||
if (!rval)
|
if (!can_replicate)
|
||||||
{
|
{
|
||||||
*error_out = string("gtid_current_pos of '") + name() + "' ("
|
*reason_out = string_printf("gtid_current_pos of %s (%s) is incompatible with "
|
||||||
+ m_gtid_current_pos.to_string() + ") is incompatible with gtid_binlog_pos of '"
|
"gtid_binlog_pos of %s (%s).",
|
||||||
+ master->name() + "' (" + master->m_gtid_binlog_pos.to_string() + ").";
|
name(), m_gtid_current_pos.to_string().c_str(),
|
||||||
|
master->name(), master->m_gtid_binlog_pos.to_string().c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
return can_replicate;
|
||||||
else
|
|
||||||
{
|
|
||||||
*error_out = string("Server '") + name() + "' could not be queried.";
|
|
||||||
}
|
|
||||||
return rval;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::redirect_one_slave(const string& change_cmd)
|
bool MariaDBServer::redirect_one_slave(const string& change_cmd)
|
||||||
|
|||||||
@ -261,14 +261,16 @@ public:
|
|||||||
const SlaveStatus* slave_connection_status_host_port(const MariaDBServer* target) const;
|
const SlaveStatus* slave_connection_status_host_port(const MariaDBServer* target) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if this server can replicate from master. Only considers gtid:s and only detects obvious errors.
|
* Checks if this server can replicate from master. Only considers gtid:s and only detects obvious
|
||||||
* The non-detected errors will mostly be detected once the slave tries to start replicating.
|
* errors. The non-detected errors will mostly be detected once the slave tries to start replicating.
|
||||||
|
* Before calling this, update the gtid:s of the master so that the gtid:s of the master are more
|
||||||
|
* recent than those of this server.
|
||||||
*
|
*
|
||||||
* @param master_info Master server
|
* @param master_info Master server
|
||||||
* @param error_out Details the reason for a negative result
|
* @param reason_out Details the reason for a negative result
|
||||||
* @return True if slave can replicate from master
|
* @return True if slave can replicate from master
|
||||||
*/
|
*/
|
||||||
bool can_replicate_from(MariaDBServer* master, std::string* error_out);
|
bool can_replicate_from(MariaDBServer* master, std::string* reason_out);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Redirect one slave server to another master
|
* Redirect one slave server to another master
|
||||||
|
|||||||
Reference in New Issue
Block a user