MXS-2158 Relax requirements for manual rejoin
The operation is now allowed even if the rejoining server has empty gtid:s. Auto-rejoin keeps the safeties on.
This commit is contained in:
parent
6a1cfddb43
commit
bba0bc0f31
@ -368,12 +368,17 @@ operations.
|
||||
### Manual activation
|
||||
|
||||
Cluster operations can be activated manually through the REST API, MaxCtrl or
|
||||
MaxAdmin. The commands are only performed when MaxScale is in active mode. All
|
||||
commands require the monitor instance name as the first parameter. Failover
|
||||
MaxAdmin. The commands are only performed when MaxScale is in active mode. The
|
||||
commands generally match their automatic versions. The exception is _rejoin_, in
|
||||
which the manual command allows rejoining even when the joining server has empty
|
||||
gtid:s. This rule allows the user to force a rejoin on a server without binary
|
||||
logs.
|
||||
|
||||
All commands require the monitor instance name as the first parameter. Failover
|
||||
selects the new master server automatically and does not require additional
|
||||
parameters. Rejoin requires the name of the joining server as the second parameter.
|
||||
Replication reset accepts the name of the new master server as second
|
||||
parameter. If not given, the current master is selected.
|
||||
Replication reset accepts the name of the new master server as the second parameter.
|
||||
If not given, the current master is selected.
|
||||
|
||||
Switchover takes one to three parameters. If only the monitor name is given,
|
||||
switchover will autoselect both the slave to promote and the current master as
|
||||
@ -401,8 +406,8 @@ to demote (OldMasterServ). For rejoin, the server to join (OldMasterServ) is
|
||||
required. Replication reset requires the server to promote (NewMasterServ).
|
||||
|
||||
It is safe to perform manual operations even with automatic failover, switchover
|
||||
or rejoin enabled since the automatic operations cannot happen simultaneously
|
||||
with the manual one.
|
||||
or rejoin enabled since automatic operations cannot happen simultaneously
|
||||
with manual ones.
|
||||
|
||||
If a switchover or failover fails, automatic failover is disabled to prevent
|
||||
master changes to a possibly malfunctioning cluster. Automatic failover can be
|
||||
|
@ -115,17 +115,42 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||
if (mon_slave_cand)
|
||||
{
|
||||
MariaDBServer* slave_cand = get_server_info(mon_slave_cand);
|
||||
|
||||
if (server_is_rejoin_suspect(slave_cand, output))
|
||||
{
|
||||
string gtid_update_error;
|
||||
if (m_master->update_gtids(&gtid_update_error))
|
||||
{
|
||||
// The manual version of rejoin does not need to be as careful as the automatic one.
|
||||
// The rules are mostly the same, the only difference is that a server with empty gtid:s
|
||||
// can be rejoined manually.
|
||||
// TODO: Add the warning to JSON output.
|
||||
string no_rejoin_reason;
|
||||
if (slave_cand->can_replicate_from(m_master, &no_rejoin_reason))
|
||||
bool safe_rejoin = slave_cand->can_replicate_from(m_master, &no_rejoin_reason);
|
||||
bool empty_gtid = slave_cand->m_gtid_current_pos.empty();
|
||||
bool rejoin_allowed = false;
|
||||
if (safe_rejoin)
|
||||
{
|
||||
ServerArray joinable_server;
|
||||
joinable_server.push_back(slave_cand);
|
||||
rejoin_allowed = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (empty_gtid)
|
||||
{
|
||||
rejoin_allowed = true;
|
||||
MXB_WARNING("gtid_curren_pos of %s is empty. Manual rejoin is unsafe "
|
||||
"but allowed.", rejoin_serv_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output, "%s cannot replicate from master server %s: %s",
|
||||
rejoin_serv_name, m_master->name(),
|
||||
no_rejoin_reason.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (rejoin_allowed)
|
||||
{
|
||||
ServerArray joinable_server = {slave_cand};
|
||||
if (do_rejoin(joinable_server, output) == 1)
|
||||
{
|
||||
rval = true;
|
||||
@ -136,12 +161,6 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"%s cannot replicate from cluster master %s: %s.",
|
||||
rejoin_serv_name, m_master->name(), no_rejoin_reason.c_str());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -153,8 +172,7 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT_MXS_JSON_ERROR(output,
|
||||
"The given server '%s' is not monitored by this monitor.",
|
||||
PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.",
|
||||
rejoin_serv_name);
|
||||
}
|
||||
}
|
||||
@ -1862,4 +1880,4 @@ MariaDBMonitor::FailoverParams::FailoverParams(const ServerOperation& promotion,
|
||||
, demotion_target(demotion_target)
|
||||
, general(general)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user