MXS-2158 Relax requirements for manual rejoin
Manual rejoin is now allowed even if the rejoining server has an empty gtid_current_pos. Automatic rejoin keeps the safety checks on.
This commit is contained in:
@ -368,12 +368,17 @@ operations.
|
|||||||
### Manual activation
|
### Manual activation
|
||||||
|
|
||||||
Cluster operations can be activated manually through the REST API, MaxCtrl or
|
Cluster operations can be activated manually through the REST API, MaxCtrl or
|
||||||
MaxAdmin. The commands are only performed when MaxScale is in active mode. All
|
MaxAdmin. The commands are only performed when MaxScale is in active mode. The
|
||||||
commands require the monitor instance name as the first parameter. Failover
|
commands generally match their automatic versions. The exception is _rejoin_, in
|
||||||
|
which the manual command allows rejoining even when the joining server has empty
|
||||||
|
GTIDs. This exception lets the user force a rejoin on a server without binary
|
||||||
|
logs.
|
||||||
|
|
||||||
|
All commands require the monitor instance name as the first parameter. Failover
|
||||||
selects the new master server automatically and does not require additional
|
selects the new master server automatically and does not require additional
|
||||||
parameters. Rejoin requires the name of the joining server as second parameter.
|
parameters. Rejoin requires the name of the joining server as second parameter.
|
||||||
Replication reset accepts the name of the new master server as second
|
Replication reset accepts the name of the new master server as second parameter.
|
||||||
parameter. If not given, the current master is selected.
|
If not given, the current master is selected.
|
||||||
|
|
||||||
Switchover takes one to three parameters. If only the monitor name is given,
|
Switchover takes one to three parameters. If only the monitor name is given,
|
||||||
switchover will autoselect both the slave to promote and the current master as
|
switchover will autoselect both the slave to promote and the current master as
|
||||||
@ -401,8 +406,8 @@ to demote (OldMasterServ). For rejoin, the server to join (OldMasterServ) is
|
|||||||
required. Replication reset requires the server to promote (NewMasterServ).
|
required. Replication reset requires the server to promote (NewMasterServ).
|
||||||
|
|
||||||
It is safe to perform manual operations even with automatic failover, switchover
|
It is safe to perform manual operations even with automatic failover, switchover
|
||||||
or rejoin enabled since the automatic operations cannot happen simultaneously
|
or rejoin enabled since automatic operations cannot happen simultaneously
|
||||||
with the manual one.
|
with manual ones.
|
||||||
|
|
||||||
If a switchover or failover fails, automatic failover is disabled to prevent
|
If a switchover or failover fails, automatic failover is disabled to prevent
|
||||||
master changes to a possibly malfunctioning cluster. Automatic failover can be
|
master changes to a possibly malfunctioning cluster. Automatic failover can be
|
||||||
|
|||||||
@ -115,17 +115,42 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
|||||||
if (mon_slave_cand)
|
if (mon_slave_cand)
|
||||||
{
|
{
|
||||||
MariaDBServer* slave_cand = get_server_info(mon_slave_cand);
|
MariaDBServer* slave_cand = get_server_info(mon_slave_cand);
|
||||||
|
|
||||||
if (server_is_rejoin_suspect(slave_cand, output))
|
if (server_is_rejoin_suspect(slave_cand, output))
|
||||||
{
|
{
|
||||||
string gtid_update_error;
|
string gtid_update_error;
|
||||||
if (m_master->update_gtids(&gtid_update_error))
|
if (m_master->update_gtids(&gtid_update_error))
|
||||||
{
|
{
|
||||||
|
// The manual version of rejoin does not need to be as careful as the automatic one.
|
||||||
|
// The rules are mostly the same, the only difference is that a server with empty gtid:s
|
||||||
|
// can be rejoined manually.
|
||||||
|
// TODO: Add the warning to JSON output.
|
||||||
string no_rejoin_reason;
|
string no_rejoin_reason;
|
||||||
if (slave_cand->can_replicate_from(m_master, &no_rejoin_reason))
|
bool safe_rejoin = slave_cand->can_replicate_from(m_master, &no_rejoin_reason);
|
||||||
|
bool empty_gtid = slave_cand->m_gtid_current_pos.empty();
|
||||||
|
bool rejoin_allowed = false;
|
||||||
|
if (safe_rejoin)
|
||||||
{
|
{
|
||||||
ServerArray joinable_server;
|
rejoin_allowed = true;
|
||||||
joinable_server.push_back(slave_cand);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (empty_gtid)
|
||||||
|
{
|
||||||
|
rejoin_allowed = true;
|
||||||
|
MXB_WARNING("gtid_curren_pos of %s is empty. Manual rejoin is unsafe "
|
||||||
|
"but allowed.", rejoin_serv_name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(output, "%s cannot replicate from master server %s: %s",
|
||||||
|
rejoin_serv_name, m_master->name(),
|
||||||
|
no_rejoin_reason.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rejoin_allowed)
|
||||||
|
{
|
||||||
|
ServerArray joinable_server = {slave_cand};
|
||||||
if (do_rejoin(joinable_server, output) == 1)
|
if (do_rejoin(joinable_server, output) == 1)
|
||||||
{
|
{
|
||||||
rval = true;
|
rval = true;
|
||||||
@ -136,12 +161,6 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
|||||||
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
PRINT_MXS_JSON_ERROR(output, "Rejoin attempted but failed.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
PRINT_MXS_JSON_ERROR(output,
|
|
||||||
"%s cannot replicate from cluster master %s: %s.",
|
|
||||||
rejoin_serv_name, m_master->name(), no_rejoin_reason.c_str());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -153,8 +172,7 @@ bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(output,
|
PRINT_MXS_JSON_ERROR(output, "The given server '%s' is not monitored by this monitor.",
|
||||||
"The given server '%s' is not monitored by this monitor.",
|
|
||||||
rejoin_serv_name);
|
rejoin_serv_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1862,4 +1880,4 @@ MariaDBMonitor::FailoverParams::FailoverParams(const ServerOperation& promotion,
|
|||||||
, demotion_target(demotion_target)
|
, demotion_target(demotion_target)
|
||||||
, general(general)
|
, general(general)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user