Refactor preparations to failover
The two operations are quite similar so the code should look similar as well and use shared functions.
This commit is contained in:
@ -743,44 +743,26 @@ void MariaDBMonitor::assign_slave_and_relay_master(MariaDBServer* start_node)
|
|||||||
bool has_slaves = false;
|
bool has_slaves = false;
|
||||||
for (MariaDBServer* slave : parent->m_node.children)
|
for (MariaDBServer* slave : parent->m_node.children)
|
||||||
{
|
{
|
||||||
// The slave node may have several slave connections, need to find the one that is
|
|
||||||
// connected to the parent. This section is quite similar to the one in
|
|
||||||
// 'build_replication_graph', although here we require that the sql thread is running.
|
|
||||||
|
|
||||||
// If the slave has an index, it has already been visited and labelled master/slave.
|
// If the slave has an index, it has already been visited and labelled master/slave.
|
||||||
// Even when this is the case, the node has to be checked to get correct
|
// Even when this is the case, the node has to be checked to get correct
|
||||||
// [Relay Master] labels.
|
// [Relay Master] labels.
|
||||||
|
|
||||||
// Need to differentiate between stale and running slave connections.
|
// Need to differentiate between stale and running slave connections.
|
||||||
bool found_slave_conn = false;
|
bool found_slave_conn = false; // slave->parent connection exists
|
||||||
bool conn_is_live = false;
|
bool conn_is_live = false; // live connection chain slave->cluster_master exists
|
||||||
bool slave_is_running = !slave->is_down();
|
auto sstatus = slave->slave_connection_status(parent);
|
||||||
for (SlaveStatus& ss : slave->m_slave_status)
|
if (sstatus)
|
||||||
{
|
{
|
||||||
auto master_id = ss.master_server_id;
|
if (sstatus->slave_io_running == SlaveStatus::SLAVE_IO_YES)
|
||||||
auto io_running = ss.slave_io_running;
|
|
||||||
// Should this check 'Master_Host' and 'Master_Port' instead of server id:s?
|
|
||||||
if (master_id > 0 && master_id == parent->m_server_id && ss.slave_sql_running)
|
|
||||||
{
|
{
|
||||||
// Would it be possible to have the parent down while IO is still connected? Perhaps
|
found_slave_conn = true;
|
||||||
// if the slave is slow to update the connection status.
|
// Would it be possible to have the parent down while IO is still connected?
|
||||||
if (io_running == SlaveStatus::SLAVE_IO_YES)
|
// Perhaps, if the slave is slow to update the connection status.
|
||||||
{
|
conn_is_live = parent_has_live_link && slave->is_running();
|
||||||
found_slave_conn = true;
|
}
|
||||||
// Check that a live connection chain exists from cluster master to the slave.
|
else if (sstatus->slave_io_running == SlaveStatus::SLAVE_IO_CONNECTING)
|
||||||
conn_is_live = parent_has_live_link && slave_is_running;
|
{
|
||||||
break;
|
found_slave_conn = true;
|
||||||
}
|
|
||||||
else if (io_running == SlaveStatus::SLAVE_IO_CONNECTING &&
|
|
||||||
slave->had_status(SERVER_WAS_SLAVE))
|
|
||||||
{
|
|
||||||
// Stale connection. TODO: The SERVER_WAS_SLAVE check above is not enough in
|
|
||||||
// several situations. The previously observed live slave connections
|
|
||||||
// need to be saved distinctly to avoid a SERVER_WAS_SLAVE bit from one
|
|
||||||
// connection from affecting another.
|
|
||||||
found_slave_conn = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,19 +43,20 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio
|
|||||||
* Manual commands (as well as automatic ones) are ran at the end of a normal monitor loop,
|
* Manual commands (as well as automatic ones) are ran at the end of a normal monitor loop,
|
||||||
* so server states can be assumed to be up-to-date.
|
* so server states can be assumed to be up-to-date.
|
||||||
*/
|
*/
|
||||||
|
bool switchover_done = false;
|
||||||
MariaDBServer* promotion_target = NULL;
|
MariaDBServer* promotion_target = NULL;
|
||||||
MariaDBServer* demotion_target = NULL;
|
MariaDBServer* demotion_target = NULL;
|
||||||
auto ok_to_switch = switchover_prepare(promotion_server, demotion_server,
|
|
||||||
&promotion_target, &demotion_target, error_out);
|
|
||||||
|
|
||||||
bool rval = false;
|
auto ok_to_switch = switchover_prepare(promotion_server, demotion_server,
|
||||||
|
&promotion_target, &demotion_target,
|
||||||
|
error_out);
|
||||||
if (ok_to_switch)
|
if (ok_to_switch)
|
||||||
{
|
{
|
||||||
bool switched = do_switchover(demotion_target, promotion_target, error_out);
|
switchover_done = do_switchover(demotion_target, promotion_target, error_out);
|
||||||
if (switched)
|
if (switchover_done)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Switchover %s -> %s performed.", demotion_target->name(), promotion_target->name());
|
MXS_NOTICE("Switchover '%s' -> '%s' performed.",
|
||||||
rval = true;
|
demotion_target->name(), promotion_target->name());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -75,33 +76,35 @@ bool MariaDBMonitor::manual_switchover(SERVER* promotion_server, SERVER* demotio
|
|||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(error_out, "Switchover cancelled.");
|
PRINT_MXS_JSON_ERROR(error_out, "Switchover cancelled.");
|
||||||
}
|
}
|
||||||
return rval;
|
return switchover_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBMonitor::manual_failover(json_t** output)
|
bool MariaDBMonitor::manual_failover(json_t** output)
|
||||||
{
|
{
|
||||||
bool rv = true;
|
bool failover_done = false;
|
||||||
string failover_error;
|
MariaDBServer* promotion_target = NULL;
|
||||||
rv = failover_check(&failover_error);
|
MariaDBServer* demotion_target = NULL;
|
||||||
if (rv)
|
|
||||||
|
bool ok_to_failover = failover_prepare(&promotion_target, &demotion_target, output);
|
||||||
|
if (ok_to_failover)
|
||||||
{
|
{
|
||||||
rv = do_failover(output);
|
failover_done = do_failover(promotion_target, demotion_target, output);
|
||||||
if (rv)
|
if (failover_done)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Failover performed.");
|
MXS_NOTICE("Failover '%s' -> '%s' performed.",
|
||||||
|
demotion_target->name(), promotion_target->name());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(output, "Failover failed.");
|
PRINT_MXS_JSON_ERROR(output, "Failover '%s' -> '%s' failed.",
|
||||||
|
demotion_target->name(), promotion_target->name());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(output, "Failover not performed due to the following errors: \n%s",
|
PRINT_MXS_JSON_ERROR(output, "Failover cancelled.");
|
||||||
failover_error.c_str());
|
|
||||||
}
|
}
|
||||||
|
return failover_done;
|
||||||
return rv;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
bool MariaDBMonitor::manual_rejoin(SERVER* rejoin_server, json_t** output)
|
||||||
@ -480,16 +483,9 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
time_t start_time = time(NULL);
|
time_t start_time = time(NULL);
|
||||||
|
|
||||||
// Step 1: Save all slaves except promotion target to an array.
|
// Step 1: Save all slaves except promotion target to an array.
|
||||||
ServerArray redirectable_slaves;
|
// Try to redirect even disconnected slaves.
|
||||||
for (MariaDBServer* redirectable : demotion_target->m_node.children)
|
// TODO: 'switchover_wait_slaves_catchup' needs to be smarter and not bother with such slaves.
|
||||||
{
|
ServerArray redirectable_slaves = get_redirectables(promotion_target, demotion_target);
|
||||||
// TODO: Again check valid replication here
|
|
||||||
if (redirectable != promotion_target && redirectable->is_replicating_from(demotion_target) &&
|
|
||||||
redirectable->uses_gtid())
|
|
||||||
{
|
|
||||||
redirectable_slaves.push_back(redirectable);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 2: Set read-only to on, flush logs, update master gtid:s
|
// Step 2: Set read-only to on, flush logs, update master gtid:s
|
||||||
@ -586,28 +582,27 @@ bool MariaDBMonitor::do_switchover(MariaDBServer* demotion_target, MariaDBServer
|
|||||||
/**
|
/**
|
||||||
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||||
*
|
*
|
||||||
* @param err_out Json output
|
* @param demotion_target Server to demote
|
||||||
|
* @param promotion_target Server to promote
|
||||||
|
* @param err_out Error output
|
||||||
* @return True if successful
|
* @return True if successful
|
||||||
*/
|
*/
|
||||||
bool MariaDBMonitor::do_failover(json_t** err_out)
|
bool MariaDBMonitor::do_failover(MariaDBServer* promotion_target, MariaDBServer* demotion_target,
|
||||||
|
json_t** error_out)
|
||||||
{
|
{
|
||||||
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
// Total time limit on how long this operation may take. Checked and modified after significant steps are
|
||||||
// completed.
|
// completed.
|
||||||
int seconds_remaining = m_failover_timeout;
|
int seconds_remaining = m_failover_timeout;
|
||||||
time_t start_time = time(NULL);
|
time_t start_time = time(NULL);
|
||||||
// Step 1: Select new master. Also populate a vector with all slaves not the selected master.
|
// Step 1: Populate a vector with all slaves not the selected master.
|
||||||
ServerArray redirectable_slaves;
|
ServerArray redirectable_slaves = get_redirectables(promotion_target, demotion_target);
|
||||||
MariaDBServer* new_master = select_new_master(&redirectable_slaves, err_out);
|
|
||||||
if (new_master == NULL)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
time_t step1_time = time(NULL);
|
time_t step1_time = time(NULL);
|
||||||
seconds_remaining -= difftime(step1_time, start_time);
|
seconds_remaining -= difftime(step1_time, start_time);
|
||||||
|
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 2: Wait until relay log consumed.
|
// Step 2: Wait until relay log consumed.
|
||||||
if (new_master->failover_wait_relay_log(seconds_remaining, err_out))
|
if (promotion_target->failover_wait_relay_log(seconds_remaining, error_out))
|
||||||
{
|
{
|
||||||
time_t step2_time = time(NULL);
|
time_t step2_time = time(NULL);
|
||||||
int seconds_step2 = difftime(step2_time, step1_time);
|
int seconds_step2 = difftime(step2_time, step1_time);
|
||||||
@ -615,13 +610,13 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
|
|||||||
seconds_remaining -= seconds_step2;
|
seconds_remaining -= seconds_step2;
|
||||||
|
|
||||||
// Step 3: Stop and reset slave, set read-only to 0.
|
// Step 3: Stop and reset slave, set read-only to 0.
|
||||||
if (promote_new_master(new_master, err_out))
|
if (promote_new_master(promotion_target, error_out))
|
||||||
{
|
{
|
||||||
m_next_master = new_master;
|
m_next_master = promotion_target;
|
||||||
m_cluster_modified = true;
|
m_cluster_modified = true;
|
||||||
// Step 4: Redirect slaves.
|
// Step 4: Redirect slaves.
|
||||||
ServerArray redirected_slaves;
|
ServerArray redirected_slaves;
|
||||||
int redirects = redirect_slaves(new_master, redirectable_slaves, &redirected_slaves);
|
int redirects = redirect_slaves(promotion_target, redirectable_slaves, &redirected_slaves);
|
||||||
bool success = redirectable_slaves.empty() ? true : redirects > 0;
|
bool success = redirectable_slaves.empty() ? true : redirects > 0;
|
||||||
if (success)
|
if (success)
|
||||||
{
|
{
|
||||||
@ -643,7 +638,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
|
|||||||
rval = true;
|
rval = true;
|
||||||
MXS_DEBUG("Failover: no slaves to redirect, skipping stabilization check.");
|
MXS_DEBUG("Failover: no slaves to redirect, skipping stabilization check.");
|
||||||
}
|
}
|
||||||
else if (wait_cluster_stabilization(new_master, redirected_slaves, seconds_remaining))
|
else if (wait_cluster_stabilization(promotion_target, redirected_slaves, seconds_remaining))
|
||||||
{
|
{
|
||||||
rval = true;
|
rval = true;
|
||||||
time_t step5_time = time(NULL);
|
time_t step5_time = time(NULL);
|
||||||
@ -655,7 +650,7 @@ bool MariaDBMonitor::do_failover(json_t** err_out)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
print_redirect_errors(NULL, redirectable_slaves, err_out);
|
print_redirect_errors(NULL, redirectable_slaves, error_out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -971,108 +966,9 @@ bool MariaDBMonitor::promote_new_master(MariaDBServer* new_master, json_t** err_
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
MariaDBServer* MariaDBMonitor::select_promotion_target(ClusterOperation op,
|
||||||
* Select a new master. Also add slaves which should be redirected to an array.
|
MariaDBServer* demotion_target,
|
||||||
*
|
json_t** err_out)
|
||||||
* @param out_slaves Vector for storing slave servers.
|
|
||||||
* @param err_out json object for error printing. Can be NULL.
|
|
||||||
* @return The found master, or NULL if not found
|
|
||||||
*/
|
|
||||||
MariaDBServer* MariaDBMonitor::select_new_master(ServerArray* slaves_out, json_t** err_out)
|
|
||||||
{
|
|
||||||
ss_dassert(slaves_out && slaves_out->size() == 0);
|
|
||||||
/* Select a new master candidate. Selects the one with the latest event in relay log.
|
|
||||||
* If multiple slaves have same number of events, select the one with most processed events. */
|
|
||||||
MariaDBServer* current_best = NULL;
|
|
||||||
string current_best_reason;
|
|
||||||
// Servers that cannot be selected because of exclusion, but seem otherwise ok.
|
|
||||||
ServerArray valid_but_excluded;
|
|
||||||
// Index of the current best candidate in slaves_out
|
|
||||||
int master_vector_index = -1;
|
|
||||||
|
|
||||||
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
|
|
||||||
{
|
|
||||||
/* If a server cannot be connected to, it won't be considered for promotion or redirected.
|
|
||||||
* Do not worry about the exclusion list yet, querying the excluded servers is ok.
|
|
||||||
* If master is replicating from external master, it is updated by update_slave_info()
|
|
||||||
* but not added to array. */
|
|
||||||
MariaDBServer* cand = *iter;
|
|
||||||
if (cand->update_slave_info() && cand != m_master)
|
|
||||||
{
|
|
||||||
slaves_out->push_back(cand);
|
|
||||||
// Check that server is not in the exclusion list while still being a valid choice.
|
|
||||||
if (server_is_excluded(cand) && cand->check_replication_settings(WARNINGS_OFF))
|
|
||||||
{
|
|
||||||
valid_but_excluded.push_back(cand);
|
|
||||||
const char CANNOT_SELECT[] = "Promotion candidate '%s' is excluded from new "
|
|
||||||
"master selection.";
|
|
||||||
MXS_INFO(CANNOT_SELECT, cand->name());
|
|
||||||
}
|
|
||||||
else if (cand->check_replication_settings())
|
|
||||||
{
|
|
||||||
// If no new master yet, accept any valid candidate. Otherwise check.
|
|
||||||
if (current_best == NULL ||
|
|
||||||
is_candidate_better(cand, current_best, m_master_gtid_domain, &current_best_reason))
|
|
||||||
{
|
|
||||||
// The server has been selected for promotion, for now.
|
|
||||||
current_best = cand;
|
|
||||||
master_vector_index = slaves_out->size() - 1;
|
|
||||||
if (!current_best_reason.empty())
|
|
||||||
{
|
|
||||||
current_best_reason = string_printf("Selected '%s' because %s", current_best->name(),
|
|
||||||
current_best_reason.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (current_best)
|
|
||||||
{
|
|
||||||
// Remove the selected master from the vector.
|
|
||||||
auto it_remove = slaves_out->begin();
|
|
||||||
it_remove += master_vector_index;
|
|
||||||
slaves_out->erase(it_remove);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if any of the excluded servers would be better than the best candidate.
|
|
||||||
for (auto iter = valid_but_excluded.begin(); iter != valid_but_excluded.end(); iter++)
|
|
||||||
{
|
|
||||||
MariaDBServer* excluded_info = *iter;
|
|
||||||
const char* excluded_name = (*iter)->name();
|
|
||||||
if (current_best == NULL)
|
|
||||||
{
|
|
||||||
const char EXCLUDED_ONLY_CAND[] = "Server '%s' is a viable choice for new master, "
|
|
||||||
"but cannot be selected as it's excluded.";
|
|
||||||
MXS_WARNING(EXCLUDED_ONLY_CAND, excluded_name);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (is_candidate_better(excluded_info, current_best, m_master_gtid_domain))
|
|
||||||
{
|
|
||||||
// Print a warning if this server is actually a better candidate than the previous best.
|
|
||||||
const char EXCLUDED_CAND[] = "Server '%s' is superior to current best candidate '%s', "
|
|
||||||
"but cannot be selected as it's excluded. This may lead to "
|
|
||||||
"loss of data if '%s' is ahead of other servers.";
|
|
||||||
MXS_WARNING(EXCLUDED_CAND, excluded_name, current_best->name(), excluded_name);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (current_best == NULL)
|
|
||||||
{
|
|
||||||
PRINT_MXS_JSON_ERROR(err_out, "No suitable promotion candidate found.");
|
|
||||||
}
|
|
||||||
else if (!current_best_reason.empty())
|
|
||||||
{
|
|
||||||
// If there was a specific reason this server was selected, print it now. It's possible that all
|
|
||||||
// were equally good, in that case no need to print.
|
|
||||||
MXS_NOTICE("%s", current_best_reason.c_str());
|
|
||||||
}
|
|
||||||
return current_best;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
MariaDBServer* MariaDBMonitor::switchover_select_promotion(MariaDBServer* demotion_target, json_t** err_out)
|
|
||||||
{
|
{
|
||||||
/* Select a new master candidate. Selects the one with the latest event in relay log.
|
/* Select a new master candidate. Selects the one with the latest event in relay log.
|
||||||
* If multiple slaves have same number of events, select the one with most processed events. */
|
* If multiple slaves have same number of events, select the one with most processed events. */
|
||||||
@ -1099,7 +995,7 @@ MariaDBServer* MariaDBMonitor::switchover_select_promotion(MariaDBServer* demoti
|
|||||||
for (MariaDBServer* cand : demotion_target->m_node.children)
|
for (MariaDBServer* cand : demotion_target->m_node.children)
|
||||||
{
|
{
|
||||||
string reason;
|
string reason;
|
||||||
if (!cand->can_be_promoted(demotion_target, &reason))
|
if (!cand->can_be_promoted(op, demotion_target, &reason))
|
||||||
{
|
{
|
||||||
string msg = string_printf("'%s' cannot be selected because %s", cand->name(), reason.c_str());
|
string msg = string_printf("'%s' cannot be selected because %s", cand->name(), reason.c_str());
|
||||||
printer.cat(all_reasons, msg);
|
printer.cat(all_reasons, msg);
|
||||||
@ -1260,71 +1156,67 @@ bool MariaDBMonitor::is_candidate_better(const MariaDBServer* candidate, const M
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check that preconditions for a failover are met.
|
* Check cluster and parameters for suitability to failover. Also writes found servers to output pointers.
|
||||||
*
|
*
|
||||||
|
* @param promotion_target_out Output for promotion target
|
||||||
|
* @param demotion_target_out Output for demotion target
|
||||||
* @param error_out Error output
|
* @param error_out Error output
|
||||||
* @return True if failover may proceed
|
* @return True if cluster is suitable and failover may proceed
|
||||||
*/
|
*/
|
||||||
bool MariaDBMonitor::failover_check(string* error_out)
|
bool MariaDBMonitor::failover_prepare(MariaDBServer** promotion_target_out,
|
||||||
|
MariaDBServer** demotion_target_out,
|
||||||
|
json_t** error_out)
|
||||||
{
|
{
|
||||||
// Check that there is no running master and that there is at least one promotable slave in the cluster.
|
// This function resembles 'switchover_prepare', but does not yet support manual selection.
|
||||||
// Also, all slaves must be using gtid-replication and the gtid-domain of the cluster must be known.
|
const auto op = ClusterOperation::FAILOVER;
|
||||||
bool error = false;
|
// Check that the cluster has a non-functional master server and that one of the slaves of
|
||||||
string separator;
|
// that master can be promoted. TODO: add support for demoting a relay server.
|
||||||
// Topology has already been tested to be simple.
|
MariaDBServer* demotion_target = NULL;
|
||||||
if (m_master_gtid_domain < 0)
|
// Autoselect current master as demotion target.
|
||||||
|
string demotion_msg;
|
||||||
|
if (m_master == NULL)
|
||||||
{
|
{
|
||||||
*error_out += "Cluster gtid domain is unknown. This is usually caused by the cluster never having "
|
const char msg[] = "Can not select a demotion target for failover: cluster does not have a master.";
|
||||||
"a master server while MaxScale was running.";
|
PRINT_MXS_JSON_ERROR(error_out, msg);
|
||||||
separator = "\n";
|
}
|
||||||
error = true;
|
else if (!m_master->can_be_demoted_failover(&demotion_msg))
|
||||||
|
{
|
||||||
|
const char msg[] = "Can not select '%s' as a demotion target for failover because %s";
|
||||||
|
PRINT_MXS_JSON_ERROR(error_out, msg, m_master->name(), demotion_msg.c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
demotion_target = m_master;
|
||||||
}
|
}
|
||||||
|
|
||||||
int valid_slaves = 0;
|
MariaDBServer* promotion_target = NULL;
|
||||||
for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++)
|
if (demotion_target)
|
||||||
{
|
{
|
||||||
MariaDBServer* server = *iter;
|
// Autoselect best server for promotion.
|
||||||
uint64_t status_bits = server->m_server_base->pending_status;
|
MariaDBServer* promotion_candidate = select_promotion_target(op, demotion_target, error_out);
|
||||||
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
|
if (promotion_candidate)
|
||||||
if ((status_bits & master_up) == master_up)
|
|
||||||
{
|
{
|
||||||
string master_up_msg = string("Master server '") + server->name() + "' is running";
|
promotion_target = promotion_candidate;
|
||||||
if (status_bits & SERVER_MAINT)
|
|
||||||
{
|
|
||||||
master_up_msg += ", although in maintenance mode";
|
|
||||||
}
|
|
||||||
master_up_msg += ".";
|
|
||||||
*error_out += separator + master_up_msg;
|
|
||||||
separator = "\n";
|
|
||||||
error = true;
|
|
||||||
}
|
}
|
||||||
else if (server->is_slave())
|
else
|
||||||
{
|
{
|
||||||
// Gtid-replication is checked for all slaves, but only slaves not excluded are accepted.
|
PRINT_MXS_JSON_ERROR(error_out, "Could not autoselect promotion target for failover.");
|
||||||
string gtid_error;
|
|
||||||
if (server->uses_gtid(&gtid_error))
|
|
||||||
{
|
|
||||||
if (!server_is_excluded(server))
|
|
||||||
{
|
|
||||||
valid_slaves++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
*error_out += separator + gtid_error;
|
|
||||||
separator = "\n";
|
|
||||||
error = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (valid_slaves == 0)
|
bool gtid_ok = false;
|
||||||
|
if (demotion_target)
|
||||||
{
|
{
|
||||||
*error_out += separator + "No valid slaves to promote.";
|
gtid_ok = check_gtid_replication(demotion_target, error_out);
|
||||||
error = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return !error;
|
if (promotion_target && demotion_target && gtid_ok)
|
||||||
|
{
|
||||||
|
*promotion_target_out = promotion_target;
|
||||||
|
*demotion_target_out = demotion_target;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1406,14 +1298,15 @@ void MariaDBMonitor::handle_auto_failover()
|
|||||||
else if (failed_master->m_server_base->mon_err_count >= m_failcount)
|
else if (failed_master->m_server_base->mon_err_count >= m_failcount)
|
||||||
{
|
{
|
||||||
// Failover is required, but first we should check if preconditions are met.
|
// Failover is required, but first we should check if preconditions are met.
|
||||||
string error_msg;
|
MariaDBServer* promotion_target = NULL;
|
||||||
if (failover_check(&error_msg))
|
MariaDBServer* demotion_target = NULL;
|
||||||
|
if (failover_prepare(&promotion_target, &demotion_target, NULL))
|
||||||
{
|
{
|
||||||
m_warn_failover_precond = true;
|
m_warn_failover_precond = true;
|
||||||
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
||||||
failed_master->name());
|
failed_master->name());
|
||||||
failed_master->m_server_base->new_event = false;
|
failed_master->m_server_base->new_event = false;
|
||||||
if (!do_failover(NULL))
|
if (!do_failover(promotion_target, demotion_target, NULL))
|
||||||
{
|
{
|
||||||
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover);
|
report_and_disable("failover", CN_AUTO_FAILOVER, &m_auto_failover);
|
||||||
}
|
}
|
||||||
@ -1425,7 +1318,7 @@ void MariaDBMonitor::handle_auto_failover()
|
|||||||
if (m_warn_failover_precond)
|
if (m_warn_failover_precond)
|
||||||
{
|
{
|
||||||
MXS_WARNING("Not performing automatic failover. Will keep retrying with this message "
|
MXS_WARNING("Not performing automatic failover. Will keep retrying with this message "
|
||||||
"suppressed. Errors: \n%s", error_msg.c_str());
|
"suppressed.");
|
||||||
m_warn_failover_precond = false;
|
m_warn_failover_precond = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1567,6 +1460,7 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
MariaDBServer** demotion_target_out,
|
MariaDBServer** demotion_target_out,
|
||||||
json_t** error_out)
|
json_t** error_out)
|
||||||
{
|
{
|
||||||
|
const auto op = ClusterOperation::SWITCHOVER;
|
||||||
// Check that both servers are ok if specified, or autoselect them. Demotion target must be checked
|
// Check that both servers are ok if specified, or autoselect them. Demotion target must be checked
|
||||||
// first since the promotion target depends on it.
|
// first since the promotion target depends on it.
|
||||||
ss_dassert(promotion_target_out && demotion_target_out &&
|
ss_dassert(promotion_target_out && demotion_target_out &&
|
||||||
@ -1583,7 +1477,7 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, demotion_server->name, m_monitor->name);
|
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, demotion_server->name, m_monitor->name);
|
||||||
}
|
}
|
||||||
else if (!demotion_candidate->can_be_demoted(&demotion_msg))
|
else if (!demotion_candidate->can_be_demoted_switchover(&demotion_msg))
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(error_out, "'%s' is not a valid demotion target for switchover: %s",
|
PRINT_MXS_JSON_ERROR(error_out, "'%s' is not a valid demotion target for switchover: %s",
|
||||||
demotion_candidate->name(), demotion_msg.c_str());
|
demotion_candidate->name(), demotion_msg.c_str());
|
||||||
@ -1602,9 +1496,9 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
"not have a master.";
|
"not have a master.";
|
||||||
PRINT_MXS_JSON_ERROR(error_out, msg);
|
PRINT_MXS_JSON_ERROR(error_out, msg);
|
||||||
}
|
}
|
||||||
else if (!m_master->can_be_demoted(&demotion_msg))
|
else if (!m_master->can_be_demoted_switchover(&demotion_msg))
|
||||||
{
|
{
|
||||||
const char msg[] = "Can not autoselect '%s' as a demotion target for switchover: %s";
|
const char msg[] = "Can not autoselect '%s' as a demotion target for switchover because %s";
|
||||||
PRINT_MXS_JSON_ERROR(error_out, msg, m_master->name(), demotion_msg.c_str());
|
PRINT_MXS_JSON_ERROR(error_out, msg, m_master->name(), demotion_msg.c_str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1625,9 +1519,9 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, promotion_server->name, m_monitor->name);
|
PRINT_MXS_JSON_ERROR(error_out, NO_SERVER, promotion_server->name, m_monitor->name);
|
||||||
}
|
}
|
||||||
else if (!promotion_candidate->can_be_promoted(demotion_target, &promotion_msg))
|
else if (!promotion_candidate->can_be_promoted(op, demotion_target, &promotion_msg))
|
||||||
{
|
{
|
||||||
const char msg[] = "'%s' is not a valid promotion target for switchover: %s";
|
const char msg[] = "'%s' is not a valid promotion target for switchover because %s";
|
||||||
PRINT_MXS_JSON_ERROR(error_out, msg, promotion_candidate->name(), promotion_msg.c_str());
|
PRINT_MXS_JSON_ERROR(error_out, msg, promotion_candidate->name(), promotion_msg.c_str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1638,7 +1532,7 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Autoselect. More involved than the autoselecting the demotion target.
|
// Autoselect. More involved than the autoselecting the demotion target.
|
||||||
MariaDBServer* promotion_candidate = switchover_select_promotion(demotion_target, error_out);
|
MariaDBServer* promotion_candidate = select_promotion_target(op, demotion_target, error_out);
|
||||||
if (promotion_candidate)
|
if (promotion_candidate)
|
||||||
{
|
{
|
||||||
promotion_target = promotion_candidate;
|
promotion_target = promotion_candidate;
|
||||||
@ -1650,20 +1544,13 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gtid_domain_ok = false;
|
bool gtid_ok = false;
|
||||||
if (m_master_gtid_domain == GTID_DOMAIN_UNKNOWN)
|
if (demotion_target)
|
||||||
{
|
{
|
||||||
PRINT_MXS_JSON_ERROR(error_out, "Cluster gtid domain is unknown. Cannot switchover.");
|
gtid_ok = check_gtid_replication(demotion_target, error_out);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
gtid_domain_ok = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that all slaves are using gtid-replication.
|
if (promotion_target && demotion_target && gtid_ok)
|
||||||
bool gtid_ok = slaves_using_gtid(error_out);
|
|
||||||
|
|
||||||
if (demotion_target && promotion_target && gtid_domain_ok && gtid_ok)
|
|
||||||
{
|
{
|
||||||
*demotion_target_out = demotion_target;
|
*demotion_target_out = demotion_target;
|
||||||
*promotion_target_out = promotion_target;
|
*promotion_target_out = promotion_target;
|
||||||
@ -1672,17 +1559,25 @@ bool MariaDBMonitor::switchover_prepare(SERVER* promotion_server, SERVER* demoti
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBMonitor::slaves_using_gtid(json_t** error_out)
|
/**
|
||||||
|
* Check that all slaves of the master are using gtid-replication. Only the slave connections to the
|
||||||
|
* master are checked.
|
||||||
|
*
|
||||||
|
* @param master The master whose slaves are checked. Can be any server in the cluster.
|
||||||
|
* @param error_out Error output
|
||||||
|
* @return True if all slaves are using gtid replication, or if there is no slaves
|
||||||
|
*/
|
||||||
|
bool MariaDBMonitor::slaves_using_gtid(const MariaDBServer* master, json_t** error_out)
|
||||||
{
|
{
|
||||||
// Check that all slaves are using gtid-replication.
|
|
||||||
bool gtid_ok = true;
|
bool gtid_ok = true;
|
||||||
for (MariaDBServer* server : m_servers)
|
for (MariaDBServer* server : master->m_node.children)
|
||||||
{
|
{
|
||||||
string gtid_error;
|
auto sstatus = server->slave_connection_status(master);
|
||||||
if (server->is_slave() && !server->uses_gtid(&gtid_error))
|
if (sstatus && sstatus->gtid_io_pos.empty())
|
||||||
{
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(error_out, "The slave connection of '%s' -> '%s' is not using "
|
||||||
|
"gtid replication.", server->name(), master->name());
|
||||||
gtid_ok = false;
|
gtid_ok = false;
|
||||||
PRINT_MXS_JSON_ERROR(error_out, "%s", gtid_error.c_str());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return gtid_ok;
|
return gtid_ok;
|
||||||
@ -1747,7 +1642,8 @@ void MariaDBMonitor::handle_low_disk_space_master()
|
|||||||
bool switched = do_switchover(demotion_target, promotion_target, NULL);
|
bool switched = do_switchover(demotion_target, promotion_target, NULL);
|
||||||
if (switched)
|
if (switched)
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Switchover %s -> %s performed.", demotion_target->name(), promotion_target->name());
|
MXS_NOTICE("Switchover %s -> %s performed.",
|
||||||
|
demotion_target->name(), promotion_target->name());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1783,4 +1679,54 @@ void MariaDBMonitor::report_and_disable(const string& operation, const string& s
|
|||||||
MXS_ERROR("%s", error_msg.c_str());
|
MXS_ERROR("%s", error_msg.c_str());
|
||||||
*setting_var = false;
|
*setting_var = false;
|
||||||
disable_setting(setting_name.c_str());
|
disable_setting(setting_name.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that the slaves to demotion target are using gtid replication and that the gtid domain of the
|
||||||
|
* cluster is defined.
|
||||||
|
*
|
||||||
|
* @param demotion_target The server whose slaves should be checked
|
||||||
|
* @param error_out Error output
|
||||||
|
* @return True if gtid is used
|
||||||
|
*/
|
||||||
|
bool MariaDBMonitor::check_gtid_replication(const MariaDBServer* demotion_target, json_t** error_out)
|
||||||
|
{
|
||||||
|
bool gtid_domain_ok = false;
|
||||||
|
if (m_master_gtid_domain == GTID_DOMAIN_UNKNOWN)
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(error_out, "Cluster gtid domain is unknown. This is usually caused by "
|
||||||
|
"the cluster never having a master server while MaxScale was running.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gtid_domain_ok = true;
|
||||||
|
}
|
||||||
|
// Check that all slaves are using gtid-replication.
|
||||||
|
bool gtid_ok = slaves_using_gtid(demotion_target, error_out);
|
||||||
|
return gtid_domain_ok && gtid_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List slaves which should be redirected to the new master.
|
||||||
|
*
|
||||||
|
* @param promotion_target The server which will be promoted
|
||||||
|
* @param demotion_target The server which will be demoted
|
||||||
|
* @return A list of slaves to redirect
|
||||||
|
*/
|
||||||
|
ServerArray MariaDBMonitor::get_redirectables(const MariaDBServer* promotion_target,
|
||||||
|
const MariaDBServer* demotion_target)
|
||||||
|
{
|
||||||
|
ServerArray redirectable_slaves;
|
||||||
|
for (MariaDBServer* slave : demotion_target->m_node.children)
|
||||||
|
{
|
||||||
|
if (slave != promotion_target)
|
||||||
|
{
|
||||||
|
auto sstatus = slave->slave_connection_status(demotion_target);
|
||||||
|
if (sstatus && !sstatus->gtid_io_pos.empty())
|
||||||
|
{
|
||||||
|
redirectable_slaves.push_back(slave);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return redirectable_slaves;
|
||||||
|
}
|
||||||
|
@ -221,7 +221,7 @@ private:
|
|||||||
void update_master_cycle_info();
|
void update_master_cycle_info();
|
||||||
void set_low_disk_slaves_maintenance();
|
void set_low_disk_slaves_maintenance();
|
||||||
void assign_new_master(MariaDBServer* new_master);
|
void assign_new_master(MariaDBServer* new_master);
|
||||||
bool slaves_using_gtid(json_t** error_out);
|
bool slaves_using_gtid(const MariaDBServer* master_server, json_t** error_out);
|
||||||
|
|
||||||
// Switchover methods
|
// Switchover methods
|
||||||
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
|
||||||
@ -242,8 +242,9 @@ private:
|
|||||||
void handle_auto_failover();
|
void handle_auto_failover();
|
||||||
bool cluster_supports_failover(std::string* reasons_out);
|
bool cluster_supports_failover(std::string* reasons_out);
|
||||||
bool slave_receiving_events();
|
bool slave_receiving_events();
|
||||||
bool failover_check(std::string* error_out);
|
bool failover_prepare(MariaDBServer** promotion_target_out, MariaDBServer** demotion_target_out,
|
||||||
bool do_failover(json_t** err_out);
|
json_t** error_out);
|
||||||
|
bool do_failover(MariaDBServer* promotion_target, MariaDBServer* demotion_target, json_t** err_out);
|
||||||
|
|
||||||
// Rejoin methods
|
// Rejoin methods
|
||||||
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
bool manual_rejoin(SERVER* rejoin_server, json_t** output);
|
||||||
@ -254,8 +255,8 @@ private:
|
|||||||
uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output);
|
uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output);
|
||||||
|
|
||||||
// Methods common to failover/switchover/rejoin
|
// Methods common to failover/switchover/rejoin
|
||||||
MariaDBServer* select_new_master(ServerArray* slaves_out, json_t** err_out);
|
MariaDBServer* select_promotion_target(ClusterOperation op, MariaDBServer* current_master,
|
||||||
MariaDBServer* switchover_select_promotion(MariaDBServer* current_master, json_t** err_out);
|
json_t** err_out);
|
||||||
bool server_is_excluded(const MariaDBServer* server);
|
bool server_is_excluded(const MariaDBServer* server);
|
||||||
bool is_candidate_better(const MariaDBServer* candidate, const MariaDBServer* current_best,
|
bool is_candidate_better(const MariaDBServer* candidate, const MariaDBServer* current_best,
|
||||||
uint32_t gtid_domain, std::string* reason_out = NULL);
|
uint32_t gtid_domain, std::string* reason_out = NULL);
|
||||||
@ -267,6 +268,9 @@ private:
|
|||||||
bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerArray& slaves,
|
bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerArray& slaves,
|
||||||
int seconds_remaining);
|
int seconds_remaining);
|
||||||
void report_and_disable(const std::string& operation, const std::string& setting_name, bool* setting_var);
|
void report_and_disable(const std::string& operation, const std::string& setting_name, bool* setting_var);
|
||||||
|
bool check_gtid_replication(const MariaDBServer* demotion_target, json_t** error_out);
|
||||||
|
ServerArray get_redirectables(const MariaDBServer* promotion_target,
|
||||||
|
const MariaDBServer* demotion_target);
|
||||||
|
|
||||||
// Other methods
|
// Other methods
|
||||||
void disable_setting(const std::string& setting);
|
void disable_setting(const std::string& setting);
|
||||||
|
@ -59,3 +59,9 @@ private:
|
|||||||
const std::string m_separator;
|
const std::string m_separator;
|
||||||
std::string m_current_separator;
|
std::string m_current_separator;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class ClusterOperation
|
||||||
|
{
|
||||||
|
SWITCHOVER,
|
||||||
|
FAILOVER
|
||||||
|
};
|
@ -960,13 +960,7 @@ bool MariaDBServer::sstatus_arrays_topology_equal(const SlaveStatusArray& lhs, c
|
|||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
bool MariaDBServer::can_be_demoted_switchover(string* reason_out)
|
||||||
* Check if the server can be demoted.
|
|
||||||
*
|
|
||||||
* @param reason_out Output for the reason server cannot be demoted
|
|
||||||
* @return True, if suggested new master is a viable demotion candidate
|
|
||||||
*/
|
|
||||||
bool MariaDBServer::can_be_demoted(string* reason_out)
|
|
||||||
{
|
{
|
||||||
bool demotable = false;
|
bool demotable = false;
|
||||||
string reason;
|
string reason;
|
||||||
@ -1001,32 +995,63 @@ bool MariaDBServer::can_be_demoted(string* reason_out)
|
|||||||
return demotable;
|
return demotable;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
bool MariaDBServer::can_be_demoted_failover(string* reason_out)
|
||||||
* Check if the server can be promoted.
|
{
|
||||||
*
|
bool demotable = false;
|
||||||
* @param demotion_target The server this should be promoted to
|
string reason;
|
||||||
* @param reason_out Output for the reason server cannot be promoted
|
|
||||||
* @return True, if suggested new master is a viable promotion candidate
|
if (is_master())
|
||||||
*/
|
{
|
||||||
bool MariaDBServer::can_be_promoted(const MariaDBServer* demotion_target, std::string* reason_out)
|
reason = "it is a running master.";
|
||||||
|
}
|
||||||
|
else if (is_running())
|
||||||
|
{
|
||||||
|
reason = "it is running.";
|
||||||
|
}
|
||||||
|
else if (m_gtid_binlog_pos.empty())
|
||||||
|
{
|
||||||
|
reason = "it does not have a 'gtid_binlog_pos'.";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
demotable = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!demotable && reason_out)
|
||||||
|
{
|
||||||
|
*reason_out = reason;
|
||||||
|
}
|
||||||
|
return demotable;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MariaDBServer::can_be_promoted(ClusterOperation op,
|
||||||
|
const MariaDBServer* demotion_target,
|
||||||
|
std::string* reason_out)
|
||||||
{
|
{
|
||||||
bool promotable = false;
|
bool promotable = false;
|
||||||
string reason;
|
string reason;
|
||||||
string query_error;
|
string query_error;
|
||||||
|
|
||||||
|
auto sstatus = slave_connection_status(demotion_target);
|
||||||
if (is_master())
|
if (is_master())
|
||||||
{
|
{
|
||||||
reason = "it is already the master.";
|
reason = "it is already the master.";
|
||||||
}
|
}
|
||||||
// TODO: Check that the correct slave connection is working properly in case of switchover.
|
else if (sstatus == NULL)
|
||||||
// For failover the connection may be in CONNECTING-stage.
|
|
||||||
else if (!is_replicating_from(demotion_target))
|
|
||||||
{
|
{
|
||||||
reason = string_printf("it is not replicating from '%s'.", demotion_target->name());
|
reason = string_printf("it is not replicating from '%s'.", demotion_target->name());
|
||||||
}
|
}
|
||||||
|
else if (sstatus->gtid_io_pos.empty())
|
||||||
|
{
|
||||||
|
reason = string_printf("its slave connection to '%s' is not using gtid.", demotion_target->name());
|
||||||
|
}
|
||||||
|
else if (op == ClusterOperation::SWITCHOVER && sstatus->slave_io_running != SlaveStatus::SLAVE_IO_YES)
|
||||||
|
{
|
||||||
|
reason = string_printf("its slave connection to '%s' is broken.", demotion_target->name());
|
||||||
|
}
|
||||||
else if (!update_replication_settings(&query_error))
|
else if (!update_replication_settings(&query_error))
|
||||||
{
|
{
|
||||||
string_printf("it could not be queried: '%s'.", query_error.c_str());
|
reason = string_printf("it could not be queried: '%s'.", query_error.c_str());
|
||||||
}
|
}
|
||||||
else if (!binlog_on())
|
else if (!binlog_on())
|
||||||
{
|
{
|
||||||
@ -1044,10 +1069,37 @@ bool MariaDBServer::can_be_promoted(const MariaDBServer* demotion_target, std::s
|
|||||||
return promotable;
|
return promotable;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MariaDBServer::is_replicating_from(const MariaDBServer* target)
|
const SlaveStatus* MariaDBServer::slave_connection_status(const MariaDBServer* target)
|
||||||
{
|
{
|
||||||
// Not properly implemented yet, TODO
|
// The slave node may have several slave connections, need to find the one that is
|
||||||
return is_slave();
|
// connected to the parent. This section is quite similar to the one in
|
||||||
|
// 'build_replication_graph', although here we require that the sql thread is running.
|
||||||
|
auto master_server_id = target->m_server_id;
|
||||||
|
SlaveStatus* rval = NULL;
|
||||||
|
for (SlaveStatus& ss : m_slave_status)
|
||||||
|
{
|
||||||
|
auto master_id = ss.master_server_id;
|
||||||
|
auto io_running = ss.slave_io_running;
|
||||||
|
// Should this check 'Master_Host' and 'Master_Port' instead of server id:s?
|
||||||
|
if (master_id > 0 && master_id == master_server_id && ss.slave_sql_running)
|
||||||
|
{
|
||||||
|
if (io_running == SlaveStatus::SLAVE_IO_YES)
|
||||||
|
{
|
||||||
|
rval = &ss;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (io_running == SlaveStatus::SLAVE_IO_CONNECTING && had_status(SERVER_WAS_SLAVE))
|
||||||
|
{
|
||||||
|
// Stale connection. TODO: The SERVER_WAS_SLAVE check above is not enough in
|
||||||
|
// several situations. The previously observed live slave connections
|
||||||
|
// need to be saved distinctly to avoid a SERVER_WAS_SLAVE bit from one
|
||||||
|
// connection from affecting another.
|
||||||
|
rval = &ss;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
string SlaveStatus::to_string() const
|
string SlaveStatus::to_string() const
|
||||||
|
@ -231,12 +231,14 @@ public:
|
|||||||
bool wait_until_gtid(const GtidList& target, int timeout, json_t** err_out);
|
bool wait_until_gtid(const GtidList& target, int timeout, json_t** err_out);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is the server replicating (or trying to) from the target server.
|
* Find slave connection to the target server. If the IO thread is trying to connect
|
||||||
|
* ("Connecting"), the connection is only accepted if the 'Master_Server_Id' is known to be correct.
|
||||||
|
* If the IO or the SQL thread is stopped, the connection is not returned.
|
||||||
*
|
*
|
||||||
* @param target Immediate master or relay server
|
* @param target Immediate master or relay server
|
||||||
* @return True if replicating
|
* @return The slave status info of the slave thread, or NULL if not found or not accepted
|
||||||
*/
|
*/
|
||||||
bool is_replicating_from(const MariaDBServer* target);
|
const SlaveStatus* slave_connection_status(const MariaDBServer* target);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is binary log on? 'update_replication_settings' should be run before this function to query the data.
|
* Is binary log on? 'update_replication_settings' should be run before this function to query the data.
|
||||||
@ -393,25 +395,35 @@ public:
|
|||||||
bool failover_wait_relay_log(int seconds_remaining, json_t** err_out);
|
bool failover_wait_relay_log(int seconds_remaining, json_t** err_out);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is the server a valid demotion target?
|
* Check if the server can be demoted by switchover.
|
||||||
*
|
*
|
||||||
* @param reason_out Output explaining why server cannot be demoted
|
* @param reason_out Output explaining why server cannot be demoted
|
||||||
* @return True if server can be demoted by switchover
|
* @return True if server can be demoted
|
||||||
*/
|
*/
|
||||||
bool can_be_demoted(std::string* reason_out);
|
bool can_be_demoted_switchover(std::string* reason_out);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is the server a valid promotion target?
|
* Check if the server can be demoted by failover.
|
||||||
*
|
*
|
||||||
* @param demotion_target Which server would be demoted
|
* @param operation Switchover or failover
|
||||||
* @param reason_out Output explaining why server cannot be promoted
|
* @param reason_out Output explaining why server cannot be demoted
|
||||||
* @return True if server can be promoted by switchover
|
* @return True if server can be demoted
|
||||||
*/
|
*/
|
||||||
bool can_be_promoted(const MariaDBServer* demotion_target, std::string* reason_out);
|
bool can_be_demoted_failover(std::string* reason_out);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read the file contents and send them as sql queries to the server. Any data returned by the queries is
|
* Check if the server can be promoted by switchover or failover.
|
||||||
* discarded.
|
*
|
||||||
|
* @param op Switchover or failover
|
||||||
|
* @param demotion_target The server this should be promoted to
|
||||||
|
* @param reason_out Output for the reason server cannot be promoted
|
||||||
|
* @return True, if suggested new master is a viable promotion candidate
|
||||||
|
*/
|
||||||
|
bool can_be_promoted(ClusterOperation op, const MariaDBServer* demotion_target, std::string* reason_out);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the file contents and send them as sql queries to the server. Any data
|
||||||
|
* returned by the queries is discarded.
|
||||||
*
|
*
|
||||||
* @param server Server to send queries to
|
* @param server Server to send queries to
|
||||||
* @param path Text file path.
|
* @param path Text file path.
|
||||||
|
Reference in New Issue
Block a user