MXS-1491: Failover can be executed manually
Also renames the config setting "failover" to "auto_failover" and removes the setting "switchover", since switchover is now always enabled.
This commit is contained in:
@ -63,9 +63,8 @@ typedef struct
|
|||||||
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
|
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
|
||||||
bool warn_set_standalone_master; /**< Log a warning when setting standalone master */
|
bool warn_set_standalone_master; /**< Log a warning when setting standalone master */
|
||||||
bool allow_external_slaves; /**< Whether to allow usage of external slave servers */
|
bool allow_external_slaves; /**< Whether to allow usage of external slave servers */
|
||||||
bool failover; /**< If master failover is enabled */
|
bool auto_failover; /**< If automatic master failover is enabled */
|
||||||
uint32_t failover_timeout; /**< Timeout in seconds for the master failover */
|
uint32_t failover_timeout; /**< Timeout in seconds for the master failover */
|
||||||
bool switchover; /**< If master switchover is enabled */
|
|
||||||
uint32_t switchover_timeout; /**< Timeout in seconds for the master switchover */
|
uint32_t switchover_timeout; /**< Timeout in seconds for the master switchover */
|
||||||
char* replication_user; /**< Replication user for failover */
|
char* replication_user; /**< Replication user for failover */
|
||||||
char* replication_password; /**< Replication password for failover*/
|
char* replication_password; /**< Replication password for failover*/
|
||||||
|
@ -99,7 +99,7 @@ static int add_slave_to_master(long *, int, long);
|
|||||||
static bool isMySQLEvent(mxs_monitor_event_t event);
|
static bool isMySQLEvent(mxs_monitor_event_t event);
|
||||||
void check_maxscale_schema_replication(MXS_MONITOR *monitor);
|
void check_maxscale_schema_replication(MXS_MONITOR *monitor);
|
||||||
static bool mon_process_failover(MYSQL_MONITOR*, uint32_t, bool*);
|
static bool mon_process_failover(MYSQL_MONITOR*, uint32_t, bool*);
|
||||||
static bool do_failover(MYSQL_MONITOR* mon);
|
static bool do_failover(MYSQL_MONITOR* mon, json_t** output);
|
||||||
static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_master,
|
static bool do_switchover(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* current_master,
|
||||||
MXS_MONITORED_SERVER* new_master,json_t** err_out);
|
MXS_MONITORED_SERVER* new_master,json_t** err_out);
|
||||||
static bool update_gtids(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER *database, MySqlServerInfo* info);
|
static bool update_gtids(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER *database, MySqlServerInfo* info);
|
||||||
@ -114,9 +114,8 @@ static void disable_setting(MYSQL_MONITOR* mon, const char* setting);
|
|||||||
static bool report_version_err = true;
|
static bool report_version_err = true;
|
||||||
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
|
static const char* hb_table_name = "maxscale_schema.replication_heartbeat";
|
||||||
|
|
||||||
static const char CN_FAILOVER[] = "failover";
|
static const char CN_AUTO_FAILOVER[] = "auto_failover";
|
||||||
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
|
static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
|
||||||
static const char CN_SWITCHOVER[] = "switchover";
|
|
||||||
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
|
static const char CN_SWITCHOVER_TIMEOUT[] = "switchover_timeout";
|
||||||
static const char CN_AUTO_JOIN[] = "auto_join";
|
static const char CN_AUTO_JOIN[] = "auto_join";
|
||||||
|
|
||||||
@ -124,7 +123,7 @@ static const char CN_AUTO_JOIN[] = "auto_join";
|
|||||||
static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure";
|
static const char CN_VERIFY_MASTER_FAILURE[] = "verify_master_failure";
|
||||||
static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout";
|
static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout";
|
||||||
|
|
||||||
// Replication credentials parameters for failover
|
// Replication credentials parameters for failover/switchover/join
|
||||||
static const char CN_REPLICATION_USER[] = "replication_user";
|
static const char CN_REPLICATION_USER[] = "replication_user";
|
||||||
static const char CN_REPLICATION_PASSWORD[] = "replication_password";
|
static const char CN_REPLICATION_PASSWORD[] = "replication_password";
|
||||||
|
|
||||||
@ -285,6 +284,47 @@ bool mysql_switchover_check(MXS_MONITOR* mon,
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that preconditions for a failover are met.
|
||||||
|
*
|
||||||
|
* @param mon Cluster monitor
|
||||||
|
* @param error_out JSON error out
|
||||||
|
* @return True if failover may proceed
|
||||||
|
*/
|
||||||
|
bool mysql_failover_check(MYSQL_MONITOR* mon, json_t** error_out)
|
||||||
|
{
|
||||||
|
// Check that there is no running master and that there is at least one running server in the cluster.
|
||||||
|
int slaves = 0;
|
||||||
|
for (MXS_MONITORED_SERVER* mon_server = mon->monitor->monitored_servers;
|
||||||
|
mon_server != NULL;
|
||||||
|
mon_server = mon_server->next)
|
||||||
|
{
|
||||||
|
uint64_t status_bits = mon_server->server->status;
|
||||||
|
uint64_t master_up = (SERVER_MASTER | SERVER_RUNNING);
|
||||||
|
if ((status_bits & master_up) == master_up)
|
||||||
|
{
|
||||||
|
string master_up_msg = string("Master server '") + mon_server->server->unique_name +
|
||||||
|
"' is running";
|
||||||
|
if (status_bits & SERVER_MAINT)
|
||||||
|
{
|
||||||
|
master_up_msg += ", although in maintenance mode";
|
||||||
|
}
|
||||||
|
master_up_msg += ".";
|
||||||
|
PRINT_MXS_JSON_ERROR(error_out, "%s Failover not allowed.", master_up_msg.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else if (SERVER_IS_SLAVE(mon_server->server))
|
||||||
|
{
|
||||||
|
slaves++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (slaves == 0)
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(error_out, "No running slaves, cannot failover.");
|
||||||
|
}
|
||||||
|
return slaves > 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle switchover
|
* Handle switchover
|
||||||
*
|
*
|
||||||
@ -324,7 +364,7 @@ bool mysql_switchover(MXS_MONITOR* mon, SERVER* new_master, SERVER* current_mast
|
|||||||
|
|
||||||
if (rv)
|
if (rv)
|
||||||
{
|
{
|
||||||
bool failover = config_get_bool(mon->parameters, CN_FAILOVER);
|
bool failover = config_get_bool(mon->parameters, CN_AUTO_FAILOVER);
|
||||||
rv = do_switchover(handle, monitored_current_master, monitored_new_master, output);
|
rv = do_switchover(handle, monitored_current_master, monitored_new_master, output);
|
||||||
|
|
||||||
if (rv)
|
if (rv)
|
||||||
@ -344,7 +384,7 @@ bool mysql_switchover(MXS_MONITOR* mon, SERVER* new_master, SERVER* current_mast
|
|||||||
{
|
{
|
||||||
// TODO: There could be a more convenient way for this.
|
// TODO: There could be a more convenient way for this.
|
||||||
MXS_CONFIG_PARAMETER p = {};
|
MXS_CONFIG_PARAMETER p = {};
|
||||||
p.name = const_cast<char*>(CN_FAILOVER);
|
p.name = const_cast<char*>(CN_AUTO_FAILOVER);
|
||||||
p.value = const_cast<char*>("false");
|
p.value = const_cast<char*>("false");
|
||||||
|
|
||||||
monitorAddParameters(mon, &p);
|
monitorAddParameters(mon, &p);
|
||||||
@ -397,21 +437,7 @@ bool mysql_handle_switchover(const MODULECMD_ARG* args, json_t** output)
|
|||||||
|
|
||||||
if (!config_get_global_options()->passive)
|
if (!config_get_global_options()->passive)
|
||||||
{
|
{
|
||||||
if (mysql_mon->switchover)
|
rv = mysql_switchover(mon, new_master, current_master, output);
|
||||||
{
|
|
||||||
rv = mysql_switchover(mon, new_master, current_master, output);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MXS_WARNING("Attempt to perform switchover %s -> %s, even though "
|
|
||||||
"switchover is not enabled.",
|
|
||||||
current_master ? current_master->unique_name : "none",
|
|
||||||
new_master->unique_name);
|
|
||||||
|
|
||||||
*output = mxs_json_error("Switchover %s -> %s not performed, as switchover is not enabled.",
|
|
||||||
current_master ? current_master->unique_name : "none",
|
|
||||||
new_master->unique_name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -427,6 +453,80 @@ bool mysql_handle_switchover(const MODULECMD_ARG* args, json_t** output)
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform user-activated failover
|
||||||
|
*
|
||||||
|
* @param mon Cluster monitor
|
||||||
|
* @param output Json error output
|
||||||
|
* @return True on success
|
||||||
|
*/
|
||||||
|
bool mysql_failover(MXS_MONITOR* mon, json_t** output)
|
||||||
|
{
|
||||||
|
bool rv = true;
|
||||||
|
MYSQL_MONITOR *handle = static_cast<MYSQL_MONITOR*>(mon->handle);
|
||||||
|
bool stopped = stop_monitor(mon);
|
||||||
|
if (stopped)
|
||||||
|
{
|
||||||
|
MXS_NOTICE("Stopped monitor %s for the duration of failover.", mon->name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MXS_NOTICE("Monitor %s already stopped, failover can proceed.", mon->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
rv = mysql_failover_check(handle, output);
|
||||||
|
if (rv)
|
||||||
|
{
|
||||||
|
rv = do_failover(handle, output);
|
||||||
|
if (rv)
|
||||||
|
{
|
||||||
|
MXS_NOTICE("Failover performed.");
|
||||||
|
if (stopped)
|
||||||
|
{
|
||||||
|
startMonitor(mon, mon->parameters);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(output, "Failover failed.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (stopped)
|
||||||
|
{
|
||||||
|
startMonitor(mon, mon->parameters);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Command handler for 'failover'
|
||||||
|
*
|
||||||
|
* @param args Arguments given by user
|
||||||
|
* @param output Json error output
|
||||||
|
* @return True on success
|
||||||
|
*/
|
||||||
|
bool mysql_handle_failover(const MODULECMD_ARG* args, json_t** output)
|
||||||
|
{
|
||||||
|
ss_dassert(args->argc == 1);
|
||||||
|
ss_dassert(MODULECMD_GET_TYPE(&args->argv[0].type) == MODULECMD_ARG_MONITOR);
|
||||||
|
|
||||||
|
MXS_MONITOR* mon = args->argv[0].value.monitor;
|
||||||
|
|
||||||
|
bool rv = false;
|
||||||
|
if (!config_get_global_options()->passive)
|
||||||
|
{
|
||||||
|
rv = mysql_failover(mon, output);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PRINT_MXS_JSON_ERROR(output, "Failover attempted but not performed, as MaxScale is in passive mode.");
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The module entry point routine. It is this routine that
|
* The module entry point routine. It is this routine that
|
||||||
* must populate the structure that is referred to as the
|
* must populate the structure that is referred to as the
|
||||||
@ -441,14 +541,14 @@ extern "C"
|
|||||||
MXS_MODULE* MXS_CREATE_MODULE()
|
MXS_MODULE* MXS_CREATE_MODULE()
|
||||||
{
|
{
|
||||||
MXS_NOTICE("Initialise the MySQL Monitor module.");
|
MXS_NOTICE("Initialise the MySQL Monitor module.");
|
||||||
|
const char ARG_MONITOR_DESC[] = "MySQL Monitor name (from configuration file)";
|
||||||
static modulecmd_arg_type_t switchover_argv[] =
|
static modulecmd_arg_type_t switchover_argv[] =
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
MODULECMD_ARG_MONITOR | MODULECMD_ARG_NAME_MATCHES_DOMAIN,
|
MODULECMD_ARG_MONITOR | MODULECMD_ARG_NAME_MATCHES_DOMAIN,
|
||||||
"MySQL Monitor name (from configuration file)"
|
ARG_MONITOR_DESC
|
||||||
},
|
},
|
||||||
{ MODULECMD_ARG_SERVER, "New master" },
|
{ MODULECMD_ARG_SERVER, "New master" },
|
||||||
{ MODULECMD_ARG_SERVER | MODULECMD_ARG_OPTIONAL, "Current master (obligatory if exists)" }
|
{ MODULECMD_ARG_SERVER | MODULECMD_ARG_OPTIONAL, "Current master (obligatory if exists)" }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -456,6 +556,18 @@ MXS_MODULE* MXS_CREATE_MODULE()
|
|||||||
mysql_handle_switchover, MXS_ARRAY_NELEMS(switchover_argv), switchover_argv,
|
mysql_handle_switchover, MXS_ARRAY_NELEMS(switchover_argv), switchover_argv,
|
||||||
"Perform master switchover");
|
"Perform master switchover");
|
||||||
|
|
||||||
|
static modulecmd_arg_type_t failover_argv[] =
|
||||||
|
{
|
||||||
|
{
|
||||||
|
MODULECMD_ARG_MONITOR | MODULECMD_ARG_NAME_MATCHES_DOMAIN,
|
||||||
|
ARG_MONITOR_DESC
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
modulecmd_register_command(MXS_MODULE_NAME, "failover", MODULECMD_TYPE_ACTIVE,
|
||||||
|
mysql_handle_failover, MXS_ARRAY_NELEMS(failover_argv), failover_argv,
|
||||||
|
"Perform master failover");
|
||||||
|
|
||||||
static MXS_MONITOR_OBJECT MyObject =
|
static MXS_MONITOR_OBJECT MyObject =
|
||||||
{
|
{
|
||||||
startMonitor,
|
startMonitor,
|
||||||
@ -500,9 +612,8 @@ MXS_MODULE* MXS_CREATE_MODULE()
|
|||||||
MXS_MODULE_OPT_NONE,
|
MXS_MODULE_OPT_NONE,
|
||||||
mxs_monitor_event_enum_values
|
mxs_monitor_event_enum_values
|
||||||
},
|
},
|
||||||
{CN_FAILOVER, MXS_MODULE_PARAM_BOOL, "false"},
|
{CN_AUTO_FAILOVER, MXS_MODULE_PARAM_BOOL, "false"},
|
||||||
{CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_FAILOVER_TIMEOUT},
|
{CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_FAILOVER_TIMEOUT},
|
||||||
{CN_SWITCHOVER, MXS_MODULE_PARAM_BOOL, "false"},
|
|
||||||
{CN_SWITCHOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_SWITCHOVER_TIMEOUT},
|
{CN_SWITCHOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_SWITCHOVER_TIMEOUT},
|
||||||
{CN_REPLICATION_USER, MXS_MODULE_PARAM_STRING},
|
{CN_REPLICATION_USER, MXS_MODULE_PARAM_STRING},
|
||||||
{CN_REPLICATION_PASSWORD, MXS_MODULE_PARAM_STRING},
|
{CN_REPLICATION_PASSWORD, MXS_MODULE_PARAM_STRING},
|
||||||
@ -820,9 +931,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
|||||||
handle->script = config_copy_string(params, "script");
|
handle->script = config_copy_string(params, "script");
|
||||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||||
handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves");
|
handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves");
|
||||||
handle->failover = config_get_bool(params, CN_FAILOVER);
|
handle->auto_failover = config_get_bool(params, CN_AUTO_FAILOVER);
|
||||||
handle->failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT);
|
handle->failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT);
|
||||||
handle->switchover = config_get_bool(params, CN_SWITCHOVER);
|
|
||||||
handle->switchover_timeout = config_get_integer(params, CN_SWITCHOVER_TIMEOUT);
|
handle->switchover_timeout = config_get_integer(params, CN_SWITCHOVER_TIMEOUT);
|
||||||
handle->verify_master_failure = config_get_bool(params, CN_VERIFY_MASTER_FAILURE);
|
handle->verify_master_failure = config_get_bool(params, CN_VERIFY_MASTER_FAILURE);
|
||||||
handle->master_failure_timeout = config_get_integer(params, CN_MASTER_FAILURE_TIMEOUT);
|
handle->master_failure_timeout = config_get_integer(params, CN_MASTER_FAILURE_TIMEOUT);
|
||||||
@ -921,9 +1031,8 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
|
|||||||
{
|
{
|
||||||
const MYSQL_MONITOR *handle = (const MYSQL_MONITOR *)mon->handle;
|
const MYSQL_MONITOR *handle = (const MYSQL_MONITOR *)mon->handle;
|
||||||
|
|
||||||
dcb_printf(dcb, "Failover:\t%s\n", handle->failover ? "Enabled" : "Disabled");
|
dcb_printf(dcb, "Automatic failover:\t%s\n", handle->auto_failover ? "Enabled" : "Disabled");
|
||||||
dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout);
|
dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout);
|
||||||
dcb_printf(dcb, "Switchover:\t%s\n", handle->switchover ? "Enabled" : "Disabled");
|
|
||||||
dcb_printf(dcb, "Switchover Timeout:\t%u\n", handle->switchover_timeout);
|
dcb_printf(dcb, "Switchover Timeout:\t%u\n", handle->switchover_timeout);
|
||||||
dcb_printf(dcb, "Auto join:\t%s\n", handle->auto_join_cluster ? "Enabled" : "Disabled");
|
dcb_printf(dcb, "Auto join:\t%s\n", handle->auto_join_cluster ? "Enabled" : "Disabled");
|
||||||
dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id);
|
dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id);
|
||||||
@ -972,9 +1081,8 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
|
|||||||
json_object_set_new(rval, "failcount", json_integer(handle->failcount));
|
json_object_set_new(rval, "failcount", json_integer(handle->failcount));
|
||||||
json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery));
|
json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery));
|
||||||
json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication));
|
json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication));
|
||||||
json_object_set_new(rval, CN_FAILOVER, json_boolean(handle->failover));
|
json_object_set_new(rval, CN_AUTO_FAILOVER, json_boolean(handle->auto_failover));
|
||||||
json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout));
|
json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout));
|
||||||
json_object_set_new(rval, CN_SWITCHOVER, json_boolean(handle->switchover));
|
|
||||||
json_object_set_new(rval, CN_SWITCHOVER_TIMEOUT, json_integer(handle->switchover_timeout));
|
json_object_set_new(rval, CN_SWITCHOVER_TIMEOUT, json_integer(handle->switchover_timeout));
|
||||||
json_object_set_new(rval, CN_AUTO_JOIN, json_boolean(handle->auto_join_cluster));
|
json_object_set_new(rval, CN_AUTO_JOIN, json_boolean(handle->auto_join_cluster));
|
||||||
|
|
||||||
@ -2100,17 +2208,18 @@ monitorMain(void *arg)
|
|||||||
mon_process_state_changes(mon, handle->script, handle->events);
|
mon_process_state_changes(mon, handle->script, handle->events);
|
||||||
bool failover_performed = false; // Has an automatic failover been performed this loop?
|
bool failover_performed = false; // Has an automatic failover been performed this loop?
|
||||||
|
|
||||||
if (handle->failover)
|
if (handle->auto_failover)
|
||||||
{
|
{
|
||||||
|
const char RE_ENABLE_FMT[] = "%s To re-enable failover, manually set '%s' to 'true' for monitor "
|
||||||
|
"'%s' via MaxAdmin or the REST API, or restart MaxScale.";
|
||||||
if (failover_not_possible(handle))
|
if (failover_not_possible(handle))
|
||||||
{
|
{
|
||||||
MXS_ERROR("Failover is not possible due to one or more problems in "
|
const char PROBLEMS[] = "Failover is not possible due to one or more problems in the "
|
||||||
"the replication configuration, disabling failover. "
|
"replication configuration, disabling automatic failover. Failover should only be "
|
||||||
"Failover should only be enabled after the replication "
|
"enabled after the replication configuration has been fixed.";
|
||||||
"configuration has been fixed. To re-enable failover "
|
MXS_ERROR(RE_ENABLE_FMT, PROBLEMS, CN_AUTO_FAILOVER, mon->name);
|
||||||
"functionality, manually set '%s' to 'true' for monitor "
|
handle->auto_failover = false;
|
||||||
"'%s' via MaxAdmin or the REST API.", CN_FAILOVER, mon->name);
|
disable_setting(handle, CN_AUTO_FAILOVER);
|
||||||
handle->failover = false;
|
|
||||||
}
|
}
|
||||||
else if (master_maybe_dead(handle) && master_still_alive(handle))
|
else if (master_maybe_dead(handle) && master_still_alive(handle))
|
||||||
{
|
{
|
||||||
@ -2118,12 +2227,10 @@ monitorMain(void *arg)
|
|||||||
}
|
}
|
||||||
else if (!mon_process_failover(handle, handle->failover_timeout, &failover_performed))
|
else if (!mon_process_failover(handle, handle->failover_timeout, &failover_performed))
|
||||||
{
|
{
|
||||||
MXS_ALERT("Failed to perform failover, disabling failover functionality. "
|
const char FAILED[] = "Failed to perform failover, disabling automatic failover.";
|
||||||
"To enable failover functionality, manually set 'failover' to "
|
MXS_ERROR(RE_ENABLE_FMT, FAILED, CN_AUTO_FAILOVER, mon->name);
|
||||||
"'true' for monitor '%s' via MaxAdmin or the REST API.", mon->name);
|
handle->auto_failover = false;
|
||||||
|
disable_setting(handle, CN_AUTO_FAILOVER);
|
||||||
mon_alter_parameter(handle->monitor, CN_FAILOVER, "false");
|
|
||||||
handle->failover = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2977,7 +3084,7 @@ bool mon_process_failover(MYSQL_MONITOR* monitor, uint32_t failover_timeout, boo
|
|||||||
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
MXS_NOTICE("Performing automatic failover to replace failed master '%s'.",
|
||||||
failed_master->server->unique_name);
|
failed_master->server->unique_name);
|
||||||
failed_master->new_event = false;
|
failed_master->new_event = false;
|
||||||
rval = do_failover(monitor);
|
rval = mysql_failover_check(monitor, NULL) && do_failover(monitor, NULL);
|
||||||
if (rval)
|
if (rval)
|
||||||
{
|
{
|
||||||
*cluster_modified_out = true;
|
*cluster_modified_out = true;
|
||||||
@ -3170,9 +3277,10 @@ MXS_MONITORED_SERVER* failover_select_new_master(MYSQL_MONITOR* mon,
|
|||||||
*
|
*
|
||||||
* @param mon The monitor
|
* @param mon The monitor
|
||||||
* @param new_master The new master
|
* @param new_master The new master
|
||||||
|
* @param err_out Json error output
|
||||||
* @return True if relay log was processed within time limit, or false if time ran out or an error occurred.
|
* @return True if relay log was processed within time limit, or false if time ran out or an error occurred.
|
||||||
*/
|
*/
|
||||||
bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master)
|
bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_master, json_t** err_out)
|
||||||
{
|
{
|
||||||
MySqlServerInfo* master_info = get_server_info(mon, new_master);
|
MySqlServerInfo* master_info = get_server_info(mon, new_master);
|
||||||
time_t begin = time(NULL);
|
time_t begin = time(NULL);
|
||||||
@ -3216,8 +3324,9 @@ bool failover_wait_relay_log(MYSQL_MONITOR* mon, MXS_MONITORED_SERVER* new_maste
|
|||||||
reason = "Invalid Gtid(s) (current_pos: " + master_info->gtid_current_pos.to_string() +
|
reason = "Invalid Gtid(s) (current_pos: " + master_info->gtid_current_pos.to_string() +
|
||||||
", io_pos: " + master_info->slave_status.gtid_io_pos.to_string() + ")";
|
", io_pos: " + master_info->slave_status.gtid_io_pos.to_string() + ")";
|
||||||
}
|
}
|
||||||
MXS_ERROR("Failover: %s while waiting for server '%s' to process relay log. Cancelling failover.",
|
PRINT_MXS_JSON_ERROR(err_out, "Failover: %s while waiting for server '%s' to process relay log. "
|
||||||
reason.c_str(), new_master->server->unique_name);
|
"Cancelling failover.",
|
||||||
|
reason.c_str(), new_master->server->unique_name);
|
||||||
rval = false;
|
rval = false;
|
||||||
}
|
}
|
||||||
return rval;
|
return rval;
|
||||||
@ -3318,28 +3427,29 @@ int failover_redirect_slaves(MYSQL_MONITOR* mon, ServerVector& slaves, MXS_MONIT
|
|||||||
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
* Performs failover for a simple topology (1 master, N slaves, no intermediate masters).
|
||||||
*
|
*
|
||||||
* @param mon Server cluster monitor
|
* @param mon Server cluster monitor
|
||||||
|
* @param err_out Json output
|
||||||
* @return True if successful
|
* @return True if successful
|
||||||
*/
|
*/
|
||||||
static bool do_failover(MYSQL_MONITOR* mon)
|
static bool do_failover(MYSQL_MONITOR* mon, json_t** err_out)
|
||||||
{
|
{
|
||||||
// Topology has already been tested to be simple.
|
// Topology has already been tested to be simple.
|
||||||
if (mon->master_gtid_domain < 0)
|
if (mon->master_gtid_domain < 0)
|
||||||
{
|
{
|
||||||
MXS_ERROR("Cluster gtid domain is unknown. Cannot failover.");
|
PRINT_MXS_JSON_ERROR(err_out, "Cluster gtid domain is unknown. Cannot failover.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Step 1: Select new master. Also populate a vector with all slaves not the selected master.
|
// Step 1: Select new master. Also populate a vector with all slaves not the selected master.
|
||||||
ServerVector slaves;
|
ServerVector slaves;
|
||||||
MXS_MONITORED_SERVER* new_master = failover_select_new_master(mon, &slaves, NULL);
|
MXS_MONITORED_SERVER* new_master = failover_select_new_master(mon, &slaves, err_out);
|
||||||
if (new_master == NULL)
|
if (new_master == NULL)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
bool rval = false;
|
bool rval = false;
|
||||||
// Step 2: Wait until relay log consumed.
|
// Step 2: Wait until relay log consumed.
|
||||||
if (failover_wait_relay_log(mon, new_master) &&
|
if (failover_wait_relay_log(mon, new_master, err_out) &&
|
||||||
// Step 3: Stop and reset slave, set read-only to 0.
|
// Step 3: Stop and reset slave, set read-only to 0.
|
||||||
failover_promote_new_master(mon, new_master, NULL))
|
failover_promote_new_master(mon, new_master, err_out))
|
||||||
{
|
{
|
||||||
// Step 4: Redirect slaves.
|
// Step 4: Redirect slaves.
|
||||||
int redirects = failover_redirect_slaves(mon, slaves, new_master);
|
int redirects = failover_redirect_slaves(mon, slaves, new_master);
|
||||||
|
@ -1585,8 +1585,6 @@ struct subcommand alteroptions[] =
|
|||||||
"backend_connect_attempts Number of re-connection attempts\n"
|
"backend_connect_attempts Number of re-connection attempts\n"
|
||||||
"journal_max_age Maximum age of server state journal\n"
|
"journal_max_age Maximum age of server state journal\n"
|
||||||
"script_timeout Timeout in seconds for monitor scripts\n"
|
"script_timeout Timeout in seconds for monitor scripts\n"
|
||||||
"failover Enable or disable failover\n"
|
|
||||||
"failover_timeout Failover timeout in seconds\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
"This will alter an existing parameter of a monitor. To remove parameters,\n"
|
"This will alter an existing parameter of a monitor. To remove parameters,\n"
|
||||||
"pass an empty value for a key e.g. 'maxadmin alter monitor my-monitor my-key='\n"
|
"pass an empty value for a key e.g. 'maxadmin alter monitor my-monitor my-key='\n"
|
||||||
|
Reference in New Issue
Block a user