diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index 51f4d2b82..45174bc6f 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -205,8 +205,6 @@ struct mxs_monitor bool active; /**< True if monitor is active */ time_t journal_max_age; /**< Maximum age of journal file */ uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */ - uint32_t failover_timeout; /**< Timeout in seconds for failover script */ - bool failover; /**< Whether failover functionality is enabled */ int64_t last_master_up; /**< Time when the last master_up event was triggered */ int64_t last_master_down; /**< Time when the last master_down event was triggered */ struct mxs_monitor *next; /**< Next monitor in the linked list */ @@ -255,8 +253,6 @@ extern const char CN_BACKEND_CONNECT_TIMEOUT[]; extern const char CN_MONITOR_INTERVAL[]; extern const char CN_JOURNAL_MAX_AGE[]; extern const char CN_SCRIPT_TIMEOUT[]; -extern const char CN_FAILOVER[]; -extern const char CN_FAILOVER_TIMEOUT[]; extern const char CN_SCRIPT[]; extern const char CN_EVENTS[]; @@ -298,12 +294,15 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_ * * This function should be called immediately after @c mon_process_state_changes. * - * @param monitor Monitor whose cluster is processed + * @param monitor Monitor whose cluster is processed + * @param failover_timeout Timeout in seconds for the failover + * + * @return True on success, false on error * * @todo Currently this only works with flat replication topologies and * needs to be moved inside mysqlmon as it is MariaDB specific code. */ -void mon_process_failover(MXS_MONITOR *monitor); +bool mon_process_failover(MXS_MONITOR *monitor, uint32_t failover_timeout); /** * @brief Hangup connections to failed servers diff --git a/server/core/config.cc b/server/core/config.cc index 2aea8955f..e0ba27e9d 100644 --- a/server/core/config.cc +++ b/server/core/config.cc @@ -244,8 +244,6 @@ const char *config_monitor_params[] = CN_MONITOR_INTERVAL, CN_JOURNAL_MAX_AGE, CN_SCRIPT_TIMEOUT, - CN_FAILOVER, - CN_FAILOVER_TIMEOUT, CN_BACKEND_CONNECT_TIMEOUT, CN_BACKEND_READ_TIMEOUT, CN_BACKEND_WRITE_TIMEOUT, @@ -3189,41 +3187,6 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE* obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT); } - char *failover = config_get_value(obj->parameters, CN_FAILOVER); - if (failover) - { - int val = config_truth_value(failover); - - if (val != -1) - { - monitorSetFailover(monitor, val); - } - else - { - error_count++; - MXS_NOTICE("Invalid '%s' parameter for monitor '%s'", - CN_FAILOVER, obj->object); - } - } - - char *failover_timeout = config_get_value(obj->parameters, CN_FAILOVER_TIMEOUT); - if (failover_timeout) - { - char *endptr; - long interval = strtol(failover_timeout, &endptr, 0); - - if (*endptr == '\0' && interval > 0) - { - monitorSetFailoverTimeout(monitor, (uint32_t)interval); - } - else - { - error_count++; - MXS_NOTICE("Invalid '%s' parameter for monitor '%s'", - CN_FAILOVER_TIMEOUT, obj->object); - } - } - char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT); if (connect_timeout) { diff --git a/server/core/config_runtime.cc b/server/core/config_runtime.cc index 90fc2f9d3..9000127df 100644 --- a/server/core/config_runtime.cc +++ b/server/core/config_runtime.cc @@ -522,25 +522,6 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, const char *key, const char *va monitorSetScriptTimeout(monitor, ival); } } - else if (strcmp(key, CN_FAILOVER_TIMEOUT) == 0) - { - long ival = get_positive_int(value); - if (ival) - { - valid = true; - monitorSetFailoverTimeout(monitor, ival); - } - } - else if (strcmp(key, CN_FAILOVER) == 0) - { - int val = config_truth_value(value); - - if (val != -1) - { - valid = true; - monitorSetFailover(monitor, val); - } - } else { /** We're modifying module specific parameters and we need to stop the monitor */ diff --git a/server/core/maxscale/monitor.h b/server/core/maxscale/monitor.h index a4e838824..7b91c58a5 100644 --- a/server/core/maxscale/monitor.h +++ b/server/core/maxscale/monitor.h @@ -35,9 +35,6 @@ MXS_BEGIN_DECLS /** Default script execution timeout in seconds */ #define DEFAULT_SCRIPT_TIMEOUT 90 -/** Default failover script timeout */ -#define DEFAULT_FAILOVER_TIMEOUT 90 - /** * Monitor network timeout types */ @@ -77,8 +74,6 @@ void monitorSetInterval (MXS_MONITOR *, unsigned long); bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int); void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value); void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value); -void monitorSetFailover(MXS_MONITOR *mon, bool value); -void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value); /** * @brief Serialize a monitor to a file diff --git a/server/core/monitor.cc b/server/core/monitor.cc index 44227b09b..171d57c68 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -71,8 +71,6 @@ const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout"; const char CN_MONITOR_INTERVAL[] = "monitor_interval"; const char CN_JOURNAL_MAX_AGE[] = "journal_max_age"; const char CN_SCRIPT_TIMEOUT[] = "script_timeout"; -const char CN_FAILOVER[] = "failover"; -const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; const char CN_SCRIPT[] = "script"; const char CN_EVENTS[] = "events"; @@ -138,8 +136,6 @@ MXS_MONITOR* monitor_alloc(const char *name, const char *module) mon->script_timeout = DEFAULT_SCRIPT_TIMEOUT; mon->parameters = NULL; mon->server_pending_changes = false; - mon->failover = false; - mon->failover_timeout = DEFAULT_FAILOVER_TIMEOUT; spinlock_init(&mon->lock); spinlock_acquire(&monLock); mon->next = allMonitors; @@ -663,16 +659,6 @@ void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value) mon->script_timeout = value; } -void monitorSetFailover(MXS_MONITOR *mon, bool value) -{ - mon->failover = value; -} - -void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value) -{ - mon->failover_timeout = value; -} - /** * Set Monitor timeouts for connect/read/write * @@ -1560,8 +1546,6 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts); dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age); dprintf(file, "%s=%d\n", CN_SCRIPT_TIMEOUT, monitor->script_timeout); - dprintf(file, "%s=%s\n", CN_FAILOVER, monitor->failover ? "true" : "false"); - dprintf(file, "%s=%d\n", CN_FAILOVER_TIMEOUT, monitor->failover_timeout); if (monitor->databases) { @@ -1591,8 +1575,6 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena CN_BACKEND_CONNECT_ATTEMPTS, CN_JOURNAL_MAX_AGE, CN_SCRIPT_TIMEOUT, - CN_FAILOVER, - CN_FAILOVER_TIMEOUT, CN_SERVERS }; @@ -1763,8 +1745,9 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_ } } -void mon_process_failover(MXS_MONITOR *monitor) +bool mon_process_failover(MXS_MONITOR *monitor, uint32_t failover_timeout) { + bool rval = true; MXS_CONFIG* cnf = config_get_global_options(); MXS_MONITOR_SERVERS* failed_master = NULL; @@ -1774,18 +1757,16 @@ void mon_process_failover(MXS_MONITOR *monitor) { if (ptr->server->last_event == MASTER_DOWN_EVENT) { - if (monitor->failover && !cnf->passive) + if (!cnf->passive) { if (failed_master) { MXS_ALERT("Multiple failed master servers detected: " "'%s' is the first master to fail but server " - "'%s' has also triggered a master_down event." - "Aborting and disabling failover.", + "'%s' has also triggered a master_down event.", failed_master->server->unique_name, ptr->server->unique_name); - monitorSetFailover(monitor, false); - return; + return false; } else { @@ -1803,21 +1784,20 @@ void mon_process_failover(MXS_MONITOR *monitor) * since the event took place. */ - if (monitor->failover && // Failover is enabled - !cnf->passive && // This is not a passive MaxScale + if (!cnf->passive && // This is not a passive MaxScale ptr->server->last_event == MASTER_DOWN_EVENT && // This is a master that went down cnf->promoted_at >= ptr->server->triggered_at && // Promoted to active after the event took place ptr->new_event && // Event has not yet been processed monitor->last_master_down > monitor->last_master_up) // Latest relevant event { - int64_t timeout = SEC_TO_HB(monitor->failover_timeout); + int64_t timeout = SEC_TO_HB(failover_timeout); int64_t t = hkheartbeat - ptr->server->triggered_at; if (t > timeout) { MXS_WARNING("Failover of server '%s' did not take place within " "%u seconds, failover needs to be re-triggered", - ptr->server->unique_name, monitor->failover_timeout); + ptr->server->unique_name, failover_timeout); failed_master = ptr; ptr->new_event = false; } @@ -1837,15 +1817,13 @@ void mon_process_failover(MXS_MONITOR *monitor) "SLAVELIST=$SLAVELIST SYNCEDLIST=$SYNCEDLIST"; if (monitor_launch_script(monitor, failed_master, failover_cmd, - monitor->failover_timeout)) + failover_timeout)) { - MXS_ALERT("Failed to perform failover, disabling failover functionality. " - "To enable failover functionalty, manually set 'failover' " - "to 'true' for monitor '%s' via MaxAdmin or the REST API.", - monitor->name); - monitorSetFailover(monitor, false); + rval = false; } } + + return rval; } static const char* monitor_state_to_string(int state) @@ -1883,8 +1861,6 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor) json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts)); json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age)); json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout)); - json_object_set_new(rval, CN_FAILOVER, json_boolean(monitor->failover)); - json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(monitor->script_timeout)); /** Add custom module parameters */ const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR); diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 8e8e8a723..3842e3688 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -16,22 +16,6 @@ /** * @file mysqlmon.h - The MySQL monitor - * - * @verbatim - * Revision History - * - * Date Who Description - * 08/07/13 Mark Riddoch Initial implementation - * 26/05/14 Massimiliano Pinto Default values for MONITOR_INTERVAL - * 28/05/14 Massimiliano Pinto Addition of new fields in MYSQL_MONITOR struct - * 24/06/14 Massimiliano Pinto Addition of master field in MYSQL_MONITOR struct and MONITOR_MAX_NUM_SLAVES - * 28/08/14 Massimiliano Pinto Addition of detectStaleMaster - * 30/10/14 Massimiliano Pinto Addition of disableMasterFailback - * 07/11/14 Massimiliano Pinto Addition of NetworkTimeout: connect, read, write - * 20/04/15 Guillaume Lefranc Addition of availableWhenDonor - * 22/04/15 Martin Brampton Addition of disableMasterRoleSetting - * 07/05/15 Markus Makela Addition of command execution on Master server failure - * @endverbatim */ #include @@ -79,6 +63,8 @@ typedef struct bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */ bool warn_failover; /**< Log a warning when failover happens */ bool allow_external_slaves; /**< Whether to allow usage of external slave servers */ + bool failover; /**< If master failover is enabled */ + uint32_t failover_timeout; /**< Timeout in seconds for the master failover */ MXS_MONITOR* monitor; } MYSQL_MONITOR; diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index 77b14f94e..f6da229d5 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -60,6 +60,12 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor); static bool report_version_err = true; static const char* hb_table_name = "maxscale_schema.replication_heartbeat"; +static const char CN_FAILOVER[] = "failover"; +static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout"; + +/** Default failover timeout */ +#define DEFAULT_FAILOVER_TIMEOUT "90" + /** * The module entry point routine. It is this routine that * must populate the structure that is referred to as the @@ -116,6 +122,8 @@ MXS_MODULE* MXS_CREATE_MODULE() MXS_MODULE_OPT_NONE, mxs_monitor_event_enum_values }, + {CN_FAILOVER, MXS_MODULE_PARAM_BOOL, "false"}, + {CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_FAILOVER_TIMEOUT}, {MXS_END_MODULE_PARAMS} } }; @@ -262,6 +270,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->script = config_copy_string(params, "script"); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves"); + handle->failover = config_get_bool(params, CN_FAILOVER); + handle->failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT); bool error = false; @@ -319,6 +329,8 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon) { const MYSQL_MONITOR *handle = (const MYSQL_MONITOR *)mon->handle; + dcb_printf(dcb, "Failover:\t%s\n", handle->failover ? "Enabled" : "Disabled"); + dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout); dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id); dcb_printf(dcb, "Replication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled"); dcb_printf(dcb, "Detect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled"); @@ -365,6 +377,8 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon) json_object_set_new(rval, "failcount", json_integer(handle->failcount)); json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery)); json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication)); + json_object_set_new(rval, CN_FAILOVER, json_boolean(handle->failover)); + json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout)); if (handle->script) { @@ -1401,7 +1415,17 @@ monitorMain(void *arg) * need to be launched. */ mon_process_state_changes(mon, handle->script, handle->events); - mon_process_failover(mon); + + if (handle->failover) + { + if (!mon_process_failover(mon, handle->failover_timeout)) + { + MXS_ALERT("Failed to perform failover, disabling failover functionality. " + "To enable failover functionality, manually set 'failover' to " + "'true' for monitor '%s' via MaxAdmin or the REST API.", mon->name); + handle->failover = false; + } + } /* log master detection failure of first master becomes available after failure */ if (root_master &&