MXS-1446: Move failover parameters into mysqlmon

The `failover` and `failover_timeout` parameters are now declared as part of the mysqlmon module. The failover function was reimplemented so that its dependencies on the monitor struct are either removed or passed in as function parameters.
2017-09-28 08:15:28 +03:00
parent ef115208e6
commit d4fd34cecd
7 changed files with 44 additions and 120 deletions
--- a/include/maxscale/monitor.h
+++ b/include/maxscale/monitor.h
@ -205,8 +205,6 @@ struct mxs_monitor
    bool active; /**< True if monitor is active */
    time_t journal_max_age; /**< Maximum age of journal file */
    uint32_t script_timeout; /**< Timeout in seconds for the monitor scripts */
-    uint32_t failover_timeout; /**< Timeout in seconds for failover script */
-    bool failover; /**< Whether failover functionality is enabled */
    int64_t last_master_up; /**< Time when the last master_up event was triggered */
    int64_t last_master_down; /**< Time when the last master_down event was triggered */
    struct mxs_monitor *next;     /**< Next monitor in the linked list */
@ -255,8 +253,6 @@ extern const char CN_BACKEND_CONNECT_TIMEOUT[];
 extern const char CN_MONITOR_INTERVAL[];
 extern const char CN_JOURNAL_MAX_AGE[];
 extern const char CN_SCRIPT_TIMEOUT[];
-extern const char CN_FAILOVER[];
-extern const char CN_FAILOVER_TIMEOUT[];
 extern const char CN_SCRIPT[];
 extern const char CN_EVENTS[];

@ -298,12 +294,15 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
 *
 * This function should be called immediately after @c mon_process_state_changes.
 *
- * @param monitor Monitor whose cluster is processed
+ * @param monitor          Monitor whose cluster is processed
+ * @param failover_timeout Timeout in seconds for the failover
+ *
+ * @return True on success, false on error
 *
 * @todo Currently this only works with flat replication topologies and
 *       needs to be moved inside mysqlmon as it is MariaDB specific code.
 */
-void mon_process_failover(MXS_MONITOR *monitor);
+bool mon_process_failover(MXS_MONITOR *monitor, uint32_t failover_timeout);

 /**
 * @brief Hangup connections to failed servers
--- a/server/core/config.cc
+++ b/server/core/config.cc
@ -244,8 +244,6 @@ const char *config_monitor_params[] =
    CN_MONITOR_INTERVAL,
    CN_JOURNAL_MAX_AGE,
    CN_SCRIPT_TIMEOUT,
-    CN_FAILOVER,
-    CN_FAILOVER_TIMEOUT,
    CN_BACKEND_CONNECT_TIMEOUT,
    CN_BACKEND_READ_TIMEOUT,
    CN_BACKEND_WRITE_TIMEOUT,
@ -3189,41 +3187,6 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
                       obj->object, CN_SCRIPT_TIMEOUT, DEFAULT_SCRIPT_TIMEOUT);
        }

-        char *failover = config_get_value(obj->parameters, CN_FAILOVER);
-        if (failover)
-        {
-            int val = config_truth_value(failover);
-
-            if (val != -1)
-            {
-                monitorSetFailover(monitor, val);
-            }
-            else
-            {
-                error_count++;
-                MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
-                           CN_FAILOVER, obj->object);
-            }
-        }
-
-        char *failover_timeout = config_get_value(obj->parameters, CN_FAILOVER_TIMEOUT);
-        if (failover_timeout)
-        {
-            char *endptr;
-            long interval = strtol(failover_timeout, &endptr, 0);
-
-            if (*endptr == '\0' && interval > 0)
-            {
-                monitorSetFailoverTimeout(monitor, (uint32_t)interval);
-            }
-            else
-            {
-                error_count++;
-                MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
-                           CN_FAILOVER_TIMEOUT, obj->object);
-            }
-        }
-
        char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT);
        if (connect_timeout)
        {
--- a/server/core/config_runtime.cc
+++ b/server/core/config_runtime.cc
@ -522,25 +522,6 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, const char *key, const char *va
            monitorSetScriptTimeout(monitor, ival);
        }
    }
-    else if (strcmp(key, CN_FAILOVER_TIMEOUT) == 0)
-    {
-        long ival = get_positive_int(value);
-        if (ival)
-        {
-            valid = true;
-            monitorSetFailoverTimeout(monitor, ival);
-        }
-    }
-    else if (strcmp(key, CN_FAILOVER) == 0)
-    {
-        int val = config_truth_value(value);
-
-        if (val != -1)
-        {
-            valid = true;
-            monitorSetFailover(monitor, val);
-        }
-    }
    else
    {
        /** We're modifying module specific parameters and we need to stop the monitor */
--- a/server/core/maxscale/monitor.h
+++ b/server/core/maxscale/monitor.h
@ -35,9 +35,6 @@ MXS_BEGIN_DECLS
 /** Default script execution timeout in seconds */
 #define DEFAULT_SCRIPT_TIMEOUT 90

-/** Default failover script timeout */
-#define DEFAULT_FAILOVER_TIMEOUT 90
-
 /**
 * Monitor network timeout types
 */
@ -77,8 +74,6 @@ void monitorSetInterval (MXS_MONITOR *, unsigned long);
 bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int);
 void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value);
 void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value);
-void monitorSetFailover(MXS_MONITOR *mon, bool value);
-void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value);

 /**
 * @brief Serialize a monitor to a file
--- a/server/core/monitor.cc
+++ b/server/core/monitor.cc
@ -71,8 +71,6 @@ const char CN_BACKEND_CONNECT_TIMEOUT[]  = "backend_connect_timeout";
 const char CN_MONITOR_INTERVAL[]         = "monitor_interval";
 const char CN_JOURNAL_MAX_AGE[]          = "journal_max_age";
 const char CN_SCRIPT_TIMEOUT[]           = "script_timeout";
-const char CN_FAILOVER[]                 = "failover";
-const char CN_FAILOVER_TIMEOUT[]         = "failover_timeout";
 const char CN_SCRIPT[]                   = "script";
 const char CN_EVENTS[]                   = "events";

@ -138,8 +136,6 @@ MXS_MONITOR* monitor_alloc(const char *name, const char *module)
    mon->script_timeout = DEFAULT_SCRIPT_TIMEOUT;
    mon->parameters = NULL;
    mon->server_pending_changes = false;
-    mon->failover = false;
-    mon->failover_timeout = DEFAULT_FAILOVER_TIMEOUT;
    spinlock_init(&mon->lock);
    spinlock_acquire(&monLock);
    mon->next = allMonitors;
@ -663,16 +659,6 @@ void monitorSetScriptTimeout(MXS_MONITOR *mon, uint32_t value)
    mon->script_timeout = value;
 }

-void monitorSetFailover(MXS_MONITOR *mon, bool value)
-{
-    mon->failover = value;
-}
-
-void monitorSetFailoverTimeout(MXS_MONITOR *mon, uint32_t value)
-{
-    mon->failover_timeout = value;
-}
-
 /**
 * Set Monitor timeouts for connect/read/write
 *
@ -1560,8 +1546,6 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
    dprintf(file, "%s=%d\n", CN_BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts);
    dprintf(file, "%s=%ld\n", CN_JOURNAL_MAX_AGE, monitor->journal_max_age);
    dprintf(file, "%s=%d\n", CN_SCRIPT_TIMEOUT, monitor->script_timeout);
-    dprintf(file, "%s=%s\n", CN_FAILOVER, monitor->failover ? "true" : "false");
-    dprintf(file, "%s=%d\n", CN_FAILOVER_TIMEOUT, monitor->failover_timeout);

    if (monitor->databases)
    {
@ -1591,8 +1575,6 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
        CN_BACKEND_CONNECT_ATTEMPTS,
        CN_JOURNAL_MAX_AGE,
        CN_SCRIPT_TIMEOUT,
-        CN_FAILOVER,
-        CN_FAILOVER_TIMEOUT,
        CN_SERVERS
    };

@ -1763,8 +1745,9 @@ void mon_process_state_changes(MXS_MONITOR *monitor, const char *script, uint64_
    }
 }

-void mon_process_failover(MXS_MONITOR *monitor)
+bool mon_process_failover(MXS_MONITOR *monitor, uint32_t failover_timeout)
 {
+    bool rval = true;
    MXS_CONFIG* cnf = config_get_global_options();
    MXS_MONITOR_SERVERS* failed_master = NULL;

@ -1774,18 +1757,16 @@ void mon_process_failover(MXS_MONITOR *monitor)
        {
            if (ptr->server->last_event == MASTER_DOWN_EVENT)
            {
-                if (monitor->failover && !cnf->passive)
+                if (!cnf->passive)
                {
                    if (failed_master)
                    {
                        MXS_ALERT("Multiple failed master servers detected: "
                                  "'%s' is the first master to fail but server "
-                                  "'%s' has also triggered a master_down event."
-                                  "Aborting and disabling failover.",
+                                  "'%s' has also triggered a master_down event.",
                                  failed_master->server->unique_name,
                                  ptr->server->unique_name);
-                        monitorSetFailover(monitor, false);
-                        return;
+                        return false;
                    }
                    else
                    {
@ -1803,21 +1784,20 @@ void mon_process_failover(MXS_MONITOR *monitor)
             * since the event took place.
             */

-            if (monitor->failover && // Failover is enabled
-                !cnf->passive && // This is not a passive MaxScale
+            if (!cnf->passive && // This is not a passive MaxScale
                ptr->server->last_event == MASTER_DOWN_EVENT && // This is a master that went down
                cnf->promoted_at >= ptr->server->triggered_at && // Promoted to active after the event took place
                ptr->new_event && // Event has not yet been processed
                monitor->last_master_down > monitor->last_master_up) // Latest relevant event
            {
-                int64_t timeout = SEC_TO_HB(monitor->failover_timeout);
+                int64_t timeout = SEC_TO_HB(failover_timeout);
                int64_t t = hkheartbeat - ptr->server->triggered_at;

                if (t > timeout)
                {
                    MXS_WARNING("Failover of server '%s' did not take place within "
                                "%u seconds, failover needs to be re-triggered",
-                                ptr->server->unique_name, monitor->failover_timeout);
+                                ptr->server->unique_name, failover_timeout);
                    failed_master = ptr;
                    ptr->new_event = false;
                }
@ -1837,15 +1817,13 @@ void mon_process_failover(MXS_MONITOR *monitor)
                                   "SLAVELIST=$SLAVELIST SYNCEDLIST=$SYNCEDLIST";

        if (monitor_launch_script(monitor, failed_master, failover_cmd,
-                                  monitor->failover_timeout))
+                                  failover_timeout))
        {
-            MXS_ALERT("Failed to perform failover, disabling failover functionality. "
-                      "To enable failover functionalty, manually set 'failover'  "
-                      "to 'true' for monitor '%s' via MaxAdmin or the REST API.",
-                      monitor->name);
-            monitorSetFailover(monitor, false);
+            rval = false;
        }
    }
+
+    return rval;
 }

 static const char* monitor_state_to_string(int state)
@ -1883,8 +1861,6 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor)
    json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts));
    json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age));
    json_object_set_new(rval, CN_SCRIPT_TIMEOUT, json_integer(monitor->script_timeout));
-    json_object_set_new(rval, CN_FAILOVER, json_boolean(monitor->failover));
-    json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(monitor->script_timeout));

    /** Add custom module parameters */
    const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR);
--- a/server/modules/monitor/mysqlmon.h
+++ b/server/modules/monitor/mysqlmon.h
@ -16,22 +16,6 @@

 /**
 * @file mysqlmon.h - The MySQL monitor
- *
- * @verbatim
- * Revision History
- *
- * Date     Who                 Description
- * 08/07/13 Mark Riddoch        Initial implementation
- * 26/05/14 Massimiliano Pinto  Default values for MONITOR_INTERVAL
- * 28/05/14 Massimiliano Pinto  Addition of new fields in MYSQL_MONITOR struct
- * 24/06/14 Massimiliano Pinto  Addition of master field in MYSQL_MONITOR struct and MONITOR_MAX_NUM_SLAVES
- * 28/08/14 Massimiliano Pinto  Addition of detectStaleMaster
- * 30/10/14 Massimiliano Pinto  Addition of disableMasterFailback
- * 07/11/14 Massimiliano Pinto  Addition of NetworkTimeout: connect, read, write
- * 20/04/15 Guillaume Lefranc   Addition of availableWhenDonor
- * 22/04/15 Martin Brampton     Addition of disableMasterRoleSetting
- * 07/05/15 Markus Makela       Addition of command execution on Master server failure
- * @endverbatim
 */

 #include <maxscale/cdefs.h>
@ -79,6 +63,8 @@ typedef struct
    bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
    bool warn_failover; /**< Log a warning when failover happens */
    bool allow_external_slaves; /**< Whether to allow usage of external slave servers */
+    bool failover; /**< If master failover is enabled */
+    uint32_t failover_timeout; /**< Timeout in seconds for the master failover */
    MXS_MONITOR* monitor;
 } MYSQL_MONITOR;

--- a/server/modules/monitor/mysqlmon/mysql_mon.c
+++ b/server/modules/monitor/mysqlmon/mysql_mon.c
@ -60,6 +60,12 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor);
 static bool report_version_err = true;
 static const char* hb_table_name = "maxscale_schema.replication_heartbeat";

+static const char CN_FAILOVER[]         = "failover";
+static const char CN_FAILOVER_TIMEOUT[] = "failover_timeout";
+
+/** Default failover timeout */
+#define DEFAULT_FAILOVER_TIMEOUT "90"
+
 /**
 * The module entry point routine. It is this routine that
 * must populate the structure that is referred to as the
@ -116,6 +122,8 @@ MXS_MODULE* MXS_CREATE_MODULE()
                MXS_MODULE_OPT_NONE,
                mxs_monitor_event_enum_values
            },
+            {CN_FAILOVER, MXS_MODULE_PARAM_BOOL, "false"},
+            {CN_FAILOVER_TIMEOUT, MXS_MODULE_PARAM_COUNT, DEFAULT_FAILOVER_TIMEOUT},
            {MXS_END_MODULE_PARAMS}
        }
    };
@ -262,6 +270,8 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
    handle->script = config_copy_string(params, "script");
    handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
    handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves");
+    handle->failover = config_get_bool(params, CN_FAILOVER);
+    handle->failover_timeout = config_get_integer(params, CN_FAILOVER_TIMEOUT);

    bool error = false;

@ -319,6 +329,8 @@ static void diagnostics(DCB *dcb, const MXS_MONITOR *mon)
 {
    const MYSQL_MONITOR *handle = (const MYSQL_MONITOR *)mon->handle;

+    dcb_printf(dcb, "Failover:\t%s\n", handle->failover ? "Enabled" : "Disabled");
+    dcb_printf(dcb, "Failover Timeout:\t%u\n", handle->failover_timeout);
    dcb_printf(dcb, "MaxScale MonitorId:\t%lu\n", handle->id);
    dcb_printf(dcb, "Replication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled");
    dcb_printf(dcb, "Detect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? "enabled" : "disabled");
@ -365,6 +377,8 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
    json_object_set_new(rval, "failcount", json_integer(handle->failcount));
    json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery));
    json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication));
+    json_object_set_new(rval, CN_FAILOVER, json_boolean(handle->failover));
+    json_object_set_new(rval, CN_FAILOVER_TIMEOUT, json_integer(handle->failover_timeout));

    if (handle->script)
    {
@ -1401,7 +1415,17 @@ monitorMain(void *arg)
         * need to be launched.
         */
        mon_process_state_changes(mon, handle->script, handle->events);
-        mon_process_failover(mon);
+
+        if (handle->failover)
+        {
+            if (!mon_process_failover(mon, handle->failover_timeout))
+            {
+                MXS_ALERT("Failed to perform failover, disabling failover functionality. "
+                          "To enable failover functionality, manually set 'failover' to "
+                          "'true' for monitor '%s' via MaxAdmin or the REST API.", mon->name);
+                handle->failover = false;
+            }
+        }

        /* log master detection failure of first master becomes available after failure */
        if (root_master &&