From b448b129d03589613ccebc3b38612af5cb494941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Wed, 9 Aug 2017 14:54:52 +0300 Subject: [PATCH] MXS-1262: Move journal_max_age to MaxScale core The parameter is now defined in the monitor. Further refactoring is needed to make the interface of the journal system simpler. --- Documentation/Monitors/Monitor-Common.md | 27 +++++++++++++++++++ Documentation/Monitors/MySQL-Monitor.md | 26 ------------------ include/maxscale/monitor.h | 2 ++ server/core/config.cc | 30 +++++++++++++++++++-- server/core/maxscale/monitor.h | 6 ++++- server/core/monitor.cc | 16 ++++++++++- server/modules/monitor/mysqlmon.h | 1 - server/modules/monitor/mysqlmon/mysql_mon.c | 7 +---- 8 files changed, 78 insertions(+), 37 deletions(-) diff --git a/Documentation/Monitors/Monitor-Common.md b/Documentation/Monitors/Monitor-Common.md index d1258a50f..7f6916b31 100644 --- a/Documentation/Monitors/Monitor-Common.md +++ b/Documentation/Monitors/Monitor-Common.md @@ -115,3 +115,30 @@ lost_ndb |A MySQL Cluster node lost node membership new_master |A new Master was detected new_slave |A new Slave was detected new_ndb |A new MySQL Cluster node was found + +### `journal_max_age` + +The maximum journal file age in seconds. The default value is 28800 seconds. + +When the monitor starts, it reads any stored journal files. If the journal file +is older than the value of _journal_max_age_, it will be removed and the monitor +starts with no prior knowledge of the servers. + +## Monitor Crash Safety + +Starting with MaxScale 2.2.0, the monitor modules keep an on-disk journal of the +latest server states. This change makes the monitors crash-safe when options +that introduce states are used. It also allows the monitors to retain stateful +information when MaxScale is restarted. 
+ +For MySQL monitor, options that introduce states into the monitoring process are +the `detect_stale_master` and `detect_stale_slave` options, both of which are +enabled by default. Galeramon has the `disable_master_failback` parameter which +introduces a state. + +The default location for the server state journal is in +`/var/lib/maxscale/<monitor name>/monitor.dat` where `<monitor name>` is the +name of the monitor section in the configuration file. If MaxScale crashes or is +shut down in an uncontrolled fashion, the journal will be read when MaxScale is +started. To skip the recovery process, manually delete the journal file before +starting MaxScale. diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index 35500446e..94766bcaf 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -214,32 +214,6 @@ assigned the _Slave_ status which allows them to be used like normal slave servers. When the option is disabled, the servers will only receive the _Slave of External Server_ status and they will not be used. -### `journal_max_age` - -The maximum journal file age in seconds. The default value is 28800 seconds. - -When the MySQL monitor starts, it reads any stored journal files. If the journal -file is older than the value of _journal_max_age_, it will be removed and the -monitor starts with no prior knowledge of the servers. - -## MySQL Monitor Crash Safety - -Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of -the latest server states. This change makes the monitor crash-safe when options -that introduce states are used. It also allows the monitor to retain stateful -information when MaxScale is restarted. - -Options that introduce states into the monitoring process are the -`detect_stale_master` and `detect_stale_slave` options, both of which are -enabled by default. 
- -The default location for the server state journal is in -`/var/lib/maxscale//mysqlmon.dat` where `` is the -name of the monitor section in the configuration file. If MaxScale crashes or is -shut down in an uncontrolled fashion, the journal will be read when MaxScale is -started. To skip the recovery process, manually delete the journal file before -starting MaxScale. - ## Example 1 - Monitor script Here is an example shell script which sends an email to an admin@my.org diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index c49a9c244..574e2d26f 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -202,6 +202,7 @@ struct mxs_monitor /**< Are there any pending changes to a server? * If yes, the next monitor loop starts early. */ bool active; /**< True if monitor is active */ + time_t journal_max_age; /**< Maximum age of journal file */ struct mxs_monitor *next; /**< Next monitor in the linked list */ }; @@ -246,6 +247,7 @@ extern const char CN_BACKEND_READ_TIMEOUT[]; extern const char CN_BACKEND_WRITE_TIMEOUT[]; extern const char CN_BACKEND_CONNECT_TIMEOUT[]; extern const char CN_MONITOR_INTERVAL[]; +extern const char CN_JOURNAL_MAX_AGE[]; extern const char CN_SCRIPT[]; extern const char CN_EVENTS[]; diff --git a/server/core/config.cc b/server/core/config.cc index 74e8f3723..c2927bf48 100644 --- a/server/core/config.cc +++ b/server/core/config.cc @@ -237,6 +237,7 @@ const char *config_monitor_params[] = CN_SCRIPT, CN_EVENTS, CN_MONITOR_INTERVAL, + CN_JOURNAL_MAX_AGE, CN_BACKEND_CONNECT_TIMEOUT, CN_BACKEND_READ_TIMEOUT, CN_BACKEND_WRITE_TIMEOUT, @@ -3152,14 +3153,39 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE* { MXS_NOTICE("Invalid '%s' parameter for monitor '%s', " "using default value of %d milliseconds.", - CN_MONITOR_INTERVAL, obj->object, MONITOR_DEFAULT_INTERVAL); + CN_MONITOR_INTERVAL, obj->object, DEFAULT_MONITOR_INTERVAL); } } else { MXS_NOTICE("Monitor '%s' is missing the '%s' 
parameter, " "using default value of %d milliseconds.", - CN_MONITOR_INTERVAL, obj->object, MONITOR_DEFAULT_INTERVAL); + CN_MONITOR_INTERVAL, obj->object, DEFAULT_MONITOR_INTERVAL); + } + + char *journal_age = config_get_value(obj->parameters, CN_JOURNAL_MAX_AGE); + if (journal_age) + { + char *endptr; + long interval = strtol(journal_age, &endptr, 0); + /* The interval must be >0 because it is used as a divisor. + Perhaps a greater minimum value should be added? */ + if (*endptr == '\0' && interval > 0) + { + monitorSetJournalMaxAge(monitor, (time_t)interval); + } + else + { + error_count++; + MXS_NOTICE("Invalid '%s' parameter for monitor '%s'", + CN_JOURNAL_MAX_AGE, obj->object); + } + } + else + { + MXS_NOTICE("Monitor '%s' is missing the '%s' parameter, " + "using default value of %d milliseconds.", + CN_JOURNAL_MAX_AGE, obj->object, DEFAULT_JOURNAL_MAX_AGE); } char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT); diff --git a/server/core/maxscale/monitor.h b/server/core/maxscale/monitor.h index 681ad602b..5a4bd7c92 100644 --- a/server/core/maxscale/monitor.h +++ b/server/core/maxscale/monitor.h @@ -27,7 +27,10 @@ MXS_BEGIN_DECLS #define DEFAULT_WRITE_TIMEOUT 2 #define DEFAULT_CONNECTION_ATTEMPTS 1 -#define MONITOR_DEFAULT_INTERVAL 2000 // in milliseconds +#define DEFAULT_MONITOR_INTERVAL 2000 // in milliseconds + +/** Default maximum journal age in seconds */ +#define DEFAULT_JOURNAL_MAX_AGE 28800 /** * Monitor network timeout types @@ -66,6 +69,7 @@ bool monitorRemoveParameter(MXS_MONITOR *monitor, const char *key); void monitorSetInterval (MXS_MONITOR *, unsigned long); bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int); +void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value); /** * @brief Serialize a monitor to a file diff --git a/server/core/monitor.cc b/server/core/monitor.cc index 30c769a1e..5cd2d8b13 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -67,6 +67,7 @@ const char 
CN_BACKEND_READ_TIMEOUT[] = "backend_read_timeout"; const char CN_BACKEND_WRITE_TIMEOUT[] = "backend_write_timeout"; const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout"; const char CN_MONITOR_INTERVAL[] = "monitor_interval"; +const char CN_JOURNAL_MAX_AGE[] = "journal_max_age"; const char CN_SCRIPT[] = "script"; const char CN_EVENTS[] = "events"; @@ -125,7 +126,8 @@ MXS_MONITOR* monitor_alloc(const char *name, const char *module) mon->write_timeout = DEFAULT_WRITE_TIMEOUT; mon->connect_timeout = DEFAULT_CONNECT_TIMEOUT; mon->connect_attempts = DEFAULT_CONNECTION_ATTEMPTS; - mon->interval = MONITOR_DEFAULT_INTERVAL; + mon->interval = DEFAULT_MONITOR_INTERVAL; + mon->journal_max_age = DEFAULT_JOURNAL_MAX_AGE; mon->parameters = NULL; mon->server_pending_changes = false; spinlock_init(&mon->lock); @@ -627,6 +629,17 @@ monitorSetInterval(MXS_MONITOR *mon, unsigned long interval) mon->interval = interval; } +/** + * Set the maximum age of the monitor journal + * + * @param mon The monitor instance + * @param value The maximum journal age in seconds + */ +void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value) +{ + mon->journal_max_age = value; +} + /** * Set Monitor timeouts for connect/read/write * @@ -1585,6 +1598,7 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor) json_object_set_new(rval, CN_BACKEND_READ_TIMEOUT, json_integer(monitor->read_timeout)); json_object_set_new(rval, CN_BACKEND_WRITE_TIMEOUT, json_integer(monitor->write_timeout)); json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts)); + json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age)); /** Add custom module parameters */ const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR); diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 1139f0f27..34fabc1c6 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -79,7 
+79,6 @@ typedef struct bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */ bool warn_failover; /**< Log a warning when failover happens */ bool load_journal; /**< Whether journal file should be loaded */ - time_t journal_max_age; /**< Maximum age of journal file */ bool allow_external_slaves; /**< Whether to allow usage of external slave servers */ MXS_MONITOR* monitor; } MYSQL_MONITOR; diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index 5e2a77056..0b2ee9ee6 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -24,8 +24,6 @@ #include #include -#define DEFAULT_JOURNAL_MAX_AGE "28800" - /** Column positions for SHOW SLAVE STATUS */ #define MYSQL55_STATUS_BINLOG_POS 5 #define MYSQL55_STATUS_BINLOG_NAME 6 @@ -105,7 +103,6 @@ MXS_MODULE* MXS_CREATE_MODULE() {"failcount", MXS_MODULE_PARAM_COUNT, "5"}, {"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"}, {"allow_external_slaves", MXS_MODULE_PARAM_BOOL, "true"}, - {"journal_max_age", MXS_MODULE_PARAM_COUNT, DEFAULT_JOURNAL_MAX_AGE}, { "script", MXS_MODULE_PARAM_PATH, @@ -264,10 +261,9 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->mysql51_replication = config_get_bool(params, "mysql51_replication"); handle->script = config_copy_string(params, "script"); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); - handle->journal_max_age = config_get_integer(params, "journal_max_age"); handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves"); - if (journal_is_stale(monitor, handle->journal_max_age)) + if (journal_is_stale(monitor, monitor->journal_max_age)) { MXS_WARNING("Removing stale journal file."); remove_server_journal(monitor); @@ -375,7 +371,6 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon) json_object_set_new(rval, "failcount", json_integer(handle->failcount)); 
json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery)); json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication)); - json_object_set_new(rval, "journal_max_age", json_integer(handle->journal_max_age)); if (handle->script) {