diff --git a/Documentation/Monitors/MySQL-Monitor.md b/Documentation/Monitors/MySQL-Monitor.md index c9ca7137b..9a6cb294b 100644 --- a/Documentation/Monitors/MySQL-Monitor.md +++ b/Documentation/Monitors/MySQL-Monitor.md @@ -200,6 +200,14 @@ external agent that automatically reintegrates failed servers into the cluster. One of these agents is the _replication-manager_ which automatically configures the failed servers as new slaves of the current master. +### `journal_max_age` + +The maximum journal file age in seconds. The default value is 28800 seconds (8 hours). + +When the MySQL monitor starts, it reads any stored journal files. If the journal +file is older than the value of _journal_max_age_, it is removed and the +monitor starts with no prior knowledge of the servers. + ## MySQL Monitor Crash Safety Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index b91bfece1..6e013b62f 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -79,29 +79,39 @@ typedef struct down before failover is initiated */ bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */ bool warn_failover; /**< Log a warning when failover happens */ - bool load_backup; /**< Whether backup file should be loaded */ + bool load_journal; /**< Whether journal file should be loaded */ + time_t journal_max_age; /**< Maximum age of journal file */ } MYSQL_MONITOR; /** - * @brief Store a backup of server states + * @brief Store a journal of server states * - * @param monitor Monitor to backup + * @param monitor Monitor to journal */ -void store_server_backup(MXS_MONITOR *monitor); +void store_server_journal(MXS_MONITOR *monitor); /** - * @brief Load a backup of server states + * @brief Load a journal of server states * - * @param monitor Monitor where backup is loaded + * @param monitor Monitor where journal is loaded */ -void
load_server_backup(MXS_MONITOR *monitor); +void load_server_journal(MXS_MONITOR *monitor); /** - * @brief Remove stored backup file + * @brief Remove stored journal file * - * @param monitor Monitor whose backup is removed + * @param monitor Monitor whose journal is removed */ -void remove_server_backup(MXS_MONITOR *monitor); +void remove_server_journal(MXS_MONITOR *monitor); + +/** + * @brief Check whether the journal is too old + * + * @param monitor Monitor to check + * @param max_age Maximum journal age in seconds, measured from its last modification + * @return True if the journal is too old, missing or cannot be inspected, otherwise false + */ +bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age); MXS_END_DECLS diff --git a/server/modules/monitor/mysqlmon/CMakeLists.txt b/server/modules/monitor/mysqlmon/CMakeLists.txt index 51ec3828f..714d7b908 100644 --- a/server/modules/monitor/mysqlmon/CMakeLists.txt +++ b/server/modules/monitor/mysqlmon/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(mysqlmon SHARED mysql_mon.c mysql_mon_backup.c) +add_library(mysqlmon SHARED mysql_mon.c mysql_mon_journal.c) target_link_libraries(mysqlmon maxscale-common) add_dependencies(mysqlmon pcre2) set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0") diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index b0aef1395..09602a6f9 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -53,6 +53,8 @@ #include #include +#define DEFAULT_JOURNAL_MAX_AGE "28800" + /** Column positions for SHOW SLAVE STATUS */ #define MYSQL55_STATUS_BINLOG_POS 5 #define MYSQL55_STATUS_BINLOG_NAME 6 @@ -128,6 +130,7 @@ MXS_MODULE* MXS_CREATE_MODULE() {"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"}, {"failcount", MXS_MODULE_PARAM_COUNT, "5"}, {"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"}, + {"journal_max_age", MXS_MODULE_PARAM_COUNT, DEFAULT_JOURNAL_MAX_AGE}, { "script", MXS_MODULE_PARAM_PATH, @@ -269,7 +272,7 @@
startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->shutdown = 0; handle->id = config_get_global_options()->id; handle->warn_failover = true; - handle->load_backup = true; + handle->load_journal = true; spinlock_init(&handle->lock); } @@ -286,6 +289,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params) handle->mysql51_replication = config_get_bool(params, "mysql51_replication"); handle->script = config_copy_string(params, "script"); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); + handle->journal_max_age = config_get_integer(params, "journal_max_age"); + + if (journal_is_stale(monitor, handle->journal_max_age)) + { + MXS_WARNING("Removing stale journal file."); + remove_server_journal(monitor); + } bool error = false; @@ -1099,10 +1109,10 @@ monitorMain(void *arg) lock_monitor_servers(mon); servers_status_pending_to_current(mon); - if (handle->load_backup) + if (handle->load_journal) { - handle->load_backup = false; - load_server_backup(mon); + handle->load_journal = false; + load_server_journal(mon); } /* start from the first server in the list */ @@ -1382,7 +1392,7 @@ monitorMain(void *arg) mon_hangup_failed_servers(mon); servers_status_current_to_pending(mon); - store_server_backup(mon); + store_server_journal(mon); release_monitor_servers(mon); } /*< while (1) */ } diff --git a/server/modules/monitor/mysqlmon/mysql_mon_backup.c b/server/modules/monitor/mysqlmon/mysql_mon_journal.c similarity index 90% rename from server/modules/monitor/mysqlmon/mysql_mon_backup.c rename to server/modules/monitor/mysqlmon/mysql_mon_journal.c index 46cf64082..09047e707 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon_backup.c +++ b/server/modules/monitor/mysqlmon/mysql_mon_journal.c @@ -18,14 +18,15 @@ #include #include +#include /** * Crash-safe storage of server states * - * This file contains functions to store and load backups of the server states. 
+ * This file contains functions to store and load journals of the server states. */ -/** Schema version, backups must have a matching version */ +/** Schema version, journals must have a matching version */ #define MMB_SCHEMA_VERSION 1 /** Constants for byte lengths of the values */ @@ -163,7 +164,7 @@ static int get_data_file_path(MXS_MONITOR *monitor, char *path) } /** - * @brief Open stored backup file + * @brief Open stored journal file * * @param monitor Monitor to reload * @param path Output where path is stored @@ -259,7 +260,7 @@ static bool check_crc32(const uint8_t *data, uint32_t size, const uint8_t *crc_p } /** - * Process the stored backup data + * Process the stored journal data */ static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr) { @@ -300,7 +301,7 @@ static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char return true; } -void store_server_backup(MXS_MONITOR *monitor) +void store_server_journal(MXS_MONITOR *monitor) { /** Calculate how much memory we need to allocate */ uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32; @@ -353,7 +354,7 @@ void store_server_backup(MXS_MONITOR *monitor) MXS_FREE(data); } -void load_server_backup(MXS_MONITOR *monitor) +void load_server_journal(MXS_MONITOR *monitor) { char path[PATH_MAX]; FILE *file = open_data_file(monitor, path); @@ -406,7 +407,7 @@ void load_server_backup(MXS_MONITOR *monitor) else { MXS_ERROR("Failed to read journal file: Expected %u bytes, " - "read %lu bytes.", size, bytes); + "read %lu bytes.", size, bytes); } } MXS_FREE(data); @@ -429,7 +430,7 @@ void load_server_backup(MXS_MONITOR *monitor) } } -void remove_server_backup(MXS_MONITOR *monitor) +void remove_server_journal(MXS_MONITOR *monitor) { char path[PATH_MAX]; @@ -442,3 +443,39 @@ void remove_server_backup(MXS_MONITOR *monitor) MXS_ERROR("Path to monitor journal directory is too long."); } } + +bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age) +{ + bool 
is_stale = true; /* pessimistic default: every failure path below reports the journal as stale */
+    char path[PATH_MAX];
+
+    if (get_data_file_path(monitor, path) < PATH_MAX)
+    {
+        struct stat st;
+
+        if (stat(path, &st) == 0)
+        {
+            time_t tdiff = time(NULL) - st.st_mtime; /* seconds since the file was last modified */
+
+            if (tdiff >= max_age)
+            {
+                MXS_WARNING("Journal file was created %ld seconds ago. Maximum journal "
+                            "age is %ld seconds.", (long)tdiff, (long)max_age); /* %ld requires long, time_t may differ */
+            }
+            else
+            {
+                is_stale = false; /* the only path on which the journal is accepted */
+            }
+        }
+        else if (errno != ENOENT) /* a missing journal is not logged, but is still reported as stale */
+        {
+            MXS_ERROR("Failed to inspect journal file: %d, %s", errno, mxs_strerror(errno));
+        }
+    }
+    else
+    {
+        MXS_ERROR("Path to monitor journal directory is too long.");
+    }
+
+    return is_stale;
+}