Add stale journal file detection

Added a configurable maximum age for the mysqlmon journal files. If the
file is older than the configured value, it will be ignored and removed.
This commit is contained in:
Markus Mäkelä
2017-03-17 13:37:37 +02:00
parent 40b5e627a2
commit bbcfe98651
5 changed files with 89 additions and 24 deletions

View File

@ -200,6 +200,14 @@ external agent that automatically reintegrates failed servers into the
cluster. One of these agents is the _replication-manager_ which automatically cluster. One of these agents is the _replication-manager_ which automatically
configures the failed servers as new slaves of the current master. configures the failed servers as new slaves of the current master.
### `journal_max_age`
The maximum journal file age in seconds. The default value is 28800 seconds (8 hours).
When the MySQL monitor starts, it reads any stored journal files. If the journal
file is older than the value of _journal_max_age_, it is removed and the
monitor starts with no prior knowledge of the servers.
## MySQL Monitor Crash Safety ## MySQL Monitor Crash Safety
Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of

View File

@ -79,29 +79,39 @@ typedef struct
down before failover is initiated */ down before failover is initiated */
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */ bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
bool warn_failover; /**< Log a warning when failover happens */ bool warn_failover; /**< Log a warning when failover happens */
bool load_backup; /**< Whether backup file should be loaded */ bool load_journal; /**< Whether journal file should be loaded */
time_t journal_max_age; /**< Maximum age of journal file */
} MYSQL_MONITOR; } MYSQL_MONITOR;
/** /**
* @brief Store a backup of server states * @brief Store a journal of server states
* *
* @param monitor Monitor to backup * @param monitor Monitor to journal
*/ */
void store_server_backup(MXS_MONITOR *monitor); void store_server_journal(MXS_MONITOR *monitor);
/** /**
* @brief Load a backup of server states * @brief Load a journal of server states
* *
* @param monitor Monitor where backup is loaded * @param monitor Monitor where journal is loaded
*/ */
void load_server_backup(MXS_MONITOR *monitor); void load_server_journal(MXS_MONITOR *monitor);
/** /**
* @brief Remove stored backup file * @brief Remove stored journal file
* *
* @param monitor Monitor whose backup is removed * @param monitor Monitor whose journal is removed
*/ */
void remove_server_backup(MXS_MONITOR *monitor); void remove_server_journal(MXS_MONITOR *monitor);
/**
 * @brief Check whether the journal is too old
 *
 * @param monitor Monitor to check
 * @param max_age Maximum age of the journal file in seconds
 * @return True if journal is stale or an error occurred while reading the file.
 * False if the file is still valid.
 */
bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age);
MXS_END_DECLS MXS_END_DECLS

View File

@ -1,4 +1,4 @@
add_library(mysqlmon SHARED mysql_mon.c mysql_mon_backup.c) add_library(mysqlmon SHARED mysql_mon.c mysql_mon_journal.c)
target_link_libraries(mysqlmon maxscale-common) target_link_libraries(mysqlmon maxscale-common)
add_dependencies(mysqlmon pcre2) add_dependencies(mysqlmon pcre2)
set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0") set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0")

View File

@ -53,6 +53,8 @@
#include <maxscale/alloc.h> #include <maxscale/alloc.h>
#include <maxscale/debug.h> #include <maxscale/debug.h>
#define DEFAULT_JOURNAL_MAX_AGE "28800"
/** Column positions for SHOW SLAVE STATUS */ /** Column positions for SHOW SLAVE STATUS */
#define MYSQL55_STATUS_BINLOG_POS 5 #define MYSQL55_STATUS_BINLOG_POS 5
#define MYSQL55_STATUS_BINLOG_NAME 6 #define MYSQL55_STATUS_BINLOG_NAME 6
@ -128,6 +130,7 @@ MXS_MODULE* MXS_CREATE_MODULE()
{"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"}, {"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"},
{"failcount", MXS_MODULE_PARAM_COUNT, "5"}, {"failcount", MXS_MODULE_PARAM_COUNT, "5"},
{"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"}, {"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"},
{"journal_max_age", MXS_MODULE_PARAM_COUNT, DEFAULT_JOURNAL_MAX_AGE},
{ {
"script", "script",
MXS_MODULE_PARAM_PATH, MXS_MODULE_PARAM_PATH,
@ -269,7 +272,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
handle->shutdown = 0; handle->shutdown = 0;
handle->id = config_get_global_options()->id; handle->id = config_get_global_options()->id;
handle->warn_failover = true; handle->warn_failover = true;
handle->load_backup = true; handle->load_journal = true;
spinlock_init(&handle->lock); spinlock_init(&handle->lock);
} }
@ -286,6 +289,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
handle->mysql51_replication = config_get_bool(params, "mysql51_replication"); handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
handle->script = config_copy_string(params, "script"); handle->script = config_copy_string(params, "script");
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values); handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
handle->journal_max_age = config_get_integer(params, "journal_max_age");
if (journal_is_stale(monitor, handle->journal_max_age))
{
MXS_WARNING("Removing stale journal file.");
remove_server_journal(monitor);
}
bool error = false; bool error = false;
@ -1099,10 +1109,10 @@ monitorMain(void *arg)
lock_monitor_servers(mon); lock_monitor_servers(mon);
servers_status_pending_to_current(mon); servers_status_pending_to_current(mon);
if (handle->load_backup) if (handle->load_journal)
{ {
handle->load_backup = false; handle->load_journal = false;
load_server_backup(mon); load_server_journal(mon);
} }
/* start from the first server in the list */ /* start from the first server in the list */
@ -1382,7 +1392,7 @@ monitorMain(void *arg)
mon_hangup_failed_servers(mon); mon_hangup_failed_servers(mon);
servers_status_current_to_pending(mon); servers_status_current_to_pending(mon);
store_server_backup(mon); store_server_journal(mon);
release_monitor_servers(mon); release_monitor_servers(mon);
} /*< while (1) */ } /*< while (1) */
} }

View File

@ -18,14 +18,15 @@
#include <maxscale/utils.h> #include <maxscale/utils.h>
#include <zlib.h> #include <zlib.h>
#include <sys/stat.h>
/** /**
* Crash-safe storage of server states * Crash-safe storage of server states
* *
* This file contains functions to store and load backups of the server states. * This file contains functions to store and load journals of the server states.
*/ */
/** Schema version, backups must have a matching version */ /** Schema version, journals must have a matching version */
#define MMB_SCHEMA_VERSION 1 #define MMB_SCHEMA_VERSION 1
/** Constants for byte lengths of the values */ /** Constants for byte lengths of the values */
@ -163,7 +164,7 @@ static int get_data_file_path(MXS_MONITOR *monitor, char *path)
} }
/** /**
* @brief Open stored backup file * @brief Open stored journal file
* *
* @param monitor Monitor to reload * @param monitor Monitor to reload
* @param path Output where path is stored * @param path Output where path is stored
@ -259,7 +260,7 @@ static bool check_crc32(const uint8_t *data, uint32_t size, const uint8_t *crc_p
} }
/** /**
* Process the stored backup data * Process the stored journal data
*/ */
static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr) static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr)
{ {
@ -300,7 +301,7 @@ static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char
return true; return true;
} }
void store_server_backup(MXS_MONITOR *monitor) void store_server_journal(MXS_MONITOR *monitor)
{ {
/** Calculate how much memory we need to allocate */ /** Calculate how much memory we need to allocate */
uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32; uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32;
@ -353,7 +354,7 @@ void store_server_backup(MXS_MONITOR *monitor)
MXS_FREE(data); MXS_FREE(data);
} }
void load_server_backup(MXS_MONITOR *monitor) void load_server_journal(MXS_MONITOR *monitor)
{ {
char path[PATH_MAX]; char path[PATH_MAX];
FILE *file = open_data_file(monitor, path); FILE *file = open_data_file(monitor, path);
@ -429,7 +430,7 @@ void load_server_backup(MXS_MONITOR *monitor)
} }
} }
void remove_server_backup(MXS_MONITOR *monitor) void remove_server_journal(MXS_MONITOR *monitor)
{ {
char path[PATH_MAX]; char path[PATH_MAX];
@ -442,3 +443,39 @@ void remove_server_backup(MXS_MONITOR *monitor)
MXS_ERROR("Path to monitor journal directory is too long."); MXS_ERROR("Path to monitor journal directory is too long.");
} }
} }
/**
 * @brief Check whether the stored journal file is too old to be used
 *
 * The age of the journal is the difference between the current time and the
 * modification time of the journal file.
 *
 * @param monitor Monitor whose journal file is checked
 * @param max_age Maximum allowed age of the journal file in seconds
 *
 * @return False only if the file was inspected successfully and is younger
 *         than max_age. True if the file is at least max_age seconds old, if
 *         it could not be inspected (this includes a file that does not
 *         exist) or if the path to the file is too long.
 */
bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age)
{
    bool is_stale = true;
    char path[PATH_MAX];

    if (get_data_file_path(monitor, path) < PATH_MAX)
    {
        struct stat st;

        if (stat(path, &st) == 0)
        {
            /* st_mtime is available on every POSIX revision, unlike st_mtim */
            time_t tdiff = time(NULL) - st.st_mtime;

            if (tdiff >= max_age)
            {
                /* time_t has no printf conversion of its own: cast to match %ld */
                MXS_WARNING("Journal file was created %ld seconds ago. Maximum journal "
                            "age is %ld seconds.", (long)tdiff, (long)max_age);
            }
            else
            {
                is_stale = false;
            }
        }
        else
        {
            /* Take a copy of errno before any other call can overwrite it */
            int eno = errno;

            /* A missing journal is not reported as an error, but it is still
             * returned as stale */
            if (eno != ENOENT)
            {
                MXS_ERROR("Failed to inspect journal file: %d, %s", eno, mxs_strerror(eno));
            }
        }
    }
    else
    {
        MXS_ERROR("Path to monitor journal directory is too long.");
    }

    return is_stale;
}