Add stale journal file detection
Added a configurable maximum age for the mysqlmon journal files. If the file is older than the configured value, it will be ignored and removed.
This commit is contained in:
@ -200,6 +200,14 @@ external agent that automatically reintegrates failed servers into the
|
||||
cluster. One of these agents is the _replication-manager_ which automatically
|
||||
configures the failed servers as new slaves of the current master.
|
||||
|
||||
### `journal_max_age`
|
||||
|
||||
The maximum journal file age in seconds. The default value is 28800 seconds (8 hours).
|
||||
|
||||
When the MySQL monitor starts, it reads any stored journal files. If the journal
|
||||
file is older than the value of _journal_max_age_, it is removed and the
|
||||
monitor starts with no prior knowledge of the servers.
|
||||
|
||||
## MySQL Monitor Crash Safety
|
||||
|
||||
Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of
|
||||
|
@ -79,29 +79,39 @@ typedef struct
|
||||
down before failover is initiated */
|
||||
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
|
||||
bool warn_failover; /**< Log a warning when failover happens */
|
||||
bool load_backup; /**< Whether backup file should be loaded */
|
||||
bool load_journal; /**< Whether journal file should be loaded */
|
||||
time_t journal_max_age; /**< Maximum age of journal file */
|
||||
} MYSQL_MONITOR;
|
||||
|
||||
/**
|
||||
* @brief Store a backup of server states
|
||||
* @brief Store a journal of server states
|
||||
*
|
||||
* @param monitor Monitor to backup
|
||||
* @param monitor Monitor to journal
|
||||
*/
|
||||
void store_server_backup(MXS_MONITOR *monitor);
|
||||
void store_server_journal(MXS_MONITOR *monitor);
|
||||
|
||||
/**
|
||||
* @brief Load a backup of server states
|
||||
* @brief Load a journal of server states
|
||||
*
|
||||
* @param monitor Monitor where backup is loaded
|
||||
* @param monitor Monitor where journal is loaded
|
||||
*/
|
||||
void load_server_backup(MXS_MONITOR *monitor);
|
||||
void load_server_journal(MXS_MONITOR *monitor);
|
||||
|
||||
/**
|
||||
* @brief Remove stored backup file
|
||||
* @brief Remove stored journal file
|
||||
*
|
||||
* @param monitor Monitor whose backup is removed
|
||||
* @param monitor Monitor whose journal is removed
|
||||
*/
|
||||
void remove_server_backup(MXS_MONITOR *monitor);
|
||||
void remove_server_journal(MXS_MONITOR *monitor);
|
||||
|
||||
/**
|
||||
* @brief Check whether the journal is too old
|
||||
*
|
||||
* @param monitor Monitor to check
|
||||
* @param max_age Maximum age of the journal file in seconds
|
||||
* @return True if the journal is stale, missing, or could not be inspected.
|
||||
* False if the file is still valid.
|
||||
*/
|
||||
bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age);
|
||||
|
||||
MXS_END_DECLS
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
add_library(mysqlmon SHARED mysql_mon.c mysql_mon_backup.c)
|
||||
add_library(mysqlmon SHARED mysql_mon.c mysql_mon_journal.c)
|
||||
target_link_libraries(mysqlmon maxscale-common)
|
||||
add_dependencies(mysqlmon pcre2)
|
||||
set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0")
|
||||
|
@ -53,6 +53,8 @@
|
||||
#include <maxscale/alloc.h>
|
||||
#include <maxscale/debug.h>
|
||||
|
||||
#define DEFAULT_JOURNAL_MAX_AGE "28800"
|
||||
|
||||
/** Column positions for SHOW SLAVE STATUS */
|
||||
#define MYSQL55_STATUS_BINLOG_POS 5
|
||||
#define MYSQL55_STATUS_BINLOG_NAME 6
|
||||
@ -128,6 +130,7 @@ MXS_MODULE* MXS_CREATE_MODULE()
|
||||
{"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"},
|
||||
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
|
||||
{"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"},
|
||||
{"journal_max_age", MXS_MODULE_PARAM_COUNT, DEFAULT_JOURNAL_MAX_AGE},
|
||||
{
|
||||
"script",
|
||||
MXS_MODULE_PARAM_PATH,
|
||||
@ -269,7 +272,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
handle->shutdown = 0;
|
||||
handle->id = config_get_global_options()->id;
|
||||
handle->warn_failover = true;
|
||||
handle->load_backup = true;
|
||||
handle->load_journal = true;
|
||||
spinlock_init(&handle->lock);
|
||||
}
|
||||
|
||||
@ -286,6 +289,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
||||
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
|
||||
handle->script = config_copy_string(params, "script");
|
||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||
handle->journal_max_age = config_get_integer(params, "journal_max_age");
|
||||
|
||||
if (journal_is_stale(monitor, handle->journal_max_age))
|
||||
{
|
||||
MXS_WARNING("Removing stale journal file.");
|
||||
remove_server_journal(monitor);
|
||||
}
|
||||
|
||||
bool error = false;
|
||||
|
||||
@ -1099,10 +1109,10 @@ monitorMain(void *arg)
|
||||
lock_monitor_servers(mon);
|
||||
servers_status_pending_to_current(mon);
|
||||
|
||||
if (handle->load_backup)
|
||||
if (handle->load_journal)
|
||||
{
|
||||
handle->load_backup = false;
|
||||
load_server_backup(mon);
|
||||
handle->load_journal = false;
|
||||
load_server_journal(mon);
|
||||
}
|
||||
|
||||
/* start from the first server in the list */
|
||||
@ -1382,7 +1392,7 @@ monitorMain(void *arg)
|
||||
|
||||
mon_hangup_failed_servers(mon);
|
||||
servers_status_current_to_pending(mon);
|
||||
store_server_backup(mon);
|
||||
store_server_journal(mon);
|
||||
release_monitor_servers(mon);
|
||||
} /*< while (1) */
|
||||
}
|
||||
|
@ -18,14 +18,15 @@
|
||||
#include <maxscale/utils.h>
|
||||
|
||||
#include <zlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
/**
|
||||
* Crash-safe storage of server states
|
||||
*
|
||||
* This file contains functions to store and load backups of the server states.
|
||||
* This file contains functions to store and load journals of the server states.
|
||||
*/
|
||||
|
||||
/** Schema version, backups must have a matching version */
|
||||
/** Schema version, journals must have a matching version */
|
||||
#define MMB_SCHEMA_VERSION 1
|
||||
|
||||
/** Constants for byte lengths of the values */
|
||||
@ -163,7 +164,7 @@ static int get_data_file_path(MXS_MONITOR *monitor, char *path)
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Open stored backup file
|
||||
* @brief Open stored journal file
|
||||
*
|
||||
* @param monitor Monitor to reload
|
||||
* @param path Output where path is stored
|
||||
@ -259,7 +260,7 @@ static bool check_crc32(const uint8_t *data, uint32_t size, const uint8_t *crc_p
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the stored backup data
|
||||
* Process the stored journal data
|
||||
*/
|
||||
static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr)
|
||||
{
|
||||
@ -300,7 +301,7 @@ static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char
|
||||
return true;
|
||||
}
|
||||
|
||||
void store_server_backup(MXS_MONITOR *monitor)
|
||||
void store_server_journal(MXS_MONITOR *monitor)
|
||||
{
|
||||
/** Calculate how much memory we need to allocate */
|
||||
uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32;
|
||||
@ -353,7 +354,7 @@ void store_server_backup(MXS_MONITOR *monitor)
|
||||
MXS_FREE(data);
|
||||
}
|
||||
|
||||
void load_server_backup(MXS_MONITOR *monitor)
|
||||
void load_server_journal(MXS_MONITOR *monitor)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
FILE *file = open_data_file(monitor, path);
|
||||
@ -429,7 +430,7 @@ void load_server_backup(MXS_MONITOR *monitor)
|
||||
}
|
||||
}
|
||||
|
||||
void remove_server_backup(MXS_MONITOR *monitor)
|
||||
void remove_server_journal(MXS_MONITOR *monitor)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
|
||||
@ -442,3 +443,39 @@ void remove_server_backup(MXS_MONITOR *monitor)
|
||||
MXS_ERROR("Path to monitor journal directory is too long.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Check whether the journal is too old
 *
 * The age of the journal is the number of seconds elapsed since the file was
 * last modified. A journal whose age is equal to or greater than @c max_age
 * is considered stale.
 *
 * @param monitor Monitor to check
 * @param max_age Maximum allowed age of the journal file in seconds
 *
 * @return True if the journal is stale, if it does not exist or if it could
 *         not be inspected. False if the file is still valid.
 */
bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age)
{
    char path[PATH_MAX];

    if (get_data_file_path(monitor, path) >= PATH_MAX)
    {
        MXS_ERROR("Path to monitor journal directory is too long.");
        return true;
    }

    struct stat st;

    if (stat(path, &st) != 0)
    {
        /** A missing journal is not an error, so nothing is logged for ENOENT */
        if (errno != ENOENT)
        {
            MXS_ERROR("Failed to inspect journal file: %d, %s", errno, mxs_strerror(errno));
        }

        return true;
    }

    /** st_mtime holds the same seconds value as st_mtim.tv_sec but does not
     * depend on POSIX.1-2008 feature test macros being enabled */
    time_t tdiff = time(NULL) - st.st_mtime;

    if (tdiff >= max_age)
    {
        /** time_t is not guaranteed to be a long, cast explicitly to match %ld */
        MXS_WARNING("Journal file was created %ld seconds ago. Maximum journal "
                    "age is %ld seconds.", (long)tdiff, (long)max_age);
        return true;
    }

    return false;
}
|
Reference in New Issue
Block a user