Add stale journal file detection
Added a configurable maximum age for the mysqlmon journal files. If the file is older than the configured value, it will be ignored and removed.
This commit is contained in:
@ -200,6 +200,14 @@ external agent that automatically reintegrates failed servers into the
|
|||||||
cluster. One of these agents is the _replication-manager_ which automatically
|
cluster. One of these agents is the _replication-manager_ which automatically
|
||||||
configures the failed servers as new slaves of the current master.
|
configures the failed servers as new slaves of the current master.
|
||||||
|
|
||||||
|
### `journal_max_age`
|
||||||
|
|
||||||
|
The maximum journal file age in seconds. The default value is 28800 seconds (8 hours).
|
||||||
|
|
||||||
|
When the MySQL monitor starts, it reads any stored journal files. If the journal
|
||||||
|
file is older than the value of _journal_max_age_, it will be removed and the
|
||||||
|
monitor starts with no prior knowledge of the servers.
|
||||||
|
|
||||||
## MySQL Monitor Crash Safety
|
## MySQL Monitor Crash Safety
|
||||||
|
|
||||||
Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of
|
Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of
|
||||||
|
@ -79,29 +79,39 @@ typedef struct
|
|||||||
down before failover is initiated */
|
down before failover is initiated */
|
||||||
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
|
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
|
||||||
bool warn_failover; /**< Log a warning when failover happens */
|
bool warn_failover; /**< Log a warning when failover happens */
|
||||||
bool load_backup; /**< Whether backup file should be loaded */
|
bool load_journal; /**< Whether journal file should be loaded */
|
||||||
|
time_t journal_max_age; /**< Maximum age of journal file */
|
||||||
} MYSQL_MONITOR;
|
} MYSQL_MONITOR;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Store a backup of server states
|
* @brief Store a journal of server states
|
||||||
*
|
*
|
||||||
* @param monitor Monitor to backup
|
* @param monitor Monitor to journal
|
||||||
*/
|
*/
|
||||||
void store_server_backup(MXS_MONITOR *monitor);
|
void store_server_journal(MXS_MONITOR *monitor);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Load a backup of server states
|
* @brief Load a journal of server states
|
||||||
*
|
*
|
||||||
* @param monitor Monitor where backup is loaded
|
* @param monitor Monitor where journal is loaded
|
||||||
*/
|
*/
|
||||||
void load_server_backup(MXS_MONITOR *monitor);
|
void load_server_journal(MXS_MONITOR *monitor);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Remove stored backup file
|
* @brief Remove stored journal file
|
||||||
*
|
*
|
||||||
* @param monitor Monitor whose backup is removed
|
* @param monitor Monitor whose journal is removed
|
||||||
*/
|
*/
|
||||||
void remove_server_backup(MXS_MONITOR *monitor);
|
void remove_server_journal(MXS_MONITOR *monitor);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check whether the journal is too old
|
||||||
|
*
|
||||||
|
* @param monitor Monitor to check
* @param max_age Maximum allowed age of the journal file, in seconds
|
||||||
|
* @return True if journal is stale, missing, or an error occurred while reading the file.
|
||||||
|
* False if the file is still valid.
|
||||||
|
*/
|
||||||
|
bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age);
|
||||||
|
|
||||||
MXS_END_DECLS
|
MXS_END_DECLS
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
add_library(mysqlmon SHARED mysql_mon.c mysql_mon_backup.c)
|
add_library(mysqlmon SHARED mysql_mon.c mysql_mon_journal.c)
|
||||||
target_link_libraries(mysqlmon maxscale-common)
|
target_link_libraries(mysqlmon maxscale-common)
|
||||||
add_dependencies(mysqlmon pcre2)
|
add_dependencies(mysqlmon pcre2)
|
||||||
set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0")
|
set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0")
|
||||||
|
@ -53,6 +53,8 @@
|
|||||||
#include <maxscale/alloc.h>
|
#include <maxscale/alloc.h>
|
||||||
#include <maxscale/debug.h>
|
#include <maxscale/debug.h>
|
||||||
|
|
||||||
|
#define DEFAULT_JOURNAL_MAX_AGE "28800"
|
||||||
|
|
||||||
/** Column positions for SHOW SLAVE STATUS */
|
/** Column positions for SHOW SLAVE STATUS */
|
||||||
#define MYSQL55_STATUS_BINLOG_POS 5
|
#define MYSQL55_STATUS_BINLOG_POS 5
|
||||||
#define MYSQL55_STATUS_BINLOG_NAME 6
|
#define MYSQL55_STATUS_BINLOG_NAME 6
|
||||||
@ -128,6 +130,7 @@ MXS_MODULE* MXS_CREATE_MODULE()
|
|||||||
{"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"},
|
{"detect_standalone_master", MXS_MODULE_PARAM_BOOL, "false"},
|
||||||
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
|
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
|
||||||
{"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"},
|
{"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"},
|
||||||
|
{"journal_max_age", MXS_MODULE_PARAM_COUNT, DEFAULT_JOURNAL_MAX_AGE},
|
||||||
{
|
{
|
||||||
"script",
|
"script",
|
||||||
MXS_MODULE_PARAM_PATH,
|
MXS_MODULE_PARAM_PATH,
|
||||||
@ -269,7 +272,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
|||||||
handle->shutdown = 0;
|
handle->shutdown = 0;
|
||||||
handle->id = config_get_global_options()->id;
|
handle->id = config_get_global_options()->id;
|
||||||
handle->warn_failover = true;
|
handle->warn_failover = true;
|
||||||
handle->load_backup = true;
|
handle->load_journal = true;
|
||||||
spinlock_init(&handle->lock);
|
spinlock_init(&handle->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -286,6 +289,13 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
|
|||||||
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
|
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
|
||||||
handle->script = config_copy_string(params, "script");
|
handle->script = config_copy_string(params, "script");
|
||||||
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
|
||||||
|
handle->journal_max_age = config_get_integer(params, "journal_max_age");
|
||||||
|
|
||||||
|
if (journal_is_stale(monitor, handle->journal_max_age))
|
||||||
|
{
|
||||||
|
MXS_WARNING("Removing stale journal file.");
|
||||||
|
remove_server_journal(monitor);
|
||||||
|
}
|
||||||
|
|
||||||
bool error = false;
|
bool error = false;
|
||||||
|
|
||||||
@ -1099,10 +1109,10 @@ monitorMain(void *arg)
|
|||||||
lock_monitor_servers(mon);
|
lock_monitor_servers(mon);
|
||||||
servers_status_pending_to_current(mon);
|
servers_status_pending_to_current(mon);
|
||||||
|
|
||||||
if (handle->load_backup)
|
if (handle->load_journal)
|
||||||
{
|
{
|
||||||
handle->load_backup = false;
|
handle->load_journal = false;
|
||||||
load_server_backup(mon);
|
load_server_journal(mon);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* start from the first server in the list */
|
/* start from the first server in the list */
|
||||||
@ -1382,7 +1392,7 @@ monitorMain(void *arg)
|
|||||||
|
|
||||||
mon_hangup_failed_servers(mon);
|
mon_hangup_failed_servers(mon);
|
||||||
servers_status_current_to_pending(mon);
|
servers_status_current_to_pending(mon);
|
||||||
store_server_backup(mon);
|
store_server_journal(mon);
|
||||||
release_monitor_servers(mon);
|
release_monitor_servers(mon);
|
||||||
} /*< while (1) */
|
} /*< while (1) */
|
||||||
}
|
}
|
||||||
|
@ -18,14 +18,15 @@
|
|||||||
#include <maxscale/utils.h>
|
#include <maxscale/utils.h>
|
||||||
|
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Crash-safe storage of server states
|
* Crash-safe storage of server states
|
||||||
*
|
*
|
||||||
* This file contains functions to store and load backups of the server states.
|
* This file contains functions to store and load journals of the server states.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** Schema version, backups must have a matching version */
|
/** Schema version, journals must have a matching version */
|
||||||
#define MMB_SCHEMA_VERSION 1
|
#define MMB_SCHEMA_VERSION 1
|
||||||
|
|
||||||
/** Constants for byte lengths of the values */
|
/** Constants for byte lengths of the values */
|
||||||
@ -163,7 +164,7 @@ static int get_data_file_path(MXS_MONITOR *monitor, char *path)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Open stored backup file
|
* @brief Open stored journal file
|
||||||
*
|
*
|
||||||
* @param monitor Monitor to reload
|
* @param monitor Monitor to reload
|
||||||
* @param path Output where path is stored
|
* @param path Output where path is stored
|
||||||
@ -259,7 +260,7 @@ static bool check_crc32(const uint8_t *data, uint32_t size, const uint8_t *crc_p
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process the stored backup data
|
* Process the stored journal data
|
||||||
*/
|
*/
|
||||||
static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr)
|
static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr)
|
||||||
{
|
{
|
||||||
@ -300,7 +301,7 @@ static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void store_server_backup(MXS_MONITOR *monitor)
|
void store_server_journal(MXS_MONITOR *monitor)
|
||||||
{
|
{
|
||||||
/** Calculate how much memory we need to allocate */
|
/** Calculate how much memory we need to allocate */
|
||||||
uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32;
|
uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32;
|
||||||
@ -353,7 +354,7 @@ void store_server_backup(MXS_MONITOR *monitor)
|
|||||||
MXS_FREE(data);
|
MXS_FREE(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void load_server_backup(MXS_MONITOR *monitor)
|
void load_server_journal(MXS_MONITOR *monitor)
|
||||||
{
|
{
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
FILE *file = open_data_file(monitor, path);
|
FILE *file = open_data_file(monitor, path);
|
||||||
@ -429,7 +430,7 @@ void load_server_backup(MXS_MONITOR *monitor)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void remove_server_backup(MXS_MONITOR *monitor)
|
void remove_server_journal(MXS_MONITOR *monitor)
|
||||||
{
|
{
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
|
|
||||||
@ -442,3 +443,39 @@ void remove_server_backup(MXS_MONITOR *monitor)
|
|||||||
MXS_ERROR("Path to monitor journal directory is too long.");
|
MXS_ERROR("Path to monitor journal directory is too long.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Check whether the journal is too old
 *
 * The age of the journal is measured from the modification time that stat()
 * reports for the journal file.
 *
 * @param monitor Monitor to check
 * @param max_age Maximum allowed age of the journal file, in seconds
 *
 * @return True if the journal is at least @c max_age seconds old, if the file
 *         does not exist, if the path to it is too long or if the file could
 *         not be inspected. False if the file is still valid.
 */
bool journal_is_stale(MXS_MONITOR *monitor, time_t max_age)
{
    bool is_stale = true;
    char path[PATH_MAX];

    if (get_data_file_path(monitor, path) < PATH_MAX)
    {
        struct stat st;

        if (stat(path, &st) == 0)
        {
            time_t tdiff = time(NULL) - st.st_mtim.tv_sec;

            if (tdiff >= max_age)
            {
                /** time_t is not guaranteed to be a long: cast the values so
                 * that the arguments match the %ld conversions */
                MXS_WARNING("Journal file was created %ld seconds ago. Maximum journal "
                            "age is %ld seconds.", (long)tdiff, (long)max_age);
            }
            else
            {
                is_stale = false;
            }
        }
        else
        {
            /** Capture errno right after the failed call, before anything
             * else has a chance to overwrite it */
            int err = errno;

            /** A missing journal is not an error: it is silently reported as
             * stale since there is nothing to load */
            if (err != ENOENT)
            {
                MXS_ERROR("Failed to inspect journal file: %d, %s", err, mxs_strerror(err));
            }
        }
    }
    else
    {
        MXS_ERROR("Path to monitor journal directory is too long.");
    }

    return is_stale;
}
|
Reference in New Issue
Block a user