MXS-1262: Move journal_max_age to MaxScale core

The parameter is now defined in the monitor. Further refactoring is needed
to make the interface of the journal system simpler.
This commit is contained in:
Markus Mäkelä 2017-08-09 14:54:52 +03:00
parent 837d57f4f4
commit b448b129d0
8 changed files with 78 additions and 37 deletions

View File

@ -115,3 +115,30 @@ lost_ndb |A MySQL Cluster node lost node membership
new_master |A new Master was detected
new_slave |A new Slave was detected
new_ndb |A new MySQL Cluster node was found
### `journal_max_age`
The maximum journal file age in seconds. The default value is 28800 seconds
(8 hours). When the monitor starts, it reads the stored journal file, if one
exists. If the journal file is older than the value of _journal_max_age_, it is
removed and the monitor starts with no prior knowledge of the servers.
## Monitor Crash Safety
Starting with MaxScale 2.2.0, the monitor modules keep an on-disk journal of the
latest server states. This change makes the monitors crash-safe when options
that introduce states are used. It also allows the monitors to retain stateful
information when MaxScale is restarted.
For the MySQL monitor, the options that introduce states into the monitoring
process are `detect_stale_master` and `detect_stale_slave`, both of which are
enabled by default. Galeramon has the `disable_master_failback` parameter, which
also introduces a state.
The default location of the server state journal is
`/var/lib/maxscale/<monitor name>/monitor.dat` where `<monitor name>` is the
name of the monitor section in the configuration file. If MaxScale crashes or is
shut down in an uncontrolled fashion, the journal will be read when MaxScale is
started. To skip the recovery process, manually delete the journal file before
starting MaxScale.

View File

@ -214,32 +214,6 @@ assigned the _Slave_ status which allows them to be used like normal slave
servers. When the option is disabled, the servers will only receive the _Slave
of External Server_ status and they will not be used.
### `journal_max_age`
The maximum journal file age in seconds. The default value is 28800 seconds.
When the MySQL monitor starts, it reads any stored journal files. If the journal
file is older than the value of _journal_max_age_, it will be removed and the
monitor starts with no prior knowledge of the servers.
## MySQL Monitor Crash Safety
Starting with MaxScale 2.2.0, the mysqlmon module keeps an on-disk journal of
the latest server states. This change makes the monitor crash-safe when options
that introduce states are used. It also allows the monitor to retain stateful
information when MaxScale is restarted.
Options that introduce states into the monitoring process are the
`detect_stale_master` and `detect_stale_slave` options, both of which are
enabled by default.
The default location for the server state journal is in
`/var/lib/maxscale/<monitor name>/mysqlmon.dat` where `<monitor name>` is the
name of the monitor section in the configuration file. If MaxScale crashes or is
shut down in an uncontrolled fashion, the journal will be read when MaxScale is
started. To skip the recovery process, manually delete the journal file before
starting MaxScale.
## Example 1 - Monitor script
Here is an example shell script which sends an email to an admin@my.org

View File

@ -202,6 +202,7 @@ struct mxs_monitor
/**< Are there any pending changes to a server?
* If yes, the next monitor loop starts early. */
bool active; /**< True if monitor is active */
time_t journal_max_age; /**< Maximum age of journal file */
struct mxs_monitor *next; /**< Next monitor in the linked list */
};
@ -246,6 +247,7 @@ extern const char CN_BACKEND_READ_TIMEOUT[];
extern const char CN_BACKEND_WRITE_TIMEOUT[];
extern const char CN_BACKEND_CONNECT_TIMEOUT[];
extern const char CN_MONITOR_INTERVAL[];
extern const char CN_JOURNAL_MAX_AGE[];
extern const char CN_SCRIPT[];
extern const char CN_EVENTS[];

View File

@ -237,6 +237,7 @@ const char *config_monitor_params[] =
CN_SCRIPT,
CN_EVENTS,
CN_MONITOR_INTERVAL,
CN_JOURNAL_MAX_AGE,
CN_BACKEND_CONNECT_TIMEOUT,
CN_BACKEND_READ_TIMEOUT,
CN_BACKEND_WRITE_TIMEOUT,
@ -3152,14 +3153,39 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
{
MXS_NOTICE("Invalid '%s' parameter for monitor '%s', "
"using default value of %d milliseconds.",
CN_MONITOR_INTERVAL, obj->object, MONITOR_DEFAULT_INTERVAL);
CN_MONITOR_INTERVAL, obj->object, DEFAULT_MONITOR_INTERVAL);
}
}
else
{
MXS_NOTICE("Monitor '%s' is missing the '%s' parameter, "
"using default value of %d milliseconds.",
CN_MONITOR_INTERVAL, obj->object, MONITOR_DEFAULT_INTERVAL);
CN_MONITOR_INTERVAL, obj->object, DEFAULT_MONITOR_INTERVAL);
}
/* Optional journal_max_age parameter: the maximum age of the monitor journal
 * file, in seconds. */
char *journal_age = config_get_value(obj->parameters, CN_JOURNAL_MAX_AGE);
if (journal_age)
{
    char *endptr;
    long max_age = strtol(journal_age, &endptr, 0);

    /* Accept only a fully numeric, strictly positive value. An empty string
     * parses as 0 and is rejected by the same check. */
    if (*endptr == '\0' && max_age > 0)
    {
        monitorSetJournalMaxAge(monitor, (time_t)max_age);
    }
    else
    {
        /* An unparsable value is counted as a configuration error. */
        error_count++;
        MXS_NOTICE("Invalid '%s' parameter for monitor '%s'",
                   CN_JOURNAL_MAX_AGE, obj->object);
    }
}
else
{
    /* Argument order follows the format string: monitor name first, then
     * the parameter name. The journal age is expressed in seconds. */
    MXS_NOTICE("Monitor '%s' is missing the '%s' parameter, "
               "using default value of %d seconds.",
               obj->object, CN_JOURNAL_MAX_AGE, DEFAULT_JOURNAL_MAX_AGE);
}
char *connect_timeout = config_get_value(obj->parameters, CN_BACKEND_CONNECT_TIMEOUT);

View File

@ -27,7 +27,10 @@ MXS_BEGIN_DECLS
#define DEFAULT_WRITE_TIMEOUT 2
#define DEFAULT_CONNECTION_ATTEMPTS 1
#define MONITOR_DEFAULT_INTERVAL 2000 // in milliseconds
#define DEFAULT_MONITOR_INTERVAL 2000 // in milliseconds
/** Default maximum journal age in seconds */
#define DEFAULT_JOURNAL_MAX_AGE 28800
/**
* Monitor network timeout types
@ -66,6 +69,7 @@ bool monitorRemoveParameter(MXS_MONITOR *monitor, const char *key);
void monitorSetInterval (MXS_MONITOR *, unsigned long);
bool monitorSetNetworkTimeout(MXS_MONITOR *, int, int);
void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value);
/**
* @brief Serialize a monitor to a file

View File

@ -67,6 +67,7 @@ const char CN_BACKEND_READ_TIMEOUT[] = "backend_read_timeout";
const char CN_BACKEND_WRITE_TIMEOUT[] = "backend_write_timeout";
const char CN_BACKEND_CONNECT_TIMEOUT[] = "backend_connect_timeout";
const char CN_MONITOR_INTERVAL[] = "monitor_interval";
const char CN_JOURNAL_MAX_AGE[] = "journal_max_age";
const char CN_SCRIPT[] = "script";
const char CN_EVENTS[] = "events";
@ -125,7 +126,8 @@ MXS_MONITOR* monitor_alloc(const char *name, const char *module)
mon->write_timeout = DEFAULT_WRITE_TIMEOUT;
mon->connect_timeout = DEFAULT_CONNECT_TIMEOUT;
mon->connect_attempts = DEFAULT_CONNECTION_ATTEMPTS;
mon->interval = MONITOR_DEFAULT_INTERVAL;
mon->interval = DEFAULT_MONITOR_INTERVAL;
mon->journal_max_age = DEFAULT_JOURNAL_MAX_AGE;
mon->parameters = NULL;
mon->server_pending_changes = false;
spinlock_init(&mon->lock);
@ -627,6 +629,17 @@ monitorSetInterval(MXS_MONITOR *mon, unsigned long interval)
mon->interval = interval;
}
/**
 * Set the maximum age of the monitor journal
 *
 * Stores the given limit in the monitor's journal_max_age field.
 *
 * @param mon   The monitor instance
 * @param value The maximum journal age in seconds
 */
void monitorSetJournalMaxAge(MXS_MONITOR *mon, time_t value)
{
mon->journal_max_age = value;
}
/**
* Set Monitor timeouts for connect/read/write
*
@ -1585,6 +1598,7 @@ json_t* monitor_parameters_to_json(const MXS_MONITOR* monitor)
json_object_set_new(rval, CN_BACKEND_READ_TIMEOUT, json_integer(monitor->read_timeout));
json_object_set_new(rval, CN_BACKEND_WRITE_TIMEOUT, json_integer(monitor->write_timeout));
json_object_set_new(rval, CN_BACKEND_CONNECT_ATTEMPTS, json_integer(monitor->connect_attempts));
json_object_set_new(rval, CN_JOURNAL_MAX_AGE, json_integer(monitor->journal_max_age));
/** Add custom module parameters */
const MXS_MODULE* mod = get_module(monitor->module_name, MODULE_MONITOR);

View File

@ -79,7 +79,6 @@ typedef struct
bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
bool warn_failover; /**< Log a warning when failover happens */
bool load_journal; /**< Whether journal file should be loaded */
time_t journal_max_age; /**< Maximum age of journal file */
bool allow_external_slaves; /**< Whether to allow usage of external slave servers */
MXS_MONITOR* monitor;
} MYSQL_MONITOR;

View File

@ -24,8 +24,6 @@
#include <maxscale/modutil.h>
#include <maxscale/mysql_utils.h>
#define DEFAULT_JOURNAL_MAX_AGE "28800"
/** Column positions for SHOW SLAVE STATUS */
#define MYSQL55_STATUS_BINLOG_POS 5
#define MYSQL55_STATUS_BINLOG_NAME 6
@ -105,7 +103,6 @@ MXS_MODULE* MXS_CREATE_MODULE()
{"failcount", MXS_MODULE_PARAM_COUNT, "5"},
{"allow_cluster_recovery", MXS_MODULE_PARAM_BOOL, "true"},
{"allow_external_slaves", MXS_MODULE_PARAM_BOOL, "true"},
{"journal_max_age", MXS_MODULE_PARAM_COUNT, DEFAULT_JOURNAL_MAX_AGE},
{
"script",
MXS_MODULE_PARAM_PATH,
@ -264,10 +261,9 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
handle->mysql51_replication = config_get_bool(params, "mysql51_replication");
handle->script = config_copy_string(params, "script");
handle->events = config_get_enum(params, "events", mxs_monitor_event_enum_values);
handle->journal_max_age = config_get_integer(params, "journal_max_age");
handle->allow_external_slaves = config_get_bool(params, "allow_external_slaves");
if (journal_is_stale(monitor, handle->journal_max_age))
if (journal_is_stale(monitor, monitor->journal_max_age))
{
MXS_WARNING("Removing stale journal file.");
remove_server_journal(monitor);
@ -375,7 +371,6 @@ static json_t* diagnostics_json(const MXS_MONITOR *mon)
json_object_set_new(rval, "failcount", json_integer(handle->failcount));
json_object_set_new(rval, "allow_cluster_recovery", json_boolean(handle->allow_cluster_recovery));
json_object_set_new(rval, "mysql51_replication", json_boolean(handle->mysql51_replication));
json_object_set_new(rval, "journal_max_age", json_integer(handle->journal_max_age));
if (handle->script)
{