diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h
index ae000150d..b91bfece1 100644
--- a/server/modules/monitor/mysqlmon.h
+++ b/server/modules/monitor/mysqlmon.h
@@ -79,8 +79,30 @@ typedef struct
                                   down before failover is initiated */
     bool allow_cluster_recovery; /**< Allow failed servers to rejoin the cluster */
     bool warn_failover; /**< Log a warning when failover happens */
+    bool load_backup; /**< Whether backup file should be loaded */
 } MYSQL_MONITOR;
 
+/**
+ * @brief Store a backup of server states
+ *
+ * @param monitor Monitor to backup
+ */
+void store_server_backup(MXS_MONITOR *monitor);
+
+/**
+ * @brief Load a backup of server states
+ *
+ * @param monitor Monitor where backup is loaded
+ */
+void load_server_backup(MXS_MONITOR *monitor);
+
+/**
+ * @brief Remove stored backup file
+ *
+ * @param monitor Monitor whose backup is removed
+ */
+void remove_server_backup(MXS_MONITOR *monitor);
+
 MXS_END_DECLS
 
 #endif
diff --git a/server/modules/monitor/mysqlmon/CMakeLists.txt b/server/modules/monitor/mysqlmon/CMakeLists.txt
index e5eef09af..51ec3828f 100644
--- a/server/modules/monitor/mysqlmon/CMakeLists.txt
+++ b/server/modules/monitor/mysqlmon/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_library(mysqlmon SHARED mysql_mon.c)
+add_library(mysqlmon SHARED mysql_mon.c mysql_mon_backup.c)
 target_link_libraries(mysqlmon maxscale-common)
 add_dependencies(mysqlmon pcre2)
 set_target_properties(mysqlmon PROPERTIES VERSION "1.4.0")
diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c
index e4e5f26b3..ef8cf1330 100644
--- a/server/modules/monitor/mysqlmon/mysql_mon.c
+++ b/server/modules/monitor/mysqlmon/mysql_mon.c
@@ -269,6 +269,7 @@ startMonitor(MXS_MONITOR *monitor, const MXS_CONFIG_PARAMETER* params)
         handle->shutdown = 0;
         handle->id = config_get_global_options()->id;
         handle->warn_failover = true;
+        handle->load_backup = true;
         spinlock_init(&handle->lock);
     }
 
@@ -326,6 +327,9 @@ stopMonitor(MXS_MONITOR *mon)
 
     handle->shutdown = 1;
     thread_wait(handle->thread);
+
+    /** Controlled shutdown, remove stored backup */
+    remove_server_backup(mon);
 }
 
 /**
@@ -1098,6 +1102,12 @@ monitorMain(void *arg)
         lock_monitor_servers(mon);
         servers_status_pending_to_current(mon);
 
+        if (handle->load_backup)
+        {
+            handle->load_backup = false;
+            load_server_backup(mon);
+        }
+
         /* start from the first server in the list */
         ptr = mon->databases;
 
@@ -1375,6 +1385,7 @@ monitorMain(void *arg)
 
         mon_hangup_failed_servers(mon);
         servers_status_current_to_pending(mon);
+        store_server_backup(mon);
         release_monitor_servers(mon);
     } /*< while (1) */
 }
diff --git a/server/modules/monitor/mysqlmon/mysql_mon_backup.c b/server/modules/monitor/mysqlmon/mysql_mon_backup.c
new file mode 100644
index 000000000..4793c326e
--- /dev/null
+++ b/server/modules/monitor/mysqlmon/mysql_mon_backup.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl11.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include "../mysqlmon.h"
+
+/* NOTE(review): the targets of the four includes below were lost when this
+ * patch was extracted. They are reconstructed from the symbols this file
+ * uses (MXS_MALLOC, get_datadir, mxs_mkdir_all, crc32) -- confirm. */
+#include <maxscale/alloc.h>
+#include <maxscale/paths.h>
+#include <maxscale/utils.h>
+
+#include <zlib.h>
+
+/**
+ * Crash-safe storage of server states
+ *
+ * This file contains functions to store and load backups of the server states.
+ *
+ * Layout of the backup file, all integers little-endian:
+ *
+ * - 4 bytes: length of everything that follows
+ * - 1 byte: schema version
+ * - Records: a type byte and a null-terminated server name, followed by a
+ *   4 byte server status for SVT_SERVER records
+ * - 4 bytes: CRC32 of the schema version and the records
+ */
+
+/** Schema version, backups must have a matching version */
+#define MMB_SCHEMA_VERSION 1
+
+/** Constants for byte lengths of the values */
+#define MMB_LEN_BYTES 4
+#define MMB_LEN_SCHEMA_VERSION 1
+#define MMB_LEN_CRC32 4
+#define MMB_LEN_VALUE_TYPE 1
+#define MMB_LEN_SERVER_STATUS 4
+
+/** Type of the stored value */
+enum stored_value_type
+{
+    SVT_SERVER = 1, // Generic server state information
+    SVT_MASTER = 2, // The master server name
+};
+
+/**
+ * @brief Decode a 32-bit little-endian integer
+ *
+ * Each byte is widened to uint32_t before it is shifted so that a byte with
+ * the high bit set is never shifted into the sign bit of an int.
+ *
+ * @param ptr Pointer to at least four readable bytes
+ * @return The decoded value
+ */
+static uint32_t read_le32(const uint8_t *ptr)
+{
+    return (uint32_t)ptr[0] | ((uint32_t)ptr[1] << 8) |
+           ((uint32_t)ptr[2] << 16) | ((uint32_t)ptr[3] << 24);
+}
+
+/**
+ * @brief Encode a 32-bit integer as four little-endian bytes
+ *
+ * @param ptr   Destination with room for at least four bytes
+ * @param value Value to encode
+ * @return Pointer to the byte that follows the encoded value
+ */
+static char* write_le32(char *ptr, uint32_t value)
+{
+    uint8_t *out = (uint8_t*)ptr;
+    out[0] = value & 0xff;
+    out[1] = (value >> 8) & 0xff;
+    out[2] = (value >> 16) & 0xff;
+    out[3] = (value >> 24) & 0xff;
+    return ptr + 4;
+}
+
+/**
+ * @brief Remove .tmp suffix and rename file
+ *
+ * @param src File to rename, the name must end in ".tmp"
+ * @return True if file was successfully renamed
+ */
+static bool rename_tmp_file(const char *src)
+{
+    char dest[strlen(src) + 1];
+    strcpy(dest, src);
+
+    char *tail = strrchr(dest, '.');
+    ss_dassert(tail && strcmp(tail, ".tmp") == 0);
+
+    if (tail == NULL || strcmp(tail, ".tmp") != 0)
+    {
+        /** Checked at runtime as well so that a bad name can never lead to
+         * a write through a NULL pointer */
+        MXS_ERROR("Temporary file '%s' does not end in '.tmp'", src);
+        return false;
+    }
+
+    *tail = '\0';
+
+    if (rename(src, dest) == -1)
+    {
+        MXS_ERROR("Failed to rename '%s' to '%s': %d, %s",
+                  src, dest, errno, mxs_strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * @brief Open temporary file
+ *
+ * The directory of the file is passed to mxs_mkdir_all before it is opened.
+ *
+ * @param monitor Monitor
+ * @param path    Output where the path is stored, at least PATH_MAX bytes
+ * @return Opened file or NULL on error
+ */
+static FILE* open_tmp_file(MXS_MONITOR *monitor, char *path)
+{
+    const char *name_template = "%s/%s/";
+    const char filename[] = "mysqlmon.dat.tmp";
+    int nbytes = snprintf(path, PATH_MAX, name_template, get_datadir(), monitor->name);
+
+    FILE *rval = NULL;
+
+    /** Compared as signed integers so that a negative snprintf result is
+     * rejected explicitly instead of relying on unsigned conversion */
+    if (nbytes >= 0 && nbytes < PATH_MAX - (int)sizeof(filename) &&
+        mxs_mkdir_all(path, 0744))
+    {
+        strcat(path, filename);
+        rval = fopen(path, "wb");
+    }
+
+    return rval;
+}
+
+/**
+ * @brief Store server data to in-memory buffer
+ *
+ * @param monitor Monitor
+ * @param data    Pointer to in-memory buffer used for storage, must be at
+ *                least @c size + MMB_LEN_BYTES bytes long
+ * @param size    Length of the payload that follows the length prefix: the
+ *                schema version, the stored values and the CRC32
+ */
+static void store_data(MXS_MONITOR *monitor, char *data, uint32_t size)
+{
+    MYSQL_MONITOR* handle = (MYSQL_MONITOR*) monitor->handle;
+    char *ptr = data;
+
+    /** Store the data length */
+    ss_dassert(sizeof(size) == MMB_LEN_BYTES);
+    ptr = write_le32(ptr, size);
+
+    /** Then the schema version */
+    *ptr++ = MMB_SCHEMA_VERSION;
+
+    /** Store the states of all servers */
+    for (MXS_MONITOR_SERVERS* db = monitor->databases; db; db = db->next)
+    {
+        *ptr++ = (char)SVT_SERVER; // Value type
+        strcpy(ptr, db->server->unique_name); // Name of the server
+        ptr += strlen(db->server->unique_name) + 1;
+
+        uint32_t status = db->server->status; // Server status as 4 byte integer
+        ss_dassert(sizeof(status) == MMB_LEN_SERVER_STATUS);
+        ptr = write_le32(ptr, status);
+    }
+
+    /** Store the current root master if we have one */
+    if (handle->master)
+    {
+        *ptr++ = (char)SVT_MASTER;
+        strcpy(ptr, handle->master->server->unique_name);
+        ptr += strlen(handle->master->server->unique_name) + 1;
+    }
+
+    /** Calculate the CRC32 for the complete payload minus the CRC32 bytes */
+    uint32_t crc = crc32(0L, NULL, 0);
+    crc = crc32(crc, (uint8_t*)data + MMB_LEN_BYTES, size - MMB_LEN_CRC32);
+    ss_dassert(sizeof(crc) == MMB_LEN_CRC32);
+    ptr = write_le32(ptr, crc);
+
+    ss_dassert(ptr - data == size + MMB_LEN_BYTES);
+}
+
+/**
+ * @brief Format the path of the backup file
+ *
+ * @param monitor Monitor
+ * @param path    Output where the path is stored, at least PATH_MAX bytes
+ * @return Return value of snprintf; the path fits only if this is
+ *         non-negative and less than PATH_MAX
+ */
+static int get_data_file_path(MXS_MONITOR *monitor, char *path)
+{
+    const char *name_template = "%s/%s/mysqlmon.dat";
+    return snprintf(path, PATH_MAX, name_template, get_datadir(), monitor->name);
+}
+
+/**
+ * @brief Open stored backup file
+ *
+ * A missing file is not treated as an error and nothing is logged for it.
+ *
+ * @param monitor Monitor to reload
+ * @param path    Output where path is stored
+ * @return Opened file or NULL on error
+ */
+static FILE* open_data_file(MXS_MONITOR *monitor, char *path)
+{
+    FILE *rval = NULL;
+    int nbytes = get_data_file_path(monitor, path);
+
+    if (nbytes < 0 || nbytes >= PATH_MAX)
+    {
+        MXS_ERROR("Path to monitor data directory is too long.");
+    }
+    else if ((rval = fopen(path, "rb")) == NULL && errno != ENOENT)
+    {
+        MXS_ERROR("Failed to open persisted server states: %d, %s",
+                  errno, mxs_strerror(errno));
+    }
+
+    return rval;
+}
+
+/**
+ * Check that memory area contains a null terminator
+ *
+ * @param data Start of the area
+ * @param end  One past the last byte of the area
+ * @return True if a null byte was found in the area
+ */
+static bool has_null_terminator(const char *data, const char *end)
+{
+    while (data < end)
+    {
+        if (*data == '\0')
+        {
+            return true;
+        }
+        data++;
+    }
+
+    return false;
+}
+
+/**
+ * Process a generic server
+ *
+ * @param monitor Monitor whose servers are updated
+ * @param data    Null-terminated server name followed by the server status
+ * @param end     One past the last byte of stored values
+ * @return Pointer to the next stored value or NULL if the status does not
+ *         fit in the remaining data
+ */
+static const char* process_server(MXS_MONITOR *monitor, const char *data, const char *end)
+{
+    const char *status_ptr = data + strlen(data) + 1;
+
+    /** The name is known to be terminated but the status after it has not
+     * been bounds-checked by the caller */
+    if (end - status_ptr < MMB_LEN_SERVER_STATUS)
+    {
+        return NULL;
+    }
+
+    uint32_t state = read_le32((const uint8_t*)status_ptr);
+
+    for (MXS_MONITOR_SERVERS* db = monitor->databases; db; db = db->next)
+    {
+        if (strcmp(db->server->unique_name, data) == 0)
+        {
+            server_set_status_nolock(db->server, state);
+            monitor_set_pending_status(db, state);
+            break;
+        }
+    }
+
+    return status_ptr + MMB_LEN_SERVER_STATUS;
+}
+
+/**
+ * Process a master
+ *
+ * @param monitor Monitor whose master is set
+ * @param data    Null-terminated name of the master server
+ * @param end     One past the last byte of stored values
+ * @return Pointer to the next stored value
+ */
+static const char* process_master(MXS_MONITOR *monitor, const char *data, const char *end)
+{
+    for (MXS_MONITOR_SERVERS* db = monitor->databases; db; db = db->next)
+    {
+        if (strcmp(db->server->unique_name, data) == 0)
+        {
+            MYSQL_MONITOR* handle = (MYSQL_MONITOR*)monitor->handle;
+            handle->master = db;
+            break;
+        }
+    }
+
+    data += strlen(data) + 1;
+
+    return data;
+}
+
+/**
+ * Check that the calculated CRC32 matches the one stored on disk
+ *
+ * @param data    Data the checksum is calculated over
+ * @param size    Length of @c data
+ * @param crc_ptr Stored checksum as four little-endian bytes
+ * @return True if the checksums match
+ */
+static bool check_crc32(const uint8_t *data, uint32_t size, const uint8_t *crc_ptr)
+{
+    uint32_t crc = read_le32(crc_ptr);
+    uint32_t calculated_crc = crc32(0L, NULL, 0);
+    calculated_crc = crc32(calculated_crc, data, size);
+    return calculated_crc == crc;
+}
+
+/**
+ * Process the stored backup data
+ *
+ * @param monitor Monitor where the stored values are applied
+ * @param data    Start of the stored values
+ * @param crc_ptr Start of the CRC32, one past the last stored value
+ * @return True if all stored values were processed
+ */
+static bool process_data_file(MXS_MONITOR *monitor, const char *data, const char *crc_ptr)
+{
+    const char *ptr = data;
+
+    while (ptr < crc_ptr)
+    {
+        enum stored_value_type type = *ptr;
+        ptr += MMB_LEN_VALUE_TYPE;
+
+        /** All values contain a null terminated string */
+        if (!has_null_terminator(ptr, crc_ptr))
+        {
+            MXS_ERROR("Possible corrupted data file (no null terminator found), ignoring persisted states.");
+            return false;
+        }
+
+        switch (type)
+        {
+        case SVT_SERVER:
+            ptr = process_server(monitor, ptr, crc_ptr);
+            break;
+
+        case SVT_MASTER:
+            ptr = process_master(monitor, ptr, crc_ptr);
+            break;
+
+        default:
+            MXS_ERROR("Possible corrupted data file (unknown stored value), ignoring persisted states.");
+            return false;
+        }
+
+        if (ptr == NULL)
+        {
+            MXS_ERROR("Possible corrupted data file (truncated server status), ignoring persisted states.");
+            return false;
+        }
+    }
+
+    ss_dassert(ptr == crc_ptr);
+    return true;
+}
+
+void store_server_backup(MXS_MONITOR *monitor)
+{
+    /** Calculate how much memory we need to allocate */
+    uint32_t size = MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32;
+
+    for (MXS_MONITOR_SERVERS* db = monitor->databases; db; db = db->next)
+    {
+        /** Each server is stored as a type byte and a null-terminated string
+         * followed by four byte server status. */
+        size += MMB_LEN_VALUE_TYPE + strlen(db->server->unique_name) + 1 + MMB_LEN_SERVER_STATUS;
+    }
+
+    MYSQL_MONITOR* handle = (MYSQL_MONITOR*) monitor->handle;
+
+    if (handle->master)
+    {
+        /** The master server name is stored as a null terminated string */
+        size += MMB_LEN_VALUE_TYPE + strlen(handle->master->server->unique_name) + 1;
+    }
+
+    /** 4 bytes for file length, 1 byte for schema version and 4 bytes for CRC32 */
+    uint32_t buffer_size = size + MMB_LEN_BYTES;
+    char *data = (char*)MXS_MALLOC(buffer_size);
+    char path[PATH_MAX + 1];
+
+    if (data)
+    {
+        /** Store the data in memory first */
+        store_data(monitor, data, size);
+
+        FILE *file = open_tmp_file(monitor, path);
+
+        if (file)
+        {
+            /** Write the data to a temp file and rename it to the final name */
+            bool ok = fwrite(data, 1, buffer_size, file) == buffer_size;
+            int err = ok ? 0 : errno;
+
+            /** Close before renaming: closing flushes the stream, so only
+             * after it succeeds is the temp file known to be complete */
+            if (fclose(file) != 0 && ok)
+            {
+                ok = false;
+                err = errno;
+            }
+
+            if (!ok)
+            {
+                MXS_ERROR("Failed to write backup data to disk: %d, %s",
+                          err, mxs_strerror(err));
+            }
+
+            /** Never leave a partial or unrenamed temp file behind */
+            if (!ok || !rename_tmp_file(path))
+            {
+                unlink(path);
+            }
+        }
+    }
+    MXS_FREE(data);
+}
+
+void load_server_backup(MXS_MONITOR *monitor)
+{
+    char path[PATH_MAX];
+    FILE *file = open_data_file(monitor, path);
+
+    if (file)
+    {
+        /** The length is decoded byte by byte to match the little-endian
+         * encoding used when the file is written */
+        uint8_t len_bytes[MMB_LEN_BYTES];
+
+        if (fread(len_bytes, 1, MMB_LEN_BYTES, file) == MMB_LEN_BYTES)
+        {
+            /** Payload contents:
+             *
+             * - One byte of schema version
+             * - `size - 5` bytes of data
+             * - Trailing 4 bytes of CRC32
+             */
+            uint32_t size = read_le32(len_bytes);
+            char *data = NULL;
+
+            if (size < MMB_LEN_SCHEMA_VERSION + MMB_LEN_CRC32)
+            {
+                /** Too short to hold the mandatory fields; rejecting it
+                 * keeps `size - MMB_LEN_CRC32` from wrapping around */
+                MXS_ERROR("Possible corrupted data file (payload too short), ignoring persisted states.");
+            }
+            else if ((data = (char*)MXS_MALLOC(size)) == NULL)
+            {
+                /** Allocation failed, nothing can be loaded */
+            }
+            else if (fread(data, 1, size, file) != size)
+            {
+                MXS_ERROR("Failed to read persisted server states.");
+            }
+            else if (*data != MMB_SCHEMA_VERSION)
+            {
+                MXS_ERROR("Unknown backup schema version: %d", (int)*data);
+            }
+            else if (!check_crc32((uint8_t*)data, size - MMB_LEN_CRC32,
+                                  (uint8_t*)data + size - MMB_LEN_CRC32))
+            {
+                MXS_ERROR("CRC32 mismatch in persisted server state file. "
+                          "Ignoring persisted server states.");
+            }
+            else if (process_data_file(monitor, data + MMB_LEN_SCHEMA_VERSION,
+                                       data + size - MMB_LEN_CRC32))
+            {
+                MXS_WARNING("Loaded persisted server states from backup file.");
+            }
+
+            MXS_FREE(data);
+        }
+
+        fclose(file);
+    }
+}
+
+void remove_server_backup(MXS_MONITOR *monitor)
+{
+    char path[PATH_MAX];
+    int nbytes = get_data_file_path(monitor, path);
+
+    if (nbytes < 0 || nbytes >= PATH_MAX)
+    {
+        MXS_ERROR("Path to monitor data directory is too long.");
+    }
+    else if (unlink(path) == -1 && errno != ENOENT)
+    {
+        /** A missing file only means that no backup was ever stored */
+        MXS_ERROR("Failed to remove persisted server states: %d, %s",
+                  errno, mxs_strerror(errno));
+    }
+}