diff --git a/server/core/config.c b/server/core/config.c index 1a8689bfb..06eabd414 100644 --- a/server/core/config.c +++ b/server/core/config.c @@ -34,6 +34,7 @@ * 29/05/14 Mark Riddoch Addition of filter definition * 23/05/14 Massimiliano Pinto Added automatic set of maxscale-id: first listening ipv4_raw + port + pid * 28/05/14 Massimiliano Pinto Added detect_replication_lag parameter + * 28/08/14 Massimiliano Pinto Added detect_stale_master parameter * * @endverbatim */ @@ -650,6 +651,7 @@ int error_count = 0; char *passwd; unsigned long interval = 0; int replication_heartbeat = 0; + int detect_stale_master = 0; module = config_get_value(obj->parameters, "module"); servers = config_get_value(obj->parameters, "servers"); @@ -663,6 +665,10 @@ int error_count = 0; replication_heartbeat = atoi(config_get_value(obj->parameters, "detect_replication_lag")); } + if (config_get_value(obj->parameters, "detect_stale_master")) { + detect_stale_master = atoi(config_get_value(obj->parameters, "detect_stale_master")); + } + if (module) { obj->element = monitor_alloc(obj->object, module); @@ -686,6 +692,10 @@ int error_count = 0; if(replication_heartbeat == 1) monitorSetReplicationHeartbeat(obj->element, replication_heartbeat); + /* detect stale master */ + if(detect_stale_master == 1) + monitorDetectStaleMaster(obj->element, detect_stale_master); + /* get the servers to monitor */ s = strtok(servers, ","); while (s) @@ -1346,6 +1356,7 @@ static char *monitor_params[] = "passwd", "monitor_interval", "detect_replication_lag", + "detect_stale_master", NULL }; /** diff --git a/server/core/monitor.c b/server/core/monitor.c index 85ff878d7..6227afd5d 100644 --- a/server/core/monitor.c +++ b/server/core/monitor.c @@ -207,8 +207,7 @@ MONITOR *ptr; /** * Show a single monitor * - * @param dcb DCB for printing output - * @param monitor The monitor to print information regarding + * @param dcb DCB for printing output */ void monitorShow(DCB *dcb, MONITOR *monitor) @@ -304,12 +303,26 @@ 
monitorSetInterval (MONITOR *mon, unsigned long interval) * Enable Replication Heartbeat support in monitor. * * @param mon The monitor instance - * @param replication_heartbeat The replication heartbeat + * @param enable The enabling value is 1, 0 turns it off */ void -monitorSetReplicationHeartbeat(MONITOR *mon, int replication_heartbeat) +monitorSetReplicationHeartbeat(MONITOR *mon, int enable) { if (mon->module->replicationHeartbeat != NULL) { - mon->module->replicationHeartbeat(mon->handle, replication_heartbeat); + mon->module->replicationHeartbeat(mon->handle, enable); + } +} + +/** + * Enable Stale Master assignment. + * + * @param mon The monitor instance + * @param enable The enabling value is 1, 0 turns it off + */ +void +monitorDetectStaleMaster(MONITOR *mon, int enable) +{ + if (mon->module->detectStaleMaster != NULL) { + mon->module->detectStaleMaster(mon->handle, enable); } } diff --git a/server/core/server.c b/server/core/server.c index e0e2a41d3..631bb2010 100644 --- a/server/core/server.c +++ b/server/core/server.c @@ -30,6 +30,7 @@ * 28/05/14 Massimiliano Pinto Addition of rlagd and node_ts fields * 20/06/14 Massimiliano Pinto Addition of master_id, depth, slaves fields * 26/06/14 Mark Riddoch Addition of server parameters + * 30/08/14 Massimiliano Pinto Addition of new service status description * * @endverbatim */ @@ -148,7 +149,8 @@ server_set_unique_name(SERVER *server, char *name) * Find an existing server using the unique section name in * configuration file * - * @param name The Server name defined in the header file + * @param servname The Server name or address + * @param port The server port * @return The server or NULL if not found */ SERVER * @@ -405,7 +407,7 @@ server_status(SERVER *server) { char *status = NULL; - if ((status = (char *)malloc(200)) == NULL) + if ((status = (char *)malloc(256)) == NULL) return NULL; status[0] = 0; if (server->status & SERVER_MAINT) @@ -418,6 +420,10 @@ char *status = NULL; strcat(status, "Synced, 
"); if (server->status & SERVER_NDB) strcat(status, "NDB, "); + if (server->status & SERVER_SLAVE_OF_EXTERNAL_MASTER) + strcat(status, "Slave of External Server, "); + if (server->status & SERVER_STALE_STATUS) + strcat(status, "Stale Status, "); if (server->status & SERVER_RUNNING) strcat(status, "Running"); else diff --git a/server/include/monitor.h b/server/include/monitor.h index d65fd075f..5d018c16f 100644 --- a/server/include/monitor.h +++ b/server/include/monitor.h @@ -31,6 +31,8 @@ * 25/07/13 Mark Riddoch Addition of diagnotics * 23/05/14 Mark Riddoch Addition of routine to find monitors by name * 23/05/14 Massimiliano Pinto Addition of defaultId and setInterval + * 23/06/14 Massimiliano Pinto Addition of replicationHeartbeat + * 28/08/14 Massimiliano Pinto Addition of detectStaleMaster * * @endverbatim */ @@ -70,6 +72,7 @@ typedef struct { void (*setInterval)(void *, unsigned long); void (*defaultId)(void *, unsigned long); void (*replicationHeartbeat)(void *, int); + void (*detectStaleMaster)(void *, int); } MONITOR_OBJECT; /** @@ -110,4 +113,5 @@ extern void monitorList(DCB *); extern void monitorSetId(MONITOR *, unsigned long); extern void monitorSetInterval (MONITOR *, unsigned long); extern void monitorSetReplicationHeartbeat(MONITOR *, int); +extern void monitorDetectStaleMaster(MONITOR *, int); #endif diff --git a/server/include/server.h b/server/include/server.h index c558418bc..6905b3d6e 100644 --- a/server/include/server.h +++ b/server/include/server.h @@ -39,6 +39,7 @@ * 20/06/14 Massimiliano Pinto Addition of master_id, depth, slaves fields * 26/06/14 Mark Riddoch Adidtion of server parameters * 30/07/14 Massimiliano Pinto Addition of NDB status for MySQL Cluster + * 30/08/14 Massimiliano Pinto Addition of SERVER_STALE_STATUS * * @endverbatim */ @@ -103,6 +104,7 @@ typedef struct server { #define SERVER_NDB 0x0010 /**<< The server is part of a MySQL cluster setup */ #define SERVER_MAINT 0x1000 /**<< Server is in maintenance mode */ #define 
SERVER_SLAVE_OF_EXTERNAL_MASTER 0x0080 /**<< Server is slave of a Master outside the provided replication topology */ +#define SERVER_STALE_STATUS 0x2000 /**<< Server stale status, monitor didn't update it */ /** * Is the server running - the macro returns true if the server is marked as running diff --git a/server/modules/monitor/galera_mon.c b/server/modules/monitor/galera_mon.c index 3cba61325..761078453 100644 --- a/server/modules/monitor/galera_mon.c +++ b/server/modules/monitor/galera_mon.c @@ -69,7 +69,7 @@ static void defaultUsers(void *, char *, char *); static void diagnostics(DCB *, void *); static void setInterval(void *, unsigned long); -static MONITOR_OBJECT MyObject = { startMonitor, stopMonitor, registerServer, unregisterServer, defaultUsers, diagnostics, setInterval, NULL, NULL }; +static MONITOR_OBJECT MyObject = { startMonitor, stopMonitor, registerServer, unregisterServer, defaultUsers, diagnostics, setInterval, NULL, NULL, NULL }; /** * Implementation of the mandatory version entry point diff --git a/server/modules/monitor/mysql_mon.c b/server/modules/monitor/mysql_mon.c index ee5b4eeb7..e78759f82 100644 --- a/server/modules/monitor/mysql_mon.c +++ b/server/modules/monitor/mysql_mon.c @@ -40,6 +40,9 @@ * the status to update in server status field before * starting the replication consistency check. * This will also give routers a consistent "status" of all servers + * 28/08/14 Massimiliano Pinto Added detectStaleMaster feature: previous detected master will be used again, even if the replication is stopped. + * This means both IO and SQL threads are not working on slaves. + * This option is not enabled by default. 
* * @endverbatim */ @@ -62,7 +65,7 @@ extern int lm_enabled_logfiles_bitmask; static void monitorMain(void *); -static char *version_str = "V1.2.0"; +static char *version_str = "V1.3.0"; MODULE_INFO info = { MODULE_API_MONITOR, @@ -80,6 +83,7 @@ static void diagnostics(DCB *, void *); static void setInterval(void *, unsigned long); static void defaultId(void *, unsigned long); static void replicationHeartbeat(void *, int); +static void detectStaleMaster(void *, int); static bool mon_status_changed(MONITOR_SERVERS* mon_srv); static bool mon_print_fail_status(MONITOR_SERVERS* mon_srv); static MONITOR_SERVERS *getServerByNodeId(MONITOR_SERVERS *, long); @@ -91,7 +95,7 @@ static int add_slave_to_master(long *, int, long); static void monitor_set_pending_status(MONITOR_SERVERS *, int); static void monitor_clear_pending_status(MONITOR_SERVERS *, int); -static MONITOR_OBJECT MyObject = { startMonitor, stopMonitor, registerServer, unregisterServer, defaultUser, diagnostics, setInterval, defaultId, replicationHeartbeat }; +static MONITOR_OBJECT MyObject = { startMonitor, stopMonitor, registerServer, unregisterServer, defaultUser, diagnostics, setInterval, defaultId, replicationHeartbeat, detectStaleMaster }; /** * Implementation of the mandatory version entry point @@ -160,6 +164,7 @@ MYSQL_MONITOR *handle; handle->id = MONITOR_DEFAULT_ID; handle->interval = MONITOR_INTERVAL; handle->replicationHeartbeat = 0; + handle->detectStaleMaster = 0; handle->master = NULL; spinlock_init(&handle->lock); } @@ -306,6 +311,7 @@ char *sep; dcb_printf(dcb,"\tSampling interval:\t%lu milliseconds\n", handle->interval); dcb_printf(dcb,"\tMaxScale MonitorId:\t%lu\n", handle->id); dcb_printf(dcb,"\tReplication lag:\t%s\n", (handle->replicationHeartbeat == 1) ? "enabled" : "disabled"); + dcb_printf(dcb,"\tDetect Stale Master:\t%s\n", (handle->detectStaleMaster == 1) ? 
"enabled" : "disabled"); dcb_printf(dcb, "\tMonitored servers: "); db = handle->databases; @@ -403,6 +409,12 @@ char *server_string; monitor_clear_pending_status(database, SERVER_SLAVE); monitor_clear_pending_status(database, SERVER_MASTER); + /* Clean addition status too */ + server_clear_status(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER); + server_clear_status(database->server, SERVER_STALE_STATUS); + monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER); + monitor_clear_pending_status(database, SERVER_STALE_STATUS); + return; } free(dpwd); @@ -458,12 +470,20 @@ char *server_string; if (strncmp(row[12], "Yes", 3) == 0 && strncmp(row[13], "Yes", 3) == 0) { isslave += 1; - + } + + /* If Slave_IO_Running = Yes, assign the master_id to current server: this allows building + * the replication tree, slaves ids will be added to master(s) and we will have at least the + * root master server. + * Please note, there could be no slaves at all if Slave_SQL_Running == 'No' + */ + if (strncmp(row[12], "Yes", 3) == 0) { /* get Master_Server_Id values */ master_id = atol(row[41]); if (master_id == 0) master_id = -1; } + i++; } /* store master_id of current node */ @@ -489,7 +509,14 @@ char *server_string; if (strncmp(row[10], "Yes", 3) == 0 && strncmp(row[11], "Yes", 3) == 0) { isslave = 1; + } + /* If Slave_IO_Running = Yes, assign the master_id to current server: this allows building + * the replication tree, slaves ids will be added to master(s) and we will have at least the + * root master server. 
+ * Please note, there could be no slaves at all if Slave_SQL_Running == 'No' + */ + if (strncmp(row[10], "Yes", 3) == 0) { /* get Master_Server_Id values */ master_id = atol(row[39]); if (master_id == 0) @@ -505,6 +532,7 @@ char *server_string; /* Remove addition info */ monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER); + monitor_clear_pending_status(database, SERVER_STALE_STATUS); /* Please note, the MASTER status and SERVER_SLAVE_OF_EXTERNAL_MASTER * will be assigned in the monitorMain() via get_replication_tree() routine @@ -534,6 +562,7 @@ monitorMain(void *arg) MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; MONITOR_SERVERS *ptr; int replication_heartbeat = handle->replicationHeartbeat; +int detect_stale_master = handle->detectStaleMaster; int num_servers=0; MONITOR_SERVERS *root_master; @@ -545,6 +574,7 @@ MONITOR_SERVERS *root_master; "module. Exiting.\n"))); return; } + handle->status = MONITOR_RUNNING; while (1) { @@ -616,10 +646,19 @@ MONITOR_SERVERS *root_master; while (ptr) { if (! 
SERVER_IN_MAINT(ptr->server)) { - ptr->server->status = ptr->pending_status; + /* If "detect_stale_master" option is On, let's use the previous master */ + if (detect_stale_master && root_master && (!strcmp(ptr->server->name, root_master->server->name) && ptr->server->port == root_master->server->port) && (ptr->server->status & SERVER_MASTER) && !(ptr->pending_status & SERVER_MASTER)) { + /* in this case server->status will not be updated from pending_status */ + LOGIF(LM, (skygw_log_write_flush( + LOGFILE_MESSAGE, "[mysql_mon]: root server [%s:%i] is no longer Master, let's use it again even if it could be a stale master, you have been warned!", ptr->server->name, ptr->server->port))); + /* Set the STALE bit for this server in server struct */ + server_set_status(ptr->server, SERVER_STALE_STATUS); + } else { + ptr->server->status = ptr->pending_status; + } } - ptr = ptr->next; - } + ptr = ptr->next; + } /* Do now the heartbeat replication set/get for MySQL Replication Consistency */ if (replication_heartbeat && root_master && (SERVER_IS_MASTER(root_master->server) || SERVER_IS_RELAY_SERVER(root_master->server))) { @@ -665,19 +704,34 @@ setInterval(void *arg, unsigned long interval) { MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; memcpy(&handle->interval, &interval, sizeof(unsigned long)); - } +} /** * Enable/Disable the MySQL Replication hearbeat, detecting slave lag behind master. 
* * @param arg The handle allocated by startMonitor - * @param replicationHeartbeat To enable it 1, disable it with 0 + * @param arg The handle allocated by startMonitor + * @param enable To enable it 1, disable it with 0 */ static void -replicationHeartbeat(void *arg, int replicationHeartbeat) +replicationHeartbeat(void *arg, int enable) { MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; - memcpy(&handle->replicationHeartbeat, &replicationHeartbeat, sizeof(int)); + memcpy(&handle->replicationHeartbeat, &enable, sizeof(int)); +} + +/** + * Enable/Disable the MySQL Replication Stale Master detection, allowing a previously detected master to still act as a Master. + * This option must be enabled in order to keep the Master when the replication is stopped or removed from slaves. + * If the replication is still stopped when MaxScale is restarted no Master will be available. + * + * @param arg The handle allocated by startMonitor + * @param enable To enable it 1, disable it with 0 + */ +static void +detectStaleMaster(void *arg, int enable) +{ +MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; + memcpy(&handle->detectStaleMaster, &enable, sizeof(int)); } static bool mon_status_changed( @@ -1038,6 +1092,10 @@ static MONITOR_SERVERS *get_replication_tree(MYSQL_MONITOR *handle, int num_serv monitor_set_pending_status(master, SERVER_MASTER); } else { if (current->master_id > 0) { + /* this server is slave of another server not in MaxScale configuration + * we cannot use it as a real slave. 
+ */ + monitor_clear_pending_status(ptr, SERVER_SLAVE); monitor_set_pending_status(ptr, SERVER_SLAVE_OF_EXTERNAL_MASTER); } } diff --git a/server/modules/monitor/mysqlmon.h b/server/modules/monitor/mysqlmon.h index 0e06db6e4..eb2d37bcd 100644 --- a/server/modules/monitor/mysqlmon.h +++ b/server/modules/monitor/mysqlmon.h @@ -32,6 +32,7 @@ * 26/05/14 Massimiliano Pinto Default values for MONITOR_INTERVAL * 28/05/14 Massimiliano Pinto Addition of new fields in MYSQL_MONITOR struct * 24/06/14 Massimiliano Pinto Addition of master field in MYSQL_MONITOR struct and MONITOR_MAX_NUM_SLAVES + * 28/08/14 Massimiliano Pinto Addition of detectStaleMaster * * @endverbatim */ @@ -43,9 +44,9 @@ typedef struct monitor_servers { SERVER *server; /**< The server being monitored */ MYSQL *con; /**< The MySQL connection */ - int mon_err_count; - unsigned int mon_prev_status; - unsigned int pending_status; /**< Pending Status flag bitmap */ + int mon_err_count; + unsigned int mon_prev_status; + unsigned int pending_status; /**< Pending Status flag bitmap */ struct monitor_servers *next; /**< The next server in the list */ } MONITOR_SERVERS; @@ -54,17 +55,18 @@ typedef struct monitor_servers { * The handle for an instance of a MySQL Monitor module */ typedef struct { - SPINLOCK lock; /**< The monitor spinlock */ - pthread_t tid; /**< id of monitor thread */ - int shutdown; /**< Flag to shutdown the monitor thread */ - int status; /**< Monitor status */ - char *defaultUser; /**< Default username for monitoring */ - char *defaultPasswd; /**< Default password for monitoring */ - unsigned long interval; /**< Monitor sampling interval */ - unsigned long id; /**< Monitor ID */ + SPINLOCK lock; /**< The monitor spinlock */ + pthread_t tid; /**< id of monitor thread */ + int shutdown; /**< Flag to shutdown the monitor thread */ + int status; /**< Monitor status */ + char *defaultUser; /**< Default username for monitoring */ + char *defaultPasswd; /**< Default password for monitoring */ + 
unsigned long interval; /**< Monitor sampling interval */ + unsigned long id; /**< Monitor ID */ int replicationHeartbeat; /**< Monitor flag for MySQL replication heartbeat */ - MONITOR_SERVERS *master; /**< Master server for MySQL Master/Slave replication */ - MONITOR_SERVERS *databases; /**< Linked list of servers to monitor */ + int detectStaleMaster; /**< Monitor flag for MySQL replication Stale Master detection */ + MONITOR_SERVERS *master; /**< Master server for MySQL Master/Slave replication */ + MONITOR_SERVERS *databases; /**< Linked list of servers to monitor */ } MYSQL_MONITOR; #define MONITOR_RUNNING 1