From 1ebf27eab367be3ad0d6bc871fd7968ff910d611 Mon Sep 17 00:00:00 2001 From: Mark Riddoch Date: Tue, 3 Jun 2014 09:25:09 +0100 Subject: [PATCH 1/6] Add server maintenance mode. Maintenance mode prevents a server from having new connections made to it and stops MaxScale from monitoring a server. It is designed to allow a server to be taken out of an active cluster for maintenance. It prevents new connections and allows existing connections to drain away prior to performing maintenance on the server. --- server/core/server.c | 2 ++ server/include/server.h | 15 +++++++++++---- server/modules/monitor/galera_mon.c | 11 ++++++++--- server/modules/monitor/mysql_mon.c | 6 ++++++ server/modules/routing/debugcmd.c | 12 +++++++----- server/modules/routing/readconnroute.c | 3 +++ 6 files changed, 37 insertions(+), 12 deletions(-) diff --git a/server/core/server.c b/server/core/server.c index 6aacf9475..42a60caea 100644 --- a/server/core/server.c +++ b/server/core/server.c @@ -344,6 +344,8 @@ char *status = NULL; if ((status = (char *)malloc(200)) == NULL) return NULL; status[0] = 0; + if (server->status & SERVER_MAINT) + strcat(status, "Maintenance, "); if (server->status & SERVER_MASTER) strcat(status, "Master, "); if (server->status & SERVER_SLAVE) diff --git a/server/include/server.h b/server/include/server.h index d9ca792b6..b15453c18 100644 --- a/server/include/server.h +++ b/server/include/server.h @@ -35,6 +35,7 @@ * 20/05/14 Massimiliano Pinto Addition of server_string field * 20/05/14 Massimiliano Pinto Addition of node_id field * 23/05/14 Massimiliano Pinto Addition of rlag and node_ts fields + * 03/06/14 Mark Riddoch Addition of maintainance mode * * @endverbatim */ @@ -80,12 +81,13 @@ typedef struct server { #define SERVER_MASTER 0x0002 /**<< The server is a master, i.e. can handle writes */ #define SERVER_SLAVE 0x0004 /**<< The server is a slave, i.e. 
can handle reads */ #define SERVER_JOINED 0x0008 /**<< The server is joined in a Galera cluster */ +#define SERVER_MAINT 0x1000 /**<< Server is in maintenance mode */ /** * Is the server running - the macro returns true if the server is marked as running * regardless of it's state as a master or slave */ -#define SERVER_IS_RUNNING(server) ((server)->status & SERVER_RUNNING) +#define SERVER_IS_RUNNING(server) (((server)->status & (SERVER_RUNNING|SERVER_MAINT)) == SERVER_RUNNING) /** * Is the server marked as down - the macro returns true if the server is beleived * to be inoperable. @@ -96,19 +98,24 @@ typedef struct server { * in order for the macro to return true */ #define SERVER_IS_MASTER(server) \ - (((server)->status & (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE)) == (SERVER_RUNNING|SERVER_MASTER)) + (((server)->status & (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_MASTER)) /** * Is the server a slave? The server must be both running and marked as a slave * in order for the macro to return true */ #define SERVER_IS_SLAVE(server) \ - (((server)->status & (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE)) == (SERVER_RUNNING|SERVER_SLAVE)) + (((server)->status & (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_SLAVE)) /** * Is the server joined Galera node? The server must be running and joined. */ #define SERVER_IS_JOINED(server) \ - (((server)->status & (SERVER_RUNNING|SERVER_JOINED)) == (SERVER_RUNNING|SERVER_JOINED)) + (((server)->status & (SERVER_RUNNING|SERVER_JOINED|SERVER_MAINT)) == (SERVER_RUNNING|SERVER_JOINED)) + +/** + * Is the server in maintenance mode. 
+ */ +#define SERVER_IN_MAINT(server) ((server)->status & SERVER_MAINT) extern SERVER *server_alloc(char *, char *, unsigned short); extern int server_free(SERVER *); diff --git a/server/modules/monitor/galera_mon.c b/server/modules/monitor/galera_mon.c index 1493548d2..715edfb96 100644 --- a/server/modules/monitor/galera_mon.c +++ b/server/modules/monitor/galera_mon.c @@ -28,6 +28,7 @@ * that has the lowest value of wsrep_local_index * 23/05/14 Massimiliano Pinto Added 1 configuration option (setInterval). * Interval is printed in diagnostics. + * 03/06/14 Mark Riddoch Add support for maintenance mode * * @endverbatim */ @@ -309,6 +310,10 @@ char *server_string; if (uname == NULL) return; + /* Don't even probe server flagged as in maintenance */ + if (SERVER_IN_MAINT(database->server)) + return; + if (database->con == NULL || mysql_ping(database->con) != 0) { char *dpwd = decryptPassword(passwd); @@ -415,7 +420,7 @@ long master_id; /* set master_id to the lowest value of ptr->server->node_id */ - if (ptr->server->node_id >= 0 && SERVER_IS_JOINED(ptr->server)) { + if ((! SERVER_IN_MAINT(ptr->server)) && ptr->server->node_id >= 0 && SERVER_IS_JOINED(ptr->server)) { if (ptr->server->node_id < master_id && master_id >= 0) { master_id = ptr->server->node_id; } else { @@ -423,7 +428,7 @@ long master_id; master_id = ptr->server->node_id; } } - } else { + } else if (!SERVER_IN_MAINT(ptr->server)) { /* clear M/S status */ server_clear_status(ptr->server, SERVER_SLAVE); server_clear_status(ptr->server, SERVER_MASTER); @@ -436,7 +441,7 @@ long master_id; /* this server loop sets Master and Slave roles */ while (ptr) { - if (ptr->server->node_id >= 0 && master_id >= 0) { + if ((! 
SERVER_IN_MAINT(ptr->server)) && ptr->server->node_id >= 0 && master_id >= 0) { /* set the Master role */ if (SERVER_IS_JOINED(ptr->server) && (ptr->server->node_id == master_id)) { server_set_status(ptr->server, SERVER_MASTER); diff --git a/server/modules/monitor/mysql_mon.c b/server/modules/monitor/mysql_mon.c index 14b51ee29..78ced4d75 100644 --- a/server/modules/monitor/mysql_mon.c +++ b/server/modules/monitor/mysql_mon.c @@ -32,6 +32,7 @@ * New server field version_string is updated. * 28/05/14 Massimiliano Pinto Added set Id and configuration options (setInverval) * Parameters are now printed in diagnostics + * 03/06/14 Mark Ridoch Add support for maintenance mode * * @endverbatim */ @@ -323,6 +324,11 @@ int replication_heartbeat = handle->replicationHeartbeat; } if (uname == NULL) return; + + /* Don't probe servers in maintenance mode */ + if (SERVER_IN_MAINT(database->server)) + return; + if (database->con == NULL || mysql_ping(database->con) != 0) { char *dpwd = decryptPassword(passwd); diff --git a/server/modules/routing/debugcmd.c b/server/modules/routing/debugcmd.c index 5be0c72bb..0a46812e3 100644 --- a/server/modules/routing/debugcmd.c +++ b/server/modules/routing/debugcmd.c @@ -754,11 +754,13 @@ static struct { char *str; unsigned int bit; } ServerBits[] = { - { "running", SERVER_RUNNING }, - { "master", SERVER_MASTER }, - { "slave", SERVER_SLAVE }, - { "synced", SERVER_JOINED }, - { NULL, 0 } + { "running", SERVER_RUNNING }, + { "master", SERVER_MASTER }, + { "slave", SERVER_SLAVE }, + { "synced", SERVER_JOINED }, + { "maintenance", SERVER_MAINT }, + { "maint", SERVER_MAINT }, + { NULL, 0 } }; /** * Map the server status bit diff --git a/server/modules/routing/readconnroute.c b/server/modules/routing/readconnroute.c index f1981c57a..7a28241cc 100644 --- a/server/modules/routing/readconnroute.c +++ b/server/modules/routing/readconnroute.c @@ -352,6 +352,9 @@ int master_host = -1; inst->bitmask))); } + if (SERVER_IN_MAINT(inst->server)) + continue; + 
/* * If router_options=slave, get the running master * It will be used if there are no running slaves at all From b7f6e600cc651dc824f97a9d5942c0310848b459 Mon Sep 17 00:00:00 2001 From: Mark Riddoch Date: Tue, 3 Jun 2014 15:35:09 +0100 Subject: [PATCH 2/6] Fix compilation error --- server/modules/routing/readconnroute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/modules/routing/readconnroute.c b/server/modules/routing/readconnroute.c index 7a28241cc..0652f9f0c 100644 --- a/server/modules/routing/readconnroute.c +++ b/server/modules/routing/readconnroute.c @@ -352,7 +352,7 @@ int master_host = -1; inst->bitmask))); } - if (SERVER_IN_MAINT(inst->server)) + if (SERVER_IN_MAINT(inst->servers[i]->server)) continue; /* From 46495bb59edff2d7d0e4d6b3c3b1cc4b5f50c961 Mon Sep 17 00:00:00 2001 From: VilhoRaatikka Date: Tue, 3 Jun 2014 19:39:50 +0300 Subject: [PATCH 3/6] Bug #443, http://bugs.skysql.com/show_bug.cgi?id=443 monitors used mysql_ping without MYSQL_OPT_READ_TIMEOUT which caused read to block. Fixed in mysql and galera monitor. Added log writing per each status change of each server and repeatedly if server is not running. Removed SERVER_IS_JOINED checks from rwsplit router. --- server/modules/include/readwritesplit.h | 3 +- server/modules/monitor/galera_mon.c | 23 +++++++++++ server/modules/monitor/mysql_mon.c | 41 +++++++++++++++---- server/modules/routing/readconnroute.c | 2 +- .../routing/readwritesplit/readwritesplit.c | 2 +- utils/skygw_debug.h | 8 +++- 6 files changed, 66 insertions(+), 13 deletions(-) diff --git a/server/modules/include/readwritesplit.h b/server/modules/include/readwritesplit.h index a4eecf4d5..00857ae1b 100644 --- a/server/modules/include/readwritesplit.h +++ b/server/modules/include/readwritesplit.h @@ -229,7 +229,6 @@ typedef struct router_instance { } ROUTER_INSTANCE; #define BACKEND_TYPE(b) (SERVER_IS_MASTER((b)->backend_server) ? BE_MASTER : \ - (SERVER_IS_SLAVE((b)->backend_server) ? 
BE_SLAVE : \ - (SERVER_IS_JOINED((b)->backend_server) ? BE_JOINED : BE_UNDEFINED))); + (SERVER_IS_SLAVE((b)->backend_server) ? BE_SLAVE : BE_UNDEFINED)); #endif /*< _RWSPLITROUTER_H */ diff --git a/server/modules/monitor/galera_mon.c b/server/modules/monitor/galera_mon.c index 715edfb96..a9f242756 100644 --- a/server/modules/monitor/galera_mon.c +++ b/server/modules/monitor/galera_mon.c @@ -317,10 +317,22 @@ char *server_string; if (database->con == NULL || mysql_ping(database->con) != 0) { char *dpwd = decryptPassword(passwd); + int rc; + int read_timeout = 1; + database->con = mysql_init(NULL); + rc = mysql_options(database->con, MYSQL_OPT_READ_TIMEOUT, (void *)&read_timeout); + if (mysql_real_connect(database->con, database->server->name, uname, dpwd, NULL, database->server->port, NULL, 0) == NULL) { + LOGIF(LE, (skygw_log_write_flush( + LOGFILE_ERROR, + "Error : Monitor was unable to connect to " + "server %s:%d : \"%s\"", + database->server->name, + database->server->port, + mysql_error(database->con)))); server_clear_status(database->server, SERVER_RUNNING); database->server->node_id = -1; free(dpwd); @@ -416,6 +428,7 @@ long master_id; while (ptr) { + unsigned int prev_status = ptr->server->status; monitorDatabase(ptr, handle->defaultUser, handle->defaultPasswd); /* set master_id to the lowest value of ptr->server->node_id */ @@ -433,6 +446,16 @@ long master_id; server_clear_status(ptr->server, SERVER_SLAVE); server_clear_status(ptr->server, SERVER_MASTER); } + if (ptr->server->status != prev_status || + SERVER_IS_DOWN(ptr->server)) + { + LOGIF(LM, (skygw_log_write_flush( + LOGFILE_MESSAGE, + "Backend server %s:%d state : %s", + ptr->server->name, + ptr->server->port, + STRSRVSTATUS(ptr->server)))); + } ptr = ptr->next; } diff --git a/server/modules/monitor/mysql_mon.c b/server/modules/monitor/mysql_mon.c index 78ced4d75..d643a00b9 100644 --- a/server/modules/monitor/mysql_mon.c +++ b/server/modules/monitor/mysql_mon.c @@ -302,7 +302,7 @@ char *sep; * 
Monitor an individual server * * @param handle The MySQL Monitor object - * @param database The database to probe + * @param database The database to probe */ static void monitorDatabase(MYSQL_MONITOR *handle, MONITOR_SERVERS *database) @@ -324,7 +324,7 @@ int replication_heartbeat = handle->replicationHeartbeat; } if (uname == NULL) return; - + /* Don't probe servers in maintenance mode */ if (SERVER_IN_MAINT(database->server)) return; @@ -332,7 +332,11 @@ int replication_heartbeat = handle->replicationHeartbeat; if (database->con == NULL || mysql_ping(database->con) != 0) { char *dpwd = decryptPassword(passwd); + int rc; + int read_timeout = 1; database->con = mysql_init(NULL); + rc = mysql_options(database->con, MYSQL_OPT_READ_TIMEOUT, (void *)&read_timeout); + if (mysql_real_connect(database->con, database->server->name, uname, @@ -342,6 +346,14 @@ int replication_heartbeat = handle->replicationHeartbeat; NULL, 0) == NULL) { + LOGIF(LE, (skygw_log_write_flush( + LOGFILE_ERROR, + "Error : Monitor was unable to connect to " + "server %s:%d : \"%s\"", + database->server->name, + database->server->port, + mysql_error(database->con)))); + free(dpwd); server_clear_status(database->server, SERVER_RUNNING); return; @@ -626,7 +638,6 @@ int replication_heartbeat = handle->replicationHeartbeat; server_clear_status(database->server, SERVER_SLAVE); server_clear_status(database->server, SERVER_MASTER); } - } /** @@ -661,13 +672,27 @@ MONITOR_SERVERS *ptr; ptr = handle->databases; while (ptr) { + unsigned int prev_status = ptr->server->status; + monitorDatabase(handle, ptr); + + if (ptr->server->status != prev_status || + SERVER_IS_DOWN(ptr->server)) + { + LOGIF(LM, (skygw_log_write_flush( + LOGFILE_MESSAGE, + "Backend server %s:%d state : %s", + ptr->server->name, + ptr->server->port, + STRSRVSTATUS(ptr->server)))); + } + ptr = ptr->next; } thread_millisleep(handle->interval); } } - + /** * Set the default id to use in the monitor. 
* @@ -676,11 +701,11 @@ MONITOR_SERVERS *ptr; */ static void defaultId(void *arg, unsigned long id) -{ + { MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; memcpy(&handle->id, &id, sizeof(unsigned long)); -} - + } + /** * Set the monitor sampling interval. * @@ -692,7 +717,7 @@ setInterval(void *arg, unsigned long interval) { MYSQL_MONITOR *handle = (MYSQL_MONITOR *)arg; memcpy(&handle->interval, &interval, sizeof(unsigned long)); -} + } /** * Enable/Disable the MySQL Replication hearbeat, detecting slave lag behind master. diff --git a/server/modules/routing/readconnroute.c b/server/modules/routing/readconnroute.c index 7a28241cc..0652f9f0c 100644 --- a/server/modules/routing/readconnroute.c +++ b/server/modules/routing/readconnroute.c @@ -352,7 +352,7 @@ int master_host = -1; inst->bitmask))); } - if (SERVER_IN_MAINT(inst->server)) + if (SERVER_IN_MAINT(inst->servers[i]->server)) continue; /* diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index fca3bca92..ddb0422c7 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -808,7 +808,7 @@ static bool get_dcb( } ss_dassert(succp); } - else if (btype == BE_MASTER || BE_JOINED) + else if (btype == BE_MASTER) { for (i=0; irses_nbackends; i++) { diff --git a/utils/skygw_debug.h b/utils/skygw_debug.h index 7277cb2e5..835b30aaa 100644 --- a/utils/skygw_debug.h +++ b/utils/skygw_debug.h @@ -228,7 +228,13 @@ typedef enum skygw_chk_t { ((c) == LEAST_GLOBAL_CONNECTIONS ? "LEAST_GLOBAL_CONNECTIONS" : \ ((c) == LEAST_ROUTER_CONNECTIONS ? "LEAST_ROUTER_CONNECTIONS" : \ ((c) == LEAST_BEHIND_MASTER ? "LEAST_BEHIND_MASTER" : "Unknown criteria")))) - + +#define STRSRVSTATUS(s) ((SERVER_IS_RUNNING(s) && SERVER_IS_MASTER(s)) ? "RUNNING MASTER" : \ + ((SERVER_IS_RUNNING(s) && SERVER_IS_SLAVE(s)) ? "RUNNING SLAVE" : \ + ((SERVER_IS_RUNNING(s) && SERVER_IS_JOINED(s)) ? 
"RUNNING JOINED" : \ + ((SERVER_IS_RUNNING(s) && SERVER_IS_MAINT(s)) ? "RUNNING MAINTENANCE" : \ + (SERVER_IS_RUNNING(s) ? "RUNNING (only)" : "NO STATUS"))))) + #define CHK_MLIST(l) { \ ss_info_dassert((l->mlist_chk_top == CHK_NUM_MLIST && \ l->mlist_chk_tail == CHK_NUM_MLIST), \ From 099a38a795596901a7feb967d1c6d07645b309f7 Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Wed, 4 Jun 2014 11:57:33 +0200 Subject: [PATCH 4/6] monitor_interval added monitor_interval added --- server/MaxScale_template.cnf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/MaxScale_template.cnf b/server/MaxScale_template.cnf index ee7eb6e30..94981afbc 100644 --- a/server/MaxScale_template.cnf +++ b/server/MaxScale_template.cnf @@ -20,6 +20,8 @@ threads=1 # user = # passwd= +# monitor_interval= [MySQL Monitor] type=monitor From 4dbd51b1fc3531a5de78334695a883cc58ed6b65 Mon Sep 17 00:00:00 2001 From: MassimilianoPinto Date: Wed, 4 Jun 2014 13:15:01 +0200 Subject: [PATCH 5/6] fixed macro name fixed macro name --- utils/skygw_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/skygw_debug.h b/utils/skygw_debug.h index 835b30aaa..3910429a2 100644 --- a/utils/skygw_debug.h +++ b/utils/skygw_debug.h @@ -232,7 +232,7 @@ typedef enum skygw_chk_t { #define STRSRVSTATUS(s) ((SERVER_IS_RUNNING(s) && SERVER_IS_MASTER(s)) ? "RUNNING MASTER" : \ ((SERVER_IS_RUNNING(s) && SERVER_IS_SLAVE(s)) ? "RUNNING SLAVE" : \ ((SERVER_IS_RUNNING(s) && SERVER_IS_JOINED(s)) ? "RUNNING JOINED" : \ - ((SERVER_IS_RUNNING(s) && SERVER_IS_MAINT(s)) ? "RUNNING MAINTENANCE" : \ + ((SERVER_IS_RUNNING(s) && SERVER_IN_MAINT(s)) ? "RUNNING MAINTENANCE" : \ (SERVER_IS_RUNNING(s) ? 
"RUNNING (only)" : "NO STATUS"))))) #define CHK_MLIST(l) { \ From 7e863657db8248bbc97301a8482d77f673079d59 Mon Sep 17 00:00:00 2001 From: VilhoRaatikka Date: Wed, 4 Jun 2014 14:49:05 +0300 Subject: [PATCH 6/6] Removed references to SERVER_IS_JOINED status from readwritesplit.c because rwsplit for Galera doesn't use that status at all. --- server/modules/routing/readwritesplit/readwritesplit.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/server/modules/routing/readwritesplit/readwritesplit.c b/server/modules/routing/readwritesplit/readwritesplit.c index ddb0422c7..9fec7e1bd 100644 --- a/server/modules/routing/readwritesplit/readwritesplit.c +++ b/server/modules/routing/readwritesplit/readwritesplit.c @@ -815,8 +815,7 @@ static bool get_dcb( BACKEND* b = backend_ref[i].bref_backend; if (backend_ref[i].bref_dcb != NULL && - (SERVER_IS_MASTER(b->backend_server) || - SERVER_IS_JOINED(b->backend_server))) + (SERVER_IS_MASTER(b->backend_server))) { *p_dcb = backend_ref[i].bref_dcb; succp = true; @@ -1554,8 +1553,7 @@ static bool select_connect_backend_servers( } } else if (!master_connected && - (SERVER_IS_MASTER(b->backend_server) || - SERVER_IS_JOINED(b->backend_server))) + (SERVER_IS_MASTER(b->backend_server))) { master_found = true; @@ -1656,8 +1654,7 @@ static bool select_connect_backend_servers( "Selected %s in \t%s:%d", (btype == BE_MASTER ? "master" : (btype == BE_SLAVE ? "slave" : - (btype == BE_JOINED ? "galera node" : - "unknown node type"))), + "unknown node type")), b->backend_server->name, b->backend_server->port))); }