From f41111b4bd1eee5e34b00f36bcb577c1c730904a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Fri, 10 Nov 2017 19:42:10 +0200 Subject: [PATCH] MXS-1517: Retain stale master bit even on master failure If a server goes down and it has the stale master bit enabled, all other bits for the server are cleared. This allows failed masters that have been replaced to be first detected and then reintroduced into the replication topology. --- server/modules/monitor/mysqlmon/mysql_mon.cc | 39 +++++++------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/server/modules/monitor/mysqlmon/mysql_mon.cc b/server/modules/monitor/mysqlmon/mysql_mon.cc index 188798ca3..d66850952 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.cc +++ b/server/modules/monitor/mysqlmon/mysql_mon.cc @@ -1264,7 +1264,7 @@ static inline void monitor_mysql_db(MXS_MONITORED_SERVER* database, MySqlServerI { /** Clear old states */ monitor_clear_pending_status(database, SERVER_SLAVE | SERVER_MASTER | SERVER_RELAY_MASTER | - SERVER_STALE_STATUS | SERVER_SLAVE_OF_EXTERNAL_MASTER); + SERVER_SLAVE_OF_EXTERNAL_MASTER); if (do_show_slave_status(serv_info, database, server_version)) { @@ -1428,34 +1428,19 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITORED_SERVER *database) } else { - /* The current server is not running - * - * Store server NOT running in server and monitor server pending struct - * + /** + * The current server is not running. Clear all but the stale master bit + * as it is used to detect masters that went down but came up. */ + unsigned int all_bits = ~SERVER_STALE_STATUS; + server_clear_status_nolock(database->server, all_bits); + monitor_clear_pending_status(database, all_bits); + if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) { server_set_status_nolock(database->server, SERVER_AUTH_ERROR); monitor_set_pending_status(database, SERVER_AUTH_ERROR); } - server_clear_status_nolock(database->server, SERVER_RUNNING); - monitor_clear_pending_status(database, SERVER_RUNNING); - - /* Also clear M/S state in both server and monitor server pending struct */ - server_clear_status_nolock(database->server, SERVER_SLAVE); - server_clear_status_nolock(database->server, SERVER_MASTER); - server_clear_status_nolock(database->server, SERVER_RELAY_MASTER); - monitor_clear_pending_status(database, SERVER_SLAVE); - monitor_clear_pending_status(database, SERVER_MASTER); - monitor_clear_pending_status(database, SERVER_RELAY_MASTER); - - /* Clean addition status too */ - server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER); - server_clear_status_nolock(database->server, SERVER_STALE_STATUS); - server_clear_status_nolock(database->server, SERVER_STALE_SLAVE); - monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER); - monitor_clear_pending_status(database, SERVER_STALE_STATUS); - monitor_clear_pending_status(database, SERVER_STALE_SLAVE); /* Log connect failure only once */ if (mon_status_changed(database) && mon_print_fail_status(database)) @@ -2069,6 +2054,10 @@ monitorMain(void *arg) /** If "detect_stale_master" option is On, let's use the previous master. * * Multi-master mode detects the stale masters in find_graph_cycles(). + * + * TODO: If a stale master goes down and comes back up, it loses + * the master status. An adequate solution would be to promote + * the stale master as a real master if it is the last running server. */ if (detect_stale_master && root_master && !handle->multimaster && (strcmp(ptr->server->name, root_master->server->name) == 0 && @@ -2082,7 +2071,7 @@ monitorMain(void *arg) * Set the STALE bit for this server in server struct */ server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER); - ptr->pending_status |= SERVER_STALE_STATUS | SERVER_MASTER; + monitor_set_pending_status(ptr, SERVER_STALE_STATUS | SERVER_MASTER); /** Log the message only if the master server didn't have * the stale master bit set */ @@ -2098,7 +2087,7 @@ monitorMain(void *arg) if (handle->detectStaleSlave) { - unsigned bits = SERVER_SLAVE | SERVER_RUNNING; + unsigned int bits = SERVER_SLAVE | SERVER_RUNNING; if ((ptr->mon_prev_status & bits) == bits && root_master && SERVER_IS_MASTER(root_master->server))