From a362bd00248570c2cac03de6dec5b1c437c689a8 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Wed, 29 Mar 2017 17:59:02 +0300 Subject: [PATCH] Add parameter backend_connect_attempts to monitor This number (defaults to 1) sets how many times mon_connect_to_db will try to connect to a backend before returning an error. Every connection attempt may take backend_connect_timeout seconds to complete. Also refactored code a bit. Renamed mon_connect_to_db to mon_ping_or_connect_to_db, since it does not connect if the connection is already alive. --- include/maxscale/config.h | 2 + include/maxscale/monitor.h | 3 +- server/core/config.cc | 11 +++ server/core/config_runtime.cc | 9 ++ server/core/maxscale/monitor.h | 4 +- server/core/monitor.cc | 55 ++++++----- server/modules/monitor/auroramon/auroramon.c | 2 +- server/modules/monitor/galeramon/galeramon.c | 2 +- server/modules/monitor/mmmon/mmmon.c | 2 +- server/modules/monitor/mysqlmon/mysql_mon.c | 92 +++++++++---------- .../monitor/ndbclustermon/ndbclustermon.c | 2 +- 11 files changed, 108 insertions(+), 76 deletions(-) diff --git a/include/maxscale/config.h b/include/maxscale/config.h index 499cbdd02..49c15c4e7 100644 --- a/include/maxscale/config.h +++ b/include/maxscale/config.h @@ -301,4 +301,6 @@ void config_disable_feedback_task(void); */ bool config_reload(void); +static const char BACKEND_CONNECT_ATTEMPTS[] = "backend_connect_attempts"; + MXS_END_DECLS diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index e48efd5d8..02e5e2457 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -154,6 +154,7 @@ struct mxs_monitor MXS_MONITOR_SERVERS* databases; /*< List of databases the monitor monitors */ monitor_state_t state; /**< The state of the monitor */ int connect_timeout; /**< Connect timeout in seconds for mysql_real_connect */ + int connect_attempts; /**< How many times a connection is attempted */ int read_timeout; /**< Timeout in seconds to read from the server. * There are retries and the total effective timeout * value is three times the option value. @@ -216,7 +217,7 @@ void servers_status_current_to_pending(MXS_MONITOR *monitor); bool mon_status_changed(MXS_MONITOR_SERVERS* mon_srv); bool mon_print_fail_status(MXS_MONITOR_SERVERS* mon_srv); -mxs_connect_result_t mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database); +mxs_connect_result_t mon_ping_or_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database); void mon_log_connect_error(MXS_MONITOR_SERVERS* database, mxs_connect_result_t rval); void lock_monitor_servers(MXS_MONITOR *monitor); diff --git a/server/core/config.cc b/server/core/config.cc index 0048f6535..ec9713f43 100644 --- a/server/core/config.cc +++ b/server/core/config.cc @@ -173,6 +173,7 @@ static const char *monitor_params[] = "backend_connect_timeout", "backend_read_timeout", "backend_write_timeout", + BACKEND_CONNECT_ATTEMPTS, NULL }; @@ -3018,6 +3019,16 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE* } } + char *connect_attempts = config_get_value(obj->parameters, BACKEND_CONNECT_ATTEMPTS); + if (connect_attempts) + { + if (!monitorSetNetworkTimeout(monitor, MONITOR_CONNECT_ATTEMPTS, atoi(connect_attempts))) + { + MXS_ERROR("Failed to set '%s'.", BACKEND_CONNECT_ATTEMPTS); + error_count++; + } + } + if (servers) { /* get the servers to monitor */ diff --git a/server/core/config_runtime.cc b/server/core/config_runtime.cc index 22bd913d3..741b18764 100644 --- a/server/core/config_runtime.cc +++ b/server/core/config_runtime.cc @@ -416,6 +416,15 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, char *key, char *value) monitorSetNetworkTimeout(monitor, MONITOR_READ_TIMEOUT, ival); } } + else if (strcmp(key, BACKEND_CONNECT_ATTEMPTS) == 0) + { + long ival = get_positive_int(value); + if (ival) + { + valid = true; + monitorSetNetworkTimeout(monitor, MONITOR_CONNECT_ATTEMPTS, ival); + } + } else { /** We're modifying module specific parameters and we need to stop the monitor */ diff --git a/server/core/maxscale/monitor.h b/server/core/maxscale/monitor.h index 12509baa5..d71f88411 100644 --- a/server/core/maxscale/monitor.h +++ b/server/core/maxscale/monitor.h @@ -25,6 +25,7 @@ MXS_BEGIN_DECLS #define DEFAULT_CONNECT_TIMEOUT 3 #define DEFAULT_READ_TIMEOUT 1 #define DEFAULT_WRITE_TIMEOUT 2 +#define DEFAULT_CONNECTION_ATTEMPTS 1 #define MONITOR_DEFAULT_INTERVAL 10000 // in milliseconds @@ -35,7 +36,8 @@ typedef enum { MONITOR_CONNECT_TIMEOUT = 0, MONITOR_READ_TIMEOUT = 1, - MONITOR_WRITE_TIMEOUT = 2 + MONITOR_WRITE_TIMEOUT = 2, + MONITOR_CONNECT_ATTEMPTS = 3 } monitor_timeouts_t; MXS_MONITOR *monitor_alloc(char *, char *); diff --git a/server/core/monitor.cc b/server/core/monitor.cc index 18dc52086..47ac4c488 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -103,6 +103,7 @@ monitor_alloc(char *name, char *module) mon->read_timeout = DEFAULT_READ_TIMEOUT; mon->write_timeout = DEFAULT_WRITE_TIMEOUT; mon->connect_timeout = DEFAULT_CONNECT_TIMEOUT; + mon->connect_attempts = DEFAULT_CONNECTION_ATTEMPTS; mon->interval = MONITOR_DEFAULT_INTERVAL; mon->parameters = NULL; mon->created_online = false; @@ -461,6 +462,7 @@ monitorShow(DCB *dcb, MXS_MONITOR *monitor) dcb_printf(dcb, "Connect Timeout: %i seconds\n", monitor->connect_timeout); dcb_printf(dcb, "Read Timeout: %i seconds\n", monitor->read_timeout); dcb_printf(dcb, "Write Timeout: %i seconds\n", monitor->write_timeout); + dcb_printf(dcb, "Connect attempts: %i \n", monitor->connect_attempts); dcb_printf(dcb, "Monitored servers: "); const char *sep = ""; @@ -582,6 +584,10 @@ monitorSetNetworkTimeout(MXS_MONITOR *mon, int type, int value) mon->write_timeout = value; break; + case MONITOR_CONNECT_ATTEMPTS: + mon->connect_attempts = value; + break; + default: MXS_ERROR("Monitor setNetworkTimeout received an unsupported action type %i", type); rval = false; @@ -683,7 +689,7 @@ bool check_monitor_permissions(MXS_MONITOR* monitor, const char* query) for (MXS_MONITOR_SERVERS *mondb = monitor->databases; mondb; mondb = mondb->next) { - if (mon_connect_to_db(monitor, mondb) != MONITOR_CONN_OK) + if (mon_ping_or_connect_to_db(monitor, mondb) != MONITOR_CONN_OK) { MXS_ERROR("[%s] Failed to connect to server '%s' (%s:%d) when" " checking monitor user credentials and permissions: %s", @@ -1145,22 +1151,21 @@ monitor_launch_script(MXS_MONITOR* mon, MXS_MONITOR_SERVERS* ptr, const char* sc } /** - * Connect to a database. This will always leave a valid database handle in the - * database->con pointer. This allows the user to call MySQL C API functions to - * find out the reason of the failure. + * Ping or, if connection does not exist or ping fails, connect to a database. This + * will always leave a valid database handle in the database->con pointer, allowing + * the user to call MySQL C API functions to find out the reason of the failure. + * * @param mon Monitor * @param database Monitored database - * @return MONITOR_CONN_OK if the connection is OK else the reason for the failure + * @return MONITOR_CONN_OK if the connection is OK, else the reason for the failure */ mxs_connect_result_t -mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) +mon_ping_or_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) { - mxs_connect_result_t rval = MONITOR_CONN_OK; - /** Return if the connection is OK */ if (database->con && mysql_ping(database->con) == 0) { - return rval; + return MONITOR_CONN_OK; } if (database->con) @@ -1168,6 +1173,7 @@ mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) mysql_close(database->con); } + mxs_connect_result_t rval = MONITOR_CONN_REFUSED; if ((database->con = mysql_init(NULL))) { char *uname = mon->user; @@ -1185,28 +1191,29 @@ mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) mysql_optionsv(database->con, MYSQL_OPT_READ_TIMEOUT, (void *) &mon->read_timeout); mysql_optionsv(database->con, MYSQL_OPT_WRITE_TIMEOUT, (void *) &mon->write_timeout); mysql_optionsv(database->con, MYSQL_PLUGIN_DIR, get_connector_plugindir()); - time_t start = time(NULL); - bool result = (mxs_mysql_real_connect(database->con, database->server, uname, dpwd) != NULL); - time_t end = time(NULL); - if (!result) + time_t start = 0; + time_t end = 0; + for (int i = 0; i < mon->connect_attempts; i++) { - if ((int) difftime(end, start) >= mon->connect_timeout) + start = time(NULL); + bool result = (mxs_mysql_real_connect(database->con, database->server, uname, dpwd) != NULL); + end = time(NULL); + + if (result) { - rval = MONITOR_CONN_TIMEOUT; - } - else - { - rval = MONITOR_CONN_REFUSED; + rval = MONITOR_CONN_OK; + break; } } + if (rval == MONITOR_CONN_REFUSED && + (int)difftime(end, start) >= mon->connect_timeout) + { + rval = MONITOR_CONN_TIMEOUT; + } MXS_FREE(dpwd); } - else - { - rval = MONITOR_CONN_REFUSED; - } return rval; } @@ -1337,6 +1344,8 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena dprintf(file, "backend_connect_timeout=%d\n", monitor->connect_timeout); dprintf(file, "backend_write_timeout=%d\n", monitor->write_timeout); dprintf(file, "backend_read_timeout=%d\n", monitor->read_timeout); + dprintf(file, "%s=%d\n", BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts); + close(file); return true; diff --git a/server/modules/monitor/auroramon/auroramon.c b/server/modules/monitor/auroramon/auroramon.c index c2bd49454..36d3f3ed4 100644 --- a/server/modules/monitor/auroramon/auroramon.c +++ b/server/modules/monitor/auroramon/auroramon.c @@ -51,7 +51,7 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database) database->mon_prev_status = database->server->status; /** Try to connect to or ping the database */ - mxs_connect_result_t rval = mon_connect_to_db(monitor, database); + mxs_connect_result_t rval = mon_ping_or_connect_to_db(monitor, database); if (rval == MONITOR_CONN_OK) { diff --git a/server/modules/monitor/galeramon/galeramon.c b/server/modules/monitor/galeramon/galeramon.c index 2a8153c10..4b520ee94 100644 --- a/server/modules/monitor/galeramon/galeramon.c +++ b/server/modules/monitor/galeramon/galeramon.c @@ -268,7 +268,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) /** Store previous status */ database->mon_prev_status = database->server->status; - mxs_connect_result_t rval = mon_connect_to_db(mon, database); + mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database); if (rval != MONITOR_CONN_OK) { if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) diff --git a/server/modules/monitor/mmmon/mmmon.c b/server/modules/monitor/mmmon/mmmon.c index 6f4a6b3bc..2f441fc0d 100644 --- a/server/modules/monitor/mmmon/mmmon.c +++ b/server/modules/monitor/mmmon/mmmon.c @@ -210,7 +210,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) /** Store previous status */ database->mon_prev_status = database->server->status; - mxs_connect_result_t rval = mon_connect_to_db(mon, database); + mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database); if (rval != MONITOR_CONN_OK) { diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index f82ed0eef..1db962afb 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -625,54 +625,52 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database) /** Store previous status */ database->mon_prev_status = database->server->status; - if (database->con == NULL || mysql_ping(database->con) != 0) + mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database); + if (rval == MONITOR_CONN_OK) { - mxs_connect_result_t rval; - if ((rval = mon_connect_to_db(mon, database)) == MONITOR_CONN_OK) - { - server_clear_status_nolock(database->server, SERVER_AUTH_ERROR); - monitor_clear_pending_status(database, SERVER_AUTH_ERROR); - } - else - { - /* The current server is not running - * - * Store server NOT running in server and monitor server pending struct - * - */ - if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) - { - server_set_status_nolock(database->server, SERVER_AUTH_ERROR); - monitor_set_pending_status(database, SERVER_AUTH_ERROR); - } - server_clear_status_nolock(database->server, SERVER_RUNNING); - monitor_clear_pending_status(database, SERVER_RUNNING); - - /* Also clear M/S state in both server and monitor server pending struct */ - server_clear_status_nolock(database->server, SERVER_SLAVE); - server_clear_status_nolock(database->server, SERVER_MASTER); - server_clear_status_nolock(database->server, SERVER_RELAY_MASTER); - monitor_clear_pending_status(database, SERVER_SLAVE); - monitor_clear_pending_status(database, SERVER_MASTER); - monitor_clear_pending_status(database, SERVER_RELAY_MASTER); - - /* Clean addition status too */ - server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER); - server_clear_status_nolock(database->server, SERVER_STALE_STATUS); - server_clear_status_nolock(database->server, SERVER_STALE_SLAVE); - monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER); - monitor_clear_pending_status(database, SERVER_STALE_STATUS); - monitor_clear_pending_status(database, SERVER_STALE_SLAVE); - - /* Log connect failure only once */ - if (mon_status_changed(database) && mon_print_fail_status(database)) - { - mon_log_connect_error(database, rval); - } - - return; - } + server_clear_status_nolock(database->server, SERVER_AUTH_ERROR); + monitor_clear_pending_status(database, SERVER_AUTH_ERROR); } + else + { + /* The current server is not running + * + * Store server NOT running in server and monitor server pending struct + * + */ + if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) + { + server_set_status_nolock(database->server, SERVER_AUTH_ERROR); + monitor_set_pending_status(database, SERVER_AUTH_ERROR); + } + server_clear_status_nolock(database->server, SERVER_RUNNING); + monitor_clear_pending_status(database, SERVER_RUNNING); + + /* Also clear M/S state in both server and monitor server pending struct */ + server_clear_status_nolock(database->server, SERVER_SLAVE); + server_clear_status_nolock(database->server, SERVER_MASTER); + server_clear_status_nolock(database->server, SERVER_RELAY_MASTER); + monitor_clear_pending_status(database, SERVER_SLAVE); + monitor_clear_pending_status(database, SERVER_MASTER); + monitor_clear_pending_status(database, SERVER_RELAY_MASTER); + + /* Clean addition status too */ + server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER); + server_clear_status_nolock(database->server, SERVER_STALE_STATUS); + server_clear_status_nolock(database->server, SERVER_STALE_SLAVE); + monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER); + monitor_clear_pending_status(database, SERVER_STALE_STATUS); + monitor_clear_pending_status(database, SERVER_STALE_SLAVE); + + /* Log connect failure only once */ + if (mon_status_changed(database) && mon_print_fail_status(database)) + { + mon_log_connect_error(database, rval); + } + + return; + } + /* Store current status in both server and monitor server pending struct */ server_set_status_nolock(database->server, SERVER_RUNNING); monitor_set_pending_status(database, SERVER_RUNNING); @@ -2061,7 +2059,7 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor) while (database) { - mxs_connect_result_t rval = mon_connect_to_db(monitor, database); + mxs_connect_result_t rval = mon_ping_or_connect_to_db(monitor, database); if (rval == MONITOR_CONN_OK) { if (!check_replicate_ignore_table(database) || diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.c b/server/modules/monitor/ndbclustermon/ndbclustermon.c index d9c547f1f..8d9edb411 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.c @@ -194,7 +194,7 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP return; } - mxs_connect_result_t rval = mon_connect_to_db(mon, database); + mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database); if (rval != MONITOR_CONN_OK) { server_clear_status_nolock(database->server, SERVER_RUNNING);