Add parameter backend_connect_attempts to monitor

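This number (defaults to 1) sets how many times
mon_ping_or_connect_to_db (formerly mon_connect_to_db) will try to
connect to a backend before returning an error. Each connection
attempt may take up to backend_connect_timeout seconds to complete,
so a failing backend can block the monitor for roughly
backend_connect_attempts * backend_connect_timeout seconds.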

Also refactored the code a bit: renamed mon_connect_to_db to
mon_ping_or_connect_to_db, since the function only pings, and does not
reconnect, when an existing connection is still alive.
This commit is contained in:
Esa Korhonen
2017-03-29 17:59:02 +03:00
parent 657d79470c
commit a362bd0024
11 changed files with 108 additions and 76 deletions

View File

@ -301,4 +301,6 @@ void config_disable_feedback_task(void);
*/ */
bool config_reload(void); bool config_reload(void);
static const char BACKEND_CONNECT_ATTEMPTS[] = "backend_connect_attempts";
MXS_END_DECLS MXS_END_DECLS

View File

@ -154,6 +154,7 @@ struct mxs_monitor
MXS_MONITOR_SERVERS* databases; /*< List of databases the monitor monitors */ MXS_MONITOR_SERVERS* databases; /*< List of databases the monitor monitors */
monitor_state_t state; /**< The state of the monitor */ monitor_state_t state; /**< The state of the monitor */
int connect_timeout; /**< Connect timeout in seconds for mysql_real_connect */ int connect_timeout; /**< Connect timeout in seconds for mysql_real_connect */
int connect_attempts; /**< How many times a connection is attempted */
int read_timeout; /**< Timeout in seconds to read from the server. int read_timeout; /**< Timeout in seconds to read from the server.
* There are retries and the total effective timeout * There are retries and the total effective timeout
* value is three times the option value. * value is three times the option value.
@ -216,7 +217,7 @@ void servers_status_current_to_pending(MXS_MONITOR *monitor);
bool mon_status_changed(MXS_MONITOR_SERVERS* mon_srv); bool mon_status_changed(MXS_MONITOR_SERVERS* mon_srv);
bool mon_print_fail_status(MXS_MONITOR_SERVERS* mon_srv); bool mon_print_fail_status(MXS_MONITOR_SERVERS* mon_srv);
mxs_connect_result_t mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database); mxs_connect_result_t mon_ping_or_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database);
void mon_log_connect_error(MXS_MONITOR_SERVERS* database, mxs_connect_result_t rval); void mon_log_connect_error(MXS_MONITOR_SERVERS* database, mxs_connect_result_t rval);
void lock_monitor_servers(MXS_MONITOR *monitor); void lock_monitor_servers(MXS_MONITOR *monitor);

View File

@ -173,6 +173,7 @@ static const char *monitor_params[] =
"backend_connect_timeout", "backend_connect_timeout",
"backend_read_timeout", "backend_read_timeout",
"backend_write_timeout", "backend_write_timeout",
BACKEND_CONNECT_ATTEMPTS,
NULL NULL
}; };
@ -3018,6 +3019,16 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
} }
} }
char *connect_attempts = config_get_value(obj->parameters, BACKEND_CONNECT_ATTEMPTS);
if (connect_attempts)
{
if (!monitorSetNetworkTimeout(monitor, MONITOR_CONNECT_ATTEMPTS, atoi(connect_attempts)))
{
MXS_ERROR("Failed to set '%s'.", BACKEND_CONNECT_ATTEMPTS);
error_count++;
}
}
if (servers) if (servers)
{ {
/* get the servers to monitor */ /* get the servers to monitor */

View File

@ -416,6 +416,15 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, char *key, char *value)
monitorSetNetworkTimeout(monitor, MONITOR_READ_TIMEOUT, ival); monitorSetNetworkTimeout(monitor, MONITOR_READ_TIMEOUT, ival);
} }
} }
else if (strcmp(key, BACKEND_CONNECT_ATTEMPTS) == 0)
{
long ival = get_positive_int(value);
if (ival)
{
valid = true;
monitorSetNetworkTimeout(monitor, MONITOR_CONNECT_ATTEMPTS, ival);
}
}
else else
{ {
/** We're modifying module specific parameters and we need to stop the monitor */ /** We're modifying module specific parameters and we need to stop the monitor */

View File

@ -25,6 +25,7 @@ MXS_BEGIN_DECLS
#define DEFAULT_CONNECT_TIMEOUT 3 #define DEFAULT_CONNECT_TIMEOUT 3
#define DEFAULT_READ_TIMEOUT 1 #define DEFAULT_READ_TIMEOUT 1
#define DEFAULT_WRITE_TIMEOUT 2 #define DEFAULT_WRITE_TIMEOUT 2
#define DEFAULT_CONNECTION_ATTEMPTS 1
#define MONITOR_DEFAULT_INTERVAL 10000 // in milliseconds #define MONITOR_DEFAULT_INTERVAL 10000 // in milliseconds
@ -35,7 +36,8 @@ typedef enum
{ {
MONITOR_CONNECT_TIMEOUT = 0, MONITOR_CONNECT_TIMEOUT = 0,
MONITOR_READ_TIMEOUT = 1, MONITOR_READ_TIMEOUT = 1,
MONITOR_WRITE_TIMEOUT = 2 MONITOR_WRITE_TIMEOUT = 2,
MONITOR_CONNECT_ATTEMPTS = 3
} monitor_timeouts_t; } monitor_timeouts_t;
MXS_MONITOR *monitor_alloc(char *, char *); MXS_MONITOR *monitor_alloc(char *, char *);

View File

@ -103,6 +103,7 @@ monitor_alloc(char *name, char *module)
mon->read_timeout = DEFAULT_READ_TIMEOUT; mon->read_timeout = DEFAULT_READ_TIMEOUT;
mon->write_timeout = DEFAULT_WRITE_TIMEOUT; mon->write_timeout = DEFAULT_WRITE_TIMEOUT;
mon->connect_timeout = DEFAULT_CONNECT_TIMEOUT; mon->connect_timeout = DEFAULT_CONNECT_TIMEOUT;
mon->connect_attempts = DEFAULT_CONNECTION_ATTEMPTS;
mon->interval = MONITOR_DEFAULT_INTERVAL; mon->interval = MONITOR_DEFAULT_INTERVAL;
mon->parameters = NULL; mon->parameters = NULL;
mon->created_online = false; mon->created_online = false;
@ -461,6 +462,7 @@ monitorShow(DCB *dcb, MXS_MONITOR *monitor)
dcb_printf(dcb, "Connect Timeout: %i seconds\n", monitor->connect_timeout); dcb_printf(dcb, "Connect Timeout: %i seconds\n", monitor->connect_timeout);
dcb_printf(dcb, "Read Timeout: %i seconds\n", monitor->read_timeout); dcb_printf(dcb, "Read Timeout: %i seconds\n", monitor->read_timeout);
dcb_printf(dcb, "Write Timeout: %i seconds\n", monitor->write_timeout); dcb_printf(dcb, "Write Timeout: %i seconds\n", monitor->write_timeout);
dcb_printf(dcb, "Connect attempts: %i \n", monitor->connect_attempts);
dcb_printf(dcb, "Monitored servers: "); dcb_printf(dcb, "Monitored servers: ");
const char *sep = ""; const char *sep = "";
@ -582,6 +584,10 @@ monitorSetNetworkTimeout(MXS_MONITOR *mon, int type, int value)
mon->write_timeout = value; mon->write_timeout = value;
break; break;
case MONITOR_CONNECT_ATTEMPTS:
mon->connect_attempts = value;
break;
default: default:
MXS_ERROR("Monitor setNetworkTimeout received an unsupported action type %i", type); MXS_ERROR("Monitor setNetworkTimeout received an unsupported action type %i", type);
rval = false; rval = false;
@ -683,7 +689,7 @@ bool check_monitor_permissions(MXS_MONITOR* monitor, const char* query)
for (MXS_MONITOR_SERVERS *mondb = monitor->databases; mondb; mondb = mondb->next) for (MXS_MONITOR_SERVERS *mondb = monitor->databases; mondb; mondb = mondb->next)
{ {
if (mon_connect_to_db(monitor, mondb) != MONITOR_CONN_OK) if (mon_ping_or_connect_to_db(monitor, mondb) != MONITOR_CONN_OK)
{ {
MXS_ERROR("[%s] Failed to connect to server '%s' (%s:%d) when" MXS_ERROR("[%s] Failed to connect to server '%s' (%s:%d) when"
" checking monitor user credentials and permissions: %s", " checking monitor user credentials and permissions: %s",
@ -1145,22 +1151,21 @@ monitor_launch_script(MXS_MONITOR* mon, MXS_MONITOR_SERVERS* ptr, const char* sc
} }
/** /**
* Connect to a database. This will always leave a valid database handle in the * Ping or, if connection does not exist or ping fails, connect to a database. This
* database->con pointer. This allows the user to call MySQL C API functions to * will always leave a valid database handle in the database->con pointer, allowing
* find out the reason of the failure. * the user to call MySQL C API functions to find out the reason of the failure.
*
* @param mon Monitor * @param mon Monitor
* @param database Monitored database * @param database Monitored database
* @return MONITOR_CONN_OK if the connection is OK else the reason for the failure * @return MONITOR_CONN_OK if the connection is OK, else the reason for the failure
*/ */
mxs_connect_result_t mxs_connect_result_t
mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database) mon_ping_or_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
{ {
mxs_connect_result_t rval = MONITOR_CONN_OK;
/** Return if the connection is OK */ /** Return if the connection is OK */
if (database->con && mysql_ping(database->con) == 0) if (database->con && mysql_ping(database->con) == 0)
{ {
return rval; return MONITOR_CONN_OK;
} }
if (database->con) if (database->con)
@ -1168,6 +1173,7 @@ mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
mysql_close(database->con); mysql_close(database->con);
} }
mxs_connect_result_t rval = MONITOR_CONN_REFUSED;
if ((database->con = mysql_init(NULL))) if ((database->con = mysql_init(NULL)))
{ {
char *uname = mon->user; char *uname = mon->user;
@ -1185,28 +1191,29 @@ mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
mysql_optionsv(database->con, MYSQL_OPT_READ_TIMEOUT, (void *) &mon->read_timeout); mysql_optionsv(database->con, MYSQL_OPT_READ_TIMEOUT, (void *) &mon->read_timeout);
mysql_optionsv(database->con, MYSQL_OPT_WRITE_TIMEOUT, (void *) &mon->write_timeout); mysql_optionsv(database->con, MYSQL_OPT_WRITE_TIMEOUT, (void *) &mon->write_timeout);
mysql_optionsv(database->con, MYSQL_PLUGIN_DIR, get_connector_plugindir()); mysql_optionsv(database->con, MYSQL_PLUGIN_DIR, get_connector_plugindir());
time_t start = time(NULL);
bool result = (mxs_mysql_real_connect(database->con, database->server, uname, dpwd) != NULL);
time_t end = time(NULL);
if (!result) time_t start = 0;
time_t end = 0;
for (int i = 0; i < mon->connect_attempts; i++)
{ {
if ((int) difftime(end, start) >= mon->connect_timeout) start = time(NULL);
bool result = (mxs_mysql_real_connect(database->con, database->server, uname, dpwd) != NULL);
end = time(NULL);
if (result)
{ {
rval = MONITOR_CONN_TIMEOUT; rval = MONITOR_CONN_OK;
} break;
else
{
rval = MONITOR_CONN_REFUSED;
} }
} }
if (rval == MONITOR_CONN_REFUSED &&
(int)difftime(end, start) >= mon->connect_timeout)
{
rval = MONITOR_CONN_TIMEOUT;
}
MXS_FREE(dpwd); MXS_FREE(dpwd);
} }
else
{
rval = MONITOR_CONN_REFUSED;
}
return rval; return rval;
} }
@ -1337,6 +1344,8 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
dprintf(file, "backend_connect_timeout=%d\n", monitor->connect_timeout); dprintf(file, "backend_connect_timeout=%d\n", monitor->connect_timeout);
dprintf(file, "backend_write_timeout=%d\n", monitor->write_timeout); dprintf(file, "backend_write_timeout=%d\n", monitor->write_timeout);
dprintf(file, "backend_read_timeout=%d\n", monitor->read_timeout); dprintf(file, "backend_read_timeout=%d\n", monitor->read_timeout);
dprintf(file, "%s=%d\n", BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts);
close(file); close(file);
return true; return true;

View File

@ -51,7 +51,7 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database)
database->mon_prev_status = database->server->status; database->mon_prev_status = database->server->status;
/** Try to connect to or ping the database */ /** Try to connect to or ping the database */
mxs_connect_result_t rval = mon_connect_to_db(monitor, database); mxs_connect_result_t rval = mon_ping_or_connect_to_db(monitor, database);
if (rval == MONITOR_CONN_OK) if (rval == MONITOR_CONN_OK)
{ {

View File

@ -268,7 +268,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
/** Store previous status */ /** Store previous status */
database->mon_prev_status = database->server->status; database->mon_prev_status = database->server->status;
mxs_connect_result_t rval = mon_connect_to_db(mon, database); mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK) if (rval != MONITOR_CONN_OK)
{ {
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)

View File

@ -210,7 +210,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
/** Store previous status */ /** Store previous status */
database->mon_prev_status = database->server->status; database->mon_prev_status = database->server->status;
mxs_connect_result_t rval = mon_connect_to_db(mon, database); mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK) if (rval != MONITOR_CONN_OK)
{ {

View File

@ -625,54 +625,52 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
/** Store previous status */ /** Store previous status */
database->mon_prev_status = database->server->status; database->mon_prev_status = database->server->status;
if (database->con == NULL || mysql_ping(database->con) != 0) mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval == MONITOR_CONN_OK)
{ {
mxs_connect_result_t rval; server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
if ((rval = mon_connect_to_db(mon, database)) == MONITOR_CONN_OK) monitor_clear_pending_status(database, SERVER_AUTH_ERROR);
{
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_clear_pending_status(database, SERVER_AUTH_ERROR);
}
else
{
/* The current server is not running
*
* Store server NOT running in server and monitor server pending struct
*
*/
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_set_pending_status(database, SERVER_AUTH_ERROR);
}
server_clear_status_nolock(database->server, SERVER_RUNNING);
monitor_clear_pending_status(database, SERVER_RUNNING);
/* Also clear M/S state in both server and monitor server pending struct */
server_clear_status_nolock(database->server, SERVER_SLAVE);
server_clear_status_nolock(database->server, SERVER_MASTER);
server_clear_status_nolock(database->server, SERVER_RELAY_MASTER);
monitor_clear_pending_status(database, SERVER_SLAVE);
monitor_clear_pending_status(database, SERVER_MASTER);
monitor_clear_pending_status(database, SERVER_RELAY_MASTER);
/* Clean addition status too */
server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status_nolock(database->server, SERVER_STALE_STATUS);
server_clear_status_nolock(database->server, SERVER_STALE_SLAVE);
monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER);
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
monitor_clear_pending_status(database, SERVER_STALE_SLAVE);
/* Log connect failure only once */
if (mon_status_changed(database) && mon_print_fail_status(database))
{
mon_log_connect_error(database, rval);
}
return;
}
} }
else
{
/* The current server is not running
*
* Store server NOT running in server and monitor server pending struct
*
*/
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_set_pending_status(database, SERVER_AUTH_ERROR);
}
server_clear_status_nolock(database->server, SERVER_RUNNING);
monitor_clear_pending_status(database, SERVER_RUNNING);
/* Also clear M/S state in both server and monitor server pending struct */
server_clear_status_nolock(database->server, SERVER_SLAVE);
server_clear_status_nolock(database->server, SERVER_MASTER);
server_clear_status_nolock(database->server, SERVER_RELAY_MASTER);
monitor_clear_pending_status(database, SERVER_SLAVE);
monitor_clear_pending_status(database, SERVER_MASTER);
monitor_clear_pending_status(database, SERVER_RELAY_MASTER);
/* Clean addition status too */
server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status_nolock(database->server, SERVER_STALE_STATUS);
server_clear_status_nolock(database->server, SERVER_STALE_SLAVE);
monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER);
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
monitor_clear_pending_status(database, SERVER_STALE_SLAVE);
/* Log connect failure only once */
if (mon_status_changed(database) && mon_print_fail_status(database))
{
mon_log_connect_error(database, rval);
}
return;
}
/* Store current status in both server and monitor server pending struct */ /* Store current status in both server and monitor server pending struct */
server_set_status_nolock(database->server, SERVER_RUNNING); server_set_status_nolock(database->server, SERVER_RUNNING);
monitor_set_pending_status(database, SERVER_RUNNING); monitor_set_pending_status(database, SERVER_RUNNING);
@ -2061,7 +2059,7 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor)
while (database) while (database)
{ {
mxs_connect_result_t rval = mon_connect_to_db(monitor, database); mxs_connect_result_t rval = mon_ping_or_connect_to_db(monitor, database);
if (rval == MONITOR_CONN_OK) if (rval == MONITOR_CONN_OK)
{ {
if (!check_replicate_ignore_table(database) || if (!check_replicate_ignore_table(database) ||

View File

@ -194,7 +194,7 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP
return; return;
} }
mxs_connect_result_t rval = mon_connect_to_db(mon, database); mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK) if (rval != MONITOR_CONN_OK)
{ {
server_clear_status_nolock(database->server, SERVER_RUNNING); server_clear_status_nolock(database->server, SERVER_RUNNING);