Add parameter backend_connect_attempts to monitor

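This number (defaults to 1) sets how many times mon_connect_to_db
will try to connect to a backend before returning an error. Each
connection attempt may take up to backend_connect_timeout seconds to
complete, so a failing connect can block for up to
backend_connect_attempts * backend_connect_timeout seconds in total.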

Also refactored the code a bit: renamed mon_connect_to_db to
mon_ping_or_connect_to_db, since it only pings the server, and does not
reconnect, when the existing connection is still alive.
This commit is contained in:
Esa Korhonen 2017-03-29 17:59:02 +03:00
parent 657d79470c
commit a362bd0024
11 changed files with 108 additions and 76 deletions

View File

@ -301,4 +301,6 @@ void config_disable_feedback_task(void);
*/
bool config_reload(void);
static const char BACKEND_CONNECT_ATTEMPTS[] = "backend_connect_attempts";
MXS_END_DECLS

View File

@ -154,6 +154,7 @@ struct mxs_monitor
MXS_MONITOR_SERVERS* databases; /*< List of databases the monitor monitors */
monitor_state_t state; /**< The state of the monitor */
int connect_timeout; /**< Connect timeout in seconds for mysql_real_connect */
int connect_attempts; /**< How many times a connection is attempted */
int read_timeout; /**< Timeout in seconds to read from the server.
* There are retries and the total effective timeout
* value is three times the option value.
@ -216,7 +217,7 @@ void servers_status_current_to_pending(MXS_MONITOR *monitor);
bool mon_status_changed(MXS_MONITOR_SERVERS* mon_srv);
bool mon_print_fail_status(MXS_MONITOR_SERVERS* mon_srv);
mxs_connect_result_t mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database);
mxs_connect_result_t mon_ping_or_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database);
void mon_log_connect_error(MXS_MONITOR_SERVERS* database, mxs_connect_result_t rval);
void lock_monitor_servers(MXS_MONITOR *monitor);

View File

@ -173,6 +173,7 @@ static const char *monitor_params[] =
"backend_connect_timeout",
"backend_read_timeout",
"backend_write_timeout",
BACKEND_CONNECT_ATTEMPTS,
NULL
};
@ -3018,6 +3019,16 @@ int create_new_monitor(CONFIG_CONTEXT *context, CONFIG_CONTEXT *obj, HASHTABLE*
}
}
char *connect_attempts = config_get_value(obj->parameters, BACKEND_CONNECT_ATTEMPTS);
if (connect_attempts)
{
if (!monitorSetNetworkTimeout(monitor, MONITOR_CONNECT_ATTEMPTS, atoi(connect_attempts)))
{
MXS_ERROR("Failed to set '%s'.", BACKEND_CONNECT_ATTEMPTS);
error_count++;
}
}
if (servers)
{
/* get the servers to monitor */

View File

@ -416,6 +416,15 @@ bool runtime_alter_monitor(MXS_MONITOR *monitor, char *key, char *value)
monitorSetNetworkTimeout(monitor, MONITOR_READ_TIMEOUT, ival);
}
}
else if (strcmp(key, BACKEND_CONNECT_ATTEMPTS) == 0)
{
long ival = get_positive_int(value);
if (ival)
{
valid = true;
monitorSetNetworkTimeout(monitor, MONITOR_CONNECT_ATTEMPTS, ival);
}
}
else
{
/** We're modifying module specific parameters and we need to stop the monitor */

View File

@ -25,6 +25,7 @@ MXS_BEGIN_DECLS
#define DEFAULT_CONNECT_TIMEOUT 3
#define DEFAULT_READ_TIMEOUT 1
#define DEFAULT_WRITE_TIMEOUT 2
#define DEFAULT_CONNECTION_ATTEMPTS 1
#define MONITOR_DEFAULT_INTERVAL 10000 // in milliseconds
@ -35,7 +36,8 @@ typedef enum
{
MONITOR_CONNECT_TIMEOUT = 0,
MONITOR_READ_TIMEOUT = 1,
MONITOR_WRITE_TIMEOUT = 2
MONITOR_WRITE_TIMEOUT = 2,
MONITOR_CONNECT_ATTEMPTS = 3
} monitor_timeouts_t;
MXS_MONITOR *monitor_alloc(char *, char *);

View File

@ -103,6 +103,7 @@ monitor_alloc(char *name, char *module)
mon->read_timeout = DEFAULT_READ_TIMEOUT;
mon->write_timeout = DEFAULT_WRITE_TIMEOUT;
mon->connect_timeout = DEFAULT_CONNECT_TIMEOUT;
mon->connect_attempts = DEFAULT_CONNECTION_ATTEMPTS;
mon->interval = MONITOR_DEFAULT_INTERVAL;
mon->parameters = NULL;
mon->created_online = false;
@ -461,6 +462,7 @@ monitorShow(DCB *dcb, MXS_MONITOR *monitor)
dcb_printf(dcb, "Connect Timeout: %i seconds\n", monitor->connect_timeout);
dcb_printf(dcb, "Read Timeout: %i seconds\n", monitor->read_timeout);
dcb_printf(dcb, "Write Timeout: %i seconds\n", monitor->write_timeout);
dcb_printf(dcb, "Connect attempts: %i \n", monitor->connect_attempts);
dcb_printf(dcb, "Monitored servers: ");
const char *sep = "";
@ -582,6 +584,10 @@ monitorSetNetworkTimeout(MXS_MONITOR *mon, int type, int value)
mon->write_timeout = value;
break;
case MONITOR_CONNECT_ATTEMPTS:
mon->connect_attempts = value;
break;
default:
MXS_ERROR("Monitor setNetworkTimeout received an unsupported action type %i", type);
rval = false;
@ -683,7 +689,7 @@ bool check_monitor_permissions(MXS_MONITOR* monitor, const char* query)
for (MXS_MONITOR_SERVERS *mondb = monitor->databases; mondb; mondb = mondb->next)
{
if (mon_connect_to_db(monitor, mondb) != MONITOR_CONN_OK)
if (mon_ping_or_connect_to_db(monitor, mondb) != MONITOR_CONN_OK)
{
MXS_ERROR("[%s] Failed to connect to server '%s' (%s:%d) when"
" checking monitor user credentials and permissions: %s",
@ -1145,22 +1151,21 @@ monitor_launch_script(MXS_MONITOR* mon, MXS_MONITOR_SERVERS* ptr, const char* sc
}
/**
* Connect to a database. This will always leave a valid database handle in the
* database->con pointer. This allows the user to call MySQL C API functions to
* find out the reason of the failure.
* Ping or, if connection does not exist or ping fails, connect to a database. This
* will always leave a valid database handle in the database->con pointer, allowing
* the user to call MySQL C API functions to find out the reason of the failure.
*
* @param mon Monitor
* @param database Monitored database
* @return MONITOR_CONN_OK if the connection is OK else the reason for the failure
* @return MONITOR_CONN_OK if the connection is OK, else the reason for the failure
*/
mxs_connect_result_t
mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
mon_ping_or_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
{
mxs_connect_result_t rval = MONITOR_CONN_OK;
/** Return if the connection is OK */
if (database->con && mysql_ping(database->con) == 0)
{
return rval;
return MONITOR_CONN_OK;
}
if (database->con)
@ -1168,6 +1173,7 @@ mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
mysql_close(database->con);
}
mxs_connect_result_t rval = MONITOR_CONN_REFUSED;
if ((database->con = mysql_init(NULL)))
{
char *uname = mon->user;
@ -1185,28 +1191,29 @@ mon_connect_to_db(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
mysql_optionsv(database->con, MYSQL_OPT_READ_TIMEOUT, (void *) &mon->read_timeout);
mysql_optionsv(database->con, MYSQL_OPT_WRITE_TIMEOUT, (void *) &mon->write_timeout);
mysql_optionsv(database->con, MYSQL_PLUGIN_DIR, get_connector_plugindir());
time_t start = time(NULL);
bool result = (mxs_mysql_real_connect(database->con, database->server, uname, dpwd) != NULL);
time_t end = time(NULL);
if (!result)
time_t start = 0;
time_t end = 0;
for (int i = 0; i < mon->connect_attempts; i++)
{
if ((int) difftime(end, start) >= mon->connect_timeout)
start = time(NULL);
bool result = (mxs_mysql_real_connect(database->con, database->server, uname, dpwd) != NULL);
end = time(NULL);
if (result)
{
rval = MONITOR_CONN_TIMEOUT;
}
else
{
rval = MONITOR_CONN_REFUSED;
rval = MONITOR_CONN_OK;
break;
}
}
if (rval == MONITOR_CONN_REFUSED &&
(int)difftime(end, start) >= mon->connect_timeout)
{
rval = MONITOR_CONN_TIMEOUT;
}
MXS_FREE(dpwd);
}
else
{
rval = MONITOR_CONN_REFUSED;
}
return rval;
}
@ -1337,6 +1344,8 @@ static bool create_monitor_config(const MXS_MONITOR *monitor, const char *filena
dprintf(file, "backend_connect_timeout=%d\n", monitor->connect_timeout);
dprintf(file, "backend_write_timeout=%d\n", monitor->write_timeout);
dprintf(file, "backend_read_timeout=%d\n", monitor->read_timeout);
dprintf(file, "%s=%d\n", BACKEND_CONNECT_ATTEMPTS, monitor->connect_attempts);
close(file);
return true;

View File

@ -51,7 +51,7 @@ void update_server_status(MXS_MONITOR *monitor, MXS_MONITOR_SERVERS *database)
database->mon_prev_status = database->server->status;
/** Try to connect to or ping the database */
mxs_connect_result_t rval = mon_connect_to_db(monitor, database);
mxs_connect_result_t rval = mon_ping_or_connect_to_db(monitor, database);
if (rval == MONITOR_CONN_OK)
{

View File

@ -268,7 +268,7 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
/** Store previous status */
database->mon_prev_status = database->server->status;
mxs_connect_result_t rval = mon_connect_to_db(mon, database);
mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK)
{
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)

View File

@ -210,7 +210,7 @@ monitorDatabase(MXS_MONITOR* mon, MXS_MONITOR_SERVERS *database)
/** Store previous status */
database->mon_prev_status = database->server->status;
mxs_connect_result_t rval = mon_connect_to_db(mon, database);
mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK)
{

View File

@ -625,54 +625,52 @@ monitorDatabase(MXS_MONITOR *mon, MXS_MONITOR_SERVERS *database)
/** Store previous status */
database->mon_prev_status = database->server->status;
if (database->con == NULL || mysql_ping(database->con) != 0)
mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval == MONITOR_CONN_OK)
{
mxs_connect_result_t rval;
if ((rval = mon_connect_to_db(mon, database)) == MONITOR_CONN_OK)
{
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_clear_pending_status(database, SERVER_AUTH_ERROR);
}
else
{
/* The current server is not running
*
* Store server NOT running in server and monitor server pending struct
*
*/
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_set_pending_status(database, SERVER_AUTH_ERROR);
}
server_clear_status_nolock(database->server, SERVER_RUNNING);
monitor_clear_pending_status(database, SERVER_RUNNING);
/* Also clear M/S state in both server and monitor server pending struct */
server_clear_status_nolock(database->server, SERVER_SLAVE);
server_clear_status_nolock(database->server, SERVER_MASTER);
server_clear_status_nolock(database->server, SERVER_RELAY_MASTER);
monitor_clear_pending_status(database, SERVER_SLAVE);
monitor_clear_pending_status(database, SERVER_MASTER);
monitor_clear_pending_status(database, SERVER_RELAY_MASTER);
/* Clean addition status too */
server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status_nolock(database->server, SERVER_STALE_STATUS);
server_clear_status_nolock(database->server, SERVER_STALE_SLAVE);
monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER);
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
monitor_clear_pending_status(database, SERVER_STALE_SLAVE);
/* Log connect failure only once */
if (mon_status_changed(database) && mon_print_fail_status(database))
{
mon_log_connect_error(database, rval);
}
return;
}
server_clear_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_clear_pending_status(database, SERVER_AUTH_ERROR);
}
else
{
/* The current server is not running
*
* Store server NOT running in server and monitor server pending struct
*
*/
if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR)
{
server_set_status_nolock(database->server, SERVER_AUTH_ERROR);
monitor_set_pending_status(database, SERVER_AUTH_ERROR);
}
server_clear_status_nolock(database->server, SERVER_RUNNING);
monitor_clear_pending_status(database, SERVER_RUNNING);
/* Also clear M/S state in both server and monitor server pending struct */
server_clear_status_nolock(database->server, SERVER_SLAVE);
server_clear_status_nolock(database->server, SERVER_MASTER);
server_clear_status_nolock(database->server, SERVER_RELAY_MASTER);
monitor_clear_pending_status(database, SERVER_SLAVE);
monitor_clear_pending_status(database, SERVER_MASTER);
monitor_clear_pending_status(database, SERVER_RELAY_MASTER);
/* Clean addition status too */
server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER);
server_clear_status_nolock(database->server, SERVER_STALE_STATUS);
server_clear_status_nolock(database->server, SERVER_STALE_SLAVE);
monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER);
monitor_clear_pending_status(database, SERVER_STALE_STATUS);
monitor_clear_pending_status(database, SERVER_STALE_SLAVE);
/* Log connect failure only once */
if (mon_status_changed(database) && mon_print_fail_status(database))
{
mon_log_connect_error(database, rval);
}
return;
}
/* Store current status in both server and monitor server pending struct */
server_set_status_nolock(database->server, SERVER_RUNNING);
monitor_set_pending_status(database, SERVER_RUNNING);
@ -2061,7 +2059,7 @@ void check_maxscale_schema_replication(MXS_MONITOR *monitor)
while (database)
{
mxs_connect_result_t rval = mon_connect_to_db(monitor, database);
mxs_connect_result_t rval = mon_ping_or_connect_to_db(monitor, database);
if (rval == MONITOR_CONN_OK)
{
if (!check_replicate_ignore_table(database) ||

View File

@ -194,7 +194,7 @@ monitorDatabase(MXS_MONITOR_SERVERS *database, char *defaultUser, char *defaultP
return;
}
mxs_connect_result_t rval = mon_connect_to_db(mon, database);
mxs_connect_result_t rval = mon_ping_or_connect_to_db(mon, database);
if (rval != MONITOR_CONN_OK)
{
server_clear_status_nolock(database->server, SERVER_RUNNING);