Fix to MXS-352: https://mariadb.atlassian.net/browse/MXS-352
If a service fails, MaxScale will try to start it again later on.
This commit is contained in:
@ -622,7 +622,9 @@ process_config_context(CONFIG_CONTEXT *context)
|
||||
|
||||
}
|
||||
|
||||
if (enable_root_user)
|
||||
serviceSetRetryOnFailure(obj->element, config_get_value(obj->parameters, "retry_on_failure"));
|
||||
|
||||
if (enable_root_user)
|
||||
serviceEnableRootUser(
|
||||
obj->element,
|
||||
config_truth_value(enable_root_user));
|
||||
|
||||
@ -64,6 +64,7 @@
|
||||
#include <resultset.h>
|
||||
#include <gw.h>
|
||||
#include <gwdirs.h>
|
||||
#include <math.h>
|
||||
|
||||
/** Defined in log_manager.cc */
|
||||
extern int lm_enabled_logfiles_bitmask;
|
||||
@ -99,6 +100,7 @@ static int find_type(typelib_t* tl, const char* needle, int maxlen);
|
||||
static void service_add_qualified_param(
|
||||
SERVICE* svc,
|
||||
CONFIG_PARAMETER* param);
|
||||
void service_interal_restart(void *data);
|
||||
|
||||
/**
|
||||
* Allocate a new service for the gateway to support
|
||||
@ -140,7 +142,8 @@ SERVICE *service;
|
||||
service->routerModule = strdup(router);
|
||||
service->users_from_all = false;
|
||||
service->resources = NULL;
|
||||
service->localhost_match_wildcard_host = SERVICE_PARAM_UNINIT;
|
||||
service->localhost_match_wildcard_host = SERVICE_PARAM_UNINIT;
|
||||
service->retry_start = true;
|
||||
service->ssl_mode = SSL_DISABLED;
|
||||
service->ssl_init_done = false;
|
||||
service->ssl_ca_cert = NULL;
|
||||
@ -157,6 +160,7 @@ SERVICE *service;
|
||||
return NULL;
|
||||
}
|
||||
service->stats.started = time(0);
|
||||
service->stats.n_failed_starts = 0;
|
||||
service->state = SERVICE_STATE_ALLOC;
|
||||
spinlock_init(&service->spin);
|
||||
spinlock_init(&service->users_table_spin);
|
||||
@ -260,6 +264,7 @@ GWPROTOCOL *funcs;
|
||||
{
|
||||
hashtable_free(service->users->data);
|
||||
free(service->users);
|
||||
service->users = NULL;
|
||||
dcb_close(port->listener);
|
||||
port->listener = NULL;
|
||||
goto retblock;
|
||||
@ -403,6 +408,50 @@ retblock:
|
||||
return listeners;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start all ports for a service.
|
||||
* serviceStartAllPorts will try to start all listeners associated with the service.
|
||||
* If no listeners are started, the starting of ports will be retried after a period of time.
|
||||
* @param service Service to start
|
||||
* @return Number of started listeners. This is equal to the number of ports the service
|
||||
* is listening to.
|
||||
*/
|
||||
int serviceStartAllPorts(SERVICE* service)
|
||||
{
|
||||
SERV_PROTOCOL *port = service->ports;
|
||||
int listeners = 0;
|
||||
while (!service->svc_do_shutdown && port)
|
||||
{
|
||||
listeners += serviceStartPort(service, port);
|
||||
port = port->next;
|
||||
}
|
||||
|
||||
if (listeners)
|
||||
{
|
||||
service->state = SERVICE_STATE_STARTED;
|
||||
service->stats.started = time(0);
|
||||
/** Add the task that monitors session timeouts */
|
||||
if (service->conn_timeout > 0)
|
||||
{
|
||||
hktask_add("connection_timeout", session_close_timeouts, NULL, 5);
|
||||
}
|
||||
}
|
||||
else if(service->retry_start)
|
||||
{
|
||||
/** Service failed to start any ports. Try again later. */
|
||||
service->stats.n_failed_starts++;
|
||||
char taskname[strlen(service->name) + strlen("_start_retry_") + (int)ceil(log10(INT_MAX)) + 1];
|
||||
int retry_after = MIN(service->stats.n_failed_starts * 10, SERVICE_MAX_RETRY_INTERVAL);
|
||||
snprintf(taskname, sizeof (taskname), "%s_start_retry_%d",
|
||||
service->name, service->stats.n_failed_starts);
|
||||
hktask_oneshot(taskname, service_interal_restart,
|
||||
(void*) service, retry_after);
|
||||
skygw_log_write(LM, "Failed to start service %s, retrying in %d seconds.",
|
||||
service->name, retry_after);
|
||||
}
|
||||
return listeners;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a service
|
||||
*
|
||||
@ -417,59 +466,42 @@ retblock:
|
||||
int
|
||||
serviceStart(SERVICE *service)
|
||||
{
|
||||
SERV_PROTOCOL *port;
|
||||
int listeners = 0;
|
||||
int listeners = 0;
|
||||
|
||||
|
||||
if(!check_service_permissions(service))
|
||||
{
|
||||
skygw_log_write_flush(LE,
|
||||
"%s: Error: Inadequate user permissions for service. Service not started.",
|
||||
service->name);
|
||||
service->state = SERVICE_STATE_FAILED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(service->ssl_mode != SSL_DISABLED)
|
||||
{
|
||||
if(serviceInitSSL(service) != 0)
|
||||
if (check_service_permissions(service))
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
|
||||
"%s: SSL initialization failed. Service not started.",
|
||||
service->name)));
|
||||
service->state = SERVICE_STATE_FAILED;
|
||||
return 0;
|
||||
if (service->ssl_mode == SSL_DISABLED || (service->ssl_mode != SSL_DISABLED && serviceInitSSL(service) != 0))
|
||||
{
|
||||
if ((service->router_instance = service->router->createInstance(
|
||||
service,service->routerOptions)))
|
||||
{
|
||||
listeners += serviceStartAllPorts(service);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
|
||||
"%s: Failed to create router instance for service. Service not started.",
|
||||
service->name)));
|
||||
service->state = SERVICE_STATE_FAILED;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
|
||||
"%s: SSL initialization failed. Service not started.",
|
||||
service->name)));
|
||||
service->state = SERVICE_STATE_FAILED;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((service->router_instance = service->router->createInstance(service,
|
||||
service->routerOptions)) == NULL)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(LOGFILE_ERROR,
|
||||
"%s: Failed to create router instance for service. Service not started.",
|
||||
service->name)));
|
||||
service->state = SERVICE_STATE_FAILED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
port = service->ports;
|
||||
while (!service->svc_do_shutdown && port)
|
||||
{
|
||||
listeners += serviceStartPort(service, port);
|
||||
port = port->next;
|
||||
}
|
||||
if (listeners)
|
||||
{
|
||||
service->state = SERVICE_STATE_STARTED;
|
||||
service->stats.started = time(0);
|
||||
}
|
||||
|
||||
/** Add the task that monitors session timeouts */
|
||||
if(service->conn_timeout > 0)
|
||||
{
|
||||
hktask_add("connection_timeout",session_close_timeouts,NULL,5);
|
||||
}
|
||||
|
||||
return listeners;
|
||||
else
|
||||
{
|
||||
skygw_log_write_flush(LE,
|
||||
"%s: Error: Inadequate user permissions for service. Service not started.",
|
||||
service->name);
|
||||
service->state = SERVICE_STATE_FAILED;
|
||||
}
|
||||
return listeners;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1022,6 +1054,18 @@ serviceSetTimeout(SERVICE *service, int val)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable or disable the restarting of the service on failure.
|
||||
* @param service Service to configure
|
||||
* @param value A string representation of a boolean value
|
||||
*/
|
||||
void serviceSetRetryOnFailure(SERVICE *service, char* value)
|
||||
{
|
||||
if(value)
|
||||
{
|
||||
service->retry_start = config_truth_value(value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim whitespace from the from an rear of a string
|
||||
@ -2055,3 +2099,12 @@ int serviceInitSSL(SERVICE* service)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/**
|
||||
* Function called by the housekeeper thread to retry starting of a service
|
||||
* @param data Service to restart
|
||||
*/
|
||||
void service_interal_restart(void *data)
|
||||
{
|
||||
SERVICE* service = (SERVICE*)data;
|
||||
serviceStartAllPorts(service);
|
||||
}
|
||||
Reference in New Issue
Block a user