service.h: Added the 'serviceStartFailedListeners' function, which tries to start services whose routers started successfully but whose listeners did not.
mysql_mon.c: Added a call to serviceStartFailedListeners when servers become available.
This commit is contained in:
Markus Makela
2015-01-05 13:28:27 +02:00
parent e27b51392a
commit 6adccb3c17
4 changed files with 83 additions and 15 deletions

View File

@ -331,7 +331,12 @@ serviceStart(SERVICE *service)
{ {
SERV_PROTOCOL *port; SERV_PROTOCOL *port;
int listeners = 0; int listeners = 0;
if(service->router_instance == NULL)
{
/*
* This is the first time this service's router is being started or the
* previous attempt failed.
*/
if((service->router_instance = service->router->createInstance(service, if((service->router_instance = service->router->createInstance(service,
service->routerOptions)) == NULL) service->routerOptions)) == NULL)
{ {
@ -341,6 +346,7 @@ int listeners = 0;
service->state = SERVICE_STATE_FAILED; service->state = SERVICE_STATE_FAILED;
return 0; return 0;
} }
}
port = service->ports; port = service->ports;
while (!service->svc_do_shutdown && port) while (!service->svc_do_shutdown && port)
@ -353,7 +359,10 @@ int listeners = 0;
service->state = SERVICE_STATE_STARTED; service->state = SERVICE_STATE_STARTED;
service->stats.started = time(0); service->stats.started = time(0);
} }
else
{
service->state = SERVICE_STATE_LISTENER_FAILED;
}
return listeners; return listeners;
} }
@ -408,6 +417,40 @@ int n = 0,i;
return n; return n;
} }
/**
* Try to start services that failed to start their listeners but successfully
* started their routers.
* @return Number of successfully started services
*/
int
serviceStartFailedListeners()
{
SERVICE *ptr;
int n = 0,i;
spinlock_acquire(&service_spin);
ptr = allServices;
spinlock_release(&service_spin);
while (ptr && !ptr->svc_do_shutdown)
{
if(ptr->state == SERVICE_STATE_LISTENER_FAILED)
{
n += (i = serviceStart(ptr));
if(i == 0)
{
LOGIF(LE, (skygw_log_write(
LOGFILE_ERROR,
"Error : Failed to start service '%s'.",
ptr->name)));
}
}
ptr = ptr->next;
}
return n;
}
/** /**
* Stop a service * Stop a service
* *
@ -815,7 +858,6 @@ SERVICE *service;
return service; return service;
} }
/** /**
* Print details of an individual service * Print details of an individual service
* *

View File

@ -130,6 +130,7 @@ typedef struct server {
(SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_MAINT)) == \ (SERVER_RUNNING|SERVER_MASTER|SERVER_SLAVE|SERVER_MAINT)) == \
(SERVER_RUNNING|SERVER_MASTER)) (SERVER_RUNNING|SERVER_MASTER))
/**
 * Evaluates to non-zero when the SERVER_RUNNING bit is not set in the given
 * status bitmask. The argument is parenthesized so that compound expressions
 * such as (a | b) are masked as a whole.
 */
#define SRV_DOWN_STATUS(status) (((status) & SERVER_RUNNING) == 0)
/** /**
* Is the server valid candidate for root master. The server must be running, * Is the server valid candidate for root master. The server must be running,
* marked as master and not have maintenance bit set. * marked as master and not have maintenance bit set.

View File

@ -152,6 +152,8 @@ typedef enum count_spec_t {COUNT_NONE=0, COUNT_ATLEAST, COUNT_EXACT, COUNT_ATMOS
#define SERVICE_STATE_STARTED 2 /**< The service has been started */ #define SERVICE_STATE_STARTED 2 /**< The service has been started */
#define SERVICE_STATE_FAILED 3 /**< The service failed to start */ #define SERVICE_STATE_FAILED 3 /**< The service failed to start */
#define SERVICE_STATE_STOPPED 4 /**< The service has been stopped */ #define SERVICE_STATE_STOPPED 4 /**< The service has been stopped */
#define SERVICE_STATE_LISTENER_FAILED 5 /**< The service successfully started the
router but failed to start its listeners */
extern SERVICE *service_alloc(const char *, const char *); extern SERVICE *service_alloc(const char *, const char *);
extern int service_free(SERVICE *); extern int service_free(SERVICE *);
@ -165,6 +167,7 @@ extern void serviceAddRouterOption(SERVICE *, char *);
extern void serviceClearRouterOptions(SERVICE *); extern void serviceClearRouterOptions(SERVICE *);
extern int serviceStart(SERVICE *); extern int serviceStart(SERVICE *);
extern int serviceStartAll(); extern int serviceStartAll();
extern int serviceStartFailedListeners();
extern void serviceStartProtocol(SERVICE *, char *, int); extern void serviceStartProtocol(SERVICE *, char *, int);
extern int serviceStop(SERVICE *); extern int serviceStop(SERVICE *);
extern int serviceRestart(SERVICE *); extern int serviceRestart(SERVICE *);

View File

@ -64,6 +64,8 @@
#include <dcb.h> #include <dcb.h>
#include <modinfo.h> #include <modinfo.h>
#include "service.h"
/** Defined in log_manager.cc */ /** Defined in log_manager.cc */
extern int lm_enabled_logfiles_bitmask; extern int lm_enabled_logfiles_bitmask;
extern size_t log_ses_count[]; extern size_t log_ses_count[];
@ -610,7 +612,7 @@ int num_servers=0;
MONITOR_SERVERS *root_master = NULL; MONITOR_SERVERS *root_master = NULL;
size_t nrounds = 0; size_t nrounds = 0;
int log_no_master = 1; int log_no_master = 1;
int new_backends = 0;
if (mysql_thread_init()) if (mysql_thread_init())
{ {
LOGIF(LE, (skygw_log_write_flush( LOGIF(LE, (skygw_log_write_flush(
@ -649,6 +651,9 @@ int log_no_master = 1;
/* reset num_servers */ /* reset num_servers */
num_servers = 0; num_servers = 0;
/* reset new_backends */
new_backends = 0;
/* start from the first server in the list */ /* start from the first server in the list */
ptr = handle->databases; ptr = handle->databases;
@ -692,6 +697,12 @@ int log_no_master = 1;
{ {
dcb_call_foreach(DCB_REASON_NOT_RESPONDING); dcb_call_foreach(DCB_REASON_NOT_RESPONDING);
} }
if(SRV_DOWN_STATUS(ptr->mon_prev_status) &&
SERVER_IS_RUNNING(ptr->server))
{
new_backends++;
}
} }
if (mon_status_changed(ptr)) if (mon_status_changed(ptr))
@ -727,6 +738,17 @@ int log_no_master = 1;
ptr = ptr->next; ptr = ptr->next;
} }
/**
* Some new servers are now running. Try to start services
* that failed to start their listeners but successfully created
* their router instances.
*/
if(new_backends > 0)
{
serviceStartFailedListeners();
}
ptr = handle->databases; ptr = handle->databases;
/* if only one server is configured, that's is Master */ /* if only one server is configured, that's is Master */
if (num_servers == 1) { if (num_servers == 1) {