readwritesplit.c: prevent switching the master during session. Added logging to cases where master has changed.

Moved DCB's member errhandle_called behing DEBUG flags to Release build. It shows if handleError is called for a DCB and makes it possible to avoid redundant calls.
This commit is contained in:
VilhoRaatikka
2014-11-10 14:15:32 +02:00
parent 3b07449daa
commit 62270412cf
4 changed files with 77 additions and 57 deletions

View File

@ -121,8 +121,8 @@ DCB *rval;
#if defined(SS_DEBUG) #if defined(SS_DEBUG)
rval->dcb_chk_top = CHK_NUM_DCB; rval->dcb_chk_top = CHK_NUM_DCB;
rval->dcb_chk_tail = CHK_NUM_DCB; rval->dcb_chk_tail = CHK_NUM_DCB;
rval->dcb_errhandle_called = false;
#endif #endif
rval->dcb_errhandle_called = false;
rval->dcb_role = role; rval->dcb_role = role;
spinlock_init(&rval->dcb_initlock); spinlock_init(&rval->dcb_initlock);
spinlock_init(&rval->writeqlock); spinlock_init(&rval->writeqlock);

View File

@ -205,9 +205,9 @@ typedef struct dcb_callback {
typedef struct dcb { typedef struct dcb {
#if defined(SS_DEBUG) #if defined(SS_DEBUG)
skygw_chk_t dcb_chk_top; skygw_chk_t dcb_chk_top;
bool dcb_errhandle_called;
#endif #endif
dcb_role_t dcb_role; bool dcb_errhandle_called; /*< this can be called only once */
dcb_role_t dcb_role;
SPINLOCK dcb_initlock; SPINLOCK dcb_initlock;
DCBEVENTQ evq; /**< The event queue for this DCB */ DCBEVENTQ evq; /**< The event queue for this DCB */
int fd; /**< The descriptor */ int fd; /**< The descriptor */

View File

@ -828,6 +828,17 @@ static void handleError(
SESSION *session = backend_dcb->session; SESSION *session = backend_dcb->session;
session_state_t sesstate; session_state_t sesstate;
/** Don't handle same error twice on same DCB */
if (backend_dcb->dcb_errhandle_called)
{
/** we optimistically assume that previous call succeed */
*succp = true;
return;
}
else
{
backend_dcb->dcb_errhandle_called = true;
}
spinlock_acquire(&session->ses_lock); spinlock_acquire(&session->ses_lock);
sesstate = session->state; sesstate = session->state;
client_dcb = session->client; client_dcb = session->client;

View File

@ -1029,6 +1029,9 @@ static void freeSession(
/** /**
* Provide the router with a pointer to a suitable backend dcb. * Provide the router with a pointer to a suitable backend dcb.
*
* As of Nov. 2014, slave which has least connections is always chosen.
*
* Detect failures in server statuses and reselect backends if necessary. * Detect failures in server statuses and reselect backends if necessary.
* If name is specified, server name becomes primary selection criteria. * If name is specified, server name becomes primary selection criteria.
* *
@ -2074,14 +2077,11 @@ static int routeQuery(
"route to master " "route to master "
"but couldn't find " "but couldn't find "
"master in a " "master in a "
"suitable state " "suitable state.")));
"failed.")));
} }
/** /**
* Master has changed. Set the dcb pointer NULL and * Master has changed. Return with error indicator.
* return with error indicator.
*/ */
router_cli_ses->rses_master_ref->bref_dcb = NULL;
rses_end_locked_router_action(router_cli_ses); rses_end_locked_router_action(router_cli_ses);
succp = false; succp = false;
ret = 0; ret = 0;
@ -2655,7 +2655,7 @@ static bool select_connect_backend_servers(
const int min_nslaves = 0; /*< not configurable at the time */ const int min_nslaves = 0; /*< not configurable at the time */
bool is_synced_master; bool is_synced_master;
int (*p)(const void *, const void *); int (*p)(const void *, const void *);
BACKEND* master_host = NULL; BACKEND* master_host;
if (p_master_ref == NULL || backend_ref == NULL) if (p_master_ref == NULL || backend_ref == NULL)
{ {
@ -2667,46 +2667,32 @@ static bool select_connect_backend_servers(
/* get the root Master */ /* get the root Master */
master_host = get_root_master(backend_ref, router_nservers); master_host = get_root_master(backend_ref, router_nservers);
/** /**
* Master is already chosen and connected. It means that the function * Existing session : master is already chosen and connected.
* was called from error handling function or from some other similar * The function was called because new slave must be selected to replace
* function where session was already established but new slaves needed * failed one.
* to be selected.
*/ */
if (*p_master_ref != NULL && if (*p_master_ref != NULL)
BREF_IS_IN_USE((*p_master_ref))) {
{
LOGIF(LD, (skygw_log_write(
LOGFILE_DEBUG,
"%lu [select_connect_backend_servers] Master %p fd %d found.",
pthread_self(),
(*p_master_ref)->bref_dcb,
(*p_master_ref)->bref_dcb->fd)));
master_found = true;
master_connected = true;
/** /**
* Ensure that *p_master_ref and master_host point to same backend * Ensure that backend reference is in use, stored master is
* and it has a master role. * still current root master.
*/ */
ss_dassert(master_host && if (!BREF_IS_IN_USE((*p_master_ref)) ||
((*p_master_ref)->bref_backend->backend_server == !SERVER_IS_MASTER((*p_master_ref)->bref_backend->backend_server) ||
master_host->backend_server) && master_host != (*p_master_ref)->bref_backend)
(master_host->backend_server->status & {
(SERVER_MASTER|SERVER_MAINT)) == SERVER_MASTER); succp = false;
} goto return_succp;
/** New session or master failure case */ }
master_found = true;
master_connected = true;
}
/**
* New session : select master and slaves
*/
else else
{ {
LOGIF(LD, (skygw_log_write(
LOGFILE_DEBUG,
"%lu [select_connect_backend_servers] Session %p doesn't "
"currently have a master chosen. Proceeding to master "
"selection.",
pthread_self(),
session)));
master_found = false; master_found = false;
master_connected = false; master_connected = false;
} }
@ -2745,11 +2731,6 @@ static bool select_connect_backend_servers(
b->backend_conn_count))); b->backend_conn_count)));
} }
#endif #endif
/* assert with master_host */
ss_dassert(!master_connected ||
(master_host &&
((*p_master_ref)->bref_backend->backend_server == master_host->backend_server) &&
SERVER_MASTER));
/** /**
* Sort the pointer list to servers according to connection counts. As * Sort the pointer list to servers according to connection counts. As
* a consequence those backends having least connections are in the * a consequence those backends having least connections are in the
@ -2903,6 +2884,9 @@ static bool select_connect_backend_servers(
else if (master_host && else if (master_host &&
(b->backend_server == master_host->backend_server)) (b->backend_server == master_host->backend_server))
{ {
/** not allowed to replace old master with new */
ss_dassert(*p_master_ref == NULL);
*p_master_ref = &backend_ref[i]; *p_master_ref = &backend_ref[i];
if (master_connected) if (master_connected)
@ -4072,7 +4056,6 @@ static void rwsplit_process_router_options(
* Even if succp == true connecting to new slave may have failed. succp is to * Even if succp == true connecting to new slave may have failed. succp is to
* tell whether router has enough master/slave connections to continue work. * tell whether router has enough master/slave connections to continue work.
*/ */
static void handleError ( static void handleError (
ROUTER* instance, ROUTER* instance,
void* router_session, void* router_session,
@ -4086,10 +4069,17 @@ static void handleError (
ROUTER_CLIENT_SES* rses = (ROUTER_CLIENT_SES *)router_session; ROUTER_CLIENT_SES* rses = (ROUTER_CLIENT_SES *)router_session;
CHK_DCB(backend_dcb); CHK_DCB(backend_dcb);
#if defined(SS_DEBUG) /** Don't handle same error twice on same DCB */
ss_dassert(!backend_dcb->dcb_errhandle_called); if (backend_dcb->dcb_errhandle_called)
backend_dcb->dcb_errhandle_called = true; {
#endif /** we optimistically assume that previous call succeed */
*succp = true;
return;
}
else
{
backend_dcb->dcb_errhandle_called = true;
}
session = backend_dcb->session; session = backend_dcb->session;
if (session != NULL) if (session != NULL)
@ -4107,7 +4097,8 @@ static void handleError (
return; return;
} }
if (rses->rses_master_ref->bref_dcb == backend_dcb) if (rses->rses_master_ref->bref_dcb == backend_dcb &&
!SERVER_IS_MASTER(rses->rses_master_ref->bref_backend->backend_server))
{ {
/** Master failed, can't recover */ /** Master failed, can't recover */
LOGIF(LE, (skygw_log_write_flush( LOGIF(LE, (skygw_log_write_flush(
@ -4206,6 +4197,11 @@ static bool handle_error_new_connection(
} }
CHK_BACKEND_REF(bref); CHK_BACKEND_REF(bref);
/**
* If query was sent through the bref and it is waiting for reply from
* the backend server it is necessary to send an error to the client
* because it is waiting for reply.
*/
if (BREF_IS_WAITING_RESULT(bref)) if (BREF_IS_WAITING_RESULT(bref))
{ {
DCB* client_dcb; DCB* client_dcb;
@ -4473,6 +4469,10 @@ static backend_ref_t* get_bref_from_dcb(
return bref; return bref;
} }
/**
* Calls hang-up function for DCB if it is not both running and in
* master/slave/joined/ndb role. Called by DCB's callback routine.
*/
static int router_handle_state_switch( static int router_handle_state_switch(
DCB* dcb, DCB* dcb,
DCB_REASON reason, DCB_REASON reason,
@ -4513,6 +4513,7 @@ return_rc:
return rc; return rc;
} }
static sescmd_cursor_t* backend_ref_get_sescmd_cursor ( static sescmd_cursor_t* backend_ref_get_sescmd_cursor (
backend_ref_t* bref) backend_ref_t* bref)
{ {
@ -4634,7 +4635,7 @@ static BACKEND *get_root_master(
* Servers are checked even if they are in 'maintenance' * Servers are checked even if they are in 'maintenance'
* *
* @param rses pointer to router session * @param rses pointer to router session
* @return pointer to backend reference of the root master * @return pointer to backend reference of the root master or NULL
* *
*/ */
static backend_ref_t* get_root_master_bref( static backend_ref_t* get_root_master_bref(
@ -4664,6 +4665,14 @@ static backend_ref_t* get_root_master_bref(
bref++; bref++;
i += 1; i += 1;
} }
if (candidate_bref == NULL)
{
LOGIF(LE, (skygw_log_write_flush(
LOGFILE_ERROR,
"Error : Could not find master among the backend "
"servers. Previous master state : %s",
STRSRVSTATUS(BREFSRV(rses->rses_master_ref)))));
}
return candidate_bref; return candidate_bref;
} }